diff --git a/.github/workflows/check-urls.yml b/.github/workflows/check-urls.yml index 67d7731..d56adba 100644 --- a/.github/workflows/check-urls.yml +++ b/.github/workflows/check-urls.yml @@ -42,6 +42,6 @@ jobs: print_all: false timeout: 2 retry_count# : 2 - exclude_urls: https://hal.archives-ouvertes.fr/hal-00990252/document - exclude_patterns: https://www.data.gouv.fr/fr/datasets/r/e3d83ab3-dc52-4c99-abaf-8a38050cc68c,https://dev.azure.com/ + exclude_urls: https://hal.archives-ouvertes.fr/hal-00990252/document,https://github.com/onnx/tensorflow-onnx + exclude_patterns: https://www.data.gouv.fr/fr/datasets/r/e3d83ab3-dc52-4c99-abaf-8a38050cc68c,https://dev.azure.com/,https://github.com/onnx/tensorflow-onnx # force_pass : true diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index ba80296..3ad7c7c 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -21,7 +21,7 @@ jobs: - uses: actions/setup-python@v4 with: - python-version: '3.11' + python-version: '3.12' - uses: tlylt/install-graphviz@v1 @@ -35,7 +35,7 @@ jobs: run: python -m pip install -r requirements-dev.txt - name: Cache pip - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: ~/.cache/pip key: ${{ runner.os }}-pip-${{ hashFiles('requirements-dev.txt') }} @@ -57,7 +57,7 @@ jobs: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} - name: Install - run: python setup.py install + run: python -m pip install -e . -v - name: Copy license, changelogs run: | @@ -83,6 +83,6 @@ jobs: exit 1 fi - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: path: ./dist/html/** diff --git a/.github/workflows/wheels-any.yml b/.github/workflows/wheels-any.yml index c20a15d..4bf89c7 100644 --- a/.github/workflows/wheels-any.yml +++ b/.github/workflows/wheels-any.yml @@ -19,11 +19,11 @@ jobs: - uses: actions/setup-python@v4 with: - python-version: '3.11' + python-version: '3.12' - name: build wheel run: python -m pip wheel . 
-      - uses: actions/upload-artifact@v3
+      - uses: actions/upload-artifact@v4
        with:
          path: ./onnx_array_api*.whl
diff --git a/.gitignore b/.gitignore
index f2c1f91..64d45d6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@
 *.dylib
 *.so
 *.whl
+*.xlsx
 coverage.html/*
 _cache/*
 .coverage
@@ -13,6 +14,8 @@ build/*
 *egg-info/*
 onnxruntime_profile*
 prof
+test*.png
+_doc/sg_execution_times.rst
 _doc/auto_examples/*
 _doc/examples/_cache/*
 _doc/examples/onnxruntime_profile*
diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst
index 706cfed..8a91bbe 100644
--- a/CHANGELOGS.rst
+++ b/CHANGELOGS.rst
@@ -1,6 +1,42 @@
 Change Logs
 ===========
 
+0.3.1
++++++
+
+* :pr:`100`: updates requirements, add 3.12
+* :pr:`96`: supports local functions in translator
+* :pr:`95`: improves translation to GraphBuilder
+
+0.3.0
++++++
+
+* :pr:`93`: fixes evaluator type in ``compare_onnx_execution``
+* :pr:`92`: avoids recursion errors in profiling
+* :pr:`87`: adds command line to replace constant by ConstantOfShape
+* :pr:`79`: first draft to export to GraphBuilder
+* :pr:`77`: supports ConcatOfShape and Slice with the light API
+
+0.2.0
++++++
+
+* :pr:`76`, :pr:`79`: add a mode to compare models without execution
+* :pr:`75`: add QuickGelu to ExtendedReferenceEvaluator
+* :pr:`71`: adds tools to compare two onnx graphs
+* :pr:`61`: adds function to plot onnx model as graphs
+* :pr:`60`: supports translation of local functions
+* :pr:`59`: add methods to update nodes in GraphAPI
+
+0.1.3
++++++
+
+* :pr:`57`: implements GraphBuilder
+* :pr:`49`: adds command line to export a model into code
+* :pr:`48`: support for subgraph in light API
+* :pr:`47`: extends export onnx to code to support inner API
+* :pr:`46`: adds an export to convert an onnx graph into light API code
+* :pr:`45`: fixes light API for operators with two outputs
+
 0.1.2
 +++++
 
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000..b4e1709
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,15 @@
+# Code of Conduct
+
+We are a community based on openness, as well as friendly and didactic discussions.
+
+We aspire to treat everybody equally, and value their contributions.
+
+Decisions are made based on technical merit and consensus.
+
+Code is not the only way to help the project. Reviewing pull requests,
+answering questions to help others on mailing lists or issues, organizing and
+teaching tutorials, working on the website, improving the documentation, are
+all priceless contributions.
+
+We abide by the principles of openness, respect, and consideration of others of
+the Python Software Foundation: https://www.python.org/psf/codeofconduct/
diff --git a/LICENSE.txt b/LICENSE.txt
index fa034ef..1a46a8e 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -1,4 +1,4 @@
-Copyright (c) 2023, Xavier Dupré
+Copyright (c) 2023-2025, Xavier Dupré
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/README.rst b/README.rst
index 035911d..f7b387f 100644
--- a/README.rst
+++ b/README.rst
@@ -31,6 +31,14 @@ onnx-array-api: APIs to create ONNX Graphs
 **onnx-array-api** implements APIs to create custom ONNX graphs.
 The objective is to speed up the implementation of converter libraries.
 
+The library is released on
+`pypi/onnx-array-api <https://pypi.org/project/onnx-array-api/>`_
+and its documentation is published at
+`APIs to create ONNX Graphs <https://sdpython.github.io/doc/onnx-array-api/dev/>`_.
+
+Numpy API
++++++++++
+
 The first one matches **numpy API**.
It gives the user the ability to convert functions written
following the numpy API to convert that function into ONNX as
@@ -113,10 +121,15 @@ It supports eager mode as well:
     l2_loss=[0.002]
     [0.042]
 
-The second API ir **Light API** tends to do every thing in one line.
+Light API
++++++++++
+
+The second API, or **Light API**, tends to do everything in one line.
+It is inspired by the `Reverse Polish Notation
+<https://en.wikipedia.org/wiki/Reverse_Polish_notation>`_.
 The euclidean distance looks like the following:
 
-::
+.. code-block:: python
 
     import numpy as np
     from onnx_array_api.light_api import start
@@ -138,7 +151,29 @@ The euclidean distance looks like the following:
         .to_onnx()
     )
 
-The library is released on
-`pypi/onnx-array-api <https://pypi.org/project/onnx-array-api/>`_
-and its documentation is published at
-`(Numpy) Array API for ONNX <https://sdpython.github.io/doc/onnx-array-api/dev/>`_.
+GraphBuilder API
+++++++++++++++++
+
+Almost every converter library (converting a machine learned model to ONNX) implements
+its own graph builder and customizes it for its needs.
+It handles frequent tasks such as giving names to intermediate
+results, or loading and saving onnx models. It can also be used to extend an existing graph.
+
+.. code-block:: python
+
+    import numpy as np
+    from onnx_array_api.graph_api import GraphBuilder
+
+    g = GraphBuilder()
+    g.make_tensor_input("X", np.float32, (None, None))
+    g.make_tensor_input("Y", np.float32, (None, None))
+    r1 = g.make_node("Sub", ["X", "Y"])  # the output name is chosen by the class,
+                                         # which ensures it is unique
+    init = g.make_initializer(np.array([2], dtype=np.int64))  # the class automatically
+                                                              # converts the array to a tensor
+    r2 = g.make_node("Pow", [r1, init])
+    g.make_node("ReduceSum", [r2], outputs=["Z"])  # the output name is given because
+                                                   # the user wants to choose the name
+    g.make_tensor_output("Z", np.float32, (None, None))
+
+    onx = g.to_onnx()  # final conversion to onnx
diff --git a/_doc/api/graph_api.rst b/_doc/api/graph_api.rst
new file mode 100644
index 0000000..f618b7b
--- /dev/null
+++ b/_doc/api/graph_api.rst
@@ -0,0 +1,22 @@
+========================
+onnx_array_api.graph_api
+========================
+
+
+GraphBuilder
+============
+
+.. autoclass:: onnx_array_api.graph_api.GraphBuilder
+    :members:
+
+NodePattern
+===========
+
+.. autoclass:: onnx_array_api.graph_api.NodePattern
+    :members:
+
+OptimizationOptions
+===================
+
+.. autoclass:: onnx_array_api.graph_api.graph_builder.OptimizationOptions
+    :members:
diff --git a/_doc/api/index.rst b/_doc/api/index.rst
index 0f595f0..8cfe033 100644
--- a/_doc/api/index.rst
+++ b/_doc/api/index.rst
@@ -7,7 +7,9 @@ API
    :maxdepth: 1
 
    array_api
+   graph_api
    light_api
+   translate_api
    npx_core_api
    npx_functions
    npx_jit_eager
diff --git a/_doc/api/light_api.rst b/_doc/api/light_api.rst
index 471eb66..e2a2d32 100644
--- a/_doc/api/light_api.rst
+++ b/_doc/api/light_api.rst
@@ -2,33 +2,80 @@
 onnx_array_api.light_api
 ========================
 
+
+Main API
+========
+
 start
-=====
++++++
 
 .. autofunction:: onnx_array_api.light_api.start
 
+g
++
+
+.. autofunction:: onnx_array_api.light_api.g
+
+Classes for the Light API
+=========================
+
+domain
+++++++
+
+.. autofunction:: onnx_array_api.light_api.domain
+
+BaseVar
++++++++
+
+.. autoclass:: onnx_array_api.light_api.var.BaseVar
+    :members:
+
 OnnxGraph
-=========
++++++++++
 
 .. autoclass:: onnx_array_api.light_api.OnnxGraph
     :members:
 
-BaseVar
-=======
+ProtoType
++++++++++
 
-.. autoclass:: onnx_array_api.light_api.var.BaseVar
+.. autoclass:: onnx_array_api.light_api.model.ProtoType
+    :members:
+
+SubDomain
++++++++++
+
+..
autoclass:: onnx_array_api.light_api.var.SubDomain :members: Var -=== ++++ .. autoclass:: onnx_array_api.light_api.Var :members: :inherited-members: Vars -==== +++++ .. autoclass:: onnx_array_api.light_api.Vars :members: :inherited-members: + +Available operators +=================== + +One input ++++++++++ + +.. autoclass:: onnx_array_api.light_api._op_var.OpsVar + :members: + +Two inputs or more +++++++++++++++++++ + +.. autoclass:: onnx_array_api.light_api._op_vars.OpsVars + :members: + + + diff --git a/_doc/api/plotting.rst b/_doc/api/plotting.rst index 830cc86..db6076c 100644 --- a/_doc/api/plotting.rst +++ b/_doc/api/plotting.rst @@ -6,6 +6,8 @@ Dot .. autofunction:: onnx_array_api.plotting.dot_plot.to_dot +.. autofunction:: onnx_array_api.plotting.graphviz_helper.plot_dot + Statistics ++++++++++ diff --git a/_doc/api/reference.rst b/_doc/api/reference.rst index acbf90a..3b4ae7d 100644 --- a/_doc/api/reference.rst +++ b/_doc/api/reference.rst @@ -5,3 +5,33 @@ ExtendedReferenceEvaluator ++++++++++++++++++++++++++ .. autoclass:: onnx_array_api.reference.ExtendedReferenceEvaluator + :members: + +ResultType +++++++++++ + +.. autoclass:: onnx_array_api.reference.ResultType + :members: + +ResultExecution ++++++++++++++++ + +.. autoclass:: onnx_array_api.reference.ResultExecution + :members: + +YieldEvaluator +++++++++++++++ + +.. autoclass:: onnx_array_api.reference.YieldEvaluator + :members: + +DistanceExecution ++++++++++++++++++ + +.. autoclass:: onnx_array_api.reference.DistanceExecution + :members: + +compare_onnx_execution +++++++++++++++++++++++ + +.. autofunction:: onnx_array_api.reference.compare_onnx_execution diff --git a/_doc/api/tools.rst b/_doc/api/tools.rst index ef161e0..e0450dc 100644 --- a/_doc/api/tools.rst +++ b/_doc/api/tools.rst @@ -6,6 +6,11 @@ Benchmark .. autofunction:: onnx_array_api.ext_test_case.measure_time +Manipulations ++++++++++++++ + +.. autofunction:: onnx_array_api.tools.replace_constants.replace_initializer_by_constant_of_shape + Examples ++++++++ diff --git a/_doc/api/translate_api.rst b/_doc/api/translate_api.rst new file mode 100644 index 0000000..f2d90df --- /dev/null +++ b/_doc/api/translate_api.rst @@ -0,0 +1,58 @@ +============================ +onnx_array_api.translate_api +============================ + + +Main API +======== + +translate ++++++++++ + +.. autofunction:: onnx_array_api.translate_api.translate + +make_helper ++++++++++++ + +.. autofunction:: onnx_array_api.translate_api.make_helper.make_node_extended + +.. autofunction:: onnx_array_api.translate_api.make_helper.make_ref_attribute + +Classes for the Translater +========================== + +BaseEmitter ++++++++++++ + +.. autoclass:: onnx_array_api.translate_api.base_emitter.BaseEmitter + :members: + +EventType ++++++++++ + +.. autoclass:: onnx_array_api.translate_api.base_emitter.EventType + :members: + +InnerEmitter +++++++++++++ + +.. autoclass:: onnx_array_api.translate_api.inner_emitter.InnerEmitter + :members: + +InnerEmitterShortInitializer +++++++++++++++++++++++++++++ + +.. autoclass:: onnx_array_api.translate_api.inner_emitter.InnerEmitterShortInitializer + :members: + +LightEmitter +++++++++++++ + +.. autoclass:: onnx_array_api.translate_api.light_emitter.LightEmitter + :members: + +Translater +++++++++++ + +.. 
autoclass:: onnx_array_api.translate_api.translate.Translater
+    :members:
diff --git a/_doc/command_lines.rst b/_doc/command_lines.rst
new file mode 100644
index 0000000..38ca5f2
--- /dev/null
+++ b/_doc/command_lines.rst
@@ -0,0 +1,52 @@
+=============
+command lines
+=============
+
+compare
+=======
+
+The command compares the execution of two onnx models.
+
+::
+
+    python -m compare -m1 model1.onnx -m2 model2.onnx -v 1
+
+Output example::
+
+    [compare_onnx_execution] got 2 inputs
+    [compare_onnx_execution] execute first model
+    [compare_onnx_execution] got 5 results
+    [compare_onnx_execution] execute second model
+    [compare_onnx_execution] got 5 results
+    [compare_onnx_execution] compute edit distance
+    [compare_onnx_execution] got 4 pairs
+    [compare_onnx_execution] done
+    = | INPUT  float32  5x6  AAAA     X   | INPUT  float32  5x6  AAAA     X
+    = | INPUT  float32  5x6  AAAA     Y   | INPUT  float32  5x6  AAAA     Y
+    = | RESULT float32  5x6  AABB Add res | RESULT float32  5x6  AABB Add res
+    = | RESULT float32  5x6  AAAA Cos Z   | RESULT float32  5x6  AAAA Cos Z
+
+.. runpython::
+
+    from onnx_array_api._command_lines_parser import get_parser_compare
+    get_parser_compare().print_help()
+
+See function :func:`onnx_array_api.reference.compare_onnx_execution`.
+
+translate
+=========
+
+The command converts an onnx file into some code.
+
+::
+
+    python -m translate ...
+
+Output example::
+
+    not yet ready
+
+.. runpython::
+
+    from onnx_array_api._command_lines_parser import get_parser_translate
+    get_parser_translate().print_help()
diff --git a/_doc/conf.py b/_doc/conf.py
index 925dc11..eaf8eb1 100644
--- a/_doc/conf.py
+++ b/_doc/conf.py
@@ -35,7 +35,7 @@
 source_suffix = ".rst"
 master_doc = "index"
 project = "onnx-array-api"
-copyright = "2023, Xavier Dupré"
+copyright = "2023-2024, Xavier Dupré"
 author = "Xavier Dupré"
 version = __version__
 release = __version__
@@ -117,9 +117,11 @@
     "ast": "https://docs.python.org/3/library/ast.html",
     "cProfile.Profile": "https://docs.python.org/3/library/profile.html#profile.Profile",
    "DOT": "https://graphviz.org/doc/info/lang.html",
+    "Graphviz": "https://graphviz.org/",
     "inner API": "https://onnx.ai/onnx/intro/python.html",
     "JIT": "https://en.wikipedia.org/wiki/Just-in-time_compilation",
     "onnx": "https://onnx.ai/onnx/",
+    "onnx-graphsurgeon": "https://github.com/NVIDIA/TensorRT/tree/main/tools/onnx-graphsurgeon",
     "onnx.helper": "https://onnx.ai/onnx/api/helper.html",
     "ONNX": "https://onnx.ai/",
     "ONNX Operators": "https://onnx.ai/onnx/operators/",
@@ -144,11 +146,9 @@
     "torch.onnx": "https://pytorch.org/docs/stable/onnx.html",
     #
     "C_OrtValue": (
-        "http://www.xavierdupre.fr/app/onnxcustom/helpsphinx/"
-        "api/onnxruntime_python/ortvalue.html#c-class-ortvalue-or-c-ortvalue"
+        "https://onnxruntime.ai/docs/api/csharp/api/Microsoft.ML.OnnxRuntime.OrtValue.html"
     ),
     "OrtValue": (
-        "http://www.xavierdupre.fr/app/onnxcustom/helpsphinx/"
-        "api/onnxruntime_python/ortvalue.html#onnxruntime.OrtValue"
+        "https://onnxruntime.ai/docs/api/python/api_summary.html#onnxruntime.OrtValue"
    ),
 }
diff --git a/_doc/examples/plot_benchmark_rf.py b/_doc/examples/plot_benchmark_rf.py
index 8b62e3d..c1ce486 100644
--- a/_doc/examples/plot_benchmark_rf.py
+++ b/_doc/examples/plot_benchmark_rf.py
@@ -12,6 +12,7 @@
 import and registration of necessary converters
 ++++++++++++++++++++++++++++++++++++++++++++++++
 """
+
 import pickle
 import os
 import time
@@ -21,8 +22,6 @@
 import numpy
 import pandas
 from lightgbm import LGBMRegressor
-from onnxmltools.convert.lightgbm.operator_converters.LightGbm import convert_lightgbm
-from
onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost
 from onnxruntime import InferenceSession, SessionOptions
 from psutil import cpu_count
 from sphinx_runpython.runpython import run_cmd
@@ -32,14 +31,16 @@
 from sklearn.ensemble import RandomForestRegressor
 from tqdm import tqdm
 from xgboost import XGBRegressor
+from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost
 
 
 def skl2onnx_convert_lightgbm(scope, operator, container):
+    from onnxmltools.convert.lightgbm.operator_converters.LightGbm import (
+        convert_lightgbm,
+    )
+
     options = scope.get_options(operator.raw_operator)
-    if "split" in options:
-        operator.split = options["split"]
-    else:
-        operator.split = None
+    operator.split = options.get("split", None)
     convert_lightgbm(scope, operator, container)
 
 
@@ -99,7 +100,7 @@ def measure_inference(fct, X, repeat, max_time=5, quantile=1):
     :return: number of runs, sum of the time, average, median
     """
     times = []
-    for n in range(repeat):
+    for _n in range(repeat):
         perf = time.perf_counter()
         fct(X)
         delta = time.perf_counter() - perf
@@ -237,7 +238,10 @@ def measure_inference(fct, X, repeat, max_time=5, quantile=1):
             # onnxruntime
             bar.set_description(f"J={n_j} E={n_estimators} D={max_depth} predictO")
             r, t, mean, med = measure_inference(
-                lambda x: sess.run(None, {"X": x}), X, repeat=repeat, max_time=max_time
+                lambda x, sess=sess: sess.run(None, {"X": x}),
+                X,
+                repeat=repeat,
+                max_time=max_time,
             )
             o2 = obs.copy()
             o2.update(dict(avg=mean, med=med, n_runs=r, ttime=t, name="ort_"))
diff --git a/_doc/examples/plot_onnx_diff.py b/_doc/examples/plot_onnx_diff.py
new file mode 100644
index 0000000..7b6ecdf
--- /dev/null
+++ b/_doc/examples/plot_onnx_diff.py
@@ -0,0 +1,69 @@
+"""
+
+.. _l-onnx-diff-example:
+
+Compares the conversions of the same model with different options
+=================================================================
+
+The script compares two onnx models obtained with the same trained
+scikit-learn model but converted with different options.
+
+A model
++++++++
+"""
+
+from sklearn.mixture import GaussianMixture
+from sklearn.datasets import load_iris
+from sklearn.model_selection import train_test_split
+from skl2onnx import to_onnx
+from onnx_array_api.reference import compare_onnx_execution
+from onnx_array_api.plotting.text_plot import onnx_simple_text_plot
+
+
+data = load_iris()
+X_train, X_test = train_test_split(data.data)
+model = GaussianMixture()
+model.fit(X_train)
+
+#################################
+# Conversion to onnx
+# ++++++++++++++++++
+
+onx = to_onnx(
+    model, X_train[:1], options={id(model): {"score_samples": True}}, target_opset=12
+)
+
+print(onnx_simple_text_plot(onx))
+
+##################################
+# Conversion to onnx without ReduceLogSumExp
+# ++++++++++++++++++++++++++++++++++++++++++
+
+onx2 = to_onnx(
+    model,
+    X_train[:1],
+    options={id(model): {"score_samples": True}},
+    black_op={"ReduceLogSumExp"},
+    target_opset=12,
+)
+
+print(onnx_simple_text_plot(onx2))
+
+
+#############################################
+# Differences
+# +++++++++++
+#
+# Function :func:`onnx_array_api.reference.compare_onnx_execution`
+# compares the intermediate results of two onnx models. Then it finds
+# the best alignment between the two models using an edit distance.
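+# res1 and res2 hold the execution results of each model, align is the
+# computed alignment and dc the DistanceExecution used to render it.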
+
+res1, res2, align, dc = compare_onnx_execution(onx, onx2, verbose=1)
+print("------------")
+text = dc.to_str(res1, res2, align)
+print(text)
+
+###############################
+# See :ref:`l-long-output-compare_onnx_execution` for a better view.
+# The display shows that ReduceSumSquare was replaced by Mul + ReduceSum,
+# and ReduceLogSumExp by ReduceMax + Sub + Exp + Log + Add.
diff --git a/_doc/examples/plot_onnxruntime.py b/_doc/examples/plot_onnxruntime.py
index fcace3e..0aba6ac 100644
--- a/_doc/examples/plot_onnxruntime.py
+++ b/_doc/examples/plot_onnxruntime.py
@@ -87,14 +87,14 @@ def loop(n=1000):
         x = np.random.randn(n, 2).astype(np.float32)
         y = np.random.randn(n, 2).astype(np.float32)
 
-        obs = measure_time(lambda: myloss(x, y))
+        obs = measure_time(lambda x=x, y=y: myloss(x, y))
         obs["name"] = "numpy"
         obs["n"] = n
         data.append(obs)
 
         xort = OrtTensor.from_array(x)
         yort = OrtTensor.from_array(y)
-        obs = measure_time(lambda: ort_myloss(xort, yort))
+        obs = measure_time(lambda xort=xort, yort=yort: ort_myloss(xort, yort))
         obs["name"] = "ort"
         obs["n"] = n
         data.append(obs)
diff --git a/_doc/examples/plot_optimization.py b/_doc/examples/plot_optimization.py
index 466fac0..c78419b 100644
--- a/_doc/examples/plot_optimization.py
+++ b/_doc/examples/plot_optimization.py
@@ -15,6 +15,7 @@
 Optimize a model with onnxruntime
 +++++++++++++++++++++++++++++++++
 """
+
 import os
 from pprint import pprint
 import numpy
diff --git a/_doc/examples/plot_profiling.py b/_doc/examples/plot_profiling.py
index 7a61b68..201de95 100644
--- a/_doc/examples/plot_profiling.py
+++ b/_doc/examples/plot_profiling.py
@@ -15,6 +15,7 @@
 Optimize a model with onnxruntime
 +++++++++++++++++++++++++++++++++
 """
+
 import os
 import numpy
 import matplotlib.pyplot as plt
diff --git a/_doc/index.rst b/_doc/index.rst
index 52d2cf6..9bdc4e2 100644
--- a/_doc/index.rst
+++ b/_doc/index.rst
@@ -36,6 +36,7 @@
     tutorial/index
     api/index
     tech/index
+    command_lines
     auto_examples/index
 
 .. toctree::
@@ -44,12 +45,86 @@
 
     CHANGELOGS
     license
-
-**Numpy API**
+    long_outputs
 
 Sources available on
 `github/onnx-array-api <https://github.com/sdpython/onnx-array-api>`_.
 
+GraphBuilder API
+++++++++++++++++
+
+Almost every converter library (converting a machine learned model to ONNX) implements
+its own graph builder and customizes it for its needs.
+It handles frequent tasks such as giving names to intermediate
+results, or loading and saving onnx models. It can also be used to extend an existing graph.
+See :ref:`l-graph-api`.
+
+..
runpython:: + :showcode: + + import numpy as np + from onnx_array_api.graph_api import GraphBuilder + from onnx_array_api.plotting.text_plot import onnx_simple_text_plot + + g = GraphBuilder() + g.make_tensor_input("X", np.float32, (None, None)) + g.make_tensor_input("Y", np.float32, (None, None)) + r1 = g.make_node("Sub", ["X", "Y"]) # the name given to the output is given by the class, + # it ensures the name is unique + init = g.make_initializer(np.array([2], dtype=np.int64)) # the class automatically + # converts the array to a tensor + r2 = g.make_node("Pow", [r1, init]) + g.make_node("ReduceSum", [r2], outputs=["Z"]) # the output name is given because + # the user wants to choose the name + g.make_tensor_output("Z", np.float32, (None, None)) + + onx = g.to_onnx() # final conversion to onnx + + print(onnx_simple_text_plot(onx)) + +Light API ++++++++++ + +The syntax is inspired from the +`Reverse Polish Notation `_. +This kind of API is easy to use to build new graphs, +less easy to extend an existing graph. See :ref:`l-light-api`. + +.. runpython:: + :showcode: + + import numpy as np + from onnx_array_api.light_api import start + from onnx_array_api.plotting.text_plot import onnx_simple_text_plot + + model = ( + start() + .vin("X") + .vin("Y") + .bring("X", "Y") + .Sub() + .rename("dxy") + .cst(np.array([2], dtype=np.int64), "two") + .bring("dxy", "two") + .Pow() + .ReduceSum() + .rename("Z") + .vout() + .to_onnx() + ) + + print(onnx_simple_text_plot(model)) + +Numpy API ++++++++++ + +Writing ONNX graphs requires to know ONNX syntax unless +it is possible to reuse an existing syntax such as :epkg:`numpy`. +This is what this API is doing. +This kind of API is easy to use to build new graphs, +almost impossible to use to extend new graphs as it usually requires +to know onnx for that. See :ref:`l-numpy-api-onnx`. + .. runpython:: :showcode: :warningout: DeprecationWarning, FutureWarning @@ -109,29 +184,10 @@ Sources available on res = jitted_myloss(x, y) print(to_dot(jitted_myloss.get_onnx())) -**Light API** - -.. runpython:: - :showcode: - - import numpy as np - from onnx_array_api.light_api import start - from onnx_array_api.plotting.text_plot import onnx_simple_text_plot - - model = ( - start() - .vin("X") - .vin("Y") - .bring("X", "Y") - .Sub() - .rename("dxy") - .cst(np.array([2], dtype=np.int64), "two") - .bring("dxy", "two") - .Pow() - .ReduceSum() - .rename("Z") - .vout() - .to_onnx() - ) +Older versions +++++++++++++++ - print(onnx_simple_text_plot(model)) +* `0.3.0 <../v0.3.0/index.html>`_ +* `0.2.0 <../v0.2.0/index.html>`_ +* `0.1.3 <../v0.1.3/index.html>`_ +* `0.1.2 <../v0.1.2/index.html>`_ diff --git a/_doc/long_outputs.rst b/_doc/long_outputs.rst new file mode 100644 index 0000000..745382b --- /dev/null +++ b/_doc/long_outputs.rst @@ -0,0 +1,47 @@ +:hide-toc: + +========================== +Long outputs uneasy to see +========================== + +onnx +==== + +.. _l-long-output-compare_onnx_execution: + +onnx_array_api.reference.compare_onnx_execution ++++++++++++++++++++++++++++++++++++++++++++++++ + +From example :ref:`l-onnx-diff-example` for function +:func:`onnx_array_api.reference.compare_onnx_execution`. +See also `raw rendering `_. 
+
+::
+
+     1 = | INITIA float64 1    HAAA            Ad_Addcst    | INITIA float64 1    HAAA            Ad_Addcst
+     2 = | INITIA float64 4x4  ADZF            Ge_Gemmcst   | INITIA float64 4x4  ADZF            Ge_Gemmcst
+     3 = | INITIA float64 4    USEA            Ge_Gemmcst1  | INITIA float64 4    USEA            Ge_Gemmcst1
+     4 = | INITIA float64 1    AAAA            Mu_Mulcst    | INITIA float64 1    AAAA            Mu_Mulcst
+     5 = | INITIA float64 1    DAAA            Ad_Addcst1   | INITIA float64 1    DAAA            Ad_Addcst1
+     6 = | INITIA float64 1    AAAA            Ad_Addcst2   | INITIA float64 1    AAAA            Ad_Addcst2
+     7 = | INPUT  float64 1x4  AAAA            X            | INPUT  float64 1x4  AAAA            X
+     8 = | RESULT float64 1x4  UTFC Gemm       Ge_Y0        | RESULT float64 1x4  UTFC Gemm       Ge_Y0
+     9 + |                                                  | RESULT float64 1x4  TIEG Mul        Mu_C01
+    10 ~ | RESULT float64 1x1  NAAA ReduceSumS Re_reduced0  | RESULT float64 1x1  NAAA ReduceSum  Re_reduced0
+    11 = | RESULT float64 1x1  NAAA Concat     Co_concat_re | RESULT float64 1x1  NAAA Concat     Co_concat_re
+    12 = | RESULT float64 1x1  UAAA Add        Ad_C02       | RESULT float64 1x1  UAAA Add        Ad_C02
+    13 = | RESULT float64 1x1  DAAA Mul        Mu_C0        | RESULT float64 1x1  DAAA Mul        Mu_C0
+    14 = | RESULT float64 1x1  GAAA Add        Ad_C01       | RESULT float64 1x1  GAAA Add        Ad_C01
+    15 = | RESULT float64 1x1  GAAA Add        Ad_C0        | RESULT float64 1x1  GAAA Add        Ad_C0
+    16 = | RESULT int64   1x1  AAAA ArgMax     label        | RESULT int64   1x1  AAAA ArgMax     label
+    17 + |                                                  | RESULT float64 1x1  GAAA ReduceMax  Re_reduced03
+    18 + |                                                  | RESULT float64 1x1  AAAA Sub        Su_C01
+    19 + |                                                  | RESULT float64 1x1  BAAA Exp        Ex_output0
+    20 + |                                                  | RESULT float64 1x1  BAAA ReduceSum  Re_reduced02
+    21 + |                                                  | RESULT float64 1x1  AAAA Log        Lo_output0
+    22 ~ | RESULT float64 1x1  GAAA ReduceLogS score_sample | RESULT float64 1x1  GAAA Add        score_sample
+    23 = | RESULT float64 1x1  AAAA Sub        Su_C0        | RESULT float64 1x1  AAAA Sub        Su_C0
+    24 = | RESULT float64 1x1  BAAA Exp        probabilitie | RESULT float64 1x1  BAAA Exp        probabilitie
+    25 = | OUTPUT int64   1x1  AAAA            label        | OUTPUT int64   1x1  AAAA            label
+    26 = | OUTPUT float64 1x1  BAAA            probabilitie | OUTPUT float64 1x1  BAAA            probabilitie
+    27 = | OUTPUT float64 1x1  GAAA            score_sample | OUTPUT float64 1x1  GAAA            score_sample
diff --git a/_doc/tech/aapi.rst b/_doc/tech/aapi.rst
index 13e6c02..bbbad63 100644
--- a/_doc/tech/aapi.rst
+++ b/_doc/tech/aapi.rst
@@ -8,7 +8,7 @@
 Python is not strongly typed and many different types can be used
 to represent a value. Argument *axis* can be an integer or a tuple
 (see `min from Array API
 <https://data-apis.org/array-api/latest/API_specification/
-generated/array_api.min.html>`
+generated/array_api.min.html>`_
 for example). On the other side,
 `ReduceMin from ONNX
 <https://onnx.ai/onnx/operators/onnx__ReduceMin.html>`_
 is considered as a tensor.
diff --git a/_doc/tutorial/graph_api.rst b/_doc/tutorial/graph_api.rst
new file mode 100644
index 0000000..b373cc3
--- /dev/null
+++ b/_doc/tutorial/graph_api.rst
@@ -0,0 +1,59 @@
+.. _l-graph-api:
+
+=================================
+GraphBuilder: common API for ONNX
+=================================
+
+This is a very common way to build ONNX graphs. There are some
+annoying steps while building an ONNX graph. The first one is to
+give unique names to every intermediate result in the graph. The second
+is the conversion from numpy arrays to onnx tensors. A *graph builder*,
+here implemented by class
+:class:`GraphBuilder <onnx_array_api.graph_api.GraphBuilder>`,
+usually makes these two frequent tasks easier.
+
+..
runpython:: + :showcode: + + import numpy as np + from onnx_array_api.graph_api import GraphBuilder + from onnx_array_api.plotting.text_plot import onnx_simple_text_plot + + g = GraphBuilder() + g.make_tensor_input("X", np.float32, (None, None)) + g.make_tensor_input("Y", np.float32, (None, None)) + r1 = g.make_node("Sub", ["X", "Y"]) # the name given to the output is given by the class, + # it ensures the name is unique + init = g.make_initializer(np.array([2], dtype=np.int64)) # the class automatically + # converts the array to a tensor + r2 = g.make_node("Pow", [r1, init]) + g.make_node("ReduceSum", [r2], outputs=["Z"]) # the output name is given because + # the user wants to choose the name + g.make_tensor_output("Z", np.float32, (None, None)) + + onx = g.to_onnx() # final conversion to onnx + + print(onnx_simple_text_plot(onx)) + +A more simple versions of the same code to produce the same graph. + +.. runpython:: + :showcode: + + import numpy as np + from onnx_array_api.graph_api import GraphBuilder + from onnx_array_api.plotting.text_plot import onnx_simple_text_plot + + g = GraphBuilder() + g.make_tensor_input("X", np.float32, (None, None)) + g.make_tensor_input("Y", np.float32, (None, None)) + r1 = g.op.Sub("X", "Y") # the method name indicates which operator to use, + # this can be used when there is no ambiguity about the + # number of outputs + r2 = g.op.Pow(r1, np.array([2], dtype=np.int64)) + g.op.ReduceSum(r2, outputs=["Z"]) # the still wants the user to specify the name + g.make_tensor_output("Z", np.float32, (None, None)) + + onx = g.to_onnx() + + print(onnx_simple_text_plot(onx)) diff --git a/_doc/tutorial/index.rst b/_doc/tutorial/index.rst index e3ca8d7..9fcc557 100644 --- a/_doc/tutorial/index.rst +++ b/_doc/tutorial/index.rst @@ -7,6 +7,8 @@ Tutorial :maxdepth: 1 onnx_api + graph_api light_api numpy_api + tools benchmarks diff --git a/_doc/tutorial/light_api.rst b/_doc/tutorial/light_api.rst index 4e18793..35474fa 100644 --- a/_doc/tutorial/light_api.rst +++ b/_doc/tutorial/light_api.rst @@ -76,3 +76,32 @@ operator `+` to be available as well and that the case. They are defined in class :class:`Var ` or :class:`Vars ` depending on the number of inputs they require. Their name starts with a lower letter. + +Other domains +============= + +The following example uses operator *Normalizer* from domain +*ai.onnx.ml*. The operator name is called with the syntax +`.`. The domain may have dots in its name +but it must follow the python definition of a variable. +The operator *Normalizer* becomes `ai.onnx.ml.Normalizer`. + +.. runpython:: + :showcode: + + import numpy as np + from onnx_array_api.light_api import start + from onnx_array_api.plotting.text_plot import onnx_simple_text_plot + + model = ( + start(opset=19, opsets={"ai.onnx.ml": 3}) + .vin("X") + .reshape((-1, 1)) + .rename("USE") + .ai.onnx.ml.Normalizer(norm="MAX") + .rename("Y") + .vout() + .to_onnx() + ) + + print(onnx_simple_text_plot(model)) diff --git a/_doc/tutorial/onnx_api.rst b/_doc/tutorial/onnx_api.rst index 99af2a7..2b673fb 100644 --- a/_doc/tutorial/onnx_api.rst +++ b/_doc/tutorial/onnx_api.rst @@ -14,7 +14,7 @@ onnx syntax. :epkg:`scikit-learn` is implemented with :epkg:`numpy` and there is no converter from numpy to onnx. Sometimes, it is needed to extend an existing onnx models or to merge models coming from different packages. Sometimes, they are just not available, only onnx is. -Let's see how it looks like a very simply example. +Let's see how it looks like with a very simply example. 
Euclidian distance
==================
@@ -71,7 +71,11 @@ the true implementation would be the following.
         n2 = oh.make_node("Pow", ["dxy", "two"], ["dxy2"])
         n3 = oh.make_node("ReduceSum", ["dxy2"], [output_name])
         graph = oh.make_graph([n1, n2, n3], "euclidian", [X, Y], [Z], [two])
-        model = oh.make_model(graph, opset_imports=[oh.make_opsetid("", opset)])
+        model = oh.make_model(
+            graph,
+            opset_imports=[oh.make_opsetid("", opset)],
+            ir_version=9,
+        )
         return model
 
@@ -263,12 +267,10 @@ A couple of examples.
     model = MyModel()
     kwargs = {"bias": 3.}
-    args = (torch.randn(2, 2, 2),)
+    inputs = (torch.randn(2, 2, 2),)
 
-    export_output = torch.onnx.dynamo_export(
-        model,
-        *args,
-        **kwargs).save("my_simple_model.onnx")
+    export_output = torch.onnx.dynamo_export(model, inputs, **kwargs)
+    export_output.save("my_simple_model.onnx")
 
 .. code-block:: python
 
@@ -462,6 +464,7 @@ onnxblocks
 `onnxblocks `_
 was introduced in onnxruntime to define custom losses in order to train
 a model with :epkg:`onnxruntime-training`. It is mostly used for this usage.
+The syntax is similar to pytorch.
 
 .. code-block:: python
 
@@ -507,37 +510,109 @@ a model with :epkg:`onnxruntime-training`. It is mostly used for this usage.
 
-numpy API for onnx
-++++++++++++++++++
+ONNX GraphSurgeon
++++++++++++++++++
 
-See :ref:`l-numpy-api-onnx`. This API was introduced to create graphs
-by using numpy API. If a function is defined only with numpy,
-it should be possible to use the exact same code to create the
-corresponding onnx graph. That's what this API tries to achieve.
-It works with the exception of control flow. In that case, the function
-produces different onnx graphs depending on the execution path.
+:epkg:`onnx-graphsurgeon` implements the main class `Graph`, which provides
+all the necessary methods to add nodes or import existing onnx files.
+The following example is taken from `onnx-graphsurgeon/examples
+`_.
+The first part generates a graph.
+
+.. code-block:: python
+
+    import onnx_graphsurgeon as gs
+    import numpy as np
+    import onnx
+
+    # Computes Y = x0 + (a * x1 + b)
+
+    shape = (1, 3, 224, 224)
+    # Inputs
+    x0 = gs.Variable(name="x0", dtype=np.float32, shape=shape)
+    x1 = gs.Variable(name="x1", dtype=np.float32, shape=shape)
+
+    # Intermediate tensors
+    a = gs.Constant("a", values=np.ones(shape=shape, dtype=np.float32))
+    b = gs.Constant("b", values=np.ones(shape=shape, dtype=np.float32))
+    mul_out = gs.Variable(name="mul_out")
+    add_out = gs.Variable(name="add_out")
+
+    # Outputs
+    Y = gs.Variable(name="Y", dtype=np.float32, shape=shape)
+
+    nodes = [
+        # mul_out = a * x1
+        gs.Node(op="Mul", inputs=[a, x1], outputs=[mul_out]),
+        # add_out = mul_out + b
+        gs.Node(op="Add", inputs=[mul_out, b], outputs=[add_out]),
+        # Y = x0 + add_out
+        gs.Node(op="Add", inputs=[x0, add_out], outputs=[Y]),
+    ]
+
+    graph = gs.Graph(nodes=nodes, inputs=[x0, x1], outputs=[Y])
+    onnx.save(gs.export_onnx(graph), "model.onnx")
+
+The second part modifies it.
+
+.. code-block:: python
+
+    import onnx_graphsurgeon as gs
+    import numpy as np
+    import onnx
+
+    graph = gs.import_onnx(onnx.load("model.onnx"))
+
+    # 1. Remove the `b` input of the add node
+    first_add = [node for node in graph.nodes if node.op == "Add"][0]
+    first_add.inputs = [inp for inp in first_add.inputs if inp.name != "b"]
+
+    # 2.
Change the Add to a LeakyRelu
+    first_add.op = "LeakyRelu"
+    first_add.attrs["alpha"] = 0.02
+
+    # 3. Add an identity after the add node
+    identity_out = gs.Variable("identity_out", dtype=np.float32)
+    identity = gs.Node(op="Identity", inputs=first_add.outputs, outputs=[identity_out])
+    graph.nodes.append(identity)
+
+    # 4. Modify the graph output to be the identity output
+    graph.outputs = [identity_out]
+
+    # 5. Remove unused nodes/tensors, and topologically sort the graph
+    # ONNX requires nodes to be topologically sorted to be considered valid.
+    # Therefore, you should only need to sort the graph when you have added new nodes out-of-order.
+    # In this case, the identity node is already in the correct spot (it is the last node,
+    # and was appended to the end of the list), but to be on the safer side, we can sort anyway.
+    graph.cleanup().toposort()
+
+    onnx.save(gs.export_onnx(graph), "modified.onnx")
+
+Graph Builder API
++++++++++++++++++
+
+See :ref:`l-graph-api`. This API is very similar to what *skl2onnx* implements.
+It is still about adding nodes to a graph but some tasks are automated such as
+naming the results or converting constants to onnx classes.
 
 .. runpython::
     :showcode:
 
     import numpy as np
-    from onnx_array_api.npx import jit_onnx
+    from onnx_array_api.graph_api import GraphBuilder
     from onnx_array_api.plotting.text_plot import onnx_simple_text_plot
 
-    def l2_loss(x, y):
-        return ((x - y) ** 2).sum(keepdims=1)
-
-    jitted_myloss = jit_onnx(l2_loss)
-    dummy = np.array([0], dtype=np.float32)
-
-    # The function is executed. Only then a onnx graph is created.
-    # One is created depending on the input type.
-    jitted_myloss(dummy, dummy)
+    g = GraphBuilder()
+    g.make_tensor_input("X", np.float32, (None, None))
+    g.make_tensor_input("Y", np.float32, (None, None))
+    r1 = g.op.Sub("X", "Y")
+    r2 = g.op.Pow(r1, np.array([2], dtype=np.int64))
+    g.op.ReduceSum(r2, outputs=["Z"])
+    g.make_tensor_output("Z", np.float32, (None, None))
+
+    onx = g.to_onnx()
 
-    # get_onnx only works if it was executed once or at least with
-    # the same input type
-    model = jitted_myloss.get_onnx()
-    print(onnx_simple_text_plot(model))
+    print(onnx_simple_text_plot(onx))
 
 Light API
 +++++++++
@@ -570,3 +645,35 @@ There is no eager mode.
     )
 
     print(onnx_simple_text_plot(model))
+
+numpy API for onnx
+++++++++++++++++++
+
+See :ref:`l-numpy-api-onnx`. This API was introduced to create graphs
+by using the numpy API. If a function is defined only with numpy,
+it should be possible to use the exact same code to create the
+corresponding onnx graph. That's what this API tries to achieve.
+It works with the exception of control flow. In that case, the function
+produces different onnx graphs depending on the execution path.
+
+.. runpython::
+    :showcode:
+
+    import numpy as np
+    from onnx_array_api.npx import jit_onnx
+    from onnx_array_api.plotting.text_plot import onnx_simple_text_plot
+
+    def l2_loss(x, y):
+        return ((x - y) ** 2).sum(keepdims=1)
+
+    jitted_myloss = jit_onnx(l2_loss)
+    dummy = np.array([0], dtype=np.float32)
+
+    # The function is executed. Only then an onnx graph is created.
+    # One is created depending on the input type.
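+    # Calling the jitted function below triggers that conversion.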
+    jitted_myloss(dummy, dummy)
+
+    # get_onnx only works if it was executed once or at least with
+    # the same input type
+    model = jitted_myloss.get_onnx()
+    print(onnx_simple_text_plot(model))
diff --git a/_doc/tutorial/tools.rst b/_doc/tutorial/tools.rst
new file mode 100644
index 0000000..fe673f7
--- /dev/null
+++ b/_doc/tutorial/tools.rst
@@ -0,0 +1,20 @@
+=====
+Tools
+=====
+
+Some useful tools.
+
+Text representation
+===================
+
+Plotting a graph is great but the result is difficult to read when
+the graph is big, and it is slow.
+:func:`onnx_array_api.plotting.text_plot.onnx_simple_text_plot`
+prints out a text representation.
+
+Differences between two models
+==============================
+
+How to understand the differences between two models
+assuming they are producing the same outputs?
+Example :ref:`l-onnx-diff-example` shows how to do it.
diff --git a/_unittests/onnx-numpy-skips.txt b/_unittests/onnx-numpy-skips.txt
index bf91e86..5deb50e 100644
--- a/_unittests/onnx-numpy-skips.txt
+++ b/_unittests/onnx-numpy-skips.txt
@@ -1,9 +1,17 @@
 # API failures
 # see https://github.com/data-apis/array-api-tests/blob/master/numpy-skips.txt
 # uses __setitem__
+array_api_tests/test_creation_functions.py::test_arange
 array_api_tests/test_creation_functions.py::test_asarray_arrays
 array_api_tests/test_creation_functions.py::test_empty
 array_api_tests/test_creation_functions.py::test_empty_like
+array_api_tests/test_creation_functions.py::test_eye
+array_api_tests/test_creation_functions.py::test_full
+array_api_tests/test_creation_functions.py::test_full_like
+array_api_tests/test_creation_functions.py::test_ones
+array_api_tests/test_creation_functions.py::test_ones_like
+array_api_tests/test_creation_functions.py::test_zeros
+array_api_tests/test_creation_functions.py::test_zeros_like
 # fails to precision issue
 array_api_tests/test_creation_functions.py::test_linspace
 array_api_tests/test_creation_functions.py::test_meshgrid
diff --git a/_unittests/ut_array_api/test_hypothesis_array_api.py b/_unittests/ut_array_api/test_hypothesis_array_api.py
index 95b1447..f55d230 100644
--- a/_unittests/ut_array_api/test_hypothesis_array_api.py
+++ b/_unittests/ut_array_api/test_hypothesis_array_api.py
@@ -1,7 +1,7 @@
 import unittest
-import warnings
 from os import getenv
 from functools import reduce
+import packaging.version as pv
 import numpy as np
 from operator import mul
 from hypothesis import given
@@ -44,9 +44,7 @@ class TestHypothesisArraysApis(ExtTestCase):
 
     @classmethod
     def setUpClass(cls):
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore")
-            from numpy import array_api as xp
+        import array_api_strict as xp
 
         api_version = getenv(
             "ARRAY_API_TESTS_VERSION",
@@ -63,6 +61,9 @@ def test_strategies(self):
         self.assertNotEmpty(self.xps)
         self.assertNotEmpty(self.onxps)
 
+    @unittest.skipIf(
+        pv.Version(np.__version__) >= pv.Version("2.0"), reason="abandoned"
+    )
     def test_scalar_strategies(self):
         dtypes = dict(
             integer_dtypes=self.xps.integer_dtypes(),
@@ -139,6 +140,9 @@ def fctonx(x, kw):
             fctonx()
         self.assertEqual(len(args_onxp), len(args_np))
 
+    @unittest.skipIf(
+        pv.Version(np.__version__) >= pv.Version("2.0"), reason="abandoned"
+    )
    def test_square_sizes_strategies(self):
         dtypes = dict(
             integer_dtypes=self.xps.integer_dtypes(),
diff --git a/_unittests/ut_array_api/test_onnx_numpy.py b/_unittests/ut_array_api/test_onnx_numpy.py
index 412088f..aa666a7 100644
--- a/_unittests/ut_array_api/test_onnx_numpy.py
+++ b/_unittests/ut_array_api/test_onnx_numpy.py
@@ -1,4 +1,3 @@
-import sys
import unittest import numpy as np from onnx import TensorProto @@ -91,9 +90,7 @@ def test_arange_int00a(self): mat = xp.arange(a, b) matnp = mat.numpy() self.assertEqual(matnp.shape, (0,)) - expected = np.arange(0, 0) - if sys.platform == "win32": - expected = expected.astype(np.int64) + expected = np.arange(0, 0).astype(np.int64) self.assertEqualArray(matnp, expected) @ignore_warnings(DeprecationWarning) @@ -101,9 +98,7 @@ def test_arange_int00(self): mat = xp.arange(0, 0) matnp = mat.numpy() self.assertEqual(matnp.shape, (0,)) - expected = np.arange(0, 0) - if sys.platform == "win32": - expected = expected.astype(np.int64) + expected = np.arange(0, 0).astype(np.int64) self.assertEqualArray(matnp, expected) def test_ones_like_uint16(self): diff --git a/_unittests/ut_graph_api/data/debug_7951-CPUep.0.onnx b/_unittests/ut_graph_api/data/debug_7951-CPUep.0.onnx new file mode 100644 index 0000000..77ba377 Binary files /dev/null and b/_unittests/ut_graph_api/data/debug_7951-CPUep.0.onnx differ diff --git a/_unittests/ut_graph_api/test_graph_builder.py b/_unittests/ut_graph_api/test_graph_builder.py new file mode 100644 index 0000000..9e6229b --- /dev/null +++ b/_unittests/ut_graph_api/test_graph_builder.py @@ -0,0 +1,443 @@ +import contextlib +import io +import unittest +import numpy as np +import onnx +from onnx_array_api.ext_test_case import ExtTestCase, skipif_ci_apple +from onnx_array_api.graph_api.graph_builder import GraphBuilder, OptimizationOptions +from onnx_array_api.reference import ( + from_array_extended, + ExtendedReferenceEvaluator as ReferenceEvaluator, +) + + +class TestGraphBuilder(ExtTestCase): + def call_optimizer(self, onx): + gr = GraphBuilder(onx) + gr.remove_unused() + return gr.to_onnx() + + def test_remove_unused_nodes(self): + model = onnx.parser.parse_model( + """ + + agraph (float[N] x) => (float[N] z) { + two = Constant () + four = Add(two, two) + z = Mul(x, x) + }""" + ) + onx = self.call_optimizer(model) + self.assertEqual(len(onx.graph.node), 1) + self.assertEqual(onx.graph.node[0].op_type, "Mul") + + def test_initializers(self): + model = onnx.parser.parse_model( + """ + + agraph (float[N] x) => (float[N] z) + { + four = Add(two, two) + z = Mul(x, x) + }""" + ) + self.assertEqual(len(model.graph.initializer), 1) + onx = self.call_optimizer(model) + self.assertEqual(len(onx.graph.node), 1) + self.assertEqual(onx.graph.node[0].op_type, "Mul") + self.assertEqual(len(onx.graph.initializer), 0) + + def test_keep_unused_outputs(self): + model = onnx.parser.parse_model( + """ + + agraph (float[N] x) => (float[M] z) { + w1, w2, w3 = Split (x) + z = Mul(w3, w3) + }""" + ) + onx = self.call_optimizer(model) + self.assertEqual(len(onx.graph.node), 2) + self.assertEqual(onx.graph.node[0].op_type, "Split") + + def test_exc(self): + self.assertRaise(lambda: GraphBuilder([]), NotImplementedError) + + def test_simple(self): + with contextlib.redirect_stdout(io.StringIO()): + g = GraphBuilder(verbose=10) + + shape = (10, 4) + w = np.random.randn(*shape).astype(np.float32) + + x = g.make_tensor_input("X", np.float32, shape) + weight = g.make_initializer(w) + one = g.make_initializer(np.array([-1, 1], dtype=np.int64)) + transposed = g.make_node("Transpose", [weight], perm=[1, 0]) + res = g.op.MatMul(x, transposed) + g.op.Reshape(res, one, outputs="y") + g.make_tensor_output("y", np.float32, (10, 1)) + onx = g.to_onnx() + ref = ReferenceEvaluator(onx) + x = np.random.randn(*shape).astype(np.float32) + expected = (x @ w.T).reshape((-1, 1)) + feeds = {"X": x} + got = ref.run(None, 
feeds) + self.assertEqualArray(expected, got[0]) + + def test_simple_big(self): + with contextlib.redirect_stdout(io.StringIO()): + g = GraphBuilder(verbose=10) + + shape = (30, 40) + w = np.random.randn(*shape).astype(np.float32) + + x = g.make_tensor_input("X", np.float32, shape) + weight = g.make_initializer(w) + one = g.make_initializer(np.array([-1, 1], dtype=np.int64)) + transposed = g.make_node("Transpose", [weight], perm=[1, 0]) + res = g.op.MatMul(x, transposed) + g.op.Reshape(res, one, outputs="y") + g.make_tensor_output("y", np.float32, (30, 1)) + onx = g.to_onnx() + ref = ReferenceEvaluator(onx) + x = np.random.randn(*shape).astype(np.float32) + expected = (x @ w.T).reshape((-1, 1)) + feeds = {"X": x} + got = ref.run(None, feeds) + self.assertEqualArray(expected, got[0]) + + @skipif_ci_apple("libomp is missing") + def test_constant_folding(self): + with contextlib.redirect_stdout(io.StringIO()): + g = GraphBuilder(verbose=10) + + shape = (10, 4) + w = np.random.randn(*shape).astype(np.float32) + x = g.make_tensor_input("X", np.float32, shape) + weight = g.make_initializer(w) + one = g.make_initializer(np.array([-1, 1], dtype=np.int64)) + transposed = g.make_node("Transpose", [weight], perm=[1, 0]) + res = g.op.MatMul(x, transposed) + g.op.Reshape(res, one, outputs="y") + g.make_tensor_output("y", np.float32, (10, 1)) + + g.constant_folding() + + onx = g.to_onnx() + node_types = [n.op_type for n in onx.graph.node] + self.assertNotIn("Transpose", node_types) + ref = ReferenceEvaluator(onx) + x = np.random.randn(*shape).astype(np.float32) + expected = (x @ w.T).reshape((-1, 1)) + feeds = {"X": x} + got = ref.run(None, feeds) + self.assertEqualArray(expected, got[0]) + + @skipif_ci_apple("libomp is missing") + def test_constant_folding2(self): + g = GraphBuilder( + optimization_options=OptimizationOptions(constant_folding=True) + ) + + shape = (10, 4) + w = np.random.randn(*shape).astype(np.float32) + x = g.make_tensor_input("X", np.float32, shape) + weight = g.make_initializer(w) + cst = g.get_constant(weight) + self.assertEqualArray(w, cst) + one = g.make_initializer(np.array([-1, 1], dtype=np.int64)) + transposed = g.make_node("Transpose", [weight], perm=[1, 0]) + res = g.op.MatMul(x, transposed) + g.op.Reshape(res, one, outputs="y") + g.make_tensor_output("y", np.float32, (10, 1)) + + g.optimize() + + onx = g.to_onnx() + node_types = [n.op_type for n in onx.graph.node] + self.assertNotIn("Transpose", node_types) + ref = ReferenceEvaluator(onx) + x = np.random.randn(*shape).astype(np.float32) + expected = (x @ w.T).reshape((-1, 1)) + feeds = {"X": x} + got = ref.run(None, feeds) + self.assertEqualArray(expected, got[0]) + + def test_remove_identity(self): + with contextlib.redirect_stdout(io.StringIO()): + g = GraphBuilder(verbose=10) + + shape = (10, 4) + w = np.random.randn(*shape).astype(np.float32) + x = g.make_tensor_input("X", np.float32, shape) + weight = g.make_initializer(w) + one = g.make_initializer(np.array([-1, 1], dtype=np.int64)) + transposed = g.make_node("Transpose", [weight], perm=[1, 0]) + res = g.op.Identity(g.op.MatMul(x, transposed)) + g.op.Reshape(res, one, outputs="y") + g.make_tensor_output("y", np.float32, (10, 1)) + + g.remove_identity_nodes() + + onx = g.to_onnx() + node_types = [n.op_type for n in onx.graph.node] + self.assertNotIn("Identity", node_types) + ref = ReferenceEvaluator(onx) + x = np.random.randn(*shape).astype(np.float32) + expected = (x @ w.T).reshape((-1, 1)) + feeds = {"X": x} + got = ref.run(None, feeds) + 
self.assertEqualArray(expected, got[0]) + + def test_remove_identity_input(self): + with contextlib.redirect_stdout(io.StringIO()): + g = GraphBuilder(verbose=10) + + shape = (10, 4) + w = np.random.randn(*shape).astype(np.float32) + x = g.make_tensor_input("X", np.float32, shape) + x = g.op.Identity(x) + weight = g.make_initializer(w) + one = g.make_initializer(np.array([-1, 1], dtype=np.int64)) + transposed = g.make_node("Transpose", [weight], perm=[1, 0]) + res = g.op.MatMul(x, transposed) + g.op.Reshape(res, one, outputs="y") + g.make_tensor_output("y", np.float32, (10, 1)) + + g.remove_identity_nodes() + + onx = g.to_onnx() + node_types = [n.op_type for n in onx.graph.node] + self.assertNotIn("Identity", node_types) + ref = ReferenceEvaluator(onx) + x = np.random.randn(*shape).astype(np.float32) + expected = (x @ w.T).reshape((-1, 1)) + feeds = {"X": x} + got = ref.run(None, feeds) + self.assertEqualArray(expected, got[0]) + + def test_remove_identity_output(self): + with contextlib.redirect_stdout(io.StringIO()): + g = GraphBuilder(verbose=10) + + shape = (10, 4) + w = np.random.randn(*shape).astype(np.float32) + x = g.make_tensor_input("X", np.float32, shape) + weight = g.make_initializer(w) + one = g.make_initializer(np.array([-1, 1], dtype=np.int64)) + transposed = g.make_node("Transpose", [weight], perm=[1, 0]) + res = g.op.MatMul(x, transposed) + r = g.op.Reshape(res, one) + g.op.Identity(r, outputs=["y"]) + g.make_tensor_output("y", np.float32, (10, 1)) + + g.remove_identity_nodes() + + onx = g.to_onnx() + node_types = [n.op_type for n in onx.graph.node] + self.assertNotIn("Identity", node_types) + ref = ReferenceEvaluator(onx) + x = np.random.randn(*shape).astype(np.float32) + expected = (x @ w.T).reshape((-1, 1)) + feeds = {"X": x} + got = ref.run(None, feeds) + self.assertEqualArray(expected, got[0]) + + def test_remove_unused_nodes_simple(self): + with contextlib.redirect_stdout(io.StringIO()): + g = GraphBuilder(verbose=10) + + shape = (10, 4) + w = np.random.randn(*shape).astype(np.float32) + x = g.make_tensor_input("X", np.float32, shape) + weight = g.make_initializer(w) + cst = g.make_initializer(np.array([2], dtype=np.float32)) + one = g.make_initializer(np.array([-1, 1], dtype=np.int64)) + transposed = g.make_node("Transpose", [weight], perm=[1, 0]) + res = g.op.MatMul(x, transposed) + g.op.Add(res, cst) + g.op.Reshape(res, one, outputs=["y"]) + g.make_tensor_output("y", np.float32, (10, 1)) + + g.remove_identity_nodes() + + onx = g.to_onnx() + node_types = [n.op_type for n in onx.graph.node] + self.assertNotIn("Add", node_types) + ref = ReferenceEvaluator(onx) + x = np.random.randn(*shape).astype(np.float32) + expected = (x @ w.T).reshape((-1, 1)) + feeds = {"X": x} + got = ref.run(None, feeds) + self.assertEqualArray(expected, got[0]) + + @skipif_ci_apple("libomp is missing") + def test_constant_array(self): + with contextlib.redirect_stdout(io.StringIO()): + g = GraphBuilder(verbose=10) + + shape = (10, 4) + w = np.random.randn(*shape).astype(np.float32) + + x = g.make_tensor_input("X", np.float32, shape) + one = g.make_initializer(np.array([-1, 1], dtype=np.int64)) + res = g.op.MatMul(x, w.T) + g.op.Reshape(res, one, outputs="y") + g.make_tensor_output("y", np.float32, (10, 1)) + onx = g.to_onnx() + ref = ReferenceEvaluator(onx) + x = np.random.randn(*shape).astype(np.float32) + expected = (x @ w.T).reshape((-1, 1)) + feeds = {"X": x} + got = ref.run(None, feeds) + self.assertEqualArray(expected, got[0]) + + @skipif_ci_apple("libomp is missing") + def 
test_constant_array_2(self): + with contextlib.redirect_stdout(io.StringIO()): + g = GraphBuilder(verbose=10) + + shape = (10, 4) + w = np.random.randn(*shape).astype(np.float32) + + x = g.make_tensor_input("X", np.float32, shape) + one = g.make_initializer(np.array([-1, 1], dtype=np.int64)) + opc = g.op.Constant(value=from_array_extended(w.T)) + res = g.op.MatMul(x, opc) + g.op.Reshape(res, one, outputs="y") + g.make_tensor_output("y", np.float32, (10, 1)) + self.assertTrue(g.has_shape("X")) + self.assertTrue(g.has_type("X")) + self.assertEqual(g.get_type("X"), 1) + self.assertEqual(g.get_shape("X"), (10, 4)) + self.assertEqual(g.rank("X"), 2) + onx = g.to_onnx() + ref = ReferenceEvaluator(onx) + x = np.random.randn(*shape).astype(np.float32) + expected = (x @ w.T).reshape((-1, 1)) + feeds = {"X": x} + got = ref.run(None, feeds) + self.assertEqualArray(expected, got[0]) + + def test_get_type(self): + g = GraphBuilder() + self.assertEqual(g._get_type(np.float32), onnx.TensorProto.FLOAT) + self.assertEqual(g._get_type(np.int64), onnx.TensorProto.INT64) + self.assertEqual(g._get_type(None), onnx.TensorProto.UNDEFINED) + + def test_make_nodes_prefix(self): + g1 = GraphBuilder() + g1.make_tensor_input("X", np.float32, shape=None) + g1.op.Add("X", np.array([1], dtype=np.float32), outputs=["y"]) + g1.make_tensor_output("y", np.float32, shape=None) + + g = GraphBuilder() + + shape = (10, 4) + w = np.random.randn(*shape).astype(np.float32) + + x = g.make_tensor_input("X", np.float32, shape) + weight = g.make_initializer(w) + one = g.make_initializer(np.array([-1, 1], dtype=np.int64)) + transposed = g.make_node("Transpose", [weight], perm=[1, 0]) + res = g.op.MatMul(x, transposed) + res2 = g.make_nodes(g1, [res], ["k"], prefix="J") + g.op.Reshape(res2, one, outputs="y") + g.make_tensor_output("y", np.float32, (10, 1)) + onx = g.to_onnx() + ref = ReferenceEvaluator(onx) + x = np.random.randn(*shape).astype(np.float32) + expected = (x @ w.T).reshape((-1, 1)) + 1 + feeds = {"X": x} + got = ref.run(None, feeds) + self.assertEqualArray(expected, got[0]) + + def test_make_nodes_noprefix(self): + g1 = GraphBuilder() + g1.make_tensor_input("X", np.float32, shape=None) + g1.op.Add("X", np.array([1], dtype=np.float32), outputs=["y"]) + g1.make_tensor_output("y", np.float32, shape=None) + + g = GraphBuilder() + + shape = (10, 4) + w = np.random.randn(*shape).astype(np.float32) + + x = g.make_tensor_input("X", np.float32, shape) + weight = g.make_initializer(w) + one = g.make_initializer(np.array([-1, 1], dtype=np.int64)) + transposed = g.make_node("Transpose", [weight], perm=[1, 0]) + res = g.op.MatMul(x, transposed) + res2 = g.make_nodes(g1, [res], ["k"]) + g.op.Reshape(res2, one, outputs="y") + g.make_tensor_output("y", np.float32, (10, 1)) + onx = g.to_onnx() + ref = ReferenceEvaluator(onx) + x = np.random.randn(*shape).astype(np.float32) + expected = (x @ w.T).reshape((-1, 1)) + 1 + feeds = {"X": x} + got = ref.run(None, feeds) + self.assertEqualArray(expected, got[0]) + + def test_node_pattern(self): + model = onnx.parser.parse_model( + """ + + agraph (float[N] x) => (float[N] z) { + two = Constant () + four = Add(two, two) + z = Mul(x, four) + }""" + ) + gr = GraphBuilder(model) + p = gr.np(index=0) + r = repr(p) + self.assertEqual("NodePattern(index=0, op_type=None, name=None)", r) + + def test_update_node_attribute(self): + model = onnx.parser.parse_model( + """ + + agraph (float[N] x) => (float[N] z) { + two = Constant () + four = Add(two, two) + z = Mul(x, four) + }""" + ) + gr = GraphBuilder(model) 
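+        # the parsed model contains three nodes: Constant, Add and Mul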
+ self.assertEqual(len(gr.nodes), 3) + m = gr.update_attribute(gr.np(op_type="Constant"), value_float=float(1)) + self.assertEqual(m, 1) + self.assertEqual(len(gr.nodes), 3) + onx = gr.to_onnx() + self.assertEqual(len(onx.graph.node), 3) + node = onx.graph.node[0] + self.assertIn("f: 1", str(node)) + + def test_delete_node_attribute(self): + model = onnx.parser.parse_model( + """ + + agraph (float[N] x) => (float[N] z) { + two = Constant () + four = Add(two, two) + z = Mul(x, four) + }""" + ) + gr = GraphBuilder(model) + self.assertEqual(len(gr.nodes), 3) + m = gr.update_attribute( + gr.np(op_type="Constant"), value_float=gr.DELETE, value_int=1 + ) + self.assertEqual(m, 1) + self.assertEqual(len(gr.nodes), 3) + onx = gr.to_onnx() + self.assertEqual(len(onx.graph.node), 3) + node = onx.graph.node[0] + self.assertNotIn('name: "value_float"', str(node)) + self.assertIn("i: 1", str(node)) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/_unittests/ut_graph_api/test_graph_builder_optim.py b/_unittests/ut_graph_api/test_graph_builder_optim.py new file mode 100644 index 0000000..5ec827d --- /dev/null +++ b/_unittests/ut_graph_api/test_graph_builder_optim.py @@ -0,0 +1,38 @@ +import os +import unittest +import onnx +from onnx.inliner import inline_local_functions +from onnx_array_api.ext_test_case import ExtTestCase +from onnx_array_api.graph_api.graph_builder import GraphBuilder + + +class TestGraphBuilderOptim(ExtTestCase): + def test_wcheck_afiles(self): + import onnxruntime + + data = os.path.join(os.path.dirname(__file__), "data") + filename = [f for f in os.listdir(data) if f.endswith(".onnx")] + for f in filename: + with self.subTest(f=f): + onx = onnx.load(os.path.join(data, f)) + sess = onnxruntime.InferenceSession( + os.path.join(data, f), providers=["CPUExecutionProvider"] + ) + assert sess + onxi = inline_local_functions(onx) + sess = onnxruntime.InferenceSession( + onxi.SerializeToString(), providers=["CPUExecutionProvider"] + ) + assert sess + g = GraphBuilder(onxi) + g.optimize(check_order=True) + g.check_order() + onx2 = g.to_onnx() + sess2 = onnxruntime.InferenceSession( + onx2.SerializeToString(), providers=["CPUExecutionProvider"] + ) + assert sess2 + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/_unittests/ut_light_api/test_backend_export.py b/_unittests/ut_light_api/test_backend_export.py new file mode 100644 index 0000000..91f4dd4 --- /dev/null +++ b/_unittests/ut_light_api/test_backend_export.py @@ -0,0 +1,317 @@ +import sys +import unittest +from typing import Any, Dict, List, Optional +from difflib import unified_diff +import packaging.version as pv +import numpy +from numpy.testing import assert_allclose +from onnx.defs import onnx_opset_version +import onnx.backend.base +import onnx.backend.test +import onnx.shape_inference +import onnx.version_converter +from onnx import ModelProto, TensorProto, __version__ as onnx_version +from onnx.helper import ( + make_function, + make_graph, + make_model, + make_node, + make_opsetid, + make_tensor_value_info, +) +from onnx.reference.op_run import to_array_extended +from onnx.numpy_helper import from_array, to_array +from onnx.backend.base import Device, DeviceType +from onnx_array_api.reference import ExtendedReferenceEvaluator +from onnx_array_api.translate_api.make_helper import make_node_extended +from onnx_array_api.translate_api import translate +from onnx_array_api.plotting.text_plot import onnx_simple_text_plot + +verbosity = 10 if "-v" in sys.argv or "--verbose" in sys.argv 
else 0 + + +class ReferenceImplementationError(RuntimeError): + "Fails, export cannot be compared." + + +class ExportWrapper: + apis = ["onnx", "light"] + + def __init__(self, model): + self.model = model + self.expected_sess = ExtendedReferenceEvaluator(self.model, verbose=verbosity) + + @property + def input_names(self): + return self.expected_sess.input_names + + @property + def input_types(self): + return self.expected_sess.input_types + + @property + def output_names(self): + return self.expected_sess.output_names + + @property + def output_types(self): + return self.expected_sess.output_types + + def run( + self, names: Optional[List[str]], feeds: Optional[Dict[str, Any]] = None + ) -> List[Any]: + try: + expected = self.expected_sess.run(names, feeds) + except (RuntimeError, AssertionError, TypeError, KeyError) as e: + raise ReferenceImplementationError( + f"ReferenceImplementation fails with " + f"{onnx_simple_text_plot(self.model)}" + f"\n--RAW--\n{self.model}" + ) from e + + for api in self.apis: + try: + code = translate(self.model, api=api) + except NotImplementedError: + continue + except ValueError as e: + raise AssertionError( + f"Unable to translate model for api {api!r}, " + f"\n--BASE--\n{onnx_simple_text_plot(self.model)}" + f"\n--EXPECTED--\n{expected}" + ) from e + try: + code_compiled = compile(code, "", mode="exec") + except Exception as e: + new_code = "\n".join( + [f"{i+1:04} {line}" for i, line in enumerate(code.split("\n"))] + ) + raise AssertionError(f"ERROR {e}\n{new_code}") # noqa: B904 + + locs = { + "np": numpy, + "to_array": to_array, + "to_array_extended": to_array_extended, + "from_array": from_array, + "TensorProto": TensorProto, + "make_function": make_function, + "make_opsetid": make_opsetid, + "make_model": make_model, + "make_graph": make_graph, + "make_node": make_node, + "make_node_extended": make_node_extended, + "make_tensor_value_info": make_tensor_value_info, + } + globs = locs.copy() + try: + exec(code_compiled, globs, locs) + except (TypeError, NameError, ValueError) as e: + new_code = "\n".join( + [f"{i+1:04} {line}" for i, line in enumerate(code.split("\n"))] + ) + raise AssertionError( + f"Unable to executed code for api {api!r}\n{new_code}" + ) from e + export_model = locs["model"] + ref = ExtendedReferenceEvaluator(export_model, verbose=verbosity) + try: + got = ref.run(names, feeds) + except (TypeError, AttributeError) as e: + diff = "\n".join( + unified_diff( + str(self.model).split("\n"), + str(export_model).split("\n"), + fromfile="before", + tofile="after", + ) + ) + raise AssertionError( + f"Unable to run the exported model for api {api!r}, " + f"\n--BASE--\n{onnx_simple_text_plot(self.model)}" + f"\n--EXP[{api}]--\n{onnx_simple_text_plot(export_model)}" + f"\n--CODE--\n{code}" + f"\n--FEEDS--\n{feeds}" + f"\n--EXPECTED--\n{expected}" + f"\n--DIFF--\n{diff}" + ) from e + if len(expected) != len(got): + raise AssertionError( + f"Unexpected number of outputs for api {api!r}, " + f"{len(expected)} != {len(got)}." + f"\n--BASE--\n{onnx_simple_text_plot(self.model)}" + f"\n--EXP[{api}]--\n{onnx_simple_text_plot(export_model)}" + ) + for a, b in zip(expected, got): + if not isinstance(a, numpy.ndarray): + continue + if a.shape != b.shape or a.dtype != b.dtype: + raise AssertionError( + f"Shape or type discrepancies for api {api!r}." 
+ f"\n--BASE--\n{onnx_simple_text_plot(self.model)}" + f"\n--EXP[{api}]--\n{onnx_simple_text_plot(export_model)}" + ) + if a.dtype in (numpy.str_, object, numpy.object_) or isinstance( + a.dtype, getattr(getattr(numpy, "dtypes", None), "StrDType", type) + ): + if a.tolist() != b.tolist(): + raise AssertionError( + f"Text discrepancies for api {api!r} " + f"with a.dtype={a.dtype} " + f"and b.dtype={b.dtype}" + f"\n--BASE--\n{onnx_simple_text_plot(self.model)}" + f"\n--EXP[{api}]--\n{onnx_simple_text_plot(export_model)}" + ) + continue + try: + assert_allclose(a, b, atol=1e-3) + except (AssertionError, TypeError) as e: + raise AssertionError( + f"Discrepancies for api {api!r} with a.dtype={a.dtype} " + f"and b.dtype={b.dtype} (type-dtype={type(a.dtype)})" + f"\n--BASE--\n{onnx_simple_text_plot(self.model)}" + f"\n--EXP[{api}]--\n{onnx_simple_text_plot(export_model)}" + ) from e + + return expected + + +class ExportBackendRep(onnx.backend.base.BackendRep): + def __init__(self, session): + self._session = session + + def run(self, inputs, **kwargs): + if isinstance(inputs, numpy.ndarray): + inputs = [inputs] + if isinstance(inputs, list): + if len(inputs) == len(self._session.input_names): + feeds = dict(zip(self._session.input_names, inputs)) + else: + feeds = {} + pos_inputs = 0 + for inp, tshape in zip( + self._session.input_names, self._session.input_types + ): + shape = tuple(d.dim_value for d in tshape.tensor_type.shape.dim) + if shape == inputs[pos_inputs].shape: + feeds[inp] = inputs[pos_inputs] + pos_inputs += 1 + if pos_inputs >= len(inputs): + break + elif isinstance(inputs, dict): + feeds = inputs + else: + raise TypeError(f"Unexpected input type {type(inputs)!r}.") + outs = self._session.run(None, feeds) + return outs + + +class ExportBackend(onnx.backend.base.Backend): + @classmethod + def is_opset_supported(cls, model): # pylint: disable=unused-argument + return True, "" + + @classmethod + def supports_device(cls, device: str) -> bool: + d = Device(device) + return d.type == DeviceType.CPU # type: ignore[no-any-return] + + @classmethod + def create_inference_session(cls, model): + return ExportWrapper(model) + + @classmethod + def prepare( + cls, model: Any, device: str = "CPU", **kwargs: Any + ) -> ExportBackendRep: + if isinstance(model, ExportWrapper): + return ExportBackendRep(model) + if isinstance(model, (str, bytes, ModelProto)): + inf = cls.create_inference_session(model) + return cls.prepare(inf, device, **kwargs) + raise TypeError(f"Unexpected type {type(model)} for model.") + + @classmethod + def run_model(cls, model, inputs, device=None, **kwargs): + rep = cls.prepare(model, device, **kwargs) + return rep.run(inputs, **kwargs) + + @classmethod + def run_node(cls, node, inputs, device=None, outputs_info=None, **kwargs): + raise NotImplementedError("Unable to run the model node by node.") + + +backend_test = onnx.backend.test.BackendTest(ExportBackend, __name__) + +# The following tests are too slow with the reference implementation (Conv). 
+backend_test.exclude( + "(FLOAT8|BFLOAT16|INT4|_opt_|_3d_|_momentum_|_4d_|int4" + "|test_adagrad" + "|test_adam" + "|test_ai_onnx_ml_" + "|test_cast_FLOAT16" + "|test_cast_FLOAT_to_STRING" + "|test_castlike_FLOAT16" + "|test_castlike_FLOAT_to_STRING" + "|test_bernoulli" + "|test_bvlc_alexnet" + "|test_conv" # too long + "|test_gradient_" + "|test_densenet121" + "|test_inception_v1" + "|test_inception_v2" + "|test_loop11_" + "|test_loop16_seq_none" + "|test_MaxPool2d" + "|test_quantizelinear_e" + "|test_resnet50" + "|test_sequence_model" + "|test_scan_sum" + "|test_scatter_with_axis" + "|test_scatter_without_axis" + "|test_shufflenet" + "|test_squeezenet" + "|test_vgg19" + "|test_zfnet512" + "|test_range_float_type_positive_delta_expanded" + "|test_range_int32_type_negative_delta_expanded" + ")" +) + +if onnx_opset_version() < 22: + backend_test.exclude( + "(" + "test_dft_inverse_cpu" + "|test_dft_inverse_opset19_cpu" + "|test_lppool_1d_default_cpu" + "|test_lppool_2d_default_cpu" + "|test_lppool_2d_dilations_cpu" + "|test_lppool_2d_pads_cpu" + "|test_lppool_2d_same_lower_cpu" + "|test_lppool_2d_same_upper_cpu" + "|test_lppool_2d_strides_cpu" + "|test_lppool_3d_default_cpu" + ")" + ) + +if pv.Version(onnx_version) < pv.Version("1.16.0"): + backend_test.exclude("(test_strnorm|test_range_)") + +# The following tests cannot pass because they consists in generating random number. +backend_test.exclude("(test_bernoulli)") + +# import all test cases at global scope to make them visible to python.unittest +globals().update(backend_test.test_cases) + +if __name__ == "__main__": + res = unittest.main(verbosity=2, exit=False) + tests_run = res.result.testsRun + errors = len(res.result.errors) + skipped = len(res.result.skipped) + unexpected_successes = len(res.result.unexpectedSuccesses) + expected_failures = len(res.result.expectedFailures) + print("---------------------------------") + print( + f"tests_run={tests_run} errors={errors} skipped={skipped} " + f"unexpected_successes={unexpected_successes} " + f"expected_failures={expected_failures}" + ) diff --git a/_unittests/ut_light_api/test_light_api.py b/_unittests/ut_light_api/test_light_api.py index 3feaa2a..f936cc1 100644 --- a/_unittests/ut_light_api/test_light_api.py +++ b/_unittests/ut_light_api/test_light_api.py @@ -1,7 +1,8 @@ +import inspect import unittest from typing import Callable, Optional import numpy as np -from onnx import ModelProto +from onnx import GraphProto, ModelProto, TensorProto from onnx.defs import ( get_all_schemas_with_history, onnx_opset_version, @@ -10,8 +11,9 @@ SchemaError, ) from onnx.reference import ReferenceEvaluator -from onnx_array_api.ext_test_case import ExtTestCase -from onnx_array_api.light_api import start, OnnxGraph, Var +from onnx_array_api.ext_test_case import ExtTestCase, skipif_ci_windows +from onnx_array_api.light_api import start, OnnxGraph, Var, g +from onnx_array_api.light_api.var import SubDomain from onnx_array_api.light_api._op_var import OpsVar from onnx_array_api.light_api._op_vars import OpsVars @@ -138,12 +140,13 @@ def list_ops_missing(self, n_inputs): methods.append("") new_missing.append(m) text = "\n".join(methods) - if len(new_missing) > 0: + if new_missing: raise AssertionError( f"n_inputs={n_inputs}: missing method for operators " f"{new_missing}\n{text}" ) + @skipif_ci_windows("Unstable on Windows.") def test_list_ops_missing(self): self.list_ops_missing(1) self.list_ops_missing(2) @@ -208,7 +211,7 @@ def test_neg(self): self.assertIsInstance(v, Var) self.assertEqual(["X"], 
v.parent.input_names) s = str(v) - self.assertEqual("X:FLOAT", s) + self.assertEqual("X:FLOAT:[]", s) onx = start().vin("X").Neg().rename("Y").vout().to_onnx() self.assertIsInstance(onx, ModelProto) ref = ReferenceEvaluator(onx) @@ -402,6 +405,168 @@ def test_operator_bool(self): got = ref.run(None, {"X": a, "Y": b})[0] self.assertEqualArray(f(a, b), got) + def test_topk(self): + onx = ( + start() + .vin("X", np.float32) + .vin("K", np.int64) + .bring("X", "K") + .TopK() + .rename("Values", "Indices") + .vout() + .to_onnx() + ) + self.assertIsInstance(onx, ModelProto) + ref = ReferenceEvaluator(onx) + x = np.array([[0, 1, 2, 3], [9, 8, 7, 6]], dtype=np.float32) + k = np.array([2], dtype=np.int64) + got = ref.run(None, {"X": x, "K": k}) + self.assertEqualArray(np.array([[3, 2], [9, 8]], dtype=np.float32), got[0]) + self.assertEqualArray(np.array([[3, 2], [0, 1]], dtype=np.int64), got[1]) + + def test_topk_reverse(self): + onx = ( + start() + .vin("X", np.float32) + .vin("K", np.int64) + .bring("X", "K") + .TopK(largest=0) + .rename("Values", "Indices") + .vout() + .to_onnx() + ) + self.assertIsInstance(onx, ModelProto) + ref = ReferenceEvaluator(onx) + x = np.array([[0, 1, 2, 3], [9, 8, 7, 6]], dtype=np.float32) + k = np.array([2], dtype=np.int64) + got = ref.run(None, {"X": x, "K": k}) + self.assertEqualArray(np.array([[0, 1], [6, 7]], dtype=np.float32), got[0]) + self.assertEqualArray(np.array([[0, 1], [3, 2]], dtype=np.int64), got[1]) + + def test_if(self): + gg = g().cst(np.array([0], dtype=np.int64)).rename("Z").vout() + onx = gg.to_onnx() + self.assertIsInstance(onx, GraphProto) + self.assertEqual(len(onx.input), 0) + self.assertEqual(len(onx.output), 1) + self.assertEqual([o.name for o in onx.output], ["Z"]) + onx = ( + start(opset=19) + .vin("X", np.float32) + .ReduceSum() + .rename("Xs") + .cst(np.array([0], dtype=np.float32)) + .left_bring("Xs") + .Greater() + .If( + then_branch=g().cst(np.array([1], dtype=np.int64)).rename("Z").vout(), + else_branch=g().cst(np.array([0], dtype=np.int64)).rename("Z").vout(), + ) + .rename("W") + .vout() + .to_onnx() + ) + self.assertIsInstance(onx, ModelProto) + ref = ReferenceEvaluator(onx) + x = np.array([0, 1, 2, 3, 9, 8, 7, 6], dtype=np.float32) + got = ref.run(None, {"X": x}) + self.assertEqualArray(np.array([1], dtype=np.int64), got[0]) + got = ref.run(None, {"X": -x}) + self.assertEqualArray(np.array([0], dtype=np.int64), got[0]) + + def test_domain(self): + onx = start(opsets={"ai.onnx.ml": 3}).vin("X").reshape((-1, 1)).rename("USE") + + class A: + def g(self): + return True + + def ah(self): + return True + + setattr(A, "h", ah) # noqa: B010 + + self.assertTrue(A().h()) + self.assertIn("(self)", str(inspect.signature(A.h))) + self.assertTrue(issubclass(onx._ai, SubDomain)) + self.assertIsInstance(onx.ai, SubDomain) + self.assertIsInstance(onx.ai.parent, Var) + self.assertTrue(issubclass(onx._ai._onnx, SubDomain)) + self.assertIsInstance(onx.ai.onnx, SubDomain) + self.assertIsInstance(onx.ai.onnx.parent, Var) + self.assertTrue(issubclass(onx._ai._onnx._ml, SubDomain)) + self.assertIsInstance(onx.ai.onnx.ml, SubDomain) + self.assertIsInstance(onx.ai.onnx.ml.parent, Var) + self.assertIn("(self,", str(inspect.signature(onx._ai._onnx._ml.Normalizer))) + onx = onx.ai.onnx.ml.Normalizer(norm="MAX") + onx = onx.rename("Y").vout().to_onnx() + self.assertIsInstance(onx, ModelProto) + self.assertIn("Normalizer", str(onx)) + self.assertIn('domain: "ai.onnx.ml"', str(onx)) + self.assertIn('input: "USE"', str(onx)) + ref = ReferenceEvaluator(onx) + a = 
np.arange(10).astype(np.float32) + got = ref.run(None, {"X": a})[0] + expected = (a > 0).astype(int).astype(np.float32).reshape((-1, 1)) + self.assertEqualArray(expected, got) + + def test_input_shape(self): + kernel = (np.arange(9) + 1).reshape(3, 3).astype(np.float32) + model = ( + start() + .vin("X", shape=[None, None]) + .cst(kernel[np.newaxis, np.newaxis, ...]) + .rename("W") + .bring("X", "W") + .Conv(pads=[1, 1, 1, 1]) + .rename("Y") + .vout(shape=[]) + .to_onnx() + ) + i = str(model.graph.input[0]).replace("\n", "").replace(" ", "") + self.assertNotIn("shape{}", i) + + def test_constant_of_shape(self): + onx = ( + start() + .vin("X", TensorProto.INT64, shape=[None, None]) + .ConstantOfShape() + .vout(shape=[]) + .to_onnx() + ) + ref = ReferenceEvaluator(onx) + got = ref.run(None, {"X": np.array([2, 3], dtype=np.int64)})[0] + self.assertEqualArray(np.zeros((2, 3), dtype=np.float32), got) + + def test_constant_of_shape_value(self): + onx = ( + start() + .vin("X", TensorProto.INT64, shape=[None, None]) + .ConstantOfShape(value=np.array([1], dtype=np.float32)) + .vout(shape=[]) + .to_onnx() + ) + ref = ReferenceEvaluator(onx) + got = ref.run(None, {"X": np.array([2, 3], dtype=np.int64)})[0] + self.assertEqualArray(np.ones((2, 3), dtype=np.float32), got) + + def test_slice(self): + onx = ( + start(opset=18, ir_version=9) + .cst(np.array([1], dtype=np.int64), name="one") + .cst(np.array([2], dtype=np.int64), name="two") + .vin("X", TensorProto.INT64, shape=[None, None]) + .ConstantOfShape(value=np.array([1], dtype=np.float32)) + .rename("CX") + .bring("CX", "one", "two", "one") + .Slice() + .vout(shape=[]) + .to_onnx() + ) + ref = ReferenceEvaluator(onx) + got = ref.run(None, {"X": np.array([2, 3], dtype=np.int64)})[0] + self.assertEqualArray(np.ones((2, 1), dtype=np.float32), got) + if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/_unittests/ut_npx/test_npx.py b/_unittests/ut_npx/test_npx.py index 83703ba..873665d 100644 --- a/_unittests/ut_npx/test_npx.py +++ b/_unittests/ut_npx/test_npx.py @@ -20,7 +20,7 @@ from onnx.reference import ReferenceEvaluator from onnx.shape_inference import infer_shapes -from onnx_array_api.ext_test_case import ExtTestCase, ignore_warnings +from onnx_array_api.ext_test_case import ExtTestCase, ignore_warnings, skipif_ci_windows from onnx_array_api.reference import ExtendedReferenceEvaluator from onnx_array_api.npx import ElemType, eager_onnx, jit_onnx from onnx_array_api.npx.npx_core_api import ( @@ -208,7 +208,7 @@ def local1( return x def local2( - x: TensorType[ElemType.floats, "T"] + x: TensorType[ElemType.floats, "T"], ) -> TensorType[ElemType.floats, "T"]: return x @@ -1355,6 +1355,7 @@ def test_clip_none(self): got = ref.run(None, {"A": x}) self.assertEqualArray(y, got[0]) + @skipif_ci_windows("Unstable on Windows.") def test_arange_inline(self): # arange(5) f = arange_inline(Input("A")) @@ -1391,6 +1392,7 @@ def test_arange_inline(self): got = ref.run(None, {"A": x1, "B": x2, "C": x3}) self.assertEqualArray(y, got[0]) + @skipif_ci_windows("Unstable on Windows.") def test_arange_inline_dtype(self): # arange(1, 5, 2), dtype f = arange_inline(Input("A"), Input("B"), Input("C"), dtype=np.float64) diff --git a/_unittests/ut_npx/test_sklearn_array_api.py b/_unittests/ut_npx/test_sklearn_array_api.py index 083c009..9c0d56f 100644 --- a/_unittests/ut_npx/test_sklearn_array_api.py +++ b/_unittests/ut_npx/test_sklearn_array_api.py @@ -17,6 +17,7 @@ class TestSklearnArrayAPI(ExtTestCase): reason="reshape ArrayAPI not followed", ) 
@ignore_warnings(DeprecationWarning) + @unittest.skip("not maintained") def test_sklearn_array_api_linear_discriminant(self): X = np.array( [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]], dtype=np.float64 @@ -39,6 +40,7 @@ def test_sklearn_array_api_linear_discriminant(self): reason="reshape ArrayAPI not followed", ) @ignore_warnings(DeprecationWarning) + @unittest.skip("not maintained") def test_sklearn_array_api_linear_discriminant_float32(self): X = np.array( [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]], dtype=np.float32 diff --git a/_unittests/ut_ort/test_ort_profile.py b/_unittests/ut_ort/test_ort_profile.py index e868860..6e139cb 100644 --- a/_unittests/ut_ort/test_ort_profile.py +++ b/_unittests/ut_ort/test_ort_profile.py @@ -57,8 +57,6 @@ def myloss(x, y): prof = ort_profile(optimized, feeds) events = { "kernel_time", - "fence_before", - "fence_after", "SequentialExecutor::Execute", "model_run", "model_loading_array", diff --git a/_unittests/ut_ort/test_ort_tensor.py b/_unittests/ut_ort/test_ort_tensor.py index cb4377d..a9598a5 100644 --- a/_unittests/ut_ort/test_ort_tensor.py +++ b/_unittests/ut_ort/test_ort_tensor.py @@ -6,7 +6,7 @@ from onnx.defs import onnx_opset_version from onnx.reference import ReferenceEvaluator from onnxruntime import InferenceSession -from onnx_array_api.ext_test_case import ExtTestCase +from onnx_array_api.ext_test_case import ExtTestCase, skipif_ci_windows from onnx_array_api.npx import eager_onnx, jit_onnx from onnx_array_api.npx.npx_functions import absolute as absolute_inline from onnx_array_api.npx.npx_functions import cdist as cdist_inline @@ -20,6 +20,7 @@ class TestOrtTensor(ExtTestCase): + @skipif_ci_windows("Unstable on Windows") def test_eager_numpy_type_ort(self): def impl(A): self.assertIsInstance(A, EagerOrtTensor) @@ -45,6 +46,7 @@ def impl(A): self.assertEqualArray(z, res.numpy()) self.assertEqual(res.numpy().dtype, np.float64) + @skipif_ci_windows("Unstable on Windows") def test_eager_numpy_type_ort_op(self): def impl(A): self.assertIsInstance(A, EagerOrtTensor) @@ -68,6 +70,7 @@ def impl(A): self.assertEqualArray(z, res.numpy()) self.assertEqual(res.numpy().dtype, np.float64) + @skipif_ci_windows("Unstable on Windows") def test_eager_ort(self): def impl(A): print("A") @@ -141,6 +144,7 @@ def impl(A): self.assertEqual(tuple(res.shape()), z.shape) self.assertStartsWith("A\nB\nC\n", text) + @skipif_ci_windows("Unstable on Windows") def test_cdist_com_microsoft(self): from scipy.spatial.distance import cdist as scipy_cdist @@ -193,7 +197,7 @@ def impl(xa, xb): if len(pieces) > 2: raise AssertionError(f"Function is not using argument:\n{onx}") - def test_astype(self): + def test_astype_w2(self): f = absolute_inline(copy_inline(Input("A")).astype(DType(TensorProto.FLOAT))) onx = f.to_onnx(constraints={"A": Float64[None]}) x = np.array([[-5, 6]], dtype=np.float64) @@ -204,7 +208,7 @@ def test_astype(self): got = ref.run(None, {"A": x}) self.assertEqualArray(z, got[0]) - def test_astype0(self): + def test_astype0_w2(self): f = absolute_inline(copy_inline(Input("A")).astype(DType(TensorProto.FLOAT))) onx = f.to_onnx(constraints={"A": Float64[None]}) x = np.array(-5, dtype=np.float64) @@ -215,6 +219,7 @@ def test_astype0(self): got = ref.run(None, {"A": x}) self.assertEqualArray(z, got[0]) + @skipif_ci_windows("Unstable on Windows") def test_eager_ort_cast(self): def impl(A): return A.astype(DType("FLOAT")) diff --git a/_unittests/ut_ort/test_sklearn_array_api_ort.py b/_unittests/ut_ort/test_sklearn_array_api_ort.py index 
330f74b..f50fce1 100644 --- a/_unittests/ut_ort/test_sklearn_array_api_ort.py +++ b/_unittests/ut_ort/test_sklearn_array_api_ort.py @@ -4,7 +4,7 @@ from onnx.defs import onnx_opset_version from sklearn import config_context, __version__ as sklearn_version from sklearn.discriminant_analysis import LinearDiscriminantAnalysis -from onnx_array_api.ext_test_case import ExtTestCase +from onnx_array_api.ext_test_case import ExtTestCase, skipif_ci_windows from onnx_array_api.ort.ort_tensors import EagerOrtTensor, OrtTensor @@ -16,7 +16,9 @@ class TestSklearnArrayAPIOrt(ExtTestCase): Version(sklearn_version) <= Version("1.2.2"), reason="reshape ArrayAPI not followed", ) - def test_sklearn_array_api_linear_discriminant(self): + @skipif_ci_windows("Unstable on Windows.") + @unittest.skip("discontinued") + def test_sklearn_array_api_linear_discriminant_ort(self): X = np.array( [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]], dtype=np.float64 ) @@ -38,7 +40,9 @@ def test_sklearn_array_api_linear_discriminant(self): Version(sklearn_version) <= Version("1.2.2"), reason="reshape ArrayAPI not followed", ) - def test_sklearn_array_api_linear_discriminant_float32(self): + @skipif_ci_windows("Unstable on Windows.") + @unittest.skip("discontinued") + def test_sklearn_array_api_linear_discriminant_ort_float32(self): X = np.array( [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]], dtype=np.float32 ) diff --git a/_unittests/ut_plotting/test_dot_plot.py b/_unittests/ut_plotting/test_dot_plot.py index 5c03746..4c8c4dd 100644 --- a/_unittests/ut_plotting/test_dot_plot.py +++ b/_unittests/ut_plotting/test_dot_plot.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import os import unittest diff --git a/_unittests/ut_plotting/test_graphviz.py b/_unittests/ut_plotting/test_graphviz.py new file mode 100644 index 0000000..420779e --- /dev/null +++ b/_unittests/ut_plotting/test_graphviz.py @@ -0,0 +1,51 @@ +import os +import unittest +import onnx.parser +from onnx_array_api.ext_test_case import ( + ExtTestCase, + skipif_ci_windows, + skipif_ci_apple, +) +from onnx_array_api.plotting.dot_plot import to_dot +from onnx_array_api.plotting.graphviz_helper import draw_graph_graphviz, plot_dot + + +class TestGraphviz(ExtTestCase): + @classmethod + def _get_graph(cls): + return onnx.parser.parse_model( + """ + + agraph (float[N] x) => (float[N] z) { + two = Constant () + four = Add(two, two) + z = Mul(x, x) + }""" + ) + + @skipif_ci_windows("graphviz not installed") + @skipif_ci_apple("graphviz not installed") + def test_draw_graph_graphviz(self): + fout = "test_draw_graph_graphviz.png" + dot = to_dot(self._get_graph()) + draw_graph_graphviz(dot, image=fout) + self.assertExists(os.path.exists(fout)) + + @skipif_ci_windows("graphviz not installed") + @skipif_ci_apple("graphviz not installed") + def test_draw_graph_graphviz_proto(self): + fout = "test_draw_graph_graphviz_proto.png" + dot = self._get_graph() + draw_graph_graphviz(dot, image=fout) + self.assertExists(os.path.exists(fout)) + + @skipif_ci_windows("graphviz not installed") + @skipif_ci_apple("graphviz not installed") + def test_plot_dot(self): + dot = to_dot(self._get_graph()) + ax = plot_dot(dot) + ax.get_figure().savefig("test_plot_dot.png") + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/_unittests/ut_plotting/test_text_plot.py b/_unittests/ut_plotting/test_text_plot.py index 963b5cb..5844ff0 100644 --- a/_unittests/ut_plotting/test_text_plot.py +++ b/_unittests/ut_plotting/test_text_plot.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- 
import os import textwrap import unittest @@ -95,6 +94,7 @@ def test_onnx_text_plot_tree_cls_2(self): +f 0:1 1:0 2:0 """ ).strip(" \n\r") + res = res.replace("np.float32(", "").replace(")", "") self.assertEqual(expected, res.strip(" \n\r")) @ignore_warnings((UserWarning, FutureWarning)) diff --git a/_unittests/ut_reference/test_backend_extended_reference_evaluator.py b/_unittests/ut_reference/test_backend_extended_reference_evaluator.py index 4bc0927..fbf12b7 100644 --- a/_unittests/ut_reference/test_backend_extended_reference_evaluator.py +++ b/_unittests/ut_reference/test_backend_extended_reference_evaluator.py @@ -61,8 +61,6 @@ def create_inference_session(cls, model): def prepare( cls, model: Any, device: str = "CPU", **kwargs: Any ) -> ExtendedReferenceEvaluatorBackendRep: - # if isinstance(model, ExtendedReferenceEvaluatorBackendRep): - # return model if isinstance(model, ExtendedReferenceEvaluator): return ExtendedReferenceEvaluatorBackendRep(model) if isinstance(model, (str, bytes, ModelProto)): @@ -151,7 +149,7 @@ def run_node(cls, node, inputs, device=None, outputs_info=None, **kwargs): "|test_scan_sum)" ) -if onnx_opset_version() < 21: +if onnx_opset_version() < 200: # The following tests are using types not supported by NumPy. # They could be if method to_array is extended to support custom # types the same as the reference implementation does @@ -166,8 +164,10 @@ def run_node(cls, node, inputs, device=None, outputs_info=None, **kwargs): "|test_cast_no_saturate_" "|_to_FLOAT8" "|_FLOAT8" + "|INT4" "|test_quantizelinear_e4m3fn" "|test_quantizelinear_e5m2" + "|test_scatter_with" ")" ) @@ -217,6 +217,25 @@ def run_node(cls, node, inputs, device=None, outputs_info=None, **kwargs): # The following tests fail due to a type mismatch. backend_test.exclude("(test_eyelike_without_dtype)") +if onnx_opset_version() < 22: + backend_test.exclude( + "(" + "test_adagrad_cpu" + "|test_adagrad_multiple_cpu" + "|test_dft_inverse_cpu" + "|test_dft_inverse_opset19_cpu" + "|test_lppool_1d_default_cpu" + "|test_lppool_2d_default_cpu" + "|test_lppool_2d_dilations_cpu" + "|test_lppool_2d_pads_cpu" + "|test_lppool_2d_same_lower_cpu" + "|test_lppool_2d_same_upper_cpu" + "|test_lppool_2d_strides_cpu" + "|test_lppool_3d_default_cpu" + ")" + ) + + # The following tests fail due to discrepancies (small but still higher than 1e-7). 
backend_test.exclude("test_adam_multiple") # 1e-2 diff --git a/_unittests/ut_reference/test_evaluator_yield.py b/_unittests/ut_reference/test_evaluator_yield.py new file mode 100644 index 0000000..605c1f8 --- /dev/null +++ b/_unittests/ut_reference/test_evaluator_yield.py @@ -0,0 +1,554 @@ +import unittest +import numpy as np +from onnx import TensorProto +from onnx.checker import check_model +from onnx.helper import ( + make_function, + make_graph, + make_model, + make_node, + make_opsetid, + make_tensor_value_info, +) +from onnx.numpy_helper import from_array +from onnx.parser import parse_model +from onnx_array_api.ext_test_case import ExtTestCase +from onnx_array_api.reference import ( + YieldEvaluator, + ResultType, + DistanceExecution, + ResultExecution, + compare_onnx_execution, +) +from onnx_array_api.reference.evaluator_yield import make_summary + + +class TestArrayTensor(ExtTestCase): + def test_make_summary(self): + a = np.arange(12).reshape(3, 4) + v = make_summary(a) + self.assertEqual(v, "DMVE") + a = np.arange(12) + v = make_summary(a) + self.assertEqual(v, "DMVE") + a = np.arange(12).astype(np.float32) + v = make_summary(a) + self.assertEqual(v, "DMVE") + a = np.arange(13) + a[-1] = 0 + v = make_summary(a) + self.assertEqual(v, "GWMA") + + def test_evaluator_yield(self): + new_domain = "custom_domain" + opset_imports = [make_opsetid("", 14), make_opsetid(new_domain, 1)] + + node1 = make_node("MatMul", ["X", "A"], ["XA"]) + node2 = make_node("Add", ["XA", "B"], ["Y"]) + + linear_regression = make_function( + new_domain, + "LinearRegression", + ["X", "A", "B"], + ["Y"], + [node1, node2], + opset_imports, + [], + ) + + X = make_tensor_value_info("X", TensorProto.FLOAT, [None, None]) + A = make_tensor_value_info("A", TensorProto.FLOAT, [None, None]) + B = make_tensor_value_info("B", TensorProto.FLOAT, [None, None]) + Y = make_tensor_value_info("Y", TensorProto.FLOAT, None) + + graph = make_graph( + [ + make_node( + "LinearRegression", ["X", "A", "B"], ["Y1"], domain=new_domain + ), + make_node("Abs", ["Y1"], ["Y"]), + ], + "example", + [X, A, B], + [Y], + ) + + onnx_model = make_model( + graph, opset_imports=opset_imports, functions=[linear_regression] + ) + + cst = np.arange(4).reshape((-1, 2)).astype(np.float32) + yield_eval = YieldEvaluator(onnx_model) + results = list( + yield_eval.enumerate_results(None, {"A": cst, "B": cst, "X": cst}) + ) + expected = [ + ( + ResultType.INPUT, + "A", + np.array([[0.0, 1.0], [2.0, 3.0]], dtype=np.float32), + None, + ), + ( + ResultType.INPUT, + "B", + np.array([[0.0, 1.0], [2.0, 3.0]], dtype=np.float32), + None, + ), + ( + ResultType.INPUT, + "X", + np.array([[0.0, 1.0], [2.0, 3.0]], dtype=np.float32), + None, + ), + ( + ResultType.RESULT, + "Y1", + np.array([[2.0, 4.0], [8.0, 14.0]], dtype=np.float32), + "LinearRegression", + ), + ( + ResultType.RESULT, + "Y", + np.array([[2.0, 4.0], [8.0, 14.0]], dtype=np.float32), + "Abs", + ), + ( + ResultType.OUTPUT, + "Y", + np.array([[2.0, 4.0], [8.0, 14.0]], dtype=np.float32), + None, + ), + ] + self.assertEqual(len(expected), len(results)) + for a, b in zip(expected, results): + self.assertEqual(len(a), len(b)) + self.assertEqual(a[0], b[0]) + self.assertEqual(a[1], b[1]) + self.assertEqual(a[2].tolist(), b[2].tolist()) + self.assertEqual(a[3], b[3]) + + def test_evaluator_yield_summary(self): + new_domain = "custom_domain" + opset_imports = [make_opsetid("", 14), make_opsetid(new_domain, 1)] + + node1 = make_node("MatMul", ["X", "A"], ["XA"]) + node2 = make_node("Add", ["XA", "B"], ["Y"]) + + 
linear_regression = make_function( + new_domain, + "LinearRegression", + ["X", "A", "B"], + ["Y"], + [node1, node2], + opset_imports, + [], + ) + + X = make_tensor_value_info("X", TensorProto.FLOAT, [None, None]) + A = make_tensor_value_info("A", TensorProto.FLOAT, [None, None]) + B = make_tensor_value_info("B", TensorProto.FLOAT, [None, None]) + Y = make_tensor_value_info("Y", TensorProto.FLOAT, None) + + graph = make_graph( + [ + make_node( + "LinearRegression", ["X", "A", "B"], ["Y1"], domain=new_domain + ), + make_node("Abs", ["Y1"], ["Y"]), + ], + "example", + [X, A, B], + [Y], + ) + + onnx_model = make_model( + graph, opset_imports=opset_imports, functions=[linear_regression] + ) + + cst = np.arange(4).reshape((-1, 2)).astype(np.float32) + yield_eval = YieldEvaluator(onnx_model) + results = list( + yield_eval.enumerate_summarized(None, {"A": cst, "B": cst, "X": cst}) + ) + expected = [ + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"), + ( + ResultType.RESULT, + np.dtype("float32"), + (2, 2), + "CEIO", + "LinearRegression", + "Y1", + ), + (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs", "Y"), + (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"), + ] + self.assertEqual(len(expected), len(results)) + for a, b in zip(expected, results): + self.assertEqual(len(a), len(b)) + self.assertEqual(a[0], b[0]) + self.assertEqual(a[1], b[1]) + self.assertEqual(a[2], b[2]) + self.assertEqual(a[3], b[3]) + self.assertEqual(a[4], b[4]) + self.assertEqual(a[5], b[5]) + + def test_distance_pair(self): + el1 = (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None) + el2 = el1 + dc = DistanceExecution() + self.assertEqual(dc.distance_pair(el1, el2), 0) + el2 = (ResultType.INPUT, np.dtype("float16"), (2, 2), "ABCD", None) + self.assertEqual(dc.distance_pair(el1, el2), 2) + el2 = (ResultType.OUTPUT, np.dtype("float16"), (2, 2, 4), "GBCD", "Abs") + self.assertEqual(dc.distance_pair(el1, el2), 1130) + el2 = (ResultType.OUTPUT, np.dtype("float16"), (2, 3), "GBCD", "Abs") + self.assertEqual(dc.distance_pair(el1, el2), 1021) + + def test_distance_sequence_0(self): + expected = [ + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"), + ( + ResultType.RESULT, + np.dtype("float32"), + (2, 2), + "CEIO", + "LinearRegression", + "Y1", + ), + (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs", "Y"), + (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"), + ] + + dc = DistanceExecution() + d, align = dc.distance_sequence(expected, expected) + self.assertEqual(d, 0) + self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)]) + + def test_distance_sequence_ins(self): + s1 = [ + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"), + ( + ResultType.RESULT, + np.dtype("float32"), + (2, 2), + "CEIO", + "LinearRegression", + "Y1", + ), + (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs", "Y"), + (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"), + ] + s2 = [ + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"), + (ResultType.INPUT, 
np.dtype("float32"), (2, 2), "ABCD", None, "B"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"), + ( + ResultType.RESULT, + np.dtype("float32"), + (2, 2), + "CEIO", + "LinearRegression", + "Y1", + ), + (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"), + ] + + dc = DistanceExecution() + d, align = dc.distance_sequence(s1, s2) + self.assertEqual(d, dc.insert_cost) + self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 3), (5, 4)]) + d, align = dc.distance_sequence(s2, s1) + self.assertEqual(d, dc.insert_cost) + self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (3, 4), (4, 5)]) + + def test_distance_sequence_equal(self): + s1 = [ + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"), + ( + ResultType.RESULT, + np.dtype("float32"), + (2, 2), + "CEIO", + "LinearRegression", + "Y1", + ), + (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs", "Y"), + (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"), + ] + s2 = [ + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"), + ( + ResultType.RESULT, + np.dtype("float32"), + (2, 2), + "CEIO", + "LinearRegression", + "Y1", + ), + (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs", "Z"), + (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"), + ] + + dc = DistanceExecution() + d, align = dc.distance_sequence(s1, s2) + self.assertEqual(d, 0) + self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)]) + + def test_distance_sequence_diff(self): + s1 = [ + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"), + ( + ResultType.RESULT, + np.dtype("float32"), + (2, 2), + "CEIO", + "LinearRegression", + "Y1", + ), + (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs", "Y"), + (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"), + ] + s2 = [ + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"), + ( + ResultType.RESULT, + np.dtype("float32"), + (2, 2), + "CEIO", + "LinearRegression", + "Y1", + ), + (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIP", "Abs", "Z"), + (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"), + ] + + dc = DistanceExecution() + d, align = dc.distance_sequence(s1, s2) + self.assertEqual(d, 1) + self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)]) + + def test_distance_sequence_diff2(self): + s1 = [ + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"), + ( + ResultType.RESULT, + np.dtype("float32"), + (2, 2), + "CEIO", + "LinearRegression", + "Y1", + ), + (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs", "Y"), + (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"), + ] + s2 = [ + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"), + 
(ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"), + ( + ResultType.RESULT, + np.dtype("float32"), + (2, 2), + "CEIO", + "LinearRegression", + "Y1", + ), + (ResultType.RESULT, np.dtype("float32"), (2, 3), "CEIP", "Abs", "Z"), + (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIP", None, "Y"), + ] + + dc = DistanceExecution() + d, align = dc.distance_sequence(s1, s2) + self.assertEqual(d, 5) + self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)]) + + def test_distance_sequence_str(self): + s1 = [ + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"), + (ResultType.INPUT, np.dtype("float32"), (2, 3), "ABCD", None, "X"), + (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Exp", "H"), + ( + ResultType.RESULT, + np.dtype("float32"), + (2, 2), + "CEIO", + "LinearRegression", + "Y1", + ), + (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs", "Y"), + (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"), + ] + s2 = [ + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"), + ( + ResultType.RESULT, + np.dtype("float32"), + (2, 2), + "CEIO", + "LinearRegression", + "Y1", + ), + (ResultType.RESULT, np.dtype("float32"), (2, 3), "CEIP", "Abs", "Z"), + (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIP", None, "Y"), + ] + s1 = [ResultExecution(*s) for s in s1] + s2 = [ResultExecution(*s) for s in s2] + + dc = DistanceExecution() + d, align = dc.distance_sequence(s1, s2) + self.assertEqual(d, 1008) + self.assertEqual( + align, [(0, 0), (1, 1), (2, 2), (3, 2), (4, 3), (5, 4), (6, 5)] + ) + text = dc.to_str(s1, s2, align) + self.assertIn("OUTPUT", text) + expected = """ + 001=|INPUTfloat322:2x2ABCDA|INPUTfloat322:2x2ABCDA + 002=|INPUTfloat322:2x2ABCDB|INPUTfloat322:2x2ABCDB + 003~|INPUTfloat322:2x3ABCDX|INPUTfloat322:2x2ABCDX + 004-|RESULTfloat322:2x2CEIOExpH| + 005=|RESULTfloat322:2x2CEIOLinearRegressioY1|RESULTfloat322:2x2CEIOLinearRegressioY1 + 006~|RESULTfloat322:2x2CEIOAbsY|RESULTfloat322:2x3CEIPAbsZ + 007~|OUTPUTfloat322:2x2CEIOY|OUTPUTfloat322:2x2CEIPY + """.replace( + " ", "" + ).strip( + "\n " + ) + self.maxDiff = None + self.assertEqual(expected, text.replace(" ", "").strip("\n")) + + def test_compare_execution(self): + m1 = parse_model( + """ + + agraph (float[N] x) => (float[N] z) { + two = Constant () + four = Add(two, two) + z = Mul(x, x) + }""" + ) + m2 = parse_model( + """ + + agraph (float[N] x) => (float[N] z) { + two = Constant () + z = Mul(x, x) + }""" + ) + res1, res2, align, dc = compare_onnx_execution(m1, m2) + text = dc.to_str(res1, res2, align) + self.assertIn("CAAA Constant", text) + self.assertEqual(len(align), 5) + + def test_compare_execution_discrepancies(self): + m1 = parse_model( + """ + + agraph (float[N] x) => (float[N] z) { + two = Constant () + four = Add(two, two) + z = Mul(x, x) + }""" + ) + m2 = parse_model( + """ + + agraph (float[N] x) => (float[N] z) { + two = Constant () + z = Mul(x, x) + }""" + ) + res1, res2, align, dc = compare_onnx_execution(m1, m2, keep_tensor=True) + text = dc.to_str(res1, res2, align) + print(text) + self.assertIn("CAAA Constant", text) + self.assertIn("| a=", text) + self.assertIn(" r=", text) + + def test_no_execution(self): + model = make_model( + make_graph( 
+ [ + make_node("Unsqueeze", ["X", "zero"], ["xu1"]), + make_node("Unsqueeze", ["xu1", "un"], ["xu2"]), + make_node("Reshape", ["xu2", "shape1"], ["xm1"]), + make_node("Reshape", ["Y", "shape2"], ["xm2c"]), + make_node("Cast", ["xm2c"], ["xm2"], to=1), + make_node("MatMul", ["xm1", "xm2"], ["xm"]), + make_node("Reshape", ["xm", "shape3"], ["Z"]), + ], + "dummy", + [ + make_tensor_value_info("X", TensorProto.FLOAT, [32, 128]), + make_tensor_value_info("Y", TensorProto.FLOAT, [3, 5, 128, 64]), + ], + [make_tensor_value_info("Z", TensorProto.FLOAT, [3, 5, 32, "N"])], + [ + from_array(np.array([0], dtype=np.int64), name="zero"), + from_array(np.array([1], dtype=np.int64), name="un"), + from_array(np.array([1, 32, 128], dtype=np.int64), name="shape1"), + from_array(np.array([15, 128, 64], dtype=np.int64), name="shape2"), + from_array(np.array([3, 5, 32, 64], dtype=np.int64), name="shape3"), + ], + ) + ) + check_model(model) + res1, res2, align, dc = compare_onnx_execution(model, model, mode="nodes") + text = dc.to_str(res1, res2, align) + self.assertIn("012 = | NODE", text) + + model2 = make_model( + make_graph( + [ + make_node("Unsqueeze", ["X", "zero"], ["xu1"]), + make_node("Unsqueeze", ["xu1", "un"], ["xu2"]), + make_node("Reshape", ["xu2", "shape1"], ["xm1"]), + make_node("Reshape", ["Y", "shape2"], ["xm2c"]), + make_node("MatMul", ["xm1", "xm2c"], ["xm"]), + make_node("Reshape", ["xm", "shape3"], ["Z"]), + ], + "dummy", + [ + make_tensor_value_info("X", TensorProto.FLOAT, [32, 128]), + make_tensor_value_info("Y", TensorProto.FLOAT, [3, 5, 128, 64]), + ], + [make_tensor_value_info("Z", TensorProto.FLOAT, [3, 5, 32, "N"])], + [ + from_array(np.array([0], dtype=np.int64), name="zero"), + from_array(np.array([1], dtype=np.int64), name="un"), + from_array(np.array([1, 32, 128], dtype=np.int64), name="shape1"), + from_array(np.array([15, 128, 64], dtype=np.int64), name="shape2"), + from_array(np.array([3, 5, 32, 64], dtype=np.int64), name="shape3"), + ], + ) + ) + check_model(model2) + res1, res2, align, dc = compare_onnx_execution(model, model2, mode="nodes") + text = dc.to_str(res1, res2, align) + self.assertIn("012 = | NODE", text) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/_unittests/ut_reference/test_reference_ops.py b/_unittests/ut_reference/test_reference_ops.py new file mode 100644 index 0000000..9ae6fec --- /dev/null +++ b/_unittests/ut_reference/test_reference_ops.py @@ -0,0 +1,146 @@ +import unittest +import numpy as np +from onnx import TensorProto +from onnx.helper import ( + make_graph, + make_model, + make_node, + make_tensor_value_info, + make_opsetid, +) +from onnx_array_api.ext_test_case import ExtTestCase +from onnx_array_api.reference import ExtendedReferenceEvaluator + + +class TestReferenceOps(ExtTestCase): + + def test_fused_matmul(self): + model = make_model( + make_graph( + [make_node("FusedMatMul", ["X", "Y"], ["Z"], domain="com.microsoft")], + "name", + [ + make_tensor_value_info("X", TensorProto.FLOAT, None), + make_tensor_value_info("Y", TensorProto.FLOAT, None), + ], + [make_tensor_value_info("Z", TensorProto.FLOAT, None)], + ), + opset_imports=[make_opsetid("", 18), make_opsetid("com.microsoft", 1)], + ) + ref = ExtendedReferenceEvaluator(model) + a = np.arange(4).reshape(-1, 2) + got = ref.run(None, {"X": a, "Y": a}) + self.assertEqualArray(a @ a, got[0]) + + def test_fused_matmul11(self): + model = make_model( + make_graph( + [ + make_node( + "FusedMatMul", + ["X", "Y"], + ["Z"], + transA=1, + transB=1, + domain="com.microsoft", + 
) + ], + "name", + [ + make_tensor_value_info("X", TensorProto.FLOAT, None), + make_tensor_value_info("Y", TensorProto.FLOAT, None), + ], + [make_tensor_value_info("Z", TensorProto.FLOAT, None)], + ), + opset_imports=[make_opsetid("", 18), make_opsetid("com.microsoft", 1)], + ) + ref = ExtendedReferenceEvaluator(model) + a = np.arange(4).reshape(-1, 2) + got = ref.run(None, {"X": a, "Y": a}) + self.assertEqualArray(a.T @ a.T, got[0]) + + def test_memcpy(self): + model = make_model( + make_graph( + [ + make_node("MemcpyToHost", ["X"], ["Z"]), + make_node("MemcpyFromHost", ["X"], ["Z"]), + ], + "name", + [make_tensor_value_info("X", TensorProto.FLOAT, None)], + [make_tensor_value_info("Z", TensorProto.FLOAT, None)], + ), + opset_imports=[make_opsetid("", 18), make_opsetid("com.microsoft", 1)], + ir_version=9, + ) + a = np.arange(4).reshape(-1, 2).astype(np.float32) + ref = ExtendedReferenceEvaluator(model) + got = ref.run(None, {"X": a}) + self.assertEqualArray(a, got[0]) + + def test_quick_gelu(self): + from onnxruntime import InferenceSession + + for alpha in [0.0, 2.0]: + model = make_model( + make_graph( + [ + make_node( + "QuickGelu", + ["X"], + ["Z"], + domain="com.microsoft", + alpha=alpha, + ) + ], + "name", + [make_tensor_value_info("X", TensorProto.FLOAT, None)], + [make_tensor_value_info("Z", TensorProto.FLOAT, None)], + ), + opset_imports=[make_opsetid("", 18), make_opsetid("com.microsoft", 1)], + ir_version=9, + ) + sess = InferenceSession( + model.SerializeToString(), providers=["CPUExecutionProvider"] + ) + a = np.arange(4).reshape(-1, 2).astype(np.float32) + expected = sess.run(None, {"X": a}) + ref = ExtendedReferenceEvaluator(model) + got = ref.run(None, {"X": a}) + self.assertEqualArray(expected[0], got[0]) + + def test_scatter_elements(self): + model = make_model( + make_graph( + [ + make_node( + "ScatterElements", + ["data", "indices", "updates"], + ["Z"], + axis=3, + reduction="add", + ) + ], + "name", + [ + make_tensor_value_info("data", TensorProto.FLOAT, None), + make_tensor_value_info("indices", TensorProto.INT64, None), + make_tensor_value_info("updates", TensorProto.FLOAT, None), + ], + [make_tensor_value_info("Z", TensorProto.FLOAT, None)], + ), + opset_imports=[make_opsetid("", 18)], + ) + data = np.zeros(2**4, dtype=np.float32).reshape((2, 2, 2, 2)) + indices = np.array([[[[0]]]], dtype=np.int64) + updates = np.array([[[[1]]]], dtype=np.float32) + y = np.array( + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.float32 + ).reshape((2, 2, 2, 2)) + ref = ExtendedReferenceEvaluator(model) + got = ref.run(None, {"data": data, "indices": indices, "updates": updates}) + self.assertEqualArray(y, got[0]) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/_unittests/ut_tools/test_replace_constants.py b/_unittests/ut_tools/test_replace_constants.py new file mode 100644 index 0000000..5cad1c2 --- /dev/null +++ b/_unittests/ut_tools/test_replace_constants.py @@ -0,0 +1,160 @@ +import unittest +import numpy as np +import onnx +import onnx.helper as oh +import onnx.numpy_helper as onh +from onnx import TensorProto +from onnx_array_api.ext_test_case import ExtTestCase +from onnx_array_api.reference import ( + ExtendedReferenceEvaluator as ReferenceEvaluator, +) +from onnx_array_api.tools.replace_constants import ( + replace_initializer_by_constant_of_shape, +) + + +class TestReplaceConstants(ExtTestCase): + + def test_replace_initializer(self): + dtype = np.float32 + value = np.random.randn(2, 100).astype(dtype) + A = onh.from_array(value, 
name="A") + value = np.array([1], dtype=dtype) + C = onh.from_array(value, name="C") + + X = oh.make_tensor_value_info("X", TensorProto.FLOAT, [None, None]) + Y = oh.make_tensor_value_info("Y", TensorProto.FLOAT, [None]) + node1 = oh.make_node("MatMul", ["X", "A"], ["AX"]) + node2 = oh.make_node("Sub", ["AX", "C"], ["Y"]) + graph = oh.make_graph([node1, node2], "lr", [X], [Y], [A, C]) + model_def = oh.make_model(graph) + + x = np.array([1, 2, 4, 5, 5, 4]).astype(np.float32).reshape((3, 2)) + oinf1 = ReferenceEvaluator(model_def) + y1 = oinf1.run(None, {"X": x})[0] # type: ignore[index] + repl = replace_initializer_by_constant_of_shape(model_def) + node_types = {n.op_type for n in repl.graph.node} + self.assertIn("ConstantOfShape", node_types) + oinf2 = ReferenceEvaluator(repl) + y1[:, :] = 3.5 + y1[0, :] = 0.5 + y2 = oinf2.run(None, {"X": x})[0] # type: ignore[index] + self.assertEqualArray(y1, y2) + + def test_replace_constant(self): + dtype = np.float32 + value = np.random.randn(2, 10).astype(dtype) + A = onh.from_array(value, name="A") + value = np.array([1], dtype=dtype) + C = onh.from_array(value, name="C") + + X = oh.make_tensor_value_info("X", TensorProto.FLOAT, [None, None]) + Y = oh.make_tensor_value_info("Y", TensorProto.FLOAT, [None]) + node0 = oh.make_node("Constant", [], ["A"], value=A) + node1 = oh.make_node("MatMul", ["X", "A"], ["AX"]) + node2 = oh.make_node("Sub", ["AX", "C"], ["Y"]) + graph = oh.make_graph([node0, node1, node2], "lr", [X], [Y], [C]) + model_def = oh.make_model(graph) + + x = np.array([1, 2, 4, 5, 5, 4]).astype(np.float32).reshape((3, 2)) + oinf1 = ReferenceEvaluator(model_def) + y1 = oinf1.run(None, {"X": x})[0] # type: ignore[index] + repl = replace_initializer_by_constant_of_shape(model_def, threshold=0) + node_types = {n.op_type for n in repl.graph.node} + self.assertIn("ConstantOfShape", node_types) + oinf2 = ReferenceEvaluator(repl) + y1[:, :] = 4 + y1[0, :] = 1 + y2 = oinf2.run(None, {"X": x})[0] # type: ignore[index] + self.assertEqualArray(y1, y2) + + def test_replace_constant_function(self): + dtype = np.float32 + value = np.random.randn(2, 100).astype(dtype) + A = onh.from_array(value, name="A") + value = np.array([1], dtype=dtype) + C = onh.from_array(value, name="C") + + X = oh.make_tensor_value_info("X", TensorProto.FLOAT, [None, None]) + Y = oh.make_tensor_value_info("Y", TensorProto.FLOAT, [None]) + nodeC = oh.make_node("Constant", [], ["C"], value=C) + node0 = oh.make_node("Constant", [], ["A"], value=A) + node1 = oh.make_node("MatMul", ["X", "A"], ["AX"]) + node2 = oh.make_node("Sub", ["AX", "C"], ["Y"]) + opset_imports = [ + oh.make_opsetid("", onnx.defs.onnx_opset_version()), + oh.make_opsetid("custom", 1), + ] + fct = oh.make_function( + "custom", + "unittest", + ["X"], + ["Y"], + [nodeC, node0, node1, node2], + opset_imports, + ) + + node = oh.make_node("unittest", ["X"], ["Y"], domain="custom") + graph = oh.make_graph([node], "lr", [X], [Y], [C]) + model_def = oh.make_model(graph, functions=[fct], opset_imports=opset_imports) + + x = np.array([1, 2, 4, 5, 5, 4]).astype(np.float32).reshape((3, 2)) + oinf1 = ReferenceEvaluator(model_def) + y1 = oinf1.run(None, {"X": x})[0] # type: ignore[index] + repl = replace_initializer_by_constant_of_shape(model_def) + node_types = {n.op_type for n in repl.functions[0].node} + self.assertIn("ConstantOfShape", node_types) + oinf2 = ReferenceEvaluator(repl) + y1[:, :] = 3.5 + y1[0, :] = 0.5 + y2 = oinf2.run(None, {"X": x})[0] # type: ignore[index] + self.assertEqualArray(y1, y2) + + def 
test_replace_constant_graph(self): + value = np.array([0], dtype=np.float32) + zero = onh.from_array(value, name="zero") + + X = oh.make_tensor_value_info("X", onnx.TensorProto.FLOAT, [None, None]) + Y = oh.make_tensor_value_info("Y", onnx.TensorProto.FLOAT, [None]) + + rsum = oh.make_node("ReduceSum", ["X"], ["rsum"]) + cond = oh.make_node("Greater", ["rsum", "zero"], ["cond"]) + + then_out = oh.make_tensor_value_info("then_out", onnx.TensorProto.FLOAT, None) + then_cst = onh.from_array(np.array([1] * 129).astype(np.float32)) + + then_const_node = oh.make_node( + "Constant", inputs=[], outputs=["then_out"], value=then_cst, name="cst1" + ) + then_body = oh.make_graph([then_const_node], "then_body", [], [then_out]) + + else_out = oh.make_tensor_value_info("else_out", onnx.TensorProto.FLOAT, None) + else_cst = onh.from_array(np.array([-1] * 129).astype(np.float32)) + else_const_node = oh.make_node( + "Constant", inputs=[], outputs=["else_out"], value=else_cst, name="cst2" + ) + else_body = oh.make_graph([else_const_node], "else_body", [], [else_out]) + + if_node = oh.make_node( + "If", ["cond"], ["Y"], then_branch=then_body, else_branch=else_body + ) + graph = oh.make_graph([rsum, cond, if_node], "if", [X], [Y], [zero]) + onnx_model = oh.make_model( + graph, opset_imports=[oh.make_opsetid("", onnx.defs.onnx_opset_version())] + ) + self.assertNotIn("ConstantOfShape", str(onnx_model)) + + x = np.ones((3, 2), dtype=np.float32) + oinf1 = ReferenceEvaluator(onnx_model) + y1 = oinf1.run(None, {"X": x})[0] # type: ignore[index] + repl = replace_initializer_by_constant_of_shape(onnx_model) + self.assertIn("ConstantOfShape", str(repl)) + oinf2 = ReferenceEvaluator(repl) + y2 = oinf2.run(None, {"X": x})[0] # type: ignore[index] + y1 = y1.copy() + y1[:] = 0.5 + self.assertEqualArray(y1, y2) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/_unittests/ut_translate_api/_data/custom_ops_type_inference_fails_0.onnx b/_unittests/ut_translate_api/_data/custom_ops_type_inference_fails_0.onnx new file mode 100644 index 0000000..8116ec3 Binary files /dev/null and b/_unittests/ut_translate_api/_data/custom_ops_type_inference_fails_0.onnx differ diff --git a/_unittests/ut_translate_api/_data/stft_inlined_batch_1.onnx b/_unittests/ut_translate_api/_data/stft_inlined_batch_1.onnx new file mode 100644 index 0000000..172de97 Binary files /dev/null and b/_unittests/ut_translate_api/_data/stft_inlined_batch_1.onnx differ diff --git a/_unittests/ut_translate_api/test_translate.py b/_unittests/ut_translate_api/test_translate.py new file mode 100644 index 0000000..98629d8 --- /dev/null +++ b/_unittests/ut_translate_api/test_translate.py @@ -0,0 +1,230 @@ +import unittest +from textwrap import dedent +import numpy as np +from onnx import ModelProto, TensorProto +from onnx.defs import onnx_opset_version +from onnx.reference import ReferenceEvaluator +from onnx_array_api.ext_test_case import ExtTestCase +from onnx_array_api.light_api import start, g +from onnx_array_api.translate_api import translate +from onnx_array_api.translate_api.base_emitter import EventType + +OPSET_API = min(19, onnx_opset_version() - 1) + + +class TestTranslate(ExtTestCase): + def test_event_type(self): + self.assertEqual( + EventType.to_str(EventType.INITIALIZER), "EventType.INITIALIZER" + ) + + def test_exp(self): + onx = start(opset=19).vin("X").Exp().rename("Y").vout().to_onnx() + self.assertIsInstance(onx, ModelProto) + self.assertIn("Exp", str(onx)) + ref = ReferenceEvaluator(onx) + a = np.arange(10).astype(np.float32) 
+ got = ref.run(None, {"X": a})[0] + self.assertEqualArray(np.exp(a), got) + + code = translate(onx) + expected = dedent( + """ + ( + start(opset=19) + .vin('X', elem_type=TensorProto.FLOAT) + .bring('X') + .Exp() + .rename('Y') + .bring('Y') + .vout(elem_type=TensorProto.FLOAT) + .to_onnx() + )""" + ).strip("\n") + self.assertEqual(expected, code) + + onx2 = ( + start(opset=19) + .vin("X", elem_type=TensorProto.FLOAT) + .bring("X") + .Exp() + .rename("Y") + .bring("Y") + .vout(elem_type=TensorProto.FLOAT) + .to_onnx() + ) + ref = ReferenceEvaluator(onx2) + a = np.arange(10).astype(np.float32) + got = ref.run(None, {"X": a})[0] + self.assertEqualArray(np.exp(a), got) + + def test_transpose(self): + onx = ( + start(opset=19) + .vin("X") + .reshape((-1, 1)) + .Transpose(perm=[1, 0]) + .rename("Y") + .vout() + .to_onnx() + ) + self.assertIsInstance(onx, ModelProto) + self.assertIn("Transpose", str(onx)) + ref = ReferenceEvaluator(onx) + a = np.arange(10).astype(np.float32) + got = ref.run(None, {"X": a})[0] + self.assertEqualArray(a.reshape((-1, 1)).T, got) + + code = translate(onx) + expected = dedent( + """ + ( + start(opset=19) + .cst(np.array([-1, 1], dtype=np.int64)) + .rename('r') + .vin('X', elem_type=TensorProto.FLOAT) + .bring('X', 'r') + .Reshape() + .rename('r0_0') + .bring('r0_0') + .Transpose(perm=[1, 0]) + .rename('Y') + .bring('Y') + .vout(elem_type=TensorProto.FLOAT) + .to_onnx() + )""" + ).strip("\n") + self.assertEqual(expected, code) + + def test_topk_reverse(self): + onx = ( + start(opset=19) + .vin("X", np.float32) + .vin("K", np.int64) + .bring("X", "K") + .TopK(largest=0) + .rename("Values", "Indices") + .vout() + .to_onnx() + ) + self.assertIsInstance(onx, ModelProto) + ref = ReferenceEvaluator(onx) + x = np.array([[0, 1, 2, 3], [9, 8, 7, 6]], dtype=np.float32) + k = np.array([2], dtype=np.int64) + got = ref.run(None, {"X": x, "K": k}) + self.assertEqualArray(np.array([[0, 1], [6, 7]], dtype=np.float32), got[0]) + self.assertEqualArray(np.array([[0, 1], [3, 2]], dtype=np.int64), got[1]) + + code = translate(onx) + expected = dedent( + """ + ( + start(opset=19) + .vin('X', elem_type=TensorProto.FLOAT) + .vin('K', elem_type=TensorProto.INT64) + .bring('X', 'K') + .TopK(axis=-1, largest=0, sorted=1) + .rename('Values', 'Indices') + .bring('Values') + .vout(elem_type=TensorProto.FLOAT) + .bring('Indices') + .vout(elem_type=TensorProto.FLOAT) + .to_onnx() + )""" + ).strip("\n") + self.assertEqual(expected, code) + + def test_export_if(self): + onx = ( + start(opset=19) + .vin("X", np.float32) + .ReduceSum() + .rename("Xs") + .cst(np.array([0], dtype=np.float32)) + .left_bring("Xs") + .Greater() + .If( + then_branch=g().cst(np.array([1], dtype=np.int64)).rename("Z").vout(), + else_branch=g().cst(np.array([0], dtype=np.int64)).rename("Z").vout(), + ) + .rename("W") + .vout() + .to_onnx() + ) + + self.assertIsInstance(onx, ModelProto) + ref = ReferenceEvaluator(onx) + x = np.array([[0, 1, 2, 3], [9, 8, 7, 6]], dtype=np.float32) + k = np.array([2], dtype=np.int64) + got = ref.run(None, {"X": x, "K": k}) + self.assertEqualArray(np.array([1], dtype=np.int64), got[0]) + + code = translate(onx) + selse = ( + "g().cst(np.array([0], dtype=np.int64)).rename('Z')." + "bring('Z').vout(elem_type=TensorProto.FLOAT)" + ) + sthen = ( + "g().cst(np.array([1], dtype=np.int64)).rename('Z')." 
+ "bring('Z').vout(elem_type=TensorProto.FLOAT)" + ) + expected = dedent( + f""" + ( + start(opset=19) + .cst(np.array([0.0], dtype=np.float32)) + .rename('r') + .vin('X', elem_type=TensorProto.FLOAT) + .bring('X') + .ReduceSum(keepdims=1, noop_with_empty_axes=0) + .rename('Xs') + .bring('Xs', 'r') + .Greater() + .rename('r1_0') + .bring('r1_0') + .If(else_branch={selse}, then_branch={sthen}) + .rename('W') + .bring('W') + .vout(elem_type=TensorProto.FLOAT) + .to_onnx() + )""" + ).strip("\n") + self.maxDiff = None + self.assertEqual(expected, code) + + def test_aionnxml(self): + onx = ( + start(opset=19, opsets={"ai.onnx.ml": 3}) + .vin("X") + .reshape((-1, 1)) + .rename("USE") + .ai.onnx.ml.Normalizer(norm="MAX") + .rename("Y") + .vout() + .to_onnx() + ) + code = translate(onx) + expected = dedent( + """ + ( + start(opset=19, opsets={'ai.onnx.ml': 3}) + .cst(np.array([-1, 1], dtype=np.int64)) + .rename('r') + .vin('X', elem_type=TensorProto.FLOAT) + .bring('X', 'r') + .Reshape() + .rename('USE') + .bring('USE') + .ai.onnx.ml.Normalizer(norm='MAX') + .rename('Y') + .bring('Y') + .vout(elem_type=TensorProto.FLOAT) + .to_onnx() + )""" + ).strip("\n") + self.maxDiff = None + self.assertEqual(expected, code) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/_unittests/ut_translate_api/test_translate_builder.py b/_unittests/ut_translate_api/test_translate_builder.py new file mode 100644 index 0000000..b1ad394 --- /dev/null +++ b/_unittests/ut_translate_api/test_translate_builder.py @@ -0,0 +1,285 @@ +import unittest +from textwrap import dedent +import numpy as np +import onnx.helper as oh +from onnx import ModelProto, TensorProto +from onnx.checker import check_model +from onnx.defs import onnx_opset_version +from onnx.reference import ReferenceEvaluator +from onnx_array_api.ext_test_case import ExtTestCase +from onnx_array_api.light_api import start +from onnx_array_api.graph_api import GraphBuilder +from onnx_array_api.translate_api import translate, Translater +from onnx_array_api.translate_api.builder_emitter import BuilderEmitter + + +OPSET_API = min(19, onnx_opset_version() - 1) + + +class TestTranslateBuilder(ExtTestCase): + def setUp(self): + self.maxDiff = None + + def test_exp(self): + onx = start(opset=19, ir_version=10).vin("X").Exp().rename("Y").vout().to_onnx() + self.assertIsInstance(onx, ModelProto) + self.assertIn("Exp", str(onx)) + ref = ReferenceEvaluator(onx) + a = np.arange(10).astype(np.float32) + got = ref.run(None, {"X": a})[0] + self.assertEqualArray(np.exp(a), got) + + code = translate(onx, api="builder") + expected = ( + dedent( + """ + def light_api( + op: "GraphBuilder", + X: "FLOAT[]", + ): + Y = op.Exp(X, outputs=['Y']) + op.Identity(Y, outputs=["Y"]) + return Y + + g = GraphBuilder({'': 19}, ir_version=10) + g.make_tensor_input("X", TensorProto.FLOAT, ()) + light_api(g.op, "X") + g.make_tensor_output("Y", TensorProto.FLOAT, ()__SUFFIX__) + model = g.to_onnx() + """ + ) + .strip("\n") + .replace("__SUFFIX__", ", is_dimension=False, indexed=False") + ) + self.assertEqual(expected, code.strip("\n")) + + def light_api( + op: "GraphBuilder", + X: "FLOAT[]", # noqa: F722 + ): + Y = op.Exp(X, outputs=["Y"]) + op.Identity(Y, outputs=["Y"]) + return Y + + g2 = GraphBuilder({"": 19}) + g2.make_tensor_input("X", TensorProto.FLOAT, ("A",)) + light_api(g2.op, "X") + g2.make_tensor_output( + "Y", TensorProto.FLOAT, ("A",), is_dimension=False, indexed=False + ) + onx2 = g2.to_onnx() + + ref = ReferenceEvaluator(onx2) + a = 
np.arange(10).astype(np.float32) + got = ref.run(None, {"X": a})[0] + self.assertEqualArray(np.exp(a), got) + + def test_zdoc(self): + onx = ( + start(opset=19, ir_version=10) + .vin("X") + .reshape((-1, 1)) + .Transpose(perm=[1, 0]) + .rename("Y") + .vout() + .to_onnx() + ) + code = translate(onx, api="builder") + expected = ( + dedent( + """ + def light_api( + op: "GraphBuilder", + X: "FLOAT[]", + ): + r = np.array([-1, 1], dtype=np.int64) + r0_0 = op.Reshape(X, r, outputs=['r0_0']) + Y = op.Transpose(r0_0, perm=[1, 0], outputs=['Y']) + op.Identity(Y, outputs=["Y"]) + return Y + + g = GraphBuilder({'': 19}, ir_version=10) + g.make_tensor_input("X", TensorProto.FLOAT, ()) + light_api(g.op, "X") + g.make_tensor_output("Y", TensorProto.FLOAT, ()__SUFFIX__) + model = g.to_onnx() + """ + ) + .strip("\n") + .replace("__SUFFIX__", ", is_dimension=False, indexed=False") + ) + self.maxDiff = None + self.assertEqual(expected, code.strip("\n")) + + def light_api( + op: "GraphBuilder", + X: "FLOAT[]", # noqa: F722 + ): + r = np.array([-1, 1], dtype=np.int64) + r0_0 = op.Reshape(X, r) + Y = op.Transpose(r0_0, perm=[1, 0]) + op.Identity(Y, outputs=["Y"]) + return Y + + g = GraphBuilder({"": 21}) + X = g.make_tensor_input("X", TensorProto.FLOAT, ()) + light_api(g.op, X) + g.make_tensor_output("Y", TensorProto.FLOAT, ()) + model = g.to_onnx() + self.assertNotEmpty(model) + check_model(model) + + def test_exp_f(self): + onx = start(opset=19, ir_version=10).vin("X").Exp().rename("Y").vout().to_onnx() + self.assertIsInstance(onx, ModelProto) + self.assertIn("Exp", str(onx)) + ref = ReferenceEvaluator(onx) + a = np.arange(10).astype(np.float32) + got = ref.run(None, {"X": a})[0] + self.assertEqualArray(np.exp(a), got) + + tr = Translater(onx, emitter=BuilderEmitter("mm")) + code = tr.export(as_str=True) + + expected = ( + dedent( + """ + def light_api( + op: "GraphBuilder", + X: "FLOAT[]", + ): + Y = op.Exp(X, outputs=['Y']) + op.Identity(Y, outputs=["Y"]) + return Y + + + def mm() -> "ModelProto": + g = GraphBuilder({'': 19}, ir_version=10) + g.make_tensor_input("X", TensorProto.FLOAT, ()) + light_api(g.op, "X") + g.make_tensor_output("Y", TensorProto.FLOAT, ()__SUFFIX__) + model = g.to_onnx() + return model + + + model = mm() + """ + ) + .strip("\n") + .replace("__SUFFIX__", ", is_dimension=False, indexed=False") + ) + self.assertEqual(expected, code.strip("\n")) + + def light_api( + op: "GraphBuilder", + X: "FLOAT[]", # noqa: F722 + ): + Y = op.Exp(X) + op.Identity(Y, outputs=["Y"]) + return Y + + g2 = GraphBuilder({"": 19}) + g2.make_tensor_input("X", TensorProto.FLOAT, ("A",)) + light_api(g2.op, "X") + g2.make_tensor_output( + "Y", TensorProto.FLOAT, ("A",), is_dimension=False, indexed=False + ) + onx2 = g2.to_onnx() + + ref = ReferenceEvaluator(onx2) + a = np.arange(10).astype(np.float32) + got = ref.run(None, {"X": a})[0] + self.assertEqualArray(np.exp(a), got) + + def test_local_function(self): + new_domain = "custom" + + linear_regression = oh.make_function( + new_domain, + "LinearRegression", + ["x", "a", "b"], + ["y"], + [ + oh.make_node("MatMul", ["x", "a"], ["xa"]), + oh.make_node("Add", ["xa", "b"], ["y"]), + ], + [oh.make_opsetid("", 14)], + [], + ) + + graph = oh.make_graph( + [ + oh.make_node( + "LinearRegression", ["X", "A", "B"], ["Y1"], domain=new_domain + ), + oh.make_node("Abs", ["Y1"], ["Y"]), + ], + "example", + [ + oh.make_tensor_value_info("X", TensorProto.FLOAT, [None, None]), + oh.make_tensor_value_info("A", TensorProto.FLOAT, [None, None]), + oh.make_tensor_value_info("B", 
TensorProto.FLOAT, [None, None]), + ], + [oh.make_tensor_value_info("Y", TensorProto.FLOAT, None)], + ) + + onnx_model = oh.make_model( + graph, + opset_imports=[oh.make_opsetid("", 14), oh.make_opsetid(new_domain, 1)], + functions=[linear_regression], + ir_version=10, + ) + tr = Translater(onnx_model, emitter=BuilderEmitter("mm")) + code = tr.export(as_str=True) + + expected = ( + dedent( + """ + def example( + op: "GraphBuilder", + X: "FLOAT[, ]", + A: "FLOAT[, ]", + B: "FLOAT[, ]", + ): + Y1 = op.LinearRegression(X, A, B, domain='custom', outputs=['Y1']) + Y = op.Abs(Y1, outputs=['Y']) + op.Identity(Y, outputs=["Y"]) + return Y + + + def make_custom_LinearRegression(g: "GraphBuilder"): + gr = GraphBuilder({'': 14}, as_function=True) + x = gr.make_tensor_input('x') + a = gr.make_tensor_input('a') + b = gr.make_tensor_input('b') + op = gr.op + xa = op.MatMul(x, a, outputs=['xa']) + y = op.Add(xa, b, outputs=['y']) + gr.make_tensor_output(y) + g.add_function(builder=gr) + return gr + + + def mm() -> "ModelProto": + g = GraphBuilder({'': 14, 'custom': 1}, ir_version=10) + g.make_tensor_input("X", TensorProto.FLOAT, ('', '')) + g.make_tensor_input("A", TensorProto.FLOAT, ('', '')) + g.make_tensor_input("B", TensorProto.FLOAT, ('', '')) + example(g.op, "X", "A", "B") + g.make_tensor_output("Y", TensorProto.FLOAT, ()__SUFFIX__) + make_custom_LinearRegression(g) + model = g.to_onnx() + return model + + + model = mm() + """ + ) + .strip("\n") + .replace("__SUFFIX__", ", is_dimension=False, indexed=False") + ) + self.assertEqual(expected, code.strip("\n")) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/_unittests/ut_translate_api/test_translate_classic.py b/_unittests/ut_translate_api/test_translate_classic.py new file mode 100644 index 0000000..4f65b99 --- /dev/null +++ b/_unittests/ut_translate_api/test_translate_classic.py @@ -0,0 +1,502 @@ +import unittest +import os +from textwrap import dedent +import numpy as np +from onnx import ModelProto, TensorProto, load +from onnx.defs import onnx_opset_version +from onnx.reference import ReferenceEvaluator +from onnx.reference.op_run import OpRun +from onnx.helper import ( + make_tensor_value_info, + make_node, + make_graph, + make_model, + make_opsetid, +) +from onnx.checker import check_model +from onnx_array_api.ext_test_case import ExtTestCase +from onnx_array_api.light_api import start +from onnx_array_api.translate_api import translate + +OPSET_API = min(19, onnx_opset_version() - 1) + + +class TestTranslateClassic(ExtTestCase): + def test_check_code(self): + opset_imports = [ + make_opsetid("", 19), + ] + inputs = [] + outputs = [] + nodes = [] + initializers = [] + sparse_initializers = [] + functions = [] + inputs.append(make_tensor_value_info("X", TensorProto.FLOAT, shape=[])) + nodes.append(make_node("Exp", ["X"], ["Y"])) + outputs.append(make_tensor_value_info("Y", TensorProto.FLOAT, shape=[])) + graph = make_graph( + nodes, + "onename", + inputs, + outputs, + initializers, + sparse_initializer=sparse_initializers, + ) + model = make_model(graph, functions=functions, opset_imports=opset_imports) + check_model(model) + + def test_exp(self): + onx = start(opset=19).vin("X").Exp().rename("Y").vout().to_onnx() + self.assertIsInstance(onx, ModelProto) + self.assertIn("Exp", str(onx)) + ref = ReferenceEvaluator(onx) + a = np.arange(10).astype(np.float32) + got = ref.run(None, {"X": a})[0] + self.assertEqualArray(np.exp(a), got) + + code = translate(onx, api="onnx") + + expected = dedent( + """ + opset_imports = [ + 
make_opsetid('', 19), + ] + inputs = [] + outputs = [] + nodes = [] + initializers = [] + sparse_initializers = [] + functions = [] + inputs.append(make_tensor_value_info('X', TensorProto.FLOAT, shape=[])) + nodes.append( + make_node_extended( + 'Exp', + ['X'], + ['Y'] + ) + ) + outputs.append(make_tensor_value_info('Y', TensorProto.FLOAT, shape=[])) + graph = make_graph( + nodes, + 'light_api', + inputs, + outputs, + initializers, + sparse_initializer=sparse_initializers, + ) + model = make_model( + graph, + functions=functions, + opset_imports=opset_imports + )""" + ).strip("\n") + self.maxDiff = None + self.assertEqual(expected, code) + + onx2 = ( + start(opset=19) + .vin("X", elem_type=TensorProto.FLOAT) + .bring("X") + .Exp() + .rename("Y") + .bring("Y") + .vout(elem_type=TensorProto.FLOAT) + .to_onnx() + ) + ref = ReferenceEvaluator(onx2) + a = np.arange(10).astype(np.float32) + got = ref.run(None, {"X": a})[0] + self.assertEqualArray(np.exp(a), got) + + def test_transpose(self): + onx = ( + start(opset=19) + .vin("X") + .reshape((-1, 1)) + .Transpose(perm=[1, 0]) + .rename("Y") + .vout() + .to_onnx() + ) + self.assertIsInstance(onx, ModelProto) + self.assertIn("Transpose", str(onx)) + ref = ReferenceEvaluator(onx) + a = np.arange(10).astype(np.float32) + got = ref.run(None, {"X": a})[0] + self.assertEqualArray(a.reshape((-1, 1)).T, got) + + code = translate(onx, api="onnx") + expected = dedent( + """ + opset_imports = [ + make_opsetid('', 19), + ] + inputs = [] + outputs = [] + nodes = [] + initializers = [] + sparse_initializers = [] + functions = [] + initializers.append( + from_array( + np.array([-1, 1], dtype=np.int64), + name='r' + ) + ) + inputs.append(make_tensor_value_info('X', TensorProto.FLOAT, shape=[])) + nodes.append( + make_node_extended( + 'Reshape', + ['X', 'r'], + ['r0_0'] + ) + ) + nodes.append( + make_node_extended( + 'Transpose', + ['r0_0'], + ['Y'], + perm=[1, 0] + ) + ) + outputs.append(make_tensor_value_info('Y', TensorProto.FLOAT, shape=[])) + graph = make_graph( + nodes, + 'light_api', + inputs, + outputs, + initializers, + sparse_initializer=sparse_initializers, + ) + model = make_model( + graph, + functions=functions, + opset_imports=opset_imports + )""" + ).strip("\n") + self.maxDiff = None + self.assertEqual(expected, code) + + def test_transpose_short(self): + onx = ( + start(opset=19) + .vin("X") + .reshape((-1, 1)) + .Transpose(perm=[1, 0]) + .rename("Y") + .vout() + .to_onnx() + ) + self.assertIsInstance(onx, ModelProto) + self.assertIn("Transpose", str(onx)) + ref = ReferenceEvaluator(onx) + a = np.arange(10).astype(np.float32) + got = ref.run(None, {"X": a})[0] + self.assertEqualArray(a.reshape((-1, 1)).T, got) + + code = translate(onx, api="onnx-short") + expected = dedent( + """ + opset_imports = [ + make_opsetid('', 19), + ] + inputs = [] + outputs = [] + nodes = [] + initializers = [] + sparse_initializers = [] + functions = [] + initializers.append( + from_array( + np.array([-1, 1], dtype=np.int64), + name='r' + ) + ) + inputs.append(make_tensor_value_info('X', TensorProto.FLOAT, shape=[])) + nodes.append( + make_node_extended( + 'Reshape', + ['X', 'r'], + ['r0_0'] + ) + ) + nodes.append( + make_node_extended( + 'Transpose', + ['r0_0'], + ['Y'], + perm=[1, 0] + ) + ) + outputs.append(make_tensor_value_info('Y', TensorProto.FLOAT, shape=[])) + graph = make_graph( + nodes, + 'light_api', + inputs, + outputs, + initializers, + sparse_initializer=sparse_initializers, + ) + model = make_model( + graph, + functions=functions, + 
opset_imports=opset_imports + )""" + ).strip("\n") + self.maxDiff = None + self.assertEqual(expected, code) + + def test_topk_reverse(self): + onx = ( + start(opset=19) + .vin("X", np.float32) + .vin("K", np.int64) + .bring("X", "K") + .TopK(largest=0) + .rename("Values", "Indices") + .vout() + .to_onnx() + ) + self.assertIsInstance(onx, ModelProto) + ref = ReferenceEvaluator(onx) + x = np.array([[0, 1, 2, 3], [9, 8, 7, 6]], dtype=np.float32) + k = np.array([2], dtype=np.int64) + got = ref.run(None, {"X": x, "K": k}) + self.assertEqualArray(np.array([[0, 1], [6, 7]], dtype=np.float32), got[0]) + self.assertEqualArray(np.array([[0, 1], [3, 2]], dtype=np.int64), got[1]) + + code = translate(onx, api="onnx") + expected = dedent( + """ + opset_imports = [ + make_opsetid('', 19), + ] + inputs = [] + outputs = [] + nodes = [] + initializers = [] + sparse_initializers = [] + functions = [] + inputs.append(make_tensor_value_info('X', TensorProto.FLOAT, shape=[])) + inputs.append(make_tensor_value_info('K', TensorProto.INT64, shape=[])) + nodes.append( + make_node_extended( + 'TopK', + ['X', 'K'], + ['Values', 'Indices'], + axis=-1, + largest=0, + sorted=1 + ) + ) + outputs.append(make_tensor_value_info('Values', TensorProto.FLOAT, shape=[])) + outputs.append(make_tensor_value_info('Indices', TensorProto.FLOAT, shape=[])) + graph = make_graph( + nodes, + 'light_api', + inputs, + outputs, + initializers, + sparse_initializer=sparse_initializers, + ) + model = make_model( + graph, + functions=functions, + opset_imports=opset_imports + )""" + ).strip("\n") + self.maxDiff = None + self.assertEqual(expected, code) + + def test_fft(self): + data = os.path.join( + os.path.dirname(__file__), "_data", "stft_inlined_batch_1.onnx" + ) + onx = load(data) + code = translate(onx, api="onnx") + try: + compile(code, "", mode="exec") + except Exception as e: + new_code = "\n".join( + [f"{i+1:04} {line}" for i, line in enumerate(code.split("\n"))] + ) + raise AssertionError(f"ERROR {e}\n{new_code}") # noqa: B904 + + def test_aionnxml(self): + onx = ( + start(opset=19, opsets={"ai.onnx.ml": 3}) + .vin("X") + .reshape((-1, 1)) + .rename("USE") + .ai.onnx.ml.Normalizer(norm="MAX") + .rename("Y") + .vout() + .to_onnx() + ) + code = translate(onx, api="onnx") + expected = dedent( + """ + opset_imports = [ + make_opsetid('', 19), + make_opsetid('ai.onnx.ml', 3), + ] + inputs = [] + outputs = [] + nodes = [] + initializers = [] + sparse_initializers = [] + functions = [] + initializers.append( + from_array( + np.array([-1, 1], dtype=np.int64), + name='r' + ) + ) + inputs.append(make_tensor_value_info('X', TensorProto.FLOAT, shape=[])) + nodes.append( + make_node_extended( + 'Reshape', + ['X', 'r'], + ['USE'] + ) + ) + nodes.append( + make_node_extended( + 'Normalizer', + ['USE'], + ['Y'], + domain='ai.onnx.ml', + norm='MAX' + ) + ) + outputs.append(make_tensor_value_info('Y', TensorProto.FLOAT, shape=[])) + graph = make_graph( + nodes, + 'light_api', + inputs, + outputs, + initializers, + sparse_initializer=sparse_initializers, + ) + model = make_model( + graph, + functions=functions, + opset_imports=opset_imports + )""" + ).strip("\n") + self.maxDiff = None + self.assertEqual(expected, code) + + @classmethod + def _code_line(cls, code): + lines = code.split("\n") + return "\n".join(f"{i+1:03d} {line}" for i, line in enumerate(lines)) + + @classmethod + def _run(cls, code): + try: + code_compiled = compile(code, "", mode="exec") + except Exception as e: + raise AssertionError( + f"Compilation failed due to 
{e}\n---\n{cls._code_line(code)}\n---\n{e}" + ) from e + + import onnx + import onnx.helper + import onnx.numpy_helper + import onnx_array_api.translate_api.make_helper + import onnx.reference.custom_element_types + + def from_array_extended(tensor, name=None): + dt = tensor.dtype + if ( + dt == onnx.reference.custom_element_types.float8e4m3fn + and dt.descr[0][0] == "e4m3fn" + ): + to = TensorProto.FLOAT8E4M3FN + dt_to = np.uint8 + elif ( + dt == onnx.reference.custom_element_types.bfloat16 + and dt.descr[0][0] == "bfloat16" + ): + to = TensorProto.BFLOAT16 + dt_to = np.uint16 + else: + return onnx.numpy_helper.from_array(tensor, name) + + t = onnx.numpy_helper.from_array(tensor.astype(dt_to), name) + t.data_type = to + return t + + globs = onnx.__dict__.copy() + globs.update(onnx.helper.__dict__) + globs.update(onnx.numpy_helper.__dict__) + globs.update(onnx_array_api.translate_api.make_helper.__dict__) + globs.update(onnx.reference.custom_element_types.__dict__) + globs["from_array_extended"] = from_array_extended + locs = {} + try: + exec(code_compiled, globs, locs) + except Exception as e: + raise AssertionError( + f"Execution failed due to {e}\n---\n{cls._code_line(code)}\n---\n{e}" + ) from e + return globs, locs + + def test_remove_nodes(self): + path = os.path.join( + os.path.dirname(__file__), "_data", "custom_ops_type_inference_fails_0.onnx" + ) + onx = load(path) + code = translate(onx, api="onnx") + _, locs = self._run(code) + self.assertIn("model", locs) + model = locs["model"] + x = np.arange(4).reshape((-1, 2)).astype(np.float32) + feeds = {"X": x} + + class CustomGemmFloat8E4M3FN(OpRun): + op_domain = "onnx_extented.ortops.tutorial.cpu" + + def _run( + self, + x, + y, + bias=None, + scale_x=None, + scale_y=None, + scale_z=None, + transA=False, + transB=False, + dtype=None, + rowMajor=None, + computeType=None, + ): + if scale_x is not None: + x = x * scale_x + if transA: + x = x.T + if scale_y is not None: + y = y * scale_y + if transB: + y = y.T + z = x @ y + if bias is not None: + z += bias + if scale_z is not None: + z = z / scale_z + return (z,) + + ref = ReferenceEvaluator(onx, new_ops=[CustomGemmFloat8E4M3FN]) + expected = ref.run(None, feeds)[0] + ref2 = ReferenceEvaluator(model, new_ops=[CustomGemmFloat8E4M3FN]) + got = ref2.run(None, feeds)[0] + self.assertEqualArray(expected, got) + + # with open("debug_test_remove_nodes.py", "w") as f: + # f.write(code) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/_unittests/ut_validation/test_docs.py b/_unittests/ut_validation/test_docs.py index 3b1307f..96cfcd3 100644 --- a/_unittests/ut_validation/test_docs.py +++ b/_unittests/ut_validation/test_docs.py @@ -1,8 +1,7 @@ import unittest -import sys import numpy as np from onnx.reference import ReferenceEvaluator -from onnx_array_api.ext_test_case import ExtTestCase +from onnx_array_api.ext_test_case import ExtTestCase, skipif_ci_windows from onnx_array_api.validation.docs import make_euclidean, make_euclidean_skl2onnx @@ -27,7 +26,7 @@ def test_make_euclidean_skl2onnx(self): got = ref.run(None, {"X": X, "Y": Y})[0] self.assertEqualArray(expected, got) - @unittest.skipIf(sys.platform == "win32", reason="unstable on Windows") + @skipif_ci_windows("Unstable on Windows.") def test_make_euclidean_np(self): from onnx_array_api.npx import jit_onnx diff --git a/_unittests/ut_validation/test_f8.py b/_unittests/ut_validation/test_f8.py index b44683f..4c6517f 100644 --- a/_unittests/ut_validation/test_f8.py +++ b/_unittests/ut_validation/test_f8.py @@ -88,7 
+88,7 @@ def test_fe5m2_to_float32_paper(self): self.assertEqual(fe5m2_to_float32(int("11111100", 2)), -numpy.inf) def test_fe4m3fn_to_float32_all(self): - for i in range(0, 256): + for i in range(256): a = fe4m3_to_float32_float(i) b = fe4m3_to_float32(i) if numpy.isnan(a): @@ -97,7 +97,7 @@ def test_fe4m3fn_to_float32_all(self): self.assertEqual(a, b) def test_fe4m3fn_to_float32_all_ml_types(self): - for i in range(0, 256): + for i in range(256): a = fe4m3_to_float32_float(i) b = fe4m3_to_float32(i) c = new_cvt_float32_to_e4m3fn(b) @@ -188,7 +188,7 @@ def test_search_float32_into_fe5m2_simple(self): self.assertEqual(b1, b2) def test_search_float32_into_fe4m3fn_equal(self): - values = [(fe4m3_to_float32_float(i), i) for i in range(0, 256)] + values = [(fe4m3_to_float32_float(i), i) for i in range(256)] values.sort() for value, expected in values: @@ -208,7 +208,7 @@ def test_search_float32_into_fe4m3fn_equal(self): self.assertIn(nf, (0, 128)) def test_search_float32_into_fe5m2_equal(self): - values = [(fe5m2_to_float32_float(i), i) for i in range(0, 256)] + values = [(fe5m2_to_float32_float(i), i) for i in range(256)] values.sort() for value, expected in values: @@ -233,7 +233,7 @@ def test_search_float32_into_fe5m2_equal(self): self.assertEqual(fe5m2_to_float32(nf), float(cf)) def test_search_float32_into_fe4m3fn(self): - values = [(fe4m3_to_float32_float(i), i) for i in range(0, 256)] + values = [(fe4m3_to_float32_float(i), i) for i in range(256)] values.sort() obs = [] @@ -308,7 +308,7 @@ def test_search_float32_into_fe4m3fn(self): ) def test_search_float32_into_fe5m2(self): - values = [(fe5m2_to_float32_float(i), i) for i in range(0, 256)] + values = [(fe5m2_to_float32_float(i), i) for i in range(256)] values.sort() obs = [] @@ -344,7 +344,7 @@ def test_search_float32_into_fe5m2(self): add = value else: add = v - value - if len(w) > 0: + if w: raise AssertionError( f"A warning was thrown for v={v}, " f"value={value}, w={w[0]}." 
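The exhaustive loops in this test file all rely on the same round-trip property; a minimal standalone sketch (assuming the helpers keep the import path and signatures used by these tests)::

    import numpy
    from onnx_array_api.validation.f8 import fe4m3_to_float32, float32_to_fe4m3

    for i in range(256):  # every 8-bit E4M3 payload
        f = fe4m3_to_float32(i)  # decode one byte into a float32
        if numpy.isnan(f):
            continue  # NaN payloads do not round-trip bit-exactly
        # encoding a representable value must decode back to the same float
        assert fe4m3_to_float32(float32_to_fe4m3(f)) == f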
@@ -651,7 +651,7 @@ def test_search_float32_into_fe5m2fnuz_simple(self): self.assertEqual(expected, got) def test_fe4m3fnuz_to_float32_all(self): - for i in range(0, 256): + for i in range(256): a = fe4m3_to_float32_float(i, uz=True) b = fe4m3_to_float32(i, uz=True) if numpy.isnan(a): @@ -660,7 +660,7 @@ def test_fe4m3fnuz_to_float32_all(self): self.assertEqual(a, b) def test_fe5m2fnuz_to_float32_all(self): - for i in range(0, 256): + for i in range(256): a = fe5m2_to_float32_float(i, fn=True, uz=True) b = fe5m2_to_float32(i, fn=True, uz=True) if numpy.isnan(a): @@ -669,7 +669,7 @@ def test_fe5m2fnuz_to_float32_all(self): self.assertEqual(a, b) def test_search_float32_into_fe4m3fnuz(self): - values = [(fe4m3_to_float32_float(i, uz=True), i) for i in range(0, 256)] + values = [(fe4m3_to_float32_float(i, uz=True), i) for i in range(256)] values.sort() obs = [] @@ -715,9 +715,7 @@ def test_search_float32_into_fe4m3fnuz(self): ) def test_search_float32_into_fe5m2fnuz(self): - values = [ - (fe5m2_to_float32_float(i, fn=True, uz=True), i) for i in range(0, 256) - ] + values = [(fe5m2_to_float32_float(i, fn=True, uz=True), i) for i in range(256)] values.sort() obs = [] @@ -1235,7 +1233,7 @@ def test_nan(self): expected, ) ] - for i in range(0, 23): + for i in range(23): v = 0x7F800000 | (1 << i) f = numpy.uint32(v).view(numpy.float32) values.append((i, v, f, expected)) @@ -1246,6 +1244,21 @@ def test_nan(self): f8 = float32_to_fe4m3(x) self.assertEqual(e, f8) + def test_negative_zero_uz(self): + self.assertEqual(numpy.float32(-0.0), numpy.float32(0.0)) + self.assertEqual(float32_to_fe4m3(-0.00000001, fn=True, uz=False), 128) + self.assertEqual(float32_to_fe4m3(0.00000001, fn=True, uz=True), 0) + self.assertEqual(float32_to_fe4m3(-0.00000001, fn=True, uz=True), 0) + self.assertEqual(float32_to_fe5m2(-0.00000001, fn=False, uz=False), 128) + self.assertEqual(float32_to_fe5m2(0.00000001, fn=True, uz=True), 0) + self.assertEqual(float32_to_fe5m2(-0.00000001, fn=True, uz=True), 0) + self.assertEqual(float32_to_fe4m3(-0.0001, fn=True, uz=False), 128) + self.assertEqual(float32_to_fe4m3(-0.0001, fn=True, uz=True), 0) + self.assertEqual(search_float32_into_fe4m3(-0.0001, fn=True, uz=False), 128) + self.assertEqual(search_float32_into_fe4m3(-0.0001, fn=True, uz=True), 0) + self.assertEqual(search_float32_into_fe5m2(-0.000001, fn=False, uz=False), 128) + self.assertEqual(search_float32_into_fe5m2(-0.000001, fn=True, uz=True), 0) + if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/_unittests/ut_xrun_doc/test_command_lines1.py b/_unittests/ut_xrun_doc/test_command_lines1.py new file mode 100644 index 0000000..0503f55 --- /dev/null +++ b/_unittests/ut_xrun_doc/test_command_lines1.py @@ -0,0 +1,120 @@ +import os +import tempfile +import unittest +from contextlib import redirect_stdout +from io import StringIO +from onnx import TensorProto +from onnx.helper import ( + make_graph, + make_model, + make_node, + make_opsetid, + make_tensor_value_info, +) +from onnx_array_api.ext_test_case import ExtTestCase +from onnx_array_api._command_lines_parser import ( + get_main_parser, + get_parser_compare, + get_parser_translate, + get_parser_replace, + main, +) + + +class TestCommandLines1(ExtTestCase): + def test_main_parser(self): + st = StringIO() + with redirect_stdout(st): + get_main_parser().print_help() + text = st.getvalue() + self.assertIn("translate", text) + + def test_parser_translate(self): + st = StringIO() + with redirect_stdout(st): + get_parser_translate().print_help() + text = 
st.getvalue() + self.assertIn("model", text) + + def test_parser_replace(self): + st = StringIO() + with redirect_stdout(st): + get_parser_replace().print_help() + text = st.getvalue() + self.assertIn("model", text) + + def test_command_translate(self): + X = make_tensor_value_info("X", TensorProto.FLOAT, [None, None]) + Y = make_tensor_value_info("Y", TensorProto.FLOAT, [5, 6]) + Z = make_tensor_value_info("Z", TensorProto.FLOAT, [None, None]) + graph = make_graph( + [ + make_node("Add", ["X", "Y"], ["res"]), + make_node("Cos", ["res"], ["Z"]), + ], + "g", + [X, Y], + [Z], + ) + onnx_model = make_model(graph, opset_imports=[make_opsetid("", 18)]) + + with tempfile.TemporaryDirectory() as root: + model_file = os.path.join(root, "model.onnx") + with open(model_file, "wb") as f: + f.write(onnx_model.SerializeToString()) + + args = ["translate", "-m", model_file] + st = StringIO() + with redirect_stdout(st): + main(args) + + code = st.getvalue() + self.assertIn("model = make_model(", code) + + args = ["translate", "-m", model_file, "-a", "light"] + st = StringIO() + with redirect_stdout(st): + main(args) + + code = st.getvalue() + self.assertIn("start(opset=", code) + + def test_parser_compare(self): + st = StringIO() + with redirect_stdout(st): + get_parser_compare().print_help() + text = st.getvalue() + self.assertIn("model1", text) + + def test_command_compare(self): + X = make_tensor_value_info("X", TensorProto.FLOAT, [5, 6]) + Y = make_tensor_value_info("Y", TensorProto.FLOAT, [5, 6]) + Z = make_tensor_value_info("Z", TensorProto.FLOAT, [5, 6]) + graph = make_graph( + [ + make_node("Add", ["X", "Y"], ["res"]), + make_node("Cos", ["res"], ["Z"]), + ], + "g", + [X, Y], + [Z], + ) + onnx_model = make_model(graph, opset_imports=[make_opsetid("", 18)]) + + with tempfile.TemporaryDirectory() as root: + model_file = os.path.join(root, "model.onnx") + with open(model_file, "wb") as f: + f.write(onnx_model.SerializeToString()) + + args = ["compare", "-m1", model_file, "-m2", model_file, "-v", "1"] + st = StringIO() + with redirect_stdout(st): + main(args) + + code = st.getvalue() + self.assertIn("[compare_onnx_execution]", code) + self.assertIn("ADFF", code) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/_unittests/ut_xrun_doc/test_documentation_examples.py b/_unittests/ut_xrun_doc/test_documentation_examples.py index 2d50728..6f6a5d1 100644 --- a/_unittests/ut_xrun_doc/test_documentation_examples.py +++ b/_unittests/ut_xrun_doc/test_documentation_examples.py @@ -5,7 +5,7 @@ import subprocess import time from onnx_array_api import __file__ as onnx_array_api_file -from onnx_array_api.ext_test_case import ExtTestCase +from onnx_array_api.ext_test_case import ExtTestCase, is_windows VERBOSE = 0 ROOT = os.path.realpath(os.path.abspath(os.path.join(onnx_array_api_file, "..", ".."))) @@ -26,10 +26,10 @@ def import_source(module_file_path, module_name): class TestDocumentationExamples(ExtTestCase): def run_test(self, fold: str, name: str, verbose=0) -> int: ppath = os.environ.get("PYTHONPATH", "") - if len(ppath) == 0: + if not ppath: os.environ["PYTHONPATH"] = ROOT elif ROOT not in ppath: - sep = ";" if sys.platform == "win32" else ":" + sep = ";" if is_windows() else ":" os.environ["PYTHONPATH"] = ppath + sep + ROOT perf = time.perf_counter() try: @@ -42,14 +42,14 @@ def run_test(self, fold: str, name: str, verbose=0) -> int: res = p.communicate() out, err = res st = err.decode("ascii", errors="ignore") - if len(st) > 0 and "Traceback" in st: + if st and "Traceback" in st: 
if '"dot" not found in path.' in st: # dot not installed, this part # is tested in onnx framework if verbose: print(f"failed: {name!r} due to missing dot.") return 0 - raise AssertionError( + raise AssertionError( # noqa: B904 "Example '{}' (cmd: {} - exec_prefix='{}') " "failed due to\n{}" "".format(name, cmds, sys.exec_prefix, st) @@ -65,14 +65,15 @@ def add_test_methods(cls): fold = os.path.normpath(os.path.join(this, "..", "..", "_doc", "examples")) found = os.listdir(fold) for name in found: - if name.startswith("plot_") and name.endswith(".py"): - short_name = os.path.split(os.path.splitext(name)[0])[-1] + if not name.startswith("plot_") or not name.endswith(".py"): + continue + short_name = os.path.split(os.path.splitext(name)[0])[-1] - def _test_(self, name=name): - res = self.run_test(fold, name, verbose=VERBOSE) - self.assertTrue(res) + def _test_(self, name=name): + res = self.run_test(fold, name, verbose=VERBOSE) + self.assertTrue(res) - setattr(cls, f"test_{short_name}", _test_) + setattr(cls, f"test_{short_name}", _test_) TestDocumentationExamples.add_test_methods() diff --git a/_unittests/ut_xrun_doc/test_profiling.py b/_unittests/ut_xrun_doc/test_profiling.py index e6c7e69..a7d3ce1 100644 --- a/_unittests/ut_xrun_doc/test_profiling.py +++ b/_unittests/ut_xrun_doc/test_profiling.py @@ -1,6 +1,7 @@ """ @brief test tree node (time=5s) """ + import os import sys import time diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 89a4ed9..e9b3859 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -4,8 +4,8 @@ jobs: vmImage: 'ubuntu-latest' strategy: matrix: - Python311-Linux: - python.version: '3.11' + Python312-Linux: + python.version: '3.12' maxParallel: 3 steps: @@ -24,7 +24,7 @@ jobs: - script: pip install -r requirements-dev.txt displayName: 'Install Requirements dev' - script: | - ruff . + ruff check . displayName: 'Ruff' - script: | black --diff . @@ -35,6 +35,9 @@ jobs: - script: | python -m pip install . -v -v -v displayName: 'install wheel' + - script: | + python -m pip freeze + displayName: 'pip freeze' - script: | python -m pytest displayName: 'Runs Unit Tests' @@ -48,8 +51,8 @@ jobs: vmImage: 'ubuntu-latest' strategy: matrix: - Python311-Linux: - python.version: '3.11' + Python312-Linux: + python.version: '3.12' maxParallel: 3 steps: @@ -78,11 +81,14 @@ jobs: - script: pip install onnxmltools --no-deps displayName: 'Install onnxmltools' - script: | - ruff . + ruff check . displayName: 'Ruff' - script: | black --diff . 
displayName: 'Black' + - script: | + python -m pip freeze + displayName: 'pip freeze' - script: | python -m pytest displayName: 'Runs Unit Tests' @@ -125,16 +131,19 @@ jobs: export ARRAY_API_TESTS_MODULE=onnx_array_api.array_api.onnx_numpy cd array-api-tests displayName: 'Set API' + - script: | + python -m pip freeze + displayName: 'pip freeze' - script: | export ARRAY_API_TESTS_MODULE=onnx_array_api.array_api.onnx_numpy cd array-api-tests python -m pytest -x array_api_tests/test_creation_functions.py --skips-file=../_unittests/onnx-numpy-skips.txt --hypothesis-explain displayName: "numpy test_creation_functions.py" - - script: | - export ARRAY_API_TESTS_MODULE=onnx_array_api.array_api.onnx_ort - cd array-api-tests - python -m pytest -x array_api_tests/test_creation_functions.py --skips-file=../_unittests/onnx-ort-skips.txt --hypothesis-explain - displayName: "ort test_creation_functions.py" + # - script: | + # export ARRAY_API_TESTS_MODULE=onnx_array_api.array_api.onnx_ort + # cd array-api-tests + # python -m pytest -x array_api_tests/test_creation_functions.py --skips-file=../_unittests/onnx-ort-skips.txt --hypothesis-explain + # displayName: "ort test_creation_functions.py" #- script: | # export ARRAY_API_TESTS_MODULE=onnx_array_api.array_api.onnx_numpy # cd array-api-tests @@ -146,8 +155,8 @@ jobs: vmImage: 'ubuntu-latest' strategy: matrix: - Python311-Linux: - python.version: '3.11' + Python312-Linux: + python.version: '3.12' maxParallel: 3 steps: @@ -172,11 +181,14 @@ jobs: - script: pip install onnxmltools --no-deps displayName: 'Install onnxmltools' - script: | - ruff . + ruff check . displayName: 'Ruff' - script: | black --diff . displayName: 'Black' + - script: | + python -m pip freeze + displayName: 'pip freeze' - script: | python -m pytest --cov displayName: 'Runs Unit Tests' @@ -196,8 +208,8 @@ jobs: vmImage: 'windows-latest' strategy: matrix: - Python311-Windows: - python.version: '3.11' + Python312-Windows: + python.version: '3.12' maxParallel: 3 steps: @@ -214,56 +226,15 @@ jobs: - script: pip install onnxmltools --no-deps displayName: 'Install onnxmltools' - script: | - python -m pytest - displayName: 'Runs Unit Tests' - - script: | - python -u setup.py bdist_wheel - displayName: 'Build Package' - - task: PublishPipelineArtifact@0 - inputs: - artifactName: 'wheel-windows-$(python.version)' - targetPath: 'dist' - -- job: 'TestMac' - pool: - vmImage: 'macOS-latest' - strategy: - matrix: - Python311-Mac: - python.version: '3.11' - maxParallel: 3 - - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '$(python.version)' - architecture: 'x64' - - script: gcc --version - displayName: 'gcc version' - #- script: brew upgrade - # displayName: 'brew upgrade' - #- script: brew update - # displayName: 'brew update' - - script: export - displayName: 'export' - - script: gcc --version - displayName: 'gcc version' - - script: python -m pip install --upgrade pip setuptools wheel - displayName: 'Install tools' - - script: pip install -r requirements.txt - displayName: 'Install Requirements' - - script: pip install -r requirements-dev.txt - displayName: 'Install Requirements dev' - - script: pip install onnxmltools --no-deps - displayName: 'Install onnxmltools' + python -m pip freeze + displayName: 'pip freeze' - script: | - python -m pytest + python -m pytest -v displayName: 'Runs Unit Tests' - script: | python -u setup.py bdist_wheel displayName: 'Build Package' - task: PublishPipelineArtifact@0 inputs: - artifactName: 'wheel-mac-$(python.version)' + artifactName: 
'wheel-windows-$(python.version)'
       targetPath: 'dist'
-
diff --git a/onnx_array_api/__init__.py b/onnx_array_api/__init__.py
index b2a711d..98371ac 100644
--- a/onnx_array_api/__init__.py
+++ b/onnx_array_api/__init__.py
@@ -1,7 +1,6 @@
-# coding: utf-8
 """
 APIs to create ONNX Graphs.
 """
 
-__version__ = "0.1.2"
+__version__ = "0.3.1"
 __author__ = "Xavier Dupré"
diff --git a/onnx_array_api/__main__.py b/onnx_array_api/__main__.py
new file mode 100644
index 0000000..1fb5c0c
--- /dev/null
+++ b/onnx_array_api/__main__.py
@@ -0,0 +1,4 @@
+from ._command_lines_parser import main
+
+if __name__ == "__main__":
+    main()
diff --git a/onnx_array_api/_command_lines_parser.py b/onnx_array_api/_command_lines_parser.py
new file mode 100644
index 0000000..d1eac62
--- /dev/null
+++ b/onnx_array_api/_command_lines_parser.py
@@ -0,0 +1,245 @@
+import sys
+import onnx
+from typing import Any, List, Optional
+from argparse import ArgumentParser
+from textwrap import dedent
+
+
+def get_main_parser() -> ArgumentParser:
+    parser = ArgumentParser(
+        prog="onnx-array-api",
+        description="onnx-array-api main command line.",
+        epilog="Type 'python -m onnx_array_api <cmd> --help' "
+        "to get help for a specific command.",
+    )
+    parser.add_argument(
+        "cmd",
+        choices=["translate", "compare", "replace"],
+        help=dedent(
+            """
+            Selects a command.
+
+            'translate' exports an onnx graph into a piece of code replicating it,
+            'compare' compares the execution of two onnx models,
+            'replace' replaces constants and initializers by ConstantOfShape
+            to make the model lighter
+            """
+        ),
+    )
+    return parser
+
+
+def get_parser_translate() -> ArgumentParser:
+    parser = ArgumentParser(
+        prog="translate",
+        description=dedent(
+            """
+            Translates an onnx model into a piece of code to replicate it.
+            The result is printed on the standard output.
+            """
+        ),
+        epilog="This is mostly used to write unit tests without adding "
+        "an onnx file to the repository.",
+    )
+    parser.add_argument(
+        "-m",
+        "--model",
+        type=str,
+        required=True,
+        help="onnx model to translate",
+    )
+    parser.add_argument(
+        "-a",
+        "--api",
+        choices=["onnx", "light", "onnx-short", "builder"],
+        default="onnx",
+        help="API to use: the onnx package API, the light API, or the builder API.",
+    )
+    return parser
+
+
+def _cmd_translate(argv: List[Any]):
+    from .translate_api import translate
+
+    parser = get_parser_translate()
+    args = parser.parse_args(argv[1:])
+    onx = onnx.load(args.model)
+    code = translate(onx, api=args.api)
+    print(code)
+
+
+def get_parser_compare() -> ArgumentParser:
+    parser = ArgumentParser(
+        prog="compare",
+        description=dedent(
+            """
+            Compares the execution of two onnx models.
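+
+            Example (file names are illustrative)::
+
+                python -m onnx_array_api compare -m1 model1.onnx -m2 model2.onnx -v 1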
+ """ + ), + epilog="This is used when two models are different but " + "should produce the same results.", + ) + parser.add_argument( + "-m1", + "--model1", + type=str, + required=True, + help="first onnx model", + ) + parser.add_argument( + "-m2", + "--model2", + type=str, + required=True, + help="second onnx model", + ) + parser.add_argument( + "-m", + "--mode", + choices=["execute", "nodes"], + default="execute", + help="compare the execution ('execute') or the nodes only ('nodes')", + ) + parser.add_argument( + "-v", + "--verbose", + default=0, + help="verbosity", + ) + parser.add_argument( + "-c", + "--column-size", + default=60, + help="column size when displaying the results", + ) + parser.add_argument( + "-d", + "--discrepancies", + default=0, + help="show precise discrepancies when mode is execution", + ) + return parser + + +def _cmd_compare(argv: List[Any]): + from .reference import compare_onnx_execution + + parser = get_parser_compare() + args = parser.parse_args(argv[1:]) + if args.verbose in ("1", 1, "True", True): + print(f"[compare] first model {args.model1!r}") + print(f"[compare] second model {args.model2!r}") + onx1 = onnx.load(args.model1) + onx2 = onnx.load(args.model2) + if args.verbose in ("1", 1, "True", True): + print(f"[compare] first model has {len(onx1.graph.node)} nodes") + print(f"[compare] second model has {len(onx2.graph.node)} nodes") + res1, res2, align, dc = compare_onnx_execution( + onx1, + onx2, + verbose=args.verbose, + mode=args.mode, + keep_tensor=args.discrepancies in (1, "1", "True", True), + ) + text = dc.to_str(res1, res2, align, column_size=int(args.column_size)) + print(text) + + +def get_parser_replace() -> ArgumentParser: + parser = ArgumentParser( + prog="translate", + description=dedent( + """ + Replaces constants and initializes by ConstOfShape or any other nodes + to make the model smaller. 
+ """ + ), + epilog="This is mostly used to write unit tests without adding " + "a big file to the repository.", + ) + parser.add_argument( + "-m", + "--model", + type=str, + required=True, + help="onnx model to translate", + ) + parser.add_argument( + "-o", + "--out", + type=str, + required=True, + help="output file", + ) + parser.add_argument( + "-t", + "--threshold", + default=128, + help="Threshold above which every constant is replaced", + ) + parser.add_argument( + "--type", + default="ConstontOfShape", + help="Inserts this operator type", + ) + parser.add_argument( + "--domain", + default="", + help="Inserts this domain", + ) + parser.add_argument( + "-v", + "--verbose", + default=0, + help="verbosity", + ) + return parser + + +def _cmd_replace(argv: List[Any]): + from .tools.replace_constants import replace_initializer_by_constant_of_shape + + parser = get_parser_replace() + args = parser.parse_args(argv[1:]) + if args.verbose in ("1", 1, "True", True): + print(f"[compare] load model {args.model!r}") + onx = onnx.load(args.model) + new_onx = replace_initializer_by_constant_of_shape( + onx, threshold=args.threshold, op_type=args.type, domain=args.domain + ) + if args.verbose in ("1", 1, "True", True): + print(f"[compare] save model {args.out!r}") + onnx.save(new_onx, args.out) + + +def main(argv: Optional[List[Any]] = None): + fcts = dict(translate=_cmd_translate, compare=_cmd_compare, replace=_cmd_replace) + + if argv is None: + argv = sys.argv[1:] + if (len(argv) <= 1 and argv[0] not in fcts) or argv[-1] in ("--help", "-h"): + if len(argv) < 2: + parser = get_main_parser() + parser.parse_args(argv) + else: + parsers = dict( + translate=get_parser_translate, + compare=get_parser_compare, + replace=get_parser_replace, + ) + cmd = argv[0] + if cmd not in parsers: + raise ValueError( + f"Unknown command {cmd!r}, it should be in {list(sorted(parsers))}." + ) + parser = parsers[cmd]() + parser.parse_args(argv[1:]) + raise RuntimeError("The programme should have exited before.") + + cmd = argv[0] + if cmd in fcts: + fcts[cmd](argv) + else: + raise ValueError( + f"Unknown command {cmd!r}, use --help to get the list of known command." + ) diff --git a/onnx_array_api/_helpers.py b/onnx_array_api/_helpers.py index f9808ca..9331098 100644 --- a/onnx_array_api/_helpers.py +++ b/onnx_array_api/_helpers.py @@ -9,7 +9,7 @@ def np_dtype_to_tensor_dtype(dtype: Any): """ try: dt = helper.np_dtype_to_tensor_dtype(dtype) - except KeyError: + except (KeyError, ValueError): if dtype == np.float32: dt = TensorProto.FLOAT elif dtype == np.float64: @@ -40,6 +40,10 @@ def np_dtype_to_tensor_dtype(dtype: Any): dt = TensorProto.INT64 elif dtype is float: dt = TensorProto.DOUBLE + elif dtype == np.complex64: + dt = TensorProto.COMPLEX64 + elif dtype == np.complex128: + dt = TensorProto.COMPLEX128 else: - raise KeyError(f"Unable to guess type for dtype={dtype}.") + raise KeyError(f"Unable to guess type for dtype={dtype}.") # noqa: B904 return dt diff --git a/onnx_array_api/annotations.py b/onnx_array_api/annotations.py new file mode 100644 index 0000000..c29102c --- /dev/null +++ b/onnx_array_api/annotations.py @@ -0,0 +1,99 @@ +from typing import Any, Callable, Dict, List, Optional, Tuple, Union +import numpy as np +from onnx import FunctionProto, GraphProto, ModelProto, TensorProto, TensorShapeProto +from onnx.helper import np_dtype_to_tensor_dtype + +NP_DTYPE = np.dtype +ELEMENT_TYPE = Union[int, NP_DTYPE] +SHAPE_TYPE = Tuple[int, ...] 
+VAR_CONSTANT_TYPE = Union["Var", TensorProto, np.ndarray]
+GRAPH_PROTO = Union[FunctionProto, GraphProto, ModelProto]
+
+AI_ONNX_ML = "ai.onnx.ml"
+
+ELEMENT_TYPE_NAME = {
+    getattr(TensorProto, k): k
+    for k in dir(TensorProto)
+    if isinstance(getattr(TensorProto, k), int) and "_" not in k
+}
+
+
+class SubDomain:
+    pass
+
+
+def domain(domain: str, op_type: Optional[str] = None) -> Callable:
+    """
+    Registers one operator into a sub domain. It should be used as a
+    decorator. One example:
+
+    .. code-block:: python
+
+        @domain("ai.onnx.ml")
+        def Normalizer(self, norm: str = "MAX"):
+            return self.make_node("Normalizer", self, norm=norm, domain="ai.onnx.ml")
+    """
+    names = [op_type]
+
+    def decorate(op_method: Callable) -> Callable:
+        if names[0] is None:
+            names[0] = op_method.__name__
+
+        def wrapper(self, *args: List[Any], **kwargs: Dict[str, Any]) -> Any:
+            return op_method(self.parent, *args, **kwargs)
+
+        wrapper.__qualname__ = f"[{domain}]{names[0]}"
+        wrapper.__name__ = f"[{domain}]{names[0]}"
+        wrapper.__domain__ = domain
+        return wrapper
+
+    return decorate
+
+
+_type_numpy = {
+    np.float32: TensorProto.FLOAT,
+    np.float64: TensorProto.DOUBLE,
+    np.float16: TensorProto.FLOAT16,
+    np.int8: TensorProto.INT8,
+    np.int16: TensorProto.INT16,
+    np.int32: TensorProto.INT32,
+    np.int64: TensorProto.INT64,
+    np.uint8: TensorProto.UINT8,
+    np.uint16: TensorProto.UINT16,
+    np.uint32: TensorProto.UINT32,
+    np.uint64: TensorProto.UINT64,
+    np.bool_: TensorProto.BOOL,
+    np.str_: TensorProto.STRING,
+    np.complex64: TensorProto.COMPLEX64,
+    np.complex128: TensorProto.COMPLEX128,
+}
+
+
+def elem_type_int(elem_type: ELEMENT_TYPE) -> int:
+    """
+    Converts an element type into an onnx element type (int).
+
+    :param elem_type: integer or numpy type
+    :return: int
+    """
+    if isinstance(elem_type, int):
+        return elem_type
+    if elem_type in _type_numpy:
+        return _type_numpy[elem_type]
+    return np_dtype_to_tensor_dtype(elem_type)
+
+
+def _pick_dim(d, empty_dim):
+    if d.dim_value:
+        return d.dim_value
+    if d.dim_param:
+        return d.dim_param
+    return empty_dim
+
+
+def make_shape(shape: TensorShapeProto, empty_dim: Optional[Any] = None) -> SHAPE_TYPE:
+    "Extracts a shape from a tensor type."
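+    # Illustrative behaviour: a dim with dim_value=3 yields 3, a dim with
+    # dim_param="N" yields "N", and a dim with neither yields empty_dim.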
+ if hasattr(shape, "dim"): + res = [_pick_dim(d, empty_dim=empty_dim) for i, d in enumerate(shape.dim)] + return tuple(res) + return None diff --git a/onnx_array_api/array_api/__init__.py b/onnx_array_api/array_api/__init__.py index f4b3c4d..9b67b4b 100644 --- a/onnx_array_api/array_api/__init__.py +++ b/onnx_array_api/array_api/__init__.py @@ -47,12 +47,14 @@ def _finfo(dtype): continue if isinstance(v, (np.float32, np.float64, np.float16)): d[k] = float(v) + elif isinstance(v, (np.complex128, np.complex64)): + d[k] = complex(v) else: d[k] = v d["dtype"] = DType(np_dtype_to_tensor_dtype(dt)) nres = type("finfo", (res.__class__,), d) - setattr(nres, "smallest_normal", float(res.smallest_normal)) - setattr(nres, "tiny", float(res.tiny)) + setattr(nres, "smallest_normal", float(res.smallest_normal)) # noqa: B010 + setattr(nres, "tiny", float(res.tiny)) # noqa: B010 return nres @@ -84,8 +86,8 @@ def _iinfo(dtype): d[k] = v d["dtype"] = DType(np_dtype_to_tensor_dtype(dt)) nres = type("iinfo", (res.__class__,), d) - setattr(nres, "min", int(res.min)) - setattr(nres, "max", int(res.max)) + setattr(nres, "min", int(res.min)) # noqa: B010 + setattr(nres, "max", int(res.max)) # noqa: B010 return nres @@ -124,6 +126,8 @@ def _finalize_array_api(module, function_names, TEagerTensor): module.float16 = DType(TensorProto.FLOAT16) module.float32 = DType(TensorProto.FLOAT) module.float64 = DType(TensorProto.DOUBLE) + module.complex64 = DType(TensorProto.COMPLEX64) + module.complex128 = DType(TensorProto.COMPLEX128) module.int8 = DType(TensorProto.INT8) module.int16 = DType(TensorProto.INT16) module.int32 = DType(TensorProto.INT32) @@ -133,10 +137,10 @@ def _finalize_array_api(module, function_names, TEagerTensor): module.uint32 = DType(TensorProto.UINT32) module.uint64 = DType(TensorProto.UINT64) module.bfloat16 = DType(TensorProto.BFLOAT16) - setattr(module, "bool", DType(TensorProto.BOOL)) - setattr(module, "str", DType(TensorProto.STRING)) - setattr(module, "finfo", _finfo) - setattr(module, "iinfo", _iinfo) + setattr(module, "bool", DType(TensorProto.BOOL)) # noqa: B010 + setattr(module, "str", DType(TensorProto.STRING)) # noqa: B010 + setattr(module, "finfo", _finfo) # noqa: B010 + setattr(module, "iinfo", _iinfo) # noqa: B010 if function_names is None: function_names = supported_functions @@ -146,7 +150,10 @@ def _finalize_array_api(module, function_names, TEagerTensor): if f is None: f2 = getattr(npx_functions, name, None) if f2 is None: - warnings.warn(f"Function {name!r} is not available in {module!r}.") + warnings.warn( + f"Function {name!r} is not available in {module!r}.", + stacklevel=0, + ) continue f = lambda TEagerTensor, *args, _f=f2, **kwargs: _f( # noqa: E731 *args, **kwargs diff --git a/onnx_array_api/array_api/_onnx_common.py b/onnx_array_api/array_api/_onnx_common.py index 6e8ee6d..7c486ce 100644 --- a/onnx_array_api/array_api/_onnx_common.py +++ b/onnx_array_api/array_api/_onnx_common.py @@ -1,11 +1,8 @@ from typing import Any, Optional -import warnings import numpy as np from onnx import TensorProto +import array_api_strict -with warnings.catch_warnings(): - warnings.simplefilter("ignore") - from numpy.array_api._array_object import Array from ..npx.npx_types import ( DType, ElemType, @@ -30,6 +27,9 @@ ) +Array = type(array_api_strict.ones((1,))) + + # These functions with no specific code do not have to be # implemented. They are automatically added in # :mod:`onnx_array_api.array_api`. 
It needs @@ -46,14 +46,13 @@ def asarray( dtype: Optional[DType] = None, order: Optional[str] = None, like: Any = None, + device: Optional[str] = None, copy: bool = False, ) -> EagerTensor: """ Converts anything into an array. """ - """ - Converts anything into an array. - """ + assert device is None, f"asarray not implemented yet for device={device!r}" if order not in ("C", None): raise NotImplementedError(f"asarray is not implemented for order={order!r}.") if like is not None: @@ -88,18 +87,20 @@ def asarray( v = TEagerTensor(va) elif isinstance(a, float): v = TEagerTensor(np.array(a, dtype=np.float64)) + elif isinstance(a, complex): + v = TEagerTensor(np.array(a, dtype=np.complex128)) elif isinstance(a, bool): v = TEagerTensor(np.array(a, dtype=np.bool_)) elif isinstance(a, str): v = TEagerTensor(np.array(a, dtype=np.str_)) elif isinstance(a, list): - if all(map(lambda x: isinstance(x, bool), a)): + if all(isinstance(x, bool) for x in a): v = TEagerTensor(np.array(a, dtype=np.bool_)) - elif all(map(lambda x: isinstance(x, int), a)): + elif all(isinstance(x, int) for x in a): try: cvt = np.array(a, dtype=np.int64) except OverflowError as e: - if all(map(lambda x: x >= 0, a)): + if all(x >= 0 for x in a): cvt = np.array(a, dtype=np.uint64) else: raise e @@ -108,7 +109,7 @@ def asarray( v = TEagerTensor(np.array(a)) elif isinstance(a, np.ndarray): v = TEagerTensor(a) - elif isinstance(a, Array): + elif Array and isinstance(a, Array): v = TEagerTensor(np.asarray(a)) else: raise RuntimeError(f"Unexpected type {type(a)} for the first input.") @@ -128,9 +129,7 @@ def arange( step: EagerTensor[OptTensorType[ElemType.int64, "I", (1,)]] = None, dtype: OptParType[DType] = None, ) -> EagerTensor[TensorType[ElemType.numerics, "T"]]: - use_float = any( - map(lambda x: isinstance(x, float), [start_or_stop, stop_or_step, step]) - ) + use_float = any(isinstance(x, float) for x in [start_or_stop, stop_or_step, step]) if isinstance(start_or_stop, int): start_or_stop = TEagerTensor( np.array([start_or_stop], dtype=np.float64 if use_float else np.int64) @@ -208,7 +207,7 @@ def eye( /, *, k: ParType[int] = 0, - dtype: ParType[DType] = DType(TensorProto.DOUBLE), + dtype: ParType[DType] = DType(TensorProto.DOUBLE), # noqa: B008 ): if isinstance(n_rows, int): n_rows = TEagerTensor(np.array(n_rows, dtype=np.int64)) @@ -246,7 +245,7 @@ def linspace( dtype: OptParType[DType] = None, endpoint: ParType[int] = 1, ) -> EagerTensor[TensorType[ElemType.numerics, "T"]]: - use_float = any(map(lambda x: isinstance(x, float), [start, stop])) + use_float = any(isinstance(x, float) for x in [start, stop]) if isinstance(start, int): start = TEagerTensor( np.array(start, dtype=np.float64 if use_float else np.int64) diff --git a/onnx_array_api/ext_test_case.py b/onnx_array_api/ext_test_case.py index 6726008..d91ba1a 100644 --- a/onnx_array_api/ext_test_case.py +++ b/onnx_array_api/ext_test_case.py @@ -6,11 +6,43 @@ from io import StringIO from timeit import Timer from typing import Any, Callable, Dict, List, Optional - import numpy from numpy.testing import assert_allclose +def is_azure() -> bool: + "Tells if the job is running on Azure DevOps." + return os.environ.get("AZURE_HTTP_USER_AGENT", "undefined") != "undefined" + + +def is_windows() -> bool: + return sys.platform == "win32" + + +def is_apple() -> bool: + return sys.platform == "darwin" + + +def skipif_ci_windows(msg) -> Callable: + """ + Skips a unit test if it runs on :epkg:`azure pipeline` on :epkg:`Windows`. 
+ """ + if is_windows() and is_azure(): + msg = f"Test does not work on azure pipeline (linux). {msg}" + return unittest.skip(msg) + return lambda x: x + + +def skipif_ci_apple(msg) -> Callable: + """ + Skips a unit test if it runs on :epkg:`azure pipeline` on :epkg:`Windows`. + """ + if is_apple() and is_azure(): + msg = f"Test does not work on azure pipeline (Apple). {msg}" + return unittest.skip(msg) + return lambda x: x + + def ignore_warnings(warns: List[Warning]) -> Callable: """ Catches warnings. @@ -203,17 +235,19 @@ def assertRaise(self, fct: Callable, exc_type: Exception): fct() except exc_type as e: if not isinstance(e, exc_type): - raise AssertionError(f"Unexpected exception {type(e)!r}.") + raise AssertionError(f"Unexpected exception {type(e)!r}.") # noqa: B904 return raise AssertionError("No exception was raised.") def assertEmpty(self, value: Any): - if value is None: - return - if len(value) == 0: + if not value: return raise AssertionError(f"value is not empty: {value!r}.") + def assertExists(self, name): + if not os.path.exists(name): + raise AssertionError(f"File or folder {name!r} does not exists.") + def assertHasAttr(self, cls: type, name: str): if not hasattr(cls, name): raise AssertionError(f"Class {cls} has no attribute {name!r}.") @@ -222,7 +256,7 @@ def assertNotEmpty(self, value: Any): if value is None: raise AssertionError(f"value is empty: {value!r}.") if isinstance(value, (list, dict, tuple, set)): - if len(value) == 0: + if value: raise AssertionError(f"value is empty: {value!r}.") def assertStartsWith(self, prefix: str, full: str): @@ -232,7 +266,7 @@ def assertStartsWith(self, prefix: str, full: str): @classmethod def tearDownClass(cls): for name, line, w in cls._warns: - warnings.warn(f"\n{name}:{line}: {type(w)}\n {str(w)}") + warnings.warn(f"\n{name}:{line}: {type(w)}\n {str(w)}", stacklevel=0) def capture(self, fct: Callable): """ @@ -243,9 +277,8 @@ def capture(self, fct: Callable): """ sout = StringIO() serr = StringIO() - with redirect_stdout(sout): - with redirect_stderr(serr): - res = fct() + with redirect_stdout(sout), redirect_stderr(serr): + res = fct() return res, sout.getvalue(), serr.getvalue() def relative_path(self, filename: str, *names: List[str]) -> str: diff --git a/onnx_array_api/graph_api/__init__.py b/onnx_array_api/graph_api/__init__.py new file mode 100644 index 0000000..15e274e --- /dev/null +++ b/onnx_array_api/graph_api/__init__.py @@ -0,0 +1 @@ +from .graph_builder import GraphBuilder, NodePattern diff --git a/onnx_array_api/graph_api/graph_builder.py b/onnx_array_api/graph_api/graph_builder.py new file mode 100644 index 0000000..5e414ed --- /dev/null +++ b/onnx_array_api/graph_api/graph_builder.py @@ -0,0 +1,1024 @@ +import sys +from functools import partial +from typing import Any, Dict, Iterator, List, Optional, Sequence, Set, Tuple, Union +import numpy as np +from onnx.defs import onnx_opset_version +import onnx.helper as oh +import onnx.numpy_helper as onh +from onnx import ( + AttributeProto, + FunctionProto, + GraphProto, + ModelProto, + NodeProto, + TensorProto, +) +from onnx.reference import ReferenceEvaluator + +T = "TENSOR" + + +class OptimizationOptions: + def __init__( + self, + remove_unused: bool = True, + constant_folding: bool = False, + constant_size: int = 1024, + ): + self.remove_unused = remove_unused + self.constant_folding = constant_folding + self.constant_size = constant_size + + +class NodePattern: + """ + Class defining a matching pattern able to find nodes in a set of nodes. 
+    """
+
+    def __init__(
+        self,
+        index: Optional[int] = None,
+        op_type: Optional[str] = None,
+        name: Optional[str] = None,
+    ):
+        self.index = index
+        self.op_type = op_type
+        self.name = name
+
+    def __repr__(self):
+        "usual"
+        args = ["index", "op_type", "name"]
+        sargs = []
+        for a in args:
+            if getattr(self, a) is not None:
+                sargs.append(f"{a}={getattr(self, a)!r}")
+        return f"{self.__class__.__name__}({', '.join(sargs)})"
+
+    def find(self, graph: "GraphBuilder") -> Iterator:
+        """
+        Iterates on nodes matching the pattern.
+        """
+        for index, node in enumerate(graph.nodes):
+            if self.match(index, node):
+                yield node
+
+    def match(self, index, node: NodeProto) -> bool:
+        """
+        Tells if a node is matching this pattern.
+        """
+        if self.index is not None and self.index != index:
+            return False
+        if self.op_type is not None and self.op_type != node.op_type:
+            return False
+        if self.name is not None and self.name != node.name:
+            return False
+        return True
+
+
+class Opset:
+    # defined for opset >= 18
+    # name: number of expected outputs
+    _implemented = {
+        "Add": 1,
+        "And": 1,
+        "Cast": 1,
+        "Concat": 1,
+        "Constant": 1,
+        "Div": 1,
+        "Exp": 1,
+        "Expand": 1,
+        "GatherElements": 1,
+        "Gemm": 1,
+        "Identity": 1,
+        "MatMul": 1,
+        "MaxPool": 2,
+        "Mul": 1,
+        "Log": 1,
+        "Or": 1,
+        "Pow": 1,
+        "Relu": 1,
+        "ReduceSum": 1,
+        "Reshape": 1,
+        "Shape": 1,
+        "Slice": 1,
+        "Squeeze": 1,
+        "Sub": 1,
+        "Transpose": 1,
+        "Unsqueeze": 1,
+    }
+
+    def __init__(self, builder: "GraphBuilder", opset: int):
+        self.opset = opset
+        self.builder = builder
+
+    def __getattr__(self, name):
+        if name in self._implemented:
+            return partial(self.make_node, name)
+        try:
+            return super().__getattr__(name)
+        except AttributeError as e:
+            raise AttributeError(f"Unable to access attribute {name!r}.") from e
+
+    def Initializer(
+        self, init: Union[TensorProto, np.ndarray], name: Optional[str] = None
+    ) -> str:
+        """
+        Creates an initializer.
+
+        :param init: value
+        :param name: name if value is not a TensorProto
+        :return: its name
+        """
+        return self.builder.make_initializer(init, name=name, exists=True)
+
+    def make_node(
+        self,
+        op_type: str,
+        *inputs: Optional[Union[str, List[str]]],
+        outputs: Optional[Union[int, List[str], str]] = None,
+        domain: str = "",
+        **kwargs,
+    ):
+        if outputs is None:
+            outputs = self._implemented[op_type]
+        if inputs is None:
+            inputs = []
+        new_inputs = []
+        for i in inputs:
+            if not isinstance(i, str):
+                name = self.builder.unique_name("cst")
+                self.builder.make_initializer(i, name=name, exists=True)
+                new_inputs.append(name)
+            else:
+                new_inputs.append(i)
+
+        return self.builder.make_node(
+            op_type, new_inputs, outputs=outputs, domain=domain, **kwargs
+        )
+
+
+class GraphBuilder:
+    def __init__(
+        self,
+        target_opset_or_existing_proto: Optional[
+            Union[int, Dict[str, int], ModelProto, FunctionProto]
+        ] = None,
+        input_names: Optional[Sequence[str]] = None,
+        as_function: bool = False,
+        optimization_options: Optional[OptimizationOptions] = None,
+        args: Optional[List[Any]] = None,
+        verbose: int = 0,
+        ir_version: Optional[int] = None,
+    ):
+        self.optimization_options = optimization_options or OptimizationOptions()
+        self.as_function = as_function
+        self.input_args = args
+        self.verbose = verbose
+
+        if target_opset_or_existing_proto is None:
+            target_opset_or_existing_proto = onnx_opset_version() - 1
+        if isinstance(target_opset_or_existing_proto, (int, dict)):
+            self.opsets = (
+                {"": target_opset_or_existing_proto}
+                if isinstance(target_opset_or_existing_proto, int)
+                else target_opset_or_existing_proto
+            )
+            self.ir_version = ir_version
+            self.nodes = []
+            self.initializers_dict = {}
+            self.inputs = []
+            self.outputs = []
+            self._unique_names = set()
+            self.input_names = input_names or []
+            self.current_input = 0
+            self._known_shapes = {}
+            self._known_types = {}
+            self.constants_ = {}
+            self.functions_ = {}
+        elif isinstance(target_opset_or_existing_proto, ModelProto):
+            assert (
+                not input_names
+            ), "input_names must be empty if the input is an existing model."
+            proto = target_opset_or_existing_proto
+            self.opsets = {d.domain: d.version for d in proto.opset_import}
+            self.ir_version = ir_version or target_opset_or_existing_proto.ir_version
+            self.nodes = list(proto.graph.node)
+            self.initializers_dict = {i.name: i for i in proto.graph.initializer}
+            self.initializers_dict.update(
+                {i.name: i for i in proto.graph.sparse_initializer}
+            )
+            self.inputs = list(proto.graph.input)
+            self.outputs = list(proto.graph.output)
+            self.input_names = [i.name for i in proto.graph.input]
+            self.current_input = len(self.inputs)
+            # This should be improved.
+            self._known_shapes = {}
+            self._known_types = {}
+            self.constants_ = {}
+            for k, v in self.initializers_dict.items():
+                self.constants_[k] = None
+                self.set_shape(k, self._get_tensor_shape(v))
+                self.set_type(k, self._get_tensor_type(v))
+            for node in self.nodes:
+                if node.op_type == "Constant":
+                    self.constants_[node.output[0]] = node
+                    self.set_shape(node.output[0], self._get_tensor_shape(node))
+                    self.set_type(node.output[0], self._get_tensor_type(node))
+            for f in proto.functions:
+                self.add_function(f)
+        else:
+            raise NotImplementedError(
+                f"{type(target_opset_or_existing_proto)} is not supported."
+            )
+
+        self.op = Opset(self, self.opsets[""]) if "" in self.opsets else None
+        self._cache_array = []
+
+    def add_local_function(self, domain: str, name: str, gr: "GraphBuilder"):
+        "Adds a local function."
+        assert (
+            domain,
+            name,
+        ) not in self.functions_, f"Function {(domain, name)} was already added."
+        self.functions_[domain, name] = gr
+
+    def _get_tensor_shape(
+        self, proto: Union[NodeProto, TensorProto]
+    ) -> Tuple[int, ...]:
+        if isinstance(proto, TensorProto):
+            return tuple(proto.dims)
+        if isinstance(proto, NodeProto):
+            for att in proto.attribute:
+                if att.name == "value_float":
+                    return tuple()
+                if att.name == "value_int":
+                    return tuple()
+                if att.name == "value_floats":
+                    return (len(att.floats),)
+                if att.name == "value_ints":
+                    return (len(att.ints),)
+                if att.name == "value":
+                    t = onh.to_array(att.t)
+                    return t.shape
+        raise TypeError(
+            f"Unexpected or unsupported scenario type {type(proto)}: {proto}."
+        )
+
+    def _get_tensor_type(self, proto: Union[NodeProto, TensorProto]) -> int:
+        if isinstance(proto, TensorProto):
+            return proto.data_type
+        if isinstance(proto, NodeProto):
+            for att in proto.attribute:
+                if att.name == "value_float":
+                    return TensorProto.FLOAT
+                if att.name == "value_int":
+                    return TensorProto.INT64
+                if att.name == "value_floats":
+                    return TensorProto.FLOAT
+                if att.name == "value_ints":
+                    return TensorProto.INT64
+                if att.name == "value":
+                    t = onh.to_array(att.t)
+                    return oh.np_dtype_to_tensor_dtype(t.dtype)
+        raise ValueError(f"Unexpected type or value {type(proto)}: {proto}.")
+
+    def is_constant(self, name: str) -> bool:
+        """Tells if a result is a constant."""
+        return name in self.constants_
+
+    def get_constant(self, name: str) -> np.ndarray:
+        assert self.is_constant(name), f"Result {name!r} is not a constant."
+        assert (
+            name in self.initializers_dict
+        ), f"Result {name!r} was never evaluated within method 'constant_folding'."
+        value = self.initializers_dict[name]
+        if isinstance(value, np.ndarray):
+            return value
+
+        raise TypeError(f"Unable to convert type {type(value)} into numpy array.")
+
+    def set_shape(self, name: str, shape: Tuple[int, ...]):
+        assert isinstance(
+            name, str
+        ), f"Unexpected type {type(name)} for name, it should be a string."
+        if name in self._known_shapes:
+            assert shape == self._known_shapes[name], (
+                f"Name {name!r} already exists and it is different "
+                f"{self._known_shapes[name]} != {shape}"
+            )
+            return
+        assert isinstance(
+            shape, tuple
+        ), f"Unexpected shape type {type(shape)}, it should be a tuple."
+        self._known_shapes[name] = shape
+
+    def set_type(self, name: str, dtype: int):
+        assert isinstance(name, str), f"Unexpected type {type(name)} for name."
+        int_type = dtype if isinstance(dtype, int) else self._get_type(dtype)
+        if name in self._known_types:
+            assert int_type == self._known_types[name], (
+                f"Name {name!r} already exists and it is different "
+                f"{self._known_types[name]} != {int_type}."
+            )
+        self._known_types[name] = int_type
+
+    def rank(self, name: str) -> int:
+        return len(self.get_shape(name))
+
+    def has_shape(self, name: str) -> bool:
+        return name in self._known_shapes
+
+    def get_shape(self, name: str) -> Tuple[int, ...]:
+        assert name in self._known_shapes, (
+            f"Shape is unknown for result {name!r}, "
+            f"known_shapes={self._known_shapes}."
+        )
+        return self._known_shapes[name]
+
+    def has_type(self, name: str) -> bool:
+        return name in self._known_types
+
+    def get_type(self, name: str) -> int:
+        assert (
+            name in self._known_types
+        ), f"Type is unknown for result {name!r}, known_types={self._known_types}."
+ return self._known_types[name] + + def unique_name(self, prefix: str) -> str: + if prefix in self._unique_names: + i = 2 + sug = f"{prefix}2" + while sug in self._unique_names: + i += 1 + sug = f"{prefix}{i}" + self._unique_names.add(sug) + return sug + self._unique_names.add(prefix) + return prefix + + def _prepare_inputs(self, schema: Optional[Any], *inputs: List[Any]) -> List[str]: + input_names = [] + for i in inputs: + self.make_input(i.name, i.dtype, i.shape) + input_names.append(i.name) + return input_names + + def _get_type(self, elem_type: Any, exc: bool = True) -> int: + if not isinstance(elem_type, int): + st = str(elem_type) + if "float32" in st: + elem_type = TensorProto.FLOAT + elif "int64" in st: + elem_type = TensorProto.INT64 + elif elem_type is None: + elem_type = TensorProto.UNDEFINED + elif exc: + raise ValueError(f"Unable to interpret elem_type {elem_type!r}.") + return elem_type + + def make_initializer( + self, value: Any, name: str = "", external: bool = False, exists: bool = False + ) -> str: + if external: + raise NotImplementedError("External initializers are not implemented yet.") + if name == "": + if exists: + raise ValueError("Undefined name cannot exist.") + name = self.unique_name("cst") + elif not exists: + if name in self._unique_names: + raise ValueError(f"{name!r} is already assigned.") + self._unique_names.add(name) + self.set_shape(name, value.shape) + self.set_type(name, self._get_type(value.dtype)) + self.initializers_dict[name] = value + self.constants_[name] = None + if self.verbose and np.prod(value.shape) > 100: + print( + f"[GraphBuilder] make_initializer:{name}[{value.dtype}:{value.shape}]" + ) + return name + + def make_tensor_input( + self, name: str, elem_type: Any, shape: Tuple[int, ...] + ) -> str: + if self.current_input < len(self.input_names): + # The input needs to be renamed, an identity node is added. + input_name = self.input_names[self.current_input] + self.make_node("Identity", [input_name], [name]) + else: + self.input_names.append(name) + input_name = name + if name in self._unique_names: + raise ValueError(f"{name!r} is already assigned.") + self._unique_names.add(name) + self.current_input += 1 + elem_type = self._get_type(elem_type) + self.inputs.append(oh.make_tensor_value_info(input_name, elem_type, shape)) + if self.verbose: + print(f"[GraphBuilder] make_tensor_input:{name}[{elem_type}:{shape}]") + if shape: + self.set_shape(name, shape) + if elem_type: + self.set_type(name, elem_type) + return name + + def make_tensor_output( + self, + name: Union[str, List[str]], + elem_type: Optional[int] = None, + shape: Optional[Tuple[int, ...]] = None, + is_dimension: bool = False, + indexed: bool = False, + ) -> Union[str, List[str]]: + if isinstance(name, list): + res = [] + for n in name: + res.append(self.make_tensor_output(n, elem_type, shape)) + return res + + elem_type = self._get_type(elem_type, False) + assert ( + self.as_function or elem_type != 0 + ), f"Undefined element type for {name!r}." 
+        self.outputs.append(oh.make_tensor_value_info(name, elem_type, shape))
+        if self.verbose:
+            print(f"[GraphBuilder] make_tensor_output:{name}[{elem_type}:{shape}]")
+        if shape:
+            self.set_shape(name, shape)
+        if elem_type:
+            self.set_type(name, elem_type)
+        return name
+
+    def make_node(
+        self,
+        op_type: str,
+        inputs: Union[str, List[str]],
+        outputs: Union[int, List[str], str] = 1,
+        domain: str = "",
+        attributes: Optional[List[AttributeProto]] = None,
+        **kwargs,
+    ) -> Union[str, List[str]]:
+        assert (
+            not kwargs or not attributes
+        ), f"Only attributes or kwargs can be filled for node {op_type!r}."
+        if isinstance(inputs, tuple):
+            inputs = list(inputs)
+        if isinstance(outputs, int):
+            assert outputs > 0, f"outputs={outputs} must be > 0."
+            lower = op_type.lower()
+            output_names = [
+                self.unique_name(f"_onx_{lower}{i}") for i in range(outputs)
+            ]
+        elif isinstance(outputs, str):
+            output_names = [outputs]
+        else:
+            output_names = outputs
+        if isinstance(inputs, str):
+            inputs = [inputs]
+
+        # next
+        try:
+            node = oh.make_node(op_type, inputs, output_names, domain=domain, **kwargs)
+        except TypeError as e:
+            raise TypeError(
+                f"A node {op_type!r} cannot be created with "
+                f"inputs={inputs} (types={[type(i) for i in inputs]}), "
+                f"outputs={outputs} "
+                f"(types={[type(o) for o in outputs] if isinstance(outputs, (tuple, list)) else outputs}), "  # noqa: E501
+                f"domain={domain!r}, kwargs={kwargs}."
+            ) from e
+        if attributes:
+            node.attribute.extend(attributes)
+
+        # constant handling, shape, type
+        if node.op_type == "Constant":
+            size = len(node.SerializeToString())
+            assert size < self.optimization_options.constant_size, (
+                f"A node Constant holds a tensor bigger than "
+                f"the constant: {size} >= "
+                f"{self.optimization_options.constant_size}."
+            )
+            k = node.output[0]
+            self.constants_[k] = node
+            shape = self._get_tensor_shape(node)
+            dtype = self._get_tensor_type(node)
+            self.set_shape(k, shape)
+            self.set_type(k, dtype)
+            if self.verbose and np.prod(shape) > 100:
+                print(f"[GraphBuilder] make_constant:{k}[{dtype}:{shape}]")
+        elif node.op_type == "Identity":
+            if node.input[0] in self._known_shapes:
+                self.set_shape(node.output[0], self._known_shapes[node.input[0]])
+            if node.input[0] in self._known_types:
+                self.set_type(node.output[0], self._known_types[node.input[0]])
+            if self.is_constant(node.input[0]):
+                self.constants_[node.output[0]] = node
+        else:
+            if all(map(self.is_constant, node.input)):
+                for o in node.output:
+                    self.constants_[o] = node
+
+        # add the node
+        self.nodes.append(node)
+        if len(output_names) == 1:
+            return output_names[0]
+        return output_names
+
+    def make_nodes(
+        self,
+        builder: "GraphBuilder",
+        input_names: List[str],
+        output_names: List[str],
+        prefix: str = "",
+    ) -> Union[str, List[str]]:
+        """
+        Appends all nodes and initializers from another builder.
+        Handles the renaming of results.
+        The content stored in 'builder' is modified inplace to avoid copying.
+
+        :param builder: other builder
+        :param input_names: input names
+        :param output_names: output names
+        :param prefix: prefix for all names coming from this builder
+        :return: output names
+        """
+        renaming = {}
+        for init, value in builder.initializers_dict.items():
+            name = self.unique_name(f"{prefix}{init}")
+            renaming[init] = name
+            if isinstance(value, TensorProto):
+                value.name = name
+            self.initializers_dict[name] = value
+            self.constants_[name] = None
+            self.set_shape(name, builder._known_shapes[init])
+            self.set_type(name, builder._known_types[init])
+
+        assert len(input_names) == len(builder.inputs), (
+            f"Inconsistency between input_names={input_names} "
+            f"and the other builder inputs={builder.inputs}."
+        )
+
+        for name, inp in zip(input_names, builder.inputs):
+            new_name = self.unique_name(f"{prefix}{inp.name}")
+            renaming[inp.name] = new_name
+            if builder.has_shape(inp.name):
+                self.set_shape(new_name, builder.get_shape(inp.name))
+            if builder.has_type(inp.name):
+                self.set_type(new_name, builder.get_type(inp.name))
+            self.make_node("Identity", [name], [new_name])
+
+        for node in builder.nodes:
+            new_inputs = [renaming[i] for i in node.input]
+            new_outputs = [self.unique_name(f"{prefix}{o}") for o in node.output]
+            for o, no in zip(node.output, new_outputs):
+                renaming[o] = no
+            self.make_node(
+                node.op_type,
+                new_inputs,
+                new_outputs,
+                domain=node.domain,
+                attributes=node.attribute,
+            )
+            for o, no in zip(node.output, new_outputs):
+                if builder.has_shape(o):
+                    self.set_shape(no, builder.get_shape(o))
+                if builder.has_type(o):
+                    self.set_type(no, builder.get_type(o))
+
+        assert len(output_names) == len(builder.outputs), (
+            f"Inconsistency between output_names={output_names} and "
+            f"outputs={builder.outputs}, renaming={renaming}."
+        )
+        for name, out in zip(output_names, builder.outputs):
+            self.make_node("Identity", [renaming[out.name]], [name])
+
+        # opsets and domains
+        for o, v in builder.opsets.items():
+            if o in self.opsets:
+                assert self.opsets[o] == builder.opsets[o], (
+                    f"Opset mismatch for domain {o!r}, "
+                    f"{self.opsets[o]} != {builder.opsets[o]}."
+                )
+                continue
+            self.opsets[o] = v
+
+        if len(output_names) == 1:
+            return output_names[0]
+        return output_names
+
+    def from_array(
+        self, arr: T, name: Optional[str] = None
+    ) -> TensorProto:  # noqa: F821
+        if isinstance(arr, np.ndarray):
+            return self.from_np_array(arr, name)
+        raise NotImplementedError(
+            f"{type(arr)} is not supported yet but initializer {name or ''!r} is."
+        )
+
+    def from_np_array(self, arr: np.ndarray, name: Optional[str] = None) -> TensorProto:
+        arr_cpu = np.ascontiguousarray(arr) if not arr.flags["C_CONTIGUOUS"] else arr
+        if arr_cpu.ctypes.data == arr.ctypes.data:
+            if sys.byteorder == "big":
+                arr_cpu = arr_cpu.copy()
+                np.byteswap(
+                    np.frombuffer(arr_cpu.ctypes.data, dtype=arr_cpu.dtype),
+                    inplace=True,
+                )
+        else:
+            if sys.byteorder == "big":
+                np.byteswap(
+                    np.frombuffer(arr_cpu.ctypes.data, dtype=arr_cpu.dtype),
+                    inplace=True,
+                )
+        # keep the tensor alive until the builder is released
+        # so the pointer does not disappear
+        self._cache_array.append(arr_cpu)
+
+        tensor = TensorProto()
+        tensor.dims.extend(arr_cpu.shape)
+        tensor.name = name
+        tensor.data_type = self._get_type(arr_cpu.dtype)
+        # this does not work...
+ # tensor.raw_data = arr_cpu.ctypes.data + tensor.raw_data = arr_cpu.tobytes() + if self.verbose and np.prod(arr_cpu.shape) > 100: + print( + f"[GraphBuilder] from_array:{tensor.data_type}[{arr_cpu.shape}]:" + f"{'swapped' if sys.byteorder == 'big' else ''}" + ) + return tensor + + def _build_initializers(self) -> List[TensorProto]: + res = [] + for k, v in sorted(self.initializers_dict.items()): + if isinstance(v, np.ndarray): + if np.prod(v.shape) > 100: + if self.verbose: + print(f"[GraphBuilder] from_array:{k}:{v.dtype}[{v.shape}]") + t = self.from_array(v, name=k) + else: + t = onh.from_array(v, name=k) + res.append(t) + continue + if isinstance(v, TensorProto): + res.append(v) + continue + raise TypeError( + f"Unable to convert initializer {k!r} with type " + f"{type(v)} into a TensorProto." + ) + return res + + def process( + self, + graph_module: Any, + interpreter: "Interpreter", # noqa: F821 + ): + for node in graph_module.graph.nodes: + interpreter.run_node(node) + + def to_onnx( + self, as_function: bool = False, optimize: bool = True + ) -> Union[FunctionProto, ModelProto]: + if optimize: + self.optimize() + if as_function: + raise NotImplementedError("Export as FunctionProto is not implemented yet.") + dense = self._build_initializers() + opsets = [oh.make_opsetid(*o) for o in self.opsets.items()] + if as_function: + return oh.make_function( + self.nodes, + self.name, + [i.name for i in self.inputs], + [o.name for o in self.outputs], + domain=self.domain, + ) + + if self.verbose: + print("[GraphBuilder] onh.make_graph") + graph = oh.make_graph( + self.nodes, "experiment", self.inputs, self.outputs, dense + ) + if self.verbose: + print("[GraphBuilder] onh.make_model") + model = oh.make_model(graph, opset_imports=opsets) + if self.ir_version: + model.ir_version = self.ir_version + return model + + def _check_order_node(self, ind: int, node: NodeProto, existing: Set[str]): + for i in node.input: + assert i in existing, ( + f"Unknown input {i!r} from node {ind}:{node.op_type}:{node.name}. " + f"Known: {existing}." + ) + for att in node.attribute: + if att.type == AttributeProto.GRAPH and att.g: + g_existing = existing.copy() + for i in att.g.input: + g_existing.add(i.name) + for ind2, node2 in enumerate(att.g.node): + self._check_order_node((ind, ind2), node2, g_existing) + for o in att.g.output: + assert ( + o.name in g_existing + ), f"Unknown output {o.name!r}. Known: {g_existing}." + for o in node.output: + existing.add(o) + + def check_order(self): + existing = set(self.initializers_dict) + for i in self.inputs: + existing.add(i.name) + for ind, node in enumerate(self.nodes): + self._check_order_node(ind, node, existing) + for o in self.outputs: + assert o.name in existing, f"Unknown output {o.name!r}. Known: {existing}." 
+
+    def optimize(self, check_order: bool = False):
+        if check_order:
+            self.check_order()
+        self.remove_identity_nodes()
+        if check_order:
+            self.check_order()
+        if self.optimization_options.remove_unused:
+            self.remove_unused()
+        if check_order:
+            self.check_order()
+        if self.optimization_options.constant_folding:
+            self.constant_folding()
+        if check_order:
+            self.check_order()
+        if self.optimization_options.remove_unused:
+            self.remove_unused()
+        if check_order:
+            self.check_order()
+
+    def hidden_inputs_graph(self, graph: GraphProto) -> Set[str]:
+        hidden = set()
+        memo = set(i.name for i in graph.initializer)
+        memo |= set(i.name for i in graph.sparse_initializer)
+        for node in graph.node:
+            for i in node.input:
+                if i not in memo:
+                    hidden.add(i)
+            for att in node.attribute:
+                if att.type == AttributeProto.GRAPH and att.g:
+                    hid = self.hidden_inputs_graph(att.g)
+                    less = set(h for h in hid if h not in memo)
+                    hidden |= less
+            memo |= set(node.output)
+        return hidden
+
+    def remove_unused(self):
+        """
+        Simple function to remove unused nodes.
+        It does not remove nodes inside subgraphs
+        but it keeps their hidden inputs alive.
+        Everything is done in one pass.
+        """
+
+        # mark outputs
+        marked = {o.name: set() for o in self.outputs}
+        for node in reversed(self.nodes):
+            used = False
+            for o in node.output:
+                if o in marked:
+                    for i in node.input:
+                        marked[o].add(i)
+                    used = True
+            for att in node.attribute:
+                if att.type == AttributeProto.GRAPH and att.g:
+                    hidden_inputs = self.hidden_inputs_graph(att.g)
+                    for i in hidden_inputs:
+                        marked[i] = set()
+            if used:
+                for i in node.input:
+                    marked[i] = set()
+
+        # removed nodes
+        removed = set()
+        marked_set = set(marked)
+        for ind, node in enumerate(self.nodes):
+            if not (set(node.output) & marked_set):
+                removed.add(ind)
+
+        if self.verbose:
+            for k, v in self.initializers_dict.items():
+                if k not in marked:
+                    v = self.initializers_dict[k]
+                    print(f"[GraphBuilder] remove_initializer:{k}:{v.dtype}[{v.shape}]")
+        self.initializers_dict = {
+            k: v for k, v in self.initializers_dict.items() if k in marked
+        }
+        self.constants_ = {k: v for k, v in self.constants_.items() if k in marked}
+        self.nodes = [node for i, node in enumerate(self.nodes) if i not in removed]
+
+    def _apply_transpose(self, node: NodeProto, feeds: Dict[str, T]) -> T:  # noqa: F821
+        perm = None
+        for att in node.attribute:
+            if att.name == "perm":
+                perm = tuple(att.ints)
+                break
+        assert perm, f"perm is missing in node {node}"
+        return [np.transpose(feeds[node.input[0]], perm)]
+
+    def constant_folding(self):
+        """
+        Folds all constants. Constants are marked during the creation of the graph.
+        There is no need to propagate this information.
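+
+        A minimal sketch (names and shapes below are only an example)::
+
+            gr = GraphBuilder(
+                18,
+                optimization_options=OptimizationOptions(constant_folding=True),
+            )
+            value = np.arange(4).reshape((2, 2)).astype(np.float32)
+            cst = gr.make_initializer(value)
+            out = gr.op.Transpose(cst, perm=[1, 0])
+            gr.make_tensor_output(out, TensorProto.FLOAT, (2, 2))
+            gr.optimize()  # folds the Transpose into a new initializer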
+        """
+        updates = {}
+        node_to_remove = set()
+        for _k, v in self.constants_.items():
+            if v is None:
+                # this is an initializer
+                continue
+            # a node
+            if all(map(self.is_constant, v.output)):
+                node_to_remove.add(tuple(v.output))
+                # node evaluation
+                if v.op_type == "Transpose":
+                    # bypassing onnx.numpy_helper.from_array, too slow
+                    feeds = {i: self.initializers_dict[i] for i in v.input}
+                    output = self._apply_transpose(v, feeds)
+                else:
+                    ref = ReferenceEvaluator(v)
+                    feeds = {i: self.get_constant(i) for i in v.input}
+                    output = ref.run(None, feeds)
+                for name, value in zip(v.output, output):
+                    updates[name] = None
+                    self.initializers_dict[name] = value
+                    if self.verbose:
+                        print(
+                            f"[GraphBuilder] fold_constant:"
+                            f"{v.op_type}:{name}[{value.dtype}:"
+                            f"{value.shape}]:from:{','.join(sorted(feeds))}"
+                        )
+
+        self.constants_.update(updates)
+        new_nodes = []
+        for node in self.nodes:
+            if tuple(node.output) in node_to_remove:
+                continue
+            new_nodes.append(node)
+        self.nodes = new_nodes
+
+    def remove_identity_nodes(self):
+        """
+        Removes identity nodes.
+        """
+        # first pass: detect replacements
+        new_nodes = []
+        input_names = set(i.name for i in self.inputs)
+        output_names = set(i.name for i in self.outputs)
+        replacements = {}
+        replacements_rev = {}
+        for node in self.nodes:
+            if node.op_type != "Identity":
+                new_nodes.append(node)
+                continue
+
+            if node.output[0] not in output_names:
+                old_name, new_name = node.output[0], node.input[0]
+            elif (
+                node.input[0] not in input_names
+                and node.input[0] not in output_names
+                and node.input[0] not in replacements
+            ):
+                old_name, new_name = node.input[0], node.output[0]
+            else:
+                new_nodes.append(node)
+                continue
+
+            # the new name can be set for replacements as well
+            if new_name in replacements:
+                new_name = replacements[new_name]
+                assert new_name not in replacements, (
+                    f"Name {old_name!r} still in {replacements}, "
+                    f"node.op_type={node.op_type!r}, "
+                    f"node.input={node.input}, node.output={node.output}, "
+                    f"input_names={input_names}, output_names={output_names}"
+                )
+            if old_name in replacements_rev:
+                old_old_name = replacements_rev[old_name]
+                replacements[old_old_name] = new_name
+                replacements_rev[new_name] = old_old_name
+            if old_name in replacements:
+                replacements[replacements[old_name]] = new_name
+                assert new_name not in replacements, (
+                    f"Name {old_name!r} still in {replacements}, "
+                    f"node.op_type={node.op_type!r}, "
+                    f"node.input={node.input}, node.output={node.output}, "
+                    f"input_names={input_names}, output_names={output_names}"
+                )
+            replacements[old_name] = new_name
+            replacements_rev[new_name] = old_name
+
+            # verification
+            for k, v in replacements.items():
+                assert v not in replacements, (
+                    f"replacement {k}->{v} is not possible because of "
+                    f"{v}->{replacements[v]}, old_name={old_name!r}, "
+                    f"new_name={new_name!r}"
+                )
+
+        # second pass: replacements in initializer
+        for k, v in replacements.items():
+            if k in self.initializers_dict:
+                self.initializers_dict[v] = self.initializers_dict[k]
+                del self.initializers_dict[k]
+                assert self.constants_[v]
+                self.constants_[v] = self.constants_[k]
+                del self.constants_[k]
+
+        # third pass: replacements in node
+        self.nodes = []
+        for node in new_nodes:
+            repo = {o for o in node.output if o in replacements}
+            repi = {o for o in node.input if o in replacements}
+            if repi or repo:
+                new_inputs = [replacements.get(i, i) for i in node.input]
+                new_outputs = [replacements.get(i, i) for i in node.output]
+                new_node = oh.make_node(
+                    node.op_type,
+                    new_inputs,
+                    new_outputs,
+                    domain=node.domain,
+                    name=node.name,
+                )
+                new_node.attribute.extend(node.attribute)
+                self.nodes.append(new_node)
+            else:
+                self.nodes.append(node)
+
+    def np(
+        self,
+        index: Optional[int] = None,
+        op_type: Optional[str] = None,
+        name: Optional[str] = None,
+    ) -> NodePattern:
+        """
+        Returns an instance of :class:`NodePattern`.
+        """
+        return NodePattern(index=index, op_type=op_type, name=name)
+
+    def update_attribute(
+        self,
+        pat: NodePattern,
+        recursive: bool = False,
+        **kwargs: Dict[str, Any],
+    ) -> int:
+        """
+        Updates attributes for nodes matching the pattern.
+
+        :param pat: returned by method :meth:`GraphBuilder.np`
+        :param recursive: walk through subgraph
+        :param kwargs: attributes to modify
+        :return: number of modified nodes
+        """
+        assert not recursive, "recursive=True is not implemented."
+        modified = 0
+        for node in pat.find(self):
+            up = self.update_node(node, **kwargs)
+            if up:
+                modified += 1
+        return modified
+
+    DELETE = object()
+
+    def update_node(self, node: NodeProto, **kwargs) -> bool:
+        """
+        Updates attributes of a node proto.
+        Returns True if the node was updated.
+        """
+        processed = set()
+        modified = True
+        atts = []
+        for att in node.attribute:
+            if att.name in kwargs:
+                processed.add(att.name)
+                if kwargs[att.name] is GraphBuilder.DELETE:
+                    continue
+                new_att = oh.make_attribute(att.name, kwargs[att.name])
+                assert new_att.type == att.type, (
+                    f"Mismatch value for attribute {att.name!r} has type "
+                    f"{att.type} but the new value leads to "
+                    f"type={new_att.type}."
+                )
+                atts.append(new_att)
+                modified = True
+                continue
+            atts.append(att)
+        for k, v in kwargs.items():
+            if k in processed or v is GraphBuilder.DELETE:
+                continue
+            modified = True
+            new_att = oh.make_attribute(k, v)
+            atts.append(new_att)
+
+        if modified:
+            del node.attribute[:]
+            node.attribute.extend(atts)
+        return modified
diff --git a/onnx_array_api/light_api/__init__.py b/onnx_array_api/light_api/__init__.py
index 272ea0d..83e8878 100644
--- a/onnx_array_api/light_api/__init__.py
+++ b/onnx_array_api/light_api/__init__.py
@@ -1,19 +1,21 @@
 from typing import Dict, Optional
-from .model import OnnxGraph
+from onnx import ModelProto
+from ..annotations import domain
+from .model import OnnxGraph, ProtoType
 from .var import Var, Vars


 def start(
     opset: Optional[int] = None,
     opsets: Optional[Dict[str, int]] = None,
-    is_function: bool = False,
+    ir_version: Optional[int] = None,
 ) -> OnnxGraph:
     """
     Starts an onnx model.

     :param opset: main opset version
-    :param is_function: a :class:`onnx.ModelProto` or a :class:`onnx.FunctionProto`
     :param opsets: others opsets as a dictionary
+    :param ir_version: specify the ir_version as well
     :return: an instance of :class:`onnx_array_api.light_api.OnnxGraph`

     A very simple model:
@@ -34,8 +36,23 @@ def start(
         from onnx_array_api.light_api import start

         onx = (
-            start().vin("X").vin("Y").bring("X", "Y").Add().rename("Z").vout().to_onnx()
+            start()
+            .vin("X")
+            .vin("Y")
+            .bring("X", "Y")
+            .Add()
+            .rename("Z")
+            .vout()
+            .to_onnx()
         )
         print(onx)
     """
-    return OnnxGraph(opset=opset, opsets=opsets, is_function=is_function)
+    return OnnxGraph(opset=opset, opsets=opsets, ir_version=ir_version)
+
+
+def g() -> OnnxGraph:
+    """
+    Starts a subgraph.
+ :return: an instance of :class:`onnx_array_api.light_api.OnnxGraph` + """ + return OnnxGraph(proto_type=ProtoType.GRAPH) diff --git a/onnx_array_api/light_api/_op_var.py b/onnx_array_api/light_api/_op_var.py index e2354eb..1291594 100644 --- a/onnx_array_api/light_api/_op_var.py +++ b/onnx_array_api/light_api/_op_var.py @@ -1,4 +1,7 @@ -from typing import List, Optional +from typing import List, Optional, Union +import numpy as np +from ..reference import from_array_extended +from ..annotations import AI_ONNX_ML, domain class OpsVar: @@ -30,7 +33,7 @@ def ArgMin( def AveragePool( self, - auto_pad: str = b"NOTSET", + auto_pad: str = "NOTSET", ceil_mode: int = 0, count_include_pad: int = 0, dilations: Optional[List[int]] = None, @@ -68,7 +71,12 @@ def Cast(self, saturate: int = 1, to: int = 0) -> "Var": def Celu(self, alpha: float = 1.0) -> "Var": return self.make_node("Celu", self, alpha=alpha) - def DepthToSpace(self, blocksize: int = 0, mode: str = b"DCR") -> "Var": + def ConstantOfShape(self, value: Optional[np.array] = None) -> "Var": + if value is None: + return self.make_node("ConstantOfShape", self) + return self.make_node("ConstantOfShape", self, value=from_array_extended(value)) + + def DepthToSpace(self, blocksize: int = 0, mode: str = "DCR") -> "Var": return self.make_node("DepthToSpace", self, blocksize=blocksize, mode=mode) def DynamicQuantizeLinear( @@ -109,6 +117,34 @@ def HardSigmoid( def Hardmax(self, axis: int = -1) -> "Var": return self.make_node("Hardmax", self, axis=axis) + def If( + self, + then_branch: Optional[Union["Var", "Vars", "OnnxGraph"]] = None, + else_branch: Optional[Union["Var", "Vars", "OnnxGraph"]] = None, + ) -> Union["Var", "Vars"]: + attr = {} + n_outputs = None + for name, att in zip( + ["then_branch", "else_branch"], [then_branch, else_branch] + ): + if att is None: + raise ValueError(f"Parameter {name!r} cannot be None.") + if hasattr(att, "to_onnx"): + # Let's overwrite the opsets. + att.parent.opset = self.parent.opset + att.parent.opsets = self.parent.opsets + graph = att.to_onnx() + attr[name] = graph + if n_outputs is None: + n_outputs = len(graph.output) + elif n_outputs != len(graph.output): + raise ValueError( + "then and else branches have different number of outputs." 
+                    )
+            else:
+                raise ValueError(f"Unexpected type {type(att)} for parameter {name!r}.")
+        return self.make_node("If", self, **attr)
+
     def IsInf(self, detect_negative: int = 1, detect_positive: int = 1) -> "Var":
         return self.make_node(
             "IsInf",
@@ -137,7 +173,7 @@ def LpNormalization(self, axis: int = -1, p: int = 2) -> "Var":

     def LpPool(
         self,
-        auto_pad: str = b"NOTSET",
+        auto_pad: str = "NOTSET",
         ceil_mode: int = 0,
         dilations: Optional[List[int]] = None,
         kernel_shape: Optional[List[int]] = None,
@@ -278,6 +314,13 @@ def Selu(

     def Shrink(self, bias: float = 0.0, lambd: float = 0.5) -> "Var":
         return self.make_node("Shrink", self, bias=bias, lambd=lambd)

+    def Slice(
+        self, starts: "Var", ends: "Var", axes: "Var", steps: Optional["Var"] = None
+    ) -> "Var":
+        if steps is None:
+            return self.make_node("Slice", self, starts, ends, axes)
+        return self.make_node("Slice", self, starts, ends, axes, steps)
+
     def Softmax(self, axis: int = -1) -> "Var":
         return self.make_node("Softmax", self, axis=axis)

@@ -291,6 +334,10 @@ def Transpose(self, perm: Optional[List[int]] = None) -> "Var":
         perm = perm or []
         return self.make_node("Transpose", self, perm=perm)

+    @domain(AI_ONNX_ML)
+    def Normalizer(self, norm: str = "MAX"):
+        return self.make_node("Normalizer", self, norm=norm, domain=AI_ONNX_ML)
+

 def _complete():
     ops_to_add = [
diff --git a/onnx_array_api/light_api/_op_vars.py b/onnx_array_api/light_api/_op_vars.py
index 77dbac6..4f30dbe 100644
--- a/onnx_array_api/light_api/_op_vars.py
+++ b/onnx_array_api/light_api/_op_vars.py
@@ -6,12 +6,14 @@ class OpsVars:
     Operators taking multiple inputs.
     """

-    def BitShift(self, direction: str = b"") -> "Var":
+    def BitShift(self, direction: str = "") -> "Var":
         return self.make_node("BitShift", *self.vars_, direction=direction)

     def CenterCropPad(self, axes: Optional[List[int]] = None) -> "Var":
-        axes = axes or []
-        return self.make_node("CenterCropPad", *self.vars_, axes=axes)
+        kwargs = {}
+        if axes is not None:
+            kwargs["axes"] = axes
+        return self.make_node("CenterCropPad", *self.vars_, **kwargs)

     def Clip(
         self,
@@ -27,12 +29,14 @@ def Col2Im(
         pads: Optional[List[int]] = None,
         strides: Optional[List[int]] = None,
     ) -> "Var":
-        dilations = dilations or []
-        pads = pads or []
-        strides = strides or []
-        return self.make_node(
-            "Col2Im", *self.vars_, dilations=dilations, pads=pads, strides=strides
-        )
+        kwargs = {}
+        if dilations is not None:
+            kwargs["dilations"] = dilations
+        if pads is not None:
+            kwargs["pads"] = pads
+        if strides is not None:
+            kwargs["strides"] = strides
+        return self.make_node("Col2Im", *self.vars_, **kwargs)

     def Compress(self, axis: int = 0) -> "Var":
         return self.make_node("Compress", *self.vars_, axis=axis)
@@ -42,55 +46,51 @@ def Concat(self, axis: int = 0) -> "Var":

     def Conv(
         self,
-        auto_pad: str = b"NOTSET",
+        auto_pad: str = "NOTSET",
         dilations: Optional[List[int]] = None,
         group: int = 1,
         kernel_shape: Optional[List[int]] = None,
         pads: Optional[List[int]] = None,
         strides: Optional[List[int]] = None,
     ) -> "Var":
-        dilations = dilations or []
-        kernel_shape = kernel_shape or []
-        pads = pads or []
-        strides = strides or []
+        kwargs = {}
+        if dilations is not None:
+            kwargs["dilations"] = dilations
+        if kernel_shape is not None:
+            kwargs["kernel_shape"] = kernel_shape
+        if pads is not None:
+            kwargs["pads"] = pads
+        if strides is not None:
+            kwargs["strides"] = strides
         return self.make_node(
-            "Conv",
-            *self.vars_,
-            auto_pad=auto_pad,
-            dilations=dilations,
-            group=group,
-            kernel_shape=kernel_shape,
-            pads=pads,
-            strides=strides,
+ "Conv", *self.vars_, auto_pad=auto_pad, group=group, **kwargs ) def ConvInteger( self, - auto_pad: str = b"NOTSET", + auto_pad: str = "NOTSET", dilations: Optional[List[int]] = None, group: int = 1, kernel_shape: Optional[List[int]] = None, pads: Optional[List[int]] = None, strides: Optional[List[int]] = None, ) -> "Var": - dilations = dilations or [] - kernel_shape = kernel_shape or [] - pads = pads or [] - strides = strides or [] + kwargs = {} + if dilations is not None: + kwargs["dilations"] = dilations + if kernel_shape is not None: + kwargs["kernel_shape"] = kernel_shape + if pads is not None: + kwargs["pads"] = pads + if strides is not None: + kwargs["strides"] = strides return self.make_node( - "ConvInteger", - *self.vars_, - auto_pad=auto_pad, - dilations=dilations, - group=group, - kernel_shape=kernel_shape, - pads=pads, - strides=strides, + "ConvInteger", *self.vars_, auto_pad=auto_pad, group=group, **kwargs ) def ConvTranspose( self, - auto_pad: str = b"NOTSET", + auto_pad: str = "NOTSET", dilations: Optional[List[int]] = None, group: int = 1, kernel_shape: Optional[List[int]] = None, @@ -99,23 +99,21 @@ def ConvTranspose( pads: Optional[List[int]] = None, strides: Optional[List[int]] = None, ) -> "Var": - dilations = dilations or [] - kernel_shape = kernel_shape or [] - output_padding = output_padding or [] - output_shape = output_shape or [] - pads = pads or [] - strides = strides or [] - return self.make_node( - "ConvTranspose", - *self.vars_, - auto_pad=auto_pad, - dilations=dilations, - group=group, - kernel_shape=kernel_shape, - output_padding=output_padding, - output_shape=output_shape, - pads=pads, - strides=strides, + kwargs = {} + if dilations is not None: + kwargs["dilations"] = dilations + if kernel_shape is not None: + kwargs["kernel_shape"] = kernel_shape + if pads is not None: + kwargs["pads"] = pads + if strides is not None: + kwargs["strides"] = strides + if output_padding is not None: + kwargs["output_padding"] = output_padding + if output_shape is not None: + kwargs["output_shape"] = output_shape + return self.make_node( + "ConvTranspose", *self.vars_, auto_pad=auto_pad, group=group, **kwargs ) def CumSum(self, exclusive: int = 0, reverse: int = 0) -> "Var": @@ -137,25 +135,23 @@ def DeformConv( pads: Optional[List[int]] = None, strides: Optional[List[int]] = None, ) -> "Var": - dilations = dilations or [] - kernel_shape = kernel_shape or [] - pads = pads or [] - strides = strides or [] + kwargs = {} + if dilations is not None: + kwargs["dilations"] = dilations + if kernel_shape is not None: + kwargs["kernel_shape"] = kernel_shape + if pads is not None: + kwargs["pads"] = pads + if strides is not None: + kwargs["strides"] = strides return self.make_node( - "DeformConv", - *self.vars_, - dilations=dilations, - group=group, - kernel_shape=kernel_shape, - offset_group=offset_group, - pads=pads, - strides=strides, + "DeformConv", *self.vars_, group=group, offset_group=offset_group, **kwargs ) def DequantizeLinear(self, axis: int = 1) -> "Var": return self.make_node("DequantizeLinear", *self.vars_, axis=axis) - def Einsum(self, equation: str = b"") -> "Var": + def Einsum(self, equation: str = "") -> "Var": return self.make_node("Einsum", *self.vars_, equation=equation) def Gather(self, axis: int = 0) -> "Var": @@ -174,8 +170,8 @@ def Gemm( def GridSample( self, align_corners: int = 0, - mode: str = b"bilinear", - padding_mode: str = b"zeros", + mode: str = "bilinear", + padding_mode: str = "zeros", ) -> "Var": return self.make_node( "GridSample", @@ -206,12 
+202,11 @@ def MatMulInteger( def MaxRoiPool( self, pooled_shape: Optional[List[int]] = None, spatial_scale: float = 1.0 ) -> "Var": - pooled_shape = pooled_shape or [] + kwargs = {} + if pooled_shape is not None: + kwargs["pooled_shape"] = pooled_shape return self.make_node( - "MaxRoiPool", - *self.vars_, - pooled_shape=pooled_shape, - spatial_scale=spatial_scale, + "MaxRoiPool", *self.vars_, spatial_scale=spatial_scale, **kwargs ) def MaxUnpool( @@ -220,16 +215,14 @@ def MaxUnpool( pads: Optional[List[int]] = None, strides: Optional[List[int]] = None, ) -> "Var": - kernel_shape = kernel_shape or [] - pads = pads or [] - strides = strides or [] - return self.make_node( - "MaxUnpool", - *self.vars_, - kernel_shape=kernel_shape, - pads=pads, - strides=strides, - ) + kwargs = {} + if kernel_shape is not None: + kwargs["kernel_shape"] = kernel_shape + if pads is not None: + kwargs["pads"] = pads + if strides is not None: + kwargs["strides"] = strides + return self.make_node("MaxUnpool", *self.vars_, **kwargs) def MelWeightMatrix(self, output_datatype: int = 1) -> "Var": return self.make_node( @@ -240,7 +233,7 @@ def Mod(self, fmod: int = 0) -> "Var": return self.make_node("Mod", *self.vars_, fmod=fmod) def NegativeLogLikelihoodLoss( - self, ignore_index: int = 0, reduction: str = b"mean" + self, ignore_index: int = 0, reduction: str = "mean" ) -> "Var": return self.make_node( "NegativeLogLikelihoodLoss", @@ -257,31 +250,29 @@ def NonMaxSuppression(self, center_point_box: int = 0) -> "Var": def OneHot(self, axis: int = -1) -> "Var": return self.make_node("OneHot", *self.vars_, axis=axis) - def Pad(self, mode: str = b"constant") -> "Var": + def Pad(self, mode: str = "constant") -> "Var": return self.make_node("Pad", *self.vars_, mode=mode) def QLinearConv( self, - auto_pad: str = b"NOTSET", + auto_pad: str = "NOTSET", dilations: Optional[List[int]] = None, group: int = 1, kernel_shape: Optional[List[int]] = None, pads: Optional[List[int]] = None, strides: Optional[List[int]] = None, ) -> "Var": - dilations = dilations or [] - kernel_shape = kernel_shape or [] - pads = pads or [] - strides = strides or [] + kwargs = {} + if kernel_shape is not None: + kwargs["kernel_shape"] = kernel_shape + if pads is not None: + kwargs["pads"] = pads + if strides is not None: + kwargs["strides"] = strides + if dilations is not None: + kwargs["dilations"] = dilations return self.make_node( - "QLinearConv", - *self.vars_, - auto_pad=auto_pad, - dilations=dilations, - group=group, - kernel_shape=kernel_shape, - pads=pads, - strides=strides, + "QLinearConv", *self.vars_, auto_pad=auto_pad, group=group, **kwargs ) def QLinearMatMul( @@ -305,7 +296,9 @@ def RandomNormal( seed: float = 0.0, shape: Optional[List[int]] = None, ) -> "Var": - shape = shape or [] + kwargs = {} + if shape is not None: + kwargs["shape"] = shape return self.make_node( "RandomNormal", *self.vars_, @@ -313,7 +306,7 @@ def RandomNormal( mean=mean, scale=scale, seed=seed, - shape=shape, + **kwargs, ) def RandomUniform( @@ -324,7 +317,9 @@ def RandomUniform( seed: float = 0.0, shape: Optional[List[int]] = None, ) -> "Var": - shape = shape or [] + kwargs = {} + if shape is not None: + kwargs["shape"] = shape return self.make_node( "RandomUniform", *self.vars_, @@ -332,7 +327,7 @@ def RandomUniform( high=high, low=low, seed=seed, - shape=shape, + **kwargs, ) def Range( @@ -431,20 +426,21 @@ def Resize( self, antialias: int = 0, axes: Optional[List[int]] = None, - coordinate_transformation_mode: str = b"half_pixel", + coordinate_transformation_mode: 
str = "half_pixel", cubic_coeff_a: float = -0.75, exclude_outside: int = 0, extrapolation_value: float = 0.0, - keep_aspect_ratio_policy: str = b"stretch", - mode: str = b"nearest", - nearest_mode: str = b"round_prefer_floor", + keep_aspect_ratio_policy: str = "stretch", + mode: str = "nearest", + nearest_mode: str = "round_prefer_floor", ) -> "Var": - axes = axes or [] + kwargs = {} + if axes is not None: + kwargs["axes"] = axes return self.make_node( "Resize", *self.vars_, antialias=antialias, - axes=axes, coordinate_transformation_mode=coordinate_transformation_mode, cubic_coeff_a=cubic_coeff_a, exclude_outside=exclude_outside, @@ -452,12 +448,13 @@ def Resize( keep_aspect_ratio_policy=keep_aspect_ratio_policy, mode=mode, nearest_mode=nearest_mode, + **kwargs, ) def RoiAlign( self, - coordinate_transformation_mode: str = b"half_pixel", - mode: str = b"avg", + coordinate_transformation_mode: str = "half_pixel", + mode: str = "avg", output_height: int = 1, output_width: int = 1, sampling_ratio: int = 0, @@ -480,12 +477,12 @@ def STFT(self, onesided: int = 1) -> "Var": def Scatter(self, axis: int = 0) -> "Var": return self.make_node("Scatter", *self.vars_, axis=axis) - def ScatterElements(self, axis: int = 0, reduction: str = b"none") -> "Var": + def ScatterElements(self, axis: int = 0, reduction: str = "none") -> "Var": return self.make_node( "ScatterElements", *self.vars_, axis=axis, reduction=reduction ) - def ScatterND(self, reduction: str = b"none") -> "Var": + def ScatterND(self, reduction: str = "none") -> "Var": return self.make_node("ScatterND", *self.vars_, reduction=reduction) def Slice( @@ -498,13 +495,18 @@ def Slice( def TopK(self, axis: int = -1, largest: int = 1, sorted: int = 1) -> "Vars": return self.make_node( - "TopK", *self.vars_, axis=axis, largest=largest, sorted=sorted + "TopK", + *self.vars_, + axis=axis, + largest=largest, + sorted=sorted, + n_outputs=2, ) def Trilu(self, upper: int = 1) -> "Var": return self.make_node("Trilu", *self.vars_, upper=upper) - def Upsample(self, mode: str = b"nearest") -> "Var": + def Upsample(self, mode: str = "nearest") -> "Var": return self.make_node("Upsample", *self.vars_, mode=mode) def Where( diff --git a/onnx_array_api/light_api/annotations.py b/onnx_array_api/light_api/annotations.py deleted file mode 100644 index 8d473fd..0000000 --- a/onnx_array_api/light_api/annotations.py +++ /dev/null @@ -1,54 +0,0 @@ -from typing import Tuple, Union -import numpy as np -from onnx import FunctionProto, GraphProto, ModelProto, TensorProto, TensorShapeProto -from onnx.helper import np_dtype_to_tensor_dtype - -NP_DTYPE = np.dtype -ELEMENT_TYPE = Union[int, NP_DTYPE] -SHAPE_TYPE = Tuple[int, ...] -VAR_CONSTANT_TYPE = Union["Var", TensorProto, np.ndarray] -GRAPH_PROTO = Union[FunctionProto, GraphProto, ModelProto] - -ELEMENT_TYPE_NAME = { - getattr(TensorProto, k): k - for k in dir(TensorProto) - if isinstance(getattr(TensorProto, k), int) -} - -_type_numpy = { - np.float32: TensorProto.FLOAT, - np.float64: TensorProto.DOUBLE, - np.float16: TensorProto.FLOAT16, - np.int8: TensorProto.INT8, - np.int16: TensorProto.INT16, - np.int32: TensorProto.INT32, - np.int64: TensorProto.INT64, - np.uint8: TensorProto.UINT8, - np.uint16: TensorProto.UINT16, - np.uint32: TensorProto.UINT32, - np.uint64: TensorProto.UINT64, - np.bool_: TensorProto.BOOL, - np.str_: TensorProto.STRING, -} - - -def elem_type_int(elem_type: ELEMENT_TYPE) -> int: - """ - Converts an element type into an onnx element type (int). 
- - :param elem_type: integer or numpy type - :return: int - """ - if isinstance(elem_type, int): - return elem_type - if elem_type in _type_numpy: - return _type_numpy[elem_type] - return np_dtype_to_tensor_dtype(elem_type) - - -def make_shape(shape: TensorShapeProto) -> SHAPE_TYPE: - "Extracts a shape from a tensor type." - if hasattr(shape, "dims"): - res = [(d.dim_value if d.dim_value else d.dim_param) for d in shape.dims] - return tuple(res) - return None diff --git a/onnx_array_api/light_api/model.py b/onnx_array_api/light_api/model.py index 090e29c..f6770eb 100644 --- a/onnx_array_api/light_api/model.py +++ b/onnx_array_api/light_api/model.py @@ -1,4 +1,5 @@ from typing import Any, Dict, List, Optional, Union +from enum import IntEnum import numpy as np from onnx import NodeProto, SparseTensorProto, TensorProto, ValueInfoProto from onnx.checker import check_model @@ -12,7 +13,8 @@ make_tensor_type_proto, ) from onnx.numpy_helper import from_array -from .annotations import ( +from ..ext_test_case import is_azure, is_windows +from ..annotations import ( elem_type_int, make_shape, GRAPH_PROTO, @@ -22,21 +24,34 @@ ) +class ProtoType(IntEnum): + """ + The same code can be used to output a GraphProto, a FunctionProto or a ModelProto. + This class specifies the output type at the beginning of the code. + """ + + FUNCTION = 1 + GRAPH = 2 + MODEL = 3 + + class OnnxGraph: """ Contains every piece needed to create an onnx model in a single instructions. This API is meant to be light and allows the description of a graph. :param opset: main opset version + :param opsets: other opsets as a dictionary + :param ir_version: to specify an ir_version :param is_function: a :class:`onnx.ModelProto` or a :class:`onnx.FunctionProto` - :param opsets: others opsets as a dictionary """ def __init__( self, opset: Optional[int] = None, opsets: Optional[Dict[str, int]] = None, - is_function: bool = False, + ir_version: Optional[int] = None, + proto_type: ProtoType = ProtoType.MODEL, ): if opsets is not None and "" in opsets: if opset is None: @@ -45,13 +60,14 @@ def __init__( raise ValueError( "The main opset can be specified twice with different values." ) - if is_function: + if proto_type == ProtoType.FUNCTION: raise NotImplementedError( "The first version of this API does not support functions." ) - self.is_function = is_function + self.proto_type = proto_type self.opsets = opsets self.opset = opset + self.ir_version = ir_version self.nodes: List[Union[NodeProto, TensorProto]] = [] self.inputs: List[ValueInfoProto] = [] self.outputs: List[ValueInfoProto] = [] @@ -59,6 +75,10 @@ def __init__( self.unique_names_: Dict[str, Any] = {} self.renames_: Dict[str, str] = {} + @property + def is_function(self) -> bool: + return self.proto_type == ProtoType.FUNCTION + def __repr__(self) -> str: "usual" sts = [f"{self.__class__.__name__}("] @@ -163,6 +183,8 @@ def make_output( :param elem_type: element type (the input is assumed to be a tensor) :param shape: shape :return: an instance of ValueInfoProto + + If the checker fails, try `shape=[]`. 
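+
+        A small sketch (assuming ``g`` is an :class:`OnnxGraph` with an
+        existing result named ``"Z"``)::
+
+            g.make_output("Z", elem_type=TensorProto.FLOAT, shape=("N",))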
""" if not self.has_name(name): raise ValueError(f"Name {name!r} does not exist.") @@ -231,8 +253,24 @@ def make_node( node = make_node(op_type, input_names, output_names, domain=domain, **kwargs) self.nodes.append(node) + if domain != "": + if not self.opsets or domain not in self.opsets: + raise RuntimeError(f"No opset value was given for domain {domain!r}.") return node + def cst(self, value: np.ndarray, name: Optional[str] = None) -> "Var": + """ + Adds an initializer + + :param value: constant tensor + :param name: input name + :return: instance of :class:`onnx_array_api.light_api.Var` + """ + from .var import Var + + c = self.make_constant(value, name=name) + return Var(self, c.name, elem_type=c.data_type, shape=tuple(c.dims)) + def true_name(self, name: str) -> str: """ Some names were renamed. If name is one of them, the function @@ -281,7 +319,8 @@ def rename(self, old_name: str, new_name: str): value = self.unique_names_[old_name] if isinstance(value, int): raise TypeError( - f"Unexpected type {type(value)} for value {old_name!r} renamed into {new_name!r}." + f"Unexpected type {type(value)} for value {old_name!r} " + f"renamed into {new_name!r}." ) self.unique_names_[new_name] = value self.renames_[old_name] = new_name @@ -299,7 +338,7 @@ def _fix_name_tensor_input( ) -> Union[TensorProto, SparseTensorProto, ValueInfoProto]: obj = self._fix_name_tensor(obj) shape = make_shape(obj.type.tensor_type.shape) - if shape is None: + if not shape: tensor_type_proto = make_tensor_type_proto( obj.type.tensor_type.elem_type, [] ) @@ -311,7 +350,7 @@ def _fix_name_tensor_output( ) -> Union[TensorProto, SparseTensorProto, ValueInfoProto]: obj = self._fix_name_tensor(obj) shape = make_shape(obj.type.tensor_type.shape) - if shape is None: + if not shape: tensor_type_proto = make_tensor_type_proto( obj.type.tensor_type.elem_type, [] ) @@ -363,6 +402,13 @@ def to_onnx(self) -> GRAPH_PROTO: if self.opsets: for k, v in self.opsets.items(): opsets.append(make_opsetid(k, v)) + if self.proto_type == ProtoType.GRAPH: + # If no opsets, it a subgraph, not a model. + return graph model = make_model(graph, opset_imports=opsets) - check_model(model) + if self.ir_version: + model.ir_version = self.ir_version + if not is_windows() or not is_azure(): + # check_model fails sometimes on Windows + check_model(model) return model diff --git a/onnx_array_api/light_api/var.py b/onnx_array_api/light_api/var.py index 6da1ee3..72a9533 100644 --- a/onnx_array_api/light_api/var.py +++ b/onnx_array_api/light_api/var.py @@ -1,7 +1,9 @@ -from typing import Any, Dict, List, Optional, Union +import inspect +from typing import Any, Dict, List, Optional, Tuple, Union import numpy as np from onnx import TensorProto -from .annotations import ( +from onnx.defs import get_schema +from ..annotations import ( elem_type_int, make_shape, ELEMENT_TYPE, @@ -15,6 +17,26 @@ from ._op_vars import OpsVars +class SubDomain: + """ + Declares a domain or a piece of it (if it contains '.' in its name). + """ + + def __init__(self, var: "BaseVar"): + if not isinstance(var, BaseVar): + raise TypeError(f"Unexpected type {type(var)}.") + self.parent = var + + +def _getclassattr_(self, name): + if not hasattr(self.__class__, name): + raise TypeError( + f"Unable to find {name!r} in class {self.__class__.__name__!r}, " + f"available {dir(self.__class__)}." 
+        )
+    return getattr(self.__class__, name)
+
+
 class BaseVar:
     """
     Represents an input, an initializer, a node, an output,
@@ -23,10 +45,94 @@ class BaseVar:
     :param parent: the graph containing the Variable
     """

+    def __new__(cls, *args, **kwargs):
+        """
+        If called for the first instantiation of a BaseVar, it processes
+        all methods declared with decorator :func:`onnx_array_api.light_api.domain`
+        so that they can be called with the syntax ``v.<domain>.<op_type>``.
+        """
+        res = super().__new__(cls)
+        res.__init__(*args, **kwargs)
+        if getattr(cls, "__incomplete", True):
+            for k in dir(cls):
+                att = getattr(cls, k, None)
+                if not att:
+                    continue
+                name = getattr(att, "__name__", None)
+                if not name or name[0] != "[":
+                    continue
+
+                # A function with a domain name
+                if not inspect.isfunction(att):
+                    raise RuntimeError(f"{cls.__name__}.{k} is not a function.")
+                domain, op_type = name[1:].split("]")
+                if "." in domain:
+                    spl = domain.split(".", maxsplit=1)
+                    dname = f"_{spl[0]}"
+                    if not hasattr(cls, dname):
+                        d = type(
+                            f"{cls.__name__}{dname}", (SubDomain,), {"name": dname[1:]}
+                        )
+                        setattr(cls, dname, d)
+                        setattr(
+                            cls,
+                            spl[0],
+                            property(
+                                lambda self, _name_=dname: _getclassattr_(self, _name_)(
+                                    self
+                                )
+                            ),
+                        )
+                    else:
+                        d = getattr(cls, dname)
+                    suffix = spl[0]
+                    for p in spl[1].split("."):
+                        dname = f"_{p}"
+                        suffix += dname
+                        if not hasattr(d, dname):
+                            sd = type(
+                                f"{cls.__name__}_{suffix}",
+                                (SubDomain,),
+                                {"name": suffix},
+                            )
+                            setattr(d, dname, sd)
+                            setattr(
+                                d,
+                                p,
+                                property(
+                                    lambda self, _name_=dname: _getclassattr_(
+                                        self, _name_
+                                    )(self.parent)
+                                ),
+                            )
+                            d = sd
+                        else:
+                            d = getattr(d, dname)
+                elif not hasattr(cls, domain):
+                    dname = f"_{domain}"
+                    d = type(f"{cls.__name__}{dname}", (SubDomain,), {"name": domain})
+                    setattr(cls, dname, d)
+                    setattr(
+                        cls,
+                        domain,
+                        property(
+                            lambda self, _name_=dname: _getclassattr_(self, _name_)(
+                                self
+                            )
+                        ),
+                    )
+
+                setattr(d, op_type, att)
+            setattr(cls, "__incomplete", False)
+
+        return res
+
     def __init__(
         self,
         parent: OnnxGraph,
     ):
+        if not isinstance(parent, OnnxGraph):
+            raise RuntimeError(f"Unexpected parent type {type(parent)}.")
         self.parent = parent

     def make_node(
@@ -51,6 +157,27 @@ def make_node(
         :return: instance of :class:`onnx_array_api.light_api.Var` or
             :class:`onnx_array_api.light_api.Vars`
         """
+        if domain in ("", "ai.onnx.ml"):
+            if self.parent.opset is None:
+                schema = get_schema(op_type, domain)
+            else:
+                schema = get_schema(op_type, self.parent.opset, domain)
+            if n_outputs < schema.min_output or n_outputs > schema.max_output:
+                raise RuntimeError(
+                    f"Unexpected number of outputs ({n_outputs}) "
+                    f"for node type {op_type!r}, domain={domain!r}, "
+                    f"version={self.parent.opset}, it should be in "
+                    f"[{schema.min_output}, {schema.max_output}]."
+                )
+            n_inputs = len(inputs)
+            if n_inputs < schema.min_input or n_inputs > schema.max_input:
+                raise RuntimeError(
+                    f"Unexpected number of inputs ({n_inputs}) "
+                    f"for node type {op_type!r}, domain={domain!r}, "
+                    f"version={self.parent.opset}, it should be in "
+                    f"[{schema.min_input}, {schema.max_input}]."
+                )
+
         node_proto = self.parent.make_node(
             op_type,
             *inputs,
@@ -60,9 +187,13 @@ def make_node(
             **kwargs,
         )
         names = node_proto.output
+        if n_outputs is not None and len(names) != n_outputs:
+            raise RuntimeError(
+                f"Expects {n_outputs} outputs but output names are {names}."
+            )
         if len(names) == 1:
             return Var(self.parent, names[0])
-        return Vars(*map(lambda v: Var(self.parent, v), names))
+        return Vars(self.parent, *[Var(self.parent, v) for v in names])

     def vin(
         self,
@@ -91,26 +222,6 @@ def cst(self, value: np.ndarray, name: Optional[str] = None) -> "Var":
         c = self.parent.make_constant(value, name=name)
         return Var(self.parent, c.name, elem_type=c.data_type, shape=tuple(c.dims))

-    def vout(
-        self,
-        elem_type: ELEMENT_TYPE = TensorProto.FLOAT,
-        shape: Optional[SHAPE_TYPE] = None,
-    ) -> "Var":
-        """
-        Declares a new output to the graph.
-
-        :param elem_type: element_type
-        :param shape: shape
-        :return: instance of :class:`onnx_array_api.light_api.Var`
-        """
-        output = self.parent.make_output(self.name, elem_type=elem_type, shape=shape)
-        return Var(
-            self.parent,
-            output,
-            elem_type=output.type.tensor_type.elem_type,
-            shape=make_shape(output.type.tensor_type.shape),
-        )
-
     def v(self, name: str) -> "Var":
         """
         Retrieves another variable than this one.
@@ -120,13 +231,22 @@ def v(self, name: str) -> "Var":
         """
         return self.parent.get_var(name)

-    def bring(self, *vars: List[Union[str, "Var"]]) -> "Vars":
+    def bring(self, *vars: List[Union[str, "Var"]]) -> Union["Var", "Vars"]:
         """
         Creates a set of variable as an instance of
         :class:`onnx_array_api.light_api.Vars`.
         """
+        if len(vars) == 1:
+            return Var(self.parent, vars[0])
         return Vars(self.parent, *vars)

+    def vout(self, **kwargs: Dict[str, Any]) -> Union["Var", "Vars"]:
+        """
+        This method needs to be overwritten for Var and Vars depending
+        on the number of variable to declare as outputs.
+        """
+        raise RuntimeError(f"The method was not overwritten in class {type(self)}.")
+
     def left_bring(self, *vars: List[Union[str, "Var"]]) -> "Vars":
         """
         Creates a set of variables as an instance of
@@ -187,6 +307,28 @@ def __str__(self) -> str:
             return s
         return f"{s}:[{''.join(map(str, self.shape))}]"

+    def vout(
+        self,
+        elem_type: ELEMENT_TYPE = TensorProto.FLOAT,
+        shape: Optional[SHAPE_TYPE] = None,
+    ) -> "Var":
+        """
+        Declares a new output to the graph.
+
+        :param elem_type: element_type
+        :param shape: shape
+        :return: instance of :class:`onnx_array_api.light_api.Var`
+
+        If the checker fails, try `shape=[]`.
+        """
+        output = self.parent.make_output(self.name, elem_type=elem_type, shape=shape)
+        return Var(
+            self.parent,
+            output,
+            elem_type=output.type.tensor_type.elem_type,
+            shape=make_shape(output.type.tensor_type.shape),
+        )
+
     def rename(self, new_name: str) -> "Var":
         "Renames a variable."
         self.parent.rename(self.name, new_name)
@@ -299,6 +441,42 @@ def _check_nin(self, n_inputs):
             raise RuntimeError(f"Expecting {n_inputs} inputs not {len(self)}.")
         return self

-    def rename(self, new_name: str) -> "Var":
+    def rename(self, *new_names: List[str]) -> "Vars":
         "Renames variables."
-        raise NotImplementedError("Not yet implemented.")
+        if len(new_names) != len(self):
+            raise ValueError(
+                f"Vars has {len(self)} elements but the method "
+                f"received {len(new_names)} names."
+            )
+        new_vars = []
+        for var, name in zip(self.vars_, new_names):
+            new_vars.append(var.rename(name))
+        return Vars(self.parent, *new_vars)
+
+    def vout(
+        self,
+        *elem_type_shape: List[
+            Union[ELEMENT_TYPE, Tuple[ELEMENT_TYPE, Optional[SHAPE_TYPE]]]
+        ],
+    ) -> "Vars":
+        """
+        Declares a new output to the graph.
+
+        :param elem_type_shape: list of tuple(element_type, shape)
+        :return: instance of :class:`onnx_array_api.light_api.Vars`
+
+        If the checker fails, try `shape=[]`.
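+
+        A short sketch for a node with two results such as ``TopK``
+        (``vs`` is assumed to be a :class:`Vars` instance)::
+
+            vs.vout(TensorProto.FLOAT, (TensorProto.INT64, ("N", "K")))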
+ """ + vars = [] + for i, v in enumerate(self.vars_): + if i < len(elem_type_shape): + if isinstance(elem_type_shape[i]) or len(elem_type_shape[i]) < 2: + elem_type = elem_type_shape[i][0] + shape = None + else: + elem_type, shape = elem_type_shape[i] + else: + elem_type = TensorProto.FLOAT + shape = None + vars.append(v.vout(elem_type=elem_type, shape=shape)) + return Vars(self.parent, *vars) diff --git a/onnx_array_api/npx/npx_array_api.py b/onnx_array_api/npx/npx_array_api.py index 142a892..a9fb3d6 100644 --- a/onnx_array_api/npx/npx_array_api.py +++ b/onnx_array_api/npx/npx_array_api.py @@ -10,8 +10,6 @@ class ArrayApiError(RuntimeError): Raised when a function is not supported by the :epkg:`Array API`. """ - pass - class BaseArrayApi: """ diff --git a/onnx_array_api/npx/npx_core_api.py b/onnx_array_api/npx/npx_core_api.py index d6688cf..a09280a 100644 --- a/onnx_array_api/npx/npx_core_api.py +++ b/onnx_array_api/npx/npx_core_api.py @@ -15,7 +15,7 @@ class args_tuple(tuple): """Overwrites a tuple to make the distinction later in the code.""" - pass + __slots__ = () def cst(*args, **kwargs): @@ -140,7 +140,7 @@ def _xapi(fn: Callable, inline: bool): # It has the same signature def wrapper(*inputs, **kwargs): - if any(map(lambda x: isinstance(x, EagerTensor), inputs)): + if any(isinstance(x, EagerTensor) for x in inputs): tensor_class = None for x in inputs: if isinstance(x, EagerTensor): diff --git a/onnx_array_api/npx/npx_functions.py b/onnx_array_api/npx/npx_functions.py index db29ca2..c6319f2 100644 --- a/onnx_array_api/npx/npx_functions.py +++ b/onnx_array_api/npx/npx_functions.py @@ -1,5 +1,4 @@ from typing import Tuple, Union -import array_api_compat.numpy as np_array_api import numpy as np from onnx import FunctionProto, ModelProto, NodeProto, TensorProto from onnx.helper import make_tensor, tensor_dtype_to_np_dtype @@ -282,7 +281,8 @@ def astype( to = DType(TensorProto.STRING) else: raise TypeError(f"dtype must of type DType, not {type(dtype)}-{dtype}.") - return var(a, op="Cast", to=to.code) + return var(a, op="Cast", to=to.code) + return var(a, op="Cast", to=dtype.code) @npxapi_inline @@ -480,7 +480,7 @@ def eye( /, *, k: ParType[int] = 0, - dtype: ParType[DType] = DType(TensorProto.DOUBLE), + dtype: ParType[DType] = DType(TensorProto.DOUBLE), # noqa: B008 ): "See :func:`numpy.eye`." shape = cst(np.array([-1], dtype=np.int64)) @@ -624,6 +624,8 @@ def isdtype( See :epkg:`BaseArrayAPI:isdtype`. This function is not converted into an onnx graph. 
""" + import array_api_compat.numpy as np_array_api + if isinstance(dtype, DType): dti = tensor_dtype_to_np_dtype(dtype.code) return np_array_api.isdtype(dti, kind) diff --git a/onnx_array_api/npx/npx_functions_test.py b/onnx_array_api/npx/npx_functions_test.py index 4d442dd..3d03def 100644 --- a/onnx_array_api/npx/npx_functions_test.py +++ b/onnx_array_api/npx/npx_functions_test.py @@ -22,21 +22,21 @@ @npxapi_function def _min_max( - x: TensorType[ElemType.numerics, "T"] + x: TensorType[ElemType.numerics, "T"], ) -> TupleType[TensorType[ElemType.numerics, "T"], TensorType[ElemType.numerics, "T"]]: return tuple_var(var(x, op="ReduceMin"), var(x, op="ReduceMax")) @npxapi_inline def _min_max_inline( - x: TensorType[ElemType.numerics, "T"] + x: TensorType[ElemType.numerics, "T"], ) -> TupleType[TensorType[ElemType.numerics, "T"], TensorType[ElemType.numerics, "T"]]: return tuple_var(var(x, op="ReduceMin"), var(x, op="ReduceMax")) @npxapi_function def absolute( - x: TensorType[ElemType.numerics, "T"] + x: TensorType[ElemType.numerics, "T"], ) -> TensorType[ElemType.numerics, "T"]: "See :func:`numpy.absolute`." return var(x, op="Abs") @@ -90,7 +90,7 @@ def log1p(x: TensorType[ElemType.floats, "T"]) -> TensorType[ElemType.floats, "T @npxapi_function def negative( - x: TensorType[ElemType.numerics, "T"] + x: TensorType[ElemType.numerics, "T"], ) -> TensorType[ElemType.numerics, "T"]: "See :func:`numpy.negative`." return var(x, op="Neg") diff --git a/onnx_array_api/npx/npx_graph_builder.py b/onnx_array_api/npx/npx_graph_builder.py index 53d2899..91034f7 100644 --- a/onnx_array_api/npx/npx_graph_builder.py +++ b/onnx_array_api/npx/npx_graph_builder.py @@ -1,6 +1,5 @@ from inspect import Parameter, signature from typing import Any, Callable, Dict, List, Optional, Tuple, Union - import numpy as np from onnx import ( IR_VERSION, @@ -28,6 +27,7 @@ from onnx.onnx_cpp2py_export.shape_inference import InferenceError from onnx.shape_inference import infer_shapes +from ..ext_test_case import is_windows, is_azure from ..reference import from_array_extended as from_array from .npx_constants import _OPSET_TO_IR_VERSION, FUNCTION_DOMAIN, ONNX_DOMAIN from .npx_function_implementation import get_function_implementation @@ -450,7 +450,7 @@ def _make_onnx(self): name = inp.name if name is None: raise RuntimeError( - f"Input {i} is None for function " f"{self.function_name!r}." + f"Input {i} is None for function {self.function_name!r}." 
) inputs.append(name) @@ -473,17 +473,19 @@ def _make_onnx(self): model = make_model( graph, opset_imports=opset_imports, - functions=list(f[0] for f in self.functions_.values()), + functions=[f[0] for f in self.functions_.values()], ir_version=self.ir_version, ) - try: - check_model(model) - except ValidationError as e: - if "Field 'shape' of 'type' is required but missing" in str(e): - # checker does like undefined shape - pass - else: - raise RuntimeError(f"Model is not valid\n{model}") from e + if not is_windows() or not is_azure(): + # check_model fails sometimes on Windows + try: + check_model(model) + except ValidationError as e: + if "Field 'shape' of 'type' is required but missing" in str(e): + # checker does like undefined shape + pass + else: + raise RuntimeError(f"Model is not valid\n{model}") from e has_undefined = 0 in set( o.type.tensor_type.elem_type for o in model.graph.output ) @@ -510,12 +512,7 @@ def _function_to_onnx(self, fct: Callable, n_inputs: int, n_outputs: int): there is an undefined number of inputs """ sig = signature(fct) - if any( - map( - lambda t: issubclass(t.annotation, SequenceType), - sig.parameters.values(), - ) - ): + if any(issubclass(t.annotation, SequenceType) for t in sig.parameters.values()): # onnx does not allow undefined number of inputs key = fct.__module__, fct.__name__, n_inputs else: @@ -850,7 +847,7 @@ def to_onnx( node_inputs.append(input_name) continue - if isinstance(i, tuple) and all(map(lambda x: isinstance(x, int), i)): + if isinstance(i, tuple) and all(isinstance(x, int) for x in i): ai = np.array(list(i), dtype=np.int64) c = Cst(ai) input_name = self._unique(var._prefix) @@ -917,7 +914,7 @@ def to_onnx( [(var, i, None) for i in range(var.n_var_outputs)] ) - if len(possible_types) > 0: + if possible_types: # converts possibles types into a dictionary map_types = {} for var, i, dt in possible_types: diff --git a/onnx_array_api/npx/npx_helper.py b/onnx_array_api/npx/npx_helper.py index f86aadc..b2c6b48 100644 --- a/onnx_array_api/npx/npx_helper.py +++ b/onnx_array_api/npx/npx_helper.py @@ -47,7 +47,7 @@ def _process_attributes(attributes): nodes = [] modified = False for node in graph.node: - if len(set(node.input) & set_rep) == 0: + if not (set(node.input) & set_rep): modified = True new_inputs = [replacements.get(i, i) for i in node.input] atts = _process_attributes(node.attribute) or node.attribute @@ -66,7 +66,7 @@ def _process_attributes(attributes): if not modified: return None - if len(set(i.name for i in graph.input) & set_rep) == 0: + if not (set(i.name for i in graph.input) & set_rep): return make_graph(nodes, graph.name, graph.input, graph.output) new_inputs = [] @@ -130,8 +130,7 @@ def iter_nodes(nodes: Sequence[NodeProto]) -> Iterator[NodeProto]: and hasattr(att, "g") and att.g is not None ): - for n in iter_nodes(att.g.node): - yield n + yield from iter_nodes(att.g.node) def onnx_model_to_function( diff --git a/onnx_array_api/npx/npx_jit_eager.py b/onnx_array_api/npx/npx_jit_eager.py index ef24af7..267eda5 100644 --- a/onnx_array_api/npx/npx_jit_eager.py +++ b/onnx_array_api/npx/npx_jit_eager.py @@ -167,7 +167,7 @@ def make_key(self, *values: List[Any], **kwargs: Dict[str, Any]) -> Tuple[Any, . f"to the attribute list, v={v}." 
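# --- editor's aside: illustration, not part of the patch ---
# Sketch of the pattern the new check_model guard wraps: the checker
# raises onnx.checker.ValidationError on an invalid model, which the
# builder now tolerates on Windows CI where the call proved flaky.
# The model below is deliberately valid, so the except branch is idle.
import onnx
import onnx.parser

model = onnx.parser.parse_model(
    '<ir_version: 8, opset_import: ["": 18]> g (float[N] x) => (float[N] y) { y = Abs(x) }'
)
try:
    onnx.checker.check_model(model)
except onnx.checker.ValidationError as e:
    print("invalid model:", e)
# --- end aside ---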
) res.append(v.key) - elif isinstance(v, (int, float, bool, DType)): + elif isinstance(v, (int, float, bool, complex, DType)): if iv in self.kwargs_to_input_: res.append(self.kwargs_to_input_[iv]) res.append(type(v)) @@ -204,7 +204,7 @@ def make_key(self, *values: List[Any], **kwargs: Dict[str, Any]) -> Tuple[Any, . if k in self.kwargs_to_input_: res.append(type(v)) res.append(v) - elif isinstance(v, (int, float, str, type, bool, DType)): + elif isinstance(v, (int, float, str, type, bool, complex, DType)): res.append(k) res.append(type(v)) res.append(v) @@ -253,7 +253,7 @@ def to_jit(self, *values, **kwargs): """ self.info("+", "to_jit", args=values, kwargs=kwargs) annotations = self.f.__annotations__ - if len(annotations) > 0: + if annotations: input_to_kwargs = {} kwargs_to_input = {} names = list(annotations.keys()) @@ -352,10 +352,10 @@ def to_jit(self, *values, **kwargs): if iname in constraints ] names = [i.name for i in inputs] - if len(new_kwargs) > 0: + if new_kwargs: # An attribute is not named in the numpy API # but is the ONNX definition. - if len(kwargs) == 0: + if not kwargs: kwargs = new_kwargs else: kwargs = kwargs.copy() @@ -375,13 +375,13 @@ def to_jit(self, *values, **kwargs): target_opsets=self.target_opsets, ir_version=self.ir_version, ) - if len(values) > 0 and len(values[0].shape) == 0: + if values and not values[0].shape: inps = onx.graph.input[0] shape = [] for d in inps.type.tensor_type.shape.dim: v = d.dim_value if d.dim_value > 0 else d.dim_param shape.append(v) - if len(shape) != 0: + if shape: raise RuntimeError( f"Shape mismatch, values[0]={values[0]} " f"and inputs={onx.graph.input}." @@ -441,7 +441,7 @@ def move_input_to_kwargs( f"self.input_to_kwargs_ is not initialized for function {self.f} " f"from module {self.f.__module__!r}." 
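# --- editor's aside: illustration, not part of the patch ---
# make_key now accepts complex scalars alongside int/float/bool when it
# builds the JIT cache key. A hypothetical sketch of the (type, value)
# encoding idea; scalar_key is an invented name, not library API:
def scalar_key(v):
    if isinstance(v, (int, float, bool, complex)):
        return (type(v), v)
    raise TypeError(f"unsupported scalar {type(v)}")

print(scalar_key(1 + 2j))  # (<class 'complex'>, (1+2j))
# --- end aside ---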
) - if len(self.input_to_kwargs_) == 0: + if not self.input_to_kwargs_: return values, kwargs new_values = [] new_kwargs = kwargs.copy() @@ -563,7 +563,7 @@ class JitOnnx(JitEager): def __init__( self, f: Callable, - tensor_class: type = None, + tensor_class: Optional[type] = None, target_opsets: Optional[Dict[str, int]] = None, output_types: Optional[Dict[Any, TensorType]] = None, ir_version: Optional[int] = None, @@ -636,7 +636,7 @@ class EagerOnnx(JitEager): def __init__( self, f: Callable, - tensor_class: type = None, + tensor_class: Optional[type] = None, target_opsets: Optional[Dict[str, int]] = None, output_types: Optional[Dict[Any, TensorType]] = None, ir_version: Optional[int] = None, @@ -671,12 +671,12 @@ def _preprocess_constants(self, *args): new_args.append(self.tensor_class(n.inputs[0])) modified = True elif isinstance(n, tuple): - if all(map(lambda x: isinstance(x, int), n)): + if all(isinstance(x, int) for x in n): new_args.append( self.tensor_class(np.array(list(n), dtype=np.int64)) ) modified = True - elif any(map(lambda t: isinstance(t, Var), n)): + elif any(isinstance(t, Var) for t in n): raise TypeError( f"Unexpected types in tuple " f"({[type(t) for t in n]}) for input {i}, " @@ -727,14 +727,14 @@ def __call__(self, *args, already_eager=False, **kwargs): ) if already_eager: if any( - map( - lambda t: t is not None + ( + t is not None and not isinstance( t, EagerOnnx.allowed_input_types, - ), - args, + ) ) + for t in args ): raise TypeError( f"One of the input is not an EagerTensor or a constant, " @@ -759,8 +759,8 @@ def __call__(self, *args, already_eager=False, **kwargs): try: res = self.f(*values, **kwargs) except (AttributeError, TypeError) as e: - inp1 = ", ".join(map(str, map(lambda a: type(a).__name__, args))) - inp2 = ", ".join(map(str, map(lambda a: type(a).__name__, values))) + inp1 = ", ".join(map(str, [type(a).__name__ for a in args])) + inp2 = ", ".join(map(str, [type(a).__name__ for a in values])) raise TypeError( f"Unexpected types, input types are args=[{inp1}], " f"values=[{inp2}], kwargs={kwargs}. " @@ -778,7 +778,7 @@ def __call__(self, *args, already_eager=False, **kwargs): f"from module {self.f.__module__!r}, " f"type of first input is {type(args[0])}." ) - elif isinstance(res, Var) or any(map(lambda x: isinstance(x, Var), res)): + elif isinstance(res, Var) or any(isinstance(x, Var) for x in res): # The function returns instance of type Var. # It does not support eager mode and needs # to be converted into onnx. diff --git a/onnx_array_api/npx/npx_numpy_tensors.py b/onnx_array_api/npx/npx_numpy_tensors.py index a106b95..9579455 100644 --- a/onnx_array_api/npx/npx_numpy_tensors.py +++ b/onnx_array_api/npx/npx_numpy_tensors.py @@ -220,10 +220,11 @@ def __bool__(self): ) if self.shape == (0,): return False - if len(self.shape) != 0: + if self.shape: warnings.warn( f"Conversion to bool only works for scalar, not for {self!r}, " - f"bool(...)={bool(self._tensor)}." + f"bool(...)={bool(self._tensor)}.", + stacklevel=0, ) try: return bool(self._tensor) @@ -233,7 +234,7 @@ def __bool__(self): def __int__(self): "Implicit conversion to int." - if len(self.shape) != 0: + if self.shape: raise ValueError( f"Conversion to bool only works for scalar, not for {self!r}." ) @@ -255,7 +256,7 @@ def __int__(self): def __float__(self): "Implicit conversion to float." - if len(self.shape) != 0: + if self.shape: raise ValueError( f"Conversion to bool only works for scalar, not for {self!r}." 
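# --- editor's aside: illustration, not part of the patch ---
# The patch replaces `len(self.shape) != 0` with the truthiness idiom;
# for shape tuples the two are strictly equivalent:
for shape in [(), (0,), (2, 3)]:
    assert bool(shape) == (len(shape) != 0)
# --- end aside ---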
             )
@@ -264,6 +265,8 @@ def __float__(self):
             DType(TensorProto.DOUBLE),
             DType(TensorProto.FLOAT16),
             DType(TensorProto.BFLOAT16),
+            DType(TensorProto.COMPLEX64),
+            DType(TensorProto.COMPLEX128),
         }:
             raise TypeError(
                 f"Conversion to float only works for float scalar, "
@@ -271,6 +274,26 @@ def __float__(self):
             )
         return float(self._tensor)
 
+    def __complex__(self):
+        "Implicit conversion to complex."
+        if self.shape:
+            raise ValueError(
+                f"Conversion to complex only works for a scalar, not for {self!r}."
+            )
+        if self.dtype not in {
+            DType(TensorProto.FLOAT),
+            DType(TensorProto.DOUBLE),
+            DType(TensorProto.FLOAT16),
+            DType(TensorProto.BFLOAT16),
+            DType(TensorProto.COMPLEX64),
+            DType(TensorProto.COMPLEX128),
+        }:
+            raise TypeError(
+                f"Conversion to complex only works for a float or complex "
+                f"scalar, not for dtype={self.dtype}."
+            )
+        return complex(self._tensor)
+
     def __iter__(self):
         """
         The :epkg:`Array API` does not define this function (2022/12).
@@ -279,7 +302,8 @@ def __iter__(self):
         warnings.warn(
             f"Iterators are not implemented in the generic case. "
             f"Every function using them cannot be converted into ONNX "
-            f"(tensors - {type(self)})."
+            f"(tensors - {type(self)}).",
+            stacklevel=0,
         )
         for row in self._tensor:
             yield self.__class__(row)
@@ -289,5 +313,3 @@ class JitNumpyTensor(NumpyTensor, JitTensor):
     """
     Defines a value for a specific backend.
     """
-
-    pass
diff --git a/onnx_array_api/npx/npx_tensors.py b/onnx_array_api/npx/npx_tensors.py
index 3e4faa7..40ebc12 100644
--- a/onnx_array_api/npx/npx_tensors.py
+++ b/onnx_array_api/npx/npx_tensors.py
@@ -10,8 +10,6 @@ class JitTensor:
     Defines a value for a specific jit mode
     """
 
-    pass
-
 
 class EagerTensor(BaseArrayApi):
     """
@@ -93,7 +91,7 @@ def _astype_impl(
         if not isinstance(x, Var):
             raise TypeError(f"Input 0 must be a Var not {type(x)}.")
 
-        meth = getattr(Var, "astype")
+        meth = getattr(Var, "astype")  # noqa: B009
         return meth(x, dtype)
 
     @staticmethod
diff --git a/onnx_array_api/npx/npx_types.py b/onnx_array_api/npx/npx_types.py
index 8284765..2f2a6a6 100644
--- a/onnx_array_api/npx/npx_types.py
+++ b/onnx_array_api/npx/npx_types.py
@@ -11,7 +11,7 @@ class WrapperType:
     WrapperType.
     """
 
-    pass
+    __slots__ = ()
 
 
 class DType(WrapperType):
@@ -78,8 +78,8 @@ def __eq__(self, dt: "DType") -> bool:
             return self.code_ == dt.dtype.code_
         try:
             dti = np_dtype_to_tensor_dtype(dt)
-        except KeyError:
-            raise TypeError(f"dt must be DType not {type(dt)} - {dt!r}.")
+        except KeyError as e:
+            raise TypeError(f"dt must be DType not {type(dt)} - {dt!r}.") from e
         return self.code_ == dti
 
     def __lt__(self, dt: "DType") -> bool:
@@ -90,8 +90,8 @@ def __lt__(self, dt: "DType") -> bool:
             raise TypeError(f"dt must be DType not {type(dt)}.")
         try:
             dti = np_dtype_to_tensor_dtype(dt)
-        except KeyError:
-            raise TypeError(f"dt must be DType not {type(dt)} - {dt}.")
+        except KeyError as e:
+            raise TypeError(f"dt must be DType not {type(dt)} - {dt}.") from e
         return self.code_ < dti
 
     @classmethod
@@ -102,12 +102,10 @@ def type_name(cls) -> str:
 
 
 class _DType2(DType):
     "Wraps a type into a different type."
-    pass
 
 
 class _DTypes(DType):
     "Wraps a type into a different type."
-    pass
 
 
 class ElemTypeCstInner(WrapperType):
@@ -367,7 +365,7 @@ def onnx_type(cls):
         if cls.dtype == str:
             return AttributeProto.STRING
         raise RuntimeError(
-            f"Unsupported attribute type {cls.dtype!r} " f"for parameter {cls!r}."
+            f"Unsupported attribute type {cls.dtype!r} for parameter {cls!r}."
         )
 
 
@@ -403,9 +401,11 @@ class ShapeType(Tuple[int, ...]):
     Defines a shape type.
""" + __slots__ = () + @classmethod def __class_getitem__(cls, *args): - if any(map(lambda t: t is not None and not isinstance(t, (int, str)), args)): + if any((t is not None and not isinstance(t, (int, str))) for t in args): raise TypeError( f"Unexpected value for args={args}, every element should int or str." ) @@ -504,7 +504,7 @@ def __class_getitem__(cls, *args): if name: msg.append(name) if dtypes is not None: - msg.append("_".join(map(lambda t: str(t.dtype), dtypes))) + msg.append("_".join(str(t.dtype) for t in dtypes)) if shape is not None: msg.append("_".join(map(str, shape))) final = "__".join(msg) @@ -561,11 +561,11 @@ def _name_set(self): s += 1 << dt.dtype try: return ElemType.set_names[s] - except KeyError: + except KeyError as e: raise RuntimeError( f"Unable to guess element type name for {s}: " f"{repr(self)} in {ElemType.set_names}." - ) + ) from e @classmethod def issuperset(cls, tensor_type: type) -> bool: @@ -686,7 +686,7 @@ def len(cls): @classmethod def type_name(cls) -> str: "Returns its full name." - dts = ", ".join(map(lambda s: s.type_name(), cls.elem_types)) + dts = ", ".join(s.type_name() for s in cls.elem_types) if cls.name: newt = f"TupleType[{dts}, {cls.name!r}]" else: diff --git a/onnx_array_api/npx/npx_var.py b/onnx_array_api/npx/npx_var.py index 27f5455..0e71070 100644 --- a/onnx_array_api/npx/npx_var.py +++ b/onnx_array_api/npx/npx_var.py @@ -33,7 +33,7 @@ def __init__( ): if not issubclass(dtype, ParType): raise TypeError( - f"dtype for parameter {name!r} must be of " f"ParType not {dtype}." + f"dtype for parameter {name!r} must be of ParType not {dtype}." ) if parent_op is None: raise ValueError(f"parent_op must be filled for paramenter {name!r}.") @@ -174,7 +174,7 @@ def to_onnx( f"Mismatch number of outputs, expecting {len(outputs)}, " f"got ({len(onx.output)})." ) - if len(g.functions_) > 0: + if g.functions_: return [g.functions_, onx] return onx @@ -453,7 +453,7 @@ def _get_vars(self): deleted.append(var) continue raise TypeError( - f"Unexpected type {type(applied)} as output of " f"function {fct}." + f"Unexpected type {type(applied)} as output of function {fct}." ) vs.append(var) for i in reversed(var.inputs): @@ -469,11 +469,11 @@ def _get_vars(self): replacement_cst[id(i)] = cst(np.array(i)) continue if isinstance(i, tuple): - if all(map(lambda x: isinstance(x, int), i)): + if all(isinstance(x, int) for x in i): cst = Var.get_cst_var()[0] replacement_cst[id(i)] = cst(np.array(list(i), dtype=np.int64)) continue - if any(map(lambda t: isinstance(t, Var), i)): + if any(isinstance(t, Var) for t in i): raise TypeError( f"Unexpected types in tuple " f"({[type(t) for t in i]}), " @@ -1020,7 +1020,7 @@ def __getitem__(self, index: Any) -> "Var": if not isinstance(index, tuple): index = (index,) - elif len(index) == 0: + elif not index: # The array contains a scalar and it needs to be returned. 
return var(self, op="Identity") @@ -1091,7 +1091,7 @@ def __getitem__(self, index: Any) -> "Var": starts = np.array(starts, dtype=np.int64) axes = np.array(axes, dtype=np.int64) - if len(needs_shape) > 0: + if needs_shape: shape = self.shape conc = [] for e in ends: @@ -1116,7 +1116,7 @@ def __getitem__(self, index: Any) -> "Var": sliced_args.append(steps) sliced_args_cst = [v if isinstance(v, Var) else cst(v) for v in sliced_args] sliced = var(self.self_var, *sliced_args_cst, op="Slice") - if len(axis_squeeze) > 0: + if axis_squeeze: return var( sliced, cst(np.array(axis_squeeze, dtype=np.int64)), @@ -1138,7 +1138,7 @@ class Input(Var): :param annotation: annotation if any is available """ - def __init__(self, name: str = None, annotation: Optional[type] = None): + def __init__(self, name: Optional[str] = None, annotation: Optional[type] = None): Var.__init__(self) self.name = name self._prefix = name or "I" @@ -1171,16 +1171,20 @@ def __init__(self, cst: Any): Var.__init__(self, np.array(cst, dtype=np.int64), op="Identity") elif isinstance(cst, float): Var.__init__(self, np.array(cst, dtype=np.float64), op="Identity") + elif isinstance(cst, complex): + Var.__init__(self, np.array(cst, dtype=np.complex128), op="Identity") elif isinstance(cst, list): - if all(map(lambda t: isinstance(t, bool), cst)): + if all(isinstance(t, bool) for t in cst): Var.__init__(self, np.array(cst, dtype=np.bool_), op="Identity") - elif all(map(lambda t: isinstance(t, (int, bool)), cst)): + elif all(isinstance(t, (int, bool)) for t in cst): Var.__init__(self, np.array(cst, dtype=np.int64), op="Identity") - elif all(map(lambda t: isinstance(t, (float, int, bool)), cst)): + elif all(isinstance(t, (float, int, bool)) for t in cst): Var.__init__(self, np.array(cst, dtype=np.float64), op="Identity") + elif all(isinstance(t, (float, int, bool, complex)) for t in cst): + Var.__init__(self, np.array(cst, dtype=np.complex128), op="Identity") else: raise ValueError( - f"Unable to convert cst (type={type(cst)}), " f"value={cst}." + f"Unable to convert cst (type={type(cst)}), value={cst}." 
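# --- editor's aside: illustration, not part of the patch ---
# Sketch of the new complex branch in Cst: a Python complex, or a list
# mixing complex with int/float/bool, is promoted to numpy complex128,
# mirroring the existing int64/float64 branches.
import numpy as np

print(np.array(1 + 2j, dtype=np.complex128).dtype)      # complex128
print(np.array([1, 2.0, 3 + 1j], dtype=np.complex128))  # promoted list
# --- end aside ---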
) else: raise NotImplementedError( diff --git a/onnx_array_api/ort/ort_profile.py b/onnx_array_api/ort/ort_profile.py index b61df67..ebccaba 100644 --- a/onnx_array_api/ort/ort_profile.py +++ b/onnx_array_api/ort/ort_profile.py @@ -52,7 +52,7 @@ def sep_event(s): for c in agg_cols: df[c] = df[c].fillna("") df["dur"] = df["dur"].fillna(0) - agg = df[agg_cols + ["dur"]].groupby(agg_cols).sum() + agg = df[[*agg_cols, "dur"]].groupby(agg_cols).sum() return agg @@ -101,14 +101,16 @@ def ort_profile( if providers is None: providers = ["CPUExecutionProvider"] sess = InferenceSession(obj, sess_options, providers=providers, **kwargs) - first = list(feeds.values())[0] + for v in feeds.values(): + first = v + break if isinstance(first, numpy.ndarray): - for i in range(repeat): + for _i in range(repeat): sess.run(None, feeds) else: out_names = [o.name for o in sess.get_outputs()] - for i in range(repeat): + for _i in range(repeat): sess._sess.run_with_ort_values(feeds, out_names, None) prof = sess.end_profiling() @@ -177,7 +179,7 @@ def _idx(row): df[c] = df[c].apply(str) df = df.copy() df["count"] = 1 - gr = df[groupkey + ["dur", "count"]].groupby(groupkey) + gr = df[[*groupkey, "dur", "count"]].groupby(groupkey) return gr.sum() @@ -187,7 +189,9 @@ def _process_shape(s: Tuple[int, ...], keys: Dict[str, str]) -> str: for v in value: if len(v) != 1: raise NotImplementedError(f"Unexpected value {v} in {s!r}.") - k, v = list(v.items())[0] + for _k, _v in v.items(): + k, v = _k, _v + break n = "-".join([keys[k], "x".join(map(str, v))]) ns.append(n) return ",".join(ns) diff --git a/onnx_array_api/ort/ort_tensors.py b/onnx_array_api/ort/ort_tensors.py index 2117e3f..4f53e6e 100644 --- a/onnx_array_api/ort/ort_tensors.py +++ b/onnx_array_api/ort/ort_tensors.py @@ -86,7 +86,7 @@ def __init__( tensor_class: type, input_names: List[str], onx: ModelProto, - f: Callable = None, + f: Optional[Callable] = None, ): try: self.ref = InferenceSession( @@ -282,5 +282,3 @@ class JitOrtTensor(OrtTensor, OrtCommon, JitTensor): """ Defines a value for :epkg:`onnxruntime` as a backend. """ - - pass diff --git a/onnx_array_api/plotting/_helper.py b/onnx_array_api/plotting/_helper.py index ddca631..5c5d881 100644 --- a/onnx_array_api/plotting/_helper.py +++ b/onnx_array_api/plotting/_helper.py @@ -94,7 +94,7 @@ def _extract_attribute_value( f"Unable to convert attribute {att.name!r} type {att.type!r}." ) raise AttributeError( # pragma: no cover - f"Unable to convert default value for {ref_att.name!r} " f"type {att.type!r}." + f"Unable to convert default value for {ref_att.name!r} type {att.type!r}." ) @@ -120,10 +120,7 @@ def get_tensor_shape(obj): for d in obj.tensor_type.shape.dim: v = d.dim_value if d.dim_value > 0 else d.dim_param shape.append(v) - if len(shape) == 0: - shape = None - else: - shape = list(None if s == 0 else s for s in shape) + shape = None if not shape else [None if s == 0 else s for s in shape] return shape @@ -160,6 +157,8 @@ def _get_type(obj0): if hasattr(obj, "tensor_type"): obj = obj.tensor_type if hasattr(obj, "elem_type"): + if obj.elem_type == 0: + return "NOTENSOR" return tensor_dtype_to_np_dtype(obj.elem_type) raise RuntimeError(f"Unable to guess type from {obj0!r}.") # pragma: no cover @@ -184,7 +183,7 @@ def _get_shape(obj): arr = to_array(obj) return arr.shape raise RuntimeError( # pragma: no cover - f"Unable to guess type from {obj0!r}, " f"data_type is {obj.data_type!r}." + f"Unable to guess type from {obj0!r}, data_type is {obj.data_type!r}." 
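# --- editor's aside: illustration, not part of the patch ---
# The loop-and-break in ort_profile fetches the first feed without
# materializing list(feeds.values()); next(iter(...)) is the usual
# single-expression equivalent:
feeds = {"X": 1, "Y": 2}
first = next(iter(feeds.values()))
assert first == 1
# --- end aside ---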
         )
     if hasattr(obj, "type"):
         obj = obj.type
diff --git a/onnx_array_api/plotting/dot_plot.py b/onnx_array_api/plotting/dot_plot.py
index fd23f79..af8ad22 100644
--- a/onnx_array_api/plotting/dot_plot.py
+++ b/onnx_array_api/plotting/dot_plot.py
@@ -116,7 +116,12 @@ def myloss(x, y):
     clean_label_reg2 = re.compile("\\\\p\\{[0-9P]{1,6}\\}")
 
     def dot_name(text):
-        return text.replace("/", "_").replace(":", "__").replace(".", "_")
+        return (
+            text.replace("/", "_")
+            .replace(":", "__")
+            .replace(".", "_")
+            .replace("-", "_")
+        )
 
     def dot_label(text):
         if text is None:
@@ -242,7 +247,7 @@ def dot_label(text):
         for node in nodes:
             exp.append("")
             for out in node.output:
-                if len(out) > 0 and out not in inter_vars:
+                if out and out not in inter_vars:
                     inter_vars[out] = out
                     sh = shapes.get(out, "")
                     if sh:
@@ -305,7 +310,7 @@ def dot_label(text):
             exp.append(f'    label="{node.op_type}\\n({dot_name(field)}){satts}";')
             exp.append(f"    fontsize={fontsize};")
             exp.append("    color=black;")
-            exp.append("\n".join(map(lambda s: "  " + s, subgraph.split("\n"))))
+            exp.append("\n".join(f"  {s}" for s in subgraph.split("\n")))
 
             node0 = body.node[0]
             connects.append(
@@ -318,7 +323,7 @@ def dot_label(text):
                     f"{dot_name(subprefix)}{dot_name(inp2.name)};"
                 )
             for out1, out2 in zip(body.output, node.output):
-                if len(out2) == 0:
+                if not out2:
                     # Empty output, it cannot be used.
                     continue
                 exp.append(
@@ -346,7 +351,7 @@ def dot_label(text):
                 f"{dot_name(prefix)}{dot_name(node.name)};"
             )
             for out in node.output:
-                if len(out) == 0:
+                if not out:
                     # Empty output, it cannot be used.
                     continue
                 exp.append(
diff --git a/onnx_array_api/plotting/graphviz_helper.py b/onnx_array_api/plotting/graphviz_helper.py
new file mode 100644
index 0000000..4aec5e4
--- /dev/null
+++ b/onnx_array_api/plotting/graphviz_helper.py
@@ -0,0 +1,244 @@
+import os
+import subprocess
+import sys
+import tempfile
+from typing import List, Optional, Tuple, Union
+import numpy as np
+from onnx import ModelProto
+
+
+def _find_in_PATH(prog: str) -> Optional[str]:
+    """
+    Looks for a specific file in every path mentioned in ``%PATH%``,
+    returns None if it cannot be found.
+
+    :param prog: program to look for
+    :return: path
+    """
+    sep = ";" if sys.platform.startswith("win") else ":"
+    path = os.environ["PATH"]
+    for p in path.split(sep):
+        f = os.path.join(p, prog)
+        if os.path.exists(f):
+            return p
+    return None
+
+
+def _find_graphviz_dot(exc: bool = True) -> str:
+    """
+    Determines the path to graphviz (on Windows),
+    the function tests the existence of versions 34 to 59
+    assuming it was installed in a standard folder:
+    ``C:\\Program Files (x86)\\Graphviz2.<version>\\bin``.
+
+    :param exc: raise an exception or be silent
+    :return: path to dot
+    :raises FileNotFoundError: if graphviz not found
+    """
+    if sys.platform.startswith("win"):
+        version = list(range(34, 60))
+        version.extend([f"{v}.1" for v in version])
+        for v in version:
+            graphviz_dot = f"C:\\Program Files (x86)\\Graphviz2.{v}\\bin\\dot.exe"
+            if os.path.exists(graphviz_dot):
+                return graphviz_dot
+        extra = ["build/update_modules/Graphviz/bin"]
+        for ext in extra:
+            graphviz_dot = os.path.join(ext, "dot.exe")
+            if os.path.exists(graphviz_dot):
+                return graphviz_dot
+        p = _find_in_PATH("dot.exe")
+        if p is None:
+            if exc:
+                raise FileNotFoundError(
+                    f"Unable to find graphviz, look into paths such as {graphviz_dot}."
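# --- editor's aside: illustration, not part of the patch ---
# The extended dot_name sanitizer now also maps '-' to '_' so that ONNX
# result names stay valid Graphviz identifiers. Stand-alone sketch:
def dot_name(text: str) -> str:
    return (
        text.replace("/", "_").replace(":", "__").replace(".", "_").replace("-", "_")
    )

assert dot_name("layer-1/conv:0.bias") == "layer_1_conv__0_bias"
# --- end aside ---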
+ ) + return None + return os.path.join(p, "dot.exe") + # linux + return "dot" + + +def _run_subprocess( + args: List[str], + cwd: Optional[str] = None, +): + assert not isinstance( + args, str + ), "args should be a sequence of strings, not a string." + + p = subprocess.Popen( + args, + cwd=cwd, + shell=False, + env=os.environ, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + raise_exception = False + output = "" + while True: + output = p.stdout.readline().decode(errors="ignore") + if output == "" and p.poll() is not None: + break + if output: + if ( + "fatal error" in output + or "CMake Error" in output + or "gmake: ***" in output + or "): error C" in output + or ": error: " in output + ): + raise_exception = True + p.poll() + error = p.stderr.readline().decode(errors="ignore") + p.stdout.close() + if error and raise_exception: + raise RuntimeError( + f"An error was found in the output. The build is stopped." + f"\n{output}\n---\n{error}" + ) + return output + "\n" + error + + +def _run_graphviz(filename: str, image: str, engine: str = "dot") -> str: + """ + Run :epkg:`Graphviz`. + + :param filename: filename which contains the graph definition + :param image: output image + :param engine: *dot* or *neato* + :return: output of graphviz + """ + ext = os.path.splitext(image)[-1] + assert ext in { + ".png", + ".bmp", + ".fig", + ".gif", + ".ico", + ".jpg", + ".jpeg", + ".pdf", + ".ps", + ".svg", + ".vrml", + ".tif", + ".tiff", + ".wbmp", + }, f"Unexpected extension {ext!r} for {image!r}." + if sys.platform.startswith("win"): + bin_ = os.path.dirname(_find_graphviz_dot()) + # if bin not in os.environ["PATH"]: + # os.environ["PATH"] = os.environ["PATH"] + ";" + bin + exe = os.path.join(bin_, engine) + else: + exe = engine + if os.path.exists(image): + os.remove(image) + cmd = [exe, f"-T{ext[1:]}", filename, "-o", image] + output = _run_subprocess(cmd) + assert os.path.exists(image), ( + f"Unable to find {image!r}, command line is " + f"{' '.join(cmd)!r}, Graphviz failed due to\n{output}" + ) + return output + + +def draw_graph_graphviz( + dot: Union[str, ModelProto], + image: str, + engine: str = "dot", +) -> str: + """ + Draws a graph using :epkg:`Graphviz`. + + :param dot: dot graph or ModelProto + :param image: output image, None, just returns the output + :param engine: *dot* or *neato* + :return: :epkg:`Graphviz` output or + the dot text if *image* is None + + The function creates a temporary file to store the dot file if *image* is not None. + """ + if isinstance(dot, ModelProto): + from .dot_plot import to_dot + + sdot = to_dot(dot) + else: + sdot = dot + with tempfile.NamedTemporaryFile(delete=False) as fp: + fp.write(sdot.encode("utf-8")) + fp.close() + + filename = fp.name + assert os.path.exists( + filename + ), f"File {filename!r} cannot be created to store the graph." + out = _run_graphviz(filename, image, engine=engine) + assert os.path.exists( + image + ), f"Graphviz failed with no reason, {image!r} not found, output is {out}." + os.remove(filename) + return out + + +def plot_dot( + dot: Union[str, ModelProto], + ax: Optional["matplotlib.axis.Axis"] = None, # noqa: F821 + engine: str = "dot", + figsize: Optional[Tuple[int, int]] = None, +) -> "matplotlib.axis.Axis": # noqa: F821 + """ + Draws a dot graph into a matplotlib graph. 
+
+    :param dot: dot graph or ModelProto
+    :param ax: existing matplotlib axis to draw into, a new one is created if None
+    :param engine: *dot* or *neato*
+    :param figsize: figure size, used when *ax* is None
+    :return: the matplotlib axis showing the rendered graph
+
+    .. plot::
+
+        import matplotlib.pyplot as plt
+        import onnx.parser
+        from onnx_array_api.plotting.graphviz_helper import plot_dot
+
+        model = onnx.parser.parse_model(
+            '''
+            <ir_version: 8, opset_import: [ "": 18 ]>
+            agraph (float[N] x) => (float[N] z) {
+                two = Constant <value_float=2.0> ()
+                four = Add(two, two)
+                z = Mul(four, four)
+            }
+            ''')
+
+        ax = plot_dot(model)
+        ax.set_title("Dummy graph")
+        plt.show()
+    """
+    if ax is None:
+        import matplotlib.pyplot as plt
+
+        _, ax = plt.subplots(1, 1, figsize=figsize)
+        clean = True
+    else:
+        clean = False
+
+    from PIL import Image
+
+    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as fp:
+        fp.close()
+
+    draw_graph_graphviz(dot, fp.name, engine=engine)
+    img = np.asarray(Image.open(fp.name))
+    os.remove(fp.name)
+
+    ax.imshow(img)
+
+    if clean:
+        ax.get_xaxis().set_visible(False)
+        ax.get_yaxis().set_visible(False)
+        ax.get_figure().tight_layout()
+    return ax
diff --git a/onnx_array_api/plotting/text_plot.py b/onnx_array_api/plotting/text_plot.py
index 8736d97..0b4d30a 100644
--- a/onnx_array_api/plotting/text_plot.py
+++ b/onnx_array_api/plotting/text_plot.py
@@ -75,7 +75,7 @@ def append_target(self, tid, weight):
     def process_node(self):
         "node to string"
         if self.nodes_modes == "LEAF":
-            if len(self.targets) == 0:
+            if not self.targets:
                 text = f"{self.true_false}f"
             elif len(self.targets) == 1:
                 t = self.targets[0]
@@ -85,10 +85,8 @@ def process_node(self):
                 )
             else:
                 ts = " ".join(
-                    map(
-                        lambda t: f"{t['target_id']}:{_number2str(t['weight'])}",
-                        self.targets,
-                    )
+                    f"{t['target_id']}:{_number2str(t['weight'])}"
+                    for t in self.targets
                 )
                 text = f"{self.true_false}f {ts}"
         else:
@@ -184,9 +182,7 @@ def iterate(nodes, node, depth=0, true_false=""):
             rows.extend(r)
         return "\n".join(rows)
 
-    raise NotImplementedError(  # pragma: no cover
-        f"Type {node.op_type!r} cannot be displayed."
-    )
+    raise NotImplementedError(f"Type {node.op_type!r} cannot be displayed.")
 
 
 def _append_succ_pred(
@@ -264,7 +260,7 @@ def _append_succ_pred_s(
             unknown.add(i)
         for i in n.output:
             known[i] = n
-    if len(unknown) > 0:
+    if unknown:
         # These inputs are coming from the graph below.
         for name in unknown:
             successors[name].append(parent_node_name)
@@ -353,7 +349,7 @@ def __init__(self, nodes):
 
     def _find_sequence(node_name, known, done):
         inputs = dnodes[node_name].input
-        if any(map(lambda i: i not in known, inputs)):
+        if any((i not in known) for i in inputs):
             return []
 
         res = [node_name]
@@ -364,7 +360,7 @@ def _find_sequence(node_name, known, done):
             if len(next_names) == 1:
                 next_name = next_names.pop()
                 inputs = dnodes[next_name].input
-                if any(map(lambda i: i not in known, inputs)):
+                if any((i not in known) for i in inputs):
                     break
                 res.extend(next_name)
             else:
@@ -392,7 +388,7 @@ def _find_sequence(node_name, known, done):
                 possibles[k] = v
 
         sequences = OrderedDict()
-        for k, v in possibles.items():
+        for k, _v in possibles.items():
             if k in done:
                 continue
             sequences[k] = _find_sequence(k, known, done)
@@ -402,8 +398,8 @@ def _find_sequence(node_name, known, done):
                     % (k, ",".join(sequences[k]), list(sequences))
                 )
 
-        if len(sequences) == 0:
-            raise RuntimeError(  # pragma: no cover
+        if not sequences:
+            raise RuntimeError(
                 "Unexpected empty sequence (len(possibles)=%d, "
                 "len(done)=%d, len(nodes)=%d). This is usually due to "
                 "a name used both as result name and node name. 
" @@ -417,7 +413,7 @@ def _find_sequence(node_name, known, done): # if the sequence of successors is longer best = k elif len(v) == len(sequences[best]): - if len(new_nodes) > 0: + if new_nodes: # then choose the next successor sharing input with # previous output so = set(new_nodes[-1].output) @@ -434,7 +430,7 @@ def _find_sequence(node_name, known, done): best = k if best is None: - raise RuntimeError( # pragma: no cover + raise RuntimeError( f"Wrong implementation (len(sequence)={len(sequences)})." ) if verbose: @@ -453,7 +449,7 @@ def _find_sequence(node_name, known, done): known |= set(v.output) if len(new_nodes) != len(nodes): - raise RuntimeError( # pragma: no cover + raise RuntimeError( "The returned new nodes are different. " "len(nodes=%d) != %d=len(new_nodes). done=\n%r" "\n%s\n----------\n%s" @@ -486,7 +482,7 @@ def _find_sequence(node_name, known, done): n0s = set(n.name for n in nodes) n1s = set(n.name for n in new_nodes) if n0s != n1s: - raise RuntimeError( # pragma: no cover + raise RuntimeError( "The returned new nodes are different.\n" "%r !=\n%r\ndone=\n%r" "\n----------\n%s\n----------\n%s" @@ -758,7 +754,7 @@ def str_node(indent, node): try: val = str(to_array(att.t).tolist()) except TypeError as e: - raise TypeError( # pragma: no cover + raise TypeError( "Unable to display tensor type %r.\n%s" % (att.type, str(att)) ) from e @@ -808,7 +804,7 @@ def str_node(indent, node): val = ".%d" % att.type atts.append(f"{att.name}={val}") inputs = list(node.input) - if len(atts) > 0: + if atts: inputs.extend(atts) if node.domain in ("", "ai.onnx.ml"): domain = "" @@ -828,7 +824,10 @@ def str_node(indent, node): rows.append(f"opset: domain={opset.domain!r} version={opset.version!r}") if hasattr(model, "graph"): if model.doc_string: - rows.append(f"doc_string: {model.doc_string}") + if len(model.doc_string) < 55: + rows.append(f"doc_string: {model.doc_string}") + else: + rows.append(f"doc_string: {model.doc_string[:55]}...") main_model = model model = model.graph else: @@ -853,9 +852,7 @@ def str_node(indent, node): if isinstance(att, str): rows.append(f"attribute: {att!r}") else: - raise NotImplementedError( # pragma: no cover - "Not yet introduced in onnx." 
- ) + raise NotImplementedError("Not yet introduced in onnx.") # initializer if hasattr(model, "initializer"): @@ -867,9 +864,16 @@ def str_node(indent, node): else: content = "" line_name_new[init.name] = len(rows) + if init.doc_string: + t = ( + f"init: name={init.name!r} type={_get_type(init)} " + f"shape={_get_shape(init)}{content}" + ) + rows.append(f"{t}{' ' * max(0, 70 - len(t))}-- {init.doc_string}") + continue rows.append( - "init: name=%r type=%r shape=%r%s" - % (init.name, _get_type(init), _get_shape(init), content) + f"init: name={init.name!r} type={_get_type(init)} " + f"shape={_get_shape(init)}{content}" ) if level == 0: rows.append("----- main graph ----") @@ -894,7 +898,7 @@ def str_node(indent, node): try: nodes = reorder_nodes_for_display(model.node, verbose=verbose) - except RuntimeError as e: # pragma: no cover + except RuntimeError as e: if raise_exc: raise e else: @@ -917,19 +921,17 @@ def str_node(indent, node): indent = previous_indent else: inds = [indents.get(i, 0) for i in node.input if i not in init_names] - if len(inds) == 0: + if not inds: indent = 0 else: mi = min(inds) indent = mi if previous_indent is not None and indent < previous_indent: if verbose: - print( # pragma: no cover - f"[onnx_simple_text_plot] break2 {node.op_type}" - ) + print(f"[onnx_simple_text_plot] break2 {node.op_type}") add_break = True if not add_break and previous_out is not None: - if len(set(node.input) & previous_out) == 0: + if not (set(node.input) & previous_out): if verbose: print(f"[onnx_simple_text_plot] break3 {node.op_type}") add_break = True @@ -947,7 +949,7 @@ def str_node(indent, node): rows.append(str_node(indent if use_indentation else 0, node)) indents[name] = indent - for i, o in enumerate(node.output): + for _i, o in enumerate(node.output): indents[o] = indent + 1 previous_indent = indents[name] @@ -1052,7 +1054,10 @@ def _mark_link(rows, lengths, r1, r2, d): for fct in main_model.functions: rows.append(f"----- function name={fct.name} domain={fct.domain}") if fct.doc_string: - rows.append(f"----- doc_string: {fct.doc_string}") + if len(fct.doc_string) < 55: + rows.append(f"----- doc_string: {fct.doc_string}") + else: + rows.append(f"----- doc_string: {fct.doc_string[:55]}...") res = onnx_simple_text_plot( fct, verbose=verbose, @@ -1111,10 +1116,19 @@ def onnx_text_plot_io(model, verbose=False, att_display=None): ) # initializer for init in model.initializer: + + if init.doc_string: + t = ( + f"init: name={init.name!r} type={_get_type(init)} " + f"shape={_get_shape(init)}" + ) + rows.append(f"{t}{' ' * max(0, 70 - len(t))}-- {init.doc_string}") + continue rows.append( - "init: name=%r type=%r shape=%r" - % (init.name, _get_type(init), _get_shape(init)) + f"init: name={init.name!r} type={_get_type(init)} " + f"shape={_get_shape(init)}" ) + # outputs for out in model.output: rows.append( diff --git a/onnx_array_api/profiling.py b/onnx_array_api/profiling.py index 51d5ad7..ab2cc6b 100644 --- a/onnx_array_api/profiling.py +++ b/onnx_array_api/profiling.py @@ -71,10 +71,10 @@ def get_root(self): def _get_root(node, stor=None): if stor is not None: stor.append(node) - if len(node.called_by) == 0: + if not node.called_by: return node - if len(node.called_by) == 1: - return _get_root(node.called_by[0], stor=stor) + if len(node.called_by) == 0: + return None res = None for ct in node.called_by: k = id(node), id(ct) @@ -247,8 +247,7 @@ def depth_first(node, roots_keys, indent=0): else: if filter_node is not None and not filter_node(n): continue - for t in depth_first(n, 
roots_keys, indent + 1): - yield t + yield from depth_first(n, roots_keys, indent + 1) if filter_node is None: filter_node = ProfileNode.filter_node_ @@ -383,7 +382,7 @@ def walk(node, roots_keys, indent=0): continue child[key] = walk(n, roots_key, indent + 1) - if len(child) > 0: + if child: mx = max(_[0] for _ in child) dg = int(math.log(mx) / math.log(10) + 1.5) form = f"%-{dg}d-%s" @@ -472,7 +471,7 @@ def add_rows(rows, d): def profile2df( ps: Stats, as_df: bool = True, - clean_text: bool = None, + clean_text: Optional[bool] = None, verbose: bool = False, fLOG=None, ): @@ -740,7 +739,7 @@ def fct4(): node.add_called_by(child) child.add_calls_to(node, vv) - for k, v in nodes.items(): + for _k, v in nodes.items(): root = v.get_root() break diff --git a/onnx_array_api/reference/__init__.py b/onnx_array_api/reference/__init__.py index d8c5aa5..fd1d27c 100644 --- a/onnx_array_api/reference/__init__.py +++ b/onnx_array_api/reference/__init__.py @@ -11,6 +11,13 @@ ) from onnx.reference.op_run import to_array_extended from .evaluator import ExtendedReferenceEvaluator +from .evaluator_yield import ( + DistanceExecution, + ResultExecution, + ResultType, + YieldEvaluator, + compare_onnx_execution, +) def from_array_extended(tensor: np.array, name: Optional[str] = None) -> TensorProto: diff --git a/onnx_array_api/reference/evaluator.py b/onnx_array_api/reference/evaluator.py index e20be76..89b5a84 100644 --- a/onnx_array_api/reference/evaluator.py +++ b/onnx_array_api/reference/evaluator.py @@ -7,6 +7,10 @@ from .ops.op_cast_like import CastLike_15, CastLike_19 from .ops.op_concat import Concat from .ops.op_constant_of_shape import ConstantOfShape +from .ops.op_fused_matmul import FusedMatMul +from .ops.op_memcpy_host import MemcpyFromHost, MemcpyToHost +from .ops.op_quick_gelu import QuickGelu +from .ops.op_scatter_elements import ScatterElements logger = getLogger("onnx-array-api-eval") @@ -32,6 +36,11 @@ class ExtendedReferenceEvaluator(ReferenceEvaluator): CastLike_15, CastLike_19, ConstantOfShape, + FusedMatMul, + MemcpyFromHost, + MemcpyToHost, + QuickGelu, + ScatterElements, ] @staticmethod @@ -108,4 +117,7 @@ def run(self, *args, **kwargs): """ See :meth:`onnx.reference.ReferenceEvaluator.run`. """ + if len(args) == 1 and isinstance(args[0], list): + feeds = dict(zip(self.input_names, args[0])) + return self.run(None, feeds, **kwargs) return ReferenceEvaluator.run(self, *args, **kwargs) diff --git a/onnx_array_api/reference/evaluator_yield.py b/onnx_array_api/reference/evaluator_yield.py new file mode 100644 index 0000000..b53c27d --- /dev/null +++ b/onnx_array_api/reference/evaluator_yield.py @@ -0,0 +1,680 @@ +from dataclasses import dataclass +from typing import Any, Dict, List, Iterator, Optional, Tuple, Union +from enum import IntEnum +import numpy as np +from onnx import ModelProto, TensorProto, ValueInfoProto, load +from onnx.reference import ReferenceEvaluator +from onnx.helper import tensor_dtype_to_np_dtype +from onnx.shape_inference import infer_shapes +from . 
import to_array_extended +from .evaluator import ExtendedReferenceEvaluator + + +def _align(res: str, limit: int) -> str: + if len(res) == limit: + return res + if len(res) > limit: + return res[:limit] + return res + " " * (limit - len(res)) + + +class ResultType(IntEnum): + RESULT = 1 + INITIALIZER = 2 + SPARSE_INITIALIZER = 4 + INPUT = 8 + OUTPUT = 16 + NODE = 32 + + def __repr__(self): + return f"{self.__class__.__name__}.{self._name_}" + + +def _dimension_to_str(d): + if isinstance(d, int): + return str(d) + try: + int(d) + except ValueError: + return d + return f"{d!r}" + + +def _rank_to_str(shape): + if shape: + return f"{len(shape)}:" + return " " + + +@dataclass +class ResultExecution: + """ + The description of a result. + """ + + kind: ResultType + dtype: object + shape: tuple + summary: str + op_type: str + name: str + value: Optional[Any] = None + + def __len__(self) -> int: + return 6 + + def __getitem__(self, i: int) -> Any: + if i == 0: + return self.kind + if i == 1: + return self.dtype + if i == 2: + return self.shape + if i == 3: + return self.summary + if i == 4: + return self.op_type + if i == 5: + return self.name + raise IndexError(f"i={i} out of boundary") + + def __str__(self): + dtype = self.dtype if self.dtype != 0 else "" + els = [ + _align(self.kind._name_, 6), + _align(str(dtype).replace("dtype(", "").replace(")", ""), 8), + _rank_to_str(self.shape) + + _align( + "x".join( + "" if self.shape is None else map(_dimension_to_str, self.shape) + ), + 18, + ), + self.summary, + _align(self.op_type or "", 15), + self.name or "", + ] + return " ".join(els) + + +def make_summary(value: Any, length: int = 4, modulo: int = 26) -> str: + """ + Create a short string summarizing the value (discretization). + + :param value: array + :param length: number of value to produce + :param module: discretization parameter + :return: short string + """ + if isinstance(value, np.float32): + # This should not happen. + value = np.array(value) + assert isinstance( + value, np.ndarray + ), f"Unexpected type {type(value)} for value, it must be a numpy array." + value4 = np.zeros(length, dtype=np.float64) + if value.size <= length: + value4[: value.size] = value.flatten().astype(np.float64) + else: + if value.size % length != 0: + value2 = np.zeros( + value.size + length - value.size % length, dtype=np.float64 + ) + value2[: value.size] = value.flatten().astype(np.float64) + else: + value2 = value.flatten().astype(np.float64) + value4 = value2.reshape((4, -1)).sum(axis=1) + value4 = np.where(np.abs(value4) < 1e10, value4, np.nan) + s = [] + for v in value4: + s.append("?" if np.isnan(v) else (chr(65 + int(v) % modulo))) + return "".join(s) + + +class YieldEvaluator: + """ + This class implements method `enumerate_results` which iterates on + intermediates results. By default, it uses + :class:`onnx_array_api.reference.ExtendedReferenceEvaluator`. 
+ + :param onnx_model: model to run + :param recursive: dig into subgraph and functions as well + :param cls: evaluator to use, default value is :class:`ExtendedReferenceEvaluator + ` + """ + + def __init__( + self, + onnx_model: ModelProto, + recursive: bool = False, + cls: Optional[type[ExtendedReferenceEvaluator]] = None, + ): + assert not recursive, "recursive=True is not yet implemented" + self.onnx_model = onnx_model + self.evaluator = ( + cls(onnx_model) + if cls is not None + else ExtendedReferenceEvaluator(onnx_model) + ) + + def enumerate_results( + self, + output_names: Optional[List[str]] = None, + feed_inputs: Optional[Dict[str, Any]] = None, + raise_exc: bool = True, + ) -> Iterator[Tuple[ResultType, str, Any]]: + """ + Executes the onnx model and enumerate all the intermediate results. + + Args: + output_names: requested outputs by names, None for all + feed_inputs: dictionary `{ input name: input value }` + + Returns: + iterator on tuple(result kind, name, value, node.op_type or None) + """ + assert isinstance(self.evaluator, ReferenceEvaluator), ( + f"This implementation only works with " + f"ReferenceEvaluator not {type(self.evaluator)}" + ) + attributes = {} + if output_names is None: + output_names = self.evaluator.output_names + + results = {"": None} + results.update(self.evaluator.rt_inits_) + results.update(feed_inputs) + # step 0: initializer + for k, v in self.evaluator.rt_inits_.items(): + yield ResultType.INITIALIZER, k, v, None + # step 1: inputs + for k, v in feed_inputs.items(): + yield ResultType.INPUT, k, v, None + + # step 2: execute nodes + yield_output = True + for node in self.evaluator.rt_nodes_: + for i in node.input: + if i not in results: + raise RuntimeError( + f"Unable to find input {i!r} " + f"in known results {sorted(results)}, " + f"self.rt_inits_ has {sorted(self.evaluator.rt_inits_)}, " + f"feed_inputs has {sorted(feed_inputs)}." + ) + inputs = [results[i] for i in node.input] + linked_attributes = {} + if node.has_linked_attribute and attributes: + linked_attributes["linked_attributes"] = attributes + + try: + if node.need_context(): + outputs = node.run(*inputs, context=results, **linked_attributes) + else: + outputs = node.run(*inputs, **linked_attributes) + except Exception: + if raise_exc: + # ExtendedReferenceEvaluator(self.onnx_model, verbose=10).run( + # None, feed_inputs + # ) + raise + yield_output = False + break + + for name, value in zip(node.output, outputs): + yield ResultType.RESULT, name, value, node.op_type + results[name] = value + + # step 3: outputs + if yield_output: + for name in output_names: + if name not in results: + raise RuntimeError( + f"Unable to find output name {name!r} in {sorted(results)}, " + f"proto is\n{self.proto_}" + ) + yield ResultType.OUTPUT, name, results[name], None + + def enumerate_summarized( + self, + output_names: Optional[List[str]] = None, + feed_inputs: Optional[Dict[str, Any]] = None, + raise_exc: bool = True, + keep_tensor: bool = False, + ) -> Iterator[ResultExecution]: + """ + Executes the onnx model and enumerate intermediate results without their names. 
+ + :param output_names: requested outputs by names, None for all + :param feed_inputs: dictionary ``{ input name: input value }`` + :param raise_exc: raises an exception if the execution fails or stop where it is + :param keep_tensor: keep the tensor in order to compute precise distances + :return: iterator on ResultExecution + """ + for kind, name, value, op_type in self.enumerate_results( + output_names, feed_inputs, raise_exc=raise_exc + ): + summary = make_summary(value) + yield ResultExecution( + kind, + value.dtype, + value.shape, + summary, + op_type, + name, + value=value if keep_tensor else None, + ) + + +def discrepancies( + expected: np.ndarray, value: np.ndarray, eps: float = 1e-7 +) -> Dict[str, float]: + """ + Computes absolute error and relative error between two matrices. + """ + assert ( + expected.size == value.size + ), f"Incompatible shapes v1.shape={expected.shape}, v2.shape={value.shape}" + expected = expected.ravel().astype(np.float32) + value = value.ravel().astype(np.float32) + diff = np.abs(expected - value) + rel = diff / (np.abs(expected) + eps) + return dict(aerr=float(diff.max()), rerr=float(rel.max())) + + +class DistanceExecution: + """ + Computes a distance between two results. + """ + + float_types = { + np.float16, + np.float32, + np.float64, + np.dtype("float16"), + np.dtype("float32"), + np.dtype("float64"), + } + + def __init__(self, max_lag: int = 50): + self.kind_cost = 1000 + self.type_cost = 10 + self.rank_cost = 100 + self.op_type_cost = 10 + self.max_lag = max_lag + self.insert_cost = 1000 + + def distance_pair(self, r1: ResultExecution, r2: ResultExecution) -> float: + """ + (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs"), + + :param r1: first result + :param r2: second result + :return: distance + """ + d = 0 + if r1[0] != r2[0]: + # difference type + d += self.kind_cost + if r1[1] != r2[1]: + d += self._cost_type(r1[1], r2[1]) * self.type_cost + if r1[2] != r2[2]: + d += self._cost_shape(r1[2], r2[2]) + if r1[3] != r2[3]: + d += self._cost_summary(r1[3], r2[3]) + if r1[4] != r2[4]: + d += self.op_type_cost + return d + + def _cost_type(self, t1: "np.dtype", t2: "np.dtype") -> float: + if t1 in self.float_types and t2 in self.float_types: + return 0.2 + return 1 + + def _cost_shape(self, s1: Tuple[int, ...], s2: Tuple[int, ...]) -> float: + if s1 is None or s2 is None: + return self.rank_cost + if any(isinstance(s, str) for s in s1) or any(isinstance(s, str) for s in s2): + # dynamic shapes + if len(s1) != len(s2): + return self.rank_cost + d = 0 + for i, j in zip(s1, s2): + if isinstance(i, int) and isinstance(j, int): + d += abs(i - j) + elif i != j: + d += self.rank_cost / 2 + return d + + d = abs(np.prod(s1) - np.prod(s2)) + if len(s1) != len(s2): + return self.rank_cost + d + for i, j in zip(s1, s2): + d += abs(i - j) + return d + + def _cost_summary(self, s1: str, s2: str) -> float: + if len(s1) != len(s2): + return 1e6 + d = 0 + for a, b in zip(s1, s2): + d += abs(ord(a) - ord(b)) + return d + + def distance_sequence( + self, s1: List[ResultExecution], s2: List[ResultExecution] + ) -> Tuple[float, List[Tuple[int, int]]]: + """ + Computes the distance between two sequences of results. 
+ + :param s1: first sequence + :param s2: second sequence + :return: distance and alignment + """ + delay = max(self.max_lag, abs(len(s2) - len(s1)) + 1) + distance = {(-1, -1): 0} + predecessor = {(-1, -1): None} + for i in range(len(s1)): + for j in range(max(0, i - delay), min(len(s2), i + delay)): + best = distance.get((i, j), 1e100) + pred = None + ki, kj = i - 1, j - 1 + if (ki, kj) in distance: + d = distance[ki, kj] + self.distance_pair(s1[i], s2[j]) + if d < best: + best = d + pred = (ki, kj) + ki, kj = i - 1, j + if (ki, kj) in distance: + d = distance[ki, kj] + self.insert_cost + if d < best: + best = d + pred = (ki, kj) + ki, kj = i, j - 1 + if (ki, kj) in distance: + d = distance[ki, kj] + self.insert_cost + if d < best: + best = d + pred = (ki, kj) + distance[i, j] = best + predecessor[i, j] = pred + + # reverse + way = [] + last = len(s1) - 1, len(s2) - 1 + while last is not None: + way.append(last) + last = predecessor[last] + return distance[len(s1) - 1, len(s2) - 1], list(reversed(way))[1:] + + def to_str( + self, + s1: List[ResultExecution], + s2: List[ResultExecution], + alignment: List[Tuple[int, int]], + column_size: int = 60, + ) -> str: + """ + Prints out the alignment between two sequences into a string. + :param s1: first sequence + :param s2: second sequence + :param alignment: alignment + :param column_size: column size + :return: test + """ + rows = [] + last = -1, -1 + row_index = 1 + for i, j in alignment: + assert i < len(s1), f"Unexpected value i={i} >= len(s1)={len(s1)}" + assert j < len(s2), f"Unexpected value i={j} >= len(s2)={len(s2)}" + expected = last[0] + 1, last[1] + 1 + + if expected == (i, j): + d1 = s1[i] + d2 = s2[j] + d = self.distance_pair(d1, d2) + symbol = "=" if d == 0 else "~" + line = ( + f"{symbol} | {_align(str(d1), column_size)} | " + f"{_align(str(d2), column_size)}" + ) + if ( + d1.value is not None + and d2.value is not None + and d1.value.size == d2.value.size + ): + disc = discrepancies(d1.value, d2.value) + a, r = disc["aerr"], disc["rerr"] + line += f" | a={a:.5g} r={r:.5g}" + elif i == last[0]: + d2 = s2[j] + line = ( + f"+ | {_align('', column_size)} | {_align(str(d2), column_size)} " + ) + else: + d1 = s1[i] + line = f"- | {_align(str(d1), column_size)} | {_align('', column_size)}" + rows.append(f"{row_index:03d} {line}") + last = i, j + row_index += 1 + return "\n".join(rows) + + +def generate_input(info: ValueInfoProto) -> np.ndarray: + """ + Generates one input. 
+ """ + elem_type = info.type.tensor_type.elem_type + shape = [ + (getattr(d, "dim_value", None) or getattr(d, "dim_param")) # noqa: B009 + for d in info.type.tensor_type.shape.dim + ] + new_shape = [] + for sh in shape: + if isinstance(sh, str): + if len(new_shape) == 0: + new_shape.append(1) + else: + new_shape.append(16) + else: + new_shape.append(sh) + new_shape = tuple(new_shape) + p = np.prod(new_shape) + value = np.arange(p) + if elem_type == TensorProto.INT32: + return value.astype(np.int32).reshape(new_shape) + if elem_type == TensorProto.INT64: + return value.astype(np.int64).reshape(new_shape) + if elem_type == TensorProto.FLOAT: + return (value.astype(np.float32) / p).astype(np.float32).reshape(new_shape) + if elem_type == TensorProto.FLOAT16: + return (value.astype(np.float16) / p).astype(np.float16).reshape(new_shape) + if elem_type == TensorProto.DOUBLE: + return (value.astype(np.float64) / p).astype(np.float64).reshape(new_shape) + if elem_type == TensorProto.COMPLEX64: + return (value.astype(np.complex64) / p).astype(np.complex64).reshape(new_shape) + if elem_type == TensorProto.COMPLEX128: + return ( + (value.astype(np.complex128) / p).astype(np.complex128).reshape(new_shape) + ) + raise RuntimeError(f"Unexpected element_type {elem_type} for info={info}") + + +def generate_inputs(model: ModelProto) -> List[np.ndarray]: + """ + Generates inputs for a specific model. + + :param model: ModelProto + :return: list of inputs + """ + inputs = [] + inits = set(i.name for i in model.graph.initializer) + for inp in model.graph.input: + if inp.name in inits: + break + inputs.append(generate_input(inp)) + return inputs + + +def _update_shape_types_with_proto( + proto: ModelProto, +) -> Dict[str, Tuple[int, Tuple[Union[int, str], ...]]]: + """ + Retrieves the shapes and types for a model. + """ + assert isinstance(proto, ModelProto), f"Unexpected type {type(proto)} for proto" + res = {} + + for val in proto.graph.input: + itype = val.type.tensor_type.elem_type + shape = tuple( + d.dim_param if d.dim_param else d.dim_value + for d in val.type.tensor_type.shape.dim + ) + res[val.name] = [itype, shape] + + for val in proto.graph.output: + itype = val.type.tensor_type.elem_type + shape = tuple( + d.dim_param if d.dim_param else d.dim_value + for d in val.type.tensor_type.shape.dim + ) + res[val.name] = [itype, shape] + + for val in proto.graph.initializer: + itype = val.data_type + shape = tuple(d for d in val.dims) + res[val.name] = [itype, shape] + + new_proto = infer_shapes(proto) + for val in new_proto.graph.value_info: + itype = val.type.tensor_type.elem_type + shape = tuple( + d.dim_param if d.dim_param else d.dim_value + for d in val.type.tensor_type.shape.dim + ) + res[val.name] = [itype, shape] + + return res + + +def _enumerate_result_no_execution(model: ModelProto) -> Iterator[ResultType]: + """ + Produces a list of results based on a model in order to + trigger the edit distance comparison. 
+ """ + type_shape = _update_shape_types_with_proto(model) + for i in model.graph.initializer: + itype, shape = type_shape.get(i.name, (0, None)) + dtype = tensor_dtype_to_np_dtype(itype) + yield ResultExecution( + ResultType.INITIALIZER, + dtype, + shape, + make_summary(to_array_extended(i)), + "INIT", + i.name, + ) + for i in model.graph.input: + itype, shape = type_shape.get(i.name, (0, None)) + dtype = tensor_dtype_to_np_dtype(itype) + yield ResultExecution(ResultType.INPUT, dtype, shape, "????", "INPUT", i.name) + for node in model.graph.node: + yield ResultExecution(ResultType.NODE, 0, None, "????", node.op_type, node.name) + for o in node.output: + itype, shape = type_shape.get(o, (0, None)) + dtype = 0 if itype == 0 else tensor_dtype_to_np_dtype(itype) + yield ResultExecution( + ResultType.RESULT, dtype, shape, "????", node.op_type, o + ) + for i in model.graph.output: + itype, shape = type_shape.get(i.name, (0, None)) + dtype = tensor_dtype_to_np_dtype(itype) + yield ResultExecution(ResultType.OUTPUT, dtype, shape, "????", "OUTPUT", i.name) + + +def compare_onnx_execution( + model1: ModelProto, + model2: ModelProto, + inputs: Optional[Union[List[Any], Tuple[Dict[str, Any]]]] = None, + verbose: int = 0, + raise_exc: bool = True, + mode: str = "execute", + keep_tensor: bool = False, + cls: Optional[type[ReferenceEvaluator]] = None, +) -> Tuple[List[ResultExecution], List[ResultExecution], List[Tuple[int, int]]]: + """ + Compares the execution of two onnx models. + The function assumes both models takes the same inputs. + See :ref:`l-onnx-diff-example` to see a full example using + this function. + + :param model1: first model + :param model2: second model + :param inputs: inputs to use, a list of inputs if both models have + the same number of inputs or two dictionaries, one for each model + :param verbose: verbosity + :param raise_exc: raise exception if the execution fails or stop at the error + :param mode: the model should be executed but the function can be executed + but the comparison may append on nodes only + :param keep_tensor: keeps the tensor in order to compute a precise distance + :param cls: evaluator class to use + :return: four results, a sequence of results + for the first model and the second model, + the alignment between the two, DistanceExecution + """ + assert mode in {"execute", "nodes"}, f"Unexpected value for mode={mode!r}." + + if mode == "execute": + if inputs is None: + if verbose: + print("[compare_onnx_execution] generate inputs") + inputs = generate_inputs(model1) + if isinstance(inputs, tuple): + assert len(inputs) == 2, f"Unexpected number {len(inputs)} of inputs." 
+ feeds1, feeds2 = inputs + else: + feeds1 = {i.name: v for i, v in zip(model1.graph.input, inputs)} + feeds2 = {i.name: v for i, v in zip(model2.graph.input, inputs)} + assert isinstance(feeds1, dict), f"Unexpected type {type(feeds1)} for inputs" + assert isinstance(feeds2, dict), f"Unexpected type {type(feeds2)} for inputs" + if verbose: + print(f"[compare_onnx_execution] execute with {len(inputs)} inputs") + print("[compare_onnx_execution] execute first model") + res1 = list( + YieldEvaluator(model1, cls=cls).enumerate_summarized( + None, feeds1, raise_exc=raise_exc, keep_tensor=keep_tensor + ) + ) + if verbose: + print(f"[compare_onnx_execution] got {len(res1)} results") + print("[compare_onnx_execution] execute second model") + res2 = list( + YieldEvaluator(model2, cls=cls).enumerate_summarized( + None, feeds2, raise_exc=raise_exc, keep_tensor=keep_tensor + ) + ) + elif mode == "nodes": + # No execution. + if verbose: + print("[compare_onnx_execution] loading first model") + proto1 = load(model1) if isinstance(model1, str) else model1 + if verbose: + print("[compare_onnx_execution] loading second model") + proto2 = load(model2) if isinstance(model2, str) else model2 + res1 = list(_enumerate_result_no_execution(proto1)) + res2 = list(_enumerate_result_no_execution(proto2)) + else: + return + + if verbose: + print(f"[compare_onnx_execution] got {len(res1)} results (first model)") + print(f"[compare_onnx_execution] got {len(res2)} results (second model)") + print("[compare_onnx_execution] compute edit distance") + dc = DistanceExecution() + _, align = dc.distance_sequence(res1, res2) + if verbose: + print(f"[compare_onnx_execution] got {len(align)} pairs") + print("[compare_onnx_execution] done") + return res1, res2, align, dc diff --git a/onnx_array_api/reference/ops/op_constant_of_shape.py b/onnx_array_api/reference/ops/op_constant_of_shape.py index 33308af..a54bb5a 100644 --- a/onnx_array_api/reference/ops/op_constant_of_shape.py +++ b/onnx_array_api/reference/ops/op_constant_of_shape.py @@ -7,7 +7,7 @@ class ConstantOfShape(OpRun): def _process(value): cst = value[0] if isinstance(value, np.ndarray) and value.size > 0 else value if isinstance(value, np.ndarray): - if len(value.shape) == 0: + if not value.shape: cst = value elif value.size > 0: cst = value.ravel()[0] @@ -19,6 +19,8 @@ def _process(value): cst = np.int64(cst) elif isinstance(cst, float): cst = np.float64(cst) + elif isinstance(cst, complex): + cst = np.complex128(cst) elif cst is None: cst = np.float32(0) if not isinstance( @@ -27,6 +29,8 @@ def _process(value): np.float16, np.float32, np.float64, + np.complex64, + np.complex128, np.int64, np.int32, np.int16, diff --git a/onnx_array_api/reference/ops/op_fused_matmul.py b/onnx_array_api/reference/ops/op_fused_matmul.py new file mode 100644 index 0000000..1ee0f04 --- /dev/null +++ b/onnx_array_api/reference/ops/op_fused_matmul.py @@ -0,0 +1,35 @@ +import numpy as np +from onnx.reference.op_run import OpRun + + +class FusedMatMul(OpRun): + op_domain = "com.microsoft" + + def _run( + self, + A, + B, + alpha: float = 1, + transA: int = 0, + transB: int = 0, + transBatchA: int = 0, + transBatchB: int = 0, + ): + assert ( + transBatchA == 0 + ), f"Not implemented for transBatchA==1 and {A.shape}x{B.shape}" + assert ( + transBatchB == 0 + ), f"Not implemented for transBatchB==1 and {A.shape}x{B.shape}" + if transA: + perm = list(range(len(A.shape))) + dim = len(perm) + perm[dim - 2], perm[dim - 1] = perm[dim - 1], perm[dim - 2] + A = np.transpose(A, perm) + if transB: + perm = 
list(range(len(B.shape))) + dim = len(perm) + perm[dim - 2], perm[dim - 1] = perm[dim - 1], perm[dim - 2] + B = np.transpose(B, perm) + a = np.array(alpha, dtype=A.dtype) + return (np.matmul(A, B) * a,) diff --git a/onnx_array_api/reference/ops/op_memcpy_host.py b/onnx_array_api/reference/ops/op_memcpy_host.py new file mode 100644 index 0000000..ac365e7 --- /dev/null +++ b/onnx_array_api/reference/ops/op_memcpy_host.py @@ -0,0 +1,11 @@ +from onnx.reference.op_run import OpRun + + +class MemcpyFromHost(OpRun): + def _run(self, x): + return (x,) + + +class MemcpyToHost(OpRun): + def _run(self, x): + return (x,) diff --git a/onnx_array_api/reference/ops/op_quick_gelu.py b/onnx_array_api/reference/ops/op_quick_gelu.py new file mode 100644 index 0000000..e30c5ec --- /dev/null +++ b/onnx_array_api/reference/ops/op_quick_gelu.py @@ -0,0 +1,23 @@ +import numpy as np +from onnx.reference.op_run import OpRun + + +def sigmoid(x): # type: ignore + if x > 0: + return 1 / (1 + np.exp(-x)) + return np.exp(x) / (1 + np.exp(x)) + + +class QuickGelu(OpRun): + op_domain = "com.microsoft" + + def __init__(self, onnx_node, run_params): # type: ignore + OpRun.__init__(self, onnx_node, run_params) + self.vf = np.vectorize(sigmoid) + + def _run(self, X, alpha=1.0): + if len(X.shape) == 0: + return ((X * sigmoid(X * alpha)).astype(X.dtype),) + if X.size == 0: + return (X,) + return ((X * self.vf(X * alpha)).astype(X.dtype),) diff --git a/onnx_array_api/reference/ops/op_scatter_elements.py b/onnx_array_api/reference/ops/op_scatter_elements.py new file mode 100644 index 0000000..c4b0efa --- /dev/null +++ b/onnx_array_api/reference/ops/op_scatter_elements.py @@ -0,0 +1,98 @@ +import numpy as np + +from onnx.reference.op_run import OpRun + + +def scatter_elements(data, indices, updates, axis=0, reduction=None): # type: ignore + if reduction == "add": + + def f(x, y): + return x + y + + elif reduction == "min": + + def f(x, y): + return min(x, y) + + elif reduction == "max": + + def f(x, y): + return max(x, y) + + else: + + def f(x, y): + return y + + if axis < 0: + axis = data.ndim + axis + + if len(data.shape) == 1 and axis == 0: + scattered = np.copy(data) + for pos, up in zip(indices, updates): + scattered[pos] = f(scattered[pos], up) + return scattered + + if len(indices.shape) == 2: + scattered = np.copy(data) + if axis == 0: + for i in range(indices.shape[0]): + for j in range(indices.shape[1]): + scattered[indices[i, j], j] = f( + scattered[indices[i, j], j], updates[i, j] + ) + else: + for i in range(indices.shape[0]): + for j in range(indices.shape[1]): + scattered[i, indices[i, j]] = f( + scattered[i, indices[i, j]], updates[i, j] + ) + return scattered + + if len(indices.shape) == 3: + scattered = np.copy(data) + if axis == 0: + for i in range(indices.shape[0]): + for j in range(indices.shape[1]): + for k in range(indices.shape[2]): + scattered[indices[i, j, k], j, k] = f( + scattered[indices[i, j, k], j, k], updates[i, j, k] + ) + elif axis == 1: + for i in range(indices.shape[0]): + for j in range(indices.shape[1]): + for k in range(indices.shape[2]): + scattered[i, indices[i, j, k], k] = f( + scattered[i, indices[i, j, k], k], updates[i, j, k] + ) + elif axis == 2: + for i in range(indices.shape[0]): + for j in range(indices.shape[1]): + for k in range(indices.shape[2]): + scattered[i, j, indices[i, j, k]] = f( + scattered[i, j, indices[i, j, k]], updates[i, j, k] + ) + return scattered + + if len(indices.shape) == 4: + scattered = np.copy(data) + if axis == 3: + for a in range(indices.shape[0]): + for i 
in range(indices.shape[1]): + for j in range(indices.shape[2]): + for k in range(indices.shape[3]): + scattered[a, i, j, indices[a, i, j, k]] = f( + scattered[a, i, j, indices[a, i, j, k]], + updates[a, i, j, k], + ) + return scattered + + raise RuntimeError( + f"Not implemented for indices.shape={indices.shape} and axis={axis}" + ) + + +class ScatterElements(OpRun): + def _run(self, data, indices, updates, axis=None, reduction=None): # type: ignore + res = scatter_elements(data, indices, updates, axis=axis, reduction=reduction) + return (res,) diff --git a/onnx_array_api/tools/__init__.py b/onnx_array_api/tools/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/onnx_array_api/tools/__init__.py @@ -0,0 +1 @@ + diff --git a/onnx_array_api/tools/replace_constants.py b/onnx_array_api/tools/replace_constants.py new file mode 100644 index 0000000..daa4ca8 --- /dev/null +++ b/onnx_array_api/tools/replace_constants.py @@ -0,0 +1,227 @@ +import numpy as np +from onnx import FunctionProto, ModelProto, GraphProto, AttributeProto +from onnx.helper import ( + make_model, + set_model_props, + make_graph, + make_node, + make_attribute, + make_function, + tensor_dtype_to_np_dtype, +) +from onnx.numpy_helper import from_array + + +def replace_initializer_by_constant_of_shape( + onx, threshold=128, op_type="ConstantOfShape", domain="" +): + """ + Replaces initializers by nodes *ConstantOfShape* to reduce + the size and still write a unit test. + + :param onx: ModelProto + :param threshold: every initializer under this threshold is not impacted + :param op_type: replace by this node + :param domain: replace by this domain + :return: onx, modified ModelProto + """ + if isinstance(onx, FunctionProto): + modified = False + new_nodes = [] + for node in onx.node: + if node.op_type == "Constant": + from onnx_array_api.reference import ExtendedReferenceEvaluator + + ref = ExtendedReferenceEvaluator(node) + cst = ref.run(None, {})[0] + + size = np.prod(cst.shape) + if size <= threshold: + new_nodes.append(node) + continue + + new_name = f"{node.output[0]}__SHAPE" + new_nodes.append( + make_node( + "Constant", + [], + [new_name], + value=from_array( + np.array(cst.shape, dtype=np.int64), name=new_name + ), + ) + ) + dtype = cst.dtype + assert op_type != "Constant" + new_nodes.append( + make_node( + op_type, + [new_name], + node.output, + value=from_array(np.array([0.5], dtype=dtype)), + domain=domain, + ) + ) + modified = True + continue + + new_nodes.append(node) + + if not modified: + return onx + + onxf = make_function( + domain=onx.domain, + fname=onx.name, + inputs=onx.input, + outputs=onx.output, + nodes=new_nodes, + doc_string=onx.doc_string, + overload=onx.overload, + opset_imports=[], + ) + if onx.opset_import: + onxf.opset_import.extend(onx.opset_import) + if onx.value_info: + onxf.value_info.extend(onx.value_info) + if onx.attribute: + onxf.attribute.extend(onx.attribute) + if onx.attribute_proto: + onxf.attribute_proto.extend(onx.attribute_proto) + return onxf + + if isinstance(onx, ModelProto): + new_graph = replace_initializer_by_constant_of_shape( + onx.graph, threshold=threshold, op_type=op_type, domain=domain + ) + new_functions = [ + replace_initializer_by_constant_of_shape( + f, threshold=threshold, op_type=op_type, domain=domain + ) + for f in onx.functions + ] + model = make_model( + new_graph, + functions=new_functions, + producer_name=onx.producer_name, + producer_version=onx.producer_version, + ir_version=onx.ir_version, + doc_string=onx.doc_string, + 
domain=onx.domain, + model_version=onx.model_version, + ) + if len(onx.metadata_props) > 0: # pragma: no cover + values = {p.key: p.value for p in onx.metadata_props} + set_model_props(model, values) + + del model.opset_import[:] # pylint: disable=E1101 + for oimp in onx.opset_import: + op_set = model.opset_import.add() # pylint: disable=E1101 + if oimp.domain == "" and oimp.version < 9: + raise RuntimeError( + f"ConstantOfShape was introduced in " + f"opset 9 but opset is {oimp.version}." + ) + op_set.domain = oimp.domain + op_set.version = oimp.version + return model + + if not isinstance(onx, GraphProto): + raise TypeError(f"onx should be a GraphProto as this stage not {type(onx)}.") + + new_nodes = [] + removed = set() + additional_inputs = [] + + new_inits = [] + for init in onx.initializer: + dims = tuple(init.dims) + size = np.prod(dims) + if size <= threshold: + new_inits.append(init) + continue + new_name = f"{init.name}__SHAPE" + new_inits.append( + from_array(np.array(list(dims), dtype=np.int64), name=new_name) + ) + dtype = tensor_dtype_to_np_dtype(init.data_type) + node = make_node( + op_type, + [new_name], + [init.name], + value=from_array(np.array([0.5], dtype=dtype)), + domain=domain, + ) + new_nodes.append(node) + removed.add(init.name) + + new_sparse_inits = [] + for init in onx.sparse_initializer: + dims = tuple(init.dims) + size = np.prod(dims) + if size <= threshold: + new_sparse_inits.append(init) + continue + raise NotImplementedError( + f"This feature is not yet implemented for sparse initializer" + f"(name={init.name!r})." + ) + + for node in onx.node: + if node.op_type == "Constant": + from onnx_array_api.reference import ExtendedReferenceEvaluator + + ref = ExtendedReferenceEvaluator(node) + cst = ref.run(None, {})[0] + + size = np.prod(cst.shape) + if size <= threshold: + new_nodes.append(node) + continue + + new_name = f"{node.output[0]}__SHAPE" + new_inits.append( + from_array(np.array(cst.shape, dtype=np.int64), name=new_name) + ) + dtype = cst.dtype + new_nodes.append( + make_node( + op_type, + [new_name], + node.output, + value=from_array(np.array([0.5], dtype=dtype)), + domain=domain, + ) + ) + continue + + modified = False + atts = [] + for att in node.attribute: + if ( + att.type == AttributeProto.GRAPH + and hasattr(att, "g") + and att.g is not None + ): + modified = True + g = replace_initializer_by_constant_of_shape( + att.g, threshold=threshold, op_type=op_type, domain=domain + ) + att = make_attribute(att.name, g) + atts.append(att) + if modified: + new_node = make_node(node.op_type, node.input, node.output) + new_node.attribute.extend(atts) + new_nodes.append(new_node) + else: + new_nodes.append(node) + + graph = make_graph( + new_nodes, + onx.name, + [i for i in onx.input if i.name not in removed] + additional_inputs, + onx.output, + initializer=new_inits, + sparse_initializer=new_sparse_inits, + ) + return graph diff --git a/onnx_array_api/translate_api/__init__.py b/onnx_array_api/translate_api/__init__.py new file mode 100644 index 0000000..a9a8932 --- /dev/null +++ b/onnx_array_api/translate_api/__init__.py @@ -0,0 +1,94 @@ +from onnx import ModelProto +from .translate import Translater +from .inner_emitter import InnerEmitter, InnerEmitterShortInitializer +from .builder_emitter import BuilderEmitter + + +def translate(proto: ModelProto, single_line: bool = False, api: str = "light") -> str: + """ + Translates an ONNX proto into a code using :ref:`l-light-api` + to describe the ONNX graph. 
+ + :param proto: model to translate + :param single_line: as a single line or not + :param api: API to export into, + default is `"light"` and this is handle by class + :class:`onnx_array_api.translate_api.light_emitter.LightEmitter`, + another value is `"onnx"` which is the inner API implemented + in onnx package, `"builder"` follows the syntax for the + class :class:`onnx_array_api.graph_api.GraphBuilder`, + `"onnx-short"` replaces long initializer with random values + :return: code + + .. runpython:: + :showcode: + + from onnx_array_api.light_api import start + from onnx_array_api.translate_api import translate + + onx = ( + start() + .vin("X") + .reshape((-1, 1)) + .Transpose(perm=[1, 0]) + .rename("Y") + .vout() + .to_onnx() + ) + code = translate(onx) + print(code) + + The inner API from onnx package is also available. + + .. runpython:: + :showcode: + + from onnx_array_api.light_api import start + from onnx_array_api.translate_api import translate + + onx = ( + start() + .vin("X") + .reshape((-1, 1)) + .Transpose(perm=[1, 0]) + .rename("Y") + .vout() + .to_onnx() + ) + code = translate(onx, api="onnx") + print(code) + + The :class:`GraphBuilder + ` API returns this: + + .. runpython:: + :showcode: + + from onnx_array_api.light_api import start + from onnx_array_api.translate_api import translate + + onx = ( + start() + .vin("X") + .reshape((-1, 1)) + .Transpose(perm=[1, 0]) + .rename("Y") + .vout() + .to_onnx() + ) + code = translate(onx, api="builder") + print(code) + """ + if api == "light": + tr = Translater(proto) + return tr.export(single_line=single_line, as_str=True) + if api == "onnx": + tr = Translater(proto, emitter=InnerEmitter()) + return tr.export(as_str=True) + if api == "onnx-short": + tr = Translater(proto, emitter=InnerEmitterShortInitializer()) + return tr.export(as_str=True) + if api == "builder": + tr = Translater(proto, emitter=BuilderEmitter()) + return tr.export(as_str=True) + raise ValueError(f"Unexpected value {api!r} for api.") diff --git a/onnx_array_api/translate_api/base_emitter.py b/onnx_array_api/translate_api/base_emitter.py new file mode 100644 index 0000000..e8d3811 --- /dev/null +++ b/onnx_array_api/translate_api/base_emitter.py @@ -0,0 +1,280 @@ +import inspect +from typing import Any, Dict, List, Optional, Tuple +from enum import IntEnum +import numpy as np +from onnx import AttributeProto + + +class EventType(IntEnum): + START = 0 + INPUT = 1 + OUTPUT = 2 + NODE = 3 + TO_ONNX_MODEL = 4 + BEGIN_GRAPH = 5 + END_GRAPH = 6 + BEGIN_FUNCTION = 7 + END_FUNCTION = 8 + INITIALIZER = 9 + SPARSE_INITIALIZER = 10 + FUNCTION_INPUT = 11 + FUNCTION_OUTPUT = 12 + FUNCTION_ATTRIBUTES = 13 + TO_ONNX_FUNCTION = 14 + BEGIN_SIGNATURE = 15 + END_SIGNATURE = 16 + BEGIN_RETURN = 17 + END_RETURN = 18 + BEGIN_FUNCTION_SIGNATURE = 19 + END_FUNCTION_SIGNATURE = 20 + BEGIN_FUNCTION_RETURN = 21 + END_FUNCTION_RETURN = 22 + + @classmethod + def to_str(cls, self) -> str: + for k, v in EventType.__dict__.items(): + if self == v: + return f"{cls.__name__}.{k}" + + +class BaseEmitter: + def __call__(self, event: EventType, **kwargs: Dict[str, Any]) -> List[str]: + """ + Converts an event into an instruction. 
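+
+        For instance, with the :class:`LightEmitter` defined in this
+        package (a sketch)::
+
+            emitter = LightEmitter()
+            rows = emitter(EventType.INPUT, name="X", elem_type=1, shape=(1, 2))
+            # rows == ["vin('X', elem_type=TensorProto.FLOAT, shape=(1, 2))"]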
+ + :param event: event kind + :param kwargs: event parameters + :return: list of instructions + """ + + if event == EventType.NODE: + return self._emit_node(**kwargs) + + if event == EventType.INITIALIZER: + return self._emit_initializer(**kwargs) + + if event == EventType.SPARSE_INITIALIZER: + return self._emit_sparse_initializer(**kwargs) + + if event == EventType.INPUT: + return self._emit_input(**kwargs) + + if event == EventType.OUTPUT: + return self._emit_output(**kwargs) + + if event == EventType.START: + return self._emit_start(**kwargs) + + if event == EventType.TO_ONNX_MODEL: + return self._emit_to_onnx_model(**kwargs) + + if event == EventType.TO_ONNX_FUNCTION: + return self._emit_to_onnx_function(**kwargs) + + if event == EventType.BEGIN_GRAPH: + return self._emit_begin_graph(**kwargs) + + if event == EventType.END_GRAPH: + return self._emit_end_graph(**kwargs) + + if event == EventType.BEGIN_FUNCTION: + return self._emit_begin_function(**kwargs) + + if event == EventType.BEGIN_FUNCTION_SIGNATURE: + return self._emit_begin_function_signature(**kwargs) + + if event == EventType.END_FUNCTION_SIGNATURE: + return self._emit_end_function_signature(**kwargs) + + if event == EventType.END_FUNCTION: + return self._emit_end_function(**kwargs) + + if event == EventType.FUNCTION_INPUT: + return self._emit_function_input(**kwargs) + + if event == EventType.FUNCTION_OUTPUT: + return self._emit_function_output(**kwargs) + + if event == EventType.FUNCTION_ATTRIBUTES: + return self._emit_function_attributes(**kwargs) + + if event == EventType.BEGIN_SIGNATURE: + return self._emit_begin_signature(**kwargs) + + if event == EventType.END_SIGNATURE: + return self._emit_end_signature(**kwargs) + + if event == EventType.BEGIN_RETURN: + return self._emit_begin_return(**kwargs) + + if event == EventType.END_RETURN: + return self._emit_end_return(**kwargs) + + if event == EventType.BEGIN_FUNCTION_RETURN: + return self._emit_begin_function_return(**kwargs) + + if event == EventType.END_FUNCTION_RETURN: + return self._emit_end_function_return(**kwargs) + + raise ValueError(f"Unexpected event {EventType.to_str(event)}.") + + def render_attribute_value(self, value: Any) -> Tuple[List[str], str]: + """ + Renders an attribute value into a string. + + :param value: value to converter + :return: rows to append before, actual value + """ + v = value[-1] + if value[0].type == AttributeProto.TENSOR: + repl = {"bool": "bool_", "object": "object_", "str": "str_"} + sdtype = repl.get(str(v.dtype), str(str(v.dtype))) + return [], ( + f"from_array(np.array({v.tolist()}, dtype=np.{sdtype}), " + f"name={value[0].name!r})" + ) + if isinstance(v, (int, float, list)): + return [], str(v) + if isinstance(v, str): + return [], f"{v!r}" + if isinstance(v, np.ndarray): + if not v.shape: + return [], str(v) + if len(v.shape) == 1: + if value[0].type in ( + AttributeProto.INTS, + AttributeProto.FLOATS, + AttributeProto.STRINGS, + ): + return [], str(v.tolist()) + + if value[0].type == AttributeProto.GRAPH: + from .translate import Translater + + tr = Translater(value[0].g, emitter=self) + rows = tr.export(as_str=False, single_line=False) + # last instruction is to_onnx, let's drop it. 
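+            # e.g. rows ["vin('X')", "vout()", "to_onnx()"]
+            # become "g().vin('X').vout()"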
+ srows = ".".join(rows[:-1]) + return [], f"g().{srows}" + + if isinstance(value, tuple) and len(value) == 2 and value[1] is None: + # in a function, an attribute receiving a value from an attribute + v = value[0] + name = v.name + ref = v.ref_attr_name + dt = v.type + return [], self._make_attribute(name=name, ref_attr_name=ref, attr_type=dt) + + raise ValueError( + f"Unable to render an attribute {type(v)}, " + f"attribute type={value[0].type}, " + f"dtype={getattr(v, 'dtype', '-')}, " + f"shape={getattr(v, 'shape', '-')}, type(value)={type(value)}, " + f"value={value!r}." + ) + + def _make_attribute( + self, name: str, attr_type: int, ref_attr_name: Optional[str] = None + ) -> str: + raise NotImplementedError( + f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded." + ) + + def join(self, rows: List[str], single_line: bool = False) -> str: + raise NotImplementedError( + f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded." + ) + + def _emit_start(self, **kwargs: Dict[str, Any]) -> List[str]: + raise NotImplementedError( + f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded." + ) + + def _emit_to_onnx_model(self, **kwargs: Dict[str, Any]) -> List[str]: + raise NotImplementedError( + f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded." + ) + + def _emit_to_onnx_function(self, **kwargs: Dict[str, Any]) -> List[str]: + raise NotImplementedError( + f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded." + ) + + def _emit_begin_graph(self, **kwargs: Dict[str, Any]) -> List[str]: + raise NotImplementedError( + f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded." + ) + + def _emit_end_graph(self, **kwargs: Dict[str, Any]) -> List[str]: + raise NotImplementedError( + f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded." + ) + + def _emit_initializer(self, **kwargs: Dict[str, Any]) -> List[str]: + raise NotImplementedError( + f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded." + ) + + def _emit_input(self, **kwargs: Dict[str, Any]) -> List[str]: + raise NotImplementedError( + f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded." + ) + + def _emit_output(self, **kwargs: Dict[str, Any]) -> List[str]: + raise NotImplementedError( + f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded." + ) + + def _emit_node(self, **kwargs: Dict[str, Any]) -> List[str]: + raise NotImplementedError( + f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded." + ) + + def _emit_sparse_initializer(self, **kwargs: Dict[str, Any]) -> List[str]: + raise NotImplementedError( + f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded." + ) + + def _emit_begin_function(self, **kwargs: Dict[str, Any]) -> List[str]: + raise NotImplementedError( + f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded." + ) + + def _emit_begin_function_signature(self, **kwargs: Dict[str, Any]) -> List[str]: + return [] + + def _emit_end_function_signature(self, **kwargs: Dict[str, Any]) -> List[str]: + return [] + + def _emit_function_input(self, **kwargs: Dict[str, Any]) -> List[str]: + raise NotImplementedError( + f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded." + ) + + def _emit_function_output(self, **kwargs: Dict[str, Any]) -> List[str]: + raise NotImplementedError( + f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded." 
+ ) + + def _emit_function_attributes(self, **kwargs: Dict[str, Any]) -> List[str]: + raise NotImplementedError( + f"Method {inspect.currentframe().f_code.co_name!r} was not overloaded." + ) + + def _emit_begin_signature(self, **kwargs: Dict[str, Any]) -> List[str]: + return [] + + def _emit_end_signature(self, **kwargs: Dict[str, Any]) -> List[str]: + return [] + + def _emit_begin_return(self, **kwargs: Dict[str, Any]) -> List[str]: + return [] + + def _emit_end_return(self, **kwargs: Dict[str, Any]) -> List[str]: + return [] + + def _emit_begin_function_return(self, **kwargs: Dict[str, Any]) -> List[str]: + return [] + + def _emit_end_function_return(self, **kwargs: Dict[str, Any]) -> List[str]: + return [] diff --git a/onnx_array_api/translate_api/builder_emitter.py b/onnx_array_api/translate_api/builder_emitter.py new file mode 100644 index 0000000..19dd7f9 --- /dev/null +++ b/onnx_array_api/translate_api/builder_emitter.py @@ -0,0 +1,242 @@ +from typing import Any, Dict, List +from onnx import TensorProto +from onnx.numpy_helper import to_array +from .base_emitter import BaseEmitter + +_types = { + TensorProto.DOUBLE: "DOUBLE", + TensorProto.FLOAT: "FLOAT", + TensorProto.FLOAT16: "FLOAT16", + TensorProto.INT64: "INT64", + TensorProto.INT32: "INT32", + TensorProto.INT16: "INT16", + TensorProto.UINT64: "UINT64", + TensorProto.UINT32: "UINT32", + TensorProto.UINT16: "UINT16", + TensorProto.STRING: "STRING", + TensorProto.BOOL: "BOOL", +} + + +def _itype_to_string(itype: int) -> str: + return _types[itype] + + +class BuilderEmitter(BaseEmitter): + """ + Converts event into proper code. + """ + + def __init__(self, make_model_function: str = ""): + super().__init__() + self.make_model_function = make_model_function + + def join(self, rows: List[str], single_line: bool = False) -> str: + "Join the rows" + assert ( + not single_line + ), f"The emitter {type(self)} does not work with single_line=True." 
+ return "\n".join(rows) + + def _emit_start(self, **kwargs: Dict[str, Any]) -> List[str]: + self.opsets = kwargs.get("opsets", {}) + self.ir_version = kwargs.get("ir_version", None) + self.function_calls = [] + return [] + + def _emit_to_onnx_model(self, **kwargs: Dict[str, Any]) -> List[str]: + inps = ", ".join(["g.op", *[f'"{i}"' for i in self.inputs]]) + inputs = [] + for inp, stype, shape in self.inputs_full_: + inputs.append(f'g.make_tensor_input("{inp}", TensorProto.{stype}, {shape})') + outputs = [] + for inp, stype, shape in self.outputs_full_: + outputs.append( + f'g.make_tensor_output("{inp}", TensorProto.{stype}, ' + f"{shape}, is_dimension=False, indexed=False)" + ) + rows = [ + "", + ( + f"g = GraphBuilder({self.opsets}, ir_version={self.ir_version})" + if self.ir_version + else f"GraphBuilder({self.opsets})" + ), + *inputs, + f"{self.name}({inps})", + *outputs, + *self.function_calls, + "model = g.to_onnx()", + ] + if self.make_model_function: + rows = [ + "", + "", + f'def {self.make_model_function}() -> "ModelProto":', + *[" " + _ for _ in rows[1:]], + " return model", + "", + "", + f"model = {self.make_model_function}()", + ] + return rows + + def _emit_begin_graph(self, **kwargs: Dict[str, Any]) -> List[str]: + self.inputs = [] + self.inputs_full = [] + self.outputs = [] + self.inits = [] + self.inputs_full_ = [] + self.outputs_full_ = [] + self.name = kwargs.get("name", "make_graph") + return [] + + def _emit_end_graph(self, **kwargs: Dict[str, Any]) -> List[str]: + return [] + + def _emit_initializer(self, **kwargs: Dict[str, Any]) -> List[str]: + init = kwargs["init"] + if isinstance(init, TensorProto): + assert ( + kwargs["name"] == init.name + ), f"Name mismatch init.name={init.name!r}, name={kwargs['name']!r}" + self.inits.append(init) + return [] + raise AssertionError(f"Unsupported type for an initializer {type(init)}") + + def _emit_input(self, **kwargs: Dict[str, Any]) -> List[str]: + name = kwargs["name"] + itype = kwargs.get("elem_type", 0) + shape = kwargs.get("shape", None) + name = self._clean_result_name(name) + if itype == 0: + inp = name or "X" + else: + if shape is None: + inp = f'{name}: "{_itype_to_string(itype)}"' + else: + inp = ( + f'{name}: "{_itype_to_string(itype)}[{", ".join(map(str, shape))}]"' + ) + self.inputs_full.append(inp) + self.inputs.append(name) + self.inputs_full_.append((name, _itype_to_string(itype), shape)) + return [] + + def _emit_begin_signature(self, **kwargs: Dict[str, Any]) -> List[str]: + return [] + + def _emit_end_signature(self, **kwargs: Dict[str, Any]) -> List[str]: + rows = ["", f"def {self.name}(", ' op: "GraphBuilder",'] + for i in self.inputs_full: + rows.append(f" {i},") + rows.append("):") + for init in self.inits: + val = to_array(init) + stype = str(val.dtype).split(".")[-1] + name = self._clean_result_name(init.name) + rows.append(f" {name} = np.array({val.tolist()}, dtype=np.{stype})") + return rows + + def _emit_begin_return(self, **kwargs: Dict[str, Any]) -> List[str]: + return [] + + def _emit_end_return(self, **kwargs: Dict[str, Any]) -> List[str]: + outs = ", ".join(self.outputs) + return [f" return {outs}"] + + def _emit_output(self, **kwargs: Dict[str, Any]) -> List[str]: + name = kwargs["name"] + name = self._clean_result_name(name) + itype = kwargs.get("elem_type", 0) + shape = kwargs.get("shape", None) + self.outputs.append(name) + self.outputs_full_.append((name, _itype_to_string(itype), shape)) + return [f' op.Identity({name}, outputs=["{name}"])'] + + def _emit_node(self, **kwargs: Dict[str, 
Any]) -> List[str]: + op_type = kwargs["op_type"] + inputs = kwargs["inputs"] + outputs = kwargs["outputs"] + domain = kwargs.get("domain", "") + atts = kwargs.get("atts", {}) + args = [] + for k, v in atts.items(): + before, vatt = self.render_attribute_value(v) + if before: + raise NotImplementedError("Graph attribute not supported yet.") + args.append(f"{k}={vatt}") + + cleaned_outputs = list(map(self._clean_result_name, outputs)) + outs = ", ".join(cleaned_outputs) + inps = ", ".join(map(self._clean_result_name, inputs)) + op_type = self._emit_node_type(op_type, domain) + # Let's add output names to make it easier to debug. + soutputs = f", outputs={cleaned_outputs}" + sdomain = soutputs if not domain else f", domain={domain!r}{soutputs}" + if args: + sargs = ", ".join(args) + if inps: + row = f" {outs} = op.{op_type}({inps}, {sargs}{sdomain})" + else: + row = f" {outs} = op.{op_type}({sargs}{sdomain})" + else: + row = f" {outs} = op.{op_type}({inps}{sdomain})" + return [row] + + def _clean_result_name(self, name): + return name + + def _emit_node_type(self, op_type, domain): + return op_type + + def _emit_begin_function(self, **kwargs: Dict[str, Any]) -> List[str]: + self.f_inputs = [] + self.f_outputs = [] + self.f_inits = [] + self.f_name = kwargs["name"] + self.f_domain = kwargs["domain"] + self.f_attributes = [] + self.f_opsets = kwargs["opsets"] + return [] + + def _emit_begin_function_signature(self, **kwargs: Dict[str, Any]) -> List[str]: + return [] + + def _emit_end_function_signature(self, **kwargs: Dict[str, Any]) -> List[str]: + self.f_call_name = f"make_{self.f_domain}_{self.f_name}" + return [ + "", + "", + f'def {self.f_call_name}(g: "GraphBuilder"):', + f" gr = GraphBuilder({self.f_opsets}, as_function=True)", + *[f" {name} = gr.make_tensor_input({name!r})" for name in self.f_inputs], + " op = gr.op", + ] + + def _emit_to_onnx_function(self, **kwargs: Dict[str, Any]) -> List[str]: + return [" return gr"] + + def _emit_function_input(self, **kwargs: Dict[str, Any]) -> List[str]: + self.f_inputs.append(kwargs["name"]) + return [] + + def _emit_function_output(self, **kwargs: Dict[str, Any]) -> List[str]: + self.f_outputs.append(kwargs["name"]) + return [] + + def _emit_function_attributes(self, **kwargs: Dict[str, Any]) -> List[str]: + raise NotImplementedError("Function attribute are not implemented yet.") + + def _emit_end_function(self, **kwargs: Dict[str, Any]) -> List[str]: + self.function_calls.append(f"{self.f_call_name}(g)") + return [ + *[f" gr.make_tensor_output({name})" for name in self.f_outputs], + " g.add_function(builder=gr)", + ] + + def _emit_begin_function_return(self, **kwargs: Dict[str, Any]) -> List[str]: + return [] + + def _emit_end_function_return(self, **kwargs: Dict[str, Any]) -> List[str]: + return [] diff --git a/onnx_array_api/translate_api/inner_emitter.py b/onnx_array_api/translate_api/inner_emitter.py new file mode 100644 index 0000000..de63dcc --- /dev/null +++ b/onnx_array_api/translate_api/inner_emitter.py @@ -0,0 +1,266 @@ +from typing import Any, Dict, List, Optional, Tuple +from onnx import AttributeProto +from ..annotations import ELEMENT_TYPE_NAME +from .base_emitter import BaseEmitter +from .translate import Translater + + +class InnerEmitter(BaseEmitter): + """ + Converts event into proper code. + """ + + def render_attribute_value(self, value: Any) -> Tuple[List[str], str]: + """ + Renders an attribute value into a string. 
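+
+        For a ``GRAPH`` attribute such as the ``then_branch`` of an ``If``
+        node, the subgraph is wrapped into a local helper, roughly::
+
+            def _make_local_graph_then_branch():
+                ...
+                return graph
+
+            then_branch = _make_local_graph_then_branch()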
+ + :param value: value to converter + :return: rows to append before, actual value + """ + if value[0].type == AttributeProto.GRAPH: + tr = Translater(value[0].g, emitter=self) + rows = tr.export(as_str=False, single_line=False) + new_rows = [f"def _make_local_graph_{value[0].name}():"] + for line in rows: + if "make_model" in line: + break + new_rows.append(" " + line) + new_rows.append(" return graph") + new_rows.append(f"{value[0].name} = _make_local_graph_{value[0].name}()") + return new_rows, value[0].name + + return super().render_attribute_value(value) + + def _make_attribute( + self, name: str, attr_type: int, ref_attr_name: Optional[str] = None + ) -> str: + if ref_attr_name is None: + raise NotImplementedError( + f"Cannot create attribute with name={name!r}, attr_type={attr_type}." + ) + return ( + f"make_ref_attribute(key={name!r}, attr_type={attr_type}, " + f"ref_attr_name={ref_attr_name!r})" + ) + + def join(self, rows: List[str], single_line: bool = False) -> str: + "Returns the separators. `single_line` is unused." + return "\n".join(rows) + + def _emit_start(self, **kwargs: Dict[str, Any]) -> List[str]: + lines = ["opset_imports = ["] + opsets = kwargs.get("opsets", {}) + for k, v in opsets.items(): + lines.append(f" make_opsetid({k!r}, {v!r}),") + lines.append("]") + return lines + + def _emit_to_onnx_model(self, **kwargs: Dict[str, Any]) -> List[str]: + lines = [ + "model = make_model(", + " graph,", + " functions=functions,", + " opset_imports=opset_imports", + ")", + ] + return lines + + def _emit_begin_graph(self, **kwargs: Dict[str, Any]) -> List[str]: + lines = [ + "inputs = []", + "outputs = []", + "nodes = []", + "initializers = []", + "sparse_initializers = []", + "functions = []", + ] + return lines + + def _emit_end_graph(self, **kwargs: Dict[str, Any]) -> List[str]: + name = kwargs.get("name", "noname") + lines = [ + "graph = make_graph(", + " nodes,", + f" {name!r},", + " inputs,", + " outputs,", + " initializers,", + " sparse_initializer=sparse_initializers,", + ")", + ] + return lines + + def _emit_initializer(self, **kwargs: Dict[str, Any]) -> List[str]: + name = kwargs["name"] + value = kwargs["value"] + repl = {"bool": "bool_", "object": "object_", "str": "str_"} + fra = "from_array" + sdtype = repl.get(str(value.dtype), str(value.dtype)) + if sdtype.startswith("("): + from onnx.reference.custom_element_types import float8e4m3fn + + if sdtype == str(float8e4m3fn): + sdtype = "float8e4m3fn" + fra = "from_array_extended" + else: + raise NotImplementedError(f"Unexpected dtype={sdtype}.") + else: + sdtype = f"np.{sdtype}" + + return [ + "initializers.append(", + f" {fra}(", + f" np.array({value.tolist()}, dtype={sdtype}),", + f" name={name!r}", + " )", + ")", + ] + + def _emit_io(self, container: str, **kwargs: Dict[str, Any]) -> List[str]: + name = kwargs["name"] + elem_type = kwargs.get("elem_type", None) + shape = kwargs.get("shape", None) + if elem_type and shape: + return [ + f"{container}.append(make_tensor_value_info({name!r}, " + f"TensorProto.{ELEMENT_TYPE_NAME[elem_type]}, shape={shape!r}))" + ] + if elem_type: + return [ + f"{container}.append(make_tensor_value_info({name!r}, " + f"TensorProto.{ELEMENT_TYPE_NAME[elem_type]}, shape=[]))" + ] + return [ + f"{container}.append(make_tensor_value_info({name!r}, " + f"TensorProto.UNDEFINED, []))" + ] + + def _emit_input(self, **kwargs: Dict[str, Any]) -> List[str]: + return self._emit_io("inputs", **kwargs) + + def _emit_output(self, **kwargs: Dict[str, Any]) -> List[str]: + return 
self._emit_io("outputs", **kwargs) + + def _emit_node(self, **kwargs: Dict[str, Any]) -> List[str]: + op_type = kwargs["op_type"] + inputs = kwargs["inputs"] + outputs = kwargs["outputs"] + if kwargs.get("domain", "") != "": + domain = kwargs["domain"] + + before_lines = [] + lines = [ + "nodes.append(", + " make_node_extended(", + f" {op_type!r},", + f" {inputs},", + f" {outputs},", + ] + domain = kwargs.get("domain", "") + if domain: + lines.append(f" domain={domain!r},") + atts = kwargs.get("atts", {}) + for k, v in atts.items(): + before, value = self.render_attribute_value(v) + before_lines.extend(before) + lines.append(f" {k}={value},") + lines[-1] = lines[-1][:-1] + lines.extend([" )", ")"]) + return before_lines + lines + + def _emit_begin_function(self, **kwargs: Dict[str, Any]) -> List[str]: + lines = [ + "", + f"name_f = {kwargs['name']!r}", + f"domain_f = {kwargs['domain']!r}", + "nodes = []", + "inputs = []", + "outputs = []", + "atts = []", + ] + return lines + + def _emit_to_onnx_function(self, **kwargs: Dict[str, Any]) -> List[str]: + return [] + + def _emit_function_input(self, **kwargs: Dict[str, Any]) -> List[str]: + return [f"inputs.append({kwargs['name']!r})"] + + def _emit_function_output(self, **kwargs: Dict[str, Any]) -> List[str]: + return [f"outputs.append({kwargs['name']!r})"] + + def _emit_function_attributes(self, **kwargs: Dict[str, Any]) -> List[str]: + atts = kwargs["attributes"] + if isinstance(atts, list) and all(isinstance(t, str) for t in atts): + return [f"atts.extend({atts!r})"] + raise NotImplementedError(f"Unable to process function attributes {atts!r}.") + + def _emit_end_function(self, **kwargs: Dict[str, Any]) -> List[str]: + lines = [ + "functions.append(", + " make_function(", + " domain_f, ", + " name_f, ", + " inputs, ", + " outputs, ", + " nodes, ", + " attributes=atts, ", + " opset_imports=opset_imports,", + " )", + ")", + ] + return lines + + +class InnerEmitterShortInitializer(InnerEmitter): + """ + Converts event into proper code. + Initializer are replaced by random values if too big. 
+ """ + + def _emit_initializer(self, **kwargs: Dict[str, Any]) -> List[str]: + name = kwargs["name"] + value = kwargs["value"] + repl = {"bool": "bool_", "object": "object_", "str": "str_"} + fra = "from_array" + sdtype = repl.get(str(value.dtype), str(value.dtype)) + if sdtype.startswith("("): + from onnx.reference.custom_element_types import float8e4m3fn + + if sdtype == str(float8e4m3fn): + sdtype = "float8e4m3fn" + fra = "from_array_extended" + else: + raise NotImplementedError(f"Unexpected dtype={sdtype}.") + else: + sdtype = f"np.{sdtype}" + if value.size <= 16: + return [ + "initializers.append(", + f" {fra}(", + f" np.array({value.tolist()}, dtype={sdtype}),", + f" name={name!r}", + " )", + ")", + ] + if "int" in sdtype: + return [ + f"value = np.random.randint(0, 10, size={value.shape})" + f".astype({sdtype})", + "initializers.append(", + f" {fra}(", + f" np.array(value, dtype={sdtype}),", + f" name={name!r}", + " )", + ")", + ] + return [ + f"value = np.random.randn({', '.join(map(str,value.shape))})" + f".astype({sdtype})", + "initializers.append(", + f" {fra}(", + f" np.array(value, dtype={sdtype}),", + f" name={name!r}", + " )", + ")", + ] diff --git a/onnx_array_api/translate_api/light_emitter.py b/onnx_array_api/translate_api/light_emitter.py new file mode 100644 index 0000000..9c58830 --- /dev/null +++ b/onnx_array_api/translate_api/light_emitter.py @@ -0,0 +1,106 @@ +from typing import Any, Dict, List +from ..annotations import ELEMENT_TYPE_NAME +from .base_emitter import BaseEmitter + + +class LightEmitter(BaseEmitter): + """ + Converts event into proper code. + """ + + def join(self, rows: List[str], single_line: bool = False) -> str: + "Join the rows" + if single_line: + return ".".join(rows) + return "".join(["(\n ", "\n .".join(rows), "\n)"]) + + def _emit_start(self, **kwargs: Dict[str, Any]) -> List[str]: + opsets = kwargs.get("opsets", {}) + opset = opsets.get("", None) + if opset is not None: + del opsets[""] + args = [] + if opset: + args.append(f"opset={opset}") + if opsets: + args.append(f"opsets={opsets}") + return [f"start({', '.join(args)})"] + + def _emit_to_onnx_model(self, **kwargs: Dict[str, Any]) -> List[str]: + return ["to_onnx()"] + + def _emit_to_onnx_function(self, **kwargs: Dict[str, Any]) -> List[str]: + return [] + + def _emit_begin_graph(self, **kwargs: Dict[str, Any]) -> List[str]: + return [] + + def _emit_end_graph(self, **kwargs: Dict[str, Any]) -> List[str]: + return [] + + def _emit_initializer(self, **kwargs: Dict[str, Any]) -> List[str]: + name = kwargs["name"] + value = kwargs["value"] + repl = {"bool": "bool_", "object": "object_", "str": "str_"} + sdtype = repl.get(str(value.dtype), str(str(value.dtype))) + return [ + f"cst(np.array({value.tolist()}, dtype=np.{sdtype}))", + f"rename({name!r})", + ] + + def _emit_input(self, **kwargs: Dict[str, Any]) -> List[str]: + name = kwargs["name"] + elem_type = kwargs.get("elem_type", None) + shape = kwargs.get("shape", None) + if elem_type and shape: + return [ + f"vin({name!r}, elem_type=TensorProto.{ELEMENT_TYPE_NAME[elem_type]}, " + f"shape={shape!r})" + ] + if elem_type: + return [ + f"vin({name!r}, elem_type=TensorProto.{ELEMENT_TYPE_NAME[elem_type]})" + ] + return [f"vin({name!r})"] + + def _emit_output(self, **kwargs: Dict[str, Any]) -> List[str]: + inst = [] + if "name" in kwargs: + name = kwargs["name"] + inst.append(f"bring({name!r})") + elem_type = kwargs.get("elem_type", None) + shape = kwargs.get("shape", None) + if elem_type and shape: + inst.append( + 
f"vout(elem_type=TensorProto.{ELEMENT_TYPE_NAME[elem_type]}, " + f"shape={shape!r})" + ) + elif elem_type: + inst.append(f"vout(elem_type=TensorProto.{ELEMENT_TYPE_NAME[elem_type]})") + else: + inst.append("vout()") + return inst + + def _emit_node(self, **kwargs: Dict[str, Any]) -> List[str]: + op_type = kwargs["op_type"] + inputs = kwargs["inputs"] + outputs = kwargs["outputs"] + if kwargs.get("domain", "") != "": + domain = kwargs["domain"] + op_type = f"{domain}.{op_type}" + atts = kwargs.get("atts", {}) + args = [] + for k, v in atts.items(): + before, vatt = self.render_attribute_value(v) + if before: + raise NotImplementedError("Graph attribute not supported yet.") + args.append(f"{k}={vatt}") + + str_inputs = ", ".join([f"{i!r}" for i in inputs]) + inst = [f"bring({str_inputs})", f"{op_type}({', '.join(args)})"] + if len(outputs) == 1: + inst.append(f"rename({outputs[0]!r})") + else: + str_outputs = ", ".join([f"{o!r}" for o in outputs]) + inst.append(f"rename({str_outputs})") + return inst diff --git a/onnx_array_api/translate_api/make_helper.py b/onnx_array_api/translate_api/make_helper.py new file mode 100644 index 0000000..8b2703c --- /dev/null +++ b/onnx_array_api/translate_api/make_helper.py @@ -0,0 +1,65 @@ +from typing import Any, Optional, Sequence +from onnx import AttributeProto, NodeProto +from onnx.helper import make_attribute + + +def make_ref_attribute( + key: str, attr_type: int, ref_attr_name: Optional[str] = None +) -> AttributeProto: + """ + Creates an attribute. + + :param key: atttribute name + :param attr_type: attribute type + :param ref_attr_name: if not None, link this attribute + to a function attribute + :return: attribute + """ + att = AttributeProto() + att.name = key + att.type = attr_type + att.ref_attr_name = ref_attr_name + return att + + +def make_node_extended( + op_type: str, + inputs: Sequence[str], + outputs: Sequence[str], + name: Optional[str] = None, + doc_string: Optional[str] = None, + domain: Optional[str] = None, + **kwargs: Any, +) -> NodeProto: + """ + Constructs a NodeProto. + + :param op_type: The name of the operator to construct + :param inputs: list of input names + :param outputs: list of output names + :param name: optional unique identifier for NodeProto + :param doc_string: optional documentation string for NodeProto + :param domain: optional domain for NodeProto. + If it's None, we will just use default domain (which is empty) + :param kwargs: the attributes of the node. 
+ :return: node proto + """ + node = NodeProto() + node.op_type = op_type + node.input.extend(inputs) + node.output.extend(outputs) + if name: + node.name = name + if doc_string: + node.doc_string = doc_string + if domain is not None: + node.domain = domain + if kwargs: + for key, value in sorted(kwargs.items()): + if value is None: + continue + if isinstance(value, AttributeProto): + node.attribute.append(value) + else: + node.attribute.append(make_attribute(key, value)) + return node diff --git a/onnx_array_api/translate_api/translate.py b/onnx_array_api/translate_api/translate.py new file mode 100644 index 0000000..81d515a --- /dev/null +++ b/onnx_array_api/translate_api/translate.py @@ -0,0 +1,260 @@ +from typing import Any, Dict, List, Optional, Tuple, Union +import numpy as np +from onnx import AttributeProto, FunctionProto, GraphProto, ModelProto, NodeProto +from onnx.numpy_helper import to_array +from ..reference import to_array_extended +from .base_emitter import EventType +from .light_emitter import LightEmitter + + +class Translater: + """ + Translates an ONNX graph into a code following the light API. + """ + + def __init__( + self, + proto: Union[ModelProto, FunctionProto, GraphProto], + emitter: Optional[LightEmitter] = None, + ): + self.proto_ = proto + self.emitter = emitter or LightEmitter() + + def __repr__(self) -> str: + return f"{self.__class__.__name__}(<{type(self.proto_)})" + + def export(self, as_str, single_line: bool = False) -> Union[str, List[str]]: + """ + Exports into a code. + + :param as_str: as a single string or by rows + :param single_line: tries to compress the output into a single line + :return: list of instructions + """ + rows = [] + last_event = None + if isinstance(self.proto_, ModelProto): + opsets = {d.domain: d.version for d in self.proto_.opset_import} + rows.extend( + self.emitter( + EventType.START, opsets=opsets, ir_version=self.proto_.ir_version + ) + ) + inputs = self.proto_.graph.input + outputs = self.proto_.graph.output + nodes = self.proto_.graph.node + initializers = self.proto_.graph.initializer + sparse_initializers = self.proto_.graph.sparse_initializer + attributes = [] + last_event = EventType.TO_ONNX_MODEL + is_function = False + elif isinstance(self.proto_, (FunctionProto, GraphProto)): + inputs = self.proto_.input + outputs = self.proto_.output + nodes = self.proto_.node + if isinstance(self.proto_, GraphProto): + initializers = self.proto_.initializer + sparse_initializers = self.proto_.sparse_initializer + else: + initializers = [] + sparse_initializers = [] + attributes = ( + self.proto_.attribute if hasattr(self.proto_, "attribute") else [] + ) + is_function = isinstance(self.proto_, FunctionProto) + last_event = ( + EventType.TO_ONNX_FUNCTION if is_function else EventType.TO_ONNX_MODEL + ) + else: + raise ValueError(f"Unexpected type {type(self.proto_)} for proto.") + + if sparse_initializers: + raise NotImplementedError("Sparse initializer not supported yet.") + + if is_function: + rows.extend( + self.emitter( + EventType.BEGIN_FUNCTION, + name=self.proto_.name, + domain=self.proto_.domain, + opsets={d.domain: d.version for d in self.proto_.opset_import}, + ) + ) + elif isinstance(self.proto_, GraphProto): + rows.extend(self.emitter(EventType.BEGIN_GRAPH, name=self.proto_.name)) + else: + rows.extend( + self.emitter(EventType.BEGIN_GRAPH, name=self.proto_.graph.name) + ) + + for i in initializers: + rows.extend( + self.emitter( + EventType.INITIALIZER, + name=i.name, + init=i, + value=to_array_extended(i), + ) + ) + + 
rows.extend( + self.emitter( + EventType.BEGIN_FUNCTION_SIGNATURE + if is_function + else EventType.BEGIN_SIGNATURE + ) + ) + + for i in inputs: + if is_function: + rows.extend(self.emitter(EventType.FUNCTION_INPUT, name=i)) + else: + rows.extend( + self.emitter( + EventType.INPUT, + name=i.name, + elem_type=i.type.tensor_type.elem_type, + shape=tuple( + d.dim_value or d.dim_param + for d in i.type.tensor_type.shape.dim + ), + ) + ) + + if is_function and attributes: + rows.extend( + self.emitter(EventType.FUNCTION_ATTRIBUTES, attributes=list(attributes)) + ) + + rows.extend( + self.emitter( + EventType.END_FUNCTION_SIGNATURE + if is_function + else EventType.END_SIGNATURE + ) + ) + + for node in nodes: + atts = self.extract_attributes(node) + rows.extend( + self.emitter( + EventType.NODE, + op_type=node.op_type, + inputs=node.input, + outputs=node.output, + domain=node.domain, + atts=atts, + ) + ) + + rows.extend( + self.emitter( + EventType.BEGIN_FUNCTION_RETURN + if is_function + else EventType.BEGIN_RETURN + ) + ) + + for o in outputs: + if is_function: + rows.extend(self.emitter(EventType.FUNCTION_OUTPUT, name=o)) + else: + rows.extend( + self.emitter( + EventType.OUTPUT, + name=o.name, + elem_type=o.type.tensor_type.elem_type, + shape=tuple( + d.dim_value or d.dim_param + for d in o.type.tensor_type.shape.dim + ), + ) + ) + + rows.extend( + self.emitter( + EventType.END_FUNCTION_RETURN if is_function else EventType.END_RETURN + ) + ) + + if isinstance(self.proto_, (GraphProto, FunctionProto)): + name = self.proto_.name + else: + name = self.proto_.graph.name + + rows.extend( + self.emitter( + EventType.END_FUNCTION if is_function else EventType.END_GRAPH, + name=name, + ) + ) + + if isinstance(self.proto_, ModelProto) and len(self.proto_.functions) > 0: + for fu in self.proto_.functions: + cl = self.__class__(fu, self.emitter) + text = cl.export(False, single_line=False) + rows.extend(text) + + rows.extend(self.emitter(last_event)) + if as_str: + return self.emitter.join(rows, single_line=single_line) + return rows + + def extract_attributes( + self, node: NodeProto + ) -> Dict[str, Tuple[AttributeProto, Any]]: + """ + Extracts all atributes of a node. 
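+
+        For example, on a node built with
+        ``make_node("Transpose", ["X"], ["Y"], perm=[1, 0])``,
+        the result maps ``"perm"`` to ``(AttributeProto, array([1, 0]))``.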
+ + :param node: node proto + :return: dictionary + """ + atts: Dict[str, Tuple[AttributeProto, Any]] = {} + for att in node.attribute: + if hasattr(att, "ref_attr_name") and att.ref_attr_name: + atts[att.name] = (att, None) + continue + if att.type == AttributeProto.INT: + atts[att.name] = (att, att.i) + continue + if att.type == AttributeProto.FLOAT: + atts[att.name] = (att, att.f) + continue + if att.type == AttributeProto.INTS: + atts[att.name] = (att, np.array(att.ints)) + continue + if att.type == AttributeProto.FLOATS: + atts[att.name] = (att, np.array(att.floats, dtype=np.float32)) + continue + if ( + att.type == AttributeProto.GRAPH + and hasattr(att, "g") + and att.g is not None + ): + atts[att.name] = (att, None) + continue + if att.type == AttributeProto.SPARSE_TENSORS: + atts[att.name] = (att, to_array(att.sparse_tensor)) + continue + if att.type == AttributeProto.TENSOR: + atts[att.name] = (att, to_array(att.t)) + continue + if att.type == AttributeProto.TENSORS: + atts[att.name] = (att, [to_array(t) for t in att.tensors]) + continue + if att.type == AttributeProto.SPARSE_TENSORS: + atts[att.name] = (att, [to_array(t) for t in att.sparse_tensors]) + continue + if att.type == AttributeProto.STRING: + atts[att.name] = (att, att.s.decode("utf-8")) + continue + if att.type == AttributeProto.STRINGS: + atts[att.name] = ( + att, + np.array([s.decode("utf-8") for s in att.strings]), + ) + continue + raise ValueError( + f"Attribute {att.name!r} with type {att.type} cannot be extracted yet." + ) + return atts diff --git a/onnx_array_api/validation/docs.py b/onnx_array_api/validation/docs.py index d1a8422..c5f937f 100644 --- a/onnx_array_api/validation/docs.py +++ b/onnx_array_api/validation/docs.py @@ -30,7 +30,9 @@ def make_euclidean( n2 = oh.make_node("Pow", ["dxy", "two"], ["dxy2"]) n3 = oh.make_node("ReduceSum", ["dxy2"], [output_name]) graph = oh.make_graph([n1, n2, n3], "euclidian", [X, Y], [Z], [two]) - model = oh.make_model(graph, opset_imports=[oh.make_opsetid("", opset)]) + model = oh.make_model( + graph, opset_imports=[oh.make_opsetid("", opset)], ir_version=9 + ) return model diff --git a/onnx_array_api/validation/f8.py b/onnx_array_api/validation/f8.py index c630807..13b778d 100644 --- a/onnx_array_api/validation/f8.py +++ b/onnx_array_api/validation/f8.py @@ -9,8 +9,6 @@ class UndefinedCastError(FloatingPointError): Unable to case a number. 
""" - pass - def display_int(ival, sign=1, exponent=8, mantissa=23): """ @@ -317,25 +315,23 @@ def fe5m2_to_float32(ival: int, fn: bool = False, uz: bool = False) -> float: class CastFloat8Sets: values_e4m3fn = list( sorted( - (fe4m3_to_float32_float(i), i) for i in range(0, 256) if i not in (255, 127) + (fe4m3_to_float32_float(i), i) for i in range(256) if i not in (255, 127) ) ) values_e4m3fnuz = list( - sorted( - (fe4m3_to_float32_float(i, uz=True), i) for i in range(0, 256) if i != 0x80 - ) + sorted((fe4m3_to_float32_float(i, uz=True), i) for i in range(256) if i != 0x80) ) values_e5m2 = list( sorted( (fe5m2_to_float32_float(i), i) - for i in range(0, 256) + for i in range(256) if i not in {253, 254, 255, 125, 126, 127} ) ) values_e5m2fnuz = list( sorted( (fe5m2_to_float32_float(i, fn=True, uz=True), i) - for i in range(0, 256) + for i in range(256) if i != 0x80 ) ) @@ -445,6 +441,11 @@ def search_float32_into_fe4m3( return (max_value[1] | ret) if saturate else 0x7F | ret f = numpy.float32(value) i = CastFloat8.find_closest_value(f, set_values) + if uz: + ic = i & 0x7F + if ic == 0: + return 0 + return ic | ret return (i & 0x7F) | ret @@ -488,6 +489,11 @@ def search_float32_into_fe5m2( f = numpy.float32(value) i = CastFloat8.find_closest_value(f, set_values) + if uz: + ic = i & 0x7F + if ic == 0: + return 0 + return ic | ret return (i & 0x7F) | ret @@ -518,47 +524,45 @@ def float32_to_fe4m3(x, fn: bool = True, uz: bool = False, saturate: bool = True e = (b & 0x7F800000) >> 23 # exponent m = b & 0x007FFFFF # mantissa - if e != 0: - if e < 116: - pass - elif e < 120: - # denormalized number - ex = e - 119 - if ex >= -2: - ret |= 1 << (2 + ex) - ret |= m >> (21 - ex) - elif m > 0: - ret |= 1 - mask = 1 << (20 - ex) - if m & mask and ( - ret & 1 - or m & (mask - 1) > 0 - or (m & mask and m & (mask << 1) and m & (mask - 1) == 0) - ): + if e < 116: + ret = 0 + elif e < 120: + # denormalized number + ex = e - 119 + if ex >= -2: + ret |= 1 << (2 + ex) + ret |= m >> (21 - ex) + elif m > 0: + ret |= 1 + else: + ret = 0 + mask = 1 << (20 - ex) + if m & mask and ( + ret & 1 + or m & (mask - 1) > 0 + or (m & mask and m & (mask << 1) and m & (mask - 1) == 0) + ): + # rounding + ret += 1 + elif e < 135: + # normalized number + ex = e - 119 # 127 - 8 + if ex == 0: + ret |= 0x4 + ret |= m >> 21 + else: + ret |= ex << 3 + ret |= m >> 20 + if m & 0x80000 and ((m & 0x100000) or (m & 0x7FFFF)): + if (ret & 0x7F) < 0x7F: # rounding ret += 1 - elif e < 135: - # normalized number - ex = e - 119 # 127 - 8 - if ex == 0: - ret |= 0x4 - ret |= m >> 21 - else: - ret |= ex << 3 - ret |= m >> 20 - if m & 0x80000 and ((m & 0x100000) or (m & 0x7FFFF)): - if (ret & 0x7F) < 0x7F: - # rounding - ret += 1 - elif not saturate: - return 0x80 - elif saturate: - ret |= 0x7F # 01111110 - else: - ret = 0x80 - elif m == 0: - # -0 - ret = 0 + elif not saturate: + return 0x80 + elif saturate: + ret |= 0x7F # 01111110 + else: + ret = 0x80 return int(ret) else: if (b & 0x7FFFFFFF) == 0x7F800000: @@ -640,45 +644,43 @@ def float32_to_fe5m2(x, fn: bool = False, uz: bool = False, saturate: bool = Tru e = (b & 0x7F800000) >> 23 # exponent m = b & 0x007FFFFF # mantissa - if e != 0: - if e < 109: - pass - elif e < 112: - # denormalized number - ex = e - 111 - if ex >= -1: - ret |= 1 << (1 + ex) - ret |= m >> (22 - ex) - elif m > 0: - ret |= 1 - mask = 1 << (21 - ex) - if m & mask and ( - ret & 1 - or m & (mask - 1) > 0 - or (m & mask and m & (mask << 1) and m & (mask - 1) == 0) - ): + if e < 109: + ret = 0 + elif e < 112: + # denormalized 
number + ex = e - 111 + if ex >= -1: + ret |= 1 << (1 + ex) + ret |= m >> (22 - ex) + elif m > 0: + ret |= 1 + else: + ret = 0 + mask = 1 << (21 - ex) + if m & mask and ( + ret & 1 + or m & (mask - 1) > 0 + or (m & mask and m & (mask << 1) and m & (mask - 1) == 0) + ): + # rounding + ret += 1 + elif e < 143: + # normalized number + ex = e - 111 + ret |= ex << 2 + ret |= m >> 21 + if m & 0x100000 and ((m & 0xFFFFF) or (m & 0x200000)): + if (ret & 0x7F) < 0x7F: # rounding ret += 1 - elif e < 143: - # normalized number - ex = e - 111 - ret |= ex << 2 - ret |= m >> 21 - if m & 0x100000 and ((m & 0xFFFFF) or (m & 0x200000)): - if (ret & 0x7F) < 0x7F: - # rounding - ret += 1 - elif not saturate: - ret = 0x80 - elif e == 255 and m == 0: # inf - ret = 0x80 - elif saturate: - ret |= 0x7F # last possible number - else: - ret = 0x80 - elif m == 0: - # -0 - ret = 0 + elif not saturate: + ret = 0x80 + elif e == 255 and m == 0: # inf + ret = 0x80 + elif saturate: + ret |= 0x7F # last possible number + else: + ret = 0x80 return int(ret) elif not fn and not uz: if (b & 0x7FFFFFFF) == 0x7F800000: diff --git a/onnx_array_api/validation/tools.py b/onnx_array_api/validation/tools.py index f4628db..cbb02c1 100644 --- a/onnx_array_api/validation/tools.py +++ b/onnx_array_api/validation/tools.py @@ -20,7 +20,7 @@ def randomize_proto( - onx: Union[ModelProto, GraphProto, FunctionProto, NodeProto, TensorProto] + onx: Union[ModelProto, GraphProto, FunctionProto, NodeProto, TensorProto], ) -> Union[ModelProto, GraphProto, FunctionProto, NodeProto, TensorProto]: """ Randomizes float initializers or constant nodes. @@ -49,7 +49,7 @@ def randomize_proto( doc_string=onx.doc_string, opset_imports=list(onx.opset_import), ) - if len(onx.metadata_props) > 0: + if onx.metadata_props: values = {p.key: p.value for p in onx.metadata_props} set_model_props(onnx_model, values) return onnx_model diff --git a/pyproject.toml b/pyproject.toml index 4101adf..a465006 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,19 +11,46 @@ exclude = [ # Same as Black. line-length = 88 -[tool.ruff.mccabe] -# Unlike Flake8, default to a complexity level of 10. 
-max-complexity = 10
+[tool.ruff.lint]
+select = [
+    "B", # flake8-bugbear
+    "C4", # flake8-comprehensions
+    #"D", # pydocstyle
+    "E", # pycodestyle
+    "F", # Pyflakes
+    "G", # flake8-logging-format
+    #"I", # isort
+    "ISC", # flake8-implicit-str-concat
+    "LOG", # flake8-logging
+    #"N", # pep8-naming
+    #"NPY", # modern numpy
+    #"PERF", # Perflint
+    "PIE", # flake8-pie
+    "PYI", # flake8-pyi
+    "RUF", # Ruff-specific rules
+    "SIM", # flake8-simplify
+    "SLOT", # flake8-slot
+    "T10", # flake8-debugger
+    #"TID", # Disallow relative imports
+    #"TRY", # flake8-try-except-raise
+    "UP", # pyupgrade
+    "W", # pycodestyle
+    "YTT", # flake8-2020
+]
 
-[tool.ruff.per-file-ignores]
+[tool.ruff.lint.per-file-ignores]
+"**" = ["B905", "C401", "C408", "C413", "PYI041", "RUF012", "RUF100", "RUF010", "SIM108", "SIM910", "SIM110", "SIM102", "SIM114", "SIM103", "UP015", "UP027", "UP031", "UP034", "UP032", "UP006", "UP035", "UP007", "UP038"]
+"**/plot*.py" = ["B018"]
 "_doc/examples/plot_first_example.py" = ["E402", "F811"]
 "_doc/examples/plot_onnxruntime.py" = ["E402", "F811"]
 "onnx_array_api/array_api/_onnx_common.py" = ["F821"]
+"onnx_array_api/graph_api/__init__.py" = ["F401"]
 "onnx_array_api/light_api/__init__.py" = ["F401"]
 "onnx_array_api/light_api/_op_var.py" = ["F821"]
 "onnx_array_api/light_api/_op_vars.py" = ["F821"]
-"onnx_array_api/light_api/annotations.py" = ["F821"]
+"onnx_array_api/annotations.py" = ["F821"]
 "onnx_array_api/light_api/model.py" = ["F821"]
+"onnx_array_api/translate_api/__init__.py" = ["F401"]
 "onnx_array_api/npx/__init__.py" = ["F401", "F403"]
 "onnx_array_api/npx/npx_functions.py" = ["F821"]
 "onnx_array_api/npx/npx_functions_test.py" = ["F821"]
@@ -32,4 +59,5 @@ max-complexity = 10
 "onnx_array_api/profiling.py" = ["E731"]
 "onnx_array_api/reference/__init__.py" = ["F401"]
 "_unittests/ut_npx/test_npx.py" = ["F821"]
+"_unittests/ut_translate_api/test_translate_classic.py" = ["E501"]
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 5804529..de339f5 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,3 +1,5 @@
+array_api_compat
+array_api_strict
 autopep8
 black
 coverage
@@ -11,7 +13,7 @@ lightgbm
 matplotlib
 ml-dtypes
 git+https://github.com/onnx/onnxmltools.git
-onnxruntime>=1.16.1
+onnxruntime>=1.17.0
 openpyxl
 packaging
 pandas
diff --git a/requirements.txt b/requirements.txt
index 4680cfc..4396e32 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
-array_api_compat
 numpy
 onnx>=1.15.0
 scipy
diff --git a/setup.py b/setup.py
index 928f93f..b4cced8 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import os
 
 from setuptools import setup
@@ -18,7 +17,7 @@
         requirements = f.read().strip(" \n\r\t").split("\n")
 except FileNotFoundError:
     requirements = []
-if len(requirements) == 0 or requirements == [""]:
+if not requirements or requirements == [""]:
     requirements = ["numpy", "scipy", "onnx"]
 
 try:
@@ -34,7 +33,7 @@
         for _ in [_.strip("\r\n ") for _ in f.readlines()]
         if _.startswith("__version__")
     ]
-    if len(line) > 0:
+    if line:
         version_str = line[0].split("=")[1].strip('" ')
@@ -63,9 +62,10 @@
     "Operating System :: Unix",
     "Operating System :: MacOS",
     "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
 ],
)
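Note on the f8.py hunks above: besides flattening the `if e != 0` nesting, the rewrite makes underflowing values come out as 0 instead of a bare sign bit, and the new `if uz:` branches in `search_float32_into_fe4m3` / `search_float32_into_fe5m2` drop the sign bit when the magnitude rounds to zero. Both matter because the fnuz variants reserve 0x80 for NaN rather than -0. For reference, here is a minimal sketch of decoding an E4M3FN byte; it is not code from this patch and the helper name is made up, assuming the usual layout of 1 sign bit, 4 exponent bits biased by 7, 3 mantissa bits, no infinities, and NaN only at 0x7F/0xFF:

def e4m3fn_to_float(ival: int) -> float:
    "Decode one float8 E4M3FN byte to a Python float (illustrative sketch)."
    if (ival & 0x7F) == 0x7F:  # S.1111.111 is the only NaN pattern in E4M3FN
        return float("nan")
    sign = -1.0 if ival & 0x80 else 1.0
    e = (ival >> 3) & 0x0F  # 4 exponent bits, bias 7
    m = ival & 0x07  # 3 mantissa bits
    if e == 0:  # subnormal: no implicit leading one, fixed exponent -6
        return sign * (m / 8.0) * 2.0**-6
    return sign * (1.0 + m / 8.0) * 2.0 ** (e - 7)

assert e4m3fn_to_float(0x7E) == 448.0  # largest finite E4M3FN value
assert e4m3fn_to_float(0x01) == 2.0**-9  # smallest positive subnormal

The fnuz variants handled by the patched code shift the bias by one (8 for E4M3FNUZ) and free the 0x80 slot for NaN, which is why the `CastFloat8Sets` tables above skip `i == 0x80` when enumerating the 256 byte values.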