diff --git a/.gitignore b/.gitignore
index f4d6253..6774a18 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,8 +10,11 @@ build/*
 .eggs/*
 .hypothesis/*
 *egg-info/*
+onnxruntime_profile*
+prof
 _doc/auto_examples/*
 _doc/examples/_cache/*
+_doc/examples/onnxruntime_profile*
 _doc/examples/plot_*.png
 _doc/examples/plot_*.xlsx
 _doc/examples/data/*.optimized.onnx
diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst
index e807b02..9d8d98d 100644
--- a/CHANGELOGS.rst
+++ b/CHANGELOGS.rst
@@ -4,5 +4,6 @@ Change Logs
 0.2.0
 +++++
 
+* :pr:`22`: support OrtValue in function :func:`ort_profile`
 * :pr:`17`: implements ArrayAPI
 * :pr:`3`: fixes Array API with onnxruntime and scikit-learn
diff --git a/_unittests/ut_ort/test_ort_profile.py b/_unittests/ut_ort/test_ort_profile.py
index 295b7e0..e868860 100644
--- a/_unittests/ut_ort/test_ort_profile.py
+++ b/_unittests/ut_ort/test_ort_profile.py
@@ -6,6 +6,10 @@
 from onnx_array_api.ext_test_case import ExtTestCase
 from onnx_array_api.ort.ort_optimizers import ort_optimized_model
 from onnx_array_api.ort.ort_profile import ort_profile, merge_ort_profile
+from onnxruntime.capi._pybind_state import (
+    OrtValue as C_OrtValue,
+    OrtDevice as C_OrtDevice,
+)
 
 
 class TestOrtProfile(ExtTestCase):
@@ -28,7 +32,76 @@ def myloss(x, y):
         self.assertRaise(lambda: ort_optimized_model(onx, "NO"), ValueError)
         optimized = ort_optimized_model(onx)
         prof = ort_profile(optimized, feeds)
-        prof.to_csv("prof.csv", index=False)
+        self.assertIsInstance(prof, DataFrame)
+        prof = ort_profile(optimized, feeds, as_df=False)
+        self.assertIsInstance(prof, list)
+
+    def test_ort_profile_first_it_out(self):
+        """Checks the event names and both aggregation modes of ort_profile."""
+        def l1_loss(x, y):
+            return absolute(x - y).sum()
+
+        def l2_loss(x, y):
+            return ((x - y) ** 2).sum()
+
+        def myloss(x, y):
+            return l1_loss(x[:, 0], y[:, 0]) + l2_loss(x[:, 1], y[:, 1])
+
+        jitted_myloss = jit_onnx(myloss)
+        x = np.array([[0.1, 0.2], [0.3, 0.4]], dtype=np.float32)
+        y = np.array([[0.11, 0.22], [0.33, 0.44]], dtype=np.float32)
+        jitted_myloss(x, y)
+        onx = jitted_myloss.get_onnx()
+        feeds = {"x0": x, "x1": y}
+        self.assertRaise(lambda: ort_optimized_model(onx, "NO"), ValueError)
+        optimized = ort_optimized_model(onx)
+        prof = ort_profile(optimized, feeds)
+        events = {
+            "kernel_time",
+            "fence_before",
+            "fence_after",
+            "SequentialExecutor::Execute",
+            "model_run",
+            "model_loading_array",
+            "session_initialization",
+        }
+        self.assertEqual(set(prof["event_name"]), events)
+        # agg_op_name defaults to False in ort_profile: both values must be
+        # passed explicitly, otherwise the same configuration is tested twice.
+        for agg_op_name in (True, False):
+            agg = ort_profile(
+                optimized, feeds, first_it_out=True, agg=True, agg_op_name=agg_op_name
+            )
+            self.assertIsInstance(agg, DataFrame)
+            self.assertLess(agg.shape[0], prof.shape[0])
+            self.assertEqual(set(agg.reset_index(drop=False)["event_name"]), events)
+
+    def test_ort_profile_ort_value(self):
+        """Profiles a model whose inputs are C OrtValue, not numpy arrays."""
+
+        def as_ort_value(array):
+            cpu = C_OrtDevice(C_OrtDevice.cpu(), C_OrtDevice.default_memory(), 0)
+            return C_OrtValue.ortvalue_from_numpy(array, cpu)
+
+        def l1_loss(x, y):
+            return absolute(x - y).sum()
+
+        def l2_loss(x, y):
+            return ((x - y) ** 2).sum()
+
+        def myloss(x, y):
+            return l1_loss(x[:, 0], y[:, 0]) + l2_loss(x[:, 1], y[:, 1])
+
+        jitted = jit_onnx(myloss)
+        left = np.array([[0.1, 0.2], [0.3, 0.4]], dtype=np.float32)
+        right = np.array([[0.11, 0.22], [0.33, 0.44]], dtype=np.float32)
+        jitted(left, right)
+        onx = jitted.get_onnx()
+        feeds = {"x0": as_ort_value(left), "x1": as_ort_value(right)}
+
+        self.assertRaise(lambda: ort_optimized_model(onx, "NO"), ValueError)
+        optimized = ort_optimized_model(onx)
+        prof = ort_profile(optimized, feeds)
         self.assertIsInstance(prof, DataFrame)
         prof = ort_profile(optimized, feeds, as_df=False)
         self.assertIsInstance(prof, list)
diff --git a/onnx_array_api/ort/ort_profile.py b/onnx_array_api/ort/ort_profile.py
index 37d8092..b61df67 100644
--- a/onnx_array_api/ort/ort_profile.py
+++ b/onnx_array_api/ort/ort_profile.py
@@ -6,6 +6,56 @@
 from pandas import DataFrame
 
 
+def post_process_df_profile(
+    df: DataFrame,
+    first_it_out: bool = False,
+    agg: bool = False,
+    agg_op_name: bool = True,
+) -> DataFrame:
+    """
+    Post-processes a dataframe obtained after profiling onnxruntime.
+    It adds column *event_name*, a more explicit event name, and
+    column *iteration*, the iteration number (-1 before the first run).
+
+    :param df: profiling events, one row per event, with a column *name*;
+        the aggregation also needs the columns *cat*, *dur*,
+        *args_node_index*, *args_op_name*, *args_provider*
+    :param first_it_out: if aggregated, iterations <= 0 are aggregated
+        separately from the following ones (index level *it==0*)
+    :param agg: aggregate the result, sums the durations
+    :param agg_op_name: if True, aggregates on the operator name,
+        otherwise on the node index as well
+    :return: annotated copy of *df*, or the summed durations if *agg* is True
+    """
+    events = ("kernel_time", "fence_after", "fence_before")
+
+    def sep_event(s):
+        for e in events:
+            if s.endswith(e):
+                return e
+        return s
+
+    df = df.copy()
+    df["event_name"] = df["name"].apply(sep_event)
+    # Every "SequentialExecutor::Execute" event starts a new iteration. The
+    # cumulative count is vectorized and does not depend on the index labels.
+    starts = df["name"] == "SequentialExecutor::Execute"
+    df["iteration"] = starts.cumsum() - 1
+    if not agg:
+        return df
+
+    agg_cols = ["cat", "args_node_index", "args_op_name", "args_provider", "event_name"]
+    if first_it_out:
+        df["it==0"] = (df["iteration"] <= 0).astype(int)
+        agg_cols.insert(0, "it==0")
+    if agg_op_name:
+        agg_cols.remove("args_node_index")
+    for c in agg_cols:
+        df[c] = df[c].fillna("")
+    df["dur"] = df["dur"].fillna(0)
+    return df[agg_cols + ["dur"]].groupby(agg_cols).sum()
+
+
 def ort_profile(
     filename_or_bytes: Union[str, bytes, ModelProto],
     feeds: Dict[str, numpy.ndarray],
@@ -14,6 +64,9 @@ def ort_profile(
     repeat: int = 10,
     as_df: bool = True,
     providers: Optional[List[str]] = None,
+    first_it_out: bool = False,
+    agg: bool = False,
+    agg_op_name: bool = False,
     **kwargs,
 ) -> Union[List, DataFrame]:
     """
@@ -27,6 +80,9 @@ def ort_profile(
     :param as_df: returns the
     :param providers: list of providers to use when initializing the inference session,
         if None, the default value is `["CPUExecutionProvider"]`
+    :param first_it_out: if aggregated, leaves the first iteration out
+    :param agg: aggregate by event
+    :param agg_op_name: if True, aggregates on operator name, else on node index too
     :param kwargs: additional parameters when initializing the inference session
     :return: DataFrame or dictionary
     """
@@ -45,8 +101,16 @@ def ort_profile(
     if providers is None:
         providers = ["CPUExecutionProvider"]
     sess = InferenceSession(obj, sess_options, providers=providers, **kwargs)
-    for i in range(repeat):
-        sess.run(None, feeds)
+    # Empty feeds (model without inputs) take the numpy path instead of failing.
+    first = next(iter(feeds.values()), None)
+    if first is None or isinstance(first, numpy.ndarray):
+        for _ in range(repeat):
+            sess.run(None, feeds)
+    else:
+        out_names = [o.name for o in sess.get_outputs()]
+        for _ in range(repeat):
+            sess._sess.run_with_ort_values(feeds, out_names, None)
+
     prof = sess.end_profiling()
     with open(prof, "r") as f:
         content = f.read()
@@ -68,7 +132,9 @@ def ort_profile(
                 break
         rows.append(row)
     if as_df:
-        return DataFrame(rows)
+        return post_process_df_profile(
+            DataFrame(rows), first_it_out=first_it_out, agg=agg, agg_op_name=agg_op_name
+        )
     return rows