diff --git a/.gitignore b/.gitignore index f4d6253..6774a18 100644 --- a/.gitignore +++ b/.gitignore @@ -10,8 +10,11 @@ build/* .eggs/* .hypothesis/* *egg-info/* +onnxruntime_profile* +prof _doc/auto_examples/* _doc/examples/_cache/* +_doc/examples/onnxruntime_profile* _doc/examples/plot_*.png _doc/examples/plot_*.xlsx _doc/examples/data/*.optimized.onnx diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst index e807b02..9d8d98d 100644 --- a/CHANGELOGS.rst +++ b/CHANGELOGS.rst @@ -4,5 +4,6 @@ Change Logs 0.2.0 +++++ +* :pr:`22`: support OrtValue in function :func:`ort_profile` * :pr:`17`: implements ArrayAPI * :pr:`3`: fixes Array API with onnxruntime and scikit-learn diff --git a/_unittests/ut_ort/test_ort_profile.py b/_unittests/ut_ort/test_ort_profile.py index 295b7e0..e868860 100644 --- a/_unittests/ut_ort/test_ort_profile.py +++ b/_unittests/ut_ort/test_ort_profile.py @@ -6,6 +6,10 @@ from onnx_array_api.ext_test_case import ExtTestCase from onnx_array_api.ort.ort_optimizers import ort_optimized_model from onnx_array_api.ort.ort_profile import ort_profile, merge_ort_profile +from onnxruntime.capi._pybind_state import ( + OrtValue as C_OrtValue, + OrtDevice as C_OrtDevice, +) class TestOrtProfile(ExtTestCase): @@ -28,7 +32,76 @@ def myloss(x, y): self.assertRaise(lambda: ort_optimized_model(onx, "NO"), ValueError) optimized = ort_optimized_model(onx) prof = ort_profile(optimized, feeds) - prof.to_csv("prof.csv", index=False) + self.assertIsInstance(prof, DataFrame) + prof = ort_profile(optimized, feeds, as_df=False) + self.assertIsInstance(prof, list) + + def test_ort_profile_first_it_out(self): + def l1_loss(x, y): + return absolute(x - y).sum() + + def l2_loss(x, y): + return ((x - y) ** 2).sum() + + def myloss(x, y): + return l1_loss(x[:, 0], y[:, 0]) + l2_loss(x[:, 1], y[:, 1]) + + jitted_myloss = jit_onnx(myloss) + x = np.array([[0.1, 0.2], [0.3, 0.4]], dtype=np.float32) + y = np.array([[0.11, 0.22], [0.33, 0.44]], dtype=np.float32) + jitted_myloss(x, y) + onx = 
jitted_myloss.get_onnx() + feeds = {"x0": x, "x1": y} + self.assertRaise(lambda: ort_optimized_model(onx, "NO"), ValueError) + optimized = ort_optimized_model(onx) + prof = ort_profile(optimized, feeds) + events = { + "kernel_time", + "fence_before", + "fence_after", + "SequentialExecutor::Execute", + "model_run", + "model_loading_array", + "session_initialization", + } + self.assertEqual(set(prof["event_name"]), events) + agg = ort_profile(optimized, feeds, first_it_out=True, agg=True) + self.assertIsInstance(agg, DataFrame) + self.assertLess(agg.shape[0], prof.shape[0]) + self.assertEqual(set(agg.reset_index(drop=False)["event_name"]), events) + agg = ort_profile( + optimized, feeds, first_it_out=True, agg=True, agg_op_name=False + ) + self.assertIsInstance(agg, DataFrame) + self.assertLess(agg.shape[0], prof.shape[0]) + self.assertEqual(set(agg.reset_index(drop=False)["event_name"]), events) + + def test_ort_profile_ort_value(self): + def to_ort_value(m): + device = C_OrtDevice(C_OrtDevice.cpu(), C_OrtDevice.default_memory(), 0) + ort_value = C_OrtValue.ortvalue_from_numpy(m, device) + return ort_value + + def l1_loss(x, y): + return absolute(x - y).sum() + + def l2_loss(x, y): + return ((x - y) ** 2).sum() + + def myloss(x, y): + return l1_loss(x[:, 0], y[:, 0]) + l2_loss(x[:, 1], y[:, 1]) + + jitted_myloss = jit_onnx(myloss) + x = np.array([[0.1, 0.2], [0.3, 0.4]], dtype=np.float32) + y = np.array([[0.11, 0.22], [0.33, 0.44]], dtype=np.float32) + jitted_myloss(x, y) + onx = jitted_myloss.get_onnx() + np_feeds = {"x0": x, "x1": y} + feeds = {k: to_ort_value(v) for k, v in np_feeds.items()} + + self.assertRaise(lambda: ort_optimized_model(onx, "NO"), ValueError) + optimized = ort_optimized_model(onx) + prof = ort_profile(optimized, feeds) self.assertIsInstance(prof, DataFrame) prof = ort_profile(optimized, feeds, as_df=False) self.assertIsInstance(prof, list) diff --git a/onnx_array_api/ort/ort_profile.py b/onnx_array_api/ort/ort_profile.py index 
def post_process_df_profile(
    df: DataFrame,
    first_it_out: bool = False,
    agg: bool = False,
    agg_op_name: bool = True,
) -> DataFrame:
    """
    Post-processes a dataframe obtained after profiling onnxruntime.
    It adds a column ``event_name`` holding a more explicit event name
    and a column ``iteration`` holding the iteration number.

    The iteration counter starts at -1 and is incremented, in row order,
    every time an event named ``SequentialExecutor::Execute`` is met.
    Rows located before the first such event therefore keep iteration -1.

    :param df: dataframe with at least a column ``name``; when *agg* is True
        it must also contain the columns ``cat``, ``args_node_index``,
        ``args_op_name``, ``args_provider`` and ``dur``
    :param first_it_out: if aggregated, adds a grouping key ``it==0``
        equal to 1 for rows whose iteration is <= 0 and 0 otherwise,
        so that the first iteration is reported separately
    :param agg: aggregate the result (sum of ``dur`` per group)
    :param agg_op_name: if True, ``args_node_index`` is removed from the
        grouping keys (aggregation per operator name), otherwise the
        node index is kept as a grouping key
    :return: DataFrame, the input is never modified
    """
    suffixes = ("kernel_time", "fence_after", "fence_before")

    def sep_event(name):
        # Node events are named '<node name>_<suffix>': keep the suffix only.
        for suffix in suffixes:
            if name.endswith(suffix):
                return suffix
        return name

    df = df.copy()
    df["event_name"] = df["name"].apply(sep_event)

    # Vectorized and independent of the index labels: the previous
    # implementation used ``df.loc[i, ...]`` with positional integers,
    # which only works with a default RangeIndex.
    starts = df["name"] == "SequentialExecutor::Execute"
    df["iteration"] = starts.cumsum().astype("int64") - 1

    if not agg:
        return df

    agg_cols = ["cat", "args_node_index", "args_op_name", "args_provider", "event_name"]
    if first_it_out:
        df["it==0"] = (df["iteration"] <= 0).astype(int)
        agg_cols.insert(0, "it==0")
    if agg_op_name:
        agg_cols.remove("args_node_index")
    # groupby drops rows with missing keys by default: replace them first.
    for c in agg_cols:
        df[c] = df[c].fillna("")
    df["dur"] = df["dur"].fillna(0)
    grouped = df[agg_cols + ["dur"]].groupby(agg_cols).sum()
    return grouped
`["CPUExecutionProvider"]` + :param first_it_out: if aggregated, leaves the first iteration out + :param agg: aggregate by event + :param agg_op_name: aggregate on operator name or operator index :param kwargs: additional parameters when initializing the inference session :return: DataFrame or dictionary """ @@ -45,8 +101,16 @@ def ort_profile( if providers is None: providers = ["CPUExecutionProvider"] sess = InferenceSession(obj, sess_options, providers=providers, **kwargs) - for i in range(repeat): - sess.run(None, feeds) + first = list(feeds.values())[0] + + if isinstance(first, numpy.ndarray): + for i in range(repeat): + sess.run(None, feeds) + else: + out_names = [o.name for o in sess.get_outputs()] + for i in range(repeat): + sess._sess.run_with_ort_values(feeds, out_names, None) + prof = sess.end_profiling() with open(prof, "r") as f: content = f.read() @@ -68,7 +132,9 @@ def ort_profile( break rows.append(row) if as_df: - return DataFrame(rows) + return post_process_df_profile( + DataFrame(rows), first_it_out=first_it_out, agg=agg, agg_op_name=agg_op_name + ) return rows