[nativert] move execution planner to torch (#155374)

dolpm · pytorchmergebot · commit 8892b782a85b · 2025-06-10T22:36:06.000Z
Summary: att Test Plan: ci Rollback Plan: Differential Revision: D76167093 Pull Request resolved: #155374 Approved by: https://github.com/zhxchen17
diff --git a/build_variables.bzl b/build_variables.bzl
@@ -596,6 +596,7 @@ libtorch_nativert_sources = [
     "torch/nativert/graph/Serialization.cpp",
     "torch/nativert/graph/TensorMeta.cpp",
     "torch/nativert/executor/Placement.cpp",
+    "torch/nativert/executor/ExecutionPlanner.cpp",
     "torch/nativert/executor/PlacementUtils.cpp",
     "torch/nativert/executor/Weights.cpp",
     "torch/nativert/executor/memory/FunctionSchema.cpp",
diff --git a/test/cpp/nativert/CMakeLists.txt b/test/cpp/nativert/CMakeLists.txt
@@ -13,6 +13,7 @@ set(NATIVERT_TEST_SRCS
   ${TORCH_ROOT}/torch/nativert/executor/Weights.cpp
   ${TORCH_ROOT}/torch/nativert/common/FileUtil.cpp
   ${TORCH_ROOT}/torch/nativert/executor/memory/FunctionSchema.cpp
+  ${TORCH_ROOT}/torch/nativert/executor/ExecutionPlanner.cpp
 )
 
 add_executable(test_nativert
diff --git a/test/cpp/nativert/test_execution_planner.cpp b/test/cpp/nativert/test_execution_planner.cpp
@@ -0,0 +1,47 @@
+#include <gtest/gtest.h>
+#include <torch/nativert/executor/ExecutionPlanner.h>
+
+namespace torch::nativert {
+
+TEST(ExecutionPlannerTest, CreatePlan) {
+  auto graph = stringToGraph(R"(
+    graph(%x, %y):
+  %a = foo(a=%x, b=%y)
+  %b = foo1(a=%x, b=%y)
+  %c = foo2(c=%a, d=%b)
+  return(%c)
+  )");
+
+  {
+    auto plan = ExecutionPlanner{*graph}.createPlan();
+
+    auto& values_to_free = plan->valuesToFree;
+    EXPECT_EQ(values_to_free.size(), 5); // presumably 3 ops + implicit input/output nodes; TODO confirm
+
+    for (const auto i : c10::irange(3)) {
+      EXPECT_TRUE(values_to_free[i].empty());
+    }
+
+    EXPECT_EQ(values_to_free[3].size(), 2); // expects exactly %a and %b (checked below)
+    std::set<int64_t> ids{values_to_free[3].begin(), values_to_free[3].end()};
+    EXPECT_EQ(
+        ids,
+        std::set<int64_t>(
+            {graph->tryGetValue("a")->id(), graph->tryGetValue("b")->id()}));
+
+    EXPECT_EQ(values_to_free[4].size(), 0);
+  }
+
+  {
+    auto static_values = ExecutionPlanner::staticValues(*graph); // expects exactly the ids of %x, %y and %c
+    std::set<int64_t> static_ids{static_values.begin(), static_values.end()};
+    EXPECT_EQ(
+        static_ids,
+        std::set<int64_t>(
+            {graph->tryGetValue("x")->id(),
+             graph->tryGetValue("y")->id(),
+             graph->tryGetValue("c")->id()}));
+  }
+}
+
+} // namespace torch::nativert
diff --git a/torch/nativert/executor/ExecutionPlanner.cpp b/torch/nativert/executor/ExecutionPlanner.cpp
@@ -0,0 +1,117 @@
+#include <unordered_map>
+
+#include <c10/util/Logging.h>
+
+#include <c10/util/Enumerate.h>
+#include <torch/nativert/executor/ExecutionPlanner.h>
+
+namespace torch::nativert {
+
+std::unique_ptr<ExecutionPlan> ExecutionPlanner::createPlan() {
+  auto plan = std::make_unique<ExecutionPlan>();
+
+  // The current implementation assumes that nodes will be executed
+  // in the same order as in the thrift graph.
+  // In the future, we could plan the execution order, as long as it
+  // complies with the topological order.
+
+  generateDeallocationPlan(*plan);
+  return plan;
+}
+
+/* static */ c10::FastSet<ValueId> ExecutionPlanner::staticValues(
+    const Graph& graph) {
+  c10::FastSet<ValueId> staticValues;
+  // Graph inputs are static, so callers can filter them out of lastUsedBy:
+  // parameter/buffer values should not be freed.
+  // Whether to free user inputs is a policy decision. For now, we don't
+  // free user inputs.
+  // TODO: It should be fine to "free" the user inputs. If the user holds a ref
+  // to one, it won't be deallocated.
+  for (const auto* input : graph.inputs()) {
+    if (input) {
+      const auto& id = input->id();
+      staticValues.insert(id);
+    }
+  }
+
+  // Graph outputs are static too, as they still need to be returned
+  for (const auto& output : graph.outputs()) {
+    const auto& id = output->id();
+    staticValues.insert(id);
+  }
+
+  for (const auto& [id, _] : graph.getConstantSymIntValues()) {
+    staticValues.insert(id);
+  }
+
+  for (const Node& node : graph.nodes()) {
+    if (node.target() == "torch.ops.higher_order.run_const_graph") {
+      for (const auto& output : node.outputs()) {
+        // Do not free the outputs of run_const_graph, as they are newly
+        // produced folded constants
+        staticValues.insert(output->id());
+      }
+    } else {
+      for (const auto& input : node.inputs()) {
+        if (input.value->isFolded()) {
+          staticValues.insert(input.value->id());
+        }
+      }
+    }
+  }
+
+  return staticValues;
+}
+
+void ExecutionPlanner::generateDeallocationPlan(ExecutionPlan& plan) {
+  const auto& nodes = graph_.nodes();
+  size_t numNodes = nodes.size();
+
+  std::unordered_map<ValueId, NodeIndex> lastUsedBy;
+
+  // Walk the nodes from last to first; the first time a Value shows up as an
+  // input marks the last node that uses it, and the Value can be freed after
+  // executing that node. Values that no node consumes are never recorded.
+  size_t nodeIdx = nodes.size() - 1;
+  for (auto it = std::rbegin(nodes); it != std::rend(nodes); it++) {
+    const auto& inputs = it->inputs();
+    for (const auto& input : inputs) {
+      const auto& id = input.value->id();
+      if (lastUsedBy.find(id) == lastUsedBy.end()) {
+        lastUsedBy.insert({id, nodeIdx});
+      }
+    }
+    nodeIdx--;
+  }
+
+  std::vector<std::vector<ValueId>> valuesToFree(numNodes);
+
+  const auto& statics = staticValues(graph_);
+  for (auto& [id, nodeIndex] : lastUsedBy) {
+    if (statics.find(id) == statics.end()) {
+      valuesToFree[nodeIndex].push_back(id);
+    }
+  }
+
+  plan.valuesToFree = std::move(valuesToFree);
+
+  // print the deallocation plan
+  VLOG(2) << plan;
+
+  return;
+}
+
+std::ostream& operator<<(std::ostream& out, const ExecutionPlan& plan) {
+  out << "****** Deallocation Plan ******\n";
+  for (auto&& [i, values] : c10::enumerate(plan.valuesToFree)) {
+    out << "Node #" << i << ", valuesToFree = [";
+    for (const auto& value : values) {
+      out << value << ", "; // every id, including the last, is followed by ", "
+    }
+    out << "]\n";
+  }
+  return out;
+}
+
+} // namespace torch::nativert
diff --git a/torch/nativert/executor/ExecutionPlanner.h b/torch/nativert/executor/ExecutionPlanner.h
@@ -0,0 +1,32 @@
+#pragma once
+
+#include <c10/util/FbcodeMaps.h>
+
+#include <torch/nativert/graph/Graph.h>
+
+namespace torch::nativert {
+
+// ExecutionPlan is the result produced by ExecutionPlanner.
+// At the moment, it only contains the value deallocation plan.
+struct ExecutionPlan {
+  // The i-th entry lists the Values that can be freed *after* executing the
+  // i-th node
+  std::vector<std::vector<ValueId>> valuesToFree;
+};
+
+class ExecutionPlanner {
+ public:
+  explicit ExecutionPlanner(const Graph& graph) : graph_(graph) {}
+
+  std::unique_ptr<ExecutionPlan> createPlan();
+  // get the set of ids of the values that must not be freed
+  static c10::FastSet<ValueId> staticValues(const Graph& graph);
+
+ private:
+  void generateDeallocationPlan(ExecutionPlan& plan);
+  const Graph& graph_; // reference member: the Graph must outlive the planner
+};
+
+std::ostream& operator<<(std::ostream& out, const ExecutionPlan& plan);
+
+} // namespace torch::nativert

Original file line number	Diff line number	Diff line change
`@@ -13,6 +13,7 @@ set(NATIVERT_TEST_SRCS`
`13`	`13`	`${TORCH_ROOT}/torch/nativert/executor/Weights.cpp`
`14`	`14`	`${TORCH_ROOT}/torch/nativert/common/FileUtil.cpp`
`15`	`15`	`${TORCH_ROOT}/torch/nativert/executor/memory/FunctionSchema.cpp`
	`16`	`+ ${TORCH_ROOT}/torch/nativert/executor/ExecutionPlanner.cpp`
`16`	`17`	`)`
`17`	`18`
`18`	`19`	`add_executable(test_nativert`