
Commit 8892b78

dolpm authored and pytorchmergebot committed
[nativert] move execution planner to torch (#155374)
Summary: att

Test Plan: ci

Rollback Plan:

Differential Revision: D76167093

Pull Request resolved: #155374
Approved by: https://github.com/zhxchen17
1 parent ffc6cbf commit 8892b78

File tree

5 files changed, +198 -0 lines changed


build_variables.bzl

Lines changed: 1 addition & 0 deletions
@@ -596,6 +596,7 @@ libtorch_nativert_sources = [
     "torch/nativert/graph/Serialization.cpp",
     "torch/nativert/graph/TensorMeta.cpp",
     "torch/nativert/executor/Placement.cpp",
+    "torch/nativert/executor/ExecutionPlanner.cpp",
     "torch/nativert/executor/PlacementUtils.cpp",
     "torch/nativert/executor/Weights.cpp",
     "torch/nativert/executor/memory/FunctionSchema.cpp",

test/cpp/nativert/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -13,6 +13,7 @@ set(NATIVERT_TEST_SRCS
   ${TORCH_ROOT}/torch/nativert/executor/Weights.cpp
   ${TORCH_ROOT}/torch/nativert/common/FileUtil.cpp
   ${TORCH_ROOT}/torch/nativert/executor/memory/FunctionSchema.cpp
+  ${TORCH_ROOT}/torch/nativert/executor/ExecutionPlanner.cpp
 )

 add_executable(test_nativert
Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
#include <gtest/gtest.h>
#include <torch/nativert/executor/ExecutionPlanner.h>

namespace torch::nativert {

TEST(ExecutionPlannerTest, CreatePlan) {
  auto graph = stringToGraph(R"(
graph(%x, %y):
  %a = foo(a=%x, b=%y)
  %b = foo1(a=%x, b=%y)
  %c = foo2(c=%a, d=%b)
  return(%c)
)");

  {
    auto plan = ExecutionPlanner{*graph}.createPlan();

    auto& values_to_free = plan->valuesToFree;
    EXPECT_EQ(values_to_free.size(), 5);

    for (const auto i : c10::irange(3)) {
      EXPECT_TRUE(values_to_free[i].empty());
    }

    EXPECT_EQ(values_to_free[3].size(), 2);
    std::set<int64_t> ids{values_to_free[3].begin(), values_to_free[3].end()};
    EXPECT_EQ(
        ids,
        std::set<int64_t>(
            {graph->tryGetValue("a")->id(), graph->tryGetValue("b")->id()}));

    EXPECT_EQ(values_to_free[4].size(), 0);
  }

  {
    auto static_values = ExecutionPlanner::staticValues(*graph);
    std::set<int64_t> static_ids{static_values.begin(), static_values.end()};
    EXPECT_EQ(
        static_ids,
        std::set<int64_t>(
            {graph->tryGetValue("x")->id(),
             graph->tryGetValue("y")->id(),
             graph->tryGetValue("c")->id()}));
  }
}

} // namespace torch::nativert
torch/nativert/executor/ExecutionPlanner.cpp

Lines changed: 117 additions & 0 deletions
@@ -0,0 +1,117 @@
#include <unordered_map>

#include <c10/util/Logging.h>

#include <c10/util/Enumerate.h>
#include <torch/nativert/executor/ExecutionPlanner.h>

namespace torch::nativert {

std::unique_ptr<ExecutionPlan> ExecutionPlanner::createPlan() {
  auto plan = std::make_unique<ExecutionPlan>();

  // The current implementation assumes that nodes will be executed
  // in the same order as in the thrift graph.
  // In the future, we can plan the execution order ourselves, as long as it
  // complies with the topological order.

  generateDeallocationPlan(*plan);
  return plan;
}

/* static */ c10::FastSet<ValueId> ExecutionPlanner::staticValues(
    const Graph& graph) {
  c10::FastSet<ValueId> staticValues;
  // Filter lastUsedBy by graph inputs:
  // parameter/buffer values should not be freed.
  // It's a policy decision whether to free user inputs. For now, we don't
  // free user inputs.
  // TODO: It should be fine to "free" the user inputs. If the user holds a ref
  // to it, it won't be deallocated.
  for (const auto* input : graph.inputs()) {
    if (input) {
      const auto& id = input->id();
      staticValues.insert(id);
    }
  }

  // Filter lastUsedBy by graph outputs, as they still need to be returned
  for (const auto& output : graph.outputs()) {
    const auto& id = output->id();
    staticValues.insert(id);
  }

  for (const auto& [id, _] : graph.getConstantSymIntValues()) {
    staticValues.insert(id);
  }

  for (const Node& node : graph.nodes()) {
    if (node.target() == "torch.ops.higher_order.run_const_graph") {
      for (const auto& output : node.outputs()) {
        // Do not free the outputs of run_const_graph, as they are newly
        // produced folded constants
        staticValues.insert(output->id());
      }
    } else {
      for (const auto& input : node.inputs()) {
        if (input.value->isFolded()) {
          staticValues.insert(input.value->id());
        }
      }
    }
  }

  return staticValues;
}

void ExecutionPlanner::generateDeallocationPlan(ExecutionPlan& plan) {
  const auto& nodes = graph_.nodes();
  size_t numNodes = nodes.size();

  std::unordered_map<ValueId, NodeIndex> lastUsedBy;

  // Traverse from the last node to the first node.
  // For each Value, find the last node that uses it;
  // the Value can be freed after executing that node.
  size_t nodeIdx = nodes.size() - 1;
  for (auto it = std::rbegin(nodes); it != std::rend(nodes); it++) {
    const auto& inputs = it->inputs();
    for (const auto& input : inputs) {
      const auto& id = input.value->id();
      if (lastUsedBy.find(id) == lastUsedBy.end()) {
        lastUsedBy.insert({id, nodeIdx});
      }
    }
    nodeIdx--;
  }

  std::vector<std::vector<ValueId>> valuesToFree(numNodes);

  const auto& statics = staticValues(graph_);
  for (auto& [id, nodeIndex] : lastUsedBy) {
    if (statics.find(id) == statics.end()) {
      valuesToFree[nodeIndex].push_back(id);
    }
  }

  plan.valuesToFree = std::move(valuesToFree);

  // print the deallocation plan
  VLOG(2) << plan;

  return;
}

std::ostream& operator<<(std::ostream& out, const ExecutionPlan& plan) {
  out << "****** Deallocation Plan ******\n";
  for (auto&& [i, values] : c10::enumerate(plan.valuesToFree)) {
    out << "Node #" << i << ", valuesToFree = [";
    for (const auto& value : values) {
      out << value << ", ";
    }
    out << "]\n";
  }
  return out;
}

} // namespace torch::nativert
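
For readers skimming the diff, generateDeallocationPlan boils down to a reverse pass that records each value's last use and then skips the static values. The standalone sketch below is not part of the commit: it uses toy stand-in types (ToyNode, planDeallocations) instead of the nativert Graph/Node/Value API, on a graph shaped roughly like the one in the test above.

// Minimal standalone sketch of the last-use / deallocation-plan idea.
// Illustrative only; toy types, not the nativert Graph API.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <unordered_map>
#include <unordered_set>
#include <vector>

using ValueId = int64_t;

struct ToyNode {
  std::vector<ValueId> inputs; // values read by this node
};

// Returns, for each node index i, the values whose last use is node i
// and which are not pinned as "static" (e.g. graph inputs/outputs).
std::vector<std::vector<ValueId>> planDeallocations(
    const std::vector<ToyNode>& nodes,
    const std::unordered_set<ValueId>& staticValues) {
  std::unordered_map<ValueId, size_t> lastUsedBy;
  // Walk nodes back-to-front; the first time a value is seen is its last use.
  for (size_t i = nodes.size(); i-- > 0;) {
    for (ValueId id : nodes[i].inputs) {
      lastUsedBy.emplace(id, i); // emplace keeps the first (i.e. latest) hit
    }
  }
  std::vector<std::vector<ValueId>> valuesToFree(nodes.size());
  for (const auto& [id, nodeIdx] : lastUsedBy) {
    if (staticValues.count(id) == 0) {
      valuesToFree[nodeIdx].push_back(id);
    }
  }
  return valuesToFree;
}

int main() {
  // Roughly mirrors the test graph: %a = foo(%x, %y); %b = foo1(%x, %y);
  // %c = foo2(%a, %b); return(%c). Ids: x=0, y=1, a=2, b=3, c=4.
  std::vector<ToyNode> nodes = {{{0, 1}}, {{0, 1}}, {{2, 3}}, {{4}}};
  std::unordered_set<ValueId> statics = {0, 1, 4}; // inputs and output
  auto plan = planDeallocations(nodes, statics);
  for (size_t i = 0; i < plan.size(); ++i) {
    std::cout << "Node #" << i << ": free " << plan[i].size() << " value(s)\n";
  }
  return 0;
}

Here only the node consuming %a and %b gets a non-empty free list; the graph inputs and the returned output stay pinned, matching what the unit test asserts.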
torch/nativert/executor/ExecutionPlanner.h

Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
#pragma once

#include <c10/util/FbcodeMaps.h>

#include <torch/nativert/graph/Graph.h>

namespace torch::nativert {

// ExecutionPlan is the result produced by ExecutionPlanner.
// At the moment, it only contains the value deallocation plan.
struct ExecutionPlan {
  // The i-th entry in this list holds the Values that can be freed *after*
  // executing the i-th node.
  std::vector<std::vector<ValueId>> valuesToFree;
};

class ExecutionPlanner {
 public:
  explicit ExecutionPlanner(const Graph& graph) : graph_(graph) {}

  std::unique_ptr<ExecutionPlan> createPlan();
  // get the list of values we can't free
  static c10::FastSet<ValueId> staticValues(const Graph& graph);

 private:
  void generateDeallocationPlan(ExecutionPlan& plan);
  const Graph& graph_;
};

std::ostream& operator<<(std::ostream& out, const ExecutionPlan& plan);

} // namespace torch::nativert
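
As a rough sketch of how an executor might consume this header, the snippet below interleaves node execution with the per-node free lists. It is illustrative only: executeNode and freeValue are hypothetical placeholders, not functions added by this commit.

// Hedged usage sketch only; executeNode() and freeValue() are hypothetical
// stand-ins for whatever the surrounding executor actually does.
#include <cstddef>

#include <torch/nativert/executor/ExecutionPlanner.h>

void runWithDeallocationPlan(const torch::nativert::Graph& graph) {
  auto plan = torch::nativert::ExecutionPlanner{graph}.createPlan();

  size_t nodeIdx = 0;
  for (const auto& node : graph.nodes()) {
    executeNode(node); // hypothetical: run this node's kernel(s)
    // Release every value whose last use was this node. Static values
    // (graph inputs/outputs, folded constants) never show up here.
    for (const auto valueId : plan->valuesToFree[nodeIdx]) {
      freeValue(valueId); // hypothetical: free the backing storage
    }
    ++nodeIdx;
  }
}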
