Skip to content

Fixed unnecessary memcpy during array assignment #2575

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 11, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/api/c/assign.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,9 @@ af_err af_assign_gen(af_array* out, const af_array lhs, const dim_t ndims,
if (*out != lhs) {
int count = 0;
AF_CHECK(af_get_data_ref_count(&count, lhs));
if (count > 1)
if (count > 1) {
AF_CHECK(af_copy_array(&output, lhs));
}
else
output = retain(lhs);
} else {
Expand Down
26 changes: 19 additions & 7 deletions src/api/cpp/array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -471,25 +471,37 @@ array::array_proxy &af::array::array_proxy::operator=(const array &other) {
}
}

af_array par_arr = nullptr;
af_array par_arr = 0;

dim4 parent_dims = impl->parent_->dims();
if (impl->is_linear_) {
AF_THROW(af_flat(&par_arr, impl->parent_->get()));
// The set call will dereference the impl->parent_ array. We are doing
// this because the af_flat call above increases the reference count of the
// parent array which triggers a copy operation. This triggers a copy operation
// inside the af_assign_gen function below. The parent array will be reverted
// to the original array and shape later in the code.
af_array empty = 0;
impl->parent_->set(empty);
nd = 1;
} else {
par_arr = impl->parent_->get();
}

af_array tmp = nullptr;
AF_THROW(af_assign_gen(&tmp, par_arr, nd, impl->indices_, other_arr));
af_array flat_res = 0;
AF_THROW(af_assign_gen(&flat_res, par_arr, nd, impl->indices_, other_arr));

af_array res = nullptr;
af_array res = 0;
af_array unflattened = 0;
if (impl->is_linear_) {
AF_THROW(af_moddims(&res, tmp, this_dims.ndims(), this_dims.get()));
AF_THROW(af_moddims(&res, flat_res, this_dims.ndims(), this_dims.get()));
// Unflatten the af_array and reset the original reference
AF_THROW(af_moddims(&unflattened, par_arr, parent_dims.ndims(), parent_dims.get()));
impl->parent_->set(unflattened);
AF_THROW(af_release_array(par_arr));
AF_THROW(af_release_array(tmp));
AF_THROW(af_release_array(flat_res));
} else {
res = tmp;
res = flat_res;
}

impl->parent_->set(res);
Expand Down
13 changes: 7 additions & 6 deletions src/backend/opencl/assign.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,16 +55,17 @@ void assign(Array<T>& out, const af_index_t idxrs[], const Array<T>& rhs) {
idxArrs[x] = castArray<uint>(idxrs[x].idx.arr);
bPtrs[x] = idxArrs[x].get();
} else {
// alloc an 1-element buffer to avoid OpenCL from failing
bPtrs[x] = bufferAlloc(sizeof(uint));
// alloc an 1-element buffer to avoid OpenCL from failing using
// direct buffer allocation as opposed to mem manager to avoid
// reference count desprepancies between different backends
static cl::Buffer *empty = new Buffer(getContext(),
CL_MEM_READ_ONLY,
sizeof(uint));
bPtrs[x] = empty;
}
}

kernel::assign<T>(out, rhs, p, bPtrs);

for (dim_t x = 0; x < 4; ++x) {
if (p.isSeq[x]) bufferFree(bPtrs[x]);
}
}

#define INSTANTIATE(T) \
Expand Down
25 changes: 25 additions & 0 deletions test/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,31 @@ TEST(Memory, device) {
ASSERT_EQ(lock_bytes, 0u);
}


TEST(Memory, Assign2D) {
size_t alloc_bytes, alloc_buffers;
size_t alloc_bytes_after, alloc_buffers_after;
size_t lock_bytes, lock_buffers;
size_t lock_bytes_after, lock_buffers_after;

cleanSlate(); // Clean up everything done so far
{
array a = af::randu(10, 10, f32);
unsigned hb[] = {3, 5, 6, 8, 9};
array b(5, hb);
array c = af::randu(5, f32);
deviceMemInfo(&alloc_bytes, &alloc_buffers, &lock_bytes, &lock_buffers);
a(b) = c;
}

deviceMemInfo(&alloc_bytes_after, &alloc_buffers_after, &lock_bytes_after,
&lock_buffers_after);

// Check if assigned allocated extra buffers
ASSERT_EQ(alloc_buffers, alloc_buffers_after);
ASSERT_EQ(alloc_bytes, alloc_bytes_after);
}

TEST(Memory, unlock) {
size_t alloc_bytes, alloc_buffers;
size_t lock_bytes, lock_buffers;
Expand Down