Skip to content

Commit 1cf9d38

Browse files
nongliyhuai
authored andcommitted
[SPARK-12030] Fix Platform.copyMemory to handle overlapping regions.
This bug was exposed as memory corruption in Timsort which uses copyMemory to copy large regions that can overlap. The prior implementation did not handle this case half the time and always copied forward, resulting in the data being corrupt. Author: Nong Li <nong@databricks.com> Closes apache#10068 from nongli/spark-12030. (cherry picked from commit 2cef1cd) Signed-off-by: Yin Huai <yhuai@databricks.com>
1 parent ab2a124 commit 1cf9d38

File tree

2 files changed

+82
-6
lines changed

2 files changed

+82
-6
lines changed

unsafe/src/main/java/org/apache/spark/unsafe/Platform.java

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -107,12 +107,27 @@ public static void freeMemory(long address) {
107107

108108
public static void copyMemory(
109109
Object src, long srcOffset, Object dst, long dstOffset, long length) {
110-
while (length > 0) {
111-
long size = Math.min(length, UNSAFE_COPY_THRESHOLD);
112-
_UNSAFE.copyMemory(src, srcOffset, dst, dstOffset, size);
113-
length -= size;
114-
srcOffset += size;
115-
dstOffset += size;
110+
// Check if dstOffset is before or after srcOffset to determine if we should copy
111+
// forward or backwards. This is necessary in case src and dst overlap.
112+
if (dstOffset < srcOffset) {
113+
while (length > 0) {
114+
long size = Math.min(length, UNSAFE_COPY_THRESHOLD);
115+
_UNSAFE.copyMemory(src, srcOffset, dst, dstOffset, size);
116+
length -= size;
117+
srcOffset += size;
118+
dstOffset += size;
119+
}
120+
} else {
121+
srcOffset += length;
122+
dstOffset += length;
123+
while (length > 0) {
124+
long size = Math.min(length, UNSAFE_COPY_THRESHOLD);
125+
srcOffset -= size;
126+
dstOffset -= size;
127+
_UNSAFE.copyMemory(src, srcOffset, dst, dstOffset, size);
128+
length -= size;
129+
}
130+
116131
}
117132
}
118133

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.unsafe;
19+
20+
import org.junit.Assert;
21+
import org.junit.Test;
22+
23+
public class PlatformUtilSuite {
24+
25+
@Test
26+
public void overlappingCopyMemory() {
27+
byte[] data = new byte[3 * 1024 * 1024];
28+
int size = 2 * 1024 * 1024;
29+
for (int i = 0; i < data.length; ++i) {
30+
data[i] = (byte)i;
31+
}
32+
33+
Platform.copyMemory(data, Platform.BYTE_ARRAY_OFFSET, data, Platform.BYTE_ARRAY_OFFSET, size);
34+
for (int i = 0; i < data.length; ++i) {
35+
Assert.assertEquals((byte)i, data[i]);
36+
}
37+
38+
Platform.copyMemory(
39+
data,
40+
Platform.BYTE_ARRAY_OFFSET + 1,
41+
data,
42+
Platform.BYTE_ARRAY_OFFSET,
43+
size);
44+
for (int i = 0; i < size; ++i) {
45+
Assert.assertEquals((byte)(i + 1), data[i]);
46+
}
47+
48+
for (int i = 0; i < data.length; ++i) {
49+
data[i] = (byte)i;
50+
}
51+
Platform.copyMemory(
52+
data,
53+
Platform.BYTE_ARRAY_OFFSET,
54+
data,
55+
Platform.BYTE_ARRAY_OFFSET + 1,
56+
size);
57+
for (int i = 0; i < size; ++i) {
58+
Assert.assertEquals((byte)i, data[i + 1]);
59+
}
60+
}
61+
}

0 commit comments

Comments
 (0)