Skip to content

Commit 9b99b2b

Browse files
committed
[SPARK-12060][CORE] Avoid memory copy in JavaSerializerInstance.serialize
`JavaSerializerInstance.serialize` uses `ByteArrayOutputStream.toByteArray` to get the serialized data. `ByteArrayOutputStream.toByteArray` needs to copy the contents of the internal array into a new array. However, since the result is immediately converted to a `ByteBuffer`, we can avoid that memory copy. This PR added `ByteBufferOutputStream` to access the protected `buf` field and wrap it in a `ByteBuffer` directly. Author: Shixiong Zhu <shixiong@databricks.com> Closes apache#10051 from zsxwing/SPARK-12060. (cherry picked from commit 1401166) Signed-off-by: Shixiong Zhu <shixiong@databricks.com>
1 parent add4e63 commit 9b99b2b

File tree

2 files changed

+34
-4
lines changed

2 files changed

+34
-4
lines changed

core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -24,8 +24,7 @@ import scala.reflect.ClassTag
2424

2525
import org.apache.spark.SparkConf
2626
import org.apache.spark.annotation.DeveloperApi
27-
import org.apache.spark.util.ByteBufferInputStream
28-
import org.apache.spark.util.Utils
27+
import org.apache.spark.util.{ByteBufferInputStream, ByteBufferOutputStream, Utils}
2928

3029
private[spark] class JavaSerializationStream(
3130
out: OutputStream, counterReset: Int, extraDebugInfo: Boolean)
@@ -96,11 +95,11 @@ private[spark] class JavaSerializerInstance(
9695
extends SerializerInstance {
9796

9897
override def serialize[T: ClassTag](t: T): ByteBuffer = {
99-
val bos = new ByteArrayOutputStream()
98+
val bos = new ByteBufferOutputStream()
10099
val out = serializeStream(bos)
101100
out.writeObject(t)
102101
out.close()
103-
ByteBuffer.wrap(bos.toByteArray)
102+
bos.toByteBuffer
104103
}
105104

106105
override def deserialize[T: ClassTag](bytes: ByteBuffer): T = {
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.util
19+
20+
import java.io.ByteArrayOutputStream
21+
import java.nio.ByteBuffer
22+
23+
/**
 * Provides a zero-copy way to convert the data in a ByteArrayOutputStream to a ByteBuffer.
 *
 * NOTE: the returned ByteBuffer wraps the stream's internal `buf` array directly, so it
 * aliases the stream's state — writing to or resetting this stream after calling
 * `toByteBuffer` invalidates (or mutates) previously returned buffers.
 */
private[spark] class ByteBufferOutputStream extends ByteArrayOutputStream {

  /**
   * Wraps the internal buffer without copying; the buffer's valid range is [0, count),
   * i.e. exactly the bytes written so far.
   */
  def toByteBuffer: ByteBuffer = {
    // No `return` — the last expression is the result (idiomatic Scala).
    ByteBuffer.wrap(buf, 0, count)
  }
}

0 commit comments

Comments
 (0)