Skip to content

Commit 0171b71

Browse files
hvanhovellmarmbrus
authored andcommitted
[SPARK-12421][SQL] Prevent Internal/External row from exposing state.
It is currently possible to change the values of the supposedly immutable ```GenericRow``` and ```GenericInternalRow``` classes. This is caused by the fact that scala's ArrayOps ```toArray``` (returned by calling ```toSeq```) will return the backing array instead of a copy. This PR fixes this problem. This PR was inspired by apache#10374 by apo1. cc apo1 sarutak marmbrus cloud-fan nongli (everyone in the previous conversation). Author: Herman van Hovell <hvanhovell@questtec.nl> Closes apache#10553 from hvanhovell/SPARK-12421.
1 parent 40d0396 commit 0171b71

File tree

2 files changed

+34
-4
lines changed
  • sql/catalyst/src

2 files changed

+34
-4
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -199,9 +199,9 @@ class GenericRow(protected[sql] val values: Array[Any]) extends Row {
199199

200200
override def get(i: Int): Any = values(i)
201201

202-
override def toSeq: Seq[Any] = values.toSeq
202+
override def toSeq: Seq[Any] = values.clone()
203203

204-
override def copy(): Row = this
204+
override def copy(): GenericRow = this
205205
}
206206

207207
class GenericRowWithSchema(values: Array[Any], override val schema: StructType)
@@ -226,11 +226,11 @@ class GenericInternalRow(private[sql] val values: Array[Any]) extends BaseGeneri
226226

227227
override protected def genericGet(ordinal: Int) = values(ordinal)
228228

229-
override def toSeq(fieldTypes: Seq[DataType]): Seq[Any] = values
229+
override def toSeq(fieldTypes: Seq[DataType]): Seq[Any] = values.clone()
230230

231231
override def numFields: Int = values.length
232232

233-
override def copy(): InternalRow = new GenericInternalRow(values.clone())
233+
override def copy(): GenericInternalRow = this
234234
}
235235

236236
/**

sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,4 +104,34 @@ class RowTest extends FunSpec with Matchers {
104104
internalRow shouldEqual internalRow2
105105
}
106106
}
107+
108+
describe("row immutability") {
109+
val values = Seq(1, 2, "3", "IV", 6L)
110+
val externalRow = Row.fromSeq(values)
111+
val internalRow = InternalRow.fromSeq(values)
112+
113+
def modifyValues(values: Seq[Any]): Seq[Any] = {
114+
val array = values.toArray
115+
array(2) = "42"
116+
array
117+
}
118+
119+
it("copy should return same ref for external rows") {
120+
externalRow should be theSameInstanceAs externalRow.copy()
121+
}
122+
123+
it("copy should return same ref for interal rows") {
124+
internalRow should be theSameInstanceAs internalRow.copy()
125+
}
126+
127+
it("toSeq should not expose internal state for external rows") {
128+
val modifiedValues = modifyValues(externalRow.toSeq)
129+
externalRow.toSeq should not equal modifiedValues
130+
}
131+
132+
it("toSeq should not expose internal state for internal rows") {
133+
val modifiedValues = modifyValues(internalRow.toSeq(Seq.empty))
134+
internalRow.toSeq(Seq.empty) should not equal modifiedValues
135+
}
136+
}
107137
}

0 commit comments

Comments
 (0)