
Commit 44ee920

Revert "[SPARK-12286][SPARK-12290][SPARK-12294][SPARK-12284][SQL] always output UnsafeRow"
This reverts commit 0da7bd5.
1 parent 0da7bd5 · commit 44ee920

34 files changed (+574 −74 lines)

sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala

Lines changed: 2 additions & 1 deletion

@@ -904,7 +904,8 @@ class SQLContext private[sql](
   @transient
   protected[sql] val prepareForExecution = new RuleExecutor[SparkPlan] {
     val batches = Seq(
-      Batch("Add exchange", Once, EnsureRequirements(self))
+      Batch("Add exchange", Once, EnsureRequirements(self)),
+      Batch("Add row converters", Once, EnsureRowFormats)
     )
   }
 
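
The re-added "Add row converters" batch restores the planner pass that reconciles row formats between adjacent operators. EnsureRowFormats is the rule named in the hunk above; the sketch below is a self-contained toy model of what such a pass does, where `Plan`, `ToUnsafe`, and `ToSafe` are made-up stand-ins for Spark's physical-plan classes, not Spark's API.

```scala
// Toy model of an EnsureRowFormats-style pass (illustrative stand-ins only).
trait Plan {
  def children: Seq[Plan]
  def withChildren(newChildren: Seq[Plan]): Plan
  // Defaults mirror the SparkPlan flags restored later in this commit.
  def outputsUnsafeRows: Boolean = false
  def canProcessUnsafeRows: Boolean = false
  def canProcessSafeRows: Boolean = true
}

// Converter nodes: each consumes one row format and produces the other.
case class ToUnsafe(child: Plan) extends Plan {
  def children: Seq[Plan] = Seq(child)
  def withChildren(c: Seq[Plan]): Plan = copy(child = c.head)
  override def outputsUnsafeRows: Boolean = true
}

case class ToSafe(child: Plan) extends Plan {
  def children: Seq[Plan] = Seq(child)
  def withChildren(c: Seq[Plan]): Plan = copy(child = c.head)
  override def canProcessUnsafeRows: Boolean = true
  override def canProcessSafeRows: Boolean = false
}

// Bottom-up pass: wrap any child whose output format its parent cannot consume.
def ensureRowFormats(plan: Plan): Plan =
  plan.withChildren(plan.children.map(ensureRowFormats).map { child =>
    if (child.outputsUnsafeRows && !plan.canProcessUnsafeRows) ToSafe(child)
    else if (!child.outputsUnsafeRows && !plan.canProcessSafeRows) ToUnsafe(child)
    else child
  })
```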

sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala

Lines changed: 23 additions & 2 deletions

@@ -28,6 +28,7 @@ import org.apache.spark.sql.SQLContext
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.errors.attachTree
 import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
 import org.apache.spark.sql.catalyst.plans.physical._
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.util.MutablePair
@@ -49,14 +50,26 @@ case class Exchange(
       case None => ""
     }
 
-    val simpleNodeName = "Exchange"
+    val simpleNodeName = if (tungstenMode) "TungstenExchange" else "Exchange"
     s"$simpleNodeName$extraInfo"
   }
 
+  /**
+   * Returns true iff we can support the data type, and we are not doing range partitioning.
+   */
+  private lazy val tungstenMode: Boolean = !newPartitioning.isInstanceOf[RangePartitioning]
+
   override def outputPartitioning: Partitioning = newPartitioning
 
   override def output: Seq[Attribute] = child.output
 
+  // This setting is somewhat counterintuitive:
+  // If the schema works with UnsafeRow, then we tell the planner that we don't support safe row,
+  // so the planner inserts a converter to convert data into UnsafeRow if needed.
+  override def outputsUnsafeRows: Boolean = tungstenMode
+  override def canProcessSafeRows: Boolean = !tungstenMode
+  override def canProcessUnsafeRows: Boolean = tungstenMode
+
   /**
    * Determines whether records must be defensively copied before being sent to the shuffle.
    * Several of Spark's shuffle components will buffer deserialized Java objects in memory. The
@@ -117,7 +130,15 @@ case class Exchange(
     }
   }
 
-  private val serializer: Serializer = new UnsafeRowSerializer(child.output.size)
+  @transient private lazy val sparkConf = child.sqlContext.sparkContext.getConf
+
+  private val serializer: Serializer = {
+    if (tungstenMode) {
+      new UnsafeRowSerializer(child.output.size)
+    } else {
+      new SparkSqlSerializer(sparkConf)
+    }
+  }
 
   override protected def doPrepare(): Unit = {
     // If an ExchangeCoordinator is needed, we register this Exchange operator
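
The "counterintuitive" comment above is the crux of the design: by claiming it cannot process safe rows in Tungsten mode, Exchange forces the converter pass to feed it UnsafeRows, which is exactly what its UnsafeRowSerializer requires. In the toy model from the first hunk (again, assumed names, not Spark's API), that plays out like this:

```scala
// A leaf that produces safe rows (hypothetical example node).
case class ToySafeScan() extends Plan {
  def children: Seq[Plan] = Seq.empty
  def withChildren(c: Seq[Plan]): Plan = this
}

// An Exchange-like node with the same three flags as the hunk above.
case class ToyExchange(child: Plan, tungstenMode: Boolean) extends Plan {
  def children: Seq[Plan] = Seq(child)
  def withChildren(c: Seq[Plan]): Plan = copy(child = c.head)
  override def outputsUnsafeRows: Boolean = tungstenMode
  override def canProcessUnsafeRows: Boolean = tungstenMode
  override def canProcessSafeRows: Boolean = !tungstenMode
}

// ensureRowFormats(ToyExchange(ToySafeScan(), tungstenMode = true))
// ==> ToyExchange(ToUnsafe(ToySafeScan()), true): the scan's safe rows are
//     converted before they reach the Tungsten shuffle serializer.
```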

sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala

Lines changed: 3 additions & 12 deletions

@@ -20,7 +20,7 @@ package org.apache.spark.sql.execution
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
 import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation
-import org.apache.spark.sql.catalyst.expressions.{UnsafeProjection, Attribute, AttributeSet, GenericMutableRow}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet, GenericMutableRow}
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Statistics}
 import org.apache.spark.sql.sources.{BaseRelation, HadoopFsRelation}
 import org.apache.spark.sql.types.DataType
@@ -99,19 +99,10 @@ private[sql] case class PhysicalRDD(
     rdd: RDD[InternalRow],
     override val nodeName: String,
     override val metadata: Map[String, String] = Map.empty,
-    isUnsafeRow: Boolean = false)
+    override val outputsUnsafeRows: Boolean = false)
   extends LeafNode {
 
-  protected override def doExecute(): RDD[InternalRow] = {
-    if (isUnsafeRow) {
-      rdd
-    } else {
-      rdd.mapPartitionsInternal { iter =>
-        val proj = UnsafeProjection.create(schema)
-        iter.map(proj)
-      }
-    }
-  }
+  protected override def doExecute(): RDD[InternalRow] = rdd
 
   override def simpleString: String = {
     val metadataEntries = for ((key, value) <- metadata.toSeq.sorted) yield s"$key: $value"

sql/core/src/main/scala/org/apache/spark/sql/execution/Expand.scala

Lines changed: 11 additions & 2 deletions

@@ -41,11 +41,20 @@ case class Expand(
   // as UNKNOWN partitioning
   override def outputPartitioning: Partitioning = UnknownPartitioning(0)
 
+  override def outputsUnsafeRows: Boolean = child.outputsUnsafeRows
+  override def canProcessUnsafeRows: Boolean = true
+  override def canProcessSafeRows: Boolean = true
+
   override def references: AttributeSet =
     AttributeSet(projections.flatten.flatMap(_.references))
 
-  private[this] val projection =
-    (exprs: Seq[Expression]) => UnsafeProjection.create(exprs, child.output)
+  private[this] val projection = {
+    if (outputsUnsafeRows) {
+      (exprs: Seq[Expression]) => UnsafeProjection.create(exprs, child.output)
+    } else {
+      (exprs: Seq[Expression]) => newMutableProjection(exprs, child.output)()
+    }
+  }
 
   protected override def doExecute(): RDD[InternalRow] = attachTree(this, "execute") {
     child.execute().mapPartitions { iter =>

sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala

Lines changed: 3 additions & 5 deletions

@@ -64,7 +64,6 @@ case class Generate(
       child.execute().mapPartitionsInternal { iter =>
         val generatorNullRow = InternalRow.fromSeq(Seq.fill[Any](generator.elementTypes.size)(null))
         val joinedRow = new JoinedRow
-        val proj = UnsafeProjection.create(output, output)
 
         iter.flatMap { row =>
           // we should always set the left (child output)
@@ -78,14 +77,13 @@ case class Generate(
         } ++ LazyIterator(() => boundGenerator.terminate()).map { row =>
           // we leave the left side as the last element of its child output
           // keep it the same as Hive does
-          proj(joinedRow.withRight(row))
+          joinedRow.withRight(row)
         }
       }
     } else {
       child.execute().mapPartitionsInternal { iter =>
-        val proj = UnsafeProjection.create(output, output)
-        (iter.flatMap(row => boundGenerator.eval(row)) ++
-          LazyIterator(() => boundGenerator.terminate())).map(proj)
+        iter.flatMap(row => boundGenerator.eval(row)) ++
+          LazyIterator(() => boundGenerator.terminate())
       }
     }
   }

sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScan.scala

Lines changed: 4 additions & 9 deletions

@@ -19,7 +19,7 @@ package org.apache.spark.sql.execution
 
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.{Attribute, UnsafeProjection}
+import org.apache.spark.sql.catalyst.expressions.Attribute
 
 
 /**
@@ -29,20 +29,15 @@ private[sql] case class LocalTableScan(
     output: Seq[Attribute],
     rows: Seq[InternalRow]) extends LeafNode {
 
-  private val unsafeRows: Array[InternalRow] = {
-    val proj = UnsafeProjection.create(output, output)
-    rows.map(r => proj(r).copy()).toArray
-  }
-
-  private lazy val rdd = sqlContext.sparkContext.parallelize(unsafeRows)
+  private lazy val rdd = sqlContext.sparkContext.parallelize(rows)
 
   protected override def doExecute(): RDD[InternalRow] = rdd
 
   override def executeCollect(): Array[InternalRow] = {
-    unsafeRows
+    rows.toArray
   }
 
   override def executeTake(limit: Int): Array[InternalRow] = {
-    unsafeRows.take(limit)
+    rows.take(limit).toArray
   }
 }

sql/core/src/main/scala/org/apache/spark/sql/execution/Sort.scala

Lines changed: 4 additions & 0 deletions

@@ -39,6 +39,10 @@ case class Sort(
     testSpillFrequency: Int = 0)
   extends UnaryNode {
 
+  override def outputsUnsafeRows: Boolean = true
+  override def canProcessUnsafeRows: Boolean = true
+  override def canProcessSafeRows: Boolean = false
+
   override def output: Seq[Attribute] = child.output
 
   override def outputOrdering: Seq[SortOrder] = sortOrder
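
Sort's flags say it both requires and produces UnsafeRows, so the converter pass must wrap any safe-row child. Continuing the toy model from above (assumed names, not Spark's classes):

```scala
// A Sort-like node: unsafe rows in, unsafe rows out.
case class ToySort(child: Plan) extends Plan {
  def children: Seq[Plan] = Seq(child)
  def withChildren(c: Seq[Plan]): Plan = copy(child = c.head)
  override def outputsUnsafeRows: Boolean = true
  override def canProcessUnsafeRows: Boolean = true
  override def canProcessSafeRows: Boolean = false
}

// ensureRowFormats(ToySort(ToySafeScan())) ==> ToySort(ToUnsafe(ToySafeScan()))
```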

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala

Lines changed: 23 additions & 0 deletions

@@ -97,13 +97,36 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ
   /** Specifies sort order for each partition requirements on the input data for this operator. */
   def requiredChildOrdering: Seq[Seq[SortOrder]] = Seq.fill(children.size)(Nil)
 
+  /** Specifies whether this operator outputs UnsafeRows */
+  def outputsUnsafeRows: Boolean = false
+
+  /** Specifies whether this operator is capable of processing UnsafeRows */
+  def canProcessUnsafeRows: Boolean = false
+
+  /**
+   * Specifies whether this operator is capable of processing Java-object-based Rows (i.e. rows
+   * that are not UnsafeRows).
+   */
+  def canProcessSafeRows: Boolean = true
 
   /**
    * Returns the result of this query as an RDD[InternalRow] by delegating to doExecute
    * after adding query plan information to created RDDs for visualization.
    * Concrete implementations of SparkPlan should override doExecute instead.
    */
   final def execute(): RDD[InternalRow] = {
+    if (children.nonEmpty) {
+      val hasUnsafeInputs = children.exists(_.outputsUnsafeRows)
+      val hasSafeInputs = children.exists(!_.outputsUnsafeRows)
+      assert(!(hasSafeInputs && hasUnsafeInputs),
+        "Child operators should output rows in the same format")
+      assert(canProcessSafeRows || canProcessUnsafeRows,
+        "Operator must be able to process at least one row format")
+      assert(!hasSafeInputs || canProcessSafeRows,
+        "Operator will receive safe rows as input but cannot process safe rows")
+      assert(!hasUnsafeInputs || canProcessUnsafeRows,
+        "Operator will receive unsafe rows as input but cannot process unsafe rows")
+    }
     RDDOperationScope.withScope(sparkContext, nodeName, false, true) {
       prepare()
       doExecute()
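
The assertions restored in execute() are a runtime check of the invariant that the "Add row converters" batch is supposed to establish at planning time: all children agree on a row format, and the parent can consume it. Restated over the toy model (same caveats as before, this is a sketch, not Spark's code):

```scala
// Recursively assert the row-format contract at every node. After a correct
// ensureRowFormats pass this should never fire.
def checkRowFormatInvariants(plan: Plan): Unit = {
  if (plan.children.nonEmpty) {
    val hasUnsafe = plan.children.exists(_.outputsUnsafeRows)
    val hasSafe = plan.children.exists(!_.outputsUnsafeRows)
    assert(!(hasSafe && hasUnsafe), "children disagree on row format")
    assert(plan.canProcessSafeRows || plan.canProcessUnsafeRows,
      "operator accepts no row format at all")
    assert(!hasSafe || plan.canProcessSafeRows, "safe input to an unsafe-only operator")
    assert(!hasUnsafe || plan.canProcessUnsafeRows, "unsafe input to a safe-only operator")
  }
  plan.children.foreach(checkRowFormatInvariants)
}

// checkRowFormatInvariants(ToySort(ToySafeScan()))                    // throws
// checkRowFormatInvariants(ensureRowFormats(ToySort(ToySafeScan())))  // passes
```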

sql/core/src/main/scala/org/apache/spark/sql/execution/Window.scala

Lines changed: 5 additions & 3 deletions

@@ -100,6 +100,8 @@ case class Window(
 
   override def outputOrdering: Seq[SortOrder] = child.outputOrdering
 
+  override def canProcessUnsafeRows: Boolean = true
+
   /**
    * Create a bound ordering object for a given frame type and offset. A bound ordering object is
    * used to determine which input row lies within the frame boundaries of an output row.
@@ -257,16 +259,16 @@ case class Window(
    * @return the final resulting projection.
    */
   private[this] def createResultProjection(
-      expressions: Seq[Expression]): UnsafeProjection = {
+      expressions: Seq[Expression]): MutableProjection = {
     val references = expressions.zipWithIndex.map{ case (e, i) =>
       // Results of window expressions will be on the right side of child's output
       BoundReference(child.output.size + i, e.dataType, e.nullable)
     }
     val unboundToRefMap = expressions.zip(references).toMap
     val patchedWindowExpression = windowExpression.map(_.transform(unboundToRefMap))
-    UnsafeProjection.create(
+    newMutableProjection(
       projectList ++ patchedWindowExpression,
-      child.output)
+      child.output)()
   }
 
   protected override def doExecute(): RDD[InternalRow] = {
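
Switching createResultProjection from UnsafeProjection back to MutableProjection matters for callers because a mutable projection reuses one output row across calls, so any result that is buffered must be copied first. A toy illustration of that aliasing hazard (assumed names, not Spark's API):

```scala
// One reused output slot, like a MutableProjection's internal row.
object MutableProjectionDemo extends App {
  final class ToyMutableProjection(f: Int => Int) {
    private val buffer = Array(0)
    def apply(input: Int): Array[Int] = { buffer(0) = f(input); buffer }
  }

  val proj = new ToyMutableProjection(_ * 2)
  val aliased = Seq(1, 2, 3).map(proj(_))         // three references to one buffer
  val copied  = Seq(1, 2, 3).map(proj(_).clone()) // independent snapshots

  println(aliased.map(_(0)))  // List(6, 6, 6): every reference sees the last write
  println(copied.map(_(0)))   // List(2, 4, 6)
}
```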

sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregate.scala

Lines changed: 4 additions & 0 deletions

@@ -49,6 +49,10 @@ case class SortBasedAggregate(
     "numInputRows" -> SQLMetrics.createLongMetric(sparkContext, "number of input rows"),
     "numOutputRows" -> SQLMetrics.createLongMetric(sparkContext, "number of output rows"))
 
+  override def outputsUnsafeRows: Boolean = true
+  override def canProcessUnsafeRows: Boolean = false
+  override def canProcessSafeRows: Boolean = true
+
   override def output: Seq[Attribute] = resultExpressions.map(_.toAttribute)
 
   override def requiredChildDistribution: List[Distribution] = {
