
Commit 2f6e154

sparksburnitt authored and xinyunh committed
fix typos, optimize imports
1 parent 1296b1a commit 2f6e154

22 files changed: +83 −129 lines changed

src/main/scala/org/apache/spark/sql/hbase/HBaseCatalog.scala

Lines changed: 4 additions & 4 deletions
@@ -29,9 +29,9 @@ import org.apache.hadoop.hbase.{Coprocessor, HColumnDescriptor, HTableDescriptor
import org.apache.log4j.Logger
import org.apache.spark.Logging
import org.apache.spark.sql.SQLContext
-import org.apache.spark.sql.catalyst.{SimpleCatalystConf, CatalystConf}
import org.apache.spark.sql.catalyst.analysis.{Catalog, OverrideCatalog}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Subquery}
+import org.apache.spark.sql.catalyst.{CatalystConf, SimpleCatalystConf}
import org.apache.spark.sql.hbase.HBaseCatalog._
import org.apache.spark.sql.types._

@@ -150,7 +150,7 @@ private[hbase] class HBaseCatalog(@transient hbaseContext: SQLContext,

deploySuccessfully_internal = Some(!results.isEmpty)
if (results.isEmpty) {
-logger.warn( """Not deplyed successfully""")
+logger.warn( """CheckDirEndPoint coprocessor deployment failed.""")
}

pwdIsAccessible = !results.containsValue(false)
@@ -189,7 +189,7 @@ private[hbase] class HBaseCatalog(@transient hbaseContext: SQLContext,
families.foreach {
case family =>
if (!checkFamilyExists(hbaseTableName, family)) {
-throw new Exception(s"The HBase table doesn't contain the Column Family: $family")
+throw new Exception(s"HBase table does not contain column family: $family")
}
}
}
@@ -349,7 +349,7 @@ private[hbase] class HBaseCatalog(@transient hbaseContext: SQLContext,
def deleteTable(tableName: String): Unit = {
val metadataTable = getMetadataTable
if (!checkLogicalTableExist(tableName, metadataTable)) {
-throw new IllegalStateException(s"The logical table $tableName does not exist")
+throw new IllegalStateException(s"Logical table $tableName does not exist.")
}

val delete = new Delete(Bytes.toBytes(tableName))
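
Note on the deleteTable hunk above: the metadata row for a logical table is removed through the plain HBase client Delete API. The following is a minimal sketch of that pattern only; the table handle and the method name are stand-ins for the catalog's own members, not the project's actual code.

    import org.apache.hadoop.hbase.client.{Delete, HTable}
    import org.apache.hadoop.hbase.util.Bytes

    // Sketch: delete the metadata row keyed by the logical table name.
    def deleteTableMetadata(metadataTable: HTable, tableName: String): Unit = {
      // The row key of the metadata entry is the UTF-8 encoding of the table name.
      val delete = new Delete(Bytes.toBytes(tableName))
      metadataTable.delete(delete)  // remove the row
      metadataTable.flushCommits()  // push the deletion to the region server before returning
    }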

src/main/scala/org/apache/spark/sql/hbase/HBaseCriticalPoint.scala

Lines changed: 1 addition & 1 deletion
@@ -17,9 +17,9 @@
package org.apache.spark.sql.hbase

import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.types._
import org.apache.spark.sql.hbase.catalyst.expressions.PartialPredicateOperations._
import org.apache.spark.sql.hbase.types.Range
+import org.apache.spark.sql.types._

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

src/main/scala/org/apache/spark/sql/hbase/HBaseCustomFilter.scala

Lines changed: 16 additions & 16 deletions
@@ -31,13 +31,13 @@ import org.apache.spark.sql.hbase.util.{BinaryBytesUtils, DataTypeUtils, HBaseKV
import org.apache.spark.sql.types.{AtomicType, DataType, StringType}

/**
-* The custom filter, it will skip the scan to the proper next position based on predicate
-* this filter will only deal with the predicate which has key columns inside
+* The custom filter. It will skip the scanner to the proper next position based on predicate.
+* This filter will only deal with the predicate containing key columns.
*
-* The skip is multiple-dimensional on non-leading dimension keys in precense of the predicate's
-* range expressions; other types of expressions in the predicate will be eventually evaluated
+* The skip is multiple-dimensional on non-leading dimension keys in presence of the predicate's
+* range expressions; other types of expressions in the predicate will be eventually evaluated.
*
-* The processing is stateful in that various info related to the previous processing is cahched,
+* The processing is stateful in that various info related to the previous processing is cached
* and checked in the next invocations for maximum reuse.
*/
private[hbase] class HBaseCustomFilter extends FilterBase with Writable {
@@ -180,18 +180,18 @@ private[hbase] class HBaseCustomFilter extends FilterBase with Writable {
* recursively reset the index of the current child and the value in the child's CPR
* @param node the start level, it will also reset its children
*/
-private def resetDecendents(node: Node): Unit = {
+private def resetDescendants(node: Node): Unit = {
if (node.children != null) {
node.currentChildIndex = 0
for (child <- node.children) {
resetNode(child)
-resetDecendents(child)
+resetDescendants(child)
}
}
}

/**
-* A quick top-down check whether the new row is in the current CPRs
+* A quick top-down check whether the new row is in the current CPRs.
* @param dimValues the current dimensional keys to check
* @param dimLimit the lower bound of the dimensions to be checked with.
* 0 for the most significant dimension
@@ -241,7 +241,7 @@ private[hbase] class HBaseCustomFilter extends FilterBase with Writable {
remainingPredicate = null
remainingPredicateBoundRef = null
currentValues = inputValues
-resetDecendents(node)
+resetDescendants(node)
val result = findNextHint(node)
nextReturnCode = result._1
if (nextReturnCode == ReturnCode.SEEK_NEXT_USING_HINT) {
@@ -384,7 +384,7 @@ private[hbase] class HBaseCustomFilter extends FilterBase with Writable {
// cannot find a containing child but there is a larger child
node.currentChildIndex = childIndex
val child = node.children(childIndex)
-resetDecendents(child)
+resetDescendants(child)
if (child.cpr != null) {
child.currentValue = child.cpr.start.orNull
if (child.currentValue != null && !child.cpr.startInclusive) {
@@ -407,7 +407,7 @@ private[hbase] class HBaseCustomFilter extends FilterBase with Writable {
if (addOne(currentNode)) {
val cmp = compareWithinRange(currentNode.cpr.dt, currentNode.currentValue, currentNode.cpr)
if (cmp == 0) {
-resetDecendents(currentNode)
+resetDescendants(currentNode)
return (ReturnCode.SEEK_NEXT_USING_HINT, buildRowKey())
} else {
require(cmp > 0, "Internal logical error: unexpected ordering of row key")
@@ -418,7 +418,7 @@ private[hbase] class HBaseCustomFilter extends FilterBase with Writable {
// no look back: release the memory
currentNode.children = null
}
-resetDecendents(currentNode.parent)
+resetDescendants(currentNode.parent)
currentNode.parent.currentChildIndex = childIndex
return (ReturnCode.SEEK_NEXT_USING_HINT, buildRowKey())
} else {
@@ -562,14 +562,14 @@ private[hbase] class HBaseCustomFilter extends FilterBase with Writable {
node.children = Seq(Node(dt, dimIndex, node,
cpr = new CriticalPointRange[t](None, false, None, false, dt, null)))
}
-resetDecendents(node)
+resetDescendants(node)
}

/**
-* do a full evaluation for the remaining predicate based on all the cell values
+* Do a full evaluation for the remaining predicate based on all the cell values.
* @param kvs the list of cell
*/
-private def fullEvalution(kvs: java.util.List[Cell]) = {
+private def fullEvaluation(kvs: java.util.List[Cell]) = {
resetRow(workingRow)
cellMap.clear()
for (i <- 0 to kvs.size() - 1) {
@@ -617,7 +617,7 @@ private[hbase] class HBaseCustomFilter extends FilterBase with Writable {
// If a later HBase release has this addressed, this check will be made unnecessary
// to save some CPU cycles
if (kvs.isEmpty) filterRowFlag = true
-else if (remainingPredicate != null) fullEvalution(kvs)
+else if (remainingPredicate != null) fullEvaluation(kvs)
}

override def hasFilterRow: Boolean = {
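
The resetDecendents -> resetDescendants rename above touches a recursive helper that walks the filter's critical-point-range (CPR) tree top-down and clears per-node scan state. As an illustration only, here is a self-contained sketch of that recursion over a simplified node type that carries just the two fields visible in this diff (currentChildIndex and children); the real Node holds considerably more state, and resetNode does more than reset an index.

    // Simplified stand-in for the filter's internal Node type.
    final case class SimpleNode(var currentChildIndex: Int = 0,
                                var children: Seq[SimpleNode] = null)

    // Recursively reset the current-child index of a node and of all its descendants,
    // mirroring the shape of resetDescendants in the hunk above.
    def resetDescendants(node: SimpleNode): Unit = {
      if (node.children != null) {
        node.currentChildIndex = 0
        for (child <- node.children) {
          child.currentChildIndex = 0 // the real code calls resetNode(child) here
          resetDescendants(child)
        }
      }
    }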

src/main/scala/org/apache/spark/sql/hbase/HBasePartition.scala

Lines changed: 1 addition & 1 deletion
@@ -17,10 +17,10 @@
package org.apache.spark.sql.hbase

import org.apache.hadoop.hbase.regionserver.RegionScanner
-import org.apache.spark.{Logging, Partition}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.hbase.catalyst.expressions.PartialPredicateOperations._
import org.apache.spark.sql.hbase.types.{HBaseBytesType, Range}
+import org.apache.spark.{Logging, Partition}


private[hbase] class HBasePartition(

src/main/scala/org/apache/spark/sql/hbase/HBasePartitioner.scala

Lines changed: 2 additions & 5 deletions
@@ -17,12 +17,9 @@

package org.apache.spark.sql.hbase

-import java.io.{IOException, ObjectInputStream, ObjectOutputStream}
-
import org.apache.hadoop.hbase.util.Bytes
-import org.apache.spark.serializer.JavaSerializer
-import org.apache.spark.util.{CollectionsUtils, Utils}
-import org.apache.spark.{Partitioner, SparkEnv}
+import org.apache.spark.Partitioner
+import org.apache.spark.util.CollectionsUtils

object HBasePartitioner {
implicit object HBaseRawOrdering extends Ordering[HBaseRawType] {
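
The surviving context line shows HBasePartitioner's implicit HBaseRawOrdering over raw row keys. The diff does not include its body, but an ordering of this kind is typically an unsigned, byte-wise lexicographic comparison, which is how HBase itself orders row keys. A plausible sketch, assuming HBaseRawType is an alias for Array[Byte] (an assumption; the alias is not shown here):

    import org.apache.hadoop.hbase.util.Bytes

    object RawKeyOrderingSketch {
      type RawKey = Array[Byte] // stand-in for HBaseRawType

      // Unsigned, byte-wise lexicographic comparison via the HBase Bytes utility.
      implicit object RawKeyOrdering extends Ordering[RawKey] {
        override def compare(a: RawKey, b: RawKey): Int = Bytes.compareTo(a, b)
      }

      // Example: sort a set of region boundary keys in HBase key order.
      def sortKeys(keys: Seq[RawKey]): Seq[RawKey] = keys.sorted
    }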

src/main/scala/org/apache/spark/sql/hbase/HBaseRelation.scala

Lines changed: 4 additions & 5 deletions
@@ -17,18 +17,17 @@
package org.apache.spark.sql.hbase

import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.hbase.util.Bytes
-import org.apache.hadoop.hbase.{HBaseConfiguration, _}
import org.apache.hadoop.hbase.client.{Get, HTable, Put, Result, Scan}
import org.apache.hadoop.hbase.filter._
+import org.apache.hadoop.hbase.util.Bytes
+import org.apache.hadoop.hbase.{HBaseConfiguration, _}
import org.apache.log4j.Logger
import org.apache.spark.TaskContext
import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.DataFrame
-import org.apache.spark.sql.SQLContext
+import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.hbase.catalyst.expressions.PartialPredicateOperations.partialPredicateReducer
import org.apache.spark.sql.hbase.catalyst.NotPusher
+import org.apache.spark.sql.hbase.catalyst.expressions.PartialPredicateOperations.partialPredicateReducer
import org.apache.spark.sql.hbase.types.Range
import org.apache.spark.sql.hbase.util._
import org.apache.spark.sql.sources.{BaseRelation, CatalystScan, InsertableRelation, LogicalRelation, RelationProvider}

src/main/scala/org/apache/spark/sql/hbase/HBaseSQLReaderRDD.scala

Lines changed: 1 addition & 1 deletion
@@ -44,7 +44,7 @@ object CoprocessorConstants {
* @param useCustomFilter whether custom filter is in effect
* @param output projection. For post coprocessor processing,
* is the projection of the original scan
-* @param subplan coproecssor subplan to be sent to coprocessor
+* @param subplan coprocessor subplan to be sent to coprocessor
* @param dummyRDD in-memory scan RDD, might be used to reconstruct the original subplan.
* This is possible when decision to use coprocessor has to be made
* by the slaves when its partition-specific predicate is

src/main/scala/org/apache/spark/sql/hbase/HBaseSerializer.scala

Lines changed: 0 additions & 11 deletions
@@ -19,17 +19,6 @@ package org.apache.spark.sql.hbase

import java.io._

-import org.apache.hadoop.hbase.{KeyValue, CellUtil, Cell}
-import org.apache.hadoop.hbase.exceptions.DeserializationException
-import org.apache.hadoop.hbase.filter.Filter.ReturnCode
-import org.apache.hadoop.hbase.filter.FilterBase
-import org.apache.hadoop.hbase.util.{Bytes, Writables}
-import org.apache.hadoop.io.Writable
-import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.hbase.util.{HBaseKVHelper, DataTypeUtils, BinaryBytesUtils}
-import org.apache.spark.sql.types.{DataType, AtomicType, StringType}
-import org.apache.spark.sql.hbase.catalyst.expressions.PartialPredicateOperations._
-
/**
* the serializer to serialize / de-serialize the objects for HBase embedded execution,
* may be made configurable and use the ones provided by Spark in the future.

src/main/scala/org/apache/spark/sql/hbase/ScanPredClassifier.scala

Lines changed: 1 addition & 1 deletion
@@ -18,7 +18,7 @@
package org.apache.spark.sql.hbase

import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.hbase.util.{BinaryBytesUtils, DataTypeUtils}
+import org.apache.spark.sql.hbase.util.DataTypeUtils

/**
* Classifies a predicate into a pair of (pushdownable, non-pushdownable) predicates

src/main/scala/org/apache/spark/sql/hbase/catalyst/expressions/PartialPredicateOperations.scala

Lines changed: 2 additions & 2 deletions
@@ -19,9 +19,9 @@ package org.apache.spark.sql.hbase.catalyst.expressions

import org.apache.spark.sql.catalyst.errors.TreeNodeException
import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.types._
-import org.apache.spark.sql.hbase.types._
import org.apache.spark.sql.hbase.types.RangeType._
+import org.apache.spark.sql.hbase.types._
+import org.apache.spark.sql.types._

object PartialPredicateOperations {
// When the checkNullness argument of the partialReduce method is false, the partial

src/main/scala/org/apache/spark/sql/hbase/execution/AddCoprocessor.scala

Lines changed: 1 addition & 1 deletion
@@ -19,9 +19,9 @@ package org.apache.spark.sql.hbase.execution

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.execution.expressions._
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution._
+import org.apache.spark.sql.execution.expressions._
import org.apache.spark.sql.hbase._

private[hbase] case class AddCoprocessor(sqlContext: SQLContext) extends Rule[SparkPlan] {

src/main/scala/org/apache/spark/sql/hbase/execution/HBaseStrategies.scala

Lines changed: 3 additions & 3 deletions
@@ -149,14 +149,14 @@ private[hbase] trait HBaseStrategies {
//or it missed some mid dimensions in the rowkey,
//that means we have to do it with the partial aggregation.
//
-//If the groupingExpreesions are composed by all keys,
+//If the groupingExpressions are composed by all keys,
//that means it need to be grouped by rowkey in all dimensions,
//so we could do the aggregation for all directly.
if (keysForGroup.size != groupingExpressions.size) aggrWithPartial
else if (keysForGroup.size == hbaseRelation.keyColumns.size) aggrForAll
else {
val partitionsAfterFilter = scanNode.result.partitions
-val eachPartionApart = (0 to partitionsAfterFilter.length - 2).forall { case i =>
+val eachPartitionApart = (0 to partitionsAfterFilter.length - 2).forall { case i =>
val headEnd = partitionsAfterFilter(i).asInstanceOf[HBasePartition]
.end.get.asInstanceOf[HBaseRawType]
val tailStart = partitionsAfterFilter(i + 1).asInstanceOf[HBasePartition]
@@ -165,7 +165,7 @@ private[hbase] trait HBaseStrategies {
// for the given rowkey dimensions, we could not do the aggregation for all.
distinguishedForGroupKeys(headEnd, tailStart, keysForGroup)
}
-if (eachPartionApart) aggrForAll
+if (eachPartitionApart) aggrForAll
else aggrWithPartial
}
}
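
The eachPartionApart -> eachPartitionApart rename sits inside the strategy that picks between full and partial aggregation. The surrounding logic reduces to a three-way decision on the grouping keys; the following condensed sketch restates it, with the adjacent-partition boundary test abstracted behind a caller-supplied predicate (boundariesDistinct is a hypothetical stand-in for the distinguishedForGroupKeys check). It paraphrases the diff and is not the project's code.

    sealed trait AggregationChoice
    case object AggregateForAll extends AggregationChoice      // aggregate each partition independently
    case object AggregateWithPartial extends AggregationChoice // partial aggregation plus a final merge

    def chooseAggregation(groupingKeyCount: Int,
                          groupingExprCount: Int,
                          rowKeyColumnCount: Int,
                          boundariesDistinct: => Boolean): AggregationChoice =
      if (groupingKeyCount != groupingExprCount) AggregateWithPartial // grouping uses non-key columns
      else if (groupingKeyCount == rowKeyColumnCount) AggregateForAll // grouped by the full row key
      else if (boundariesDistinct) AggregateForAll                    // adjacent partitions never split a group
      else AggregateWithPartial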

src/main/scala/org/apache/spark/sql/hbase/execution/hbaseCommands.scala

Lines changed: 1 addition & 1 deletion
@@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.plans.logical.Subquery
import org.apache.spark.sql.execution.RunnableCommand
import org.apache.spark.sql.hbase.HBasePartitioner.HBaseRawOrdering
import org.apache.spark.sql.hbase._
-import org.apache.spark.sql.hbase.util.{Util, DataTypeUtils}
+import org.apache.spark.sql.hbase.util.{DataTypeUtils, Util}
import org.apache.spark.sql.sources.LogicalRelation
import org.apache.spark.sql.types._
import org.apache.spark.{Logging, SerializableWritable, SparkEnv, TaskContext}

src/main/scala/org/apache/spark/sql/hbase/util/DataTypeUtils.scala

Lines changed: 2 additions & 6 deletions
@@ -16,15 +16,11 @@
*/
package org.apache.spark.sql.hbase.util

-import com.google.protobuf.InvalidProtocolBufferException
-import org.apache.hadoop.hbase.exceptions.DeserializationException
-import org.apache.hadoop.hbase.filter.{ByteArrayComparable, BinaryComparator}
-import org.apache.hadoop.hbase.protobuf.generated.ComparatorProtos
-import org.apache.hadoop.hbase.util.{ByteStringer, Bytes}
+import org.apache.hadoop.hbase.filter.{BinaryComparator, ByteArrayComparable}
import org.apache.spark.sql.catalyst.expressions.{Literal, MutableRow, Row}
import org.apache.spark.sql.execution.SparkSqlSerializer
-import org.apache.spark.sql.types._
import org.apache.spark.sql.hbase._
+import org.apache.spark.sql.types._

/**
* Data Type conversion utilities

src/main/scala/org/apache/spark/sql/hbase/util/HBaseKVHelper.scala

Lines changed: 0 additions & 2 deletions
@@ -21,8 +21,6 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, Row}
import org.apache.spark.sql.hbase._
import org.apache.spark.sql.types._

-import scala.collection.mutable.ArrayBuffer
-
object HBaseKVHelper {
val delimiter: Byte = 0

src/main/scala/org/apache/spark/sql/hbase/util/bytesUtils.scala

Lines changed: 1 addition & 1 deletion
@@ -17,8 +17,8 @@
package org.apache.spark.sql.hbase.util

import org.apache.hadoop.hbase.util.Bytes
-import org.apache.spark.sql.types._
import org.apache.spark.sql.hbase._
+import org.apache.spark.sql.types._

trait BytesUtils {
def create(dataType: DataType): ToBytesUtils

src/main/scala/org/apache/spark/sql/hbase/util/comparators.scala

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@ import com.google.protobuf.InvalidProtocolBufferException
import org.apache.hadoop.hbase.exceptions.DeserializationException
import org.apache.hadoop.hbase.filter.ByteArrayComparable
import org.apache.hadoop.hbase.protobuf.generated.ComparatorProtos
-import org.apache.hadoop.hbase.util.{Bytes, ByteStringer}
+import org.apache.hadoop.hbase.util.{ByteStringer, Bytes}
import org.apache.spark.sql.hbase._

class CustomComparator(value: Array[Byte]) extends ByteArrayComparable(value) {

src/test/java/org/apache/spark/sql/hbase/api/java/JavaAPISuite.java

Lines changed: 5 additions & 5 deletions
@@ -17,19 +17,19 @@

package org.apache.spark.sql.hbase.api.java;

-import java.io.Serializable;
-
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
-import org.apache.spark.sql.hbase.*;
+import org.apache.spark.sql.hbase.HBaseSQLContext;
+import org.apache.spark.sql.hbase.TestBase;
import org.junit.Before;
import org.junit.Test;

-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.Row;
+import java.io.Serializable;

public class JavaAPISuite extends TestBase implements Serializable {
private transient JavaSparkContext sc;

src/test/scala/org/apache/spark/sql/hbase/BytesUtilsSuite.scala

Lines changed: 2 additions & 2 deletions
@@ -17,11 +17,11 @@

package org.apache.spark.sql.hbase

-import org.apache.spark.Logging
import org.apache.hadoop.hbase.util.Bytes
-import org.apache.spark.sql.types._
+import org.apache.spark.Logging
import org.apache.spark.sql.hbase.types.HBaseBytesType
import org.apache.spark.sql.hbase.util.BinaryBytesUtils
+import org.apache.spark.sql.types._
import org.scalatest.{BeforeAndAfterAll, FunSuite}

class BytesUtilsSuite extends FunSuite with BeforeAndAfterAll with Logging {

src/test/scala/org/apache/spark/sql/hbase/CriticalPointsTestSuite.scala

Lines changed: 1 addition & 1 deletion
@@ -18,8 +18,8 @@ package org.apache.spark.sql.hbase

import org.apache.spark._
import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.types._
import org.apache.spark.sql.hbase.util.{BinaryBytesUtils, HBaseKVHelper}
+import org.apache.spark.sql.types._
import org.scalatest.{BeforeAndAfterAll, FunSuite}

import scala.collection.mutable.ArrayBuffer
