Skip to content

Commit 970635a

Browse files
hvanhovell authored and rxin committed
[SPARK-12362][SQL][WIP] Inline Hive Parser
This PR inlines the Hive SQL parser in Spark SQL. The previous (merged) incarnation of this PR passed all tests, but had and still has problems with the build. These problems are caused by the fact that - for some reason - in some cases the ANTLR-generated code is not included in the compilation phase. This PR is a WIP and should not be merged until we have sorted out the build issues. Author: Herman van Hovell <hvanhovell@questtec.nl> Author: Nong Li <nong@databricks.com> Author: Nong Li <nongli@gmail.com> Closes apache#10525 from hvanhovell/SPARK-12362.
1 parent 44ee920 commit 970635a

File tree

18 files changed

+5443
-73
lines changed

18 files changed

+5443
-73
lines changed

pom.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1951,6 +1951,11 @@
19511951
</compilerArgs>
19521952
</configuration>
19531953
</plugin>
1954+
<plugin>
1955+
<groupId>org.antlr</groupId>
1956+
<artifactId>antlr3-maven-plugin</artifactId>
1957+
<version>3.5.2</version>
1958+
</plugin>
19541959
<!-- Surefire runs all Java tests -->
19551960
<plugin>
19561961
<groupId>org.apache.maven.plugins</groupId>

project/SparkBuild.scala

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -414,9 +414,51 @@ object Hive {
414414
// Some of our log4j jars make it impossible to submit jobs from this JVM to Hive Map/Reduce
415415
// in order to generate golden files. This is only required for developers who are adding new
416416
// query tests.
417-
fullClasspath in Test := (fullClasspath in Test).value.filterNot { f => f.toString.contains("jcl-over") }
418-
)
417+
fullClasspath in Test := (fullClasspath in Test).value.filterNot { f => f.toString.contains("jcl-over") },
418+
// ANTLR code-generation step.
419+
//
420+
// This has been heavily inspired by com.github.stefri.sbt-antlr (0.5.3). It fixes a number of
421+
// build errors in the current plugin.
422+
// Create Parser from ANTLR grammar files.
423+
sourceGenerators in Compile += Def.task {
424+
val log = streams.value.log
425+
426+
val grammarFileNames = Seq(
427+
"SparkSqlLexer.g",
428+
"SparkSqlParser.g")
429+
val sourceDir = (sourceDirectory in Compile).value / "antlr3"
430+
val targetDir = (sourceManaged in Compile).value
431+
432+
// Create default ANTLR Tool.
433+
val antlr = new org.antlr.Tool
434+
435+
// Setup input and output directories.
436+
antlr.setInputDirectory(sourceDir.getPath)
437+
antlr.setOutputDirectory(targetDir.getPath)
438+
antlr.setForceRelativeOutput(true)
439+
antlr.setMake(true)
440+
441+
// Add grammar files.
442+
grammarFileNames.flatMap(gFileName => (sourceDir ** gFileName).get).foreach { gFilePath =>
443+
val relGFilePath = (gFilePath relativeTo sourceDir).get.getPath
444+
log.info("ANTLR: Grammar file '%s' detected.".format(relGFilePath))
445+
antlr.addGrammarFile(relGFilePath)
446+
}
419447

448+
// Generate the parser.
449+
antlr.process
450+
if (antlr.getNumErrors > 0) {
451+
log.error("ANTLR: Caught %d build errors.".format(antlr.getNumErrors))
452+
}
453+
454+
// Return all generated java files.
455+
(targetDir ** "*.java").get.toSeq
456+
}.taskValue,
457+
// Include ANTLR tokens files.
458+
resourceGenerators in Compile += Def.task {
459+
((sourceManaged in Compile).value ** "*.tokens").get.toSeq
460+
}.taskValue
461+
)
420462
}
421463

422464
object Assembly {

project/plugins.sbt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,5 @@ addSbtPlugin("io.spray" % "sbt-revolver" % "0.7.2")
2727
libraryDependencies += "org.ow2.asm" % "asm" % "5.0.3"
2828

2929
libraryDependencies += "org.ow2.asm" % "asm-commons" % "5.0.3"
30+
31+
libraryDependencies += "org.antlr" % "antlr" % "3.5.2"

sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,12 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
308308

309309
// The difference between the double numbers generated by Hive and Spark
310310
// can be ignored (e.g., 0.6633880657639323 and 0.6633880657639322)
311-
"udaf_corr"
311+
"udaf_corr",
312+
313+
// Feature removed in HIVE-11145
314+
"alter_partition_protect_mode",
315+
"drop_partitions_ignore_protection",
316+
"protectmode"
312317
)
313318

314319
/**
@@ -328,7 +333,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
328333
"alter_index",
329334
"alter_merge_2",
330335
"alter_partition_format_loc",
331-
"alter_partition_protect_mode",
332336
"alter_partition_with_whitelist",
333337
"alter_rename_partition",
334338
"alter_table_serde",
@@ -460,7 +464,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
460464
"drop_partitions_filter",
461465
"drop_partitions_filter2",
462466
"drop_partitions_filter3",
463-
"drop_partitions_ignore_protection",
464467
"drop_table",
465468
"drop_table2",
466469
"drop_table_removes_partition_dirs",
@@ -778,7 +781,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
778781
"ppr_pushdown2",
779782
"ppr_pushdown3",
780783
"progress_1",
781-
"protectmode",
782784
"push_or",
783785
"query_with_semi",
784786
"quote1",

sql/hive/pom.xml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,7 @@
232232
<configuration>
233233
<sources>
234234
<source>v${hive.version.short}/src/main/scala</source>
235+
<source>${project.build.directory}/generated-sources/antlr</source>
235236
</sources>
236237
</configuration>
237238
</execution>
@@ -260,6 +261,27 @@
260261
</execution>
261262
</executions>
262263
</plugin>
264+
265+
266+
<plugin>
267+
<groupId>org.antlr</groupId>
268+
<artifactId>antlr3-maven-plugin</artifactId>
269+
<executions>
270+
<execution>
271+
<goals>
272+
<goal>antlr</goal>
273+
</goals>
274+
</execution>
275+
</executions>
276+
<configuration>
277+
<sourceDirectory>${basedir}/src/main/antlr3</sourceDirectory>
278+
<includes>
279+
<include>**/SparkSqlLexer.g</include>
280+
<include>**/SparkSqlParser.g</include>
281+
</includes>
282+
</configuration>
283+
</plugin>
284+
263285
</plugins>
264286
</build>
265287
</project>

0 commit comments

Comments
 (0)