Skip to content

Commit be553aa

Browse files
committed
SI-9015 Reject 0x and minor parser cleanup
Only print error results. Show deprecated forms. Test for rejected literals and clean up the parser: There was no negative test for what constitutes a legal literal. The ultimate goal is for the test to report all errors in one compilation. This commit follows up the removal of the "1." syntax to simplify number parsing. It removes code previously added by paulp to contain the erstwhile complexity. A leading zero is not immediately put into the buffer. Instead, the empty buffer is handled on evaluation. In particular, an empty buffer due to `0x` is a syntax error. The message for obsolete octal syntax is nuanced and deferred until evaluation by the parser, which is slightly simpler to reason about. Improve comment on usage of base: The slice-and-dicey usage of base deserves a better comment. The difference is that `intVal` sees an empty char buffer for input `"0"`.
1 parent 028420c commit be553aa

File tree

6 files changed

+175
-202
lines changed

6 files changed

+175
-202
lines changed

src/compiler/scala/tools/nsc/ast/parser/Scanners.scala

Lines changed: 73 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -453,18 +453,15 @@ trait Scanners extends ScannersCommon {
453453
getOperatorRest()
454454
}
455455
case '0' =>
456-
def fetchZero() = {
457-
putChar(ch)
456+
def fetchLeadingZero(): Unit = {
458457
nextChar()
459-
if (ch == 'x' || ch == 'X') {
460-
nextChar()
461-
base = 16
462-
} else {
463-
base = 8
458+
ch match {
459+
case 'x' | 'X' => base = 16 ; nextChar()
460+
case _ => base = 8 // single decimal zero, perhaps
464461
}
465-
getNumber()
466462
}
467-
fetchZero()
463+
fetchLeadingZero()
464+
getNumber()
468465
case '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' =>
469466
base = 10
470467
getNumber()
@@ -902,62 +899,61 @@ trait Scanners extends ScannersCommon {
902899
*/
903900
def charVal: Char = if (strVal.length > 0) strVal.charAt(0) else 0
904901

905-
/** Convert current strVal, base to long value
902+
/** Convert current strVal, base to long value.
906903
* This is tricky because of max negative value.
904+
*
905+
* Conversions in base 10 and 16 are supported. As a permanent migration
906+
* path, attempts to write base 8 literals except `0` emit a verbose error.
907907
*/
908908
def intVal(negated: Boolean): Long = {
909-
if (token == CHARLIT && !negated) {
910-
charVal.toLong
911-
} else {
912-
var value: Long = 0
913-
val divider = if (base == 10) 1 else 2
914-
val limit: Long =
915-
if (token == LONGLIT) Long.MaxValue else Int.MaxValue
916-
var i = 0
909+
def malformed: Long = {
910+
if (base == 8) syntaxError("Decimal integer literals may not have a leading zero. (Octal syntax is obsolete.)")
911+
else syntaxError("malformed integer number")
912+
0
913+
}
914+
def tooBig: Long = {
915+
syntaxError("integer number too large")
916+
0
917+
}
918+
def intConvert: Long = {
917919
val len = strVal.length
918-
while (i < len) {
919-
val d = digit2int(strVal charAt i, base)
920-
if (d < 0) {
921-
syntaxError("malformed integer number")
922-
return 0
923-
}
924-
if (value < 0 ||
925-
limit / (base / divider) < value ||
926-
limit - (d / divider) < value * (base / divider) &&
927-
!(negated && limit == value * base - 1 + d)) {
928-
syntaxError("integer number too large")
929-
return 0
930-
}
931-
value = value * base + d
932-
i += 1
920+
if (len == 0) {
921+
if (base != 8) syntaxError("missing integer number") // e.g., 0x;
922+
0
923+
} else {
924+
val divider = if (base == 10) 1 else 2
925+
val limit: Long = if (token == LONGLIT) Long.MaxValue else Int.MaxValue
926+
@tailrec def convert(value: Long, i: Int): Long =
927+
if (i >= len) value
928+
else {
929+
val d = digit2int(strVal charAt i, base)
930+
if (d < 0)
931+
malformed
932+
else if (value < 0 ||
933+
limit / (base / divider) < value ||
934+
limit - (d / divider) < value * (base / divider) &&
935+
!(negated && limit == value * base - 1 + d))
936+
tooBig
937+
else
938+
convert(value * base + d, i + 1)
939+
}
940+
val result = convert(0, 0)
941+
if (base == 8) malformed else if (negated) -result else result
933942
}
934-
if (negated) -value else value
935943
}
944+
if (token == CHARLIT && !negated) charVal.toLong else intConvert
936945
}
937946

938947
def intVal: Long = intVal(negated = false)
939948

940949
/** Convert current strVal, base to double value
941-
*/
950+
*/
942951
def floatVal(negated: Boolean): Double = {
943-
944-
val limit: Double =
945-
if (token == DOUBLELIT) Double.MaxValue else Float.MaxValue
952+
val limit: Double = if (token == DOUBLELIT) Double.MaxValue else Float.MaxValue
946953
try {
947954
val value: Double = java.lang.Double.valueOf(strVal).doubleValue()
948-
def isDeprecatedForm = {
949-
val idx = strVal indexOf '.'
950-
(idx == strVal.length - 1) || (
951-
(idx >= 0)
952-
&& (idx + 1 < strVal.length)
953-
&& (!Character.isDigit(strVal charAt (idx + 1)))
954-
)
955-
}
956955
if (value > limit)
957956
syntaxError("floating point number too large")
958-
if (isDeprecatedForm)
959-
syntaxError("floating point number is missing digit after dot")
960-
961957
if (negated) -value else value
962958
} catch {
963959
case _: NumberFormatException =>
@@ -968,86 +964,44 @@ trait Scanners extends ScannersCommon {
968964

969965
def floatVal: Double = floatVal(negated = false)
970966

971-
def checkNoLetter(): Unit = {
967+
def checkNoLetter(): Unit = {
972968
if (isIdentifierPart(ch) && ch >= ' ')
973969
syntaxError("Invalid literal number")
974970
}
975971

976-
/** Read a number into strVal and set base */
977-
protected def getNumber(): Unit = {
978-
val base1 = if (base < 10) 10 else base
979-
// Read 8,9's even if format is octal, produce a malformed number error afterwards.
980-
// At this point, we have already read the first digit, so to tell an innocent 0 apart
981-
// from an octal literal 0123... (which we want to disallow), we check whether there
982-
// are any additional digits coming after the first one we have already read.
983-
var notSingleZero = false
984-
while (digit2int(ch, base1) >= 0) {
985-
putChar(ch)
986-
nextChar()
987-
notSingleZero = true
988-
}
989-
token = INTLIT
990-
991-
/* When we know for certain it's a number after using a touch of lookahead */
992-
def restOfNumber() = {
993-
putChar(ch)
994-
nextChar()
972+
/** Read a number into strVal.
973+
*
974+
* The `base` can be 8, 10 or 16, where base 8 flags a leading zero.
975+
* For ints, base 8 is legal only for the case of exactly one zero.
976+
*/
977+
protected def getNumber(): Unit = {
978+
// consume digits of a radix
979+
def consumeDigits(radix: Int): Unit =
980+
while (digit2int(ch, radix) >= 0) {
981+
putChar(ch)
982+
nextChar()
983+
}
984+
// adding decimal point is always OK because `Double valueOf "0."` is OK
985+
def restOfNonIntegralNumber(): Unit = {
986+
putChar('.')
987+
if (ch == '.') nextChar()
995988
getFraction()
996989
}
997-
def restOfUncertainToken() = {
998-
def isEfd = ch match { case 'e' | 'E' | 'f' | 'F' | 'd' | 'D' => true ; case _ => false }
999-
def isL = ch match { case 'l' | 'L' => true ; case _ => false }
1000-
1001-
if (base <= 10 && isEfd)
1002-
getFraction()
1003-
else {
1004-
// Checking for base == 8 is not enough, because base = 8 is set
1005-
// as soon as a 0 is read in `case '0'` of method fetchToken.
1006-
if (base == 8 && notSingleZero) syntaxError("Non-zero integral values may not have a leading zero.")
1007-
setStrVal()
1008-
if (isL) {
1009-
nextChar()
1010-
token = LONGLIT
1011-
}
1012-
else checkNoLetter()
990+
// after int: 5e7f, 42L, 42.toDouble but not 42b. Repair 0d.
991+
def restOfNumber(): Unit = {
992+
ch match {
993+
case 'e' | 'E' | 'f' | 'F' |
994+
'd' | 'D' => if (cbuf.isEmpty) putChar('0'); restOfNonIntegralNumber()
995+
case 'l' | 'L' => token = LONGLIT ; setStrVal() ; nextChar()
996+
case _ => token = INTLIT ; setStrVal() ; checkNoLetter()
1013997
}
1014998
}
1015999

1016-
if (base > 10 || ch != '.')
1017-
restOfUncertainToken()
1018-
else {
1019-
val lookahead = lookaheadReader
1020-
val c = lookahead.getc()
1021-
1022-
/* Prohibit 1. */
1023-
if (!isDigit(c))
1024-
return setStrVal()
1025-
1026-
val isDefinitelyNumber = (c: @switch) match {
1027-
/** Another digit is a giveaway. */
1028-
case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' =>
1029-
true
1000+
// consume leading digits, provisionally an Int
1001+
consumeDigits(if (base == 16) 16 else 10)
10301002

1031-
/* Backquoted idents like 22.`foo`. */
1032-
case '`' =>
1033-
return setStrVal() /** Note the early return */
1034-
1035-
/* These letters may be part of a literal, or a method invocation on an Int.
1036-
*/
1037-
case 'd' | 'D' | 'f' | 'F' =>
1038-
!isIdentifierPart(lookahead.getc())
1039-
1040-
/* A little more special handling for e.g. 5e7 */
1041-
case 'e' | 'E' =>
1042-
val ch = lookahead.getc()
1043-
!isIdentifierPart(ch) || (isDigit(ch) || ch == '+' || ch == '-')
1044-
1045-
case x =>
1046-
!isIdentifierStart(x)
1047-
}
1048-
if (isDefinitelyNumber) restOfNumber()
1049-
else restOfUncertainToken()
1050-
}
1003+
val detectedFloat: Boolean = base != 16 && ch == '.' && isDigit(lookaheadReader.getc)
1004+
if (detectedFloat) restOfNonIntegralNumber() else restOfNumber()
10511005
}
10521006

10531007
/** Parse character literal if current character is followed by \',

test/files/neg/literals.check

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
literals.scala:6: error: missing integer number
2+
def missingHex: Int = { 0x } // line 4: was: not reported, taken as zero
3+
^
4+
literals.scala:8: error: Decimal integer literals may not have a leading zero. (Octal syntax is obsolete.)
5+
def leadingZeros: Int = { 01 } // line 6: no leading zero
6+
^
7+
literals.scala:10: error: Decimal integer literals may not have a leading zero. (Octal syntax is obsolete.)
8+
def tooManyZeros: Int = { 00 } // line 8: no leading zero
9+
^
10+
literals.scala:12: error: Decimal integer literals may not have a leading zero. (Octal syntax is obsolete.)
11+
def zeroOfNine: Int = { 09 } // line 10: no leading zero
12+
^
13+
literals.scala:16: error: Decimal integer literals may not have a leading zero. (Octal syntax is obsolete.)
14+
def zeroOfNineDot: Int = { 09. } // line 14: malformed integer, ident expected
15+
^
16+
literals.scala:23: error: missing integer number
17+
def missingHex: Int = 0x // line 22: was: not reported, taken as zero
18+
^
19+
literals.scala:27: error: Decimal integer literals may not have a leading zero. (Octal syntax is obsolete.)
20+
def tooManyZeros: Int = 00 // line 26: no leading zero
21+
^
22+
literals.scala:14: error: identifier expected but '}' found.
23+
def orphanDot: Int = { 9. } // line 12: ident expected
24+
^
25+
literals.scala:16: error: identifier expected but '}' found.
26+
def zeroOfNineDot: Int = { 09. } // line 14: malformed integer, ident expected
27+
^
28+
literals.scala:18: error: ';' expected but double literal found.
29+
def noHexFloat: Double = { 0x1.2 } // line 16: ';' expected but double literal found.
30+
^
31+
literals.scala:25: error: ';' expected but 'def' found.
32+
def leadingZeros: Int = 01 // line 24: no leading zero
33+
^
34+
literals.scala:29: error: ';' expected but 'def' found.
35+
def zeroOfNine: Int = 09 // line 28: no leading zero
36+
^
37+
literals.scala:33: error: identifier expected but 'def' found.
38+
def zeroOfNineDot: Int = 09. // line 32: malformed integer, ident expected
39+
^
40+
13 errors found

test/files/neg/literals.scala

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
2+
/* This took me literally all day.
3+
*/
4+
trait RejectedLiterals {
5+
6+
def missingHex: Int = { 0x } // line 4: was: not reported, taken as zero
7+
8+
def leadingZeros: Int = { 01 } // line 6: no leading zero
9+
10+
def tooManyZeros: Int = { 00 } // line 8: no leading zero
11+
12+
def zeroOfNine: Int = { 09 } // line 10: no leading zero
13+
14+
def orphanDot: Int = { 9. } // line 12: ident expected
15+
16+
def zeroOfNineDot: Int = { 09. } // line 14: malformed integer, ident expected
17+
18+
def noHexFloat: Double = { 0x1.2 } // line 16: ';' expected but double literal found.
19+
}
20+
21+
trait Braceless {
22+
23+
def missingHex: Int = 0x // line 22: was: not reported, taken as zero
24+
25+
def leadingZeros: Int = 01 // line 24: no leading zero
26+
27+
def tooManyZeros: Int = 00 // line 26: no leading zero
28+
29+
def zeroOfNine: Int = 09 // line 28: no leading zero
30+
31+
def orphanDot: Int = 9. // line 30: ident expected
32+
33+
def zeroOfNineDot: Int = 09. // line 32: malformed integer, ident expected
34+
35+
def noHexFloat: Double = 0x1.2 // line 34: ';' expected but double literal found.
36+
}

test/files/run/literals.check

Lines changed: 12 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,12 @@
1-
warning: there were 5 deprecation warnings; re-run with -deprecation for details
2-
test '\u0024' == '$' was successful
3-
test '\u005f' == '_' was successful
4-
test 65.asInstanceOf[Char] == 'A' was successful
5-
test "\141\142" == "ab" was successful
6-
test "\0x61\0x62".trim() == "x61\0x62" was successful
7-
8-
test (65 : Byte) == 'A' was successful
9-
10-
test 0X01 == 1 was successful
11-
test 0x01 == 1 was successful
12-
test 0x10 == 16 was successful
13-
test 0xa == 10 was successful
14-
test 0x0a == 10 was successful
15-
test +0x01 == 1 was successful
16-
test +0x10 == 16 was successful
17-
test +0xa == 10 was successful
18-
test +0x0a == 10 was successful
19-
test -0x01 == -1 was successful
20-
test -0x10 == -16 was successful
21-
test -0xa == -10 was successful
22-
test -0x0a == -10 was successful
23-
test 0x7fffffff == 2147483647 was successful
24-
test 0x80000000 == -2147483648 was successful
25-
test 0xffffffff == -1 was successful
26-
27-
test 1l == 1L was successful
28-
test 1L == 1l was successful
29-
test 1.asInstanceOf[Long] == 1l was successful
30-
test 0x7fffffffffffffffL == 9223372036854775807L was successful
31-
test 0x8000000000000000L == -9223372036854775808L was successful
32-
test 0xffffffffffffffffL == -1L was successful
33-
34-
test 1e1f == 10.0f was successful
35-
test .3f == 0.3f was successful
36-
test 0f == 0.0f was successful
37-
test 01.23f == 1.23f was successful
38-
test 3.14f == 3.14f was successful
39-
test 6.022e23f == 6.022e23f was successful
40-
test 09f == 9.0f was successful
41-
test 1.asInstanceOf[Float] == 1.0 was successful
42-
test 1l.asInstanceOf[Float] == 1.0 was successful
43-
44-
test 1e1 == 10.0 was successful
45-
test .3 == 0.3 was successful
46-
test 0.0 == 0.0 was successful
47-
test 0d == 0.0 was successful
48-
test 01.23 == 1.23 was successful
49-
test 01.23d == 1.23d was successful
50-
test 3.14 == 3.14 was successful
51-
test 1e-9d == 1.0e-9 was successful
52-
test 1e137 == 1.0e137 was successful
53-
test 1.asInstanceOf[Double] == 1.0 was successful
54-
test 1l.asInstanceOf[Double] == 1.0 was successful
55-
56-
test "".length() was successful
57-
test ggg == 3 was successful
1+
literals.scala:34: warning: Octal escape literals are deprecated, use \u0061 instead.
2+
check_success("\"\\141\\142\" == \"ab\"", "\141\142", "ab")
3+
^
4+
literals.scala:34: warning: Octal escape literals are deprecated, use \u0062 instead.
5+
check_success("\"\\141\\142\" == \"ab\"", "\141\142", "ab")
6+
^
7+
literals.scala:37: warning: Octal escape literals are deprecated, use \u0000 instead.
8+
"\0x61\0x62".getBytes(io.Codec.UTF8.charSet) sameElements Array[Byte](0, 120, 54, 49, 0, 120, 54, 50),
9+
^
10+
literals.scala:37: warning: Octal escape literals are deprecated, use \u0000 instead.
11+
"\0x61\0x62".getBytes(io.Codec.UTF8.charSet) sameElements Array[Byte](0, 120, 54, 49, 0, 120, 54, 50),
12+
^

test/files/run/literals.flags

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
-deprecation

0 commit comments

Comments
 (0)