Skip to content

Commit c2239a7

Browse files
hansonrhansonr
authored andcommitted
java.util.Scanner implemented
Passes test.ScanTests in JavaScript, with the following exceptions: - no nextLong() support for values needing more than 53 bits (> 0x20000000000000) - no UTF-16 currency symbols (hi_IN) - no non-ASCII (e.g., Tibetan) numerals - only the simpler Java POSIX clauses are supported; the more complex Unicode POSIX clauses are still missing
1 parent fdabf28 commit c2239a7

File tree

12 files changed

+1890
-1552
lines changed

12 files changed

+1890
-1552
lines changed
Binary file not shown.
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
20201203173354
1+
20201205151557
Binary file not shown.
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
20201203173354
1+
20201205151557
908 Bytes
Binary file not shown.

sources/net.sf.j2s.java.core/src/java/util/Scanner.java

Lines changed: 71 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -390,16 +390,28 @@ protected boolean hasName(Pattern p, String s) {
390390
private IOException lastException;
391391

392392
// A pattern for java whitespace
393-
private static Pattern WHITESPACE_PATTERN = Pattern.compile(
394-
"\\p{javaWhitespace}+");
393+
private static Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
394+
// "\\p{javaWhitespace}+");
395395

396396
// A pattern for any token
397397
private static Pattern FIND_ANY_PATTERN = Pattern.compile("(?s).*");
398398

399-
// A pattern for non-ASCII digits
400-
private static Pattern NON_ASCII_DIGIT = Pattern.compile(
401-
"[\\p{javaDigit}&&[^0-9]]");
399+
// Some Unicode character ranges that contain digits:
400+
//
401+
// '\u0030' through '\u0039', ISO-LATIN-1 digits ('0' through '9')
402+
// '\u0660' through '\u0669', Arabic-Indic digits
403+
// '\u06F0' through '\u06F9', Extended Arabic-Indic digits
404+
// '\u0966' through '\u096F', Devanagari digits
405+
// '\uFF10' through '\uFF19', Fullwidth digits
406+
407+
// Many other character ranges contain digits as well.
402408

409+
// A pattern for non-ASCII digits
410+
// SwingJS not supported
411+
// private static Pattern NON_ASCII_DIGIT = Pattern.compile(
412+
// //"[\\p{javaDigit}&&[^0-9]]");
413+
// "[\\p{javaDigit}&&[^0-9]]");
414+
// A pattern for java whitespace
403415
// Fields and methods to support scanning primitive types
404416

405417
/**
@@ -432,26 +444,32 @@ private static Pattern boolPattern() {
432444
*/
433445
private Pattern integerPattern;
434446
private String digits = "0123456789abcdefghijklmnopqrstuvwxyz";
435-
private String non0Digit = "[\\p{javaDigit}&&[^0]]";
436-
private int SIMPLE_GROUP_INDEX = 5;
447+
private String non0Digit = "[1-9]";//"[\\p{javaDigit}&&[^0]]";
448+
private int SIMPLE_GROUP_INDEX = 12; // SwingJS - moved simple to later
437449
private String buildIntegerPatternString() {
438-
String radixDigits = digits.substring(0, radix);
450+
String radixDigits = (radix == 10 ? "0-9" : digits.substring(0, radix) + digits.substring(10, radix).toUpperCase());
439451
// \\p{javaDigit} is not guaranteed to be appropriate
440452
// here but what can we do? The final authority will be
441453
// whatever parse method is invoked, so ultimately the
442454
// Scanner will do the right thing
443-
String digit = "((?i)["+radixDigits+"]|\\p{javaDigit})";
455+
String digit = "(["+radixDigits+"])";//\\p{javaDigit})";
444456
String groupedNumeral = "("+non0Digit+digit+"?"+digit+"?("+
445457
groupSeparator+digit+digit+digit+")+)";
446458
// digit++ is the possessive form which is necessary for reducing
447459
// backtracking that would otherwise cause unacceptable performance
448-
String numeral = "(("+ digit+"++)|"+groupedNumeral+")";
460+
// JavaScript requires reversal of these two
461+
String numeral = "("
462+
+groupedNumeral
463+
+ "|"
464+
+ "("+ digit+"+)"
465+
+")";
449466
String javaStyleInteger = "([-+]?(" + numeral + "))";
450467
String negativeInteger = negativePrefix + numeral + negativeSuffix;
451468
String positiveInteger = positivePrefix + numeral + positiveSuffix;
452-
return "("+ javaStyleInteger + ")|(" +
453-
positiveInteger + ")|(" +
454-
negativeInteger + ")";
469+
return "("+ javaStyleInteger + ")"
470+
+ "|(" + positiveInteger + ")"
471+
+ "|(" + negativeInteger + ")"
472+
;
455473
}
456474
private Pattern integerPattern() {
457475
if (integerPattern == null) {
@@ -490,23 +508,31 @@ private static Pattern linePattern() {
490508
private Pattern decimalPattern;
491509
private void buildFloatAndDecimalPattern() {
492510
// \\p{javaDigit} may not be perfect, see above
493-
String digit = "([0-9]|(\\p{javaDigit}))";
511+
String digit = "([0-9])";//|(\\p{javaDigit}))";
494512
String exponent = "([eE][+-]?"+digit+"+)?";
495-
String groupedNumeral = "("+non0Digit+digit+"?"+digit+"?("+
513+
String groupedNumeral = "("+non0Digit+digit+"?"+digit+"?"
514+
+ "("+
496515
groupSeparator+digit+digit+digit+")+)";
497516
// Once again digit++ is used for performance, as above
498-
String numeral = "(("+digit+"++)|"+groupedNumeral+")";
499-
String decimalNumeral = "("+numeral+"|"+numeral +
500-
decimalSeparator + digit + "*+|"+ decimalSeparator +
501-
digit + "++)";
517+
String numeral = "("
518+
+ "("+digit+"+)"
519+
+ "|"+groupedNumeral
520+
+")";
521+
// SwingJS had to move numeral to the end here
522+
String decimalNumeral = "("
523+
+numeral + decimalSeparator + digit + "*"
524+
+ "|"+ decimalSeparator + digit + "+"
525+
+ "|"+numeral
526+
+ ")";
502527
String nonNumber = "(NaN|"+nanString+"|Infinity|"+
503528
infinityString+")";
504529
String positiveFloat = "(" + positivePrefix + decimalNumeral +
505530
positiveSuffix + exponent + ")";
506531
String negativeFloat = "(" + negativePrefix + decimalNumeral +
507532
negativeSuffix + exponent + ")";
508-
String decimal = "(([-+]?" + decimalNumeral + exponent + ")|"+
509-
positiveFloat + "|" + negativeFloat + ")";
533+
String decimal = "(([-+]?" + decimalNumeral + exponent + ")"
534+
+ "|"+ positiveFloat + "|" + negativeFloat
535+
+ ")";
510536
String hexFloat =
511537
"[-+]?0[xX][0-9a-fA-F]*\\.[0-9a-fA-F]+([pP][-+]?[0-9]+)?";
512538
String positiveNonNumber = "(" + positivePrefix + nonNumber +
@@ -516,8 +542,9 @@ private void buildFloatAndDecimalPattern() {
516542
String signedNonNumber = "(([-+]?"+nonNumber+")|" +
517543
positiveNonNumber + "|" +
518544
negativeNonNumber + ")";
519-
floatPattern = Pattern.compile(decimal + "|" + hexFloat + "|" +
520-
signedNonNumber);
545+
floatPattern = Pattern.compile(decimal
546+
+ "|" + hexFloat + "|" + signedNonNumber
547+
);
521548
decimalPattern = Pattern.compile(decimal);
522549
}
523550
private Pattern floatPattern() {
@@ -1032,7 +1059,7 @@ private String findPatternInBuffer(Pattern pattern, int horizon) {
10321059
return null;
10331060
}
10341061
// The match could go away depending on what is next
1035-
if ((searchLimit == horizonLimit) && matcher.requireEnd()) {
1062+
if (matcher.requireEnd()) {
10361063
// Rare case: we hit the end of input and it happens
10371064
// that it is at the horizon and the end of input is
10381065
// required for the match.
@@ -2275,24 +2302,25 @@ private String processFloatToken(String token) {
22752302
if (isNegative)
22762303
result = "-" + result;
22772304

2278-
// Translate non-ASCII digits
2279-
Matcher m = NON_ASCII_DIGIT.matcher(result);
2280-
if (m.find()) {
2281-
StringBuilder inASCII = new StringBuilder();
2282-
for (int i=0; i<result.length(); i++) {
2283-
char nextChar = result.charAt(i);
2284-
if (Character.isDigit(nextChar)) {
2285-
int d = Character.digit(nextChar, 10);
2286-
if (d != -1)
2287-
inASCII.append(d);
2288-
else
2289-
inASCII.append(nextChar);
2290-
} else {
2291-
inASCII.append(nextChar);
2292-
}
2293-
}
2294-
result = inASCII.toString();
2295-
}
2305+
// swingjs NOT IMPLEMENTED
2306+
// // Translate non-ASCII digits
2307+
// Matcher m = NON_ASCII_DIGIT.matcher(result);
2308+
// if (m.find()) {
2309+
// StringBuilder inASCII = new StringBuilder();
2310+
// for (int i=0; i<result.length(); i++) {
2311+
// char nextChar = result.charAt(i);
2312+
// if (Character.isDigit(nextChar)) {
2313+
// int d = Character.digit(nextChar, 10);
2314+
// if (d != -1)
2315+
// inASCII.append(d);
2316+
// else
2317+
// inASCII.append(nextChar);
2318+
// } else {
2319+
// inASCII.append(nextChar);
2320+
// }
2321+
// }
2322+
// result = inASCII.toString();
2323+
// }
22962324

22972325
return result;
22982326
}
@@ -2629,4 +2657,5 @@ public Scanner reset() {
26292657
clearCaches();
26302658
return this;
26312659
}
2660+
26322661
}

sources/net.sf.j2s.java.core/src/java/util/regex/Matcher.java

Lines changed: 35 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ public String toString() {
171171
* have to consume all of the input. ENDANCHOR is the mode used for matching all
172172
* the input.
173173
*/
174+
static final int STARTANCHOR = 2; // SwingJS
174175
static final int ENDANCHOR = 1;
175176
static final int NOANCHOR = 0;
176177

@@ -269,10 +270,11 @@ public String toString() {
269270
* @since 1.5
270271
*/
271272
public MatchResult toMatchResult() {
272-
Matcher result = new Matcher(this.pat, cs.toString());
273-
result.first = this.first;
274-
result.last = this.last;
275-
// TODO result.groups = this.groups.clone();
273+
Matcher result = new Matcher(pat, cs.toString());
274+
result.first = first;
275+
result.last = last;
276+
result.groupCount = groupCount;
277+
result.results = results.clone();
276278
return result;
277279
}
278280

@@ -438,17 +440,17 @@ boolean search(int from, int anchor) {
438440
if (strString == null)
439441
strString = cs.toString();
440442
hitEnd = false;
441-
requireEnd = false;
443+
requireEnd = pat.pattern().endsWith("$");
442444
from = (from < 0 ? 0 : from);
443445
first = from;
444446
oldLast = (oldLast < 0 ? from : oldLast);
445447
clearGroups();
446448
String s = (rightBound == strString.length() ? strString : strString.substring(0, rightBound));
447449
RegExp rg = getRE();
448450
rg.lastIndex = from;
449-
acceptMode = (anchor != UNSPECIFIED ? anchor : NOANCHOR);
451+
acceptMode = (anchor == UNSPECIFIED ? NOANCHOR : anchor);
450452
results = execRE(rg, s);
451-
boolean result = checkRE(results);
453+
boolean result = checkRE(results, s);
452454
this.oldLast = last;
453455
return result;
454456
}
@@ -505,7 +507,7 @@ public boolean find(int start) {
505507
* this matcher's pattern
506508
*/
507509
public boolean lookingAt() {
508-
return match(leftBound, UNSPECIFIED);
510+
return match(leftBound, STARTANCHOR);
509511
}
510512

511513
/**
@@ -1039,7 +1041,6 @@ public boolean matches() {
10391041
@SuppressWarnings("null")
10401042
boolean match(int from, int anchor) {
10411043
hitEnd = false;
1042-
requireEnd = false;
10431044
from = Math.max(0, from);
10441045
first = from;
10451046
oldLast = (oldLast < 0 ? from : oldLast);
@@ -1060,18 +1061,35 @@ private String[] execRE(RegExp rg, String s) {
10601061
null;
10611062
}
10621063

1063-
private boolean checkRE(String[] r) {
1064+
private boolean checkRE(String[] r, String s) {
10641065
hitEnd = (r == null);
10651066
if (hitEnd) {
1067+
requireEnd = false;
10661068
first = -1;
1067-
} else {
1068-
groupCount = r.length - 1;
1069-
first = indexRE(r);
1070-
last = first + r[0].length();
1069+
return false;
1070+
}
1071+
groupCount = r.length - 1;
1072+
int f0 = this.first;
1073+
first = indexRE(r);
1074+
last = first + r[0].length();
1075+
hitEnd = (last == s.length());
1076+
if (hitEnd && requireEnd() && last != strString.length()) {
1077+
// scanner may be checking for a bound in
1078+
// a longer string
1079+
return false;
1080+
}
1081+
if (groupCount < 0)
1082+
return false;
1083+
switch (acceptMode) {
1084+
case STARTANCHOR:
1085+
return first == f0;
1086+
case ENDANCHOR:
1087+
return first == f0 && last ==
1088+
/** @j2sNative r.input.length || */
1089+
0;
1090+
default:
1091+
return true;
10711092
}
1072-
return (!hitEnd && r.length > 0 && (acceptMode != ENDANCHOR || r[0].length() ==
1073-
/** @j2sNative r.input.length || */
1074-
0));
10751093
}
10761094

10771095
private RegExp getRE() {

sources/net.sf.j2s.java.core/src/java/util/regex/Pattern.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1793,6 +1793,29 @@ private Pattern(String p, int f) {
17931793
}
17941794
}
17951795

1796+
private final static String[] posixes = {
1797+
"\\p{javaWhitespace}","\\W",
1798+
"\\p{javaDigit}","[0-9]",
1799+
"\\p{Lower}", "[a-z]",
1800+
"\\p{Upper}", "[A-Z]",
1801+
"\\p{ASCII}", "[\u0000-\u007F]",
1802+
"\\p{Alpha}", "[A-Za-z]",
1803+
"\\p{Digit}", "[0-9]",
1804+
"\\p{Alnum}", "[A-Za-z0-9]",
1805+
"\\p{Punct}", "[!\"#$%&'\\(\\)\\*\\+,-./:;<=>?@\\[\\\\\\]^_`{\\|}~]",
1806+
"\\p{Graph}", "[A-Za-z0-9]!\"#$%&'\\(\\)\\*\\+,-./:;<=>?@\\[\\\\\\]^_`{\\|}~]",
1807+
"\\p{Print}", "[A-Za-z0-9]!\"#$%&'\\(\\)\\*\\+,-./:;<=>?@\\[\\\\\\]^_`{\\|}~]",
1808+
"\\p{Blank}", "[ \t]",
1809+
"\\p{Cntrl}", "[\u0000-\u001F\u007F]",
1810+
"\\p{XDigit}", "[0-9a-fA-F]",
1811+
"\\p{Space}", "[ \t\n\u000B\f\r]",
1812+
"\\p{javaLowerCase}", "[a-z]",
1813+
"\\p{javaUpperCase}", "[A-Z]",
1814+
"\\p{javaWhitespace}", "\\W",
1815+
"\\p{Sc}", "[\u0024\u00A2\u00A3\u00A4\u00A5\u058F\u060B\u07FE\u07FF\u09F2\u09F3\u09FB\u0AF1\u0BF9\u0E3F\u20A0\u20A1\u20A2\u20A3\u20A4\u20A5\u20A6\u20A7\u20A8\u20A9\u20AA\u20AB\u20AC\u20AD\u20AE\u20AF\u20B0\u20B1\u20B2\u20B3\u20B4\u20B5\u20B6\u20B7\u20B8\u20B9\u20BA\u20BB\u20BC\u20BD\u20BE\u20BF\uA838\uFDFC\uFE69\uFF04\uFFE0\uFFE1\uFFE5\uFFE6]"
1816+
1817+
// more currency: \u11FDD\u11FDE\u11FDF\u11FE0\u17DB\u1E2FF\u1ECB0
1818+
};
17961819
public static String removeQEQuoting(String pattern) {
17971820
int pt;
17981821
while ((pt = pattern.indexOf("\\Q")) >= 0) {
@@ -1803,6 +1826,14 @@ public static String removeQEQuoting(String pattern) {
18031826
ret += javaQuote(pattern.substring(pt + 2, pt1));
18041827
pattern = ret + pattern.substring(pt1 + 2);
18051828
}
1829+
if (pattern.indexOf("\\p{") >= 0) {
1830+
for (int i = 0, n = posixes.length; i < n; i += 2) {
1831+
pattern = pattern.replace(posixes[i], posixes[i+1]);
1832+
}
1833+
if (pattern.indexOf("\\p{") >= 0) {
1834+
JSUtil.notImplemented("POSIX " + pattern);
1835+
}
1836+
}
18061837
return pattern;
18071838
}
18081839

0 commit comments

Comments
 (0)