diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/CachedProps.java b/UnicodeJsps/src/main/java/org/unicode/jsp/CachedProps.java index 8268dc756..f3bc4ce28 100644 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/CachedProps.java +++ b/UnicodeJsps/src/main/java/org/unicode/jsp/CachedProps.java @@ -30,6 +30,8 @@ import com.ibm.icu.util.ICUUncheckedIOException; import com.ibm.icu.util.VersionInfo; +import org.unicode.props.UnicodeProperty; + public class CachedProps { public static final boolean IS_BETA = false; diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/CharEncoder.java b/UnicodeJsps/src/main/java/org/unicode/jsp/CharEncoder.java deleted file mode 100644 index ed859e2d3..000000000 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/CharEncoder.java +++ /dev/null @@ -1,101 +0,0 @@ -/** - * - */ -package org.unicode.jsp; - -import java.nio.BufferUnderflowException; -import java.nio.ByteBuffer; -import java.nio.CharBuffer; -import java.nio.charset.Charset; -import java.nio.charset.CharsetDecoder; -import java.nio.charset.CharsetEncoder; -import java.nio.charset.CoderResult; -import java.nio.charset.CodingErrorAction; - -public class CharEncoder { - - private boolean verifyRoundtrip; - private boolean justCheck; - - private CharsetEncoder encoder; - private CharsetDecoder decoder; - - private char[] chars = new char[2]; - private char[] returnChars = new char[2]; - - private CharBuffer charBuffer = CharBuffer.wrap(chars); - private ByteBuffer byteBuffer = ByteBuffer.allocate(5); - private CharBuffer returnCharBuffer = CharBuffer.wrap(returnChars); - - /** - * - * @param charset - * @param verifyRoundtrip - * @param justCheck - */ - public CharEncoder(Charset charset, boolean verifyRoundtrip, boolean justCheck) { - this.verifyRoundtrip = verifyRoundtrip; - this.justCheck = justCheck; - encoder = charset.newEncoder() - .onMalformedInput(CodingErrorAction.REPORT) - .onUnmappableCharacter(CodingErrorAction.REPORT); - decoder = charset.newDecoder() - .onMalformedInput(CodingErrorAction.REPORT) - .onUnmappableCharacter(CodingErrorAction.REPORT); - } - - public boolean isVerifyRoundtrip() { - return verifyRoundtrip; - } - - /** - * Convert the code point. Return -1 if fails. If justCheck, then return 1 if success. Otherwise return length of the bytes - * converted, and fill in the destination. In either case, if isVerifyRoundtrip() then check that the roundtrip works. - * @param codepoint - * @param destination - * @param offset - * @return - * @throws BufferUnderflowException if the supplied destination is too small. - */ - public int getValue(int codepoint, byte[] destination, int offset) { - int byteLen; - try { - int len = Character.toChars(codepoint, chars, 0); - charBuffer.limit(len); - charBuffer.position(0); - byteBuffer.clear(); - CoderResult encodeResult = encoder.encode(charBuffer, byteBuffer, true); - if (encodeResult.isError()) { - return -1; - } - if (verifyRoundtrip) { - byteBuffer.flip(); - returnCharBuffer.clear(); - CoderResult decodeResult = decoder.decode(byteBuffer, returnCharBuffer, true); - if (decodeResult.isError()) { - return -1; - } - int len2 = returnCharBuffer.position(); - if (len != len2) { - return -1; - } - if (chars[0] != returnChars[0]) { - return -1; - } - if (len > 1 && chars[1] != returnChars[1]) { - return -1; - } - } - if (justCheck) return 1; - byteBuffer.flip(); - byteLen = byteBuffer.limit(); - byteBuffer.get(destination, offset, byteLen); - return byteLen; - } catch (Exception e) { - if (e instanceof BufferUnderflowException) { - throw (BufferUnderflowException) e; - } - return -1; - } - } -} \ No newline at end of file diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/FileUtilities.java b/UnicodeJsps/src/main/java/org/unicode/jsp/FileUtilities.java deleted file mode 100644 index 9b2bc1019..000000000 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/FileUtilities.java +++ /dev/null @@ -1,207 +0,0 @@ -package org.unicode.jsp; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.nio.charset.Charset; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.regex.Pattern; - -public final class FileUtilities { - - public static abstract class SemiFileReader { - public final static Pattern SPLIT = Pattern.compile("\\s*;\\s*"); - private int lineCount; - - protected void handleStart() {} - protected abstract boolean handleLine(int start, int end, String[] items); - protected void handleEnd() {} - - public int getLineCount() { - return lineCount; - } - - protected boolean isCodePoint() { - return true; - } - - protected String[] splitLine(String line) { - return SPLIT.split(line); - } - - public SemiFileReader process(Class classLocation, String fileName) { - BufferedReader in; - try { - in = FileUtilities.openFile(classLocation, fileName); - } catch (Exception e) { - throw (RuntimeException) new IllegalArgumentException(classLocation.getName() + ", " + fileName).initCause(e); - } - try { - return process(in, fileName); - } catch (Exception e) { - throw (RuntimeException) new IllegalArgumentException(lineCount + ":\t" + 0).initCause(e); - } - } - - public SemiFileReader process(String directory, String fileName) { - try { - FileInputStream fileStream = new FileInputStream(directory + "/" + fileName); - InputStreamReader reader = new InputStreamReader(fileStream, FileUtilities.UTF8); - BufferedReader bufferedReader = new BufferedReader(reader,1024*64); - return process(bufferedReader, fileName); - } catch (Exception e) { - throw (RuntimeException) new IllegalArgumentException(lineCount + ":\t" + 0).initCause(e); - } - } - - public SemiFileReader process(BufferedReader in, String fileName) { - handleStart(); - String line = null; - lineCount = 1; - try { - for (; ; ++lineCount) { - line = in.readLine(); - if (line == null) { - break; - } - int comment = line.indexOf("#"); - if (comment >= 0) { - line = line.substring(0,comment); - } - if (line.startsWith("\uFEFF")) { - line = line.substring(1); - } - line = line.trim(); - if (line.length() == 0) { - continue; - } - String[] parts = splitLine(line); - int start, end; - if (isCodePoint()) { - String source = parts[0]; - int range = source.indexOf(".."); - if (range >= 0) { - start = Integer.parseInt(source.substring(0,range),16); - end = Integer.parseInt(source.substring(range+2),16); - } else { - start = end = Integer.parseInt(source, 16); - } - } else { - start = end = -1; - } - if (!handleLine(start, end, parts)) { - break; - } - } - in.close(); - handleEnd(); - } catch (Exception e) { - throw (RuntimeException) new IllegalArgumentException(lineCount + ":\t" + line).initCause(e); - } - return this; - } - } - // - // public static SemiFileReader fillMapFromSemi(Class classLocation, String fileName, SemiFileReader handler) { - // return handler.process(classLocation, fileName); - // } - - public static BufferedReader openFile(Class class1, String file) throws IOException { - //URL path = null; - //String externalForm = null; - try { - // //System.out.println("Reading:\t" + file1.getCanonicalPath()); - // path = class1.getResource(file); - // externalForm = path.toExternalForm(); - // if (externalForm.startsWith("file:")) { - // externalForm = externalForm.substring(5); - // } - // File file1 = new File(externalForm); - // boolean x = file1.canRead(); - // final InputStream resourceAsStream = new FileInputStream(file1); - final InputStream resourceAsStream = class1.getResourceAsStream(file); - if(resourceAsStream==null) { - throw new IllegalArgumentException("class " + class1.getName()+".getResourceAsStream(" + file + ") returned null"); - } - InputStreamReader reader = new InputStreamReader(resourceAsStream, FileUtilities.UTF8); - BufferedReader bufferedReader = new BufferedReader(reader,1024*64); - return bufferedReader; - } catch (Exception e) { - e.printStackTrace(); - System.err.println(e.toString()+" on read of "+file); - File file1 = new File(file); - String foo = class1.getResource(".").toString(); - - throw (RuntimeException) new IllegalArgumentException("Bad file name: " - // + path + "\t" + externalForm + "\t" + - + file1.getCanonicalPath() - + "\r\n" + foo - + "\r\n" + new File(".").getCanonicalFile() + " => " + Arrays.asList(new File(".").getCanonicalFile().list()) - ) - .initCause(e); - } - } - - public static final Charset UTF8 = Charset.forName("utf-8"); - - static String[] splitCommaSeparated(String line) { - // items are separated by ',' - // each item is of the form abc... - // or "..." (required if a comma or quote is contained) - // " in a field is represented by "" - List result = new ArrayList(); - StringBuilder item = new StringBuilder(); - boolean inQuote = false; - for (int i = 0; i < line.length(); ++i) { - char ch = line.charAt(i); // don't worry about supplementaries - switch(ch) { - case '"': - inQuote = !inQuote; - // at start or end, that's enough - // if get a quote when we are not in a quote, and not at start, then add it and return to inQuote - if (inQuote && item.length() != 0) { - item.append('"'); - inQuote = true; - } - break; - case ',': - if (!inQuote) { - result.add(item.toString()); - item.setLength(0); - } else { - item.append(ch); - } - break; - default: - item.append(ch); - break; - } - } - result.add(item.toString()); - return result.toArray(new String[result.size()]); - } - - public static String getFileAsString(BufferedReader in) { - try { - StringBuilder result = new StringBuilder(); - while (true) { - String line = in.readLine(); - if (line == null) { - break; - } - if (result.length() != 0) { - result.append('\n'); - } - result.append(line); - } - return result.toString(); - } catch (IOException e) { - throw new IllegalArgumentException(e); - } - } -} diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/LanguageCode.java b/UnicodeJsps/src/main/java/org/unicode/jsp/LanguageCode.java index aa86fc0f7..e28186d5d 100644 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/LanguageCode.java +++ b/UnicodeJsps/src/main/java/org/unicode/jsp/LanguageCode.java @@ -15,7 +15,7 @@ import com.ibm.icu.text.Collator; import com.ibm.icu.util.ULocale; -public class LanguageCode { +public class LanguageCode { static public final Pattern languageID = Pattern.compile( " (?: ( [a-z A-Z]{2,8} | [a-z A-Z]{2,3} [-_] [a-z A-Z]{3} )" @@ -32,12 +32,12 @@ public class LanguageCode { .split("\\s+"))); enum Subtag { - language, - script, - region, + language, + script, + region, variants, - extensions, - privateUse, + extensions, + privateUse, privateUse2; String get(Matcher m) { return m.group(ordinal()+1); @@ -88,8 +88,8 @@ private static void validate(String input, ULocale ulocale, StringBuilder builde posAfter = input.length(); } prefix = "

Ill-Formed Language Identifier: " + input.substring(0, posBefore) - + "" + input.substring(posBefore, i) - + "×" + + "" + input.substring(posBefore, i) + + "×" + input.substring(i, posAfter) + "" + input.substring(posAfter, input.length()) + "
Couldn't parse past the point marked with ×.

\n"; @@ -130,7 +130,7 @@ private static void validate(String input, ULocale ulocale, StringBuilder builde } fixed = fixCodes.get(languageCode); } else { // must be 2 - // cases are the following. For the replacement, we use fix(extlang) if valid, otherwise fix(lang) if valid, otherwise fix(extlang) + // cases are the following. For the replacement, we use fix(extlang) if valid, otherwise fix(lang) if valid, otherwise fix(extlang) // zh-cmn - valid => cmn // en-cmn - valid => cmn // but shouldn't be; by canonicalization en-cmn = cmn // eng-cmn - invalid => cmn @@ -367,17 +367,17 @@ private static String getIcuName(String code, ULocale ulocale) { switch(code.length()) { case 2: case 3: - icuName = code.compareTo("a") < 0 + icuName = code.compareTo("a") < 0 ? ULocale.getDisplayCountry("und-" + code, ulocale) : ULocale.getDisplayLanguage(code, ulocale); break; - case 4: + case 4: if (code.compareTo("A") >= 0) { - icuName = ULocale.getDisplayScript("und-" + code, ulocale); + icuName = ULocale.getDisplayScript("und-" + code, ulocale); break; } // otherwise fall through! - default: - icuName = ULocale.getDisplayVariant("und-Latn-AQ-" + code, ulocale).toLowerCase(); + default: + icuName = ULocale.getDisplayVariant("und-Latn-AQ-" + code, ulocale).toLowerCase(); break; } return icuName; diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeProperty.java b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeProperty.java index 5a6a8b904..8beb89dc8 100644 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeProperty.java +++ b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeProperty.java @@ -35,6 +35,10 @@ import com.ibm.icu.text.UnicodeSet; import com.ibm.icu.text.UnicodeSetIterator; +/** + * @Deprecated use org.unicode.props.UnicodeProperty + */ +@Deprecated( forRemoval = true ) public abstract class UnicodeProperty extends UnicodeLabel { public static final UnicodeSet UNASSIGNED = new UnicodeSet("[:gc=unassigned:]").freeze(); diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeRegex.java b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeRegex.java deleted file mode 100644 index a9a7eebbc..000000000 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeRegex.java +++ /dev/null @@ -1,399 +0,0 @@ -//##header -/* - ******************************************************************************* - * Copyright (C) 2009, Google, International Business Machines Corporation and * - * others. All Rights Reserved. * - ******************************************************************************* - */ -package org.unicode.jsp; - -import java.io.BufferedReader; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.text.ParsePosition; -import java.util.Arrays; -import java.util.Comparator; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; -import java.util.regex.Pattern; - -import com.ibm.icu.text.StringTransform; -import com.ibm.icu.text.SymbolTable; -import com.ibm.icu.text.UnicodeSet; -import com.ibm.icu.util.Freezable; - -/** - * Contains utilities to supplement the JDK Regex, since it doesn't handle - * Unicode well. - * - * @author markdavis - */ -public class UnicodeRegex implements Cloneable, Freezable, StringTransform { - // Note: we don't currently have any state, but intend to in the future, - // particularly for the regex style supported. - - SymbolTable symbolTable; - ParsePosition parsePosition = new ParsePosition(0); - - public SymbolTable getSymbolTable() { - return symbolTable; - } - - public UnicodeRegex setSymbolTable(SymbolTable symbolTable) { - this.symbolTable = symbolTable; - return this; - } - - /** - * Adds full Unicode property support, with the latest version of Unicode, - * to Java Regex, bringing it up to Level 1 (see - * http://www.unicode.org/reports/tr18/). It does this by preprocessing the - * regex pattern string and interpreting the character classes (\p{...}, - * \P{...}, [...]) according to their syntax and meaning in UnicodeSet. With - * this utility, Java regex expressions can be updated to work with the - * latest version of Unicode, and with all Unicode properties. Note that the - * UnicodeSet syntax has not yet, however, been updated to be completely - * consistent with Java regex, so be careful of the differences. - *

Not thread-safe; create a separate copy for different threads. - *

In the future, we may extend this to support other regex packages. - * - * @regex A modified Java regex pattern, as in the input to - * Pattern.compile(), except that all "character classes" are - * processed as if they were UnicodeSet patterns. Example: - * "abc[:bc=N:]. See UnicodeSet for the differences in syntax. - * @return A processed Java regex pattern, suitable for input to - * Pattern.compile(). - */ - public String transform(String regex) { - StringBuilder result = new StringBuilder(); - UnicodeSet temp = new UnicodeSet(); - ParsePosition pos = new ParsePosition(0); - int state = 0; // 1 = after \ - - // We add each character unmodified to the output, unless we have a - // UnicodeSet. Note that we don't worry about supplementary characters, - // since none of the syntax uses them. - - for (int i = 0; i < regex.length(); ++i) { - // look for UnicodeSets, allowing for quoting with \ and \Q - char ch = regex.charAt(i); - switch (state) { - case 0: // we only care about \, and '['. - if (ch == '\\') { - if (UnicodeSet.resemblesPattern(regex, i)) { - // should only happen with \p - i = processSet(regex, i, result, temp, pos); - continue; - } - state = 1; - } else if (ch == '[') { - // if we have what looks like a UnicodeSet - if (UnicodeSet.resemblesPattern(regex, i)) { - i = processSet(regex, i, result, temp, pos); - continue; - } - } - break; - - case 1: // we are after a \ - if (ch == 'Q') { - state = 1; - } else { - state = 0; - } - break; - - case 2: // we are in a \Q... - if (ch == '\\') { - state = 3; - } - break; - - case 3: // we are in at \Q...\ - if (ch == 'E') { - state = 0; - } - state = 2; - break; - } - result.append(ch); - } - return result.toString(); - } - - /** - * Convenience static function, using standard parameters. - * @param regex as in process() - * @return processed regex pattern, as in process() - */ - public static String fix(String regex) { - return STANDARD.transform(regex); - } - - /** - * Compile a regex string, after processing by fix(...). - * - * @param regex - * Raw regex pattern, as in fix(...). - * @return Pattern - */ - public static Pattern compile(String regex) { - return Pattern.compile(STANDARD.transform(regex)); - } - - /** - * Compile a composed string from a set of BNF lines; see the List version for more information. - * - * @param bnfLines Series of BNF lines. - * @return Pattern - */ - public String compileBnf(String bnfLines) { - return compileBnf(Arrays.asList(bnfLines.split("\\r\\n?|\\n"))); - } - - /** - * Compile a composed string from a set of BNF lines, such as for composing a regex - * expression. The lines can be in any order, but there must not be any - * cycles. The result can be used as input for fix(). - *

- * Example: - *

-     * uri = (?: (scheme) \\:)? (host) (?: \\? (query))? (?: \\u0023 (fragment))?;
-     * scheme = reserved+;
-     * host = // reserved+;
-     * query = [\\=reserved]+;
-     * fragment = reserved+;
-     * reserved = [[:ascii:][:alphabetic:]];
-     * 
- *

- * Caveats: at this point the parsing is simple; for example, # cannot be - * quoted (use \\u0023); you can set it to null to disable. - * The equality sign and a few others can be reset with - * setBnfX(). - * - * @param bnfLines - * Series of lines that represent a BNF expression. The lines contain - * a series of statements that of the form x=y;. A statement can take - * multiple lines, but there can't be multiple statements on a line. - * A hash quotes to the end of the line. - * @return Pattern - */ - public String compileBnf(List lines) { - Map variables = getVariables(lines); - Set unused = new LinkedHashSet(variables.keySet()); - // brute force replacement; do twice to allow for different order - // later on can optimize - for (int i = 0; i < 2; ++i) { - for (String variable : variables.keySet()) { - String definition = variables.get(variable); - for (String variable2 : variables.keySet()) { - if (variable.equals(variable2)) { - continue; - } - String definition2 = variables.get(variable2); - String altered2 = definition2.replace(variable, definition); - if (!altered2.equals(definition2)) { - unused.remove(variable); - variables.put(variable2, altered2); - if (log != null) { - try { - log.append(variable2 + "=" + altered2 + ";"); - } catch (IOException e) { - throw (IllegalArgumentException) new IllegalArgumentException().initCause(e); - } - } - } - } - } - } - if (unused.size() != 1) { - throw new IllegalArgumentException("Not a single root: " + unused); - } - return variables.get(unused.iterator().next()); - } - - /** - * Compile a regex string, after processing by fix(...). - * - * @param regex - * Raw regex pattern, as in fix(...). - * @return Pattern - */ - public static Pattern compile(String regex, int options) { - return Pattern.compile(STANDARD.transform(regex), options); - } - - public String getBnfCommentString() { - return bnfCommentString; - } - - public void setBnfCommentString(String bnfCommentString) { - this.bnfCommentString = bnfCommentString; - } - - public String getBnfVariableInfix() { - return bnfVariableInfix; - } - - public void setBnfVariableInfix(String bnfVariableInfix) { - this.bnfVariableInfix = bnfVariableInfix; - } - - public String getBnfLineSeparator() { - return bnfLineSeparator; - } - - public void setBnfLineSeparator(String bnfLineSeparator) { - this.bnfLineSeparator = bnfLineSeparator; - } - - /** - * Utility for loading lines from a UTF8 file. - * @param file - * @param result - * @return - * @throws IOException - */ - public static List loadFile(String file, List result) throws IOException { - BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8")); - while (true) { - String line = in.readLine(); - if (line == null) { - break; - } - result.add(line); - } - return result; - } - - - /* (non-Javadoc) - * @see com.ibm.icu.util.Freezable#cloneAsThawed() - */ - public Object cloneAsThawed() { - // TODO Auto-generated method stub - try { - return clone(); - } catch (CloneNotSupportedException e) { - throw new IllegalArgumentException(); // should never happen - } - } - - /* (non-Javadoc) - * @see com.ibm.icu.util.Freezable#freeze() - */ - public Object freeze() { - // no action needed now. - return this; - } - - /* (non-Javadoc) - * @see com.ibm.icu.util.Freezable#isFrozen() - */ - public boolean isFrozen() { - // at this point, always true - return true; - } - - // ===== PRIVATES ===== - - private int processSet(String regex, int i, StringBuilder result, UnicodeSet temp, ParsePosition pos) { - try { - pos.setIndex(i); - UnicodeSet x = temp.clear().applyPattern(regex, pos, symbolTable, 0); - x.complement().complement(); // hack to fix toPattern - result.append(x.toPattern(false)); - i = pos.getIndex() - 1; // allow for the loop increment - return i; - } catch (Exception e) { - throw (IllegalArgumentException) new IllegalArgumentException("Error in " + regex).initCause(e); - } - } - - private static UnicodeRegex STANDARD = new UnicodeRegex(); - private String bnfCommentString = "#"; - private String bnfVariableInfix = "="; - private String bnfLineSeparator = "\n"; - private Appendable log = null; - - private Comparator LongestFirst = new Comparator () { - public int compare(String arg0, String arg1) { - int len0 = arg0.length(); - int len1 = arg1.length(); - if (len0 != len1) { - return len1 - len0; - } - return arg0.compareTo(arg1); - } - }; - - - private Map getVariables(List lines) { - Map variables = new TreeMap(LongestFirst); - String variable = null; - StringBuffer definition = new StringBuffer(); - int count = 0; - for (String line : lines) { - ++count; - // remove initial bom, comments - if (line.length() == 0) { - continue; - } - if (line.charAt(0) == '\uFEFF') { - line = line.substring(1); - } - - if (bnfCommentString != null) { - int hashPos = line.indexOf(bnfCommentString); - if (hashPos >= 0) { - line = line.substring(0, hashPos); - } - } - String trimline = line.trim(); - if (trimline.length() == 0) { - continue; - } - - // String[] lineParts = line.split(";"); - String linePart = line; // lineParts[i]; // .trim().replace("\\s+", " "); - if (linePart.trim().length() == 0) { - continue; - } - boolean terminated = trimline.endsWith(";"); - if (terminated) { - linePart = linePart.substring(0,linePart.lastIndexOf(';')); - } - int equalsPos = linePart.indexOf(bnfVariableInfix); - if (equalsPos >= 0) { - if (variable != null) { - throw new IllegalArgumentException("Missing ';' before " + count + ") " + line); - } - variable = linePart.substring(0,equalsPos).trim(); - if (variables.containsKey(variable)) { - throw new IllegalArgumentException("Duplicate variable definition in " + line); - } - definition.append(linePart.substring(equalsPos+1).trim()); - } else { // no equals, so - if (variable == null) { - throw new IllegalArgumentException("Missing '=' at " + count + ") " + line); - } - definition.append(bnfLineSeparator).append(linePart); - } - // we are terminated if i is not at the end, or the line ends with a ; - if (terminated) { - variables.put(variable, definition.toString()); - variable = null; // signal we have no variable - definition.setLength(0); - } - } - if (variable != null) { - throw new IllegalArgumentException("Missing ';' at end"); - } - return variables; - } - -} diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java index afc1faf2f..3bc0df9c9 100644 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java +++ b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java @@ -6,7 +6,8 @@ import java.util.regex.Pattern; import org.unicode.cldr.util.MultiComparator; -import org.unicode.jsp.UnicodeProperty.PatternMatcher; +import org.unicode.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty.PatternMatcher; import org.unicode.jsp.UnicodeSetUtilities.ComparisonMatcher.Relation; import com.ibm.icu.lang.CharSequences; @@ -26,7 +27,7 @@ public class UnicodeSetUtilities { public static final UnicodeSet SINGLETONS = new UnicodeSet("[©®‼⁉™ℹ↔-↙↩↪⌚⌛⌨⏏⏩-⏳⏸-⏺Ⓜ▪▫▶◀◻-◾☀-☄☎☑☔☕☘☝☠☢☣☦☪☮☯☸-☺♈-♓♠♣♥♦♨♻♿⚒-⚔⚖⚗⚙⚛⚜⚠⚡" + "⚪⚫⚰⚱⚽⚾⛄⛅⛈⛎⛏⛑⛓⛔⛩⛪⛰-⛵⛷-⛺⛽✂✅✈-✍✏✒✔✖✝✡✨✳✴❄❇❌❎❓-❕❗❣❤➕-➗➡➰➿⤴⤵⬅-⬇⬛⬜⭐⭕〰〽㊗㊙🀄🃏🅰🅱🅾🅿🆎🆑-🆚🈁🈂🈚🈯🈲-🈺" - + "🉐🉑🌀-🌡🌤-🎓🎖🎗🎙-🎛🎞-🏰🏳-🏵🏷-📽📿-🔽🕉-🕎🕐-🕧🕯🕰🕳-🕹🖇🖊-🖍🖐🖕🖖🖥🖨🖱🖲🖼🗂-🗄🗑-🗓🗜-🗞🗡🗣🗯🗳🗺-🙏🚀-🛅🛋-🛐🛠-🛥🛩🛫🛬🛰🛳🤐-🤘🦀-🦄🧀]").freeze(); + + "🉐🉑🌀-🌡🌤-🎓🎖🎗🎙-🎛🎞-🏰🏳-🏵🏷-📽📿-🔽🕉-🕎🕐-🕧🕯🕰🕳-🕹🖇🖊-🖍🖐🖕🖖🖥🖨🖱🖲🖼🗂-🗄🗑-🗓🗜-🗞🗡🗣🗯🗳🗺-🙏🚀-🛅🛋-🛐🛠-🛥🛩🛫🛬🛰🛳🤐-🤘🦀-🦄🧀]").freeze(); public static final UnicodeSet KEYCAPS = new UnicodeSet("[{#⃣}{*⃣}{0⃣}{1⃣}{2⃣}{3⃣}{4⃣}{5⃣}{6⃣}{7⃣}{8⃣}{9⃣}]").freeze(); public static final UnicodeSet FLAGS = new UnicodeSet("[{🇦🇨}" + "{🇦🇩}{🇦🇪}{🇦🇫}{🇦🇬}{🇦🇮}{🇦🇱}{🇦🇲}{🇦🇴}{🇦🇶}{🇦🇷}{🇦🇸}{🇦🇹}{🇦🇺}{🇦🇼}{🇦🇽}{🇦🇿}{🇧🇦}{🇧🇧}{🇧🇩}{🇧🇪}{🇧🇫}{🇧🇬}{🇧🇭}{🇧🇮}{🇧🇯}{🇧🇱}{🇧🇲}{🇧🇳}{🇧🇴}{🇧🇶}{🇧🇷}{🇧🇸}" @@ -69,7 +70,7 @@ public static String addEmojiVariation(String s) { } return b.toString(); } - + private static UnicodeSet OK_AT_END = new UnicodeSet("[ \\]\t]").freeze(); private static Pattern UPLUS = Pattern.compile("U\\+(1?[A-Za-z0-9]{3,5})"); private static Pattern DOTDOT = Pattern.compile("\\.\\."); @@ -88,8 +89,8 @@ public static UnicodeSet parseUnicodeSet(String input) { int parseEnd = parsePosition.getIndex(); if (parseEnd != parseInput.length() && !UnicodeSetUtilities.OK_AT_END.containsAll(parseInput.substring(parseEnd))) { parseEnd--; // get input offset - throw new IllegalArgumentException("Additional characters past the end of the set, at " - + parseEnd + ", ..." + throw new IllegalArgumentException("Additional characters past the end of the set, at " + + parseEnd + ", ..." + input.substring(Math.max(0, parseEnd - 10), parseEnd) + "|" + input.substring(parseEnd, Math.min(input.length(), parseEnd + 10)) @@ -120,7 +121,7 @@ public MySymbolTable() { // String[] propertyNames = propertyName.split("[*]"); // for (int i = propertyNames.length - 1; i >= 0; ++i) { // String pname = propertyNames[i]; - // + // // } // return null; // } @@ -140,7 +141,7 @@ public boolean applyPropertyAlias(String propertyName, if (posNotEqual < 0) posNotEqual = propertyName.length(); if (posColon < 0) posColon = propertyName.length(); int opPos = posNotEqual < posColon ? posNotEqual : posColon; - propertyValue = propertyValue.length() == 0 ? propertyName.substring(opPos+1) + propertyValue = propertyValue.length() == 0 ? propertyName.substring(opPos+1) : propertyName.substring(opPos+1) + "=" + propertyValue; propertyName = propertyName.substring(0,opPos); if (posNotEqual < posColon) { @@ -182,7 +183,7 @@ public boolean applyPropertyAlias(String propertyName, return status; } - private boolean applyPropertyAlias0(UnicodeProperty prop, + private boolean applyPropertyAlias0(UnicodeProperty prop, String propertyValue, UnicodeSet result, boolean invert) { result.clear(); String propertyName = prop.getName(); diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeUtilities.java b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeUtilities.java index ac247ed9a..be60412f8 100644 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeUtilities.java +++ b/UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeUtilities.java @@ -27,7 +27,8 @@ import org.unicode.cldr.util.UnicodeSetPrettyPrinter; import org.unicode.jsp.Idna.IdnaType; import org.unicode.jsp.Idna2008.Idna2008Type; -import org.unicode.jsp.UnicodeProperty.UnicodeMapProperty; +import org.unicode.props.UnicodeProperty.UnicodeMapProperty; +import org.unicode.props.UnicodeProperty; import com.ibm.icu.dev.util.UnicodeMap; import com.ibm.icu.impl.Row.R4; @@ -53,7 +54,7 @@ public class UnicodeUtilities { private static final Collator COLLATOR = Collator.getInstance(new ULocale("en-u-co-emoji")); - static final UnicodeSet OFF_LIMITS = new UnicodeSet(UnicodeProperty.UNASSIGNED).addAll(UnicodeProperty.PRIVATE_USE).addAll(UnicodeProperty.SURROGATE).freeze(); + static final UnicodeSet OFF_LIMITS = new UnicodeSet(UnicodeProperty.getUNASSIGNED()).addAll(UnicodeProperty.PRIVATE_USE).addAll(UnicodeProperty.SURROGATE).freeze(); static final UnicodeSet NONCHAR = new UnicodeSet(OFF_LIMITS).addAll(new UnicodeSet("[:Cc:]")).removeAll(new UnicodeSet("[:whitespace:]")).freeze(); static { @@ -357,8 +358,8 @@ public static void showSet(UnicodeSet inputSetRaw, CodePointShower codePointShow LinkedHashMap items = new LinkedHashMap(); String specials = "Unassigned, Private use, or Surrogates"; - UnicodeSet specialSet = new UnicodeSet(inputSetRaw).retainAll(UnicodeProperty.SPECIALS); - UnicodeSet inputSet = specialSet.size() == 0 ? inputSetRaw : new UnicodeSet(inputSetRaw).removeAll(UnicodeProperty.SPECIALS); + UnicodeSet specialSet = new UnicodeSet(inputSetRaw).retainAll(UnicodeProperty.getSPECIALS()); + UnicodeSet inputSet = specialSet.size() == 0 ? inputSetRaw : new UnicodeSet(inputSetRaw).removeAll(UnicodeProperty.getSPECIALS()); if (specialSet.size() != 0) { items.put(specials, specialSet); } diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/XIDModifications.java b/UnicodeJsps/src/main/java/org/unicode/jsp/XIDModifications.java deleted file mode 100644 index e1cec00b7..000000000 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/XIDModifications.java +++ /dev/null @@ -1,52 +0,0 @@ -package org.unicode.jsp; - -import com.ibm.icu.dev.util.UnicodeMap; -import com.ibm.icu.text.UnicodeSet; - -public class XIDModifications { - private static UnicodeMap allowed = new UnicodeMap(); // "[:XID_Continue:]"); - private static UnicodeMap reasons = new UnicodeMap(); - - static class MyReader extends FileUtilities.SemiFileReader { - - @Override - protected boolean handleLine(int start, int end, String[] items) { -// String type = items[1]; -// if (type.equalsIgnoreCase("allowed")) { -// reasons.putAll(start, end, items[2]); -// } else if (type.equalsIgnoreCase("restricted")) { -// // allowed.remove(start, end); -// } else { -// throw new IllegalArgumentException(type); -// } - allowed.putAll(start, end, items[1]); - reasons.putAll(start, end, items[2]); - return true; - } - } - static { - //# @missing: 0000..10FFFF; Restricted ; Not-Characters - allowed.putAll(0,0x10FFFF,"Restricted"); - reasons.putAll(0,0x10FFFF,"Not-Characters"); - //reasons.putAll(new UnicodeSet("[[:gc=cn:][:gc=co:][:gc=cs:][:gc=cc:]-[:whitespace:]]"),"not-char"); - new MyReader().process(XIDModifications.class, "xidmodifications.txt"); - allowed.freeze(); - reasons.freeze(); - } - public static UnicodeMap getTypes() { - return reasons; - } - public static UnicodeMap getStatus() { - return allowed; - } - public static UnicodeSet getAllowed() { - return allowed.getSet("Restricted"); - } - - public static boolean isAllowed(int codePoint) { - return allowed.get(codePoint).equals("Restricted"); - } - public static String getType(int codePoint) { - return reasons.get(codePoint); - } -} diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java b/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java index b8c426b9c..e039bd908 100644 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java +++ b/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java @@ -7,9 +7,11 @@ import java.util.Locale; import org.unicode.jsp.Idna.IdnaType; -import org.unicode.jsp.UnicodeProperty.BaseProperty; -import org.unicode.jsp.UnicodeProperty.Factory; -import org.unicode.jsp.UnicodeProperty.SimpleProperty; +import org.unicode.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty.AliasAddAction; +import org.unicode.props.UnicodeProperty.BaseProperty; +import org.unicode.props.UnicodeProperty.Factory; +import org.unicode.props.UnicodeProperty.SimpleProperty; import com.ibm.icu.dev.util.UnicodeMap; import com.ibm.icu.lang.CharSequences; @@ -150,7 +152,7 @@ public String transform(Integer source) { add(new UnicodeProperty.UnicodeMapProperty() .set(specialMap) .setMain("Script_Extensions", "scx", UnicodeProperty.ENUMERATED, "1.1") - .addValueAliases(ScriptTester.getScriptSpecialsAlternates(), false) + .addValueAliases(ScriptTester.getScriptSpecialsAlternates(), AliasAddAction.IGNORE_IF_MISSING) ); CachedProps cp = CachedProps.CACHED_PROPS; @@ -579,7 +581,7 @@ public static class UnicodeSetProperty extends BaseProperty { private static final String[] YESNO_ARRAY = new String[]{"Yes", "No"}; private static final List YESNO = Arrays.asList(YESNO_ARRAY); - public UnicodeSetProperty set(UnicodeSet set) { + public XPropertyFactory.UnicodeSetProperty set(UnicodeSet set) { unicodeSet = set; return this; } @@ -591,7 +593,7 @@ protected UnicodeMap _getUnicodeMap() { return result; } - public UnicodeSetProperty set(String string) { + public XPropertyFactory.UnicodeSetProperty set(String string) { // TODO Auto-generated method stub return set(new UnicodeSet(string).freeze()); } diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestBasicProperties.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestBasicProperties.java index 570a517e6..45b427576 100644 --- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestBasicProperties.java +++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestBasicProperties.java @@ -9,7 +9,7 @@ import org.junit.jupiter.api.condition.EnabledIf; import org.unicode.jsp.PropertyMetadata; import org.unicode.jsp.PropertyMetadata.PropertyMetaDatum; -import org.unicode.jsp.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import org.unicode.jsp.XPropertyFactory; public class TestBasicProperties extends TestFmwk2 { diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestIcuProperties.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestIcuProperties.java index debb662a9..c36e471f6 100644 --- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestIcuProperties.java +++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestIcuProperties.java @@ -3,8 +3,8 @@ import java.util.Arrays; import java.util.List; -import org.unicode.cldr.util.props.ICUPropertyFactory; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.jsp.ICUPropertyFactory; +import org.unicode.props.UnicodeProperty; import com.ibm.icu.lang.UCharacter; import com.ibm.icu.lang.UProperty; diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestJsp.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestJsp.java index bcfc209be..bec90842e 100644 --- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestJsp.java +++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestJsp.java @@ -28,7 +28,7 @@ import org.unicode.jsp.Idna2003; import org.unicode.jsp.Idna2008; import org.unicode.jsp.UnicodeJsp; -import org.unicode.jsp.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import org.unicode.jsp.UnicodeRegex; import org.unicode.jsp.UnicodeSetUtilities; import org.unicode.jsp.UnicodeUtilities; diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestProperties.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestProperties.java index e46462e3e..18ee15634 100644 --- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestProperties.java +++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestProperties.java @@ -21,7 +21,7 @@ import org.unicode.jsp.NFM; import org.unicode.jsp.PropertyMetadata; import org.unicode.jsp.UnicodeJsp; -import org.unicode.jsp.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import org.unicode.jsp.UnicodeSetUtilities; import org.unicode.jsp.UnicodeUtilities; import org.unicode.jsp.XPropertyFactory; diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java index 715e6caee..bbe1a543d 100644 --- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java +++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java @@ -22,7 +22,7 @@ import org.unicode.jsp.CharEncoder; import org.unicode.jsp.Common; import org.unicode.jsp.UnicodeJsp; -import org.unicode.jsp.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import org.unicode.jsp.UnicodeSetUtilities; import org.unicode.jsp.UnicodeUtilities; import org.unicode.jsp.XPropertyFactory; diff --git a/unicodetools/src/main/java/org/unicode/draft/FormatSpecialData2.java b/unicodetools/src/main/java/org/unicode/draft/FormatSpecialData2.java index 5766f2225..c6cfab8ac 100644 --- a/unicodetools/src/main/java/org/unicode/draft/FormatSpecialData2.java +++ b/unicodetools/src/main/java/org/unicode/draft/FormatSpecialData2.java @@ -16,7 +16,7 @@ import org.unicode.cldr.util.CLDRPaths; import org.unicode.cldr.util.CldrUtility.CollectionComparator; import org.unicode.cldr.util.FileReaders; -import org.unicode.cldr.util.props.BagFormatter; +import org.unicode.props.BagFormatter; import org.unicode.cldr.util.props.UnicodeLabel; import org.unicode.draft.ScriptCategories2.RemapType; diff --git a/unicodetools/src/main/java/org/unicode/draft/FrequencyData2.java b/unicodetools/src/main/java/org/unicode/draft/FrequencyData2.java index 83decda34..cb29da11a 100644 --- a/unicodetools/src/main/java/org/unicode/draft/FrequencyData2.java +++ b/unicodetools/src/main/java/org/unicode/draft/FrequencyData2.java @@ -13,8 +13,8 @@ import org.unicode.cldr.util.CLDRPaths; import org.unicode.cldr.util.Counter; import org.unicode.cldr.util.PatternCache; -import org.unicode.cldr.util.props.ICUPropertyFactory; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.jsp.ICUPropertyFactory; +import org.unicode.props.UnicodeProperty; import org.unicode.text.utility.Utility; import com.ibm.icu.dev.util.UnicodeMap; @@ -54,21 +54,21 @@ public class FrequencyData2 { /** * The 1st column is the code point. - * + * * 2nd is detected language - * + * * Then there are 3 groups of 4 columns, where each group is: - * + * * pre-HTML code point count post-HTML code point count, document count, UTF-8 document count - * + * * The 1st group includes "bad" docs (error during input conversion or * contains unassigned or high private use), 2nd group excludes "bad" * docs, 3rd group is multiplied by pagerank (and excludes "bad" docs). - * + * * Then there are up to 3 groups, where each group is: - * + * * navboost, pagerank, language, encoding, url - * + * * @param frequencyFile * @throws IOException */ @@ -119,7 +119,7 @@ public class FrequencyData2 { // } // in.close(); // } - + public FrequencyData2(String frequencyFile, boolean showProgress) throws IOException { if (true) throw new IllegalArgumentException("old code: see CharacterFrequency"); BufferedReader in = GenerateNormalizeForMatch2.openUTF8Reader(frequencyFile); @@ -456,7 +456,7 @@ private static void writeSummary2(FrequencyData2 data) { } for (int i = 0; i < 4; ++i) { - System.out.println((i+1) + "-byte:\t" + System.out.println((i+1) + "-byte:\t" + 100*buckets[i]/(double) total + "%"); } } diff --git a/unicodetools/src/main/java/org/unicode/idna/GenerateIdna.java b/unicodetools/src/main/java/org/unicode/idna/GenerateIdna.java index a3ab7ba2a..f979fee2e 100644 --- a/unicodetools/src/main/java/org/unicode/idna/GenerateIdna.java +++ b/unicodetools/src/main/java/org/unicode/idna/GenerateIdna.java @@ -7,10 +7,10 @@ import java.util.TreeSet; import org.unicode.cldr.draft.FileUtilities; -import org.unicode.cldr.util.props.BagFormatter; -import org.unicode.cldr.util.props.BagFormatter.NameLabel; -import org.unicode.cldr.util.props.UnicodeProperty; -import org.unicode.cldr.util.props.UnicodeProperty.UnicodeMapProperty; +import org.unicode.props.BagFormatter; +import org.unicode.props.BagFormatter.NameLabel; +import org.unicode.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty.UnicodeMapProperty; import org.unicode.idna.Idna.IdnaType; import org.unicode.props.IndexUnicodeProperties; import org.unicode.props.UcdProperty; @@ -181,7 +181,7 @@ public static void main(String[] args) throws IOException { UnicodeSet ageSet = AGE.getSet(age); missing += ageSet.size(); } - } + } System.out.println("Missing IDNA2008 Official Values: " + missing); } @@ -193,7 +193,7 @@ public static void main(String[] args) throws IOException { for (Age_Values age : AGE.values()) { UnicodeSet ageSet = AGE.getSet(age); System.out.println(age + "\t" + ageSet.size()); - } + } } private static void showSet(String title, UnicodeSet validSet, UnicodeSet iDNA2008Valid2) { @@ -349,7 +349,7 @@ private static UnicodeMap> createMappingTable(boolean S } if (deviationSet.contains(cp)) { result = Row.of(IdnaType.deviation, baseMappingValue); - } else if (baseExclusionSet.contains(cp) + } else if (baseExclusionSet.contains(cp) || false && bidiControls.contains(cp)) { // Step 5. result = disallowedResult; } else if (!labelSeparator.contains(cp) && !baseValidSet.containsAll(baseMappingValue)) { @@ -453,9 +453,9 @@ private static void writeDataFile(UnicodeMap mappingTable) throws IOExce final PrintWriter writer = FileUtilities.openUTF8Writer(GEN_IDNA_DIR, unversionedFileName); writer.println(Utility.getBaseDataHeader( - unversionedFileName, - 46, - "Unicode IDNA Compatible Preprocessing", + unversionedFileName, + 46, + "Unicode IDNA Compatible Preprocessing", Default.ucdVersion())); // writer.println( // "#\n" + diff --git a/unicodetools/src/main/java/org/unicode/idna/GenerateIdnaStableSamples.java b/unicodetools/src/main/java/org/unicode/idna/GenerateIdnaStableSamples.java index b43d072b7..8646cdcd3 100644 --- a/unicodetools/src/main/java/org/unicode/idna/GenerateIdnaStableSamples.java +++ b/unicodetools/src/main/java/org/unicode/idna/GenerateIdnaStableSamples.java @@ -1,7 +1,7 @@ package org.unicode.idna; -import org.unicode.cldr.util.props.UnicodeProperty; -import org.unicode.cldr.util.props.UnicodePropertySymbolTable; +import org.unicode.props.UnicodeProperty; +import org.unicode.props.UnicodePropertySymbolTable; import org.unicode.text.UCD.Default; import org.unicode.text.UCD.ToolUnicodePropertySource; import org.unicode.text.UCD.ToolUnicodeTransformFactory; @@ -12,7 +12,7 @@ public class GenerateIdnaStableSamples { public static void main(String[] args) { - + Default.setUCD("9.0.0"); UnicodeTransform.setFactory(new ToolUnicodeTransformFactory()); final ToolUnicodePropertySource toolUPS1 = ToolUnicodePropertySource.make(Default.ucdVersion()); @@ -28,26 +28,26 @@ public static void main(String[] args) { "[[:bc=EN:]-[:Cn:]]", "[[:bc=AN:]-[:Cn:]]", "[[:bc=NSM:]-[:Cn:]]", - "// contextj",// + "// contextj",// "[\u200C\u200D]", "[[:ccc=virama:]-[:Cn:]]", "[[:jt=T:]-[:Cn:]]", "[[:jt=L:][:jt=D:]-[:Cn:]]", "[[:jt=R:][:jt=D:]-[:Cn:]]", - "// syntax",// + "// syntax",// "[-]", - "// changed mapping from 2003",// + "// changed mapping from 2003",// "[[\u04C0 \u10A0-\u10C5 \u2132 \u2183 \u2F868 \u2F874 \u2F91F \u2F95F \u2F9BF \u3164 \uFFA0 \u115F \u1160 \u17B4 \u17B5 \u1806]-[:Cn:]]", "// disallowed in 2003",// disallowed in 2003 "[[\u200E-\u200F \u202A-\u202E \u2061-\u2063 \uFFFC \uFFFD \u1D173-\u1D17A \u206A-\u206F \uE0001 \uE0020-\uE007F]-[:Cn:]]", - "// Step 7",// + "// Step 7",// "[[\u2260 \u226E \u226F \uFE12 \u2488]-[:Cn:]]", - "// disallowed", + "// disallowed", "[[:S:][:P:][:C:]-[:Cn:][:noncharactercodepoint:][\\U000D0000\\U000E0000\\U000F0000\\U00100000]]", - "// deviations", // + "// deviations", // "[[\\u200C\\u200D\\u00DF\\u03C2]-[:Cn:]]", }; - + for (int i = 0; i < samples.length; ++i) { String sample = samples[i]; if (sample.contains("[")) { diff --git a/unicodetools/src/main/java/org/unicode/idna/GenerateIdnaTest.java b/unicodetools/src/main/java/org/unicode/idna/GenerateIdnaTest.java index 37a7e6afc..280d8ca86 100644 --- a/unicodetools/src/main/java/org/unicode/idna/GenerateIdnaTest.java +++ b/unicodetools/src/main/java/org/unicode/idna/GenerateIdnaTest.java @@ -12,8 +12,8 @@ import org.unicode.cldr.draft.FileUtilities; import org.unicode.cldr.util.CldrUtility; -import org.unicode.cldr.util.props.UnicodeProperty; -import org.unicode.cldr.util.props.UnicodePropertySymbolTable; +import org.unicode.props.UnicodeProperty; +import org.unicode.props.UnicodePropertySymbolTable; import org.unicode.idna.Idna2008.Idna2008Type; import org.unicode.idna.LoadIdnaTest.TestLine; import org.unicode.idna.Uts46.Errors; @@ -51,7 +51,7 @@ public class GenerateIdnaTest { private static final boolean NEW_FORMAT = true; private static final int UNDEFINED; - + static { // find a character that is likely to remain undefined, and is if possible in the BMP. // so we take the highest BMP if possible, then the highest smp @@ -104,9 +104,9 @@ int generateTests(int lines) throws IOException { final PrintWriter out2 = org.unicode.cldr.draft.FileUtilities.openUTF8Writer(GenerateIdna.GEN_IDNA_DIR, NEW_FILE_NAME); // out2.println(Utility.getDataHeader(NEW_FILE_NAME)); out2.println(Utility.getBaseDataHeader( - NEW_FILE_NAME, - 46, - "Unicode IDNA Compatible Preprocessing", + NEW_FILE_NAME, + 46, + "Unicode IDNA Compatible Preprocessing", Default.ucdVersion())); FileUtilities.appendFile(this.getClass().getResource("IdnaTestHeader2.txt").toString().substring(5), "UTF-8", out2); @@ -260,9 +260,9 @@ int generateLine(String source, PrintWriter out, PrintWriter out2) { // toUnicodeErrors2 = EnumSet.copyOf(toUnicodeErrors2); // toUnicodeErrors2.add(Errors.NV8); // } -// +// // // Hack to check whether problems were introduced. Needs to be deeper check in processMap -// +// // final Set throwAway = EnumSet.noneOf(Errors.class); // Set nonTransitionalErrors2 = nonTransitionalErrors; // final String nontransitional2 = Uts46.SINGLETON.toASCII(unicode, IdnaChoice.nontransitional, throwAway); @@ -278,7 +278,7 @@ int generateLine(String source, PrintWriter out, PrintWriter out2) { // nonTransitionalErrors2.add(Errors.NV8); // } - out2.println(source + out2.println(source + "; " + CldrUtility.ifEqual(unicode, source, "") + "; " + CldrUtility.ifEqual(toUnicodeErrors, Collections.EMPTY_SET, "") + "; " + CldrUtility.ifEqual(nontransitional, unicode, "") @@ -393,7 +393,7 @@ private void showLine(String source, String type, String ascii, Set asci + (hasUnicodeErrors ? showErrors(toUnicodeErrors) : unicode.equals(source) ? "" : unicodeReveal) + ";\t" + (hasAsciiErrors ? showErrors(asciiErrors) : unicode.equals(ascii) ? "" : hexForTest.transform(ascii)) - + (Idna2008.GRANDFATHERED_VALID.containsSome(unicode) ? ";\tXV8" + + (Idna2008.GRANDFATHERED_VALID.containsSome(unicode) ? ";\tXV8" : hasUnicodeErrors || validIdna2008 ? "" : ";\tNV8") // checking + (!NEW_FORMAT ? "" : "" + (unicodeReveal.equals(unicode) ? "" : "\t#\t" + removeInvisible.transform(unicode))) diff --git a/unicodetools/src/main/java/org/unicode/idna/StringPrepData.java b/unicodetools/src/main/java/org/unicode/idna/StringPrepData.java index 0a1923ee4..fe221b9e2 100644 --- a/unicodetools/src/main/java/org/unicode/idna/StringPrepData.java +++ b/unicodetools/src/main/java/org/unicode/idna/StringPrepData.java @@ -1,5 +1,5 @@ /** - * + * */ package org.unicode.idna; @@ -10,7 +10,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import org.unicode.idna.Idna.IdnaType; import org.unicode.jsp.FileUtilities; @@ -360,4 +360,4 @@ private static void getNamePrepData32(EnumSet allo throw new IllegalArgumentException(e); } } -} \ No newline at end of file +} diff --git a/unicodetools/src/main/java/org/unicode/idna/Uts46.java b/unicodetools/src/main/java/org/unicode/idna/Uts46.java index 91d2a88b0..e8ee18995 100644 --- a/unicodetools/src/main/java/org/unicode/idna/Uts46.java +++ b/unicodetools/src/main/java/org/unicode/idna/Uts46.java @@ -4,7 +4,6 @@ import java.util.Set; import java.util.regex.Pattern; -import org.unicode.jsp.FileUtilities; import org.unicode.text.utility.Settings; import com.google.common.base.Splitter; @@ -14,6 +13,8 @@ import com.ibm.icu.text.UnicodeSet; import com.ibm.icu.text.UnicodeSet.SpanCondition; +import org.unicode.jsp.FileUtilities; + public class Uts46 extends Idna { public static final Splitter PERIOD = Splitter.on('.'); @@ -134,29 +135,29 @@ public boolean handleLine(int start, int end, String[] items) { * http://datatracker.ietf.org/doc/draft-ietf-idnabis-bidi/, version 07 An * RTL label is a label that contains at least one character of type R, AL * or AN. - * + * * An LTR label is any label that is not an RTL label. - * + * * A "BIDI domain name" is a domain name that contains at least one RTL * label. - * + * * 1. The first character must be a character with BIDI property L, R or AL. * If it has the R or AL property, it is an RTL label; if it has the L * property, it is an LTR label. - * + * * 2. In an RTL label, only characters with the BIDI properties R, AL, AN, * EN, ES, CS, ET, ON, BN and NSM are allowed. - * + * * 3. In an RTL label, the end of the label must be a character with BIDI * property R, AL, EN or AN, followed by zero or more characters with BIDI * property NSM. - * + * * 4. In an RTL label, if an EN is present, no AN may be present, and vice * versa. - * + * * 5. In an LTR label, only characters with the BIDI properties L, EN, ES, * CS. ET, ON, BN and NSM are allowed. - * + * * 6. In an LTR label, the end of the label must be a character with BIDI * property L or EN, followed by zero or more characters with BIDI property * NSM. @@ -175,7 +176,7 @@ public boolean handleLine(int start, int end, String[] items) { static final UnicodeSet NSM = new UnicodeSet("[[:bc=NSM:]]").freeze(); /** * Checks a string for IDNA2008 bidi errors. label must not be empty - * + * * @param domainName * the string to be tested * @param errors @@ -191,7 +192,7 @@ public static boolean hasBidiError(String label, Set errors) { final int firstChar = label.codePointAt(0); // 1. The first character must be a character with BIDI property L, - // R or AL. + // R or AL. // If it has the R or AL property, it is an RTL label; // if it has the L property, it is an LTR label. @@ -229,7 +230,7 @@ public static boolean hasBidiError(String label, Set errors) { if (RTL && EN.containsSome(label) && AN.containsSome(label)) { errors.add(Errors.B4); } - + // 5. In an LTR label, only characters with the BIDI properties L, // EN, ES, CS. ET, ON, BN and NSM are allowed. if (LTR && !L_EN_ES_CS_ET_ON_BN_NSM.containsAll(label)) { @@ -348,7 +349,7 @@ public static boolean hasContextJError(String domain, Set errors) { /** * Input must start with xn-- - * + * * @param label * @param errors * @return @@ -410,15 +411,15 @@ public enum Errors { A3(UIDNA_ERROR_PUNYCODE), A4_1(UIDNA_ERROR_DOMAIN_NAME_TOO_LONG), A4_2(UIDNA_ERROR_EMPTY_LABEL | UIDNA_ERROR_LABEL_TOO_LONG), - NV8(UIDNA_NOT_IDNA2008), - X3(UIDNA_ERROR_EMPTY_LABEL), + NV8(UIDNA_NOT_IDNA2008), + X3(UIDNA_ERROR_EMPTY_LABEL), X4_2(UIDNA_ERROR_EMPTY_LABEL), ; static final Set TO_ASCII_ERRORS = ImmutableSortedSet.of(A3, A4_1, A4_2); static final Set BOTH_X_A4_2 = ImmutableSortedSet.of(A4_2, X4_2); - + int errorNum; - + Errors(int errorNum) { this.errorNum = errorNum; } @@ -473,7 +474,7 @@ public static int hasBidiOrContextError(String domainName, Set errors) { } return errors.size() - oldErrorLength; } - + private String processMap(String domainName, IdnaChoice idnaChoice, Set errors) { final StringBuilder buffer = new StringBuilder(); int cp; diff --git a/unicodetools/src/main/java/org/unicode/jsp/AlternateIterator.java b/unicodetools/src/main/java/org/unicode/jsp/AlternateIterator.java deleted file mode 100644 index 14a29524e..000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/AlternateIterator.java +++ /dev/null @@ -1,103 +0,0 @@ -package org.unicode.jsp; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Iterator; -import java.util.List; -import java.util.TreeSet; - -public class AlternateIterator implements Iterator, Iterable { - final String[][] sources; - final int[] position; - // optimize later - final int length; - boolean notDone = true; - StringBuilder result = new StringBuilder(); - - public static class Builder { - List> sources = new ArrayList>(); - - Builder add(Collection items) { - if (items.size() == 0) { - throw new IllegalArgumentException(); - } - final ArrayList copy = new ArrayList(items); - sources.add(copy); - return this; - } - - public Builder add(String... items) { - return add(Arrays.asList(items)); - } - - public AlternateIterator build() { - return new AlternateIterator(sources); - } - } - - public static Builder start() { - return new Builder(); - } - - private AlternateIterator(List> inSources) { - length = inSources.size(); - sources = new String[length][]; - for (int i = 0; i < length; ++i) { - final List list = inSources.get(i); - sources[i] = list.toArray(new String[list.size()]); - } - position = new int[length]; - } - - @Override - public boolean hasNext() { - return notDone; - } - - @Override - public String next() { - result.setLength(0); - for (int i = 0; i < length; ++i) { - result.append(sources[i][position[i]]); - } - int i; - for (i = length-1; i >= 0; --i) { - ++position[i]; - if (position[i] < sources[i].length) { - break; - } - position[i] = 0; - } - if (i < 0) { - notDone = false; - } - return result.toString(); - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - - @Override - public Iterator iterator() { - return this; - } - - public double getMaxSize() { - double result = 1; - for (int i = 0; i < length; ++i) { - result *= sources[i].length; - } - return result; - } - - public List> getAlternates() { - final List> result = new ArrayList>(); - for (int i = 0; i < length; ++i) { - result.add(new TreeSet(Arrays.asList(sources[i]))); - } - return result; - } -} diff --git a/unicodetools/src/main/java/org/unicode/jsp/BidiCharMap.java b/unicodetools/src/main/java/org/unicode/jsp/BidiCharMap.java deleted file mode 100644 index a6ba52edb..000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/BidiCharMap.java +++ /dev/null @@ -1,90 +0,0 @@ -/** - * - */ -package org.unicode.jsp; - -import com.ibm.icu.dev.util.UnicodeMap; -import com.ibm.icu.lang.UCharacter; -import com.ibm.icu.lang.UProperty; -import com.ibm.icu.text.UnicodeSet; -import com.ibm.icu.text.UnicodeSetIterator; - -class BidiCharMap { - private static final byte L = BidiReference.L; - private static final byte LRE = BidiReference.LRE; - private static final byte LRO = BidiReference.LRO; - private static final byte R = BidiReference.R; - private static final byte AL = BidiReference.AL; - private static final byte RLE = BidiReference.RLE; - private static final byte RLO = BidiReference.RLO; - private static final byte PDF = BidiReference.PDF; - private static final byte EN = BidiReference.EN; - private static final byte ES = BidiReference.ES; - private static final byte ET = BidiReference.ET; - private static final byte AN = BidiReference.AN; - private static final byte CS = BidiReference.CS; - private static final byte NSM = BidiReference.NSM; - private static final byte BN = BidiReference.BN; - private static final byte B = BidiReference.B; - private static final byte S = BidiReference.S; - private static final byte WS = BidiReference.WS; - private static final byte ON = BidiReference.ON; - - static byte mapIcuToRefNum[] = null; - static UnicodeSet[] umap = new UnicodeSet[BidiReference.typenames.length]; - static UnicodeMap asciiHackMap = new UnicodeMap(); - - static { - mapIcuToRefNum = new byte[BidiReference.typenames.length]; - // generate permutation from names - for (byte i = 0; i < mapIcuToRefNum.length; ++i) { - final int icuValue = UCharacter.getPropertyValueEnum(UProperty.BIDI_CLASS, BidiReference.typenames[i]); - mapIcuToRefNum[icuValue] = i; - } - - for (int i = 0; i < BidiReference.typenames.length; ++i) { - umap[i] = new UnicodeSet(); - } - - for (final UnicodeSetIterator it = new UnicodeSetIterator(new UnicodeSet("[[:ascii:]-[[:cc:]-[:whitespace:]]]")); it.next();) { - asciiHackMap.put(it.codepoint, mapIcuToRefNum[UCharacter.getIntPropertyValue(it.codepoint, UProperty.BIDI_CLASS)]); - } - // override - asciiHackMap.put(']', LRE); - asciiHackMap.put('[', RLE); - asciiHackMap.put('}', LRO); - asciiHackMap.put('{', RLO); - asciiHackMap.put('|', PDF); - asciiHackMap.putAll(new UnicodeSet("[A-M]"), R); - asciiHackMap.putAll(new UnicodeSet("[N-Z]"), AL); - asciiHackMap.putAll(new UnicodeSet("[5-9]"), AN); - asciiHackMap.put('>', L); - asciiHackMap.put('<',R); - asciiHackMap.put('"',NSM); - asciiHackMap.put('_',BN); - } - - boolean asciiHack; - - public BidiCharMap (boolean asciiHack) { - this.asciiHack = asciiHack; - } - - public static UnicodeSet getAsciiHack(byte i) { - return asciiHackMap.keySet(i); - } - - public static byte getBidiClass(int codepoint, boolean asciiHack2) { - if (asciiHack2) { - final Byte result = (Byte) asciiHackMap.getValue(codepoint); - if (result != null) { - return result; - } - } - return mapIcuToRefNum[UCharacter.getIntPropertyValue(codepoint, UProperty.BIDI_CLASS)]; - } - - public byte getBidiClass(int codepoint) { - return getBidiClass(codepoint, asciiHack); - } -} \ No newline at end of file diff --git a/unicodetools/src/main/java/org/unicode/jsp/BidiReference.java b/unicodetools/src/main/java/org/unicode/jsp/BidiReference.java deleted file mode 100644 index 26ab9d7bb..000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/BidiReference.java +++ /dev/null @@ -1,1251 +0,0 @@ -package org.unicode.jsp; - -/* - * (C) Copyright IBM Corp. 1999, All Rights Reserved - * - * version 1.1 - */ - -/** - * Reference implementation of the Unicode 3.0 Bidi algorithm. - * - *

- * This implementation is not optimized for performance. It is intended - * as a reference implementation that closely follows the specification - * of the Bidirectional Algorithm in The Unicode Standard version 3.0. - *

- * Input:
- * There are two levels of input to the algorithm, since clients may prefer - * to supply some information from out-of-band sources rather than relying on - * the default behavior. - *

    - *
  1. unicode type array - *
  2. unicode type array, with externally supplied base line direction - *
- *

Output:
- * Output is separated into several stages as well, to better enable clients - * to evaluate various aspects of implementation conformance. - *

    - *
  1. levels array over entire paragraph - *
  2. reordering array over entire paragraph - *
  3. levels array over line - *
  4. reordering array over line - *
- * Note that for conformance, algorithms are only required to generate correct - * reordering and character directionality (odd or even levels) over a line. - * Generating identical level arrays over a line is not required. Bidi - * explicit format codes (LRE, RLE, LRO, RLO, PDF) and BN can be assigned - * arbitrary levels and positions as long as the other text matches. - *

- * As the algorithm is defined to operate on a single paragraph at a time, - * this implementation is written to handle single paragraphs. Thus - * rule P1 is presumed by this implementation-- the data provided to the - * implementation is assumed to be a single paragraph, and either contains no - * 'B' codes, or a single 'B' code at the end of the input. 'B' is allowed - * as input to illustrate how the algorithm assigns it a level. - *

- * Also note that rules L3 and L4 depend on the rendering engine that uses - * the result of the bidi algorithm. This implementation assumes that the - * rendering engine expects combining marks in visual order (e.g. to the - * left of their base character in RTL runs) and that it adjust the glyphs - * used to render mirrored characters that are in RTL runs so that they - * render appropriately. - * - * @author Doug Felt - */ - -public final class BidiReference { - private final byte[] initialTypes; - private byte[] embeddings; // generated from processing format codes - private byte paragraphEmbeddingLevel = -1; // undefined - - private int textLength; // for convenience - private byte[] resultTypes; // for paragraph, not lines - private byte[] resultLevels; // for paragraph, not lines - private StringBuffer[] record; - private String rule; - private int[] mapToOriginal; - - // The bidi types - - /** Left-to-right*/ - public static final byte L = 0; - - /** Left-to-Right Embedding */ - public static final byte LRE = 1; - - /** Left-to-Right Override */ - public static final byte LRO = 2; - - /** Right-to-Left */ - public static final byte R = 3; - - /** Right-to-Left Arabic */ - public static final byte AL = 4; - - /** Right-to-Left Embedding */ - public static final byte RLE = 5; - - /** Right-to-Left Override */ - public static final byte RLO = 6; - - /** Pop Directional Format */ - public static final byte PDF = 7; - - /** European Number */ - public static final byte EN = 8; - - /** European Number Separator */ - public static final byte ES = 9; - - /** European Number Terminator */ - public static final byte ET = 10; - - /** Arabic Number */ - public static final byte AN = 11; - - /** Common Number Separator */ - public static final byte CS = 12; - - /** Non-Spacing Mark */ - public static final byte NSM = 13; - - /** Boundary Neutral */ - public static final byte BN = 14; - - /** Paragraph Separator */ - public static final byte B = 15; - - /** Segment Separator */ - public static final byte S = 16; - - /** Whitespace */ - public static final byte WS = 17; - - /** Other Neutrals */ - public static final byte ON = 18; - - /** Minimum bidi type value. */ - public static final byte TYPE_MIN = 0; - - /** Maximum bidi type value. */ - public static final byte TYPE_MAX = 18; - - /** Shorthand names of bidi type values, for error reporting. */ - public static final String[] typenames = { - "L", - "LRE", - "LRO", - "R", - "AL", - "RLE", - "RLO", - "PDF", - "EN", - "ES", - "ET", - "AN", - "CS", - "NSM", - "BN", - "B", - "S", - "WS", - "ON", - }; - - // - // Input - // - - /** - * Initialize using an array of direction types. Types range from TYPE_MIN to TYPE_MAX inclusive - * and represent the direction codes of the characters in the text. - * - * @param types the types array - */ - public BidiReference(byte[] types) { - validateTypes(types); - - initialTypes = types.clone(); // client type array remains unchanged - - runAlgorithm(); - } - - /** - * Initialize using an array of direction types and an externally supplied paragraph embedding level. - * The embedding level may be -1, 0, or 1. -1 means to apply the default algorithm (rules P2 and P3), - * 0 is for LTR paragraphs, and 1 is for RTL paragraphs. - * - * @param types the types array - * @param paragraphEmbeddingLevel the externally supplied paragraph embedding level. - */ - public BidiReference(byte[] types, byte paragraphEmbeddingLevel) { - validateTypes(types); - validateParagraphEmbeddingLevel(paragraphEmbeddingLevel); - - initialTypes = types.clone(); // client type array remains unchanged - this.paragraphEmbeddingLevel = paragraphEmbeddingLevel; - - runAlgorithm(); - } - - /** - * The algorithm. - * Does not include line-based processing (Rules L1, L2). - * These are applied later in the line-based phase of the algorithm. - */ - private void runAlgorithm() { - // Ensure trace hook does not change while running algorithm. - // Trace hook is a shared class resource. - synchronized (BidiReference.class) { - textLength = initialTypes.length; - - // Initialize output types. - // Result types initialized to input types. - resultTypes = initialTypes.clone(); - record = new StringBuffer[resultTypes.length]; - for (int i = 0; i < resultTypes.length; ++i) { - record[i] = new StringBuffer(); - } - - trace(BidiTraceHook.PHASE_INIT, 0, textLength); - - // 1) determining the paragraph level - // Rule P1 is the requirement for entering this algorithm. - // Rules P2, P3. - // If no externally supplied paragraph embedding level, use default. - setRule("P1"); - if (paragraphEmbeddingLevel == -1) { - determineParagraphEmbeddingLevel(); - } - - // Initialize result levels to paragraph embedding level. - setRule("P1"); - resultLevels = new byte[textLength]; - setLevels(0, textLength, paragraphEmbeddingLevel); - trace(BidiTraceHook.PHASE_BASELEVEL, 0, textLength); - - // 2) Explicit levels and directions - // Rules X1-X8.\ - setRule("X1-8"); - determineExplicitEmbeddingLevels(); - trace(BidiTraceHook.PHASE_EXPLICIT, 0, textLength); - - // Rule X9. - setRule("X9"); - textLength = removeExplicitCodes(); - trace(BidiTraceHook.PHASE_EXPLICIT_REMOVED, 0, textLength); - - // Rule X10. - // Run remainder of algorithm one level run at a time - setRule("X10"); - byte prevLevel = paragraphEmbeddingLevel; - int start = 0; - while (start < textLength) { - final byte level = resultLevels[start]; - final byte prevType = typeForLevel(Math.max(prevLevel, level)); - - int limit = start + 1; - while (limit < textLength && resultLevels[limit] == level) { - ++limit; - } - - final byte succLevel = limit < textLength ? resultLevels[limit] : paragraphEmbeddingLevel; - final byte succType = typeForLevel(Math.max(succLevel, level)); - - // 3) resolving weak types - // Rules W1-W7. - setRule("W1-7"); - resolveWeakTypes(start, limit, level, prevType, succType); - trace(BidiTraceHook.PHASE_WEAK, start, limit); - - // 4) resolving neutral types - // Rules N1-N3. - setRule("N1-2"); - resolveNeutralTypes(start, limit, level, prevType, succType); - trace(BidiTraceHook.PHASE_NEUTRAL, start, limit); - - // 5) resolving implicit embedding levels - // Rules I1, I2. - setRule("I1-2"); - resolveImplicitLevels(start, limit, level, prevType, succType); - trace(BidiTraceHook.PHASE_IMPLICIT, start, limit); - - prevLevel = level; - start = limit; - } - } - - // Reinsert explicit codes and assign appropriate levels to 'hide' them. - // This is for convenience, so the resulting level array maps 1-1 - // with the initial array. - // See the implementation suggestions section of TR#9 for guidelines on - // how to implement the algorithm without removing and reinserting the codes. - textLength = reinsertExplicitCodes(textLength); - } - - /** - * 1) determining the paragraph level. - *

- * Rules P2, P3. - *

- * At the end of this function, the member variable paragraphEmbeddingLevel is set to either 0 or 1. - */ - private void determineParagraphEmbeddingLevel() { - byte strongType = -1; // unknown - - // Rule P2. - for (int i = 0; i < textLength; ++i) { - final byte t = resultTypes[i]; - if (t == L || t == AL || t == R) { - strongType = t; - break; - } - } - - // Rule P3. - if (strongType == -1) { // none found - // default embedding level when no strong types found is 0. - paragraphEmbeddingLevel = 0; - } else if (strongType == L) { - paragraphEmbeddingLevel = 0; - } else { // AL, R - paragraphEmbeddingLevel = 1; - } - } - - /** - * Process embedding format codes. - *

- * Calls processEmbeddings to generate an embedding array from the explicit format codes. The - * embedding overrides in the array are then applied to the result types, and the result levels are - * initialized. - * @see #processEmbeddings - */ - private void determineExplicitEmbeddingLevels() { - embeddings = processEmbeddings(resultTypes, paragraphEmbeddingLevel); - - for (int i = 0; i < textLength; ++i) { - byte level = embeddings[i]; - if ((level & 0x80) != 0) { - level &= 0x7f; - setType(i, typeForLevel(level)); - } - resultLevels[i] = level; - } - } - - private void setType(int i, byte value) { - if (value != resultTypes[i]) { - record[i].append(getRule() + "\u2192"+getHtmlTypename(value) + "\n"); - } - resultTypes[i] = value; - } - - public String getChanges(int i) { - return record[i].toString(); - } - - /** - * Rules X9. - * Remove explicit codes so that they may be ignored during the remainder - * of the main portion of the algorithm. The length of the resulting text - * is returned. - * @return the length of the data excluding explicit codes and BN. - */ - private int removeExplicitCodes() { - int w = 0; - mapToOriginal = new int[initialTypes.length]; - for (int i = 0; i < textLength; ++i) { - final byte t = initialTypes[i]; - if (!(t == LRE || t == RLE || t == LRO || t == RLO || t == PDF || t == BN)) { - mapToOriginal[w] = i; - embeddings[w] = embeddings[i]; - resultTypes[w] = resultTypes[i]; - resultLevels[w] = resultLevels[i]; - w++; - } - } - return w; // new textLength while explicit levels are removed - } - - /** - * Reinsert levels information for explicit codes. - * This is for ease of relating the level information - * to the original input data. Note that the levels - * assigned to these codes are arbitrary, they're - * chosen so as to avoid breaking level runs. - * @param textLength the length of the data after compression - * @return the length of the data (original length of - * types array supplied to constructor) - */ - private int reinsertExplicitCodes(int textLength) { - for (int i = initialTypes.length; --i >= 0;) { - final byte t = initialTypes[i]; - if (t == LRE || t == RLE || t == LRO || t == RLO || t == PDF || t == BN) { - embeddings[i] = 0; - setType(i, t); - resultLevels[i] = -1; - } else { - --textLength; - embeddings[i] = embeddings[textLength]; - setType(i, resultTypes[textLength]); - resultLevels[i] = resultLevels[textLength]; - } - } - mapToOriginal = null; - - // now propagate forward the levels information (could have - // propagated backward, the main thing is not to introduce a level - // break where one doesn't already exist). - - if (resultLevels[0] == -1) { - resultLevels[0] = paragraphEmbeddingLevel; - } - for (int i = 1; i < initialTypes.length; ++i) { - if (resultLevels[i] == -1) { - resultLevels[i] = resultLevels[i-1]; - } - } - - // Embedding information is for informational purposes only - // so need not be adjusted. - - return initialTypes.length; - } - - /** - * 2) determining explicit levels - * Rules X1 - X8 - * - * The interaction of these rules makes handling them a bit complex. - * This examines resultTypes but does not modify it. It returns embedding and - * override information in the result array. The low 7 bits are the level, the high - * bit is set if the level is an override, and clear if it is an embedding. - */ - private static byte[] processEmbeddings(byte[] resultTypes, byte paragraphEmbeddingLevel) { - final int EXPLICIT_LEVEL_LIMIT = 62; - - final int textLength = resultTypes.length; - final byte[] embeddings = new byte[textLength]; - - // This stack will store the embedding levels and override status in a single byte - // as described above. - final byte[] embeddingValueStack = new byte[EXPLICIT_LEVEL_LIMIT]; - int stackCounter = 0; - - // An LRE or LRO at level 60 is invalid, since the new level 62 is invalid. But - // an RLE at level 60 is valid, since the new level 61 is valid. The current wording - // of the rules requires that the RLE remain valid even if a previous LRE is invalid. - // This keeps track of ignored LRE or LRO codes at level 60, so that the matching PDFs - // will not try to pop the stack. - int overflowAlmostCounter = 0; - - // This keeps track of ignored pushes at level 61 or higher, so that matching PDFs will - // not try to pop the stack. - int overflowCounter = 0; - - // Rule X1. - - // Keep the level separate from the value (level | override status flag) for ease of access. - byte currentEmbeddingLevel = paragraphEmbeddingLevel; - byte currentEmbeddingValue = paragraphEmbeddingLevel; - - // Loop through types, handling all remaining rules - for (int i = 0; i < textLength; ++i) { - - embeddings[i] = currentEmbeddingValue; - - final byte t = resultTypes[i]; - - // Rules X2, X3, X4, X5 - switch (t) { - case RLE: - case LRE: - case RLO: - case LRO: - // Only need to compute new level if current level is valid - if (overflowCounter == 0) { - byte newLevel; - if (t == RLE || t == RLO) { - newLevel = (byte)((currentEmbeddingLevel + 1) | 1); // least greater odd - } else { // t == LRE || t == LRO - newLevel = (byte)((currentEmbeddingLevel + 2) & ~1); // least greater even - } - - // If the new level is valid, push old embedding level and override status - // No check for valid stack counter, since the level check suffices. - if (newLevel < EXPLICIT_LEVEL_LIMIT) { - embeddingValueStack[stackCounter] = currentEmbeddingValue; - stackCounter++; - - currentEmbeddingLevel = newLevel; - if (t == LRO || t == RLO) { // override - currentEmbeddingValue = (byte)(newLevel | 0x80); - } else { - currentEmbeddingValue = newLevel; - } - - // Adjust level of format mark (for expositional purposes only, this gets - // removed later). - embeddings[i] = currentEmbeddingValue; - break; - } - - // Otherwise new level is invalid, but a valid level can still be achieved if this - // level is 60 and we encounter an RLE or RLO further on. So record that we - // 'almost' overflowed. - if (currentEmbeddingLevel == 60) { - overflowAlmostCounter++; - break; - } - } - - // Otherwise old or new level is invalid. - overflowCounter++; - break; - - case PDF: - // The only case where this did not actually overflow but may have almost overflowed - // is when there was an RLE or RLO on level 60, which would result in level 61. So we - // only test the almost overflow condition in that case. - // - // Also note that there may be a PDF without any pushes at all. - - if (overflowCounter > 0) { - --overflowCounter; - } else if (overflowAlmostCounter > 0 && currentEmbeddingLevel != 61) { - --overflowAlmostCounter; - } else if (stackCounter > 0) { - --stackCounter; - currentEmbeddingValue = embeddingValueStack[stackCounter]; - currentEmbeddingLevel = (byte)(currentEmbeddingValue & 0x7f); - } - break; - - case B: - // Rule X8. - - // These values are reset for clarity, in this implementation B can only - // occur as the last code in the array. - stackCounter = 0; - overflowCounter = 0; - overflowAlmostCounter = 0; - currentEmbeddingLevel = paragraphEmbeddingLevel; - currentEmbeddingValue = paragraphEmbeddingLevel; - - embeddings[i] = paragraphEmbeddingLevel; - break; - - default: - break; - } - } - - return embeddings; - } - - - /** - * 3) resolving weak types - * Rules W1-W7. - * - * Note that some weak types (EN, AN) remain after this processing is complete. - */ - private void resolveWeakTypes(int start, int limit, byte level, byte sor, byte eor) { - - // on entry, only these types remain - assertOnly(start, limit, new byte[] {L, R, AL, EN, ES, ET, AN, CS, B, S, WS, ON, NSM }); - - // Rule W1. - // Changes all NSMs. - setRule("W1"); - byte preceedingCharacterType = sor; - for (int i = start; i < limit; ++i) { - final byte t = resultTypes[i]; - if (t == NSM) { - setType(i, preceedingCharacterType); - } else { - preceedingCharacterType = t; - } - } - - // Rule W2. - // EN does not change at the start of the run, because sor != AL. - setRule("W2"); - for (int i = start; i < limit; ++i) { - if (resultTypes[i] == EN) { - for (int j = i - 1; j >= start; --j) { - final byte t = resultTypes[j]; - if (t == L || t == R || t == AL) { - if (t == AL) { - setType(i, AN); - } - break; - } - } - } - } - - // Rule W3. - setRule("W3"); - for (int i = start; i < limit; ++i) { - if (resultTypes[i] == AL) { - setType(i, R); - } - } - - // Rule W4. - // Since there must be values on both sides for this rule to have an - // effect, the scan skips the first and last value. - // - // Although the scan proceeds left to right, and changes the type values - // in a way that would appear to affect the computations later in the scan, - // there is actually no problem. A change in the current value can only - // affect the value to its immediate right, and only affect it if it is - // ES or CS. But the current value can only change if the value to its - // right is not ES or CS. Thus either the current value will not change, - // or its change will have no effect on the remainder of the analysis. - - setRule("W4"); - for (int i = start + 1; i < limit - 1; ++i) { - if (resultTypes[i] == ES || resultTypes[i] == CS) { - final byte prevSepType = resultTypes[i-1]; - final byte succSepType = resultTypes[i+1]; - if (prevSepType == EN && succSepType == EN) { - setType(i, EN); - } else if (resultTypes[i] == CS && prevSepType == AN && succSepType == AN) { - setType(i, AN); - } - } - } - - // Rule W5. - setRule("W5"); - for (int i = start; i < limit; ++i) { - if (resultTypes[i] == ET) { - // locate end of sequence - final int runstart = i; - final int runlimit = findRunLimit(runstart, limit, new byte[] { ET }); - - // check values at ends of sequence - byte t = runstart == start ? sor : resultTypes[runstart - 1]; - - if (t != EN) { - t = runlimit == limit ? eor : resultTypes[runlimit]; - } - - if (t == EN) { - setTypes(runstart, runlimit, EN); - } - - // continue at end of sequence - i = runlimit; - } - } - - // Rule W6. - setRule("W6"); - for (int i = start; i < limit; ++i) { - final byte t = resultTypes[i]; - if (t == ES || t == ET || t == CS) { - setType(i, ON); - } - } - - // Rule W7. - setRule("W7"); - for (int i = start; i < limit; ++i) { - if (resultTypes[i] == EN) { - // set default if we reach start of run - byte prevStrongType = sor; - for (int j = i - 1; j >= start; --j) { - final byte t = resultTypes[j]; - if (t == L || t == R) { // AL's have been removed - prevStrongType = t; - break; - } - } - if (prevStrongType == L) { - setType(i, L); - } - } - } - } - - /** - * 6) resolving neutral types - * Rules N1-N2. - */ - private void resolveNeutralTypes(int start, int limit, byte level, byte sor, byte eor) { - - // on entry, only these types can be in resultTypes - assertOnly(start, limit, new byte[] {L, R, EN, AN, B, S, WS, ON}); - - for (int i = start; i < limit; ++i) { - final byte t = resultTypes[i]; - if (t == WS || t == ON || t == B || t == S) { - // find bounds of run of neutrals - final int runstart = i; - final int runlimit = findRunLimit(runstart, limit, new byte[] {B, S, WS, ON}); - - // determine effective types at ends of run - byte leadingType; - byte trailingType; - - if (runstart == start) { - leadingType = sor; - } else { - leadingType = resultTypes[runstart - 1]; - if (leadingType == L || leadingType == R) { - // found the strong type - } else if (leadingType == AN) { - leadingType = R; - } else if (leadingType == EN) { - // Since EN's with previous strong L types have been changed - // to L in W7, the leadingType must be R. - leadingType = R; - } - } - - if (runlimit == limit) { - trailingType = eor; - } else { - trailingType = resultTypes[runlimit]; - if (trailingType == L || trailingType == R) { - // found the strong type - } else if (trailingType == AN) { - trailingType = R; - } else if (trailingType == EN) { - trailingType = R; - } - } - - byte resolvedType; - if (leadingType == trailingType) { - // Rule N1. - setRule("N1"); - resolvedType = leadingType; - } else { - // Rule N2. - // Notice the embedding level of the run is used, not - // the paragraph embedding level. - setRule("N2"); - resolvedType = typeForLevel(level); - } - - setTypes(runstart, runlimit, resolvedType); - - // skip over run of (former) neutrals - i = runlimit; - } - } - } - - /** - * 7) resolving implicit embedding levels - * Rules I1, I2. - */ - private void resolveImplicitLevels(int start, int limit, byte level, byte sor, byte eor) { - - // on entry, only these types can be in resultTypes - assertOnly(start, limit, new byte[] {L, R, EN, AN}); - - if ((level & 1) == 0) { // even level - for (int i = start; i < limit; ++i) { - final byte t = resultTypes[i]; - // Rule I1. - setRule("I1"); - if (t == L ) { - // no change - } else if (t == R) { - resultLevels[i] += 1; - } else { // t == AN || t == EN - resultLevels[i] += 2; - } - } - } else { // odd level - for (int i = start; i < limit; ++i) { - final byte t = resultTypes[i]; - // Rule I2. - setRule("I2"); - if (t == R) { - // no change - } else { // t == L || t == AN || t == EN - resultLevels[i] += 1; - } - } - } - } - - // - // Output - // - - /** - * Return levels array breaking lines at offsets in linebreaks.
- * Rule L1. - *

- * The returned levels array contains the resolved level for each - * bidi code passed to the constructor. - *

- * The linebreaks array must include at least one value. - * The values must be in strictly increasing order (no duplicates) - * between 1 and the length of the text, inclusive. The last value - * must be the length of the text. - * - * @param linebreaks the offsets at which to break the paragraph - * @return the resolved levels of the text - */ - public byte[] getLevels(int[] linebreaks) { - - // Note that since the previous processing has removed all - // P, S, and WS values from resultTypes, the values referred to - // in these rules are the initial types, before any processing - // has been applied (including processing of overrides). - // - // This example implementation has reinserted explicit format codes - // and BN, in order that the levels array correspond to the - // initial text. Their final placement is not normative. - // These codes are treated like WS in this implementation, - // so they don't interrupt sequences of WS. - - validateLineBreaks(linebreaks, textLength); - - final byte[] result = resultLevels.clone(); // will be returned to caller - - // don't worry about linebreaks since if there is a break within - // a series of WS values preceeding S, the linebreak itself - // causes the reset. - for (int i = 0; i < result.length; ++i) { - final byte t = initialTypes[i]; - if (t == B || t == S) { - // Rule L1, clauses one and two. - result[i] = paragraphEmbeddingLevel; - - // Rule L1, clause three. - for (int j = i - 1; j >= 0; --j) { - if (isWhitespace(initialTypes[j])) { // including format codes - result[j] = paragraphEmbeddingLevel; - } else { - break; - } - } - } - } - - // Rule L1, clause four. - int start = 0; - for (final int limit : linebreaks) { - for (int j = limit - 1; j >= start; --j) { - if (isWhitespace(initialTypes[j])) { // including format codes - result[j] = paragraphEmbeddingLevel; - } else { - break; - } - } - - start = limit; - } - - traceLineLevels(linebreaks, result); - - return result; - } - - /** - * Return reordering array breaking lines at offsets in linebreaks. - *

- * The reordering array maps from a visual index to a logical index. - * Lines are concatenated from left to right. So for example, the - * fifth character from the left on the third line is - *

 getReordering(linebreaks)[linebreaks[1] + 4]
- * (linebreaks[1] is the position after the last character of the - * second line, which is also the index of the first character on the - * third line, and adding four gets the fifth character from the left). - *

- * The linebreaks array must include at least one value. - * The values must be in strictly increasing order (no duplicates) - * between 1 and the length of the text, inclusive. The last value - * must be the length of the text. - * - * @param linebreaks the offsets at which to break the paragraph. - */ - public int[] getReordering(int[] linebreaks) { - validateLineBreaks(linebreaks, textLength); - - final byte[] levels = getLevels(linebreaks); - - return computeMultilineReordering(levels, linebreaks); - } - - /** - * Return multiline reordering array for a given level array. - * Reordering does not occur across a line break. - */ - private static int[] computeMultilineReordering(byte[] levels, int[] linebreaks) { - final int[] result = new int[levels.length]; - - int start = 0; - for (final int limit : linebreaks) { - final byte[] templevels = new byte[limit - start]; - System.arraycopy(levels, start, templevels, 0, templevels.length); - - final int[] temporder = computeReordering(templevels); - for (int j = 0; j < temporder.length; ++j) { - result[start + j] = temporder[j] + start; - } - - start = limit; - } - - return result; - } - - /** - * Return reordering array for a given level array. This reorders a single line. - * The reordering is a visual to logical map. For example, - * the leftmost char is string.charAt(order[0]). - * Rule L2. - */ - private static int[] computeReordering(byte[] levels) { - final int lineLength = levels.length; - - final int[] result = new int[lineLength]; - - // initialize order - for (int i = 0; i < lineLength; ++i) { - result[i] = i; - } - - // locate highest level found on line. - // Note the rules say text, but no reordering across line bounds is performed, - // so this is sufficient. - byte highestLevel = 0; - byte lowestOddLevel = 63; - for (int i = 0; i < lineLength; ++i) { - final byte level = levels[i]; - if (level > highestLevel) { - highestLevel = level; - } - if (((level & 1) != 0) && level < lowestOddLevel) { - lowestOddLevel = level; - } - } - - for (int level = highestLevel; level >= lowestOddLevel; --level) { - for (int i = 0; i < lineLength; ++i) { - if (levels[i] >= level) { - // find range of text at or above this level - final int start = i; - int limit = i + 1; - while (limit < lineLength && levels[limit] >= level) { - ++limit; - } - - // reverse run - for (int j = start, k = limit - 1; j < k; ++j, --k) { - final int temp = result[j]; - result[j] = result[k]; - result[k] = temp; - } - - // skip to end of level run - i = limit; - } - } - } - - return result; - } - - /** - * Return the base level of the paragraph. - */ - public byte getBaseLevel() { - return paragraphEmbeddingLevel; - } - - // --- internal utilities ------------------------------------------------- - - /** - * Return true if the type is considered a whitespace type for the line break rules. - */ - private static boolean isWhitespace(byte biditype) { - switch (biditype) { - case LRE: - case RLE: - case LRO: - case RLO: - case PDF: - case BN: - case WS: - return true; - default: - return false; - } - } - - /** - * Return the strong type (L or R) corresponding to the level. - */ - private static byte typeForLevel(int level) { - return ((level & 0x1) == 0) ? L : R; - } - - /** - * Return the limit of the run starting at index that includes only resultTypes in validSet. - * This checks the value at index, and will return index if that value is not in validSet. - */ - private int findRunLimit(int index, int limit, byte[] validSet) { - --index; - loop: - while (++index < limit) { - final byte t = resultTypes[index]; - for (int i = 0; i < validSet.length; ++i) { - if (t == validSet[i]) { - continue loop; - } - } - // didn't find a match in validSet - return index; - } - return limit; - } - - /** - * Return the start of the run including index that includes only resultTypes in validSet. - * This assumes the value at index is valid, and does not check it. - */ - private int findRunStart(int index, byte[] validSet) { - loop: - while (--index >= 0) { - final byte t = resultTypes[index]; - for (int i = 0; i < validSet.length; ++i) { - if (t == validSet[i]) { - continue loop; - } - } - return index + 1; - } - return 0; - } - - /** - * Set resultTypes from start up to (but not including) limit to newType. - */ - private void setTypes(int start, int limit, byte newType) { - for (int i = start; i < limit; ++i) { - setType(i, newType); - } - } - - /** - * Set resultLevels from start up to (but not including) limit to newLevel. - */ - private void setLevels(int start, int limit, byte newLevel) { - for (int i = start; i < limit; ++i) { - resultLevels[i] = newLevel; - } - } - - // --- algorithm internal validation -------------------------------------- - - /** - * Algorithm validation. - * Assert that all values in resultTypes are in the provided set. - */ - private void assertOnly(int start, int limit, byte[] codes) { - loop: - for (int i = start; i < limit; ++i) { - final byte t = resultTypes[i]; - for (int j = 0; j < codes.length; ++j) { - if (t == codes[j]) { - continue loop; - } - } - - throw new Error("invalid bidi code " + getHtmlTypename(t) + " present in assertOnly at position " + i); - } - } - - // --- input validation --------------------------------------------------- - - /** - * Throw exception if type array is invalid. - */ - private static void validateTypes(byte[] types) { - if (types == null) { - throw new IllegalArgumentException("types is null"); - } - for (int i = 0; i < types.length; ++i) { - if (types[i] < TYPE_MIN || types[i] > TYPE_MAX) { - throw new IllegalArgumentException("illegal type value at " + i + ": " + types[i]); - } - } - for (int i = 0; i < types.length - 1; ++i) { - if (types[i] == B) { - throw new IllegalArgumentException("B type before end of paragraph at index: " + i); - } - } - } - - /** - * Throw exception if paragraph embedding level is invalid. Special allowance for -1 so that - * default processing can still be performed when using this API. - */ - private static void validateParagraphEmbeddingLevel(byte paragraphEmbeddingLevel) { - if (paragraphEmbeddingLevel != -1 && - paragraphEmbeddingLevel != 0 && - paragraphEmbeddingLevel != 1) { - throw new IllegalArgumentException("illegal paragraph embedding level: " + paragraphEmbeddingLevel); - } - } - - /** - * Throw exception if line breaks array is invalid. - */ - private static void validateLineBreaks(int[] linebreaks, int textLength) { - int prev = 0; - for (int i = 0; i < linebreaks.length; ++i) { - final int next = linebreaks[i]; - if (next <= prev) { - throw new IllegalArgumentException("bad linebreak: " + next + " at index: " + i); - } - prev = next; - } - if (prev != textLength) { - throw new IllegalArgumentException("last linebreak must be at " + textLength); - } - } - - // --- debug utilities ---------------------------------------------------- - - /** - * An interface for tracing the progress of the Bidi reference implementation. - */ - public static interface BidiTraceHook { - - /** - * Display the current state of the implementation. - *

- * The data supplied to the display method represents the current internal state of the implementation. Note - * that some phases of the algorithm operate on the data as it appears when the explicit formatting codes and - * BN have been removed. When this is the case, start and limit do not correspond directly to the original - * direction type codes that were passed to the constructor. However, the values in embeddings, resultTypes, - * and resultLevels are consistent. - *

- * @param phase the current phase of the algorithm - * @param start the start of the run of text being worked on - * @param limit the limit of the run of text being worked on - * @param paragraphEmbeddingLevel the paragraph embedding level - * @param initialTypes the original bidi types provided to the constructor - * @param embeddings the embeddings and override information resulting from explicit formatting codes - * @param resultTypes the current resolved bidi types - * @param resultLevels the current resolved levels (assuming the paragraph is a single line) - */ - public abstract void display(int phase, - int start, int limit, - byte paragraphEmbeddingLevel, - byte[] initialTypes, - byte[] embeddings, - byte[] resultTypes, - byte[] resultLevels); - - /** - * Display the results of processing line break information to generate line levels. - *

- * @param paragraphEmbeddingLevel the paragraph embedding level - * @param initialTypes the original bidi types provided to the constructor - * @param embeddings the embeddings and override information resulting from explicit formatting codes - * @param linebreaks the array of positions where line breaks occur - * @param resolvedLevels the resolved levels before line processing is performed - * @param lineLevels the levels after line processing was performed - */ - public abstract void displayLineLevels(byte paragraphEmbeddingLevel, - byte[] initialTypes, - byte[] embeddings, - int[] linebreaks, - byte[] resolvedLevels, - byte[] lineLevels); - - /** - * Display a message. - * - * @param msg the message text - */ - public abstract void message(String msg); - - - /** The phase before any processing on the data bas been performed. */ - public static int PHASE_INIT = 0; - - /** The phase after the base paragraph level has been determined. */ - public static int PHASE_BASELEVEL = 1; - - /** The phase after explicit codes have been processed to generate the embedding information. */ - public static int PHASE_EXPLICIT = 2; - - /** The phase after explicit codes and BN have been removed from the internal data. */ - public static int PHASE_EXPLICIT_REMOVED = 3; - - /** The phase after the weak rule processing has been performed. */ - public static int PHASE_WEAK = 4; - - /** The phase after the neutral rule processing has been performed. */ - public static int PHASE_NEUTRAL = 5; - - /** The phase after the implicit rule processing has been performed. */ - public static int PHASE_IMPLICIT = 6; - } - - private static BidiTraceHook hook = null; // for tracking the algorithm - - /** - * Set a trace hook so the progress of the algorithm can be monitored. - */ - public static synchronized void setTraceHook(BidiTraceHook hook) { - BidiReference.hook = hook; - } - - /** - * Return the trace hook. - */ - public static BidiTraceHook getTraceHook() { - return hook; - } - - /** - * Call trace hook during major phases of algorithm. - */ - private void trace(int phase, int start, int limit) { - if (hook != null) { - hook.display(phase, start, limit, paragraphEmbeddingLevel, - initialTypes, embeddings, resultTypes, resultLevels); - } - } - - /** - * Call trace hook when computing line levels based on linebreaks. - */ - private void traceLineLevels(int[] linebreaks, byte[] lineLevels) { - if (hook != null) { - hook.displayLineLevels(paragraphEmbeddingLevel, initialTypes, embeddings, linebreaks, resultLevels, lineLevels); - } - } - - private void setRule(String rule) { - final String[] anchor = rule.split("-"); - this.rule = "" + rule + ""; - } - - public static String getHtmlTypename(int value) { - return "" + typenames[value] + ""; - } - - private String getRule() { - return rule; - } -} - diff --git a/unicodetools/src/main/java/org/unicode/jsp/BranchStringPrepData.java b/unicodetools/src/main/java/org/unicode/jsp/BranchStringPrepData.java deleted file mode 100644 index defd81d23..000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/BranchStringPrepData.java +++ /dev/null @@ -1,384 +0,0 @@ -package org.unicode.jsp; - -/** - * - */ - - -public class BranchStringPrepData { - // - // private static final boolean DEBUG = true; - // public static UnicodeSet U32 = new UnicodeSet("[:age=3.2:]").freeze(); - // public static UnicodeSet VALID_ASCII = new UnicodeSet("[\\u002Da-zA-Z0-9]").freeze(); - // - // - // /** - //3. Mapping - // This profile specifies mapping using the following tables from - // [STRINGPREP]: - // Table B.1 - // Table B.2 - //4. Normalization - // This profile specifies using Unicode normalization form KC, as - // described in [STRINGPREP]. - //5. Prohibited Output - // Table C.1.2 - // Table C.2.2 - // Table C.3 - // Table C.4 - // Table C.5 - // Table C.6 - // Table C.7 - // Table C.8 - // Table C.9 - // */ - // - // public static void getIdna2003Tables(UnicodeMap mappings, UnicodeMap types) { - // EnumSet allowed = EnumSet.of( - // Idna2003Table.B_1, - // Idna2003Table.B_2, - // Idna2003Table.C_1_2 - // , Idna2003Table.C_2_2 - // , Idna2003Table.C_3 - // , Idna2003Table.C_4 - // , Idna2003Table.C_5 - // , Idna2003Table.C_6 - // , Idna2003Table.C_7 - // , Idna2003Table.C_8 - // , Idna2003Table.C_9 - // ); - // for (int i = 0; i <= 0x10FFFF; ++i) { - // String mapping = getMapping(i, allowed); - // boolean isProhibited = mapping == null ? isProhibited(i, allowed) : isProhibited(mapping,allowed); - // IdnaType status; - // if (isProhibited || !U32.contains(i)) { - // status = IdnaType.disallowed; - // mapping = null; - // } else if (mapping == null) { - // status = IdnaType.valid; - // } else if (mapping.length() == 0) { - // status = IdnaType.ignored; - // } else { - // status = IdnaType.mapped; - // } - // mappings.put(i, mapping); - // types.put(i, status); - // } - // // special handling for separators - // mappings.putAll(IdnaTypes.OTHER_DOT_SET,"."); - // types.putAll(IdnaTypes.OTHER_DOT_SET,IdnaType.mapped); - // types.put('.',IdnaType.valid); - // - // mappings.freeze(); - // types.freeze(); - // } - // - // private static String getMapping(int cp, EnumSet allowed) { - // DataSet items = data.get(cp); - // String mapping = items == null ? null : items.mapping; - // String normalizedMapping = mapping != null ? Normalizer.normalize(mapping, Normalizer.NFKC) : Normalizer.normalize(cp, Normalizer.NFKC); - // if (UnicodeProperty.equals(cp, normalizedMapping)) { - // return null; - // } - // return normalizedMapping; - // } - // - // private static boolean isProhibited(int cp, EnumSet allowed) { - // DataSet items = data.get(cp); - // if (items != null) { - // return items.isProhibited; - // } - // return false; - // } - // - // private static boolean isProhibited(String string, EnumSet allowed) { - // int cp; - // for (int i = 0; i < string.length(); i += Character.charCount(cp)) { - // cp = string.codePointAt(i); - // if (isProhibited(cp, allowed)) { - // return true; - // } - // } - // return false; - // } - // - // enum Idna2003Table {none, A_1, B_1, B_2, B_3, C_1_1, C_1_2, C_2_1, C_2_2, C_3, C_4, C_5, C_6, C_7, C_8, C_9, D_1, D_2} - // - // static EnumSet PROHIBITED = EnumSet.range(Idna2003Table.C_1_1, Idna2003Table.C_9); - // static EnumSet MAPPING = EnumSet.range(Idna2003Table.B_1, Idna2003Table.B_3); - // - // - // /** - // A.1 Unassigned code points in Unicode 3.2 - // ----- Start Table A.1 ----- - // 0221 - // B.1 Commonly mapped to nothing - // ----- Start Table B.1 ----- - // 00AD; ; Map to nothing - // B.2 Mapping for case-folding used with NFKC - // ----- Start Table B.2 ----- - // 0041; 0061; Case map - // B.3 Mapping for case-folding used with no normalization - // ----- Start Table B.3 ----- - // 0041; 0061; Case map - // C.1.1 ASCII space characters - // ----- Start Table C.1.1 ----- - // 0020; SPACE - // C.1.2 Non-ASCII space characters - // ----- Start Table C.1.2 ----- - // 00A0; NO-BREAK SPACE - // C.2.1 ASCII control characters - // ----- Start Table C.2.1 ----- - // 0000-001F; [CONTROL CHARACTERS] - // C.2.2 Non-ASCII control characters - // ----- Start Table C.2.2 ----- - // 0080-009F; [CONTROL CHARACTERS] - // C.2.2 Non-ASCII control characters - // ----- Start Table C.2.2 ----- - // 0080-009F; [CONTROL CHARACTERS] - // C.3 Private use - // ----- Start Table C.3 ----- - // E000-F8FF; [PRIVATE USE, PLANE 0] - // C.4 Non-character code points - // ----- Start Table C.4 ----- - // FDD0-FDEF; [NONCHARACTER CODE POINTS] - // C.5 Surrogate codes - // ----- Start Table C.5 ----- - // D800-DFFF; [SURROGATE CODES] - // C.6 Inappropriate for plain text - // ----- Start Table C.6 ----- - // FFF9; INTERLINEAR ANNOTATION ANCHOR - // C.7 Inappropriate for canonical representation - // ----- Start Table C.7 ----- - // 2FF0-2FFB; [IDEOGRAPHIC DESCRIPTION CHARACTERS] - // C.8 Change display properties or are deprecated - // ----- Start Table C.8 ----- - // 0340; COMBINING GRAVE TONE MARK - // C.9 Tagging characters - // ----- Start Table C.9 ----- - // E0001; LANGUAGE TAG - // D.1 Characters with bidirectional property "R" or "AL" - // ----- Start Table D.1 ----- - // 05BE - // D.2 Characters with bidirectional property "L" - // ----- Start Table D.2 ----- - // 0041-005A - // */ - // - // static Pattern TABLE_DELIMITER = Pattern.compile("\\Q-----\\E\\s*(Start|End)\\s*Table\\s*(\\S+)\\s*\\Q-----\\E"); - // static Pattern MAP_LINE = Pattern.compile("([A-Z0-9]{4,6})" + - // "(?:-([A-Z0-9]{4,6}))?" + - // "(?:\\s*;\\s*((?:[A-Z0-9]{4,6}\\s*)*))?" + - // "(?:\\s*;\\s*.*)?"); - // static Pattern SET_LINE = Pattern.compile("([A-Z0-9]{4,6})" + - // "(?:-([A-Z0-9]{4,6}))?" + - // "(?:\\s*;\\s*.*)?"); - // - // static class DataSet { - // final boolean isProhibited; - // final String mapping; - // final String comment; - // - // private DataSet(boolean isProhibited2, String mapping2, String comment2) { - // isProhibited = isProhibited2; - // mapping = mapping2; - // comment = comment2; - // } - // - // public DataSet add(boolean myisProhibited, String mymapping, String mycomment) { - // // now merge - // if (isProhibited) { - // myisProhibited = true; - // } - // if (mymapping == null) { - // mymapping = mapping; - // } else if (mapping != null && !mymapping.equals(mapping)) { - // throw new IllegalArgumentException("Conflicting mapping " + Utility.hex(mapping) + ", " + Utility.hex(mymapping)); - // } - // if (mycomment == null) { - // mycomment = comment; - // } else if (comment != null) { - // mycomment = comment + "\n" + mycomment; - // } - // return new DataSet(myisProhibited, mymapping, mycomment); - // } - // /** - // * If there is a mapping, use the mapping to set the prohibited bit. - // * @param codepoint - // * @param data - // * @return stuff - // */ - // public DataSet fix(int codepoint, UnicodeMap data) { - // if (mapping != null) { - // boolean newIsProhibited = false; - // int cp; - // for (int i = 0; i < mapping.length(); i += Character.charCount(cp)) { - // cp = mapping.codePointAt(i); - // DataSet other = data.get(i); - // if (other.mapping != null) { - // throw new IllegalArgumentException("Recursive Mapping"); - // } - // if (other.isProhibited) { - // newIsProhibited = true; - // } - // } - // DataSet newDataSet = new DataSet(newIsProhibited, mapping, comment); - // if (DEBUG) System.out.println("Changing value for " + Utility.hex(codepoint) + ":\t[" + this + "] => [" + newDataSet + "]"); - // return newDataSet; - // } - // return null; - // } - // - // public boolean equals(Object other) { - // DataSet that = (DataSet) other; - // return isProhibited == that.isProhibited - // && UnicodeProperty.equals(mapping, that.mapping) - // && UnicodeProperty.equals(comment, that.comment); - // } - // public int hashCode() { - // return (isProhibited ? 1 : 0) ^ (mapping == null ? 0 : mapping.hashCode()); - // } - // public String toString() { - // return isProhibited + ", " + Utility.hex(mapping) + ", " + comment; - // } - // } - // - // - // private static final UnicodeMap data; - // - // static { - // data = new UnicodeMap(); - // try { - // //UnicodeMap> rawMapping = new UnicodeMap>(); - // - // Matcher tableDelimiter = TABLE_DELIMITER.matcher(""); - // Matcher mapLine = MAP_LINE.matcher(""); - // Matcher setLine = SET_LINE.matcher(""); - // BufferedReader in = FileUtilities.openFile(StringPrepData.class, "nameprep.txt"); - // //BufferedReader in = FileUtilities.openUTF8Reader(UCD_Types.BASE_DIR + "idna/", "nameprep.txt"); - // StringPrepData.Idna2003Table table = null; - // boolean inTable = false; - // boolean isMapping = false; - // for (int count = 1; ; ++count) { - // String line = in.readLine(); - // if (line == null) break; - // line = line.trim(); - // if (line.length() == 0 || line.startsWith("Hoffman") || line.startsWith("RFC")) continue; - // if (line.startsWith("-----")) { - // if (!tableDelimiter.reset(line).matches()) { - // throw new IllegalArgumentException("Bad syntax: " + line); - // } - // inTable = tableDelimiter.group(1).equals("Start"); - // StringPrepData.Idna2003Table newTable = Idna2003Table.valueOf(tableDelimiter.group(2).replace(".","_")); - // if (inTable) { - // if (table != null) { - // throw new IllegalArgumentException("Table not terminated: " + table + "; " + line); - // } - // table = newTable; - // if (DEBUG) System.out.println(count + ")\t*** New Table: " + table); - // isMapping = newTable.toString().startsWith("B"); - // } else { - // if (newTable != table) { - // throw new IllegalArgumentException("Bad table end: " + newTable + " != " + table + "; " + line); - // } - // table = null; - // isMapping = false; - // } - // continue; - // } - // if (!inTable) { - // if (DEBUG) System.out.println(count + ")\tIgnoring: " + line); - // continue; - // } - // // if (!allowed.contains(table)) { - // // if (DEBUG) System.out.println(count + ")\t" + table + "\tSKIPPING line:\t" + line); - // // continue; - // // } else { - // // if (DEBUG) System.out.println(count + ")\t" + table + "\tDoing line:\t" + line); - // // } - // Matcher lineMatcher = isMapping ? mapLine : setLine; - // if (!lineMatcher.reset(line).matches()) { - // throw new IllegalArgumentException("Illegal range-value syntax: " + line); - // } - // int startCode = Utility.fromHex(lineMatcher.group(1),4," ").codePointAt(0); - // String endCodeString = lineMatcher.groupCount() < 2 ? null : lineMatcher.group(2); - // String group3 = lineMatcher.groupCount() < 3 ? null : lineMatcher.group(3); - // String group4 = lineMatcher.groupCount() < 4 ? null : lineMatcher.group(4); - // int endCode = endCodeString == null ? startCode : Utility.fromHex(endCodeString,4," ").codePointAt(0); - // String comment, mapValueString; - // if (isMapping) { - // comment = group4; - // try { - // mapValueString = group3.length() == 0 ? "" : Utility.fromHex(group3,4," "); - // } catch (RuntimeException e) { - // throw e; - // } - // } else { - // comment = group3; - // mapValueString = null; - // } - // if (DEBUG) System.out.println(count + ")\t" + line + ":\t" + Utility.hex(startCode) - // + (startCode == endCode ? "" : ".." + Utility.hex(endCode)) - // + ",\t" + table - // + ",\t" + (mapValueString == null ? "null" : Utility.hex(mapValueString)) - // ); - // - // addMapping(startCode, endCode, table, (String)mapValueString, (String)comment); - // } - // in.close(); - // } catch (IOException e) { - // throw new IllegalArgumentException(e); - // } - // - // // fix ASCII - // - // addMapping(0, 0x7F, Idna2003Table.C_9, (String)null, (String)null); - // for (UnicodeSetIterator it = new UnicodeSetIterator(VALID_ASCII); it.next();) { - // addMapping(0, 0x7F, null, null, null); - // } - // - // //rawMapping.putAll(VALID_ASCII, null); - // - // for (int i = 'A'; i <= 'Z'; ++i) { - // R3 alphaMap = Row.of(Idna2003Table.B_1, UTF16.valueOf(i-'A'+'a'), (String)null); - // DataSet tableSet = data.get(i); - // if (tableSet == null) { - // tableSet = new DataSet(PROHIBITED.contains(Idna2003Table.B_1), UTF16.valueOf(i-'A'+'a'), (String)null); - // } else { - // tableSet = tableSet.add(PROHIBITED.contains(Idna2003Table.B_1), UTF16.valueOf(i-'A'+'a'), (String)null); - // } - // data.put(i, tableSet); - // } - // for (String i : data.keySet()) { - // DataSet dataSet = data.get(i); - // DataSet fixed = dataSet.fix(i.codePointAt(0), data); - // if (fixed != null) { - // data.put(i, fixed); - // } - // } - // data.freeze(); - // } - // - // private static void addMapping(int startCode, int endCode, StringPrepData.Idna2003Table type, String mapping, String comment) { - // for (int i = startCode; i <= endCode; ++i) { - // addData(i, type, mapping, comment); - // } - // } - // - // private static void addData(int i, StringPrepData.Idna2003Table type, String mapping, String comment) { - // try { - // if (i == 0x200c) { - // System.out.print(""); - // } - // DataSet tableSet = data.get(i); - // if (tableSet == null) { - // tableSet = new DataSet(PROHIBITED.contains(type), mapping, comment); - // } else { - // tableSet = tableSet.add(PROHIBITED.contains(type), mapping, comment); - // } - // data.put(i, tableSet); - // } catch (RuntimeException e) { - // throw new IllegalArgumentException("Failure with " + Utility.hex(i), e); - // } - // } -} \ No newline at end of file diff --git a/unicodetools/src/main/java/org/unicode/jsp/Builder.java b/unicodetools/src/main/java/org/unicode/jsp/Builder.java deleted file mode 100644 index 612a1ff86..000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/Builder.java +++ /dev/null @@ -1,386 +0,0 @@ -package org.unicode.jsp; - -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.SortedMap; -import java.util.SortedSet; - -/** - * Convenience class for building collections and maps. Allows them to be built by chaining, making it simpler to - * set as parameters and fields. Also supplies some operations that are missing on the JDK maps and collections, - * and provides finer control for what happens with equal elements. - *

- * Operations: A is current contents, B is new collection, x indicates the results
- * A-B   A&B    B-A   Name
- *                    clear()
- * x                  removeAll(B)
- *        x           retainAll(B) -- option 1: keep A, option 2: substitute B
- *               x    keepNew(B)
- * x      x           
- *        x      x    clear().addAll(B)
- * x             x    xor(B)
- * x      x      x    addAll(B)
- * 
- * @author markdavis - */ -// TODO add other Iterable - -public final class Builder { - public enum EqualAction {NATIVE, REPLACE, RETAIN, THROW} - - public static > CBuilder with(C collection, EqualAction ea) { - return new CBuilder(collection, ea); - } - - public static > CBuilder with(C collection) { - return new CBuilder(collection, EqualAction.NATIVE); - } - - public static > MBuilder with(M map, EqualAction ea) { - return new MBuilder(map, ea); - } - - public static > MBuilder with(M map) { - return new MBuilder(map, EqualAction.NATIVE); - } - - // ===== Collections ====== - - public static final class CBuilder> { - public EqualAction getEqualAction() { - return equalAction; - } - public CBuilder setEqualAction(EqualAction equalAction) { - this.equalAction = equalAction; - return this; - } - - public CBuilder clear() { - collection.clear(); - return this; - } - - public CBuilder add(E e) { - switch (equalAction) { - case NATIVE: - break; - case REPLACE: - collection.remove(e); - break; - case RETAIN: - if (collection.contains(e)) { - return this; - } - break; - case THROW: - if (collection.contains(e)) { - throw new IllegalArgumentException("Map already contains " + e); - } - } - collection.add(e); - return this; - } - - public CBuilder addAll(Collection c) { - if (equalAction == EqualAction.REPLACE) { - collection.addAll(c); - } else { - for (final E item : c) { - add(item); - } - } - return this; - } - - public CBuilder addAll(E... items) { - for (final E item : items) { - collection.add(item); - } - return this; - } - - public CBuilder addAll(Iterable items) { - for (final E item : items) { - collection.add(item); - } - return this; - } - - public CBuilder remove(E o) { - collection.remove(o); - return this; - } - - public CBuilder removeAll(Collection c) { - collection.removeAll(c); - return this; - } - - public CBuilder removeAll(E... items) { - for (final E item : items) { - collection.remove(item); - } - return this; - } - - public CBuilder removeAll(Iterable items) { - for (final E item : items) { - collection.remove(item); - } - return this; - } - - public CBuilder retainAll(Collection c) { - collection.retainAll(c); - return this; - } - - public CBuilder retainAll(E... items) { - collection.retainAll(Arrays.asList(items)); - return this; - } - - public CBuilder xor(Collection c) { - for (final E item : c) { - final boolean changed = collection.remove(item); - if (!changed) { - collection.add(item); - } - } - return this; - } - - public CBuilder xor(E... items) { - return xor(Arrays.asList(items)); - } - - public CBuilder keepNew(Collection c) { - final HashSet extras = new HashSet(c); - extras.removeAll(collection); - collection.clear(); - collection.addAll(extras); - return this; - } - - public CBuilder keepNew(E... items) { - return keepNew(Arrays.asList(items)); - } - - public U get() { - final U temp = collection; - collection = null; - return temp; - } - - @SuppressWarnings("unchecked") - public U freeze() { - U temp; - if (collection instanceof SortedSet) { - temp = (U)Collections.unmodifiableSortedSet((SortedSet) collection); - } else if (collection instanceof Set) { - temp = (U)Collections.unmodifiableSet((Set) collection); - } else if (collection instanceof List) { - temp = (U)Collections.unmodifiableList((List) collection); - } else { - temp = (U)Collections.unmodifiableCollection(collection); - } - collection = null; - return temp; - } - - @Override - public String toString() { - return collection.toString(); - } - - // ===== PRIVATES ====== - - private CBuilder(U set2, EqualAction ea) { - this.collection = set2; - equalAction = ea; - } - private U collection; - private EqualAction equalAction; - } - - // ===== Maps ====== - - public static final class MBuilder> { - - public EqualAction getEqualAction() { - return equalAction; - } - public MBuilder setEqualAction(EqualAction equalAction) { - this.equalAction = equalAction; - return this; - } - - public MBuilder clear() { - map.clear(); - return this; - } - public MBuilder put(K key, V value) { - switch (equalAction) { - case NATIVE: - break; - case REPLACE: - map.remove(key); - break; - case RETAIN: - if (map.containsKey(key)) { - return this; - } - break; - case THROW: - if (map.containsKey(key)) { - throw new IllegalArgumentException("Map already contains " + key); - } - } - map.put(key, value); - return this; - } - - public MBuilder on(K... keys) { - this.keys = Arrays.asList(keys); - return this; - } - - public MBuilder on(Collection keys) { - this.keys = keys; - return this; - } - - public MBuilder put(V value) { - for (final K key : keys) { - put(key, value); - } - keys = null; - return this; - } - - public MBuilder put(V... values) { - int v = 0; - for (final K key : keys) { - put(key, values[v++]); - if (v >= values.length) { - v = 0; - } - } - keys = null; - return this; - } - - public MBuilder put(Collection values) { - Iterator vi = null; - for (final K key : keys) { - if (vi == null || !vi.hasNext()) { - vi = values.iterator(); - } - put(key, vi.next()); - } - return this; - } - - public MBuilder putAll(Map m) { - if (equalAction == EqualAction.NATIVE) { - map.putAll(m); - } else { - for (final K key : m.keySet()) { - put(key, m.get(key)); - } - } - keys = null; - return this; - } - - public MBuilder putAll(Object[][] data) { - for (final Object[] key : data) { - put((K)key[0], (V)key[1]); - } - keys = null; - return this; - } - - public MBuilder remove(K key) { - map.remove(key); - return this; - } - - public MBuilder removeAll(Collection keys) { - map.keySet().removeAll(keys); - return this; - } - public MBuilder removeAll(K... keys) { - return removeAll(Arrays.asList(keys)); - } - - public MBuilder retainAll(Collection keys) { - map.keySet().retainAll(keys); - return this; - } - public MBuilder retainAll(K... keys) { - return retainAll(Arrays.asList(keys)); - } - - public > MBuilder xor(N c) { - for (final K item : c.keySet()) { - if (map.containsKey(item)) { - map.remove(item); - } else { - put(item, c.get(item)); - } - } - return this; - } - - public > MBuilder keepNew(N c) { - final HashSet extras = new HashSet(c.keySet()); - extras.removeAll(map.keySet()); - map.clear(); - for (final K key : extras) { - map.put(key, c.get(key)); - } - return this; - } - - public M get() { - final M temp = map; - map = null; - return temp; - } - - @SuppressWarnings("unchecked") - public M freeze() { - M temp; - if (map instanceof SortedMap) { - temp = (M)Collections.unmodifiableSortedMap((SortedMap) map); - } else { - temp = (M)Collections.unmodifiableMap(map); - } - map = null; - return temp; - } - - @Override - public String toString() { - return map.toString(); - } - - // ===== PRIVATES ====== - - private Collection keys; - private M map; - private EqualAction equalAction; - - private MBuilder(M map, EqualAction ea) { - this.map = map; - equalAction = ea; - } - } -} diff --git a/unicodetools/src/main/java/org/unicode/jsp/Common.java b/unicodetools/src/main/java/org/unicode/jsp/Common.java deleted file mode 100644 index 73b34c16f..000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/Common.java +++ /dev/null @@ -1,174 +0,0 @@ -package org.unicode.jsp; - -import java.util.Arrays; -import java.util.List; - -import org.unicode.jsp.XPropertyFactory.HanType.HanTypeValues; - -import com.ibm.icu.lang.UCharacter; -import com.ibm.icu.lang.UProperty; -import com.ibm.icu.text.Normalizer; -import com.ibm.icu.text.Normalizer.Mode; -import com.ibm.icu.text.Normalizer2; -import com.ibm.icu.text.StringTransform; -import com.ibm.icu.text.Transform; -import com.ibm.icu.text.UTF16; -import com.ibm.icu.text.UnicodeSet; -import com.ibm.icu.util.ULocale; - -public class Common { - static final Normalizer2 NFKC_CF_ = Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE); - - // public static class NFKC_CF implements StringTransform { - // private static final UnicodeSet DEFAULT_IGNORABLES = new UnicodeSet("[:di:]").freeze(); - // - // // static Matcher DI = - // // Pattern.compile(UnicodeRegex.fix("[:di:]")).matcher(""); - // // UnicodeMap DI2 = new - // // UnicodeMap().putAll(DEFAULT_IGNORABLES, ""); - // public String transform(String source) { - // - // // String di = DI2.transform(source); - // String di = DEFAULT_IGNORABLES.stripFrom(source, true); - // String cf = Common.myFoldCase(di); - // String nfcf = Common.MyNormalize(cf, Normalizer.NFKC); - // String nfcfnf = Common.myFoldCase(nfcf); - // // String s3 = DI.reset(s2).replaceAll(""); - // String result = Common.MyNormalize(nfcfnf, Normalizer.NFKC); - // return result; - // } - // } - - public static Transform NFKC_CF = new StringTransform() { - @Override - public String transform(String source) { - return NFKC_CF_.normalize(source); - } - }; - - static List XPROPERTY_NAMES = Arrays.asList(new String[] { "toNfc", "toNfd", "toNfkc", "toNfkd", "toCasefold", "toLowercase", "toUppercase", - "toTitlecase", - "subhead" }); - static final int XSTRING_START = UProperty.STRING_LIMIT; - - public static String getXStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) { - - switch (propertyEnum) { - case Common.TO_NFC: - return Common.MyNormalize(codepoint, Normalizer.NFC); - case Common.TO_NFD: - return Common.MyNormalize(codepoint, Normalizer.NFD); - case Common.TO_NFKC: - return Common.MyNormalize(codepoint, Normalizer.NFKC); - case Common.TO_NFKD: - return Common.MyNormalize(codepoint, Normalizer.NFKD); - case Common.TO_CASEFOLD: - return UCharacter.foldCase(UTF16.valueOf(codepoint), true); - case Common.TO_LOWERCASE: - return UCharacter.toLowerCase(ULocale.ROOT, UTF16.valueOf(codepoint)); - case Common.TO_UPPERCASE: - return UCharacter.toUpperCase(ULocale.ROOT, UTF16.valueOf(codepoint)); - case Common.TO_TITLECASE: - return UCharacter.toTitleCase(ULocale.ROOT, UTF16.valueOf(codepoint), null); - case Common.SUBHEAD: - return UnicodeUtilities.getSubheader().getSubheader(codepoint); - } - return UCharacter.getStringPropertyValue(propertyEnum, codepoint, nameChoice); - } - - static String myFoldCase(String source) { - return UCharacter.foldCase(source, true); - } - - public static HanTypeValues getValue(int codepoint) { - if (Common.simpOnly.contains(codepoint)) { - return HanTypeValues.Hans; - } - if (Common.tradOnly.contains(codepoint)) { - return HanTypeValues.Hant; - } - if (Common.bothSimpTrad.contains(codepoint)) { - return HanTypeValues.Han; - } - return HanTypeValues.na; - } - - static String MyNormalize(int codepoint, Mode mode) { - return Normalizer.normalize(codepoint, mode); - } - - public static final UnicodeSet simpOnly = new UnicodeSet( - "[㑩㓥㔉㖊㖞㛟㛠㛿㟆㧑㧟㨫㱩㱮㲿㶉㶶㶽㺍㻏㻘䁖䅉䇲䌶-䌺䌼-䌾䍀䍁䓕䗖䘛䙊䙓䜣䜥䜧䝙䞌䞍䞐䢂䥿-䦁䩄䯃-䯅䲝䲞䴓-䴙万与丑专业-丝丢两严丧个丰临为丽举么义乌乐乔习乡书买乱争于亏云亚产亩亲亵亸亿仅仆从仑仓仪们价众优会伛伞-传伣-伧伪伫体佣佥侠侣侥-侪侬俣俦俨-俫俭债倾偬偻偾偿傥傧-傩儿克兑兖党兰关兴兹养兽冁内冈册写军农冯冲决况冻净准凉减凑凛几凤凫凭凯击凿刍划刘-创删别-刮制刹刽刿剀剂剐剑剥剧劝办务劢动励-劳势勋勚匀匦匮区医华协单卖卜卢卤卫却厂厅历厉压-厍厐厕厘厢厣厦厨厩厮县叁参双发变叙叠只叶号叹叽同向吓吕吗吣吨听启吴呐呒呓呕-呙呛呜咏咙咛咝咤咸响哑-哕哗哙哜哝哟唛唝唠-唢唤啧啬-啮啴啸喷喽喾嗫嗳嘘嘤嘱噜嚣团园困囱围囵国图圆圣圹场坂坏块坚-坠垄-垆垒垦垩垫垭垱垲垴埘-埚埯堑堕墙壮声壳壶壸处备复够头夸-夺奁奂奋奖奥奸妆-妈妩-妫姗姹娄-娈娱娲娴婳-婶媪嫒嫔嫱嬷孙学孪宁宝实宠审宪宫宽宾寝对寻导寿将尔尘尝尧尴尸尽层屃屉届属屡屦屿岁岂岖-岛岭岽岿峄峡峣-峦崂-崄崭嵘嵚嵝巅巩巯币帅师帏帐帘帜带帧帮帱帻帼幂干并广庄庆庐庑库应庙庞废廪开异弃弑张弥弪弯弹强归当录彝彦彻征径徕御忆忏忧忾怀-怆怜总怼怿恋恒恳恶恸-恽悦悫-悯惊惧-惩惫-惯愠愤愦愿慑懑懒懔戆戋戏戗战戬戯户扑执扩-扬扰抚抛抟-抢护报担拟拢拣拥-择挂挚-挦挽捝-损捡-捣据掳掴掷掸掺掼揽-搂搅携摄-摈摊撄撑撵撷撸撺擞攒敌敛数斋斓斗斩断无旧时-旸昙昼-显晋晒-晖暂暧术朴机杀杂权杆条来杨杩杰松板构枞枢枣枥枧枨枪枫枭柜柠柽栀栅标-栌栎栏树栖栗样栾桠-桩梦梼梾-棂椁椟椠椤椭楼榄榅榇-榉槚槛槟槠横樯樱橥橱橹橼檩欢欤欧歼殁殇残殒殓殚殡殴毁毂毕毙毡毵氇气氢氩氲汇汉汤汹沈沟没沣-沧沩沪泞注泪泶-泸泺-泾洁洒洼浃浅-浈浊测浍-浔涂涛涝-涡涣涤润-涩淀渊渌-渎渐渑渔渖渗温湾湿溃溅溆滗滚滞-滢滤-滦滨-滪漓漤潆潇潋潍潜潴澜濑濒灏灭灯灵灶灾-炀炉炖炜炝点炼炽烁-烃烛烟烦-烩烫-热焕焖焘煴爱爷牍牦牵牺犊状-犹狈狝狞独-狲猃猎猕猡猪-猬献獭玑玚玛玮-玱玺珐珑珰珲琏琐琼瑶瑷璎瓒瓯电画畅畴疖疗疟-疡疬-疯疱疴症-痉痒痖痨痪痫瘅瘆瘗瘘瘪瘫瘾瘿癞癣癫皑皱皲盏-监盖-盘眍眦眬着睁睐睑瞆瞒瞩矫矶矾-码砖砗砚砜砺砻砾础硁硕-硗硙确硷碍碛碜碱礼祃祎祢祯祷祸禀禄禅离秃秆种积称秽秾稆税稣稳穑穷窃窍窎窑窜窝窥窦窭竖竞笃笋笔笕笺笼笾筑筚-筝筹筼签简箓箦-箫篑篓篮篱簖籁籴类籼粜粝粤粪粮糁糇系紧累絷纟-缏缑-缵罂网罗罚罢罴羁羟翘耢耧耸耻聂聋-聍联聩聪肃肠肤肮肴肾-胁胆胜胡胧胨胪胫胶脉脍脏-脑脓脔脚脱脶脸腊腭腻-腾膑臜致舆舍舣舰舱舻艰艳艺节芈芗芜芦芸苁苇苈苋-苏苹范茎茏茑茔茕茧荆荐荙-荜荞-荡荣-药莅莱-莴莶-莺莼萝萤-萨葱蒇蒉蒋蒌蓝蓟蓠蓣蓥蓦蔂蔷蔹蔺蔼蕰蕲蕴薮藓蘖虏虑虚虫虬虮虽-蚂蚕蚬蛊蛎蛏蛮蛰-蛴蜕蜗蝇-蝉蝼蝾螀螨蟏衅衔补表衬衮袄-袆袜袭袯装裆裈裢-裥褛褴见-觑觞触觯訚誉誊讠-谈谊-谷豮贝-赣赪赵赶趋趱趸跃跄跞践-跹跻踊踌踪踬踯蹑蹒蹰蹿躏躜躯车-辚辞辩辫边辽达迁过迈运还这进-迟迩迳迹适选逊递逦逻遗遥邓邝邬邮邹-邻郁郏-郑郓郦郧郸酂酝酦酱酽-酿采释里鉴銮錾钅-镶长门-阛队阳-阶际-陉陕陧-险随隐隶隽难雏雠雳雾霁霡霭靓静面靥鞑鞒鞯韦-韬韵页-颢颤-颧风-飚飞飨餍饣-馕马-骧髅髋髌鬓魇魉鱼-鳣鸟-鹭鹯-鹴鹾麦麸黄黉黡黩黪黾鼋鼍鼗鼹齐齑齿-龌龙-龛龟𡒄𨱏]") - .freeze(); - public static final UnicodeSet tradOnly = new UnicodeSet( - "[㠏㩜䊷䋙䋻䝼䯀䰾䱽䲁丟並乾亂亞佇併來侖侶俁係俔俠倀倆倈倉個們倫偉側偵偽傑傖傘備傭傯傳-債傷傾僂僅僉僑僕僞僥僨價儀儂億儈儉儐儔儕儘償優儲儷儸儺-儼兌兒兗內兩冊冪凈凍凜凱別刪剄則剋剎剗剛剝剮剴創劃劇劉劊劌劍劏劑劚勁動務勛勝勞勢勩勱勵勸勻匭匯匱區協卻厙厠厭厲厴參叄叢吒吳吶呂呆咼員唄唚問啓啞啟啢喎喚喪喬單喲嗆嗇嗊嗎嗚嗩嗶嘆嘍嘔嘖嘗嘜嘩嘮-嘰嘵嘸嘽噓噚噝噠噥噦噯噲噴噸噹嚀嚇嚌嚕嚙嚦嚨嚲-嚴嚶囀-囂囅囈囑囪圇國圍園圓圖團垵埡埰執堅堊堖堝堯報場塊塋塏塒塗塢塤塵塹墊墜墮墳墻墾壇壈壋壓壘-壚壞-壠壢壩壯壺壼壽夠夢夾奐奧奩奪奬奮奼妝姍姦娛婁婦婭媧媯媼媽嫗嫵嫻嫿嬀嬈嬋嬌嬙嬡嬤嬪嬰嬸孌孫學孿宮寢實寧審寫寬寵寶將專尋對導尷屆屍屓屜屢層屨屬岡峴島峽崍崗崢崬嵐嶁嶄嶇嶔嶗嶠嶢嶧嶮嶴嶸嶺嶼巋巒巔巰帥師帳帶幀幃幗幘幟幣幫幬幹幺幾庫廁廂廄廈廚廝廟-廣廩廬廳弒弳張強彈彌彎彙彞彥後徑從徠復徵徹恆恥悅悞悵悶惡惱惲惻愛愜愨愴愷愾慄態慍慘慚慟慣慤慪慫慮慳慶憂憊憐-憒憚憤憫憮憲憶懇應懌懍懟懣懨懲懶-懸懺懼懾戀戇戔戧戩戰-戲戶拋挩挾捨捫掃掄掗掙掛採揀揚換揮損搖搗搵搶摑摜摟摯摳摶摻撈撏撐撓撝撟撣撥撫撲撳撻撾撿擁擄擇擊擋擓擔據擠擬擯-擲擴擷擺-擼擾攄攆攏攔攖攙攛-攝攢-攤攪攬敗敘敵數斂斃斕斬斷於時晉晝暈暉暘暢暫曄曆曇曉曏曖曠曨曬書會朧東杴柵桿梔梘條梟梲棄棖棗棟棧棲棶椏楊楓楨業極榪榮榲榿構槍槤槧槨槳樁樂樅樓標樞樣樸-樺橈橋機橢橫檁檉檔檜檟檢檣檮檯檳檸檻櫃櫓櫚櫛櫝-櫟櫥櫧櫨櫪-櫬櫱櫳櫸櫻欄權欏欒欖欞欽歐歟歡歲歷歸歿殘殞殤殨殫殮-殰殲殺-殼毀毆毿氂氈氌氣氫氬氳決沒沖況洶浹涇涼淚淥淪淵淶淺渙減渦測渾湊湞湯溈準溝溫滄滅滌滎滬滯滲滷滸滻滾滿漁漚漢漣漬漲漵漸漿潁潑潔潙潛潤潯潰潷潿澀澆澇澗澠澤澦澩澮澱濁濃濕濘濟濤濫濰濱濺濼濾瀅-瀇瀉瀋瀏瀕瀘瀝瀟瀠瀦-瀨瀲瀾灃灄灑灕灘灝灠灣灤灧災為烏烴無煉煒煙煢煥煩煬煱熅熒熗熱熲熾燁燈燉燒燙燜營燦燭燴燶燼燾爍爐爛爭爲爺爾牆牘牽犖犢犧狀狹狽猙猶猻獁獃-獅獎獨獪獫獮獰-獲獵獷獸獺-獼玀現琺琿瑋瑒瑣瑤瑩瑪瑲璉璣璦璫環璽瓊瓏瓔瓚甌產産畝畢畫異當疇疊痙痾瘂瘋瘍瘓瘞瘡瘧瘮瘲瘺瘻療癆癇癉癘癟癢癤癥癧癩癬-癮癰-癲發皚皰皸皺盜盞盡監盤盧盪眥眾睏睜睞瞘瞜瞞瞶瞼矓矚矯硜硤硨硯碩碭碸確碼磑磚磣磧磯磽礆礎礙礦礪-礬礱祿禍禎禕禡禦禪禮禰禱禿秈稅稈稏稟種稱穀穌-穎穠-穢穩穫穭窩窪窮窯窵窶窺竄竅竇竈竊竪競筆筍筧筴箋箏節範築篋篔篤篩篳簀簍簞簡簣簫簹簽簾籃籌籙籜籟籠籩籪籬籮粵糝糞糧糲糴糶糹糾紀紂約-紉紋納紐紓-紝紡紬細-紳紵紹紺紼紿絀終組-絆絎結絕絛絝絞絡絢給絨絰-絳絶絹綁綃綆綈綉綌綏綐經綜綞綠綢綣綫-維綯-綵綸-綻綽-綿緄緇緊緋緑-緔緗-線緝緞締緡緣緦編緩緬緯緱緲練緶緹緻縈-縋縐縑縕縗縛縝-縟縣縧縫縭縮縱-縳縵-縷縹總績繃繅繆繒織繕繚繞繡繢繩-繫繭-繰繳繸繹繼-繿纈纊續纍纏纓纖纘纜缽罈罌罰罵罷羅羆羈羋羥義習翹耬耮聖聞聯聰聲聳聵-職聹聽聾肅脅脈脛脫脹腎腖腡腦腫腳腸膃膚膠膩膽-膿臉臍臏臘臚臟臠臢臨臺與-舊艙艤艦艫艱艷芻茲荊莊莖莢莧華萇萊萬萵葉葒著葤葦葯葷蒓蒔蒞蒼蓀蓋蓮蓯蓴蓽蔔蔞蔣蔥蔦蔭蕁蕆蕎蕒蕓蕕蕘蕢蕩蕪蕭蕷薀薈薊薌薔薘薟薦薩薳薴薺藍藎藝藥藪藴藶藹藺蘄蘆蘇蘊蘋蘚蘞蘢蘭蘺蘿虆處虛虜號虧虯蛺蛻蜆蝕蝟蝦蝸螄螞螢螮螻螿蟄蟈蟎蟣蟬蟯蟲蟶蟻蠅蠆蠐蠑蠟蠣蠨蠱蠶蠻衆術衕衚衛衝衹袞裊裏補裝裡製複褌褘褲褳褸褻襇襏襖襝襠襤襪襬襯襲覆見覎規覓視覘覡覥覦親覬覯覲覷覺覽覿觀觴觶觸訁-訃計訊訌討訐訒訓訕-記訛訝訟訢訣訥訩訪設許訴訶診註詁詆詎詐詒詔-詘詛詞詠-詣試詩詫-詮詰-詳詵詼詿誄-誇誌認誑誒誕誘誚語誠誡誣-誦誨說説誰課誶誹誼誾調諂諄談諉請諍諏諑諒論諗諛-諞諢諤諦諧諫諭諮諱諳諶-諸諺諼諾謀-謂謄謅謊謎謐謔謖謗謙-講謝謠謡謨謫-謭謳謹謾譅證譎譏譖識-譚譜譫譯議譴護譸譽譾讀變讎讒讓讕讖讜讞豈豎豐豬豶貓貙貝-貢貧-責貯貰貲-貴貶-貸貺-貽貿-賅資賈賊賑-賓賕賙賚賜賞賠-賤賦賧質-賭賰賴賵賺-賾贄贅贇贈贊贋贍贏贐贓贔贖贗贛贜赬趕趙趨趲跡踐踴蹌蹕蹣蹤蹺躂躉-躋躍躑-躓躕躚躡躥躦躪軀車-軍軑軒軔軛軟軤軫軲軸-軼軾較輅輇-輊輒-輕輛-輟輥輦輩輪輬輯輳輸輻輾-轀轂轄-轆轉轍轎轔轟轡轢轤辦辭-辯農逕這連進運過達違遙遜遞遠適遲遷選遺遼邁還邇邊邏邐郟郵鄆鄉鄒鄔鄖鄧鄭鄰鄲鄴鄶鄺酇酈醖醜醞醫醬醱釀釁釃釅釋釐釒-釕釗-釙針釣釤釧釩釵釷釹釺鈀鈁鈃鈄鈈鈉鈍鈎鈐-鈒鈔鈕鈞鈣鈥-鈧鈮鈰鈳鈴鈷-鈺鈽-鉀鉅鉈鉉鉋鉍鉑鉕鉗鉚鉛鉞鉢鉤鉦鉬鉭鉶鉸鉺鉻鉿銀銃銅銍銑銓銖銘銚-銜銠銣銥銦銨-銬銱銳銷銹銻銼鋁鋃鋅鋇鋌鋏鋒鋙鋝鋟鋣-鋦鋨-鋪鋭-鋱鋶鋸鋼錁錄錆-錈錏錐錒錕錘-錛錟-錢錦錨錩錫錮錯録錳錶錸鍀鍁鍃鍆-鍈鍋鍍鍔鍘鍚鍛鍠鍤鍥鍩鍬鍰鍵鍶鍺鍾鎂鎄鎇鎊鎔鎖鎘鎚鎛鎝鎡-鎣鎦鎧鎩鎪鎬鎮鎰鎲鎳鎵鎸鎿鏃鏇鏈鏌鏍鏐鏑鏗鏘鏜-鏟鏡鏢鏤鏨鏰鏵鏷鏹鏽鐃鐋鐐鐒-鐔鐘鐙鐝鐠鐦-鐨鐫鐮鐲鐳鐵鐶鐸鐺鐿鑄鑊鑌鑒鑔鑕鑞鑠鑣鑥鑭鑰-鑲鑷鑹鑼-鑿钁長門閂閃閆閈閉開閌閎閏閑間閔閘閡閣閥閨閩閫-閭閱閲閶閹閻-閿闃闆闈闊-闍闐闒-闖關闞闠闡闤闥阪陘陝陣陰陳陸陽隉隊階隕際隨險隱隴隸隻雋雖雙雛雜雞離難雲電霢霧霽靂靄靈靚靜靦靨鞀鞏鞝鞽韁韃韉韋-韍韓韙韜韞韻響頁-頃項-須頊頌頎-頓頗領頜頡頤頦頭頮頰頲頴頷-頹頻頽顆題-顏顒-顔願顙顛類顢顥顧顫顬顯-顱顳顴風颭-颯颱颳颶颸颺-颼飀飄飆飈飛飠飢飣飥飩-飫飭飯飲飴飼-飿餃-餅餉養餌餎餏餑-餓餕餖餘餚-餜餞餡館餱餳餶餷餺餼餾餿饁饃饅饈-饌饑饒饗饜饞饢馬-馮馱馳馴馹駁駐-駒駔駕駘駙駛駝駟駡駢駭駰駱駸駿騁騂騅騌-騏騖騙騤騧騫騭騮騰騶-騸騾驀-驅驊驌驍驏驕驗驚驛驟驢驤-驦驪驫骯髏髒體-髖髮鬆鬍鬚鬢鬥鬧鬩鬮鬱魎魘魚魛魢魨魯魴魷魺鮁鮃鮊鮋鮍鮎鮐-鮓鮚鮜-鮞鮦鮪鮫鮭鮮鮳鮶鮺鯀鯁鯇鯉鯊鯒鯔-鯗鯛鯝鯡鯢鯤鯧鯨鯪鯫鯰鯴鯷鯽鯿鰁-鰃鰈鰉鰍鰏鰐鰒鰓鰜鰟鰠鰣鰥鰨鰩鰭鰮鰱-鰳鰵鰷鰹-鰼鰾鱂鱅鱈鱉鱒鱔鱖-鱘鱝鱟鱠鱣鱤鱧鱨鱭鱯鱷鱸鱺鳥鳧鳩鳬鳲-鳴鳶鳾鴆鴇鴉鴒鴕鴛鴝-鴟鴣鴦鴨鴯鴰鴴鴷鴻鴿鵁-鵃鵐-鵓鵜鵝鵠鵡鵪鵬鵮鵯鵲鵷鵾鶄鶇鶉鶊鶓鶖鶘鶚鶡鶥鶩鶪鶬鶯鶲鶴鶹-鶼鶿-鷂鷄鷈鷊鷓鷖鷗鷙鷚鷥鷦鷫鷯鷲鷳鷸-鷺鷽鷿鸂鸇鸌鸏鸕鸘鸚鸛鸝鸞鹵鹹鹺鹼鹽麗麥麩麵麼麽黃黌點黨黲黶黷黽黿鼉鼴齊齋齎齏齒齔齕齗齙齜齟-齡齦齪齬齲齶齷龍龎龐龔龕龜𡞵𡠹𡢃𤪺𤫩𧜵𧝞𧩙𧵳𨋢𨦫𨧜𨯅𩣑𩶘]") - .freeze(); - public static final UnicodeSet bothSimpTrad = new UnicodeSet("[:sc=han:]").removeAll(simpOnly).removeAll(tradOnly).freeze(); - - static String MyNormalize(String string, Mode mode) { - return Normalizer.normalize(string, mode); - } - - static final int TO_NFC = UProperty.STRING_LIMIT; - static final int TO_NFD = UProperty.STRING_LIMIT + 1; - static final int TO_NFKC = UProperty.STRING_LIMIT + 2; - static final int TO_NFKD = UProperty.STRING_LIMIT + 3; - static final int TO_CASEFOLD = UProperty.STRING_LIMIT + 4; - static final int TO_LOWERCASE = UProperty.STRING_LIMIT + 5; - static final int TO_UPPERCASE = UProperty.STRING_LIMIT + 6; - static final int TO_TITLECASE = UProperty.STRING_LIMIT + 7; - public static final int SUBHEAD = TO_TITLECASE + 1; - static final int XSTRING_LIMIT = SUBHEAD + 1; - // static UnicodeSet isCaseFolded = new UnicodeSet(); - // static UnicodeSet isLowercase = new UnicodeSet(); - // static UnicodeSet isUppercase = new UnicodeSet(); - // static UnicodeSet isTitlecase = new UnicodeSet(); - // static UnicodeSet isCased = new UnicodeSet(); - // static UnicodeSet isNFKC_CF = new UnicodeSet(); - - // static { - // for (int cp = 0; cp <= 0x10FFFF; ++cp) { - // - // int cat = UCharacter.getType(cp); - // if (cat == UCharacter.UNASSIGNED || cat == UCharacter.PRIVATE_USE || cat == UCharacter.SURROGATE) { - // // idnaTypeSet.get(IdnaType.disallowed).add(cp); // faster - // Common.isNFKC_CF.add(cp); - // Common.isCaseFolded.add(cp); - // Common.isLowercase.add(cp); - // Common.isTitlecase.add(cp); - // Common.isUppercase.add(cp); - // continue; - // } - // - // // IdnaType idnaType = Idna2003.getIDNA2003Type(cp); - // // idnaTypeSet.get(idnaType).add(cp); - // - // String s = UTF16.valueOf(cp); - // if (UCharacter.foldCase(s, true).equals(s)) { - // Common.isCaseFolded.add(cp); - // } - // if (NFKC_CF_.normalize(s).equals(s)) { - // Common.isNFKC_CF.add(cp); - // } - // if (UCharacter.toLowerCase(ULocale.ROOT, s).equals(s)) { - // Common.isLowercase.add(cp); - // } - // if (UCharacter.toUpperCase(ULocale.ROOT, s).equals(s)) { - // Common.isUppercase.add(cp); - // } - // if (UCharacter.toTitleCase(ULocale.ROOT, s, null).equals(s)) { - // Common.isTitlecase.add(cp); - // } - // } - // isCaseFolded.freeze(); - // isNFKC_CF.freeze(); - // isLowercase.freeze(); - // isUppercase.freeze(); - // isTitlecase.freeze(); - // // isCased if isLowercase=false OR isUppercase=false OR - // // isTitlecase=false - // // or := ! (isLowercase && isUppercase && isTitlecase) - // Common.isCased = new UnicodeSet(Common.isLowercase).retainAll(Common.isUppercase).retainAll( - // Common.isTitlecase).complement(); - // } -} diff --git a/unicodetools/src/main/java/org/unicode/jsp/Confusables.java b/unicodetools/src/main/java/org/unicode/jsp/Confusables.java deleted file mode 100644 index 3dc62817d..000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/Confusables.java +++ /dev/null @@ -1,257 +0,0 @@ -package org.unicode.jsp; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Set; -import java.util.TreeSet; - -import org.unicode.cldr.util.XEquivalenceClass; -import org.unicode.jsp.AlternateIterator.Builder; -import org.unicode.jsp.ScriptTester.CompatibilityLevel; -import org.unicode.jsp.ScriptTester.ScriptSpecials; - -import com.ibm.icu.dev.util.CollectionUtilities; -import com.ibm.icu.dev.util.UnicodeMap; -import com.ibm.icu.impl.Utility; -import com.ibm.icu.text.Normalizer; -import com.ibm.icu.text.Normalizer.Mode; -import com.ibm.icu.text.UTF16; -import com.ibm.icu.text.UnicodeSet; - -public class Confusables implements Iterable{ - public enum ScriptCheck {same, none}; - - private static final XEquivalenceClass equivalents = new XEquivalenceClass(); - private final String source; - private Mode normalizationCheck; - private ScriptCheck scriptCheck = ScriptCheck.none; - private UnicodeSet allowedCharacters = null; - - public UnicodeSet getAllowedCharacters() { - return allowedCharacters; - } - - public static UnicodeMap getMap() { - final UnicodeMap result = new UnicodeMap(); - for (final String s : equivalents) { - final Set others = new TreeSet(equivalents.getEquivalences(s)); - final String list = "\u2051" + CollectionUtilities.join(others, "\u2051") + "\u2051"; - for (final String other : others) { - result.put(other, list); - } - } - result.freeze(); - return result; - } - - public static Set getEquivalents(String string) { - final Set result = equivalents.getEquivalences(string); - return Collections.unmodifiableSet(result); - } - - public Confusables setAllowedCharacters(UnicodeSet allowedCharacters) { - this.allowedCharacters = allowedCharacters; - return this; - } - - public Mode getNormalizationCheck() { - return normalizationCheck; - } - - public Confusables setNormalizationCheck(Mode normalizationCheck) { - this.normalizationCheck = normalizationCheck; - return this; - } - - static class MyReader extends FileUtilities.SemiFileReader { - @Override - protected boolean handleLine(int start, int end, String[] items) { - final String type = items[2]; - if (!type.equals("MA")) { - return true; - } - final String result = Utility.fromHex(items[1], 4, " "); - for (int i = start; i <= end; ++i) { - equivalents.add(UTF16.valueOf(i), result); - } - return true; - } - } - - static { - new MyReader().process(Confusables.class, "confusables.txt"); - } - - public Confusables(String source) { - this.source = Normalizer.normalize(source,Normalizer.NFD); - } - - public double getMaxSize() { - final AlternateIterator build = buildIterator(); - if (build == null) { - return 0; - } - return build.getMaxSize(); - } - - @Override - public Iterator iterator() { - final AlternateIterator build = buildIterator(); - if (build == null) { - final Set empty = Collections.emptySet(); - return empty.iterator(); - } - return new MyFilteredIterator(build); - } - - private AlternateIterator buildIterator() { - final Builder builder = AlternateIterator.start(); - final List> table = new ArrayList>(); - int cp; - for (int i = 0; i < source.length(); i += Character.charCount(cp)) { - cp = source.codePointAt(i); - final String cps = UTF16.valueOf(cp); - final Set confusables = equivalents.getEquivalences(cps); - final Set items = new HashSet(); - for (final String confusable : confusables) { - if (normalizationCheck != null && !Normalizer.isNormalized(confusable, normalizationCheck, 0)) { - continue; - } - if (allowedCharacters != null && !allowedCharacters.containsAll(confusable)) { - continue; - } - items.add(confusable); - } - if (items.size() == 0) { - return null; - } - table.add(items); - } - - // now filter for multiple scripts, if set - if (scriptCheck != ScriptCheck.none) { - if (!scriptTester.filterTable(table)) { - return null; - } - } - for (final Set items : table) { - builder.add(items); - } - final AlternateIterator build = builder.build(); - return build; - } - - - public List> getAlternates() { - final AlternateIterator build = buildIterator(); - if (build == null) { - return Collections.emptyList(); - } - return build.getAlternates(); - } - - public ScriptCheck getScriptCheck() { - return scriptCheck; - } - - public Confusables setScriptCheck(ScriptCheck scriptCheck) { - this.scriptCheck = scriptCheck; - return this; - } - - public static boolean scriptOk(String confusable, ScriptCheck scriptCheck) { - return scriptCheck == ScriptCheck.none - || scriptTester.isOk(confusable); - } - - public static ScriptTester scriptTester = ScriptTester.start(CompatibilityLevel.Highly_Restrictive, ScriptSpecials.on).get(); - - class MyFilteredIterator extends FilteredIterator{ - Set alreadySeen;; - - public MyFilteredIterator(Iterator base) { - super(base); - } - - @Override - public String allow(String confusable) { - if (alreadySeen == null) { - alreadySeen = new HashSet(); - } - if (alreadySeen.contains(confusable)) { - return null; - } - alreadySeen.add(confusable); - - final String nfcConfusable = Normalizer.normalize(confusable, Normalizer.NFC); - if (!nfcConfusable.equals(confusable)) { - if (alreadySeen.contains(nfcConfusable)) { - return null; - } - alreadySeen.add(nfcConfusable); - } - - if (allowedCharacters != null && !allowedCharacters.containsAll(nfcConfusable)) { - return null; - } - if (!scriptOk(nfcConfusable, scriptCheck)) { - return null; - } - if (normalizationCheck != null && !Normalizer.isNormalized(nfcConfusable, normalizationCheck, 0)) { - return null; - } - return nfcConfusable; - } - - } - - public static class FilteredIterator implements Iterator { - Iterator base; - T nextItem = null; - - public FilteredIterator(Iterator base) { - this.base = base; - load(); - } - - @Override - public boolean hasNext() { - return nextItem != null; - } - - @Override - public T next() { - final T temp = nextItem; - load(); - return temp; - } - - private void load() { - while (base.hasNext()) { - nextItem = allow(base.next()); - if (nextItem != null) { - return; - } - } - nextItem = null; - } - - public T allow(T item) { - return item; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - - } - - public String getOriginal() { - return source; - } -} diff --git a/unicodetools/src/main/java/org/unicode/jsp/CreateInversions.java b/unicodetools/src/main/java/org/unicode/jsp/CreateInversions.java deleted file mode 100644 index a23c29738..000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/CreateInversions.java +++ /dev/null @@ -1,241 +0,0 @@ -package org.unicode.jsp; - -import java.io.IOException; - -import org.unicode.cldr.util.props.ICUPropertyFactory; - -import com.ibm.icu.dev.util.UnicodeMap; -import com.ibm.icu.dev.util.UnicodeMapIterator; -import com.ibm.icu.lang.UCharacter; -import com.ibm.icu.text.UnicodeSet; -import com.ibm.icu.text.UnicodeSetIterator; - -public class CreateInversions { - - // testing - - public static void main(String[] args) { - final UnicodeSet ignorables = new UnicodeSet("[[:Cn:][:Cs:][:Co:]]").freeze(); // exclude unassigned, surrogates, and private use - final CreateInversions createInversions = new CreateInversions().setIgnorables(ignorables).setDelta(true); - - // check the code (by inspection) to make sure it works - // later do unit test - final UnicodeSet[] tests = { - new UnicodeSet("[abcxyz]"), - new UnicodeSet("[:whitespace:]"), - new UnicodeSet("[:deprecated:]"), - }; - for (final UnicodeSet test : tests) { - showSet(createInversions, test); - } - - final UnicodeMap testMap = new UnicodeMap(); - testMap.putAll(new UnicodeSet("[abcxyz]"), "foo"); - showMap(createInversions, testMap); - - // check with names - for (final UnicodeSet test : tests) { - testMap.clear(); - for (final UnicodeSetIterator it = new UnicodeSetIterator(test); it.next();) { - testMap.put(it.codepoint, UCharacter.getName(it.codepoint)); - } - showMap(createInversions, testMap); - } - - // check with properties - final ICUPropertyFactory propFactory = ICUPropertyFactory.make(); - final UnicodeMap[] testProperties = { - propFactory.getProperty("numeric_type").getUnicodeMap(), - propFactory.getProperty("block").getUnicodeMap(), - propFactory.getProperty("word_break").getUnicodeMap(), - propFactory.getProperty("grapheme_cluster_break").getUnicodeMap().putAll(new UnicodeSet(0xAC00,0xD7A3), "LVT"), - // note: separating out the LV from LVT can be done more compactly with an algorithm. - // it is periodic: AC00, AC1C, AC38... - }; - for (final UnicodeMap test : testProperties) { - showMap(createInversions, test); - } - - // further compaction can be done by assigning each property value to a number, and using that instead. - final UnicodeMap source = propFactory.getProperty("grapheme_cluster_break").getUnicodeMap().putAll(new UnicodeSet(0xAC00,0xD7A3), "LVT"); - final UnicodeMap target = new UnicodeMap(); - int numberForValue = 0; - // iterate through the values, assigning each a number - for (final Object value : source.getAvailableValues()) { - target.putAll(source.keySet(value), numberForValue++); - } - showMap(createInversions, target); - } - - private static void showSet(CreateInversions createInversions, UnicodeSet test) { - System.out.println("** Source:"); - System.out.println(test); - System.out.println("** Result:"); - System.out.println(createInversions.create("testName", test)); - System.out.println("Inversions: " + createInversions.getInversions()); - System.out.println(); - } - - private static void showMap(CreateInversions createInversions, UnicodeMap testMap) { - System.out.println("** Source:"); - System.out.println(testMap); - System.out.println("** Result:"); - System.out.println(createInversions.create("testName", testMap)); - System.out.println("Inversions: " + createInversions.getInversions()); - System.out.println(); - } - - // guts - - private UnicodeSet ignorables; - - private boolean delta; - - private int inversions; - - private int getInversions() { - return inversions; - } - - private CreateInversions setDelta(boolean b) { - delta = b; - return this; - } - - private CreateInversions setIgnorables(UnicodeSet ignorables) { - this.ignorables = ignorables; - return this; - } - - public String create(String name, UnicodeSet source) { - try { - return create(name, source, new StringBuilder()).toString(); - } catch (final IOException e) { - throw (RuntimeException) new IllegalArgumentException("Should not happen").initCause(e); - } - } - - public String create(String name, UnicodeMap source) { - try { - return create(name, source, new StringBuilder()).toString(); - } catch (final IOException e) { - throw (RuntimeException) new IllegalArgumentException("Should not happen").initCause(e); - } - } - - // public String createInversions(UnicodeSet source, String name, String - // filename) throws IOException { - // return createInversions(source, name, new StringBuilder()).close(); - // } - // - // public String createInversions(UnicodeMap source, String name, String - // filename) throws IOException { - // return createInversions(source, name, new StringBuilder()).toString(); - // } - - public Appendable create(String name, UnicodeSet source, Appendable target) - throws IOException { - initShortestForm(); - target.append("var " + name + " = new Inversion([\n"); - boolean first = true; - for (final UnicodeSetIterator it = new UnicodeSetIterator(source); it.nextRange();) { - if (first) { - first = false; - } else { - target.append(",\n"); // the linebreak is not needed, but easier to read - } - target.append(shortestForm(it.codepoint, delta)); - if (it.codepointEnd != 0x10FFFF) { - target.append(",").append(shortestForm(it.codepointEnd + 1, delta)); - } - } - target.append("\n]"); - if (delta) { - target.append(",true"); - } - target.append(");"); - return target; - } - - public Appendable create(String name, UnicodeMap source, Appendable target) - throws IOException { - initShortestForm(); - target.append("var " + name + " = new Inversion([\n"); - final StringBuilder valueArray = new StringBuilder(); - boolean first = true; - for (final UnicodeMapIterator it = new UnicodeMapIterator(source); it.nextRange();) { - // skip ignorable range - if (ignorables.contains(it.codepoint, it.codepointEnd)) { - continue; - } - // also skip adjacent rows with same value - final String valueString = shortestForm(source.getValue(it.codepoint)); - if (lastValue == valueString || lastValue != null && lastValue.equals(valueString)) { - continue; - } - lastValue = valueString; - if (first) { - first = false; - } else { - target.append(",\n"); // the linebreak is not needed, but easier to read - valueArray.append(",\n"); // the linebreak is not needed, but easier to - // read - } - target.append(shortestForm(it.codepoint, delta)); - valueArray.append(valueString); - } - target.append("\n],[\n").append(valueArray).append("\n]"); - if (delta) { - target.append(",true"); - } - target.append(");"); - return target; - } - - long lastNumber; - String lastValue; - - private void initShortestForm() { - lastNumber = 0; - inversions = 0; - lastValue = null; - } - - private String shortestForm(Object value) { - String result; - if (value == null) { - result = "null"; - } else if (value instanceof Byte || value instanceof Short || value instanceof Integer - || value instanceof Long) { - --inversions; // don't add inversion in this case - result = shortestForm(((Number) value).longValue(), false); - } else if (value instanceof Float || value instanceof Double) { - result = value.toString(); - } else { - result = value.toString(); - // TODO optimize this - result.replace("\b", "\\\b"); // quote - result.replace("\t", "\\\t"); // quote - result.replace("\n", "\\\n"); // quote - result.replace("\u000B", "\\v"); // quote - result.replace("\f", "\\\f"); // quote - result.replace("\r", "\\\r"); // quote - result.replace("\"", "\\\""); // quote - result.replace("\\", "\\\\"); // quote - result = "\"" + result + "\""; - } - return result; - } - - private String shortestForm(long number, boolean useDelta) { - if (useDelta) { - final long temp = number; - number -= lastNumber; - lastNumber = temp; - } - ++inversions; - final String decimal = String.valueOf(number); - final String hex = "0x" + Long.toHexString(number); - return decimal.length() < hex.length() ? decimal : hex; - } -} diff --git a/unicodetools/src/main/java/org/unicode/jsp/Dummy.java b/unicodetools/src/main/java/org/unicode/jsp/Dummy.java deleted file mode 100644 index 2a8363b02..000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/Dummy.java +++ /dev/null @@ -1,29 +0,0 @@ -package org.unicode.jsp; - -import org.unicode.cldr.tool.TablePrinter; - -public class Dummy { - public static String getTest() { - final TablePrinter tablePrinter = new TablePrinter() - .setTableAttributes("style='border-collapse: collapse' border='1'") - .addColumn("Language").setSpanRows(true).setSortPriority(0).setBreakSpans(true) - .addColumn("Junk").setSpanRows(true) - .addColumn("Territory").setHeaderAttributes("bgcolor='green'").setCellAttributes("align='right'").setSpanRows(true) - .setSortPriority(1).setSortAscending(false); - final Comparable[][] data = { - {"German", 1.3d, 3}, - {"French", 1.3d, 2}, - {"English", 1.3d, 2}, - {"English", 1.3d, 4}, - {"English", 1.3d, 6}, - {"English", 1.3d, 8}, - {"Arabic", 1.3d, 5}, - {"Zebra", 1.3d, 10} - }; - tablePrinter.addRows(data); - tablePrinter.addRow().addCell("Foo").addCell(1.5d).addCell(99).finishRow(); - - final String s = tablePrinter.toTable(); - return s; - } -} diff --git a/unicodetools/src/main/java/org/unicode/jsp/GeneralUtilities.java b/unicodetools/src/main/java/org/unicode/jsp/GeneralUtilities.java deleted file mode 100644 index 66bbf58d2..000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/GeneralUtilities.java +++ /dev/null @@ -1,52 +0,0 @@ -package org.unicode.jsp; - -import java.util.Locale; - -public class GeneralUtilities { - - /** - * Used to set a static debug flag from an environment variable. Allows static final flags to be set for debugging information even in environments - * where the source cannot be altered. For a given class StringPrepData, the debug flag is -Dstringprepdata_ (that is, all lowercase). - *

Example: - *

-     * private static final boolean DEBUG_SHOW_DETAILS = GeneralUtilities.getDebugFlag(StringPrepData.class, "show_details");
-     * 
- * @param class1 Typically the class where the boolean is defined. - * @param flagName a specialized name, such as show_details. - * @return whether flag was present. - */ - public static boolean getDebugFlag(Class class1, String flagName) { - String className = class1.getName(); - final int lastPart = className.lastIndexOf('.'); - if (lastPart >= 0) { - className = className.substring(lastPart+1); - } - return System.getProperty((className+"_" + flagName).toLowerCase(Locale.ROOT)) != null; - } - - /** - * Used to set a static debug flag from an environment variable. Allows static final flags to be set for debugging information even in environments - * where the source cannot be altered. For a given class StringPrepData, the debug flag is -Dstringprepdata_ (that is, all lowercase). - *

Example: - *

-     * private static final boolean DEBUG_SHOW_DETAILS = GeneralUtilities.getDebugFlag(StringPrepData.class, "show_details", DEBUG);
-     * 
- * @param class1 Typically the class where the boolean is defined. - * @param flagName a specialized name, such as show_details. - * @param onlyif allows the test to be subject to a general flag. - * @return whether flag was present. - */ - public static boolean getDebugFlag(Class class1, String flagName, boolean onlyif) { - return onlyif && getDebugFlag(class1, flagName); - } - - /** - * Convenience method, where the flagname is "debug". - * @param class1 Typically the class where the boolean is defined. - * @return whether flag was present. - */ - public static boolean getDebugFlag(Class class1) { - return getDebugFlag(class1, "debug"); - } - -} diff --git a/unicodetools/src/main/java/org/unicode/jsp/GenerateSubheader.java b/unicodetools/src/main/java/org/unicode/jsp/GenerateSubheader.java deleted file mode 100644 index ea9c2d5a4..000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/GenerateSubheader.java +++ /dev/null @@ -1,16 +0,0 @@ -package org.unicode.jsp; - -import java.io.IOException; - -import com.ibm.icu.text.UnicodeSet; - -public class GenerateSubheader { - public static void main(String[] args) throws IOException { - final String unicodeDataDirectory = "./jsp/"; - final Subheader subheader = new Subheader(unicodeDataDirectory); - for (final String subhead : subheader) { - final UnicodeSet result = subheader.getUnicodeSet(subhead); - System.out.println("{\"" + subhead + "\",\"" + result.toString().replace("\\", "\\\\") + "\"},"); - } - } -} diff --git a/unicodetools/src/main/java/org/unicode/jsp/Globe.java b/unicodetools/src/main/java/org/unicode/jsp/Globe.java deleted file mode 100644 index 81416efb0..000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/Globe.java +++ /dev/null @@ -1,2342 +0,0 @@ -package org.unicode.jsp; - -import java.awt.BasicStroke; -import java.awt.BorderLayout; -import java.awt.Color; -import java.awt.Container; -import java.awt.Dimension; -import java.awt.FlowLayout; -import java.awt.Font; -import java.awt.FontMetrics; -import java.awt.Graphics2D; -import java.awt.Image; -import java.awt.MediaTracker; -import java.awt.Rectangle; -import java.awt.RenderingHints; -import java.awt.Shape; -import java.awt.Toolkit; -import java.awt.event.ActionEvent; -import java.awt.event.ActionListener; -import java.awt.geom.Ellipse2D; -import java.awt.geom.GeneralPath; -import java.awt.geom.Line2D; -import java.awt.geom.Rectangle2D; -import java.awt.image.BufferedImage; -import java.awt.image.ColorModel; -import java.awt.image.FilteredImageSource; -import java.awt.image.ImageFilter; -import java.awt.image.ImageProducer; -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.text.NumberFormat; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; -import java.util.Set; -import java.util.TreeSet; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import javax.imageio.ImageIO; -import javax.swing.ImageIcon; -import javax.swing.JButton; -import javax.swing.JComboBox; -import javax.swing.JFileChooser; -import javax.swing.JFrame; -import javax.swing.JLabel; -import javax.swing.JPanel; -import javax.swing.JScrollPane; - -import org.unicode.text.utility.Settings; - -//import com.sun.image.codec.jpeg.JPEGCodec; -//import com.sun.image.codec.jpeg.JPEGEncodeParam; -//import com.sun.image.codec.jpeg.JPEGImageEncoder; - -public class Globe { - public static final boolean DEBUG = false; - static int style = 0; - static int degreeInterval = 15; - - static String myPictures0 = "bin/jsp/images/"; - static String SOURCE_DIR = myPictures0; - static String TARGET_DIR = "Pictures/Earth/"; - - static JFileChooser loadFileChooser = new JFileChooser(); - static JFileChooser saveFileChooser = new JFileChooser(); - static { - saveFileChooser.setCurrentDirectory(new File(TARGET_DIR)); - loadFileChooser.setCurrentDirectory(new File(SOURCE_DIR)); - } - - static int QUALITY = 80; - - static double lightness = 0; - static boolean rotate = false; - static boolean doLabels = false; - - static int originChoice = 0; - static String[] originList = new String[] { - "?", - "North Pole", - "San Francisco (SFO)", - "Zürich (ZRH)", - "Tokyo (NRT)", - "Wellington (WLG)", - "Honolulu", - "Melbourne (MEL)", - "Caen (CFR)", - "Cochin (COK)", - "Cochin (COK) - centering", - "Moscow, ID", - "Denver, CO", - "Tokyo" - }; - // Melbourne, Australia 37 47 S 144 58 E - // Caen — 49° 10' 59" N 00° 22' 10" W - // sundance latitude 44.406 and longitude -104.376 - // San Diego, Calif. 32 42 117 10 9:00 a.m. - // Moscow, ID Latitude: 46.73 N, Longitude: 117.00 W - - static double[][] origins = { // lat, long - { -Math.PI / 2 + 0.0001, 0.0001 }, // - { -Math.PI / 2 + 0.0001, 0.0001 }, // - { Navigator.toRadians(37.0, 37.0, 8.3, false), - Navigator.toRadians(122.0, 22.0, 29.6, false) }, // sf // - { Navigator.toRadians(47, 27, 0, false), - Navigator.toRadians(8.0, 33.0, 0, true) }, // zurich // - { Navigator.toRadians(35, 45, 50, false), - Navigator.toRadians(140.0, 23.0, 30, true) }, // Narita - // 35°45´50"N 140°23´30"E - { Navigator.toRadians(41, 20, 0, true), - Navigator.toRadians(174.0, 48.0, 0, true) }, // Wellington - // 41° 20' - // 0" S 174° 48' 0" - // E - { Navigator.toRadians(21, 18, 0, false), - Navigator.toRadians(157, 50, 0, false) }, - { Navigator.toRadians(37, 39, 42, true), - Navigator.toRadians(144, 50, 0, true) }, - { Navigator.toRadians(49, 10, 24, false), - Navigator.toRadians(0, 26, 53, false) }, - { Navigator.toRadians(10, 9, 7, false), - Navigator.toRadians(76, 24, 7, true) }, // Cochin - { Navigator.toRadians(0, 0, 0, false), - Navigator.toRadians(70, 0, 0, true) }, // Cochin - { Navigator.toRadians(46.743978, 0, 0, false), - Navigator.toRadians(116.904176, 0, 0, false) }, // Moscow - { Navigator.toRadians(39.7392, 0, 0, false), - Navigator.toRadians(104.9847, 0, 0, false) }, // Moscow - { Navigator.toRadians(35.6895, 0, 0, false), - Navigator.toRadians(-139.6917, 0, 0, false) }, // Moscow - // ,-116.904176 - /* - * Airport Code : COK - * - * Longitude : 76° 24’ 7” E (?) Latitude : 10° 9’ 7” N (?) - */ - }; - - static int[][] sizeValues = { - { 640, 320 }, - { 1024, 512 }, - { 1280, 640 }, - { 1280, 1024 }, - { 1400, 700 }, - { 1400, 1050 }, - { 1440, 720 }, - { 1600, 800 }, - { 1920, 960 }, - { 1920, 1200 }, - { 2400, 1200 }, - }; - - static String[] sizeList = new String[] { "640×320", "1024×512", - "1280×640", "1280×1024", "1400×700", "1400×1050", "1440×720", - "1600×800", "1920×960", "1920×1200", "2400×1200" }; - static int sizeChoice = 0; - - static String[] gridList = new String[] { "5°", "10°", "15°" }; - static int gridChoice = 0; - - static String[] labelList = new String[] { "no labels", "labels" }; - - static String[] localeList = new String[] { "en", "de", "fr", "el", "ru", - "ja", "zh" }; - static String[] translatedLocaleList; - - static String[] projectionList = new String[] { - "Plate Carrée", - "Equal Area Rectangular (Gall)", - "Equal Area Sinusoidal", - "Equal Area Ellipse", - "Equidistant Conic", - "3D Isometric" }; - static int projectionChoice = 0; - - static Transform[] projectionValues = new Transform[] { - new TransformPlateCarree(), - new TransformGallOrthographic(), - new TransformSinusoidal(), - new TransformEqualAreaEllipse(), - new TransformEquidistantConic(), - new Transform3DIsometric(), - }; - - static double originLat = origins[0][0]; // N = + - static double originLong = origins[0][1]; // W = - - - /** - * Create the GUI and show it. For thread safety, this method should be - * invoked from the event-dispatching thread. - * - * @throws IOException - */ - static JLabel mainPicture = new JLabel(); - - // static ImageIcon sourceIcon, resultIcon; - static JFrame frame; - - static BufferedImage sourceImage, griddedImage; - static Image transformedImage; - - static int gradations = 10; - - private static void createAndShowGUI() { - if (false) { - final Mapper m = new Mapper(3, 7, 100, 140); - System.out.println(m.map(3) + ", " + m.map(7)); - new Transform.Tester().test(); - return; - } - - // cldrFactory = CLDRFile.Factory.make(Utility.MAIN_DIRECTORY,".*"); - - // Make sure we have nice window decorations. - JFrame.setDefaultLookAndFeelDecorated(true); - - // Create and set up the window. - frame = new JFrame("HelloWorldSwing"); - frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); - - // Add the ubiquitous "Hello World" label. - // JLabel label = new JLabel("Hello World"); - // frame.getContentPane().add(label); - final String sname = Settings.CLDR.BASE_DIRECTORY + "workspace/unicode-jsps/WebContent/images/earth-living.jpg"; - // "ev11656_land_shallow_topo_8192.tiff"; - // "ev11656_land_shallow_topo_8192.PNG"; - // earthmap1k.jpg"; - // "C:/Documents and Settings/Administrator/Desktop/macchiato-backup/distance/worldmap.jpg" - loadSourceMap(sname); - - final JPanel topPanel = new JPanel(); - topPanel.setLayout(new FlowLayout(FlowLayout.LEADING)); - - final JButton but = new JButton("Save"); - but.addActionListener(new ActionListener() { - @Override - public void actionPerformed(ActionEvent e) { - saveFileChooser.setSelectedFile(new File("Earth " - + projectionList[projectionChoice] - + ", " + sizeList[sizeChoice] - + ", " + gridList[gridChoice] - + ", " + originList[originChoice] - + ".jpg")); - final int returnVal = saveFileChooser.showSaveDialog(frame); - if (returnVal == JFileChooser.APPROVE_OPTION) { - try { - final String filename = saveFileChooser - .getSelectedFile().getCanonicalPath(); - System.out.println("You chose to save this file: " - + filename); - writeImage(griddedImage, filename, QUALITY); - // myPictures + "new-earth-living" + style + ".jpg" - } catch (final IOException e1) { - System.out.println("Couldn't save file."); - } - } - - } - }); - topPanel.add(but); - - final JButton but2 = new JButton("Load"); - but2.addActionListener(new ActionListener() { - @Override - public void actionPerformed(ActionEvent e) { - - // File file = new File(SOURCE_DIR + "earth-living.jpg"); - // try { - // System.out.println("Source Dir: " + file.getCanonicalPath()); - // } catch (IOException e2) { - // } - // loadFileChooser.setSelectedFile(file); - final int returnVal = loadFileChooser.showOpenDialog(frame); - if (returnVal == JFileChooser.APPROVE_OPTION) { - try { - String filename = loadFileChooser.getSelectedFile() - .getCanonicalPath(); - if (!filename.toLowerCase().endsWith(".jpg")) { - filename += ".jpg"; - } - System.out.println("You chose to open this file: " - + filename); - loadSourceMap(filename); - // myPictures + "new-earth-living" + style + ".jpg" - } catch (final IOException e1) { - System.out.println("Couldn't save file."); - } - } - - } - }); - topPanel.add(but2); - - final JComboBox box = new JComboBox(projectionList); - box.addActionListener(new ActionListener() { - @Override - public void actionPerformed(ActionEvent e) { - final int index = ((JComboBox) e.getSource()) - .getSelectedIndex(); - if (index != projectionChoice) { - style = projectionChoice = index; - changeImage(frame); - } - } - }); - topPanel.add(box); - - final JComboBox box2 = new JComboBox(sizeList); - box2.addActionListener(new ActionListener() { - @Override - public void actionPerformed(ActionEvent e) { - final int index = ((JComboBox) e.getSource()) - .getSelectedIndex(); - if (index != sizeChoice) { - sizeChoice = index; - changeImage(frame); - } - } - }); - topPanel.add(box2); - final String[] gradationNames = new String[gradations * 2 - 1]; - for (int i = 0; i < gradationNames.length; ++i) { - gradationNames[i] = i < gradations - 1 ? "Lighten to " - + ((i + 1) * 100 / gradations) - : i == gradations - 1 ? "Neutral" - : "Darken to " - + ((gradations * 2 - i - 1) * 100 / gradations); - } - final JComboBox box3 = new JComboBox(gradationNames); - box3.setSelectedIndex(gradations - 1); - box3.addActionListener(new ActionListener() { - int lastIndex = gradations - 1; - - @Override - public void actionPerformed(ActionEvent e) { - final int index = ((JComboBox) e.getSource()) - .getSelectedIndex(); - if (index != lastIndex) { - lastIndex = index; - lightness = (gradations - 1 - index) / (double) gradations; - changeImage(frame); - } - } - }); - topPanel.add(box3); - - final JComboBox box4 = new JComboBox(originList); - // box4.setSelectedIndex(1); - box4.addActionListener(new ActionListener() { - @Override - public void actionPerformed(ActionEvent e) { - final int index = ((JComboBox) e.getSource()) - .getSelectedIndex(); - if (index != originChoice) { - originChoice = index; - originLat = origins[index][0]; - originLong = origins[index][1]; - if (projectionValues[projectionChoice].usesOrigin()) { - changeImage(frame); - } else { - addGrid(transformedImage, - projectionValues[projectionChoice]); - // - } - } - } - }); - topPanel.add(box4); - - final JComboBox box5 = new JComboBox(gridList); - // box4.setSelectedIndex(1); - box5.addActionListener(new ActionListener() { - @Override - public void actionPerformed(ActionEvent e) { - final int index = ((JComboBox) e.getSource()) - .getSelectedIndex(); - if (index != gridChoice) { - gridChoice = index; - degreeInterval = (index + 1) * 5; - addGrid(transformedImage, - projectionValues[projectionChoice]); - // changeImage(frame); - } - } - }); - topPanel.add(box5); - - final JComboBox box6 = new JComboBox(labelList); - // box4.setSelectedIndex(1); - box6.addActionListener(new ActionListener() { - @Override - public void actionPerformed(ActionEvent e) { - final int index = ((JComboBox) e.getSource()) - .getSelectedIndex(); - if ((index == 1) != doLabels) { - doLabels = index == 1; - addGrid(transformedImage, - projectionValues[projectionChoice]); - // changeImage(frame); - } - } - }); - topPanel.add(box6); - - box7 = new JComboBox(localeList); - box7.setFont(font); - // setLocale(0); - // box4.setSelectedIndex(1); - box7.addActionListener(new ActionListener() { - @Override - public void actionPerformed(ActionEvent e) { - final int index = ((JComboBox) e.getSource()) - .getSelectedIndex(); - if (index != currentLocaleIndex) { - // setLocale(index); - ((JComboBox) e.getSource()).setSelectedIndex(index); - changeImage(frame); - } - } - }); - topPanel.add(box7); - - final JPanel panel = new JPanel(); - panel.setLayout(new BorderLayout()); - panel.add(topPanel, BorderLayout.PAGE_START); - panel.add(mainPicture, BorderLayout.CENTER); - panel.add( - new JLabel( - "See http://www.3dsoftware.com/Cartography/USGS/MapProjections/"), - BorderLayout.PAGE_END); - - final JScrollPane scrollPane = new JScrollPane(panel); - scrollPane.setPreferredSize(new Dimension(660, 540)); - // add(scrollPane, BorderLayout.CENTER); - - frame.getContentPane().add(scrollPane); - - // Display the window. - frame.pack(); - frame.setVisible(true); - // writeImage(i, myPictures + "new-earth-living" + style + ".jpg", - // QUALITY); - } - - // private static void setLocale(int newLocaleIndex) { - // cldrFile = cldrFactory.make(localeList[newLocaleIndex], true); - // tzf = new TimezoneFormatter(cldrFactory, localeList[newLocaleIndex], - // true); - // currentLocaleIndex = newLocaleIndex; - // MutableComboBoxModel model = (MutableComboBoxModel) box7.getModel(); - // for (int i = 0; i < localeList.length; ++i) { - // model.removeElementAt(i); - // model.insertElementAt(cldrFile.getName(localeList[i], true), i); - // } - // } - static Font font = Font.decode("Arial Unicode MS-9"); - // static CLDRFile.Factory cldrFactory; - // static CLDRFile cldrFile; - static int currentLocaleIndex = -1; - // static TimezoneFormatter tzf; - static JComboBox box7; - - /* - * public static class LightingImageFilter implements RGBImageFilter { /** - * Subclasses must specify a method to convert a single input pixel in the - * default RGB ColorModel to a single output pixel. - * - * @param x, y the coordinates of the pixel - * - * @param rgb the integer pixel representation in the default RGB color - * model - * - * @return a filtered pixel in the default RGB color model. - * - * @see ColorModel#getRGBdefault - * - * @see #filterRGBPixels / public int filterRGB(int x, int y, int rgb) { int - * a = (rgb >>> 24) & 0xFF; int r = (rgb >> 16) & 0xFF; int g = (rgb >> 8) & - * 0xFF; int b = rgb & 0xFF; return (rgb & 0xFF) - * - * } } - */ - - /** - * @param sname - */ - private static void loadSourceMap(String sname) { - try { - System.out.println("Check: " + new File(sname).getAbsolutePath()); - final ImageIcon sourceIcon = new ImageIcon(sname); - sourceImage = convertToBuffered(sourceIcon.getImage()); - System.out.println("Loaded " + new File(sname).getCanonicalPath()); - } catch (final IOException e) { - e.printStackTrace(); - throw new RuntimeException("Can't load"); - } - changeImage(frame); - } - - /** - * @param frame - */ - private static final boolean DEBUG_ICON = false; - - private static void changeImage(JFrame frame) { - if (DEBUG_ICON) { - System.out.println("Changing Icon1"); - } - // System.out.println("Width " + ii.getIconWidth() + ", Height: " + - // ii.getIconHeight()); - final DeformFilter filter = new DeformFilter(sourceImage.getWidth(), - sourceImage.getHeight(), sizeValues[sizeChoice][0], - sizeValues[sizeChoice][1], - projectionValues[projectionChoice]); - // ImageFilter filter = new RotateFilter(Math.PI / 4); - - if (DEBUG_ICON) { - System.out.println("Changing Icon2"); - } - final ImageProducer ip = new FilteredImageSource( - sourceImage.getSource(), filter); // modifies filter - if (DEBUG_ICON) { - System.out.println("Changing Icon3"); - } - transformedImage = frame.createImage(ip); - if (DEBUG_ICON) { - System.out.println("Changing Icon4"); - // Icon junk = new ImageIcon(transformedImage); // load image (HACK) - } - - if (DEBUG_ICON) { - System.out.println("Changing Icon5"); - } - addGrid(transformedImage, projectionValues[projectionChoice]); - } - - public static void main(String[] args) throws IOException { - readData(); - // Schedule a job for the event-dispatching thread: - // creating and showing this application's GUI. - javax.swing.SwingUtilities.invokeLater(new Runnable() { - @Override - public void run() { - createAndShowGUI(); - } - }); - } - - static void readData() throws IOException { - final File file = new File("jsp/Globe.txt"); - System.out.println(file.getAbsolutePath()); - - String globe = Settings.CLDR.BASE_DIRECTORY + "workspace/unicodetools/org/unicode/jsp/Globe.txt"; - System.out.println(new File(globe).getCanonicalPath()); - - final BufferedReader br = new BufferedReader( - new InputStreamReader( - new FileInputStream(globe), - "UTF-8")); - // FileUtilities.openUTF8Reader("classes/jsp/", "Globe.txt"); - final String pat = "([^;]+) \\s* [;] \\s* " - + "([0-9.]+) [°]* \\s* ([0-9.]+)? [']* \\s* ([0-9.]+)? [\"]* \\s* " - + "([NS]) \\s* " - + "([0-9.]+) [°]* \\s* ([0-9.]+)? [']* \\s* ([0-9.]+)? [\"]* \\s* " - + "([EW]) \\s*"; - final Matcher m = Pattern.compile(pat, - Pattern.CASE_INSENSITIVE | Pattern.COMMENTS).matcher(""); - System.out.println("Pattern: " + pat); - final List nameData = new ArrayList(); - final List posData = new ArrayList(); - final String[] pieces = new String[3]; - while (true) { - final String line = br.readLine(); - if (line == null) { - break; - } - if (!m.reset(line).matches()) { - System.out.println("Error in data: " + line); - continue; - } - nameData.add(m.group(1)); - final double latitude = Navigator.toRadians( - Double.parseDouble(m.group(2)), - m.group(3) != null ? Double.parseDouble(m.group(3)) : 0, - m.group(4) != null ? Double.parseDouble(m.group(4)) : 0, - m.group(5).equalsIgnoreCase("S")); - final double longitude = Navigator.toRadians( - Double.parseDouble(m.group(6)), - m.group(7) != null ? Double.parseDouble(m.group(7)) : 0, - m.group(8) != null ? Double.parseDouble(m.group(8)) : 0, - m.group(9).equalsIgnoreCase("E")); - posData.add(new double[] { latitude, longitude }); - System.out.println(m.group(1) + ", " + latitude + ", " + longitude); - } - originList = (String[]) nameData.toArray(originList); - origins = (double[][]) posData.toArray(origins); - br.close(); - } - - public static final NumberFormat nf = NumberFormat.getInstance(); - - public static void getAntipode(DPoint in) { - if (in.x > 0) { - in.x -= Math.PI; - } else { - in.x += Math.PI; - } - in.y = -in.y; - } - - public static class DPoint implements Comparable { - double x, y; - - DPoint() { - this(0, 0); - } - - DPoint(double x, double y) { - set(x, y); - } - - DPoint set(double x, double y) { - this.x = x; - this.y = y; - return this; - } - - @Override - public int compareTo(Object o) { - final DPoint that = (DPoint) o; - if (y < that.y) { - return -1; - } - if (y > that.y) { - return 1; - } - if (x < that.x) { - return -1; - } - if (x > that.x) { - return 1; - } - return 0; - } - - @Override - public String toString() { - return "[" + nf.format(x) + "," + nf.format(y) + "]"; - } - } - - public static class DRectangle { - double x0, y0, x1, y1; - } - - public static class Quad { - DRectangle containing = new DRectangle(); - DPoint[] p = new DPoint[4]; - - // returns the amount (0..1) that the square from x,y to x+1, y+1 - // overlaps the quadralateral - void set(DPoint a, DPoint b, DPoint c, DPoint d) { - p[0] = a; - p[1] = b; - p[2] = c; - p[3] = d; - // sort; so y's are now in sorted order - Arrays.sort(p); - // integer bounding rectangle - // is easy for y's - containing.y0 = (int) Math.floor(p[0].y); - containing.y1 = (int) Math.ceil(p[3].y); - // but for x's we have to compute - containing.x0 = (int) Math.floor(Math.min(p[0].x, - Math.min(p[1].x, Math.min(p[2].x, p[3].x)))); - containing.x1 = (int) Math.ceil(Math.max(p[0].x, - Math.max(p[1].x, Math.max(p[2].x, p[3].x)))); - } - - double getWeight(double x, double y) { - // return the percentage overlap between this quadralateral, - // and the rectangle from x,y to x+1,y+1 - // simple implementation for now. return 1 if center is in - // containing, otherwise 0 - if (containing.x0 <= x && x < containing.x1 - && containing.y0 <= y && y < containing.y1) { - return 1.0; - } - return 0; - } - } - - static public abstract class Transform { - static final boolean debug = false; - protected double srcW, srcH, dstW, dstH; - Mapper srcW_long, srcH_lat, long_dstW, lat_dstH; - Navigator navigator; - boolean allowRotation = true; - Shape clip = null; - - /** - * @return Returns the clip. - */ - public Shape getClip() { - if (clip == null) { - clip = _getClip(); - } - return clip; - } - - /** - * @return - */ - public boolean usesOrigin() { - return false; - } - - // must set before use - Transform set(double srcW, double srcH, double dstW, double dstH) { - this.srcW = srcW; - this.srcH = srcH; - this.dstW = dstW; - this.dstH = dstH; - srcW_long = new Mapper(0, srcW, -Math.PI, Math.PI); - srcH_lat = new Mapper(0, srcH, -Math.PI / 2, Math.PI / 2); - long_dstW = new Mapper(-Math.PI, Math.PI, 0, dstW); - lat_dstH = new Mapper(-Math.PI / 2, Math.PI / 2, 0, dstH); - navigator = new Navigator().setLat1Lon1(originLat, originLong); - clip = null; - return this; - } - - // Remember that the coordinate system is upside down so apply - // the transform as if the angle were negated. - // cos(-angle) = cos(angle) - // sin(-angle) = -sin(angle) - public final boolean transform(double x, double y, DPoint retcoord) { - retcoord.x = srcW_long.map(x); - retcoord.y = srcH_lat.map(y); - if (allowRotation && rotate) { - navigator.setLat2Lon2(retcoord.y, retcoord.x); - double dist = navigator.getDistance(); - double course = -navigator.getCourse(); - double offset = Math.PI / 2; - if (dist > Math.PI / 2) { - dist = Math.PI - dist; - offset = -offset; - course = -course; - } - navigator.setLat2Lon2(retcoord.y, retcoord.x); - retcoord.x = dist; - retcoord.y = course; - } - _transform(retcoord); - retcoord.x = long_dstW.map(retcoord.x); - retcoord.y = lat_dstH.map(retcoord.y); - return retcoord.x >= 0.0 && retcoord.x <= dstW && retcoord.y >= 0 - && retcoord.y <= dstH; - } - - // Remember that the coordinate system is upside down so apply - // the transform as if the angle were negated. Since inverting - // the transform is also the same as negating the angle, itransform - // is calculated the way you would expect to calculate transform. - public final boolean itransform(double x, double y, DPoint retcoord) { - retcoord.x = long_dstW.back(x); - retcoord.y = lat_dstH.back(y); - _itransform(retcoord); - if (allowRotation && rotate) { - // retcoord.x = Navigator.wrap(retcoord.x + originLong, - // -Math.PI, Math.PI); - // retcoord.x = Navigator.wrap(retcoord.x, -Math.PI, Math.PI); - // System.out.println(); - // System.out.println("lat: " + Navigator.degrees(retcoord.y) + - // ", lon:" + Navigator.degrees(retcoord.x)); - navigator.setDistanceCourse(retcoord.y, retcoord.x); - retcoord.y = navigator.getLat2(); - retcoord.x = -navigator.getLon2(); - // System.out.println("lat: " + Navigator.degrees(retcoord.y) + - // ", lon:" + Navigator.degrees(retcoord.x)); - } - retcoord.x = srcW_long.back(retcoord.x); - retcoord.y = srcH_lat.back(retcoord.y); - return retcoord.x >= 0.0 && retcoord.x <= srcW && retcoord.y >= 0 - && retcoord.y <= srcH; - } - - /** - * @param input - * and output: latitude in y (radians from -pi/2 to pi/2) and - * longitude in x (radians from -pi to pi) - */ - abstract protected void _transform(DPoint retcoord); - - /** - * @param input - * and output: latitude in y (radians from -pi/2 to pi/2) and - * longitude in x (radians from -pi to pi) - */ - abstract protected void _itransform(DPoint retcoord); - - abstract protected Shape _getClip(); - - /** - * @param style - * @return - */ - public static class Tester { - Transform trans; - DPoint retcoord = new DPoint(); - DPoint minX, minY, maxX, maxY; - - void test() { - for (final Transform projectionValue : projectionValues) { - test(projectionValue); - } - } - - private void test(Transform trans) { - System.out.println("Testing: " + trans.getClass().getName()); - // check that points in the source rectangle map to the target - // rectangle - trans.set(10, 10, 100, 150); - int counter = 0; - minX = new DPoint(Double.MAX_VALUE, Double.MAX_VALUE); - minY = new DPoint(Double.MAX_VALUE, Double.MAX_VALUE); - maxX = new DPoint(Double.MIN_VALUE, Double.MIN_VALUE); - maxY = new DPoint(Double.MIN_VALUE, Double.MIN_VALUE); - final double pLong = trans.srcW_long.back(originLong); - final double pLat = trans.srcH_lat.back(originLat); - - trans.transform(pLong, pLat, retcoord); - trans.transform(trans.srcW / 2, trans.srcH / 2, retcoord); - for (double x = 0; x < trans.srcW; ++x) { - for (double y = 0; y < trans.srcH; ++y) { - counter++; - trans.transform(x, y, retcoord); - final double x2 = retcoord.x; - final double y2 = retcoord.y; - if (x2 < minX.x) { - minX.set(x2, y2); - } - if (x2 > maxX.x) { - maxX.set(x2, y2); - } - if (y2 < minY.y) { - minY.set(x2, y2); - } - if (y2 > maxY.y) { - maxY.set(x2, y2); - } - if (0 <= x2 && x2 < trans.dstW && 0 <= y2 - && y2 < trans.dstH) { - trans.itransform(x2, y2, retcoord); - final double x3 = retcoord.x; - final double y3 = retcoord.y; - if (Math.abs(x - x3) > 0.001 - || Math.abs(y - y3) > 0.001) { - System.out.println("Error: " + counter + "\t" - + x + ", " + y - + " => " + x2 + ", " + y2 - + " => " + x3 + ", " + y3 - ); - } - } - } - } - System.out.println("\t minX " + minX - + ",\t maxX " + maxX - + ",\t minY " + minY - + ",\t maxY " + maxY - ); - } - } - } - - static public class TransformPlateCarree extends Transform { - @Override - public void _transform(DPoint retcoord) { - // nothing - } - - @Override - public void _itransform(DPoint retcoord) { - // nothing - } - - /* - * (non-Javadoc) - * - * @see Globe.Transform#_getClip() - */ - @Override - protected Shape _getClip() { - return new Rectangle.Double(0, 0, dstW, dstH); - } - } - - static public class TransformSinusoidal extends Transform { - @Override - public void _transform(DPoint retcoord) { - retcoord.x = retcoord.x * Math.cos(retcoord.y); - } - - @Override - public void _itransform(DPoint retcoord) { - if (!(-Math.PI <= retcoord.x && retcoord.x <= Math.PI)) { - retcoord.x = Double.NaN; - return; - } - retcoord.x = retcoord.x / Math.cos(retcoord.y); - } - - @Override - protected Shape _getClip() { - final GeneralPath p = new GeneralPath(); - p.moveTo((float) (dstW / 2), 0); - double limitx = srcW_long.map(0); - for (int i = 1; i <= dstH; ++i) { - final double y = lat_dstH.back(i); - // System.out.println(i + ", " + y + ", " + Math.cos(y)); - double x = limitx * Math.cos(y); - x = long_dstW.map(x); - p.lineTo((float) x, i); - } - limitx = srcW_long.map(srcW); - for (int i = (int) dstH - 1; i >= 0; --i) { - final double y = lat_dstH.back(i); - double x = limitx * Math.cos(y); - x = long_dstW.map(x); - p.lineTo((float) x, i); - } - return p; - } - } - - static public class Transform3DIsometric extends Transform { - static double SHIFT = Math.PI / 3; - - @Override - public void _transform(DPoint retcoord) { - // special shift - retcoord.x = Navigator.wrap(retcoord.x - SHIFT, -Math.PI, Math.PI); - // regular stuff - final boolean shift = retcoord.x < -Math.PI / 2 - || retcoord.x > Math.PI / 2; - final double offset = shift ? Math.PI / 2 : -Math.PI / 2; - final double cosy = Math.cos(retcoord.y); - retcoord.y = Math.sin(retcoord.y) * (Math.PI / 2); - retcoord.x = Math.sin(retcoord.x) * cosy * Math.PI / 2 + offset; - if (shift) { - retcoord.x = Math.PI - retcoord.x; - } - } - - @Override - public void _itransform(DPoint retcoord) { - retcoord.x *= 2; - if (retcoord.x < 0) { - retcoord.x += Math.PI; - retcoord.y = Math.asin(retcoord.y / (Math.PI / 2)); - retcoord.x = Math.asin(retcoord.x / Math.cos(retcoord.y) - / Math.PI); - } else { - retcoord.x -= Math.PI; - retcoord.y = Math.asin(retcoord.y / (Math.PI / 2)); - retcoord.x = Math.asin(retcoord.x / Math.cos(retcoord.y) - / Math.PI); - retcoord.x += Math.PI; - if (retcoord.x > Math.PI) { - retcoord.x -= 2 * Math.PI; - } - } - retcoord.x = Navigator.wrap(retcoord.x + SHIFT, -Math.PI, Math.PI); - } - - @Override - protected Shape _getClip() { - final GeneralPath p = new GeneralPath(new Ellipse2D.Double(0, 0, - dstW / 2, dstH)); - p.append(new Ellipse2D.Double(dstW / 2, 0, dstW / 2, dstH), false); - return p; - } - } - - static public class TransformEqualAreaEllipse extends Transform { - boolean debugTemp = false; - - TransformEqualAreaEllipse() { - if (debugTemp) { - final double[][] tests = { { 0, -Math.PI / 2 }, - { 0, -Math.PI / 4 }, { 0, -Math.PI / 8 }, { 0, 0 }, - { 0, Math.PI / 8 }, { 0, Math.PI / 4 }, - { 0, Math.PI / 2 } }; - for (final double[] test : tests) { - final DPoint p = new DPoint(test[0], test[1]); - System.out.println(p); - _itransform(p); - System.out.println(" => " + p); - } - for (double x = -1; x <= 1; x += 0.1) { - final double y = oddFunction(x); - final double xx = inverseOddFunction(y); - System.out.println("x: " + x + "\ty: " + y + "\txx: " + xx); - } - debugTemp = false; - } - } - - // Area of a spherical cap is 2 pi r^2 (1-sin(lat)) - // Area of a circular segment is r^2 ( acos(p) - p sqrt(1-p^2)), where p - // = dist to chord/r - // Thus we get the itransform easily: - // asin(2/pi (acos p - p sqrt(1-p^2)) - @Override - public void _transform(DPoint retcoord) { - final double temp2 = Math.PI / 2 * (1 - Math.sin(retcoord.y)); - final double p = inverseOddFunction(temp2); - retcoord.y = (Math.PI / 2) * p; - final double temp = Math.sqrt(1 - p * p); - retcoord.x = temp * retcoord.x; - } - - @Override - public void _itransform(DPoint retcoord) { - final double p = retcoord.y / (Math.PI / 2); - if (debugTemp) { - System.out.println("\tp:\t" + p); - } - final double temp = Math.sqrt(1 - p * p); - if (debugTemp) { - System.out.println("\ttemp:\t" + temp); - } - final double temp2 = (Math.acos(p) - p * temp); - if (debugTemp) { - System.out.println("\ttemp2:\t" + temp2); - } - final double newy = Math.asin(1 - (2 / Math.PI) * temp2); - if (debugTemp) { - System.out.println("\tnewy:\t" + newy); - } - final double newx = retcoord.x / temp; - retcoord.y = newy; - retcoord.x = newx; - } - - @Override - protected Shape _getClip() { - return new Ellipse2D.Double(0, 0, dstW, dstH); - } - - /** - * @param in - * -1..1 - * @return value in 0..PI - */ - public double oddFunction(double p) { - final double temp = Math.sqrt(1 - p * p); - return (Math.acos(p) - p * temp); - } - - public double oddFunctionDerivative(double p) { - final double temp = Math.sqrt(1 - p * p); - return (-2 - p + p * p) / temp; - } - - static final double epsilon = 0.0001; - final double lowValue = oddFunction(-1); - final double highValue = oddFunction(1); - - public double inverseOddFunction(double pp) { - // ugly, have to approximate. Use newton's method. - final double pLow = pp - epsilon; - final double pHigh = pp + epsilon; - // for the first guess, use high and low bounds - // (guess - low) / (high - low) = (pp - lowV) / (highV - lowV); - double guess = -1 + (1 - -1) * (pp - lowValue) - / (highValue - lowValue); - while (true) { - final double p = oddFunction(guess); - if (pLow < p && p < pHigh) { - return guess; - } - guess = guess - (p - pp) / oddFunctionDerivative(guess); - if (debugTemp) { - System.out.println("newGuess: " + guess); - } - } - } - } - - static public class TransformGallOrthographic extends Transform { - @Override - public void _transform(DPoint retcoord) { - retcoord.y = Math.sin(retcoord.y) * (Math.PI / 2); // transform to - // -1..1, then - // -PI/2..PI/2 - } - - @Override - public void _itransform(DPoint retcoord) { - retcoord.y = Math.asin(retcoord.y / (Math.PI / 2)); // transform to - // -1..1 - } - - @Override - protected Shape _getClip() { - return new Rectangle.Double(0, 0, dstW, dstH); - } - } - - static public class TransformEquidistantConic extends Transform { - { - allowRotation = false; - } - - @Override - public void _transform(DPoint retcoord) { - // divide into two cases - navigator.setLat2Lon2(retcoord.y, retcoord.x); - double dist = navigator.getDistance(); - double course = -navigator.getCourse(); - double offset = Math.PI / 2; - if (dist > Math.PI / 2) { - dist = Math.PI - dist; - offset = -offset; - course = -course; - } - retcoord.x = Math.sin(course) * dist - offset; - retcoord.y = Math.cos(course) * dist; - } - - @Override - public void _itransform(DPoint retcoord) { - double x2 = retcoord.x; - final double y2 = retcoord.y; - double dist, course; - if (x2 < 0) { - x2 += Math.PI / 2; // re-center - dist = Math.sqrt(x2 * x2 + y2 * y2); - if (dist > Math.PI / 2) { - retcoord.x = Double.NaN; - return; - } - course = -Math.atan2(x2, y2); - } else { - x2 -= Math.PI / 2; // re-center - dist = Math.sqrt(x2 * x2 + y2 * y2); - dist = Math.PI - dist; - if (dist < Math.PI / 2) { - retcoord.x = Double.NaN; - return; - } - course = Math.atan2(x2, y2); - } - navigator.setDistanceCourse(dist, course); - retcoord.y = navigator.getLat2(); - retcoord.x = navigator.getLon2(); - } - - @Override - protected Shape _getClip() { - final GeneralPath p = new GeneralPath(new Ellipse2D.Double(0, 0, - dstW / 2, dstH)); - p.append(new Ellipse2D.Double(dstW / 2, 0, dstW / 2, dstH), false); - return p; - } - - @Override - public boolean usesOrigin() { - return true; - } - } - - static class Mapper { - private final double slope, offset; - - Mapper(double sourceMin, double sourceMax, double targetMin, - double targetMax) { - slope = (targetMax - targetMin) / (sourceMax - sourceMin); - offset = targetMin - slope * sourceMin; - } - - double map(double in) { - return in * slope + offset; - } - - double back(double out) { - return (out - offset) / slope; - } - } - - static public class DeformFilter extends ImageFilter { - - private static ColorModel defaultRGB = ColorModel.getRGBdefault(); - - private final DPoint coord = new DPoint(); - - private int raster[]; - - private int xoffset, yoffset; - - private int srcW, srcH; - - private final int dstW, dstH; - - int style; - - DeformFilter(int srcW, int srcH, int width, int height, Transform trans) { - dstW = width; - dstH = height; - this.trans = trans; - trans.set(srcW, srcH, dstW, dstH); - // this.style = style; - } - - Transform trans; - - public void transformBBox(Rectangle rect) { - double minx = Double.POSITIVE_INFINITY; - double miny = Double.POSITIVE_INFINITY; - double maxx = Double.NEGATIVE_INFINITY; - double maxy = Double.NEGATIVE_INFINITY; - for (int y = 0; y <= 1; y++) { - for (int x = 0; x <= 1; x++) { - trans.transform(rect.x + x * rect.width, - rect.y + y * rect.height, coord); - minx = Math.min(minx, coord.x); - miny = Math.min(miny, coord.y); - maxx = Math.max(maxx, coord.x); - maxy = Math.max(maxy, coord.y); - } - } - rect.x = (int) Math.floor(minx); - rect.y = (int) Math.floor(miny); - rect.width = (int) Math.ceil(maxx) - rect.x + 1; - rect.height = (int) Math.ceil(maxy) - rect.y + 1; - } - - @Override - public void setDimensions(int width, int height) { - srcW = width; - srcH = height; - final Rectangle rect = new Rectangle(0, 0, dstW, dstH); - xoffset = -rect.x; - yoffset = -rect.y; - raster = new int[srcW * srcH]; - consumer.setDimensions(dstW, dstH); - - // for debugging - debug = false; - for (int i = 0; i <= rect.width; i += rect.width / 4) { - for (int j = 0; j <= rect.height; j += rect.height / 4) { - trans.transform(i, j, coord); - final double i2 = coord.x; - final double j2 = coord.y; - trans.itransform(i2, j2, coord); - if (debug) { - System.out.println(i + ", " + j + "\t=> " + i2 + ", " - + j2 - + "\t=> " + coord.x + ", " + coord.y); - } - } - } - debug = false; - - } - - static boolean debug = false; - - @Override - public void setColorModel(ColorModel model) { - consumer.setColorModel(defaultRGB); - } - - @Override - public void setHints(int hintflags) { - consumer.setHints(TOPDOWNLEFTRIGHT | COMPLETESCANLINES | SINGLEPASS - | (hintflags & SINGLEFRAME)); - } - - @Override - public void setPixels(int x, int y, int w, int h, ColorModel model, - byte pixels[], int off, int scansize) { - int srcoff = off; - int dstoff = y * srcW + x; - for (int yc = 0; yc < h; yc++) { - for (int xc = 0; xc < w; xc++) { - raster[dstoff++] = model.getRGB(pixels[srcoff++] & 0xff); - } - srcoff += (scansize - w); - dstoff += (srcW - w); - } - } - - @Override - public void setPixels(int x, int y, int w, int h, ColorModel model, - int pixels[], int off, int scansize) { - int srcoff = off; - int dstoff = y * srcW + x; - if (model == defaultRGB) { - for (int yc = 0; yc < h; yc++) { - System.arraycopy(pixels, srcoff, raster, dstoff, w); - srcoff += scansize; - dstoff += srcW; - } - } else { - for (int yc = 0; yc < h; yc++) { - for (int xc = 0; xc < w; xc++) { - raster[dstoff++] = model.getRGB(pixels[srcoff++]); - } - srcoff += (scansize - w); - dstoff += (srcW - w); - } - } - } - - @Override - public void imageComplete(int status) { - if (status == IMAGEERROR || status == IMAGEABORTED) { - consumer.imageComplete(status); - return; - } - final int pixels[] = new int[dstW]; - final Quad q = new Quad(); - final DPoint coord00 = new DPoint(), coord10 = new DPoint(), coord11 = new DPoint(), coord01 = new DPoint(); - double r, g, b, a, w; - boolean changeLightness = false; - double mainProportion = 0, otherProportion = 0; - if (lightness != 0) { - changeLightness = true; - if (lightness < 0) { - mainProportion = (1 + lightness); // 0 = 1, -1 = 0 - // other is zero - } else { - mainProportion = (1 - lightness); // 0 = 1, 1 = 0 - otherProportion = 0xFF * (1 - mainProportion); - } - } - boolean[] topOk = new boolean[dstW]; - double[] topRowX = new double[dstW]; - double[] topRowY = new double[dstW]; - boolean[] bottomOk = new boolean[dstW]; - double[] bottomRowX = new double[dstW]; - double[] bottomRowY = new double[dstW]; - - fillRow(dstW, 0, bottomOk, bottomRowX, bottomRowY); - - for (int dy = 0; dy < dstH; dy++) { - // exchange rows - final boolean[] temp = bottomOk; - bottomOk = topOk; - topOk = temp; - double[] temp2 = bottomRowX; - bottomRowX = topRowX; - topRowX = temp2; - temp2 = bottomRowY; - bottomRowY = topRowY; - topRowY = temp2; - // and fill - fillRow(dstW, dy + 1, bottomOk, bottomRowX, bottomRowY); - for (int dx = 0; dx < dstW - 1; dx++) { - // optimize later - - // find the corners of the destination pixel in source space - pixels[dx] = 0; - /* - * if (false) { int i = (int)Math.round(coord00.x); int j = - * (int)Math.round(coord00.y); if (i < 0 || j < 0 || i >= - * srcW || j >= srcH) { pixels[dx] = 0; } else { pixels[dx] - * = raster[j * srcW + i]; } continue; } if - * (!toptrans.itransform(dx+1, dy, coord10)) continue; if - * (!trans.itransform(dx+1, dy+1, coord11)) continue; if - * (!trans.itransform(dx, dy+1, coord01)) continue; - */ - if (!topOk[dx] || !topOk[dx + 1] || !bottomOk[dx] - || !bottomOk[dx + 1]) { - // pixels[dx] = 0xFFFFFFFF; - continue; - } - coord00.x = topRowX[dx]; - coord00.y = topRowY[dx]; - coord10.x = topRowX[dx + 1]; - coord10.y = topRowY[dx + 1]; - coord01.x = bottomRowX[dx]; - coord01.y = bottomRowY[dx]; - coord11.x = bottomRowX[dx + 1]; - coord11.y = bottomRowY[dx + 1]; - - q.set(coord00, coord10, coord11, coord01); - - // add up the weighted colors - r = g = b = a = w = 0; - final int xx0 = (int) q.containing.x0; - final int xx1 = (int) q.containing.x1; - final int yy0 = (int) q.containing.y0; - final int yy1 = (int) q.containing.y1; - for (int x0 = xx0; x0 < xx1; ++x0) { - for (int y0 = yy0; y0 < yy1; ++y0) { - double weight; - // weight = q.getWeight(x0, y0); - weight = 1; - if (weight == 0.0) { - continue; - } - w += weight; - if (x0 < 0 || y0 < 0 || x0 >= srcW || y0 >= srcH) { - continue; - } - final int color = raster[y0 * srcW + x0]; - a += ((color >> 24) & 0xFF) * weight; - r += ((color >> 16) & 0xFF) * weight; - g += ((color >> 8) & 0xFF) * weight; - b += ((color) & 0xFF) * weight; - } - } - // average: - r /= w; - g /= w; - b /= w; - a /= w; - - if (changeLightness) { - r = mainProportion * r + otherProportion; - g = mainProportion * g + otherProportion; - b = mainProportion * b + otherProportion; - a = mainProportion * a + otherProportion; - } - - pixels[dx] = - ((int) Math.max(0, Math.min(0xFF, Math.round(a))) << 24) - | - ((int) Math.max(0, - Math.min(0xFF, Math.round(r))) << 16) - | - ((int) Math.max(0, - Math.min(0xFF, Math.round(g))) << 8) - | - ((int) Math.max(0, - Math.min(0xFF, Math.round(b)))); - } - consumer.setPixels(0, dy, dstW, 1, defaultRGB, pixels, 0, dstW); - if ((dy % 50) == 0) { - System.out.println(dy); - } - } - consumer.imageComplete(status); - } - - /** - * @param i - * @param dstW2 - * @param j - * @param rowX - * @param rowY - */ - private void fillRow(int xLimit, int dy, boolean[] ok, double[] rowX, - double[] rowY) { - final DPoint coord00 = new DPoint(); - for (int dx = 0; dx < xLimit; dx++) { - ok[dx] = trans.itransform(dx, dy, coord00); - rowX[dx] = coord00.x; - rowY[dx] = coord00.y; - } - } - } - - static public class RotateFilter extends ImageFilter { - - private static ColorModel defaultRGB = ColorModel.getRGBdefault(); - - private final double angle; - - private final double sin; - - private final double cos; - - private final double coord[] = new double[2]; - - private int raster[]; - - private int xoffset, yoffset; - - private int srcW, srcH; - - private int dstW, dstH; - - public RotateFilter(double angle) { - this.angle = angle; - sin = Math.sin(angle); - cos = Math.cos(angle); - } - - public void transform(double x, double y, double[] retcoord) { - // Remember that the coordinate system is upside down so apply - // the transform as if the angle were negated. - // cos(-angle) = cos(angle) - // sin(-angle) = -sin(angle) - retcoord[0] = cos * x + sin * y; - retcoord[1] = cos * y - sin * x; - } - - public void itransform(double x, double y, double[] retcoord) { - // Remember that the coordinate system is upside down so apply - // the transform as if the angle were negated. Since inverting - // the transform is also the same as negating the angle, itransform - // is calculated the way you would expect to calculate transform. - retcoord[0] = cos * x - sin * y; - retcoord[1] = cos * y + sin * x; - } - - public void transformBBox(Rectangle rect) { - double minx = Double.POSITIVE_INFINITY; - double miny = Double.POSITIVE_INFINITY; - double maxx = Double.NEGATIVE_INFINITY; - double maxy = Double.NEGATIVE_INFINITY; - for (int y = 0; y <= 1; y++) { - for (int x = 0; x <= 1; x++) { - transform(rect.x + x * rect.width, - rect.y + y * rect.height, coord); - minx = Math.min(minx, coord[0]); - miny = Math.min(miny, coord[1]); - maxx = Math.max(maxx, coord[0]); - maxy = Math.max(maxy, coord[1]); - } - } - rect.x = (int) Math.floor(minx); - rect.y = (int) Math.floor(miny); - rect.width = (int) Math.ceil(maxx) - rect.x + 1; - rect.height = (int) Math.ceil(maxy) - rect.y + 1; - } - - @Override - public void setDimensions(int width, int height) { - final Rectangle rect = new Rectangle(0, 0, width, height); - transformBBox(rect); - xoffset = -rect.x; - yoffset = -rect.y; - srcW = width; - srcH = height; - dstW = rect.width; - dstH = rect.height; - raster = new int[srcW * srcH]; - consumer.setDimensions(dstW, dstH); - } - - @Override - public void setColorModel(ColorModel model) { - consumer.setColorModel(defaultRGB); - } - - @Override - public void setHints(int hintflags) { - consumer.setHints(TOPDOWNLEFTRIGHT | COMPLETESCANLINES | SINGLEPASS - | (hintflags & SINGLEFRAME)); - } - - @Override - public void setPixels(int x, int y, int w, int h, ColorModel model, - byte pixels[], int off, int scansize) { - int srcoff = off; - int dstoff = y * srcW + x; - for (int yc = 0; yc < h; yc++) { - for (int xc = 0; xc < w; xc++) { - raster[dstoff++] = model.getRGB(pixels[srcoff++] & 0xff); - } - srcoff += (scansize - w); - dstoff += (srcW - w); - } - } - - @Override - public void setPixels(int x, int y, int w, int h, ColorModel model, - int pixels[], int off, int scansize) { - int srcoff = off; - int dstoff = y * srcW + x; - if (model == defaultRGB) { - for (int yc = 0; yc < h; yc++) { - System.arraycopy(pixels, srcoff, raster, dstoff, w); - srcoff += scansize; - dstoff += srcW; - } - } else { - for (int yc = 0; yc < h; yc++) { - for (int xc = 0; xc < w; xc++) { - raster[dstoff++] = model.getRGB(pixels[srcoff++]); - } - srcoff += (scansize - w); - dstoff += (srcW - w); - } - } - } - - @Override - public void imageComplete(int status) { - if (status == IMAGEERROR || status == IMAGEABORTED) { - consumer.imageComplete(status); - return; - } - final int pixels[] = new int[dstW]; - for (int dy = 0; dy < dstH; dy++) { - itransform(0 - xoffset, dy - yoffset, coord); - double x1 = coord[0]; - double y1 = coord[1]; - itransform(dstW - xoffset, dy - yoffset, coord); - final double x2 = coord[0]; - final double y2 = coord[1]; - final double xinc = (x2 - x1) / dstW; - final double yinc = (y2 - y1) / dstW; - for (int dx = 0; dx < dstW; dx++) { - final int sx = (int) Math.round(x1); - final int sy = (int) Math.round(y1); - if (sx < 0 || sy < 0 || sx >= srcW || sy >= srcH) { - pixels[dx] = 0; - } else { - pixels[dx] = raster[sy * srcW + sx]; - } - x1 += xinc; - y1 += yinc; - } - consumer.setPixels(0, dy, dstW, 1, defaultRGB, pixels, 0, dstW); - } - consumer.imageComplete(status); - } - } - - /* - * public static double convertDegreesToDecimal(double degrees, double - * minutes, double seconds, boolean NorthOrEast) { double result = (degrees - * + minutes / 60 + seconds / 3600); if (!NorthOrEast) result = -result; - * return result; } - */ - /* - * public static void convertLongitudeLatitudeToWidthHeight(double - * longitude, double latitude, double width, double height, DPoint output) { - * output.x = (longitude + 180)/360 * width; output.y = (90 - latitude)/180 - * * height; } - */ - /* - * public static void convertPolarRadiansToWidthHeight(double longitudeR, - * double colatitudeR, double width, double height, DPoint output) { // get - * in range longitudeR += Math.PI; // origin on left while (longitudeR < 0) - * longitudeR += Math.PI * 2; while (longitudeR > Math.PI * 2) longitudeR -= - * Math.PI * 2; output.x = longitudeR/(Math.PI * 2) * width; output.y = - * colatitudeR/Math.PI * height; } - */ - /* - * public static void convertLongitudeLatitudeToPolarRadians(double - * longitude, double latitude, DPoint output) { output.x = longitude/180 * - * Math.PI; output.y = (90 - latitude)/180 * Math.PI; } - */ - - public static BufferedImage convertToBuffered(Image image) { - final int thumbWidth = image.getWidth(null); - final int thumbHeight = image.getHeight(null); - final BufferedImage thumbImage = new BufferedImage(thumbWidth, - thumbHeight, - BufferedImage.TYPE_INT_RGB); - final Graphics2D graphics2D = thumbImage.createGraphics(); - graphics2D.setRenderingHint(RenderingHints.KEY_INTERPOLATION, - RenderingHints.VALUE_INTERPOLATION_BILINEAR); - graphics2D.drawImage(image, 0, 0, thumbWidth, thumbHeight, null); - return thumbImage; - } - - public static void addGrid(Image image, Transform trans) { - final int thumbWidth = image.getWidth(null); - final int thumbHeight = image.getHeight(null); - final BufferedImage thumbImage = new BufferedImage(thumbWidth, - thumbHeight, - BufferedImage.TYPE_INT_RGB); - final Graphics2D graphics2D = thumbImage.createGraphics(); - graphics2D.setRenderingHint(RenderingHints.KEY_INTERPOLATION, - RenderingHints.VALUE_INTERPOLATION_BILINEAR); - Color meridian = Color.red; - final Color everyOtherLine = Color.orange; - if (lightness > 0) { - graphics2D.setClip(0, 0, thumbWidth, thumbHeight); - graphics2D.setColor(new Color((int) (0xFF * lightness), - (int) (0xFF * lightness), (int) (0xFF * lightness))); - graphics2D.fillRect(0, 0, thumbWidth, thumbHeight); - meridian = new Color(0xFF, (int) (0xFF * lightness), - (int) (0xFF * lightness)); - } - graphics2D.setClip(trans.getClip()); - graphics2D.drawImage(image, 0, 0, thumbWidth, thumbHeight, null); - // Menlo Park 37? 28' 48" N 122? 08' 39" W - - graphics2D.setRenderingHint(RenderingHints.KEY_ANTIALIASING, - RenderingHints.VALUE_ANTIALIAS_ON); - // double latitude = convertDegreesToDecimal(37.0, 28.0, 48.0, true); // - // N = + - // double longitude = convertDegreesToDecimal(122.0, 8.0, 39.0, false); - // // W = - - final DPoint retCoord = new DPoint(); - - // drawPoint(graphics2D, trans, longitude, latitude); - - drawPoint(graphics2D, trans, Color.green, Color.white, originLong, - originLat); - retCoord.x = originLong; - retCoord.y = originLat; - getAntipode(retCoord); - drawPoint(graphics2D, trans, Color.red, Color.white, retCoord.x, - retCoord.y); - - graphics2D.setFont(font); - final FontMetrics fm = graphics2D.getFontMetrics(); - - final BasicStroke normal = new BasicStroke(1.0f / 3); - final BasicStroke thick = new BasicStroke(2.0f / 3); - - if (true) { - // hack to draw circles - // convertLongitudeLatitudeToPolarRadians(originLong, originLat, - // retCoord); - // double longR = retCoord.x; - // double colatR = retCoord.y; - - // SphericalTriangle stri = new SphericalTriangle(); - final Navigator navigator = new Navigator().setLat1Lon1(originLat, - originLong); - final int increment = 180 / degreeInterval; - final int grain = 3; - final int labelPosition = increment / 2; - - // circles of equal distance - final double dInc = Math.PI / increment; - // double dInc2 = dInc/grain; - final int distLimit = increment - 1; - final int angleLimit = 2 * increment - 1; - final int halfAngle = increment; - - /* - * for (int distanceI = 1; distanceI <= distLimit; ++distanceI) { if - * (distanceI == labelPosition) graphics2D.setColor(Color.black); - * else graphics2D.setColor(Color.yellow); double distance = dInc * - * distanceI; double lat1 = 0, lon1 = 0; for (int angleI = 0; angleI - * <= (angleLimit + 1) * grain; ++angleI) { double angle = dInc2 * - * angleI; //System.out.println("Distance: " + distance + - * "\tAngle: " + angle); navigator.setDistanceCourse(distance, - * angle); double lat2 = trans.srcH_lat.back(navigator.getLat2()); - * double lon2 = trans.srcW_long.back(navigator.getLon2()); - * //System.out.println("Distance: " + distance + "\tAngle: " + - * angle); if (angleI != 0) drawLine(graphics2D, trans, lon2, lat2, - * lon1, lat1); lat1 = lat2; lon1 = lon2; } } - * - * // lines to antipode for (int angleI = 0; angleI <= angleLimit; - * ++angleI) { double angle = dInc * angleI; double lat1 = 0, lon1 = - * 0; if (angleI == 0) graphics2D.setColor(Color.black); else - * graphics2D.setColor(Color.white); for (int distanceI = grain; - * distanceI <= distLimit * grain; ++distanceI) { double distance = - * dInc2 * distanceI; //System.out.println("Distance: " + distance + - * "\tAngle: " + angle); navigator.setDistanceCourse(distance, - * angle); double lat2 = trans.srcH_lat.back(navigator.getLat2()); - * double lon2 = trans.srcW_long.back(navigator.getLon2()); - * //System.out.println("Distance: " + distance + "\tAngle: " + - * angle); if (distanceI != grain) drawLine(graphics2D, trans, lon2, - * lat2, lon1, lat1); lat1 = lat2; lon1 = lon2; } - */ - - // lines to the antipode - final double gap = 0.02; - final PathTransform pathTransform = new PathTransform(navigator, - trans); - LineDrawer ld = new LineDrawer(graphics2D, pathTransform); - for (int angleI = 0; angleI <= angleLimit; ++angleI) { - final double angle = dInc * angleI; - if (angleI == 0 || angleI == halfAngle) { - graphics2D.setColor(meridian); - graphics2D.setStroke(thick); - } else if ((angleI % 3) == 0) { - graphics2D.setColor(everyOtherLine); - graphics2D.setStroke(thick); - } else { - graphics2D.setColor(Color.white); - graphics2D.setStroke(normal); - } - pathTransform.setAngle(angle); - ld.draw(gap, 1 - gap); - } - - final AngleCircleTransform angleTransform = new AngleCircleTransform( - navigator, trans); - ld = new LineDrawer(graphics2D, angleTransform); - for (int distanceI = 1; distanceI <= distLimit; ++distanceI) { - if (distanceI == labelPosition) { - graphics2D.setColor(meridian); - graphics2D.setStroke(thick); - } else { - graphics2D.setColor(Color.white); - graphics2D.setStroke(normal); - } - final double distance = dInc * distanceI; - angleTransform.setDistance(distance); - ld.draw(0, 1); - } - - // if (doLabels) { - // graphics2D.setClip(null); - // StandardCodes sc = StandardCodes.make(); - // LabelPosition lp = new LabelPosition(graphics2D, trans.dstW, - // trans.dstH); - // Map zones = sc.getZoneData(); - // Set zkeys = sc.getGoodAvailableCodes("tzid"); - // Date now = new Date(); - // for (Iterator it = zkeys.iterator(); it.hasNext();) { - // String fullkey = (String) it.next(); - // List data = (List) zones.get(fullkey); - // //String key = tzf.getFormattedZone(fullkey,"vvvv", - // now.getTime(), false); // key.substring(key.lastIndexOf('/')+1); - // retCoord.y = ((Double)data.get(0)).doubleValue(); - // retCoord.y = Navigator.toRadians(retCoord.y, 0, 0, false); - // retCoord.x = ((Double)data.get(1)).doubleValue(); - // retCoord.x = Navigator.toRadians(retCoord.x, 0, 0, true); - // drawPoint(graphics2D, trans, Color.white, Color.red, retCoord.x, - // retCoord.y, null); - // lp.add(trans, retCoord.x, retCoord.y, key); - // } - // lp.draw(); - // } - /* - * graphics2D.setColor(Color.red); graphics2D.setStroke(new - * BasicStroke(2f)); pathTransform = new PathTransform(navigator, - * trans); pathTransform.setAngle(20*Navigator.DEGREE); ld = new - * LineDrawer(graphics2D,pathTransform); ld.draw(0,1); - */ - - /* - * for (int distanceI = 1; distanceI < coord.length; ++distanceI) { - * boolean doLabel = distanceI == labelPosition; double[][] - * distanceR = coord[distanceI]; for (int angleI = 0; angleI < - * distanceR.length-1; ++angleI) { double[] angleR00 = - * distanceR[angleI]; double[] angleR01 = distanceR[angleI+1]; - * graphics2D.setColor(Color.white); if (doLabel) { if - * (AverageWithColor) graphics2D.setColor(Color.gray); else - * graphics2D.setColor(Color.black); } drawLine(graphics2D, trans, - * angleR00[0], angleR00[1], angleR01[0], angleR01[1]); if (doLabel) - * { drawDegrees(graphics2D, trans, fm, 180 * angleI / increment, - * angleR00[0], angleR00[1]); } } - * - * double[] angleR00 = distanceR[distanceR.length-1]; double[] - * angleR01 = distanceR[0]; graphics2D.setColor(Color.white); if - * (doLabel) { if (AverageWithColor) - * graphics2D.setColor(Color.gray); else - * graphics2D.setColor(Color.black); } drawLine(graphics2D, trans, - * angleR00[0], angleR00[1], angleR01[0], angleR01[1]); if (doLabel) - * { drawDegrees(graphics2D, trans, fm, 180 * (distanceR.length-1) / - * increment, angleR00[0], angleR00[1]); } - * - * } - */ - - } - // save thumbnail image to OUTFILE - griddedImage = thumbImage; - if (DEBUG_ICON) { - System.out.println("Changing Icon6"); - } - final ImageIcon resultIcon = new ImageIcon(thumbImage); // recreate with - // buffered - // version - if (DEBUG_ICON) { - System.out.println("Changing Icon7"); - } - mainPicture.setIcon(resultIcon); - - } - - static class LabelPosition { - class Chunk implements Comparable { - double x, y; - int xStart, yStart, width; - String s; - - public Chunk(double x2, double y2, String s2) { - x = x2; - y = y2; - s = s2; - width = 1; - xStart = (int) (x2 / tileWidth); - yStart = (int) (y2 / tileHeight); - if (s == null) { - return; - } - xStart += 2; - - final Rectangle2D r = metrics.getStringBounds(s, graphics2D); - width = 1 + (int) ((r.getWidth() - 1.0) / tileWidth); - - if (xStart + width >= tileWidthCount) { - xStart = tileWidthCount - width; - } - } - - @Override - public int compareTo(Object o) { - final Chunk that = (Chunk) o; - if (x != that.x) { - return x < that.x ? -1 : 1; - } - if (width != that.width) { - return width > that.width ? -1 : 1; // largest first - } - if (y != that.y) { - return y < that.y ? -1 : 1; - } - if (s == null) { - if (that.s == null) { - return 0; - } - return -1; - } - if (that.s == null) { - return 1; - } - return s.compareTo(that.s); - } - - public boolean overlaps(Chunk that) { - if (yStart != that.yStart) { - return false; - } - if (xStart > that.xStart + that.width) { - return false; - } - if (that.xStart > xStart + width) { - return false; - } - return true; - } - } - - Graphics2D graphics2D; - FontMetrics metrics; - Set[] lineContents; - double tileWidth, tileHeight; - int tileWidthCount, tileHeightCount; - double ascent; - Set initialContents = new TreeSet(); - - LabelPosition(Graphics2D graphics2D, double width, double height) { - this.graphics2D = graphics2D; - metrics = graphics2D.getFontMetrics(); - final Rectangle2D r = metrics.getStringBounds("n", graphics2D); - ascent = metrics.getAscent(); - // tile the map into a grid - tileWidthCount = (int) (width / r.getWidth()); - tileWidth = width / tileWidthCount + 0.0000001; - tileHeightCount = (int) (height / r.getHeight()); - tileHeight = height / tileHeightCount + 0.0000001; - lineContents = new Set[tileHeightCount]; - for (int i = 0; i < lineContents.length; ++i) { - lineContents[i] = new TreeSet(); - } - } - - void add(Transform trans, double longitude, double latitude, String s) { - final double xx = trans.srcW_long.back(longitude); - final double yy = trans.srcH_lat.back(latitude); - trans.transform(xx, yy, drawLineP1); - Chunk c = new Chunk(drawLineP1.x, drawLineP1.y, s); - initialContents.add(c); - c = new Chunk(drawLineP1.x, drawLineP1.y, null); // point only - lineContents[c.yStart].add(c); - } - - void fixContents() { - for (final Iterator it2 = initialContents.iterator(); it2.hasNext();) { - final Chunk c = (Chunk) it2.next(); - findFittingLine(c); - lineContents[c.yStart].add(c); - } - } - - /** - * @param c - * @return - */ - private void findFittingLine(Chunk c) { - int pos = c.yStart; - boolean positive = false; - boolean lastOutOfBounds = false; - main: for (int ii = 0;; ++ii, positive = !positive) { - pos += (positive ? ii : -ii); - if (pos < 0 || pos >= lineContents.length) { - if (lastOutOfBounds) { - c.yStart = 0; - return; - } - lastOutOfBounds = true; - continue; - } - lastOutOfBounds = false; - c.yStart = pos; // assume ok. - // go x, x+1, x-1, +2, -2, ... - for (final Iterator it = lineContents[pos].iterator(); it - .hasNext();) { - final Chunk that = (Chunk) it.next(); - if (c.overlaps(that)) { - if (DEBUG) { - System.out.println(pos + " pushing " + c.s - + " (collision with " + that.s + ")"); - } - continue main; - } - } - return; // yStart now set right. - } - } - - void draw() { - fixContents(); - graphics2D.setColor(Color.pink); - for (final Set lineContent : lineContents) { - for (final Iterator it = lineContent.iterator(); it.hasNext();) { - final Chunk c = (Chunk) it.next(); - if (c.s == null) { - continue; // point - } - final double x2 = tileWidth * c.xStart; - final double y2 = tileHeight * c.yStart; - final Line2D.Double line2 = new Line2D.Double(c.x, c.y, x2, - y2 + tileHeight / 2); - graphics2D.draw(line2); - graphics2D.drawString(c.s, (int) x2, (int) (y2 + ascent)); - } - } - } - } - - /** - * @param graphics2D - * @param trans - * @param fill - * TODO - * @param line - * TODO - * @param longitude - * @param latitude - * @return - */ - private static void drawPoint(Graphics2D graphics2D, Transform trans, - Color fill, Color line, - double longitude, double latitude, String label) { - final double xx = trans.srcW_long.back(longitude); - final double yy = trans.srcH_lat.back(latitude); - // convertLongitudeLatitudeToWidthHeight(longitude, latitude, - // trans.srcW, trans.srcH, drawLineP1); - // double xx = drawLineP1.x; - // double yy = drawLineP1.y; - // System.out.println(" xx: " + xx + ", yy: " + yy); - final double radius = 1; - trans.transform(xx, yy, drawLineP1); - - final Ellipse2D.Double ellipse = new Ellipse2D.Double(); - ellipse.x = drawLineP1.x - radius; - ellipse.y = drawLineP1.y - radius; - ellipse.height = ellipse.width = radius * 2; - graphics2D.setColor(fill); - graphics2D.fill(ellipse); - graphics2D.setColor(line); - graphics2D.draw(ellipse); - /* - * if (label == null) return; if (label != null) { Line2D.Double line2 = - * new Line2D.Double(drawLineP1.x, drawLineP1.y, drawLineP1.x + 5, - * drawLineP1.y + 5); graphics2D.draw(line2); } - * - * if (label != null) graphics2D.drawString(label, (int)drawLineP1.x + - * 5, (int)drawLineP1.y + 5); - */ - } - - private static void drawPoint(Graphics2D graphics2D, Transform trans, - Color fill, Color line, - double longitude, double latitude) { - drawPoint(graphics2D, trans, fill, line, - longitude, latitude, null); - } - - /** - * @param graphics2D - * @param trans - * @param fm - * @param retCoord - * @param increment - * @param angleI - * @param angleR00 - */ - private static void drawDegrees(Graphics2D graphics2D, Transform trans, - FontMetrics fm, double degrees, double x, double y) { - final String degreesStr = nf.format(degrees) + "°"; - final Rectangle2D r = fm.getStringBounds(degreesStr, graphics2D); - trans.transform(x - r.getWidth() / 2, y, drawLineP1); - graphics2D.drawString(degreesStr, (int) drawLineP1.x, - (int) drawLineP1.y); - } - - private static DPoint drawLineP1 = new DPoint(); - - /* - * - * private static void drawLine(Graphics2D graphics2D, Transform trans, - * double x1, double y1, double x2, double y2) { // check for cases where it - * crosses a boundary double xDist = Math.abs(x1 - x2); double yDist = - * Math.abs(y1 - y2); if (xDist > trans.srcW/2) { if (yDist > trans.srcH/2) - * { // skip, don't care about opposite corners - * System.out.println("Skipping opposite corners"); } else { if (x1 < x2) { - * drawLine2(graphics2D, trans, x1, y1, x2 - trans.srcW, y2); - * drawLine2(graphics2D, trans, x1 + trans.srcW, y1, x2, y2); } else { - * drawLine2(graphics2D, trans, x1, y1, x2 + trans.srcW, y2); - * drawLine2(graphics2D, trans, x1 - trans.srcW, y1, x2, y2); } } } else if - * (yDist > trans.srcH/2) { if (y1 < y2) { drawLine2(graphics2D, trans, x1, - * y1, x2, y2 - trans.srcH); drawLine2(graphics2D, trans, x1, y1 + - * trans.srcH, x2, y2); } else { drawLine2(graphics2D, trans, x1, y1, x2, y2 - * + trans.srcH); drawLine2(graphics2D, trans, x1, y1 - trans.srcH, x2, y2); - * } } else { drawLine2(graphics2D, trans, x1, y1, x2, y2); } } - * - * private static void drawLine2(Graphics2D graphics2D, Transform trans, - * double x, double y, double x2, double y2) { trans.transform(x, y, - * drawLineP1); int ix = (int) Math.round(drawLineP1.x); int iy = (int) - * Math.round(drawLineP1.y); trans.transform(x2, y2, drawLineP1); int ix2 = - * (int) Math.round(drawLineP1.x); int iy2 = (int) Math.round(drawLineP1.y); - * graphics2D.drawLine(ix, iy, ix2, iy2); } - */ - - abstract static class TTransform { - double x, y; - - // t is 0..1 - abstract void transform(double t); - } - - static class PathTransform extends TTransform { - private final Navigator navigator; - private final Transform trans; - private double angle; - - PathTransform(Navigator navigator, Transform trans) { - this.navigator = navigator; - this.trans = trans; - } - - void setAngle(double angle) { - this.angle = angle; - } - - transient DPoint temp = new DPoint(); - - @Override - void transform(double t) { - navigator.setDistanceCourse(t * Math.PI, angle); - y = trans.srcH_lat.back(navigator.getLat2()); - x = trans.srcW_long.back(navigator.getLon2()); - trans.transform(x, y, temp); - x = temp.x; - y = temp.y; - } - } - - static class AngleCircleTransform extends TTransform { - private final Navigator navigator; - private final Transform trans; - private double distance; - - AngleCircleTransform(Navigator navigator, Transform trans) { - this.navigator = navigator; - this.trans = trans; - } - - void setDistance(double distance) { - this.distance = distance; - } - - transient DPoint temp = new DPoint(); - - @Override - void transform(double t) { - navigator.setDistanceCourse(distance, t * (2 * Math.PI)); - y = trans.srcH_lat.back(navigator.getLat2()); - x = trans.srcW_long.back(navigator.getLon2()); - trans.transform(x, y, temp); - x = temp.x; - y = temp.y; - } - } - - static class LineDrawer { - double distanceSquaredLimit = 10 * 10; - Graphics2D graphics2D; - Line2D.Double line = new Line2D.Double(); - transient double startX, startY, startT; - // transient double endX, endY, endT; - TTransform ttransform; - - // int segments = 0; - LineDrawer(Graphics2D graphics2D, TTransform ttransform) { - this.graphics2D = graphics2D; - this.ttransform = ttransform; - } - - // t is 0..1 - void draw(double startT, double endT) { - this.startT = startT; - // this.endT = endT; - ttransform.transform(startT); - startX = ttransform.x; - startY = ttransform.y; - ttransform.transform(endT); - final double endX = ttransform.x; - final double endY = ttransform.y; - draw(3, 10, endT, endX, endY); - // System.out.println("segments: " + segments); - } - - void draw(int minDepth, int maxDepth, double endT, double endX, - double endY) { - // System.out.println(maxDepth + "\t" + startT + ", " + startX + - // ", " + startY + "\t" + endT + ", " + endX + ", " + endY); - // at the end of a draw, the startT is always moved up to the endT - boolean divide = false; - // if we've reached the limit, draw - if (minDepth > 0) { // if we are under the depth, divide and conquer - divide = true; - } else { - // if the distance is large, and still not too deep, divide and - // conquer - final double dx = endX - startX; - final double dy = endY - startY; - // System.out.println("dist: " + Math.sqrt(dx*dx + dy*dy)); - if ((dx * dx + dy * dy) > distanceSquaredLimit) { - if (maxDepth <= 0) { - return; // skip if too long - } - divide = true; - } - } - if (divide) { - final double midT = (startT + endT) / 2; - ttransform.transform((startT + endT) / 2); - final double midX = ttransform.x; // keep, since ttransform gets - // overridden - final double midY = ttransform.y; - draw(minDepth - 1, maxDepth - 1, midT, midX, midY); - draw(minDepth - 1, maxDepth - 1, endT, endX, endY); - } else { - // System.out.println("Drawing"); - // segments++; - line.x1 = startX; - line.y1 = startY; - line.x2 = endX; - line.y2 = endY; - graphics2D.draw(line); - // graphics2D.drawLine((int) Math.round(startX), (int) - // Math.round(startY), - // (int) Math.round(endX), (int) Math.round(endY)); - } - startT = endT; - startX = endX; - startY = endY; - } - } - - public static void writeImage(BufferedImage image, String filename, - float quality) { - try { -// final BufferedOutputStream out = new BufferedOutputStream( -// new FileOutputStream(filename)); - File filename2 = new File(filename); - ImageIO.write(image, "jpg", filename2); -// final JPEGImageEncoder encoder = JPEGCodec.createJPEGEncoder(out); -// final JPEGEncodeParam param = encoder -// .getDefaultJPEGEncodeParam(image); -// quality = Math.max(0, Math.min(quality, 100)); -// param.setQuality(quality / 100.0f, false); -// encoder.setJPEGEncodeParam(param); -// encoder.encode(image); -// out.close(); - System.out.println("Saving on: " + filename2.getCanonicalPath()); - } catch (final Exception e) { - e.printStackTrace(); - throw new RuntimeException("Failed write of image"); - } - } - - public static class Thumbnail { - public static void main(String[] args) throws Exception { - if (args.length != 5) { - System.err.println("Usage: java Thumbnail INFILE " + - "OUTFILE WIDTH HEIGHT QUALITY"); - System.exit(1); - } - // load image from INFILE - final Image image = Toolkit.getDefaultToolkit().getImage(args[0]); - final MediaTracker mediaTracker = new MediaTracker(new Container()); - mediaTracker.addImage(image, 0); - mediaTracker.waitForID(0); - // determine thumbnail size from WIDTH and HEIGHT - int thumbWidth = Integer.parseInt(args[2]); - int thumbHeight = Integer.parseInt(args[3]); - final double thumbRatio = (double) thumbWidth - / (double) thumbHeight; - final int imageWidth = image.getWidth(null); - final int imageHeight = image.getHeight(null); - final double imageRatio = (double) imageWidth - / (double) imageHeight; - if (thumbRatio < imageRatio) { - thumbHeight = (int) (thumbWidth / imageRatio); - } else { - thumbWidth = (int) (thumbHeight * imageRatio); - } - // draw original image to thumbnail image object and - // scale it to the new size on-the-fly - final BufferedImage thumbImage = new BufferedImage(thumbWidth, - thumbHeight, BufferedImage.TYPE_INT_RGB); - final Graphics2D graphics2D = thumbImage.createGraphics(); - graphics2D.setRenderingHint(RenderingHints.KEY_INTERPOLATION, - RenderingHints.VALUE_INTERPOLATION_BILINEAR); - graphics2D.drawImage(image, 0, 0, thumbWidth, thumbHeight, null); - // save thumbnail image to OUTFILE - int quality = Integer.parseInt(args[4]); - writeImage(thumbImage, args[1], quality); -// final BufferedOutputStream out = new BufferedOutputStream(new -// FileOutputStream(args[1])); -// final JPEGImageEncoder encoder = JPEGCodec.createJPEGEncoder(out); -// final JPEGEncodeParam param = encoder. -// getDefaultJPEGEncodeParam(thumbImage); -// quality = Math.max(0, Math.min(quality, 100)); -// param.setQuality(quality / 100.0f, false); -// encoder.setJPEGEncodeParam(param); -// encoder.encode(thumbImage); -// out.close(); - System.out.println("Done."); - System.exit(0); - } - } -} diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/ICUPropertyFactory.java b/unicodetools/src/main/java/org/unicode/jsp/ICUPropertyFactory.java similarity index 99% rename from UnicodeJsps/src/main/java/org/unicode/jsp/ICUPropertyFactory.java rename to unicodetools/src/main/java/org/unicode/jsp/ICUPropertyFactory.java index 47be56184..896c21328 100644 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/ICUPropertyFactory.java +++ b/unicodetools/src/main/java/org/unicode/jsp/ICUPropertyFactory.java @@ -29,6 +29,8 @@ import com.ibm.icu.text.UnicodeSet; import com.ibm.icu.util.VersionInfo; +import org.unicode.props.UnicodeProperty; + /** * Provides a general interface for Unicode Properties, and diff --git a/unicodetools/src/main/java/org/unicode/jsp/LanguageCode.java b/unicodetools/src/main/java/org/unicode/jsp/LanguageCode.java deleted file mode 100644 index a4696b402..000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/LanguageCode.java +++ /dev/null @@ -1,427 +0,0 @@ -package org.unicode.jsp; - -import java.util.Arrays; -import java.util.Collection; -import java.util.HashSet; -import java.util.Locale; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; -import java.util.TreeSet; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import com.ibm.icu.lang.UCharacter; -import com.ibm.icu.text.Collator; -import com.ibm.icu.util.ULocale; - -public class LanguageCode { - - static public final Pattern languageID = Pattern.compile( - " (?: ( [a-z A-Z]{2,8} | [a-z A-Z]{2,3} [-_] [a-z A-Z]{3} )" - + " (?: [-_] ( [a-z A-Z]{4} ) )? " - + " (?: [-_] ( [a-z A-Z]{2} | [0-9]{3} ) )?" - + " (?: [-_] ( (?: [0-9 a-z A-Z]{5,8} | [0-9] [0-9 a-z A-Z]{3} ) (?: [-_] (?: [0-9 a-z A-Z]{5,8} | [0-9] [0-9 a-z A-Z]{3} ) )* ) )?" - + " (?: [-_] ( [a-w y-z A-W Y-Z] (?: [-_] [0-9 a-z A-Z]{2,8} )+ (?: [-_] [a-w y-z A-W Y-Z] (?: [-_] [0-9 a-z A-Z]{2,8} )+ )* ) )?" - + " (?: [-_] ( [xX] (?: [-_] [0-9 a-z A-Z]{1,8} )+ ) )? ) " - + " | ( [xX] (?: [-_] [0-9 a-z A-Z]{1,8} )+ )", - Pattern.COMMENTS); - - static final Pattern extensionID = Pattern.compile("[a-w y-z A-W Y-Z]([-_][0-9 a-z A-Z]{2,8})*"); - static final Collection QUALITY_EXCLUSIONS = new HashSet(Arrays.asList("ti fo so kok ps cy sw ur pa pa_Guru uz_Latn ii haw az_Cyrl bo as zu ha ha_Latn uz_Arab om pa_Arab kw kl kk kk_Cyrl gv si uz uz_Cyrl" - .split("\\s+"))); - - enum Subtag {language, script, region, - variants, - extensions, privateUse, privateUse2; - String get(Matcher m) { - return m.group(ordinal()+1); - } - } - - static class MyHandler extends FileUtilities.SemiFileReader { - TreeMap map = new TreeMap(); - @Override - protected boolean isCodePoint() { - return false; - } - @Override - public boolean handleLine(int start, int end, String[] items) { - map.put(items[0], items[1]); - return true; - } - } - - static final Map names = ((MyHandler) new MyHandler().process(LanguageCode.class, "subtagNames.txt")).map; - static final Map toAlpha3 = ((MyHandler) new MyHandler().process(LanguageCode.class, "alpha2_3.txt")).map; - static final Map fixCodes = ((MyHandler)new MyHandler().process(LanguageCode.class, "fixCodes.txt")).map; - - public static String validate(String input, ULocale ulocale) { - final String oldInput = input; - final StringBuilder canonical = new StringBuilder(); - String prefix = ""; - - input = input.trim(); - input = input.replace("_", "-"); - final Matcher m = languageID.matcher(input); - if (!m.matches()) { - int i = input.length(); - for (; ; --i) { - final String fragment = input.substring(0,i); - m.reset(fragment).matches(); - if(i == 0 || m.hitEnd()) { - final int posBefore = input.lastIndexOf('-', i-1) + 1; - int posAfter = input.indexOf('-', i); - if (posAfter < 0) { - posAfter = input.length(); - } - prefix = "

Ill-Formed Language Identifier: " + input.substring(0, posBefore) - + "" + input.substring(posBefore, i) - + "×" - + input.substring(i, posAfter) - + "" + input.substring(posAfter, input.length()) - + "
Couldn't parse past the point marked with ×.

\n"; - if (posBefore <= 0) { - return prefix; - } - input = input.substring(0, posBefore-1); - m.reset(input); - if (!m.matches()) { - return prefix; - } - break; - } - } - } - final StringBuilder builder = new StringBuilder().append("\n").append(getLine("th", "Type", "2.1", "Code", "Name", "Replacement?")); - - String languageCode = Subtag.language.get(m); - if (languageCode != null) { - String languageAndLink = languageCode = languageCode.toLowerCase(Locale.ENGLISH); - final String originalCode = languageCode; - String fixed; - String languageName; - final String[] parts = languageCode.split("[-_]"); - if (parts.length == 1) { - final boolean invalidLanguageCode = !names.containsKey(languageCode); - if (invalidLanguageCode) { - languageName = "invalid code"; - } else { - languageName = getSubtagName(languageCode, ulocale, true); - if (languageName.startsWith("@")) { - languageName = languageName.substring(1); - } - languageAndLink = getCodeAndLink(Subtag.language, languageCode, ulocale); - } - fixed = fixCodes.get(languageCode); - } else { // must be 2 - // cases are the following. For the replacement, we use fix(extlang) if valid, otherwise fix(lang) if valid, otherwise fix(extlang) - // zh-cmn - valid => cmn - // en-cmn - valid => cmn // but shouldn't be; by canonicalization en-cmn = cmn - // eng-cmn - invalid => cmn - // xxx-cmn - invalid => cmn - // zh-xxx - invalid => zh - // xxx-eng - invalid => en - // xxx-yyy - invalid => null - // That is, pick the second unless it is invald - languageCode = parts[0]; - final String extlang = parts[1]; - final String extLangName = names.get(extlang); - final boolean invalidLanguageCode = !names.containsKey(languageCode); - final boolean invalidExtlang = extLangName == null || !extLangName.startsWith("@"); - if (invalidExtlang & invalidLanguageCode) { - if (extLangName == null) { - languageName = "invalid base and extlang codes"; - } else { - languageName = "invalid base and extlang code - extlang would be valid base-lang code"; - } - } else if (invalidExtlang) { - if (extLangName == null) { - languageName = "invalid extlang code"; - } else { - languageName = "invalid extlang code - would be valid base-lang code"; - } - } else if (invalidLanguageCode) { - languageName = "invalid base-lang code"; - languageCode = extlang; - } else { - languageName = getSubtagName(extlang, ulocale, true); - if (languageName.startsWith("@")) { - languageName = languageName.substring(1); - } - //languageAndLink = getLanguageAndLink(extlang); - languageCode = extlang; - } - fixed = fixCodes.get(languageCode); - languageAndLink = originalCode; - } - builder.append(getLine("td", "Language", "2.2.1", languageAndLink, languageName, getCodeAndLink(Subtag.language, fixed, ulocale))); - addFixed(canonical, languageCode, fixed); - } - - String script = Subtag.script.get(m); - if (script != null) { - final String scriptCode = script = UCharacter.toTitleCase(Locale.ENGLISH, script, null); - String scriptName; - if (!names.containsKey(script)) { - scriptName = "invalid Code"; - } else { - scriptName = getSubtagName(script, ulocale, true); - script = getCodeAndLink(Subtag.script, script, ulocale); - } - final String fixed = fixCodes.get(scriptCode); - builder.append(getLine("td", "Script", "2.2.3", script, scriptName, getCodeAndLink(Subtag.script, fixed, ulocale))); - addFixed(canonical, scriptCode, fixed); - } - - String region = Subtag.region.get(m); - if (region != null) { - final String regionCode = region = region.toUpperCase(Locale.ENGLISH); - String regionName; - if (!names.containsKey(region)) { - regionName = "invalid Code"; - } else { - regionName = getSubtagName(region, ulocale, true); - region = getCodeAndLink(Subtag.region, region, ulocale); - } - final String fixed = fixCodes.get(regionCode); - builder.append(getLine("td", "Region", "2.2.4", region, regionName, getCodeAndLink(Subtag.region, fixed, ulocale))); - addFixed(canonical, regionCode, fixed); - } - - String variantList = Subtag.variants.get(m); - if (variantList != null) { - variantList = variantList.toLowerCase(Locale.ENGLISH); - final Set variants = new TreeSet(Arrays.asList(variantList.split("[-_]"))); - for (String variant : variants) { - final String variantCode = variant; - String variantName; - if (!names.containsKey(variant)) { - variantName = "invalid Code"; - } else { - variantName = getSubtagName(variant, ulocale, true); - variant = "" + variant + ""; - } - final String fixed = fixCodes.get(variantCode); - builder.append(getLine("td", "Variant", "2.2.5", variant, variantName, fixed)); - addFixed(canonical, variantCode, fixed); - } - } - - String extensionList = Subtag.extensions.get(m); - if (extensionList != null) { - extensionList = extensionList.toLowerCase(Locale.ENGLISH); - final Matcher m2 = extensionID.matcher(extensionList); - final Set extensions = new TreeSet(); - while (m2.find()) { - final String extension = m2.group(); - extensions.add(extension); - } - for (final String extension : extensions) { - builder.append(getLine("td", "Extension", "2.2.6", extension, "", null)); - addFixed(canonical, extension, null); - } - } - - String privateUse = Subtag.privateUse.get(m); - if (privateUse == null) { - privateUse = Subtag.privateUse2.get(m); - } - if (privateUse != null) { - privateUse = privateUse.toLowerCase(Locale.ENGLISH); - builder.append(getLine("td", "Private-Use", "2.2.7", privateUse, "", null)); - addFixed(canonical, privateUse, null); - } - builder.append("
\n"); - final String canonicalString = canonical.toString(); - if (!canonicalString.equals(oldInput)) { - builder.insert(0, "

Suggested Canonical Form: " + canonical + "

\n"); - } - builder.insert(0, prefix); - return builder.toString(); - } - - private static void addFixed(StringBuilder canonical, String code, String fixed) { - if (fixed == null) { - fixed = code; - } - if (fixed.startsWith("?")) { - return; - } - final int spacePos = fixed.indexOf(' '); - if (spacePos >= 0) { - fixed = fixed.substring(0, spacePos); - } - if (canonical.length() != 0) { - canonical.append('-'); - } - canonical.append(fixed); - } - - private static String getCodeAndLink(Subtag subtag, String codes, ULocale ulocale) { - if (codes == null) { - return codes; - } - final StringBuilder buffer = new StringBuilder(); - for (final String code : codes.split("\\s+")) { - final String value = getCodeAndLink2(subtag, code, ulocale); - if (buffer.length() != 0) { - buffer.append(" "); - } - buffer.append(value); - } - return buffer.toString(); - } - - private static String getCodeAndLink2(Subtag subtag, String code, ULocale ulocale) { - String name = getSubtagName(code, ulocale, false); - if (name != null) { - name = " title='" + name + "'"; - } else { - name = ""; - } - switch (subtag) { - case region: { - if (code.compareTo("A") < 0) { - code = "" + code + ""; - } else { - code = "" + code + ""; - } - return code; - } - case script: { - code = "" + code + ""; - return code; - } - case language: { - String alpha3 = code; - if (code.length() == 2) { - alpha3 = toAlpha3.get(code); - if (alpha3 == null) { - alpha3 = code; - } - } - code = "" + code + ""; - return code; - } - default: throw new IllegalArgumentException(); - } - } - - private static String getSubtagName(String code, ULocale ulocale, boolean html) { - String name = getIcuName(code, ulocale); - if (!name.equals(code)) { - return name; - } - if (!ulocale.equals(ULocale.ENGLISH)) { - name = getIcuName(code, ULocale.ENGLISH); - if (!name.equals(code)) { - name = name + "*"; - if (html) { - name = "" + name + ""; - } - return name; - } - } - name = names.get(code); - if (name != null) { - if (name.startsWith("@")) { - name = name.substring(1); - } - name = name + "**"; - if (html) { - name = "" + name + ""; - } - return name; - } - return null; - } - - private static String getIcuName(String code, ULocale ulocale) { - String icuName = code; - switch(code.length()) { - case 2: - case 3: - icuName = code.compareTo("a") < 0 - ? ULocale.getDisplayCountry("und-" + code, ulocale) - : ULocale.getDisplayLanguage(code, ulocale); - break; - case 4: - if (code.compareTo("A") >= 0) { - icuName = ULocale.getDisplayScript("und-" + code, ulocale); - break; - } // otherwise fall through! - default: - icuName = ULocale.getDisplayVariant("und-Latn-AQ-" + code, ulocale).toLowerCase(); - break; - } - return icuName; - } - - private static String getLine(String element, String type, String specSection, String subtag, String name, String replacement) { - if (name == null) { - name = "invalid"; - } - if (replacement != null) { - replacement = "<" + element + ">" + replacement + ""; - } else { - replacement = ""; - } - final String typeAndLink = specSection == null ? type : "" + type + ""; - return "<" + element + ">" + typeAndLink + "<" + element + ">" + subtag + "<" + element + ">" + name + "" + replacement + "\n"; - } - - public static String getLanguageOptions(ULocale toLocalizeInto) { - final StringBuilder result = new StringBuilder(); - if (toLocalizeInto.getLanguage().equals("en")) { - toLocalizeInto = ULocale.ENGLISH; - } - final ULocale[] list = ULocale.getAvailableLocales(); - final Map sorted = new TreeMap(Collator.getInstance(toLocalizeInto)); - for (final ULocale ulocale : list) { - final String country = ulocale.getCountry(); - if (country.length() != 0) { - continue; - } - if (QUALITY_EXCLUSIONS.contains(ulocale.toString())) { - continue; - } - - final String name = getLocaleName(ulocale, toLocalizeInto); - sorted.put(name, ulocale.toString()); - } - for (final String name : sorted.keySet()) { - final String code = sorted.get(name).toString(); - final String selected = code.equals(toLocalizeInto.toString()) ? " selected" : ""; - result.append("\n"); - } - return result.toString(); - /* - - - - - - - - - - - */ - } - - public static String getLocaleName(ULocale toBeLocalized, ULocale toLocalizeInto) { - String result = toBeLocalized.getDisplayName(toLocalizeInto); - final String test = toBeLocalized.getDisplayName(ULocale.ROOT); - final String englishName = toBeLocalized.getDisplayName(ULocale.ENGLISH); - - if (test.equals(result)) { - result = englishName + "*"; - } else if (!result.equalsIgnoreCase(englishName)) { - result += " / " + englishName; - } - - return result; - } -} diff --git a/unicodetools/src/main/java/org/unicode/jsp/MySymbolTable.java b/unicodetools/src/main/java/org/unicode/jsp/MySymbolTable.java index f3c26b1f5..ef8ae60c9 100644 --- a/unicodetools/src/main/java/org/unicode/jsp/MySymbolTable.java +++ b/unicodetools/src/main/java/org/unicode/jsp/MySymbolTable.java @@ -3,7 +3,7 @@ import java.util.Comparator; import java.util.List; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import com.ibm.icu.text.UTF16; import com.ibm.icu.text.UnicodeSet; @@ -26,7 +26,7 @@ public MySymbolTable(UnicodeProperty.Factory propertyFactory) { // String[] propertyNames = propertyName.split("[*]"); // for (int i = propertyNames.length - 1; i >= 0; ++i) { // String pname = propertyNames[i]; - // + // // } // return null; // } @@ -41,7 +41,7 @@ public boolean applyPropertyAlias(String propertyName, if (posNotEqual < 0) posNotEqual = propertyName.length(); if (posColon < 0) posColon = propertyName.length(); int opPos = posNotEqual < posColon ? posNotEqual : posColon; - propertyValue = propertyValue.length() == 0 ? propertyName.substring(opPos+1) + propertyValue = propertyValue.length() == 0 ? propertyName.substring(opPos+1) : propertyName.substring(opPos+1) + "=" + propertyValue; propertyName = propertyName.substring(0,opPos); if (posNotEqual < posColon) { @@ -170,6 +170,7 @@ public ComparisonMatcher(String pattern, Relation comparator) { this.pattern = pattern; } + @Override public boolean matches(Object value) { int comp = comparator.compare(pattern, value.toString()); switch (relation) { @@ -191,4 +192,4 @@ public static void setDefaultXSymbolTable(UnicodeProperty.Factory factory) { UnicodeSet.setDefaultXSymbolTable(new MySymbolTable(factory)); UnicodeProperty.ResetCacheProperties(); } -} \ No newline at end of file +} diff --git a/unicodetools/src/main/java/org/unicode/jsp/NFM.java b/unicodetools/src/main/java/org/unicode/jsp/NFM.java deleted file mode 100644 index c7d1fb49f..000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/NFM.java +++ /dev/null @@ -1,34 +0,0 @@ -package org.unicode.jsp; - -import java.util.regex.Pattern; - -import com.ibm.icu.dev.util.UnicodeMap; -import com.ibm.icu.impl.Utility; - -public class NFM { - public static final UnicodeMap nfm = new UnicodeMap(); - - static { - new MySemiFileReader().process(NFM.class, "nfm.txt"); - nfm.freeze(); - } - static final class MySemiFileReader extends FileUtilities.SemiFileReader { - Pattern spaces = Pattern.compile("\\s+"); - @Override - protected boolean handleLine(int start, int end, String[] items) { - String results; - switch (items.length) { - default: - throw new IllegalArgumentException(); - case 2: - results = Utility.fromHex(items[1], 1, spaces); - nfm.putAll(start, end, results); - break; - case 1: - nfm.putAll(start, end, ""); - break; - } - return true; - } - } -} diff --git a/unicodetools/src/main/java/org/unicode/jsp/Navigator.java b/unicodetools/src/main/java/org/unicode/jsp/Navigator.java deleted file mode 100644 index b5b51b4f0..000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/Navigator.java +++ /dev/null @@ -1,210 +0,0 @@ -package org.unicode.jsp; - -import java.text.DecimalFormat; -import java.text.NumberFormat; - -final public class Navigator { - private double lat1, lon1; - private transient double cosLat1, sinLat1; - - private double lat2, lon2; - private double distance, course; - private transient double sinDistance, cosDistance; - - public static double DEGREE = Math.PI/180.0; - - private static double EPS = 0.000001; // EPS a small number ~ machine precision - - public static void main(String[] args) { - final double latitude1 = 0; // toRadians(37.0, 28.0, 48.0, false); // N = + - final double longitude1 = 0; // toRadians(180-122.0, 8.0, 39.0, true); // W = - - final Tester tester = new Tester(latitude1, longitude1); - - final double[][] tests = { - {90*DEGREE, -180*DEGREE}, - {-50*DEGREE, -180*DEGREE}, - {25*DEGREE, 10*DEGREE}, - {latitude1, longitude1}, - }; - for (final double[] test : tests) { - tester.testItem(0, test[0], test[1]); - } - // exhaustive - tester.test(0, 0, -90, 90, -180, 180, 1); - } - private static class Tester { - Navigator a = new Navigator(); - Navigator b = new Navigator(); - int counter = 0, failures = 0; - - Tester(double latitude1, double longitude1) { - a.setLat1Lon1(latitude1, longitude1); - b.setLat1Lon1(latitude1, longitude1); - System.out.println("\tLatitude1 " + degrees(a.getLat1()) + "\tLongitude1 " + degrees(a.getLon1())); - } - - private void test(double lat0, double lon0, double latMin, double latMax, double lonMin, double lonMax, double inc) { - for (double dLat = latMin; dLat <= latMax; dLat += inc) { - for (double dLon = lonMin; dLon <= lonMax; dLon += inc) { - counter++; - final double lat2a = lat0 + dLat*DEGREE; - final double lon2a = lon0 + dLon*DEGREE; - if (!testItem(counter, lat2a, lon2a)) { - failures++; - } - } - } - System.out.println("Count: " + counter + "\tFailures: " + failures); - counter = failures = 0; - } - - boolean testItem(int counter, double lat2a, double lon2a) { - a.setLat2Lon2(lat2a, lon2a); - lat2a = a.getLat2(); - lon2a = a.getLon2(); - final double distance2 = a.getDistance(); - final double course2 = a.getCourse(); - b.setDistanceCourse(distance2, course2); - final double lat2b = b.getLat2(); - final double lon2b = b.getLon2(); - final boolean success = areClose(lat2b, lat2a) && areClose(lon2a, lon2b); - if (success && (counter % 1023) != 1) { - return true; - } - System.out.println(); - System.out.println(counter + "\tLat " + degrees(lat2a) + "\tLong " + degrees(lon2a) - + "\tDistance " + degrees(distance2) + "\tCourse " + degrees(course2)); - System.out.println("\t\tLat2 " + degrees(lat2b) + "\tLong2 " + degrees(lon2b)); - return success; - } - } - private static boolean areClose(double a, double b) { - a -= b; - return (-EPS < a && a < EPS); - } - - private static final NumberFormat nf = new DecimalFormat("+000.000;-000.000"); - public static String degrees(double in) { - return nf.format(in/DEGREE) + '°'; - } - - public static double toRadians(double degrees, double minutes, double seconds, boolean northOrWest) { - double result = (degrees + minutes / 60 + seconds / 3600); - if (!northOrWest) { - result = -result; - } - return result * DEGREE; - } - - public Navigator setLat1Lon1(double lat1, double lon1) { - if (lat1 < -Math.PI/2 + EPS) { - lat1 = -Math.PI/2; - lon1 = 0; // no point in distinguishing - } else if (lat1 > Math.PI/2 + EPS) { - lat1 = Math.PI/2; - lon1 = 0; // no point in distinguishing - } else { - lon1 = wrap(lon1, -Math.PI, Math.PI); - } - this.lat1 = lat1; - this.lon1 = lon1; - cosLat1 = Math.cos(lat1); - sinLat1 = Math.sin(lat1); - return this; - } - - public Navigator setLat2Lon2(double lat2, double lon2) { - if (lat2 < -Math.PI/2 + EPS) { - lat2 = -Math.PI/2; - lon2 = 0; // no point in distinguishing - } else if (lat2 > Math.PI/2 - EPS) { - lat2 = Math.PI/2; - lon2 = 0; // no point in distinguishing - } else { - lon2 = wrap(lon2, -Math.PI, Math.PI); - } - this.lat2 = lat2; - this.lon2 = lon2; - - final double cosLat2 = Math.cos(lat2); - final double sinLat2 = Math.sin(lat2); - - // compute distance - final double halfLatDiff = Math.sin((lat1-lat2)/2); - final double halfLonDiff = Math.sin((lon1-lon2)/2); - - distance = 2*Math.asin(Math.sqrt(halfLatDiff*halfLatDiff + cosLat1*cosLat2*halfLonDiff*halfLonDiff)); - sinDistance = Math.sin(distance); - cosDistance = Math.cos(distance); - - // compute course - if (distance < EPS) { - course = 0; - } else if (cosLat1 < EPS) { - if (lat1 > 0) { - course = Math.PI; // starting from N pole - } else { - course = 2*Math.PI; // starting from S pole - } - } else { - double cosCourse = (sinLat2-sinLat1*cosDistance)/(sinDistance*cosLat1); - if (cosCourse < -1.0) { - cosCourse = -1.0; - } - if (cosCourse > 1.0) { - cosCourse = 1.0; - } - course=Math.acos(cosCourse); - if (Math.sin(lon2-lon1) >= 0) { - course=2*Math.PI-course; - } - } - return this; - } - - public Navigator setDistanceCourse(double distance, double course) { - this.distance = distance; - this.course = course; - sinDistance = Math.sin(distance); - cosDistance = Math.cos(distance); - - lat2 = Math.asin(sinLat1*cosDistance+cosLat1*sinDistance*Math.cos(course)); - if (lat2 < -Math.PI/2 + EPS || lat2 > Math.PI/2 - EPS) { - lon2 = 0; // no point in distinguishing - } else { - final double dlon=Math.atan2( - Math.sin(course)*sinDistance*cosLat1, - cosDistance-sinLat1*Math.sin(lat2)); - lon2 = wrap(lon1-dlon, -Math.PI, Math.PI); - } - return this; - } - - public static double wrap(double aa, double low, double high) { - final double a = aa - low; - final double span = high - low; - if (a >= 0 && a < span) { - return aa; - } - final double intQuotient = Math.floor(a / span); - return a - intQuotient * span + low; - } - public double getCourse() { - return course; - } - public double getDistance() { - return distance; - } - public double getLat1() { - return lat1; - } - public double getLat2() { - return lat2; - } - public double getLon1() { - return lon1; - } - public double getLon2() { - return lon2; - } -} \ No newline at end of file diff --git a/unicodetools/src/main/java/org/unicode/jsp/PropertyMetadata.java b/unicodetools/src/main/java/org/unicode/jsp/PropertyMetadata.java deleted file mode 100644 index 3596f9b9a..000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/PropertyMetadata.java +++ /dev/null @@ -1,42 +0,0 @@ -package org.unicode.jsp; - -import java.util.Arrays; -import java.util.Collections; -import java.util.SortedSet; -import java.util.TreeSet; - -import com.ibm.icu.impl.Row; -import com.ibm.icu.impl.Row.R4; - -public class PropertyMetadata { - // #Property ; Source ; Datatype ; Category - - private static class MyHandler extends FileUtilities.SemiFileReader { - private SortedSet> set = new TreeSet>(); - @Override - protected boolean isCodePoint() { - return false; - } - @Override - public boolean handleLine(int start, int end, String[] items) { - if (items.length != 4) { - throw new IllegalArgumentException("Must have exactly 4 items: " + Arrays.asList(items)); - } - set.add((R4) Row.of(items[3], items[2], items[1], items[0]).freeze()); - return true; - } - @Override - protected void handleEnd() { - super.handleEnd(); - set = (Collections.unmodifiableSortedSet(set)); - } - SortedSet> getSet() { - if (set == null) { - throw new IllegalArgumentException("initialization failed"); - } - return set; - } - } - public static SortedSet> CategoryDatatypeSourceProperty = ((MyHandler) new MyHandler() - .process(PropertyMetadata.class, "propertyMetadata.txt")).getSet(); -} diff --git a/unicodetools/src/main/java/org/unicode/jsp/ScriptCategoriesCopy.java b/unicodetools/src/main/java/org/unicode/jsp/ScriptCategoriesCopy.java deleted file mode 100644 index 8cd15d3e6..000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/ScriptCategoriesCopy.java +++ /dev/null @@ -1,142 +0,0 @@ -package org.unicode.jsp; - -import com.ibm.icu.text.UnicodeSet; - -public class ScriptCategoriesCopy { - - - // From: http://www.phon.ucl.ac.uk/home/wells/ipa-unicode.htm - public static final UnicodeSet IPA = new UnicodeSet( - "[a-zæçðøħŋœǀ-ǃɐ-ɨɪ-ɶ ɸ-ɻɽɾʀ-ʄʈ-ʒʔʕʘʙʛ-ʝʟʡʢ ʤʧʰ-ʲʴʷʼˈˌːˑ˞ˠˤ̀́̃̄̆̈ ̘̊̋̏-̜̚-̴̠̤̥̩̪̬̯̰̹-̽͜ ͡βθχ↑-↓↗↘]" - ).freeze(); - - public static final UnicodeSet IPA_EXTENSIONS = new UnicodeSet( - "[ɩɷɼɿʅ-ʇʓʖʗʚʞʠʣʥʦʨ-ʯ]" - ).freeze(); - - - public static final UnicodeSet DEPRECATED_NEW = new UnicodeSet("[[:deprecated:][\\u0149\\u0F77\\u0F79\\u17A4\\u2329\\u232A]-[\\u0340\\u0341\\u17D3]]").freeze(); - //removing 0340, 0341, 17D3, and adding 0149, 0F77, 0F79, 17A4, 2329, 232A - - // TODO - change to Blocks - public static final UnicodeSet ARCHAIC_31 = new UnicodeSet( - // "[[:script=Bugi:][:script=Buhd:][:script=Cari:][:script=Copt:]" + - // "[:script=Cprt:][:script=Dsrt:][:script=Glag:][:script=Goth:][:script=Hano:][:script=Ital:][:script=Khar:][:script=Linb:]" + - // "[:script=Lyci:][:script=Lydi:][:script=Ogam:][:script=Osma:][:script=Phag:][:script=Phnx:][:script=Rjng:][:script=Runr:]" + - // "[:script=Shaw:][:script=Sund:][:script=Sylo:][:script=Syrc:][:script=Tagb:][:script=Tglg:][:script=Ugar:][:script=Xpeo:][:script=Xsux:]" + - // "[:block=Ancient_Greek_Musical_Notation:][:block=Phaistos_Disc:]]" - "[ [:blk=Ancient_Greek_Musical_Notation:]" + - "[:blk=Buginese:] " + - "[:blk=Buhid:] [:blk=Carian:] " + - "[:blk=Coptic:] [:blk=Cuneiform:] " + - "[:blk=Cuneiform_Numbers_And_Punctuation:] " + - "[:blk=Cypriot_Syllabary:] [:blk=Deseret:] [:blk=Glagolitic:] " + - "[:blk=Gothic:] [:blk=Hanunoo:] [:blk=Kharoshthi:] [:blk=Linear_B_Ideograms:] " + - "[:blk=Linear_B_Syllabary:] [:blk=Lycian:] [:blk=Lydian:] [:blk=Ogham:]" + - " [:blk=Old_Italic:] [:blk=Old_Persian:] [:blk=Osmanya:] [:blk=Phags_Pa:] " + - "[:blk=Phaistos_Disc:] [:blk=Phoenician:] [:blk=Rejang:] [:blk=Runic:] " + - "[:blk=Shavian:] [:blk=Sundanese:] [:blk=Syloti_Nagri:] [:blk=Syriac:] " + - "[:blk=Tagalog:] [:blk=Tagbanwa:] [:blk=Ugaritic:] [:sc=Copt:]]" - ).freeze(); - // from the old version of UTS39 - public static final UnicodeSet ARCHAIC_39 = new UnicodeSet( - // "[\\u018D\\u01AA-\\u01AB\\u01B9-\\u01BB\\u01BE\\u01BF\\u021C-\\u021D\\u025F\\u0277\\u027C\\u029E\\u0343" + - // "\\u03D0-\\u03D1\\u03D5-\\u03E1\\u03F7-\\u03F8\\u03F9-\\u03FB\\u0483-\\u0486\\u05A2\\u05C5-\\u05C7\\u066E-\\u066F\\u068E\\u0CDE\\u10F1-\\u10F6\\u1100-\\u1159" + - // "\\u115A-\\u115E\\u1161-\\u11A2\\u11A3-\\u11A7\\u11A8-\\u11F9\\u11FA-\\u11FF\\u1680-\\u169A\\u16A0-\\u16EA\\u16EE-\\u16F0\\u1700-\\u170C\\u170E-\\u1714" + - // "\\u1720-\\u1734\\u1740-\\u1753\\u1760-\\u176C\\u176E-\\u1770\\u1772-\\u1773\\u17A8\\u17D1\\u17DD\\u1B00-\\u1B4B\\u1B50-\\u1B7C\\u1DC0-\\u1DC3" + - // "\\u2C00-\\u2C2E\\u2C30-\\u2C5E\\u3165-\\u318E\\uA700-\\uA707\\uA840-\\uA877"+ - // "\\U00010000-\\U0001000B\\U0001000D-\\U00010026\\U00010028-\\U0001003A\\U0001003C-\\U0001003D\\U0001003F-\\U0001004D" + - // "\\U00010050-\\U0001005D\\U00010080-\\U000100FA\\U00010140-\\U00010174\\U00010300-\\U0001031E\\U00010330-\\U0001034A" + - // "\\U00010380-\\U0001039D\\U0001039F-\\U000103C3\\U000103C8-\\U000103D5\\U00010400-\\U0001049D\\U000104A0-\\U000104A9" + - // "\\U00010800-\\U00010805\\U00010808\\U0001080A-\\U00010835\\U00010837-\\U00010838\\U0001083C\\U0001083F\\U00010900-\\U00010919" + - // "\\U0001091F\\U00010A00-\\U00010A03\\U00010A05-\\U00010A06\\U00010A0C-\\U00010A13\\U00010A15-\\U00010A17\\U00010A19-\\U00010A33" + - // "\\U00010A38-\\U00010A3A\\U00010A3F-\\U00010A47\\U00010A50-\\U00010A58\\U00012000-\\U0001236E\\U00012400-\\U00012462\\U00012470-\\U00012473]" - "[ " + - //"[:blk=Balinese:] " + - "[:blk=Ancient_Greek_Numbers:]" + - "[:Block=Hangul_Jamo:]" + - "[:Block=Hangul_Compatibility_Jamo:]" + - "[֢ ׅ ̓ ᷀-᷃ ҃-҆ ׇ ៑ ៝ ׆ ꜀-꜇ ɟ ʞ ɷ ɼ ƪ ƾ ƫ ƍ ƹ ƺ ȝȜ ƿ ƻ ϐ ϝϜ ϛϚ ϑ ϗ ϖ ϻϺ ϟϞ ϙϘ Ϲ ϕ ϡϠ ϸ Ϸ ჱ-ჶ ٮ ڎ ٯ ೞ ឨ]" + - "]" - //"[\u018D\u01AA\u01AB\u01B9-\u01BB\u01BE\u01BF\u021C\u021D\u025F\u0277\u027C\u029E\u0343\u03D0\u03D1\u03D5-\u03E1\u03F7-\u03FB\u0483-\u0486\u05A2\u05C5-\u05C7\u066E\u066F\u068E\u0CDE\u10F1-\u10F6\u1100-\u115E\u1161-\u11FF\u17A8\u17D1\u17DD\u1DC0-\u1DC3\u3165-\u318E\uA700-\uA707\\U00010140-\\U00010174]]" - ).freeze(); - - public static final UnicodeSet ARCHAIC_HEURISTIC = new UnicodeSet( - "[ " + - "[:blk=Ancient_Symbols:]" + - "[:blk=Ancient_Greek_Musical_Notation:] " + - "[:blk=Cyrillic_Extended_A:] " + - "[:blk=Cyrillic_Extended_B:]" + - "[˯-˿ͣ-ͳͶͷߨ-ߪ᷎-᷿ᷦ᷾ẜẝẟ Ỻ-ỿ⁖⁘-⁞ↀ-Ↄↅ-ↈⱷ-ⱽ⸀-⸗⸪-⸰ ꜠꜡ꜰ-ꝸꟻ-ꟿ[ݾ ݿ ػ-ؿ]]" + - "]" - //"[\u02EF-\u02FF\u0363-\u0373\u0376\u0377\u07E8-\u07EA\u1DCE-\u1DE6\u1DFE\u1DFF\u1E9C\u1E9D\u1E9F\u1EFA-\u1EFF\u2056\u2058-\u205E\u2180-\u2183\u2185-\u2188\u2C77-\u2C7D\u2E00-\u2E17\u2E2A-\u2E30\uA720\uA721\uA730-\uA778\uA7FB-\uA7FF]]" - ).freeze(); - - public static final UnicodeSet ARCHAIC_ADDITIONS = new UnicodeSet( - "[ " + - "[:blk=Aegean_Numbers:] " + - "[:blk=Byzantine_Musical_Symbols:] " + - "[:block=Georgian Supplement:]" + - "[ͻ-ͽϏϽ-Ͽ[ƨ ƽ ƅ][ؕ-ؚ ۖ-ۤ ۧ ۨ ۪-ۭ ۩ ۥ ۦ][֑-֯][ׄ ׅ][ﬠ-ﬨ][ﭏ][Ⴀ-Ⴆ Ⴡ Ⴇ-Ⴌ Ⴢ Ⴍ-Ⴒ Ⴣ Ⴓ-Ⴞ Ⴤ Ⴟ Ⴠ Ⴥ][Ⴀ-Ⴥ][ƄƧƸƼǷϲϴↄ]჻]" + - "]" - // "[\u0269\u027F\u0285-\u0287\u0293\u0296\u0297\u029A\u02A0\u02A3\u02A5\u02A6\u02A8-\u02AF\u0313\u037B-\u037D\u03CF\u03FD-\u03FF]]" - ).freeze(); - - public static final UnicodeSet ARCHAIC = new UnicodeSet(ARCHAIC_31) - .addAll(ARCHAIC_39) - .addAll(ARCHAIC_HEURISTIC) - .addAll(ARCHAIC_ADDITIONS).freeze(); - static { - //System.out.println("Archaic: " + ARCHAIC); - final UnicodeSet knownOk = new UnicodeSet("[\u0392\u0398\u03A0\u03A6\u03B2\u03B8\u03C0\u03C6]"); - final UnicodeSet caseProblems = new UnicodeSet(ARCHAIC).closeOver(UnicodeSet.CASE).removeAll(ARCHAIC).removeAll(knownOk); - if (caseProblems.size() != 0) { - throw new IllegalArgumentException("Case: " + caseProblems); - } - } - - public static final UnicodeSet EUROPEAN = new UnicodeSet( - "[[:script=Latin:][:script=Greek:][:script=Coptic:][:script=Cyrillic:]" + - "[:script=Glag:][:script=Armenian:][:script=Georgian:][:script=Shavian:][:script=braille:]" + - "[:script=ogham:][:script=runic:][:script=Gothic:][:script=Cypriot:][:script=Linear b:]" + - "[:script=old italic:]]" - ).freeze(); - public static final UnicodeSet MIDDLE_EASTERN = new UnicodeSet( - "[[:script=Hebrew:][:script=Arabic:][:script=Syriac:][:script=Thaana:]" + - "[:script=Carian:][:script=Lycian:][:script=Lydian:][:script=Phoenician:]" + - "[:script=Cuneiform:][:script=old persian:][:ugaritic:]]" - ).freeze(); - public static final UnicodeSet SOUTH_ASIAN = new UnicodeSet( - "[[:script=Devanagari:][:script=Bengali:][:script=Gurmukhi:][:script=Gujarati:]" + - "[:script=Oriya:][:script=Tamil:][:script=Telugu:][:script=Kannada:][:script=Malayalam:]" + - "[:script=Sinhala:][:script=Tibetan:][:script=Phags-Pa:][:script=Limbu:][:script=Sylo:][:script=Kharoshthi:][:script=lepcha:][:saurashtra:][:script=ol chiki:]]" - ).freeze(); - public static final UnicodeSet SOUTHEAST_ASIAN = new UnicodeSet( - "[[:script=Thai:][:script=Lao:][:script=Myanmar:][:script=Khmer:]" + - "[:script=Tai_Le:][:script=New Tai Lue:][:script=Tagalog:][:script=Hanunoo:][:script=Buhid:]" + - "[:script=Tagbanwa:][:script=Buginese:][:script=Balinese:][:script=Cham:][:script=kayah li:][:script=rejang:][:script=sundanese:]]" - ).freeze(); - public static final UnicodeSet EAST_ASIAN = new UnicodeSet( - "[[:script=Bopomofo:][:script=Hiragana:][:script=Katakana:][:script=Mongolian:]" + - "[:script=Yi:]]" - ).freeze(); - public static final UnicodeSet AFRICAN = new UnicodeSet( - "[[:script=Ethiopic:][:script=Osmanya:][:script=Tifinagh:]" + - "[:script=Nko:][:script=vai:]]" - ).freeze(); - public static final UnicodeSet AMERICAN = new UnicodeSet( - "[[:script=Cherokee:][:script=CANS:][:script=Deseret:]]" - ).freeze(); - public static final UnicodeSet OTHER_SCRIPTS = new UnicodeSet("[^[:script=common:][:script=inherited:]]") - .removeAll(EUROPEAN) - .removeAll(MIDDLE_EASTERN) - .removeAll(SOUTH_ASIAN) - .removeAll(SOUTHEAST_ASIAN) - .removeAll(EAST_ASIAN) - .removeAll(AFRICAN) - .removeAll(AMERICAN) - .removeAll(new UnicodeSet("[[:script=han:][:script=hangul:]]")) - .freeze(); - - -} diff --git a/unicodetools/src/main/java/org/unicode/jsp/ScriptExtensions.java b/unicodetools/src/main/java/org/unicode/jsp/ScriptExtensions.java deleted file mode 100644 index 6f2b2e5a2..000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/ScriptExtensions.java +++ /dev/null @@ -1,95 +0,0 @@ -package org.unicode.jsp; - -import java.util.BitSet; -import java.util.Collection; -import java.util.Comparator; -import java.util.Set; -import java.util.TreeSet; -import java.util.regex.Pattern; - -import com.ibm.icu.dev.util.CollectionUtilities; -import com.ibm.icu.dev.util.UnicodeMap; -import com.ibm.icu.lang.UCharacter; -import com.ibm.icu.lang.UProperty; -import com.ibm.icu.text.UnicodeSet; - -public class ScriptExtensions { - - public static final Comparator COMPARATOR = new Comparator() { - - @Override - public int compare(BitSet o1, BitSet o2) { - final int diff = o1.cardinality() - o2.cardinality(); - if (diff != 0) { - return diff; - } - if (o1.equals(o2)) { - return 0; - } - final String n1 = getNames(o1, UProperty.NameChoice.LONG, " "); - final String n2 = getNames(o2, UProperty.NameChoice.LONG, " "); - return n1.compareToIgnoreCase(n2); - } - }; - - private UnicodeMap scriptSpecials; - - public Collection getAvailableValues() { - return scriptSpecials.getAvailableValues(); - } - - public UnicodeSet getSet(BitSet value) { - return scriptSpecials.getSet(value); - } - - private static class MyHandler extends FileUtilities.SemiFileReader { - public final static Pattern SPACES = Pattern.compile("\\s+"); - - UnicodeMap map = new UnicodeMap(); - - @Override - public boolean handleLine(int start, int end, String[] items) { - final BitSet bitSet = new BitSet(ScriptTester.LIMIT); - for (final String script : SPACES.split(items[1])) { - final int scriptCode = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, script); - bitSet.set(scriptCode); - } - map.putAll(start, end, bitSet); - return true; - } - } - - public static ScriptExtensions make(String directory, String filename) { - final ScriptExtensions result = new ScriptExtensions(); - result.scriptSpecials = ((ScriptExtensions.MyHandler) new MyHandler() - .process(directory, filename)).map.freeze(); - return result; - } - - public static ScriptExtensions make(Class aClass, String filename) { - final ScriptExtensions result = new ScriptExtensions(); - result.scriptSpecials = ((ScriptExtensions.MyHandler) new MyHandler() - .process(aClass, filename)).map.freeze(); - return result; - } - - public BitSet get(int codepoint) { - return scriptSpecials.get(codepoint); - } - - public void putAllInto(UnicodeMap char2scripts) { - char2scripts.putAll(scriptSpecials); - } - - public static String getNames(BitSet value, int choice, String separator) { - return getNames(value, choice, separator, new TreeSet()); - } - - public static String getNames(BitSet value, int choice, String separator, Set names) { - names.clear(); - for (int i = value.nextSetBit(0); i >= 0; i = value.nextSetBit(i+1)) { - names.add(ScriptTester.getScriptName(i, choice)); - } - return CollectionUtilities.join(names, separator).toString(); - } -} \ No newline at end of file diff --git a/unicodetools/src/main/java/org/unicode/jsp/ScriptTester.java b/unicodetools/src/main/java/org/unicode/jsp/ScriptTester.java deleted file mode 100644 index f67237946..000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/ScriptTester.java +++ /dev/null @@ -1,403 +0,0 @@ -package org.unicode.jsp; - -import java.util.BitSet; -import java.util.Collection; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; -import java.util.TreeSet; - -import com.ibm.icu.dev.util.UnicodeMap; -import com.ibm.icu.lang.UCharacter; -import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory; -import com.ibm.icu.lang.UProperty; -import com.ibm.icu.lang.UScript; -import com.ibm.icu.text.Normalizer; -import com.ibm.icu.text.UnicodeSet; - -/** - * Class for testing whether strings have allowed combinations of multiple scripts. - * @author markdavis - */ -public class ScriptTester { - private final UnicodeMap character_compatibleScripts; - - - public enum CompatibilityLevel {Highly_Restrictive, Moderately_Restrictive} - public enum ScriptSpecials {on, off} - - /** - * Extended scripts; note that they do not have stable numbers, and should not be persisted. - */ - public static final int - //HANT = UScript.CODE_LIMIT, - //HANS = HANT + 1, - LIMIT = UScript.CODE_LIMIT; // HANS + 1; - - private static String[][] EXTENDED_NAME = {{"Hant", "Han Traditional"}, {"Hans", "Han Simplified"}}; - - public static String getScriptName(int extendedScriptCode, int choice) { - if (extendedScriptCode >= UScript.CODE_LIMIT) { - return EXTENDED_NAME[extendedScriptCode - UScript.CODE_LIMIT][choice]; - } - return UCharacter.getPropertyValueName(UProperty.SCRIPT, extendedScriptCode, choice); - } - - - private static final BitSet ALL = new BitSet(LIMIT); // be careful when using this; can't freeze it! - static { - ALL.set(0, LIMIT, true); - } - - /** - * Build a ScriptTester - * @return - */ - public static Builder start(CompatibilityLevel level, ScriptSpecials specials) { - return new Builder(level, specials); - } - - public static Builder start() { - return new Builder(CompatibilityLevel.Highly_Restrictive, ScriptSpecials.on); - } - - public static Builder start(CompatibilityLevel level) { - return new Builder(level, ScriptSpecials.on); - } - - - /** - * If the scripts in the string are compatible, then returns a list of them. Otherwise returns an empty bitset. - * The input must be in NFD. - * @param input - * @return bitset of scripts found - */ - public boolean isOk(CharSequence input) { - input = Normalizer.normalize(input.toString(), Normalizer.NFD); - // We make one pass forward and one backward, finding if each characters scripts - // are compatible with the ones before and after - // We save the value that we collect on the first pass. - int cp; - final int maxSize = input.length(); - int base = -1; - final BitSet[] actual = new BitSet[maxSize]; - final BitSet[] compat = new BitSet[maxSize]; - int codePointCount = 0; - final BitSet compatBefore = new BitSet(LIMIT); - compatBefore.or(ALL); - int lastCp = -1; - for (int i = 0; i < maxSize; i += Character.charCount(cp)) { - cp = Character.codePointAt(input, i); - // check for mixed numbers - final int type = UCharacter.getType(cp); - if (type == ECharacterCategory.DECIMAL_DIGIT_NUMBER) { - final int newBase = cp & 0xFFFFF0; - if (base < 0) { - base = newBase; - } else if (base != newBase){ - return false; - } - } - // check for multiple combining marks - if (type == ECharacterCategory.NON_SPACING_MARK || type == ECharacterCategory.ENCLOSING_MARK) { - if (lastCp == cp) { - return false; - } - } - // check scripts - compat[codePointCount] = character_compatibleScripts.get(cp); - actual[codePointCount] = getActualScripts(cp); - if (!actual[codePointCount].intersects(compatBefore)) { - return false; - } - compatBefore.and(compat[codePointCount]); - codePointCount++; - lastCp = cp; - } - compatBefore.or(ALL); - for (int i = codePointCount - 1; i >= 0; --i) { - if (!actual[i].intersects(compatBefore)) { - return false; - } - compatBefore.and(compat[i]); - } - // check numbers - return true; - } - - - - // TODO, cache results - private BitSet getActualScripts(int cp) { - BitSet actualScripts = scriptSpecials.get(cp); - if (actualScripts == null) { - actualScripts = new BitSet(LIMIT); - final int script = UCharacter.getIntPropertyValue(cp, UProperty.SCRIPT); - actualScripts.set(script); - } - return actualScripts; - } - - public boolean filterTable(List> table) { - - // We make one pass forward and one backward, finding if each characters scripts - // are compatible with the ones before. - // We then make a second pass for the ones after. - // Could be optimized if needed - final int maxSize = table.size(); - final BitSet compatBefore = new BitSet(LIMIT); - compatBefore.or(ALL); - final BitSet anyCompatAt = new BitSet(LIMIT); - - final HashSet toRemove = new HashSet(); - for (int i = 0; i < maxSize; ++i) { - toRemove.clear(); - anyCompatAt.clear(); - final Set column = table.get(i); - for (final String item : column) { - final BitSet compatibleScripts = getCompatibleScripts(item); // ANDed - anyCompatAt.or(compatibleScripts); - final BitSet actualScripts = getActualScripts(item); // ORed - if (!actualScripts.intersects(compatBefore)) { - toRemove.add(item); - } - } - column.removeAll(toRemove); - if (column.size() == 0) { - return false; - } - compatBefore.and(anyCompatAt); - } - // now reverse order - compatBefore.or(ALL); - for (int i = maxSize - 1; i >= 0; --i) { - toRemove.clear(); - anyCompatAt.clear(); - final Set column = table.get(i); - for (final String item : column) { - final BitSet compatibleScripts = getCompatibleScripts(item); // ANDed - anyCompatAt.or(compatibleScripts); - final BitSet actualScripts = getActualScripts(item); // ORed - if (!actualScripts.intersects(compatBefore)) { - toRemove.add(item); - } - } - column.removeAll(toRemove); - if (column.size() == 0) { - return false; - } - compatBefore.and(anyCompatAt); - } - return true; - } - - private BitSet getActualScripts(String item) { - final BitSet toOrWith = new BitSet(LIMIT); - int cp; - for (int i = 0; i < item.length(); i += Character.charCount(cp)) { - cp = Character.codePointAt(item, i); - toOrWith.or(getActualScripts(cp)); - } - return toOrWith; - } - - private BitSet getCompatibleScripts(String item) { - final BitSet toAndWith = new BitSet(LIMIT); - toAndWith.or(ALL); - int cp; - for (int i = 0; i < item.length(); i += Character.charCount(cp)) { - cp = Character.codePointAt(item, i); - toAndWith.and(character_compatibleScripts.get(cp)); - } - return toAndWith; - } - - /** - * Each character in item has a compatible set that intersects overall. - * @param item - * @param overallCompatible - * @return - */ - private boolean isCompatible(String input, BitSet overallCompatible) { - int cp; - for (int i = 0; i < input.length(); i += Character.charCount(cp)) { - cp = Character.codePointAt(input, i); - final BitSet scripts = character_compatibleScripts.get(cp); // will never fail - if (!scripts.intersects(overallCompatible)) { - return false; - } - } - return true; - } - - // Ugly hack, because BitSet doesn't have the method. - private boolean contains(BitSet set1, BitSet set2) { - // quick check to verify intersecting - if (!set1.intersects(set2)) { - return false; - } - final BitSet temp = new BitSet(); - temp.or(set2); - temp.and(set1); - // we now have the intersection. It must be equal to set2 - return temp.equals(set2); - } - - static ScriptExtensions scriptSpecials = ScriptExtensions.make(ScriptExtensions.class, "ScriptExtensions.txt"); - - public static BitSet getScriptSpecials(int codepoint) { - final BitSet output = new BitSet(LIMIT); - final BitSet actualScripts = scriptSpecials.get(codepoint); - if (actualScripts != null) { - output.or(actualScripts); - } else { - final int script = UCharacter.getIntPropertyValue(codepoint, UProperty.SCRIPT); - output.set(script); - } - return output; - } - - public static UnicodeMap getScriptSpecialsNames() { - final UnicodeMap result = new UnicodeMap(); - final Set names = new TreeSet(); // to alphabetize - - for (final BitSet value : scriptSpecials.getAvailableValues()) { - result.putAll(scriptSpecials.getSet(value), ScriptExtensions.getNames(value, UProperty.NameChoice.LONG, ",", names)); - } - return result; - } - - public static String[][] getScriptSpecialsAlternates() { - final Collection availableValues = scriptSpecials.getAvailableValues(); - final String[][] result = new String[availableValues.size()][]; - final Set names = new TreeSet(); // to alphabetize - - int i = 0; - for (final BitSet value : availableValues) { - final String baseName = ScriptExtensions.getNames(value, UProperty.NameChoice.LONG, ",", names); - final String altName = ScriptExtensions.getNames(value, UProperty.NameChoice.SHORT, ",", names); - final String[] row = {baseName, altName}; - result[i++] = row; - } - return result; - } - - private ScriptTester(UnicodeMap character_scripts) { - character_compatibleScripts = character_scripts; - } - - public static class Builder { - - private final Map compatible = new TreeMap(); - private final UnicodeMap char2scripts = new UnicodeMap(); - - private Builder(CompatibilityLevel level, ScriptSpecials specials) { - // make everything compatible with itself - for (int i = 0; i < LIMIT; ++i) { - final BitSet itself = new BitSet(LIMIT); - itself.set(i); - compatible.put(i, itself); - } - // first do levels - switch (level) { - case Moderately_Restrictive: - for (int i = 0; i < LIMIT; ++i) { - if (i == UScript.CYRILLIC || i == UScript.GREEK || i == UScript.CHEROKEE) { - continue; - } - addCompatible(UScript.LATIN, i); - } - // FALL THRU! - case Highly_Restrictive: - addCompatible(UScript.LATIN, UScript.HAN, UScript.HIRAGANA, UScript.KATAKANA); - //addCompatible(UScript.LATIN, HANT, UScript.HIRAGANA, UScript.KATAKANA); - //addCompatible(UScript.LATIN, HANS, UScript.HIRAGANA, UScript.KATAKANA); - - addCompatible(UScript.LATIN, UScript.HAN, UScript.HANGUL); - //addCompatible(UScript.LATIN, HANT, UScript.HANGUL); - //addCompatible(UScript.LATIN, HANS, UScript.HANGUL); - - addCompatible(UScript.LATIN, UScript.HAN, UScript.BOPOMOFO); - addCompatible(UScript.LATIN, UScript.HAN); - // ?? Asomtavruli, Nuskhuri, and Mkhedruli (georgian) - // FALL THRU! - default: - //addCompatible(UScript.HAN, HANT); - //addCompatible(UScript.HAN, HANS); - // Common and Inherited are compatible with everything! - for (int i = 0; i < LIMIT; ++i) { - addCompatible(UScript.COMMON, i); - addCompatible(UScript.INHERITED, i); - } - } - // then specials - // fix the char2scripts mapping - - if (specials == ScriptSpecials.on){ - scriptSpecials.putAllInto(char2scripts); - } - } - - public ScriptTester get() { - final UnicodeMap character_scripts = new UnicodeMap(); - // first set all the simple cases: character => script => scripts - for (int script = 0; script < UScript.CODE_LIMIT; ++script) { - final UnicodeSet uset = new UnicodeSet(); - uset.applyIntPropertyValue(UProperty.SCRIPT, script); - if (uset.size() != 0) { - final BitSet scripts = compatible.get(script); - character_scripts.putAll(uset, scripts); - } - } - // now override these (as necessary) with the charScriptMapping - for (final BitSet scripts : char2scripts.values()) { - // The scripts need fluffing up according to the acceptableTogether sets - // We have to create new Bitsets! - final BitSet fluffed = new BitSet(LIMIT); - fluffed.or(scripts); - for (int unfluffedScript = scripts.nextSetBit(0); unfluffedScript >= 0; unfluffedScript = scripts.nextSetBit(unfluffedScript+1)) { - final BitSet acceptable = compatible.get(unfluffedScript); - fluffed.or(acceptable); - } - final UnicodeSet uset = char2scripts.getSet(scripts); - character_scripts.putAll(uset, fluffed); - } - return new ScriptTester(character_scripts); - } - /** - * Add list of scripts that are acceptable in combination together. - *

Example: st.addAcceptable(UScript.LATIN, USCRIPT.HANGUL);

- * @param scripts - */ - public Builder addCompatible(int... scripts) { - // set all the scripts on each of the other scripts - for (final int script : scripts) { - final BitSet items = compatible.get(script); - for (final int script2 : scripts) { - items.set(script2); - } - } - return this; - } - - /** - * Add mapping from code point to scripts - *

Example: st.addMapping(0x, USCRIPT.HIRAGANA, USCRIPT.KATAKANA); // U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK

- */ - public Builder addMapping(int codePoint, int... scripts) { - final BitSet newScripts = new BitSet(LIMIT); - final BitSet oldScripts = char2scripts.get(codePoint); - if (oldScripts != null) { - newScripts.or(oldScripts); - } - for (final int script : scripts) { - newScripts.set(script); - } - char2scripts.put(codePoint, newScripts); - return this; - } - } - -} diff --git a/unicodetools/src/main/java/org/unicode/jsp/Subheader.java b/unicodetools/src/main/java/org/unicode/jsp/Subheader.java deleted file mode 100644 index c62884e9a..000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/Subheader.java +++ /dev/null @@ -1,225 +0,0 @@ -package org.unicode.jsp; - - -import java.io.BufferedReader; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; -import java.util.Comparator; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Locale; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; -import java.util.TreeSet; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.unicode.cldr.util.MultiComparator; - -import com.ibm.icu.impl.Relation; -import com.ibm.icu.lang.UCharacter; -import com.ibm.icu.lang.UProperty; -import com.ibm.icu.text.Collator; -import com.ibm.icu.text.UnicodeSet; -import com.ibm.icu.text.UnicodeSetIterator; -import com.ibm.icu.util.ULocale; - -public class Subheader implements Iterable { - static final boolean DEBUG = false; - Matcher subheadMatcher = Pattern.compile("(@+)\\s+(.*)").matcher(""); - Matcher hexMatcher = Pattern.compile("([A-Z0-9]+).*").matcher(""); - Map codePoint2Subblock = new HashMap(); - Map subblock2UnicodeSet = new TreeMap(); - Map> block2subblock = new TreeMap>(); - Map> subblock2block = new TreeMap>(); - - public Subheader(String unicodeDataDirectory) { - try { - subblock2UnicodeSet = getDataFromFile(unicodeDataDirectory + "NamesList.txt"); - } catch (final IOException e) { - throw new IllegalArgumentException(e); - } - fillTables(); - } - - // Subheader(String[][] data) { - // subblock2UnicodeSet = new TreeMap(); - // for (String[] pair : data) { - // subblock2UnicodeSet.put(pair[0], new UnicodeSet(pair[1])); - // } - // fillTables(); - // } - - public Subheader(InputStream resourceAsStream) { - try { - subblock2UnicodeSet = getDataFromStream(resourceAsStream); - } catch (final IOException e) { - throw new IllegalArgumentException(e); - } - fillTables(); - } - - static final Comparator SHORTEST_FIRST = new Comparator() { - @Override - public int compare(CharSequence arg0, CharSequence arg1) { - return arg0.length() - arg1.length(); - } - }; - - static final MultiComparator SHORTEST = new MultiComparator(SHORTEST_FIRST, Collator.getInstance(ULocale.ENGLISH)); - - private void fillTables() { - // fix plurals & casing - final Relation caseless = new Relation(new TreeMap(), TreeSet.class, SHORTEST); - - for (final String subhead : subblock2UnicodeSet.keySet()) { - final String norm = getSkeleton(subhead); - caseless.put(norm, subhead); - } - - for (final String norm : caseless.keySet()) { - final Set set = caseless.getAll(norm); - if (set.size() == 1) { - continue; - } - if (DEBUG) { - System.out.println("***Merging similar names:\t" + set + "\tskeleton:" + norm); - } - - UnicodeSet best = null; - String bestName = null; - for (final String name : set) { - if (best == null) { - best = subblock2UnicodeSet.get(name); - bestName = name; - } else { - final UnicodeSet other = subblock2UnicodeSet.get(name); - best.addAll(other); - subblock2UnicodeSet.remove(name); - } - } - } - - // protect the core data, since we allow iteration - for (final String subhead : subblock2UnicodeSet.keySet()) { - final UnicodeSet unicodeSet = subblock2UnicodeSet.get(subhead); - unicodeSet.freeze(); - if (DEBUG) { - System.out.println("\t" + subhead + "\t" + unicodeSet.toPattern(false)); - } - } - - for (final String subblock : subblock2UnicodeSet.keySet()) { - final UnicodeSet uset = subblock2UnicodeSet.get(subblock); - for (final UnicodeSetIterator it = new UnicodeSetIterator(uset); it.next();) { - codePoint2Subblock.put(it.codepoint, subblock); - - final String block = UCharacter.getStringPropertyValue(UProperty.BLOCK, it.codepoint, UProperty.NameChoice.LONG).toString().replace('_', ' ').intern(); - - Set set = block2subblock.get(block); - if (set == null) { - block2subblock.put(block, set = new TreeSet()); - } - set.add(subblock); - - set = subblock2block.get(subblock); - if (set == null) { - subblock2block.put(subblock, set = new TreeSet()); - } - set.add(block); - } - } - } - - static final Pattern NON_ALPHANUM = Pattern.compile("[^" + - "\\p{Ll}\\p{Lu}\\p{Lt}\\p{Lo}\\p{Lm}" + - "\\p{Me}\\p{Mc}\\p{Mn}" + - "\\p{Nd}" + - "]+"); - - static final Pattern TERMINATION = Pattern.compile("(ies|es|s|y)_"); - static final Pattern INITIAL_GORP = Pattern.compile("$[A-Z]\\."); - - private String getSkeleton(String input) { - String result = INITIAL_GORP.matcher(input).replaceAll("_"); - - result = NON_ALPHANUM.matcher(result).replaceAll("_").toLowerCase(Locale.ENGLISH); - if (!result.endsWith("_")) { - result += "_"; - } - if (!result.startsWith("_")) { - result = "_" + result; - } - result = TERMINATION.matcher(result).replaceAll("_"); - - result = result.replace("_mark_and_sign_", "_mark_"); - result = result.replace("_sign_", "_mark_"); - result = result.replace("_symbol_", "_mark_"); - result = result.replace("_additional_", "_"); - result = result.replace("_extended_", "_"); - result = result.replace("_extensions_for_", "_"); - result = result.replace("_further_", "_"); - result = result.replace("_other_", "_"); - result = result.replace("_glyphs_for_", "_"); - - result = result.replace("_poetry_", "_poetic_"); - - result = result.replace("_ancient_", "_historic_"); - result = result.replace("_archaic_", "_historic_"); - result = result.replace("_general_use_", "_general_"); - - - - return result; - } - - private Map getDataFromFile(String filename) throws FileNotFoundException, IOException { - final InputStream is = new FileInputStream(filename); - return getDataFromStream(is); - } - - private Map getDataFromStream(InputStream is) throws IOException { - final Reader reader = new InputStreamReader(is); - final BufferedReader in = new BufferedReader(reader); - final Map subblock2UnicodeSet2 = new TreeMap(); - String subblock = "?"; - while (true) { - final String line = in.readLine(); - if (line == null) { - break; - } - if (subheadMatcher.reset(line).matches()) { - subblock = subheadMatcher.group(1).equals("@") ? subheadMatcher.group(2) : "?"; - continue; - } - if (subblock.length() != 0 && hexMatcher.reset(line).matches()) { - final int cp = Integer.parseInt(hexMatcher.group(1), 16); - UnicodeSet uset = subblock2UnicodeSet2.get(subblock); - if (uset == null) { - subblock2UnicodeSet2.put(subblock, uset = new UnicodeSet()); - } - uset.add(cp); - } - } - in.close(); - return subblock2UnicodeSet2; - } - - public String getSubheader(int codepoint) { - return codePoint2Subblock.get(codepoint); - } - - @Override - public Iterator iterator() { - return subblock2UnicodeSet.keySet().iterator(); - } - - public UnicodeSet getUnicodeSet(String subhead) { - return subblock2UnicodeSet.get(subhead); - } -} \ No newline at end of file diff --git a/unicodetools/src/main/java/org/unicode/jsp/SubheaderSnapshot.java b/unicodetools/src/main/java/org/unicode/jsp/SubheaderSnapshot.java deleted file mode 100644 index dcd9c13f0..000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/SubheaderSnapshot.java +++ /dev/null @@ -1,688 +0,0 @@ -package org.unicode.jsp; - -public class SubheaderSnapshot { - static final String[][] data = { - {"APL","[\\u2336-\\u237A\\u2395]"}, - {"ASCII digits","[0-9]"}, - {"ASCII punctuation and symbols","[\\ -/\\:-@\\[-`\\{-~]"}, - {"Abbreviation mark","[\\uA66F]"}, - {"Accidentals","[\\U0001D12A-\\U0001D133]"}, - {"Addition for German typography","[\\u1E9E]"}, - {"Addition for Sanskrit","[\\u0C3D\\u0D3D]"}, - {"Addition for Torwali","[\\u0772]"}, - {"Additional Suzhou numerals","[\\u3038-\\u303A]"}, - {"Additional archaic letters for Bactrian","[\\u03F7\\u03F8]"}, - {"Additional bold Greek symbols","[\\U0001D6DB-\\U0001D6E1\\U0001D7CA\\U0001D7CB]"}, - {"Additional bold italic Greek symbols","[\\U0001D74F-\\U0001D755]"}, - {"Additional circled number","[\\u24EA]"}, - {"Additional consonant","[\\u09CE]"}, - {"Additional consonants","[\\u0958-\\u095F\\u09DC\\u09DD\\u09DF\\u0A59-\\u0A5C\\u0A5E\\u0B5C\\u0B5D\\u0B5F\\u0CDE\\u1B45-\\u1B4B\\u1BAE\\u1BAF]"}, - {"Additional dependent vowel signs","[\\u0DF2\\u0DF3]"}, - {"Additional diacritical mark for symbols","[\\u20E1]"}, - {"Additional diacritical marks for symbols","[\\u20E5-\\u20F0]"}, - {"Additional enclosing diacritics","[\\u20E2-\\u20E4]"}, - {"Additional italic Greek symbols","[\\U0001D715-\\U0001D71B]"}, - {"Additional letter","[\\u03F3]"}, - {"Additional letterlike symbols","[\\u2139-\\u213F\\u2141-\\u2144\\u214A-\\u214D]"}, - {"Additional letters","[\\u10F9\\u10FA\\u1C4D-\\u1C4F]"}, - {"Additional letters for Mingrelian and Svan","[\\u10F7\\u10F8]"}, - {"Additional marks for UPA","[\\u1DFE\\u1DFF]"}, - {"Additional punctuation","[\\u05F3\\u05F4]"}, - {"Additional sans-serif bold Greek symbols","[\\U0001D789-\\U0001D78F]"}, - {"Additional sans-serif bold italic Greek symbols","[\\U0001D7C3-\\U0001D7C9]"}, - {"Additional vowel for Marathi","[\\u0972]"}, - {"Additional vowels for Sanskrit","[\\u0960-\\u0963\\u09E0-\\u09E3\\u0AE0-\\u0AE3\\u0B60\\u0B61\\u0C60\\u0C61\\u0CE0\\u0CE1\\u0D60\\u0D61]"}, - {"Additional white on black circled number","[\\u24FF]"}, - {"Additions","[\\u0339-\\u033F]"}, - {"Additions based on 1989 IPA","[\\u02DE-\\u02E4]"}, - {"Additions for Burushaski","[\\u0773-\\u077D]"}, - {"Additions for Greek","[\\u0342-\\u0345]"}, - {"Additions for IPA","[\\u0346-\\u034A]"}, - {"Additions for Khowar","[\\u076E-\\u0771]"}, - {"Additions for Livonian","[\\u022A-\\u0233]"}, - {"Additions for Nivkh","[\\u04FA-\\u04FF]"}, - {"Additions for Romanian","[\\u0218-\\u021B]"}, - {"Additions for Sinology","[\\u0234-\\u0236\\u02AE\\u02AF]"}, - {"Additions for Slovenian and Croatian","[\\u0200-\\u0217]"}, - {"Additions for UPA","[\\u2C77-\\u2C7D\\uA720\\uA721]"}, - {"Additions for Uighur","[\\u2C67-\\u2C6C]"}, - {"Additions for early Persian","[\\u077E\\u077F]"}, - {"Additions for early Persian and Azerbaijani","[\\u063B-\\u063F]"}, - {"Additions for the Uralic Phonetic Alphabet","[\\u0350-\\u0357]"}, - {"Afona or Ypostaseis (Mutes or Hypostases)","[\\U0001D057-\\U0001D07E]"}, - {"African letters for clicks","[\\u01C0-\\u01C3]"}, - {"Africanist tone letters","[\\uA71B-\\uA71F]"}, - {"Agogika (Conduits)","[\\U0001D09A-\\U0001D0A1]"}, - {"Aleut letter","[\\u051E\\u051F]"}, - {"Alloioseis (Differentiators)","[\\U0001D0CB-\\U0001D0D9]"}, - {"Alternate consonant forms for Chinese","[\\uA86D-\\uA870]"}, - {"Analytics","[\\U0001D1A6-\\U0001D1A9]"}, - {"Ancient Greek acrophonic numerals","[\\U00010140-\\U00010174]"}, - {"Ancient Greek instrumental notation","[\\U0001D21D-\\U0001D241]"}, - {"Ancient Greek papyrological numbers","[\\U00010175-\\U0001018A]"}, - {"Ancient Greek textual symbols","[\\u2E0E-\\u2E16]"}, - {"Ancient Greek vocalic notation","[\\U0001D200-\\U0001D21C]"}, - {"Ancient Near-Eastern linguistic symbol","[\\u2E17]"}, - {"Ancient Roman epigraphic letters","[\\uA7FB-\\uA7FF]"}, - {"Angle brackets","[\\u2329\\u232A]"}, - {"Angles","[\\u299B-\\u29AF]"}, - {"Arabic-Indic digits","[\\u0660-\\u0669]"}, - {"Archaic Roman numerals","[\\u2180-\\u2183\\u2185-\\u2188]"}, - {"Archaic letters","[\\u0370-\\u0373\\u0376\\u0377\\u03D8-\\u03E1\\u03FA\\u03FB\\u066E\\u066F\\u07E8-\\u07EA\\u10F1-\\u10F6\\u3165-\\u318E]"}, - {"Archaic punctuation","[\\u2056\\u2058-\\u205E]"}, - {"Argies (Retards)","[\\U0001D07F-\\U0001D089]"}, - {"Armenian ligatures","[\\uFB13-\\uFB17]"}, - {"Arrow tails","[\\u2919-\\u291C]"}, - {"Arrows","[\\u27F0-\\u27F4]"}, - {"Arrows combined with operators","[\\u2942-\\u2949]"}, - {"Arrows combined with relations","[\\u2971-\\u297B]"}, - {"Arrows with bent tips","[\\u21B0-\\u21B3\\u2B0E-\\u2B11]"}, - {"Arrows with modifications","[\\u219A-\\u21AF]"}, - {"Articulation","[\\U0001D17B-\\U0001D18E]"}, - {"Astrological signs","[\\u0F15-\\u0F1F\\u0F3E\\u0F3F\\u0FCE\\u0FCF\\u26B3-\\u26BC]"}, - {"Astrological symbols","[\\u263D-\\u2647]"}, - {"Augmentation dot","[\\U0001D16D]"}, - {"Bamboo suit tiles","[\\U0001F010-\\U0001F018]"}, - {"Bars","[\\U0001D100-\\U0001D105]"}, - {"Based on GB 2312","[\\u3105-\\u3129]"}, - {"Based on ISO 8859-6","[\\u0621-\\u063A\\u0640-\\u064A]"}, - {"Based on ISO 8859-8","[\\u05D0-\\u05EA]"}, - {"Based on JIS X 0208","[\\u3041-\\u3094\\u30A1-\\u30FA]"}, - {"Basic Russian alphabet","[\\u0410-\\u044F]"}, - {"Basic consonants","[\\u0780-\\u0797]"}, - {"Basic glyphs for Arabic language contextual forms","[\\uFE80-\\uFEFC]"}, - {"Basic letters","[\\u1820-\\u1842]"}, - {"Basic syllables","[\\U00010000-\\U0001000B\\U0001000D-\\U00010026\\U00010028-\\U0001003A\\U0001003C\\U0001003D\\U0001003F]"}, - {"Beams and slurs","[\\U0001D173-\\U0001D17A]"}, - {"Bengali-specific additions","[\\u09F0-\\u09FA]"}, - {"Biblical editorial symbol","[\\u214F]"}, - {"Block elements","[\\u2580-\\u2590\\u2594\\u2595]"}, - {"Bohairic Coptic letters","[\\u2C80-\\u2CB1]"}, - {"Bold Fraktur symbols","[\\U0001D56C-\\U0001D59F]"}, - {"Bold Greek symbols","[\\U0001D6A8-\\U0001D6DA]"}, - {"Bold digits","[\\U0001D7CE-\\U0001D7D7]"}, - {"Bold italic Greek symbols","[\\U0001D71C-\\U0001D74E]"}, - {"Bold italic symbols","[\\U0001D468-\\U0001D49B]"}, - {"Bold script symbols","[\\U0001D4D0-\\U0001D503]"}, - {"Bold symbols","[\\U0001D400-\\U0001D433]"}, - {"Bowtie symbols","[\\u29D1-\\u29D7]"}, - {"Bracket pieces","[\\u239B-\\u23AD\\u23B0\\u23B1]"}, - {"Brackets","[\\u2983-\\u2998\\u29FC\\u29FD\\u2E1C\\u2E1D\\u2E20\\u2E21\\u2E26-\\u2E29]"}, - {"Braille patterns","[\\u2800-\\u28FF]"}, - {"C0 controls","[\\u0000-\\u001F]"}, - {"C1 controls","[\\u0080-\\u009F]"}, - {"CJK angle brackets","[\\u3008-\\u300B]"}, - {"CJK brackets","[\\u3010\\u3011\\u3014-\\u301B]"}, - {"CJK corner brackets","[\\u300C-\\u300F]"}, - {"CJK radicals supplement","[\\u2E80-\\u2E99\\u2E9B-\\u2EF3]"}, - {"CJK strokes","[\\u31C0-\\u31E3]"}, - {"CJK symbols","[\\u3012\\u3013]"}, - {"CJK symbols and punctuation","[\\u3000-\\u3007\\u301C-\\u3020]"}, - {"Candrabindu","[\\uA873]"}, - {"Cantillation marks","[\\u0591-\\u05AF]"}, - {"Cantillation signs","[\\u0FC0-\\u0FC3]"}, - {"Capital letters","[\\u2C00-\\u2C2E]"}, - {"Capital letters (Khutsuri)","[\\u10A0-\\u10C5]"}, - {"Caucasian linguistics","[\\u1D77\\u1D78]"}, - {"Ceilings and floors","[\\u2308-\\u230B]"}, - {"Cereals and plants","[\\U0001008E-\\U00010094]"}, - {"Character suit tiles","[\\U0001F007-\\U0001F00F]"}, - {"Chemistry symbol","[\\u232C\\u23E3]"}, - {"Chess symbols","[\\u2654-\\u265F]"}, - {"Chillu letters","[\\u0D7A-\\u0D7F]"}, - {"Chinantec tone marks","[\\uA717-\\uA71A]"}, - {"Chuvash letters","[\\u0520-\\u0523]"}, - {"Circle","[\\u2B24]"}, - {"Circle suit tiles","[\\U0001F019-\\U0001F021]"}, - {"Circle symbols","[\\u29B5-\\u29C3]"}, - {"Circled Hangul elements","[\\u3260-\\u326D]"}, - {"Circled Hangul syllable","[\\u327E]"}, - {"Circled Hangul syllables","[\\u326E-\\u327B]"}, - {"Circled Katakana","[\\u32D0-\\u32FE]"}, - {"Circled Korean words","[\\u327C\\u327D]"}, - {"Circled Latin letters","[\\u24B6-\\u24E9]"}, - {"Circled ideographs","[\\u3280-\\u32B0]"}, - {"Circled numbers","[\\u2460-\\u2473\\u3251-\\u325F\\u32B1-\\u32BF]"}, - {"Circles","[\\u26AA-\\u26AC]"}, - {"Claudian letters","[\\u2C75\\u2C76]"}, - {"Clefs","[\\U0001D11E-\\U0001D126]"}, - {"Codas","[\\U0001D106-\\U0001D10C]"}, - {"Combining diacritical marks for symbols","[\\u20D0-\\u20DC]"}, - {"Combining half marks","[\\uFE20-\\uFE23]"}, - {"Combining maddah and hamza","[\\u0653-\\u0655]"}, - {"Combining mark","[\\u135F]"}, - {"Combining marks for Old Cyrillic","[\\uA67C\\uA67D]"}, - {"Combining numeric signs","[\\uA670-\\uA672]"}, - {"Combining stroke","[\\U000101FD]"}, - {"Conjunction and length marks","[\\u30FB\\u30FC]"}, - {"Consonant","[\\u103F]"}, - {"Consonant addition for Tibetan","[\\uA872]"}, - {"Consonant additions for Sanskrit","[\\uA869-\\uA86C]"}, - {"Consonant for Addu dialect","[\\u07B1]"}, - {"Consonant shifters","[\\u17C9\\u17CA]"}, - {"Consonant signs","[\\u1C2D-\\u1C35\\uA94F-\\uA952\\uAA33-\\uAA36]"}, - {"Consonants","[\\u0915-\\u0939\\u0995-\\u09A8\\u09AA-\\u09B0\\u09B2\\u09B6-\\u09B9\\u0A15-\\u0A28\\u0A2A-\\u0A30\\u0A32\\u0A33\\u0A35\\u0A36\\u0A38\\u0A39\\u0A95-\\u0AA8\\u0AAA-\\u0AB0\\u0AB2\\u0AB3\\u0AB5-\\u0AB9\\u0B15-\\u0B28\\u0B2A-\\u0B30\\u0B32\\u0B33\\u0B35-\\u0B39\\u0B95\\u0B99\\u0B9A\\u0B9C\\u0B9E\\u0B9F\\u0BA3\\u0BA4\\u0BA8-\\u0BAA\\u0BAE-\\u0BB9\\u0C15-\\u0C28\\u0C2A-\\u0C33\\u0C35-\\u0C39\\u0C95-\\u0CA8\\u0CAA-\\u0CB3\\u0CB5-\\u0CB9\\u0D15-\\u0D28\\u0D2A-\\u0D39\\u0D9A-\\u0DB1\\u0DB3-\\u0DBB\\u0DBD\\u0DC0-\\u0DC6\\u0E01-\\u0E2E\\u0E81\\u0E82\\u0E84\\u0E87\\u0E88\\u0E8A\\u0E8D\\u0E94-\\u0E97\\u0E99-\\u0E9F\\u0EA1-\\u0EA3\\u0EA5\\u0EA7\\u0EAA\\u0EAB\\u0EAD\\u0EAE\\u0F40-\\u0F47\\u0F49-\\u0F6A\\u1000-\\u1020\\u1703-\\u170C\\u170E-\\u1711\\u1723-\\u1731\\u1743-\\u1751\\u1763-\\u176C\\u176E-\\u1770\\u1780-\\u17A2\\u1900-\\u191C\\u1950-\\u1962\\u1980-\\u19A9\\u1A00-\\u1A16\\u1B13-\\u1B33\\u1B8A-\\u1BA0\\u1C00-\\u1C23\\uA840-\\uA85C\\uA862-\\uA865\\uA892-\\uA8B4\\uA90A-\\uA925\\uA930-\\uA946\\uAA06-\\uAA28\\U000103A3-\\U000103C3\\U00010450-\\U00010465\\U00010A10-\\U00010A13\\U00010A15-\\U00010A17\\U00010A19-\\U00010A33]"}, - {"Consonants and consonant signs","[\\uA807-\\uA822]"}, - {"Consonants signs","[\\u1BA1-\\u1BA3]"}, - {"Continuous macrons for Coptic","[\\uFE24-\\uFE26]"}, - {"Contour tone marks","[\\u1DC4-\\u1DC9\\u1DCB\\u1DCC]"}, - {"Control character","[\\u007F]"}, - {"Control code graphics","[\\u25F0-\\u25F7]"}, - {"Coptic letters derived from Demotic","[\\u03E2-\\u03EF]"}, - {"Corner tone marks for Chinese","[\\uA700-\\uA707]"}, - {"Counting rod units","[\\U0001D360-\\U0001D371]"}, - {"Croatian digraphs matching Serbian Cyrillic letters","[\\u01C4-\\u01CC]"}, - {"Crops","[\\u230C-\\u230F]"}, - {"Crosses","[\\u2719-\\u2720]"}, - {"Crossing arrows for knot theory","[\\u2927-\\u2932]"}, - {"Currency sign","[\\u060B\\u0AF1\\uFDFC]"}, - {"Currency symbol","[\\u0BF9\\u0E3F\\u17DB]"}, - {"Currency symbols","[\\u20A0-\\u20B5]"}, - {"Cyrillic extensions","[\\u0400-\\u040F\\u0450-\\u045F\\u0510-\\u0513]"}, - {"Cyrillic letter","[\\u1D2B]"}, - {"DPRK compatibility ideographs","[\\uFA70-\\uFAD9]"}, - {"Dashes","[\\u2010-\\u2015]"}, - {"Database theory operators","[\\u27D5-\\u27D7]"}, - {"Date mark","[\\u0D79]"}, - {"Dentistry notation symbols","[\\u23BE-\\u23CC]"}, - {"Dependent consonant signs","[\\u103B-\\u103E]"}, - {"Dependent vowel sign","[\\u0DDF]"}, - {"Dependent vowel signs","[\\u093E-\\u094C\\u09BE-\\u09C4\\u09C7\\u09C8\\u0A3E-\\u0A42\\u0A47\\u0A48\\u0A4B\\u0A4C\\u0ABE-\\u0AC5\\u0AC7-\\u0AC9\\u0ACB\\u0ACC\\u0B3E-\\u0B44\\u0B47\\u0B48\\u0BBE-\\u0BC2\\u0BC6-\\u0BC8\\u0C3E-\\u0C44\\u0C46-\\u0C48\\u0C4A-\\u0C4C\\u0CBE-\\u0CC4\\u0CC6-\\u0CC8\\u0CCA-\\u0CCC\\u0D3E-\\u0D44\\u0D46-\\u0D48\\u0DCF-\\u0DD4\\u0DD6\\u0DD8-\\u0DDB\\u0F71-\\u0F7D\\u0F80\\u0F81\\u102B-\\u1035\\u1712\\u1713\\u1732\\u1733\\u1752\\u1753\\u1772\\u1773\\u17B6-\\u17BD\\u17C1-\\u17C3\\u1920-\\u1928\\u1B35-\\u1B43\\uA823-\\uA827\\uA8B5-\\uA8C3\\uAA29-\\uAA32]"}, - {"Dependent vowels","[\\u0B62\\u0B63\\u0C62\\u0C63\\u0CE2\\u0CE3\\u0D62\\u0D63\\u1C26-\\u1C2C]"}, - {"Deprecated","[\\u206A-\\u206F]"}, - {"Devanagari-specific additions","[\\u0970\\u0971]"}, - {"Diacritical marks for musical symbols","[\\u1B6B-\\u1B73]"}, - {"Diacritics","[\\u302A-\\u302F]"}, - {"Dialect (non-Mandarin) letters","[\\u312A-\\u312C]"}, - {"Diamonds","[\\u2B16-\\u2B19]"}, - {"Diamonds and lozenges","[\\u2B25-\\u2B2B]"}, - {"Dice","[\\u2680-\\u2685]"}, - {"Dictionary and map symbols","[\\u2690-\\u269B]"}, - {"Dictionary punctuation","[\\u2E1A\\u2E1B\\u2E1E\\u2E1F]"}, - {"Digits","[\\u07C0-\\u07C9\\u0966-\\u096F\\u09E6-\\u09EF\\u0A66-\\u0A6F\\u0AE6-\\u0AEF\\u0B66-\\u0B6F\\u0BE6-\\u0BEF\\u0C66-\\u0C6F\\u0CE6-\\u0CEF\\u0D66-\\u0D6F\\u0E50-\\u0E59\\u0ED0-\\u0ED9\\u0F20-\\u0F29\\u1040-\\u1049\\u1369-\\u1371\\u17E0-\\u17E9\\u1810-\\u1819\\u1946-\\u194F\\u19D0-\\u19D9\\u1B50-\\u1B59\\u1BB0-\\u1BB9\\u1C40-\\u1C49\\u1C50-\\u1C59\\uA620-\\uA629\\uA8D0-\\uA8D9\\uA900-\\uA909\\uAA50-\\uAA59\\U000104A0-\\U000104A9\\U00010A40-\\U00010A43]"}, - {"Digits minus half","[\\u0F2A-\\u0F33]"}, - {"Digrams","[\\U0001D301-\\U0001D305]"}, - {"Digraphs","[\\u0EDC\\u0EDD]"}, - {"Dingbat arrows","[\\u2794-\\u27AF\\u27B1-\\u27BE]"}, - {"Dingbat circled digits","[\\u2776-\\u2793]"}, - {"Division operator","[\\u27CC]"}, - {"Dotless symbols","[\\U0001D6A4\\U0001D6A5]"}, - {"Dotted tone letters","[\\uA708-\\uA711]"}, - {"Double arrows","[\\u21CD-\\u21D9]"}, - {"Double circled numbers","[\\u24F5-\\u24FE]"}, - {"Double diacritic","[\\u1DCD]"}, - {"Double diacritics","[\\u035C-\\u0362]"}, - {"Double punctuation for vertical text","[\\u203C\\u2047-\\u2049]"}, - {"Double-barbed harpoons","[\\u294A-\\u2951]"}, - {"Double-struck digits","[\\U0001D7D8-\\U0001D7E1]"}, - {"Double-struck italic math symbols","[\\u2145-\\u2149]"}, - {"Double-struck large operator","[\\u2140]"}, - {"Double-struck symbols","[\\U0001D538-\\U0001D56B]"}, - {"Drafting symbols","[\\u232D-\\u2335]"}, - {"Dragon tiles","[\\U0001F004-\\U0001F006]"}, - {"Duplicate characters from Big 5","[\\uFA0C\\uFA0D]"}, - {"Duplicate characters from CNS�11643-1992","[\\U0002F800-\\U0002FA1D]"}, - {"Dynamics","[\\U0001D18F-\\U0001D193]"}, - {"Eastern Arabic-Indic digits","[\\u06F0-\\u06F9]"}, - {"Editorial symbols","[\\u03FD-\\u03FF]"}, - {"Egyptological additions","[\\uA722-\\uA725]"}, - {"Ekfonetika","[\\U0001D003-\\U0001D014]"}, - {"Electrotechnical symbols","[\\u23DA\\u23DB]"}, - {"Electrotechnical symbols from IR 181","[\\u238D-\\u2394]"}, - {"Ellipses","[\\u2B2C-\\u2B2F]"}, - {"Empty sets","[\\u29B0-\\u29B4]"}, - {"Enclosing diacritics","[\\u20DD-\\u20E0]"}, - {"Error bar symbols","[\\u29EE-\\u29F3]"}, - {"European Latin","[\\u0100-\\u017F]"}, - {"Extended Arabic letter","[\\u06D5]"}, - {"Extended Arabic letter for Parkari","[\\u06FF]"}, - {"Extended Arabic letters","[\\u0671-\\u06D3\\u06FA-\\u06FC\\u0750-\\u076D]"}, - {"Extended Arabic letters for Parkari","[\\u06EE\\u06EF]"}, - {"Extended Bopomofo for Minnan and Hakka","[\\u31A0-\\u31B7]"}, - {"Extended Bopomofo tone marks","[\\u02EA\\u02EB]"}, - {"Extended Cyrillic","[\\u048A-\\u04F9]"}, - {"Extension for Geba Karen","[\\u1071]"}, - {"Extensions for Arabic","[\\u0798-\\u07A5]"}, - {"Extensions for Balti","[\\u0F6B\\u0F6C]"}, - {"Extensions for Eastern Pwo Karen","[\\u106E-\\u1070]"}, - {"Extensions for Kayah","[\\u1072-\\u1074]"}, - {"Extensions for Mon","[\\u105A-\\u1060]"}, - {"Extensions for Rumai Palaung","[\\u108E\\u108F]"}, - {"Extensions for S'gaw Karen","[\\u1061-\\u1064]"}, - {"Extensions for Sanskrit and Tibetan","[\\u1880-\\u18AA]"}, - {"Extensions for Shan","[\\u1075-\\u108D]"}, - {"Extensions for Western Pwo Karen","[\\u1065-\\u106D]"}, - {"Extracts","[\\U00010095-\\U00010099]"}, - {"Fences","[\\u2999\\u299A\\u29D8-\\u29DB]"}, - {"Figure repetitions","[\\U0001D10D-\\U0001D10F]"}, - {"Final consonants","[\\u11A8-\\u11F9\\u1930-\\u1938\\u19C1-\\u19C7]"}, - {"Final letters","[\\uAA40-\\uAA4D]"}, - {"Fish tails","[\\u297C-\\u297F]"}, - {"Fives","[\\U0001F054-\\U0001F05A\\U0001F086-\\U0001F08C]"}, - {"Fixed-form subjoined consonants","[\\u0FBA-\\u0FBC]"}, - {"Flags","[\\U0001D16E-\\U0001D172]"}, - {"Flower tiles","[\\U0001F022-\\U0001F025]"}, - {"Fonitika (Vocals)","[\\U0001D046-\\U0001D056]"}, - {"Forfeda (supplementary letters)","[\\u1695-\\u169A]"}, - {"Forks","[\\u2AD9-\\u2ADD]"}, - {"Form and chart components","[\\u2500-\\u257F]"}, - {"Format character","[\\u2060]"}, - {"Format characters","[\\u200C-\\u200F\\u2028-\\u202F]"}, - {"Format controls","[\\u180B-\\u180E]"}, - {"Fours","[\\U0001F04D-\\U0001F053\\U0001F07F-\\U0001F085]"}, - {"Fractions","[\\u0D73-\\u0D75\\u2153-\\u215F\\U0001245A-\\U00012462]"}, - {"Fraktur symbols","[\\U0001D504-\\U0001D537]"}, - {"Frown and smile","[\\u2322\\u2323]"}, - {"Fthores (Destroyers)","[\\U0001D0B6-\\U0001D0CA]"}, - {"Fullwidth ASCII variants","[\\uFF01-\\uFF5E]"}, - {"Fullwidth brackets","[\\uFF5F\\uFF60]"}, - {"Fullwidth symbol variants","[\\uFFE0-\\uFFE6]"}, - {"Further Greek musical notation symbols","[\\U0001D242-\\U0001D245]"}, - {"GUI icons","[\\u231A\\u231B]"}, - {"Gender symbol","[\\u26B2]"}, - {"Gender symbols","[\\u26A2-\\u26A9]"}, - {"Genealogical symbols","[\\u26AD-\\u26B1]"}, - {"General punctuation","[\\u2016-\\u2027\\u2030-\\u203B\\u203D-\\u2046\\u204A-\\u2055\\u2057\\u2E18\\u2E19]"}, - {"Generic punctuation for Philippine scripts","[\\u1735\\u1736]"}, - {"Generic punctuation for scripts of India","[\\u0964\\u0965]"}, - {"Geometric shapes","[\\u25A0-\\u25EF\\u25F8-\\u25FF]"}, - {"Glottal stop","[\\u097D]"}, - {"Glyph part","[\\uFE73]"}, - {"Glyphs for contextual forms of letters for Central Asian languages","[\\uFBD3-\\uFBE9]"}, - {"Glyphs for contextual forms of letters for Persian, Urdu, Sindhi, etc.","[\\uFB50-\\uFBB1]"}, - {"Glyphs for spacing forms of Arabic points","[\\uFE70-\\uFE72\\uFE74\\uFE76-\\uFE7F]"}, - {"Glyphs for vertical variants","[\\uFE10-\\uFE19\\uFE30-\\uFE44\\uFE47\\uFE48]"}, - {"Go markers","[\\u2686-\\u2689]"}, - {"Golden number runes","[\\u16EE-\\u16F0]"}, - {"Grammata (Letters)","[\\U0001D0E6-\\U0001D0EF]"}, - {"Grapheme joiner","[\\u034F]"}, - {"Graphic picture for control code","[\\u2424]"}, - {"Graphic pictures for control codes","[\\u2400-\\u2421]"}, - {"Graphics for control codes","[\\u237B\\u237D-\\u237F]"}, - {"Greek letters","[\\u1D26-\\u1D2A]"}, - {"Greek subscript modifier letters","[\\u1D66-\\u1D6A]"}, - {"Greek superscript modifier letters","[\\u1D5D-\\u1D61]"}, - {"Gregorian notation","[\\U0001D1D0-\\U0001D1DD]"}, - {"Gurmukhi-specific additions","[\\u0A70-\\u0A75]"}, - {"Half brackets","[\\u2E22-\\u2E25]"}, - {"Halfwidth CJK punctuation","[\\uFF61-\\uFF64]"}, - {"Halfwidth Hangul variants","[\\uFFA0-\\uFFBE\\uFFC2-\\uFFC7\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7\\uFFDA-\\uFFDC]"}, - {"Halfwidth Katakana variants","[\\uFF65-\\uFF9F]"}, - {"Halfwidth symbol variants","[\\uFFE8-\\uFFEE]"}, - {"Harpoons","[\\u21BC-\\u21C3]"}, - {"Head marks","[\\u0F01-\\u0F07\\u0FD3\\u0FD4]"}, - {"Head marks for Tibetan","[\\uA874\\uA875]"}, - {"Hebrew letterlike math symbols","[\\u2135-\\u2138]"}, - {"Hebrew presentation forms","[\\uFB1D-\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-\\uFB4F]"}, - {"Hexagons","[\\u2B21-\\u2B23]"}, - {"Historic letters","[\\u0460-\\u0481]"}, - {"Historic miscellaneous","[\\u0482-\\u0489]"}, - {"Historic phonetic variants","[\\u0C58\\u0C59]"}, - {"Historic syllables","[\\uA610-\\uA612\\uA62A\\uA62B]"}, - {"Holds and pauses","[\\U0001D110-\\U0001D113]"}, - {"Honorifics","[\\u0610-\\u0614]"}, - {"Horizontal brackets","[\\u23B4-\\u23B6\\u23DC-\\u23E1]"}, - {"Horizontal tiles","[\\U0001F030]"}, - {"IPA characters for disordered speech","[\\u02A9-\\u02AD]"}, - {"IPA diacritics for disordered speech","[\\u034B-\\u034E]"}, - {"IPA extensions","[\\u0250-\\u02A8]"}, - {"IPA modifiers","[\\u02EC\\u02ED]"}, - {"Ichimata and Martyrika (Ichimas and Evidentials)","[\\U0001D0A2-\\U0001D0B5]"}, - {"Ideographic description characters","[\\u2FF0-\\u2FFB]"}, - {"Independent vowel (deprecated)","[\\u17A3]"}, - {"Independent vowels","[\\u0904-\\u0914\\u0985-\\u098C\\u098F\\u0990\\u0993\\u0994\\u0A05-\\u0A0A\\u0A0F\\u0A10\\u0A13\\u0A14\\u0A85-\\u0A8D\\u0A8F-\\u0A91\\u0A93\\u0A94\\u0B05-\\u0B0C\\u0B0F\\u0B10\\u0B13\\u0B14\\u0B85-\\u0B8A\\u0B8E-\\u0B90\\u0B92-\\u0B94\\u0C05-\\u0C0C\\u0C0E-\\u0C10\\u0C12-\\u0C14\\u0C85-\\u0C8C\\u0C8E-\\u0C90\\u0C92-\\u0C94\\u0D05-\\u0D0C\\u0D0E-\\u0D10\\u0D12-\\u0D14\\u0D85-\\u0D96\\u1021-\\u102A\\u1700-\\u1702\\u1720-\\u1722\\u1740-\\u1742\\u1760-\\u1762\\u17A4-\\u17B3\\u1B05-\\u1B12\\uA882-\\uA891\\uAA00-\\uAA05\\U000103A0-\\U000103A2]"}, - {"Independent vowels and dvisvara","[\\uA800-\\uA805]"}, - {"Inherent vowels","[\\u17B4\\u17B5]"}, - {"Initial consonants","[\\u1100-\\u1159\\u115F]"}, - {"Instrumentation","[\\U0001D1AA-\\U0001D1AD]"}, - {"Insular and Celticist letters","[\\uA779-\\uA787]"}, - {"Integral pieces","[\\u2320\\u2321]"}, - {"Integrals","[\\u222B-\\u2233]"}, - {"Interlinear annotation","[\\uFFF9-\\uFFFB]"}, - {"Intersections and unions","[\\u2A40-\\u2A50]"}, - {"Invisible operators","[\\u2061-\\u2064]"}, - {"Iota subscript","[\\u037A]"}, - {"Italic Greek symbols","[\\U0001D6E2-\\U0001D714]"}, - {"Italic symbols","[\\U0001D434-\\U0001D467]"}, - {"Iteration marks","[\\u309D\\u309E\\u30FD\\u30FE]"}, - {"JIS X 0213 compatibility ideographs","[\\uFA30-\\uFA6A]"}, - {"Japanese chess symbols","[\\u2616\\u2617]"}, - {"Japanese corporation","[\\u337F]"}, - {"Japanese era names","[\\u337B-\\u337E]"}, - {"Kanbun","[\\u3190-\\u319F]"}, - {"Kangxi radicals","[\\u2F00-\\u2FD5]"}, - {"Katakana punctuation","[\\u30A0]"}, - {"Keyboard and UI symbols","[\\u23CE\\u23CF]"}, - {"Keyboard symbol","[\\u232B\\u2425]"}, - {"Keyboard symbols","[\\u2324-\\u2328]"}, - {"Keyboard symbols and circle arrows","[\\u21B4-\\u21BB]"}, - {"Keyboard symbols from ISO 9995-7","[\\u2380-\\u238C\\u2396-\\u239A]"}, - {"Komi letters","[\\u0500-\\u050F]"}, - {"Koranic annotation signs","[\\u0615-\\u061A\\u06D6-\\u06ED]"}, - {"Kurdish letters","[\\u051A-\\u051D]"}, - {"Large operators","[\\u29F8\\u29F9]"}, - {"Latin extensions for Vietnamese","[\\u1EA0-\\u1EF1]"}, - {"Latin general extensions","[\\u1EF2-\\u1EF9]"}, - {"Latin general use extensions","[\\u1E00-\\u1E9B]"}, - {"Latin letter","[\\u1D6B]"}, - {"Latin letters","[\\u1D00-\\u1D25]"}, - {"Latin letters with middle tilde","[\\u1D6C-\\u1D76]"}, - {"Latin letters with palatal hook","[\\u1D80-\\u1D8E]"}, - {"Latin letters with retroflex hook","[\\u1D8F-\\u1D9A]"}, - {"Latin ligatures","[\\uFB00-\\uFB06]"}, - {"Latin subscript modifier letters","[\\u1D62-\\u1D65]"}, - {"Latin superscript modifier letters","[\\u02B0-\\u02B8\\u1D2C-\\u1D5C]"}, - {"Latin-1 punctuation and symbols","[\\u00A0-\\u00BF]"}, - {"Left-stem tone letters","[\\uA712-\\uA716]"}, - {"Leimmata or Siopes (Leimmas or Silencers)","[\\U0001D08A-\\U0001D08E]"}, - {"Length mark","[\\U00010A0C]"}, - {"Letter","[\\u0386]"}, - {"Letter A","[\\uA85D]"}, - {"Letter extender","[\\u07FA]"}, - {"Letterlike symbol","[\\u0608]"}, - {"Letterlike symbols","[\\u2100-\\u2134]"}, - {"Letters","[\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u00FF\\u0388-\\u038A\\u038C\\u038E-\\u03A1\\u03A3-\\u03CE\\u07CA-\\u07E7\\u16A0-\\u16EA\\u1C5A-\\u1C77\\u2D30-\\u2D65\\U00010280-\\U0001029C\\U000102A0-\\U000102D0\\U00010300-\\U0001031E\\U00010330-\\U0001034A\\U00010380-\\U0001039D\\U00010480-\\U0001049D\\U00010900-\\U00010915\\U00010920-\\U00010939]"}, - {"Letters for Old Abkhasian orthography","[\\uA680-\\uA697]"}, - {"Letters for Old Cyrillic","[\\uA640-\\uA65F\\uA662-\\uA66E]"}, - {"Ligatures (three elements)","[\\uFD50-\\uFD8F\\uFD92-\\uFDC7]"}, - {"Ligatures (two elements)","[\\uFBEA-\\uFD3D]"}, - {"Logical and set operators","[\\u2227-\\u222A]"}, - {"Logical ands and ors","[\\u2A51-\\u2A63]"}, - {"Logical operators","[\\u22CE\\u22CF]"}, - {"Logograms","[\\uA613-\\uA61F]"}, - {"Long arrows","[\\u27F5-\\u27FF]"}, - {"Lowercase Claudian letter","[\\u214E\\u2184]"}, - {"Lowercase Latin alphabet","[a-z]"}, - {"Lowercase letters","[\\u0561-\\u0587\\U00010428-\\U0001044F]"}, - {"Lowercase of editorial symbols","[\\u037B-\\u037D]"}, - {"Lunar date sign (deprecated)","[\\u17D3]"}, - {"Lunar date symbols","[\\u19E0-\\u19FF]"}, - {"Malayalam numerics","[\\u0D70-\\u0D72]"}, - {"Manchu letters","[\\u1873-\\u1877]"}, - {"Marks","[\\u0FD0-\\u0FD2]"}, - {"Marks and signs","[\\u0F08-\\u0F14\\u0F34-\\u0F39\\u0F82-\\u0F87]"}, - {"Mathematical arrows","[\\u2B30-\\u2B4C]"}, - {"Mathematical brackets","[\\u27E6-\\u27EF]"}, - {"Mathematical operator","[\\u00D7\\u00F7]"}, - {"Mayanist additions","[\\uA726-\\uA72F]"}, - {"Measures","[\\U00010137-\\U0001013F]"}, - {"Medial vowels","[\\u1160-\\u11A2]"}, - {"Medical and healing symbols","[\\u2624\\u2625]"}, - {"Medieval superscript letter diacritics","[\\u0363-\\u036F\\u1DD3-\\u1DE6]"}, - {"Medievalist addition","[\\u1E9F]"}, - {"Medievalist additions","[\\u1DCE-\\u1DD2\\u1E9C\\u1E9D\\u1EFA-\\u1EFF\\uA730-\\uA778]"}, - {"Medievalist punctuation","[\\u2E2A-\\u2E30]"}, - {"Melodimata (Melodics)","[\\U0001D015-\\U0001D045]"}, - {"Mensural notation","[\\U0001D1B6-\\U0001D1C0]"}, - {"Mensural prolations","[\\U0001D1C7-\\U0001D1CE]"}, - {"Mensural rests","[\\U0001D1C1-\\U0001D1C6]"}, - {"Metals","[\\U0001009A-\\U0001009C]"}, - {"Metrical symbols","[\\u23D1-\\u23D9]"}, - {"Miscellaneous","[\\u2701-\\u2718\\u274C-\\u275A]"}, - {"Miscellaneous addition","[\\u312D]"}, - {"Miscellaneous additions","[\\u021C-\\u0229\\u0237-\\u024F\\u0358-\\u035B\\u2C6D-\\u2C6F\\u2C71-\\u2C74]"}, - {"Miscellaneous arrow","[\\u2970]"}, - {"Miscellaneous arrows","[\\u21F4-\\u21FF\\u2900-\\u2918\\u291D-\\u2926]"}, - {"Miscellaneous arrows and keyboard symbols","[\\u21DA-\\u21E5]"}, - {"Miscellaneous curved arrows","[\\u2933-\\u2941]"}, - {"Miscellaneous large operators","[\\u2A1D-\\u2A21]"}, - {"Miscellaneous mark","[\\u1DCA]"}, - {"Miscellaneous marks","[\\u1DC2\\u1DC3]"}, - {"Miscellaneous mathematical operator","[\\u2AF6]"}, - {"Miscellaneous mathematical operators","[\\u2A39-\\u2A3F\\u2A64\\u2A65]"}, - {"Miscellaneous mathematical symbol","[\\u220E\\u223F]"}, - {"Miscellaneous mathematical symbols","[\\u2200-\\u2207\\u221E-\\u2222\\u2234\\u2235\\u22A4\\u22A5\\u22BE\\u22BF\\u2980-\\u2982\\u29DC-\\u29E2\\u29E7-\\u29ED\\u29F4-\\u29F7\\u29FE\\u29FF]"}, - {"Miscellaneous phonetic modifiers","[\\u02B9-\\u02D7]"}, - {"Miscellaneous symbol","[\\u2615\\u2668\\u27D0\\U0001D1CF]"}, - {"Miscellaneous symbols","[\\u260E-\\u2613\\u2618\\u2619\\u2638-\\u263C\\u267E\\u267F\\u269C\\u269D\\u26A0\\u26A1\\u27C0-\\u27C9\\U0001D1B1-\\U0001D1B5]"}, - {"Miscellaneous technical","[\\u2300-\\u2307\\u2310-\\u2319\\u237C\\u23CD\\u23E2\\u23E4-\\u23E7]"}, - {"Miscellaneous tiles","[\\U0001F02A\\U0001F02B]"}, - {"Mkhedruli","[\\u10D0-\\u10F0]"}, - {"Modal logic operators","[\\u27E0-\\u27E5]"}, - {"Modern letters","[\\u3131-\\u3163]"}, - {"Modified harpoons","[\\u2952-\\u2961]"}, - {"Modifier letter","[\\u10FC\\u2D6F\\uA67F]"}, - {"Modifier letters","[\\u0559-\\u055F\\u1C78-\\u1C7D\\u1D9B-\\u1DBF\\uA788-\\uA78A]"}, - {"Monogram","[\\U0001D300]"}, - {"Monospace digits","[\\U0001D7F6-\\U0001D7FF]"}, - {"Monospace symbols","[\\U0001D670-\\U0001D6A3]"}, - {"Mordvin letters","[\\u0514-\\u0519]"}, - {"Multiplication and division sign operators","[\\u2A2F-\\u2A38]"}, - {"Musical symbols","[\\u1B74-\\u1B7C\\u2669-\\u266F]"}, - {"Musical symbols for notes","[\\u1B61-\\u1B6A]"}, - {"N-ary operators","[\\u220F-\\u2211\\u22C0-\\u22C3\\u2A00-\\u2A09]"}, - {"New Testament editorial symbols","[\\u2E00-\\u2E0D]"}, - {"Non-European and historic Latin","[\\u0180-\\u01BF]"}, - {"Noncharacters","[\\uFDD0-\\uFDEF\\uFFFE\\uFFFF\\U0002FFFE\\U0002FFFF\\U0003FFFE\\U0003FFFF\\U0004FFFE\\U0004FFFF\\U0005FFFE\\U0005FFFF\\U0006FFFE\\U0006FFFF\\U0007FFFE\\U0007FFFF\\U0008FFFE\\U0008FFFF\\U0009FFFE\\U0009FFFF\\U000AFFFE\\U000AFFFF\\U000BFFFE\\U000BFFFF\\U000CFFFE\\U000CFFFF\\U000DFFFE\\U000DFFFF\\U000EFFFE\\U000EFFFF\\U000FFFFE\\U000FFFFF\\U0010FFFE\\U0010FFFF]"}, - {"Not character codes","[\\U0001FFFE\\U0001FFFF]"}, - {"Noteheads","[\\U0001D143-\\U0001D15B]"}, - {"Notes","[\\U0001D15C-\\U0001D164]"}, - {"Numbers","[\\u1372-\\u137C\\U00010107-\\U00010133\\U000103D1-\\U000103D5\\U00010916-\\U00010919\\U00010A44-\\U00010A47]"}, - {"Numbers period","[\\u2488-\\u249B]"}, - {"Numeral signs","[\\u0374\\u0375]"}, - {"Numerals","[\\U00010320-\\U00010323]"}, - {"Numeric character","[\\u2CFD]"}, - {"Numeric signs","[\\U00012400-\\U00012459]"}, - {"Numeric symbols for divination lore","[\\u17F0-\\u17F9]"}, - {"OCR","[\\u2440-\\u244A]"}, - {"Octaves","[\\U0001D136-\\U0001D139]"}, - {"Old Church Slavonic combining letters","[\\u2DE0-\\u2DFF]"}, - {"Old Coptic and dialect letters","[\\u2CB2-\\u2CDB]"}, - {"Old Nubian letters","[\\u2CDC-\\u2CE3]"}, - {"Old Nubian punctuation","[\\u2CF9-\\u2CFC]"}, - {"Ones","[\\U0001F038-\\U0001F03E\\U0001F06A-\\U0001F070]"}, - {"Operator","[\\u2238\\u223A\\u2240]"}, - {"Operators","[\\u2212-\\u221D\\u2223-\\u2226\\u228C-\\u228E\\u2293-\\u22A3\\u22BA-\\u22BD\\u22C4-\\u22C7\\u22C9-\\u22CC\\u22D2\\u22D3\\u27D1-\\u27D4\\u2AFC-\\u2AFF]"}, - {"Ordinary diacritics","[\\u0300-\\u0333]"}, - {"Oriya-specific additions","[\\u0B70\\u0B71]"}, - {"Ornamental brackets","[\\u2768-\\u2775]"}, - {"Ornaments","[\\U0001D194-\\U0001D1A5]"}, - {"Orthographic Latin additions","[\\u2C60-\\u2C66]"}, - {"Orthographic letters for glottals","[\\uA78B\\uA78C]"}, - {"Other CJK punctuation","[\\u303B-\\u303D]"}, - {"Other CJK symbols","[\\u3030-\\u3037]"}, - {"Other combining marks","[\\u0656-\\u065E]"}, - {"Other materials","[\\U0001009D-\\U000100DD]"}, - {"Other modifier letter","[\\u02EE]"}, - {"Other phonetic symbols","[\\u1D79-\\u1D7F]"}, - {"Overscores and underscores","[\\uFE49-\\uFE4F]"}, - {"Overstruck diacritics","[\\u0334-\\u0338]"}, - {"Paired arrows and harpoons","[\\u21C4-\\u21CC]"}, - {"Paired harpoons","[\\u2962-\\u296F]"}, - {"Paired punctuation","[\\u0F3A-\\u0F3D]"}, - {"Pali and Sanskrit extensions","[\\u1050-\\u1059]"}, - {"Parenthesized Hangul elements","[\\u3200-\\u320D]"}, - {"Parenthesized Hangul syllables","[\\u320E-\\u321C]"}, - {"Parenthesized Korean words","[\\u321D\\u321E]"}, - {"Parenthesized Latin letters","[\\u249C-\\u24B5]"}, - {"Parenthesized ideographs","[\\u3220-\\u3243]"}, - {"Parenthesized numbers","[\\u2474-\\u2487]"}, - {"Pedals","[\\U0001D1AE-\\U0001D1B0]"}, - {"Pentagons","[\\u2B1F\\u2B20\\u2B53\\u2B54]"}, - {"People and animals","[\\U00010080-\\U0001008D]"}, - {"Persian letters","[\\u072D-\\u072F]"}, - {"Phonetic and historic letters","[\\u01DD-\\u01FF]"}, - {"Phonetic extensions for Ainu","[\\u31F0-\\u31FF]"}, - {"Pinyin diacritic-vowel combinations","[\\u01CD-\\u01DC]"}, - {"Playing card symbols","[\\u2660-\\u2667]"}, - {"Plus and minus sign operators","[\\u2A22-\\u2A2E]"}, - {"Poetic marks","[\\u060E\\u060F]"}, - {"Poetry marks","[\\uA828-\\uA82B]"}, - {"Point","[\\u0670]"}, - {"Pointing hand symbols","[\\u261A-\\u261F]"}, - {"Points and punctuation","[\\u05B0-\\u05C3\\u05C6\\u05C7]"}, - {"Points from ISO 8859-6","[\\u064B-\\u0652]"}, - {"Precomposed polytonic Greek","[\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F7D\\u1F80-\\u1FB4\\u1FB6-\\u1FC4\\u1FC6-\\u1FD3\\u1FD6-\\u1FDB\\u1FDD-\\u1FEF\\u1FF2-\\u1FF4\\u1FF6-\\u1FFE]"}, - {"Prevailing wind tiles","[\\U0001F000-\\U0001F003]"}, - {"Pronunciation variants from KS�X�1001:1998","[\\uF900-\\uFA0B]"}, - {"Prosodies (Prosodics)","[\\U0001D000-\\U0001D002]"}, - {"Puncta extraordinaria","[\\u05C4\\u05C5]"}, - {"Punctuation","[\\u037E\\u0387\\u0589\\u058A\\u0609\\u060A\\u060C\\u060D\\u061B\\u061E\\u061F\\u066A-\\u066D\\u06D4\\u07F7-\\u07F9\\u0DF4\\u104A\\u104B\\u10FB\\u1360-\\u1368\\u166E\\u1680\\u169B\\u169C\\u16EB-\\u16ED\\u1800-\\u180A\\u1B5A-\\u1B60\\u1C3B-\\u1C3F\\u1C7E\\u1C7F\\u2CFE\\u2CFF\\uA60D-\\uA60F\\uA8CE\\uA8CF\\uA92E\\uA92F\\uA95F\\uAA5C-\\uAA5F\\uFD3E\\uFD3F\\U00010100-\\U00010102\\U0001039F\\U000103D0\\U0001091F\\U0001093F\\U00010A50-\\U00010A58\\U00012470-\\U00012473]"}, - {"Punctuation for Tibetan","[\\uA876\\uA877]"}, - {"Punctuation mark","[\\uA673\\uA67E]"}, - {"Punctuation ornaments","[\\u275B-\\u275E\\u2761-\\u2767]"}, - {"Quine corners","[\\u231C-\\u231F]"}, - {"Radix symbols","[\\u0606\\u0607]"}, - {"Recycling symbols","[\\u2672-\\u267D]"}, - {"Relation","[\\u2239\\u22C8\\u22CD]"}, - {"Relational operators","[\\u2A66-\\u2ABC]"}, - {"Relations","[\\u2236\\u2237\\u223B-\\u223E\\u2241-\\u228B\\u228F-\\u2292\\u22A6-\\u22B9\\u22D0\\u22D1\\u22D4-\\u22FF\\u29E3-\\u29E6\\u2AF7-\\u2AFB]"}, - {"Religious and political symbols","[\\u2626-\\u262F]"}, - {"Replacement characters","[\\uFFFC\\uFFFD]"}, - {"Reserved","[\\u09E4\\u09E5\\u0A64\\u0A65\\u0AE4\\u0AE5\\u0B64\\u0B65\\u0BE4\\u0BE5\\u0C64\\u0C65\\u0CE4\\u0CE5\\u0D64\\u0D65]"}, - {"Rest","[\\U0001D129]"}, - {"Rests","[\\U0001D13A-\\U0001D142]"}, - {"Roman coin symbols","[\\U00010196-\\U0001019A]"}, - {"Roman military symbol","[\\U0001019B]"}, - {"Roman numerals","[\\u2160-\\u217F]"}, - {"Roman weights and measures","[\\U00010190-\\U00010195]"}, - {"Rythmika (Rhythmics)","[\\U0001D0DA-\\U0001D0E5]"}, - {"Sans-serif bold Greek symbols","[\\U0001D756-\\U0001D788]"}, - {"Sans-serif bold digits","[\\U0001D7EC-\\U0001D7F5]"}, - {"Sans-serif bold italic Greek symbols","[\\U0001D790-\\U0001D7C2]"}, - {"Sans-serif bold italic symbols","[\\U0001D63C-\\U0001D66F]"}, - {"Sans-serif bold symbols","[\\U0001D5D4-\\U0001D607]"}, - {"Sans-serif digits","[\\U0001D7E2-\\U0001D7EB]"}, - {"Sans-serif italic symbols","[\\U0001D608-\\U0001D63B]"}, - {"Sans-serif symbols","[\\U0001D5A0-\\U0001D5D3]"}, - {"Scan lines for terminal graphics","[\\u23BA-\\u23BD]"}, - {"Script symbols","[\\U0001D49C-\\U0001D4CF]"}, - {"Season tiles","[\\U0001F026-\\U0001F029]"}, - {"Set membership","[\\u2208-\\u220D]"}, - {"Shade characters","[\\u2591-\\u2593]"}, - {"Shan digits","[\\u1090-\\u1099]"}, - {"Shan symbols","[\\u109E\\u109F]"}, - {"Sibe letters","[\\u185D-\\u1872]"}, - {"Sidelining emphasis marks","[\\uFE45\\uFE46]"}, - {"Sign","[\\u09CD\\u09D7\\u0CCD\\u0DCA\\u0E2F\\u0E46\\u0EAF\\u0EC6\\u1B34\\u1B44\\uA806]"}, - {"Signs","[\\u0E4C-\\u0E4F\\u0E5A\\u0E5B\\u0EBC\\u0EBD\\u0ECC\\u0ECD\\u0FBE\\u0FBF\\U000101D0-\\U000101FC\\U00012000-\\U0001236E]"}, - {"Signs for Sindhi","[\\u06FD\\u06FE]"}, - {"Simple arrows","[\\u2190-\\u2199]"}, - {"Sindhi implosives","[\\u097B\\u097C\\u097E\\u097F]"}, - {"Sixes","[\\U0001F05B-\\U0001F061\\U0001F08D-\\U0001F093]"}, - {"Small form variants","[\\uFE50-\\uFE52\\uFE54-\\uFE66\\uFE68-\\uFE6B]"}, - {"Small letters","[\\u2C30-\\u2C5E\\u3095\\u3096]"}, - {"Small letters (Khutsuri)","[\\u2D00-\\u2D25]"}, - {"Sogdian letters","[\\u074D-\\u074F]"}, - {"Space","[\\u205F]"}, - {"Spaces","[\\u2000-\\u200B]"}, - {"Spacing accent marks","[\\u0384\\u0385]"}, - {"Spacing clones of diacritics","[\\u02D8-\\u02DD]"}, - {"Special","[\\uFEFF]"}, - {"Special CJK indicators","[\\u303E\\u303F]"}, - {"Special character","[\\u3164]"}, - {"Special character extension","[\\u23D0]"}, - {"Special character extensions","[\\u23AE\\u23AF]"}, - {"Specialized plus sign operators","[\\u29FA\\u29FB]"}, - {"Specials","[\\U0001D0F0-\\U0001D0F5]"}, - {"Specific symbol for control code","[\\u2426]"}, - {"Specific symbols for space","[\\u2422\\u2423]"}, - {"Square symbols","[\\u29C4-\\u29C9]"}, - {"Squared Katakana words","[\\u3300-\\u3357]"}, - {"Squared Latin abbreviation","[\\u3250\\u33FF]"}, - {"Squared Latin abbreviations","[\\u32CC-\\u32CF\\u3371-\\u337A\\u3380-\\u33DF]"}, - {"Squares","[\\u2B12-\\u2B15\\u2B1A-\\u2B1E]"}, - {"Staff brackets","[\\U0001D114\\U0001D115]"}, - {"Stars","[\\u2B50-\\u2B52]"}, - {"Stars, asterisks and snowflakes","[\\u2721-\\u274B]"}, - {"Staves","[\\U0001D116-\\U0001D11B]"}, - {"Stems","[\\U0001D165\\U0001D166]"}, - {"Subjoined Consonants","[\\uA867\\uA868]"}, - {"Subjoined consonant","[\\uA871]"}, - {"Subjoined consonants","[\\u0F90-\\u0F97\\u0F99-\\u0FB9\\u1929-\\u192B\\u1C24\\u1C25]"}, - {"Subscripts","[\\u2080-\\u208E\\u2090-\\u2094]"}, - {"Subset and superset relations","[\\u2ABD-\\u2AD8]"}, - {"Subtending marks","[\\u0600-\\u0603]"}, - {"Summation sign parts","[\\u23B2\\u23B3]"}, - {"Summations and integrals","[\\u2A0A-\\u2A1C]"}, - {"Superscripts","[\\u2070-\\u207F]"}, - {"Supplementary signs","[\\U00010040-\\U0001004D]"}, - {"Suzhou numerals","[\\u3021-\\u3029]"}, - {"Syllable","[\\u0F00]"}, - {"Syllable finals","[\\uA60B\\uA60C]"}, - {"Syllable iteration mark","[\\uA015]"}, - {"Syllables","[\\u1200-\\u1248\\u124A-\\u124D\\u1250-\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288\\u128A-\\u128D\\u1290-\\u12B0\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-\\u12C5\\u12C8-\\u12D6\\u12D8-\\u1310\\u1312-\\u1315\\u1318-\\u135A\\u13A0-\\u13F4\\u1401-\\u166C\\u166F-\\u1676\\uA000-\\uA014\\uA016-\\uA48C\\U00010800-\\U00010805\\U00010808\\U0001080A-\\U00010835\\U00010837\\U00010838\\U0001083C\\U0001083F]"}, - {"Syllables for Blin","[\\u2D93-\\u2D96]"}, - {"Syllables for Me'en","[\\u2D80-\\u2D92]"}, - {"Syllables for Sebatbeit","[\\u1380-\\u138F\\u2DA0-\\u2DA6\\u2DA8-\\u2DAE\\u2DB0-\\u2DB6\\u2DB8-\\u2DBE\\u2DC0-\\u2DC6\\u2DC8-\\u2DCE\\u2DD0-\\u2DD6\\u2DD8-\\u2DDE]"}, - {"Syllables in -a","[\\uA549-\\uA570]"}, - {"Syllables in -e","[\\uA5E1-\\uA60A]"}, - {"Syllables in -ee","[\\uA500-\\uA514]"}, - {"Syllables in -i","[\\uA515-\\uA548]"}, - {"Syllables in -o","[\\uA5BA-\\uA5E0]"}, - {"Syllables in -oo","[\\uA571-\\uA594]"}, - {"Syllables in -u","[\\uA595-\\uA5B9]"}, - {"Symbol","[\\u03FC\\u07F6\\u166D\\u327F\\uFDFD]"}, - {"Symbols","[\\u0FC4-\\u0FCC\\u2CE4-\\u2CEA\\U00010050-\\U0001005D]"}, - {"Symbols for draughts and checkers","[\\u26C0-\\u26C3]"}, - {"Synagmata or Gorgotites (Synagmas or Quickeners)","[\\U0001D08F-\\U0001D099]"}, - {"Syriac cross symbols","[\\u2670\\u2671]"}, - {"Syriac format control character","[\\u070F]"}, - {"Syriac letters","[\\u0710-\\u072C]"}, - {"Syriac marks","[\\u0740-\\u074A]"}, - {"Syriac points (vowels)","[\\u0730-\\u073F]"}, - {"Syriac punctuation and signs","[\\u0700-\\u070D]"}, - {"Tablature","[\\U0001D11C\\U0001D11D]"}, - {"Tacks and turnstiles","[\\u27D8-\\u27DF\\u2ADE-\\u2AED]"}, - {"Tag components","[\\U000E0020-\\U000E007F]"}, - {"Tag identifiers","[\\U000E0001]"}, - {"Tamil numerics","[\\u0BF0-\\u0BF2]"}, - {"Tamil symbol","[\\u0BFA]"}, - {"Tamil symbols","[\\u0BF3-\\u0BF8]"}, - {"Telegraph symbols for days","[\\u33E0-\\u33FE]"}, - {"Telegraph symbols for hours","[\\u3358-\\u3370]"}, - {"Telegraph symbols for months","[\\u32C0-\\u32CB]"}, - {"Telugu fractions and weights","[\\u0C78-\\u0C7F]"}, - {"Terminal graphic characters","[\\u23B7-\\u23B9\\u2596-\\u259F]"}, - {"Tetragrams","[\\U0001D306-\\U0001D356]"}, - {"The IBM 32 compatibility ideographs","[\\uFA0E-\\uFA2D]"}, - {"Threes","[\\U0001F046-\\U0001F04C\\U0001F078-\\U0001F07E]"}, - {"Time signatures","[\\U0001D134\\U0001D135]"}, - {"Todo letters","[\\u1843-\\u185C]"}, - {"Tonal marks","[\\u1390-\\u1399]"}, - {"Tone letters","[\\u02E5-\\u02E9\\u1970-\\u1974]"}, - {"Tone marks","[\\u07EB-\\u07F5\\u0E48-\\u0E4B\\u0EC8-\\u0ECB\\u19C8\\u19C9\\uA92B-\\uA92D]"}, - {"Traditional letters","[\\u1681-\\u1694]"}, - {"Transliteration head letters","[\\u0F88-\\u0F8B]"}, - {"Tremolos","[\\U0001D167-\\U0001D16C]"}, - {"Triangle symbols","[\\u29CA-\\u29D0]"}, - {"Two-part dependent vowel signs","[\\u09CB\\u09CC\\u0B4B\\u0B4C\\u0BCA-\\u0BCC\\u0D4A-\\u0D4C\\u0DDC-\\u0DDE\\u17BE-\\u17C0\\u17C4\\u17C5]"}, - {"Twos","[\\U0001F03F-\\U0001F045\\U0001F071-\\U0001F077]"}, - {"UPA modifiers","[\\u02EF-\\u02FF]"}, - {"Uppercase Latin alphabet","[A-Z]"}, - {"Uppercase letters","[\\u0531-\\u0556\\U00010400-\\U00010427]"}, - {"Used for Ancient Greek","[\\u1DC0\\u1DC1]"}, - {"Variant letterform","[\\u03F9]"}, - {"Variant letterforms","[\\u03CF-\\u03D7\\u03F0-\\u03F2]"}, - {"Variant letterforms and symbols","[\\u03F4-\\u03F6]"}, - {"Variation selectors","[\\uFE00-\\uFE0F\\U000E0100-\\U000E01EF]"}, - {"Various signs","[\\u0901-\\u0903\\u093C\\u093D\\u094D\\u0950-\\u0954\\u0981-\\u0983\\u09BC\\u09BD\\u0A01-\\u0A03\\u0A3C\\u0A4D\\u0A51\\u0A81-\\u0A83\\u0ABC\\u0ABD\\u0ACD\\u0AD0\\u0B01-\\u0B03\\u0B3C\\u0B3D\\u0B4D\\u0B56\\u0B57\\u0B82\\u0B83\\u0BCD\\u0BD0\\u0BD7\\u0C01-\\u0C03\\u0C4D\\u0C55\\u0C56\\u0C82\\u0C83\\u0CBC\\u0CBD\\u0CD5\\u0CD6\\u0D02\\u0D03\\u0D4D\\u0D57\\u0D82\\u0D83\\u1036-\\u103A\\u104C-\\u104F\\u17C6-\\u17C8\\u17CB-\\u17D2\\u17D4-\\u17DA\\u17DC\\u17DD\\u1939-\\u193B\\u1940\\u1944\\u1945\\u19DE\\u19DF\\u1A1E\\u1A1F\\u1B00-\\u1B04\\u1B80-\\u1B82\\u1C36\\u1C37\\uA880\\uA881\\U000103C8-\\U000103CF\\U00010A0D-\\U00010A0F\\U00010A38-\\U00010A3A]"}, - {"Vedic signs","[\\u0CF1\\u0CF2]"}, - {"Vertical form digraph","[\\u309F\\u30FF]"}, - {"Vertical line operator","[\\u27CA]"}, - {"Vertical line operators","[\\u2AEE-\\u2AF5]"}, - {"Vertical tiles","[\\U0001F062]"}, - {"Vessels","[\\U000100DE-\\U000100FA]"}, - {"Vietnamese tone marks (deprecated)","[\\u0340\\u0341]"}, - {"Virama","[\\u1714\\u1734\\u1BAA\\uA8C4\\uA953\\U00010A3F]"}, - {"Vocalic modification","[\\u0F7E\\u0F7F]"}, - {"Voicing marks","[\\u3099-\\u309C]"}, - {"Vowel","[\\u0E47\\uA866]"}, - {"Vowel signs","[\\u19B0-\\u19C0\\u1BA4-\\u1BA9\\uA947-\\uA94E]"}, - {"Vowels","[\\u07A6-\\u07B0\\u0E30-\\u0E3A\\u0E40-\\u0E45\\u0EB0-\\u0EB9\\u0EBB\\u0EC0-\\u0EC4\\u1963-\\u196D\\u1A17-\\u1A1B\\u1B83-\\u1B89\\uA85E-\\uA861\\uA926-\\uA92A\\U00010466-\\U0001047F\\U00010A00-\\U00010A03\\U00010A05\\U00010A06]"}, - {"Warning signs","[\\u2620-\\u2623]"}, - {"Weather and astrological symbols","[\\u2600-\\u260D]"}, - {"Weather symbol","[\\u2614]"}, - {"White and black arrows","[\\u2B00-\\u2B0D]"}, - {"White arrows and keyboard symbols","[\\u21E6-\\u21F3]"}, - {"White on black circled numbers","[\\u24EB-\\u24F4]"}, - {"Word ligatures","[\\uFDF0-\\uFDFB]"}, - {"Yi radicals","[\\uA490-\\uA4C6]"}, - {"Yiddish digraphs","[\\u05F0-\\u05F2]"}, - {"Yijing hexagram symbols","[\\u4DC0-\\u4DFF]"}, - {"Yijing monogram and digram symbols","[\\u268A-\\u268F]"}, - {"Yijing trigram symbols","[\\u2630-\\u2637]"}, - {"Zeroes","[\\U0001F031-\\U0001F037\\U0001F063-\\U0001F069]"}, - {"Zodiacal symbols","[\\u2648-\\u2653]"}, - }; -} diff --git a/unicodetools/src/main/java/org/unicode/jsp/Typology.java b/unicodetools/src/main/java/org/unicode/jsp/Typology.java deleted file mode 100644 index 3b323ef3b..000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/Typology.java +++ /dev/null @@ -1,187 +0,0 @@ -package org.unicode.jsp; - -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; -import java.util.TreeSet; -import java.util.regex.Pattern; - -import com.ibm.icu.impl.Relation; -import com.ibm.icu.lang.UCharacter; -import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory; -import com.ibm.icu.lang.UProperty; -import com.ibm.icu.lang.UProperty.NameChoice; -import com.ibm.icu.text.UnicodeSet; - -public class Typology { - //static UnicodeMap reasons = new UnicodeMap(); - public static Map label_to_uset = new TreeMap(); - static { - label_to_uset.put("S", new UnicodeSet("[:S:]").freeze()); - label_to_uset.put("L", new UnicodeSet("[:L:]").freeze()); - label_to_uset.put("M", new UnicodeSet("[:M:]").freeze()); - label_to_uset.put("N", new UnicodeSet("[:N:]").freeze()); - label_to_uset.put("C", new UnicodeSet("[:C:]").freeze()); - label_to_uset.put("Z", new UnicodeSet("[:Z:]").freeze()); - label_to_uset.put("P", new UnicodeSet("[:P:]").freeze()); - } - public static Map full_path_to_uset = new TreeMap(); - public static Map path_to_uset = new TreeMap(); - //static Map,UnicodeSet> path_to_uset = new TreeMap,UnicodeSet>(); - public static Relation labelToPaths = new Relation(new TreeMap(), TreeSet.class); - public static Map> label_parent_uset = new TreeMap(); - //public static Relation pathToList = new Relation(new TreeMap(), TreeSet.class); - - static class MyReader extends FileUtilities.SemiFileReader { - //0000 Cc [Control] [X] [X] [X] - public final static Pattern SPLIT = Pattern.compile("\\s*\t\\s*"); - public final static Pattern NON_ALPHANUM = Pattern.compile("[^0-9A-Za-z]+"); - - @Override - protected String[] splitLine(String line) { - return SPLIT.split(line); - } - - StringBuilder temp_path = new StringBuilder(); - - @Override - protected boolean handleLine(int startRaw, int endRaw, String[] items) { - temp_path.setLength(0); - temp_path.append('/'); - for (int i = 2; i < 6; ++i) { - String item = items[i]; - if (item.equals("[X]")) { - continue; - } - - if (!item.startsWith("[") || !item.endsWith("]")) { - throw new IllegalArgumentException(i + "\t" + item); - } - item = item.substring(1, item.length()-1); - if (item.length() == 0) { - continue; - } - item = NON_ALPHANUM.matcher(item).replaceAll("_"); - temp_path.append('/').append(item); - } - String fullPath = temp_path.toString(); - - // store - { - fullPath = fullPath.intern(); - UnicodeSet uset = full_path_to_uset.get(fullPath); - if (uset == null) { - full_path_to_uset.put(fullPath, uset = new UnicodeSet()); - } - uset.addAll(startRaw, endRaw); - } - - final String[] labels = fullPath.split("/"); - String path = ""; - for (final String item : labels) { - UnicodeSet uset = label_to_uset.get(item); - if (uset == null) { - label_to_uset.put(item, uset = new UnicodeSet()); - } - uset.add(startRaw, endRaw); - - //labelToPath.put(item, path); - - path = (path + "/" + item).intern(); - - uset = path_to_uset.get(path); - if (uset == null) { - path_to_uset.put(path, uset = new UnicodeSet()); - } - uset.addAll(startRaw, endRaw); - } - return true; - } - - Map,List> listCache = new HashMap,List>(); - Map,Set> setCache = new HashMap,Set>(); - - private T intern(Map cache, T list) { - final T old = cache.get(list); - if (old != null) { - return old; - } - cache.put(list,list); - return list; - } - } - - static { - new MyReader().process(Typology.class, "Categories.txt"); // "09421-u52m09xxxx.txt" - - // fix the paths - final Map temp= new TreeMap(); - for (int i = 0; i < ECharacterCategory.CHAR_CATEGORY_COUNT; ++i) { - final UnicodeSet same = new UnicodeSet() - .applyIntPropertyValue(UProperty.GENERAL_CATEGORY, i); - final String gcName = UCharacter.getPropertyValueName(UProperty.GENERAL_CATEGORY, i, NameChoice.SHORT); - //System.out.println("\n" + gcName); - final String prefix = gcName.substring(0,1); - - for (final String path : path_to_uset.keySet()) { - final UnicodeSet uset = path_to_uset.get(path); - if (!same.containsSome(uset)) { - continue; - } - final String path2 = prefix + path; - temp.put(path2, new UnicodeSet(uset).retainAll(same)); - final String[] labels = path2.split("/"); - String parent = ""; - for (int j = 0; j < labels.length; ++j) { - labelToPaths.put(labels[j], path2); - if (j == 0) { - continue; - } - Map map = label_parent_uset.get(labels[j]); - if (map == null) { - label_parent_uset.put(labels[j], map = new TreeMap()); - } - UnicodeSet uset2 = map.get(parent); - if (uset2 == null) { - map.put(parent, uset2 = new UnicodeSet()); - } - uset2.addAll(uset); - parent += labels[j] + "/"; - } - } - } - // Set labelUsetKeys = label_to_uset.keySet(); - // Set labelToPathKeys = labelToPath.keySet(); - // if (!labelUsetKeys.equals(labelToPathKeys)) { - // TreeSet uset_path = new TreeSet(labelUsetKeys); - // uset_path.removeAll(labelToPathKeys); - // System.out.println("\nuset - path labels\t" + uset_path); - // TreeSet path_uset = new TreeSet(labelToPathKeys); - // path_uset.removeAll(labelUsetKeys); - // System.out.println("\npath -uset labels\t" + path_uset); - // } - label_to_uset = freezeMapping(label_to_uset); - path_to_uset = freezeMapping(temp); - labelToPaths.freeze(); - // invert - } - - private static Map freezeMapping(Map map) { - for (final String key : map.keySet()) { - final UnicodeSet uset = map.get(key); - uset.freeze(); - } - return Collections.unmodifiableMap(map); - } - - public static UnicodeSet getSet(String label) { - return label_to_uset.get(label); - } - - public static Set getLabels() { - return label_to_uset.keySet(); - } -} diff --git a/unicodetools/src/main/java/org/unicode/jsp/UnicodeJsp.java b/unicodetools/src/main/java/org/unicode/jsp/UnicodeJsp.java deleted file mode 100644 index 325c8c2eb..000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/UnicodeJsp.java +++ /dev/null @@ -1,365 +0,0 @@ -package org.unicode.jsp; - -import java.io.IOException; -import java.io.PrintWriter; -import java.io.StringWriter; -import java.util.Arrays; -import java.util.Collection; -import java.util.List; -import java.util.Random; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.unicode.cldr.util.BNF; -import org.unicode.cldr.util.Quoter; -import org.unicode.idna.Idna2003; -import org.unicode.idna.Idna2008; -import org.unicode.idna.Uts46; -import org.unicode.jsp.UnicodeUtilities.CodePointShower; - -import com.ibm.icu.lang.UCharacter; -import com.ibm.icu.text.BreakIterator; -import com.ibm.icu.text.Normalizer; -import com.ibm.icu.text.NumberFormat; -import com.ibm.icu.text.RuleBasedBreakIterator; -import com.ibm.icu.text.Transliterator; -import com.ibm.icu.text.UTF16; -import com.ibm.icu.text.UnicodeSet; -import com.ibm.icu.util.ULocale; - -public class UnicodeJsp { - - public static NumberFormat nf = NumberFormat.getInstance(ULocale.ENGLISH); - static { - nf.setGroupingUsed(true); - nf.setMaximumFractionDigits(0); - } - - public static String showBidi(String str, int baseDirection, boolean asciiHack) { - return UnicodeUtilities.showBidi(str, baseDirection, asciiHack); - } - - public static String validateLanguageID(String input, String locale) { - final String result = LanguageCode.validate(input, new ULocale(locale)); - return result; - } - - public static String showRegexFind(String regex, String test) { - try { - final Matcher matcher = Pattern.compile(regex, Pattern.COMMENTS).matcher(test); - String result = UnicodeUtilities.toHTML.transform(matcher.replaceAll("⇑⇑$0⇓⇓")); - result = result.replaceAll("⇑⇑", "").replaceAll("⇓⇓", ""); - return result; - } catch (final Exception e) { - return "Error: " + e.getMessage(); - } - } - - /** - * The regex doesn't have to have the UnicodeSets resolved. - * @param regex - * @param count - * @param maxRepeat - * @return - */ - public static String getBnf(String regexSource, int count, int maxRepeat) { - //String regex = new UnicodeRegex().compileBnf(rules); - final String regex = regexSource.replace("(?:", "(").replace("(?i)", ""); - - final BNF bnf = new BNF(new Random(), new Quoter.RuleQuoter()); - if (maxRepeat > 20) { - maxRepeat = 20; - } - bnf.setMaxRepeat(maxRepeat) - .addRules("$root=" + regex + ";") - .complete(); - final StringBuffer output = new StringBuffer(); - for (int i = 0; i < count; ++i) { - final String line = bnf.next(); - output.append("

").append(UnicodeUtilities.toHTML(line)).append("

"); - } - return output.toString(); - } - - public static String showBreaks(String text, String choice) { - - RuleBasedBreakIterator b; - if (choice.equals("Word")) { - b = (RuleBasedBreakIterator) BreakIterator.getWordInstance(); - } else if (choice.equals("Line")) { - b = (RuleBasedBreakIterator) BreakIterator.getLineInstance(); - } else if (choice.equals("Sentence")) { - b = (RuleBasedBreakIterator) BreakIterator.getSentenceInstance(); - } else { - b = (RuleBasedBreakIterator) BreakIterator.getCharacterInstance(); - } - - final Matcher decimalEscapes = Pattern.compile("&#(x)?([0-9]+);").matcher(text); - // quick hack, since hex-any doesn't do decimal escapes - int start = 0; - final StringBuffer result2 = new StringBuffer(); - while (decimalEscapes.find(start)) { - final int radix = 10; - final int code = Integer.parseInt(decimalEscapes.group(2), radix); - result2.append(text.substring(start,decimalEscapes.start()) + UTF16.valueOf(code)); - start = decimalEscapes.end(); - } - result2.append(text.substring(start)); - text = result2.toString(); - - int lastBreak = 0; - final StringBuffer result = new StringBuffer(); - b.setText(text); - b.first(); - for (int nextBreak = b.next(); nextBreak != BreakIterator.DONE; nextBreak = b.next()) { - final int status = b.getRuleStatus(); - String piece = text.substring(lastBreak, nextBreak); - //piece = toHTML.transliterate(piece); - piece = UnicodeUtilities.toHTML(piece); - - piece = piece.replaceAll(" ","
"); - result.append("").append(piece).append(""); - lastBreak = nextBreak; - } - - return result.toString(); } - - public static void showProperties(int cp, Appendable out) throws IOException { - UnicodeUtilities.showProperties(cp, out); - } - - static String defaultIdnaInput = "" - +"fass.de faß.de fäß.de xn--fa-hia.de" - + "\n₹.com 𑀓.com" - + "\n\u0080.com xn--a.com a\u200cb xn--ab-j1t" - +"\nöbb.at ÖBB.at ÖBB.at" - +"\nȡog.de ☕.de I♥NY.de" - +"\nABC・日本.co.jp 日本。co。jp 日本。co.jp 日本⒈co.jp" - +"\nx\\u0327\\u0301.de x\\u0301\\u0327.de" - +"\nσόλος.gr Σόλος.gr ΣΌΛΟΣ.gr" - +"\nﻋﺮﺑﻲ.de عربي.de نامهای.de نامه\\u200Cای.de".trim(); - - public static String getDefaultIdnaInput() { - return defaultIdnaInput; - } - public static final Transliterator UNESCAPER = Transliterator.getInstance("hex-any"); - - public static String getLanguageOptions(String locale) { - return LanguageCode.getLanguageOptions(new ULocale(locale)); - } - - public static String getTrace(Exception e) { - return Arrays.asList(e.getStackTrace()).toString().replace("\n", "<\br>"); - } - - public static String getSimpleSet(String setA, UnicodeSet a, boolean abbreviate, boolean escape) { - String a_out; - a.clear(); - try { - //setA = UnicodeSetUtilities.MyNormalize(setA, Normalizer.NFC); - setA = setA.replace("..U+", "-\\u"); - setA = setA.replace("U+", "\\u"); - a.addAll(UnicodeSetUtilities.parseUnicodeSet(setA)); - a_out = UnicodeUtilities.getPrettySet(a, abbreviate, escape); - } catch (final Exception e) { - a_out = e.getMessage(); - } - return a_out; - } - - public static void showSet(String grouping, UnicodeSet a, boolean abbreviate, boolean ucdFormat, Appendable out) throws IOException { - final CodePointShower codePointShower = new CodePointShower(abbreviate, ucdFormat, false); - UnicodeUtilities.showSet(grouping, a, codePointShower, out); - } - public static void showPropsTable(Appendable out, String propForValues, String myLink) throws IOException { - UnicodeUtilities.showPropsTable(out, propForValues, myLink); - } - public static String showTransform(String transform, String sample) { - return UnicodeUtilities.showTransform(transform, sample); - } - - public static String listTransforms() { - return UnicodeUtilities.listTransforms(); - } - - public static void getDifferences(String setA, String setB, - boolean abbreviate, String[] abResults, int[] abSizes, String[] abLinks) { - UnicodeUtilities.getDifferences(setA, setB, abbreviate, abResults, abSizes, abLinks); - } - - public static int parseCode(String text, String nextButton, String previousButton) { - //text = fromHTML.transliterate(text); - final String trimmed = text.trim(); - if (trimmed.length() > 1) { - try { - text = UTF16.valueOf(Integer.parseInt(trimmed,16)); - } catch (final Exception e) {} - } - int cp = UTF16.charAt(text, 0); - if (nextButton != null) { - cp += 1; - if (cp > 0x10FFFF) { - cp = 0; - } - } else if (previousButton != null) { - cp -= 1; - if (cp < 0) { - cp = 0x10FFFF; - } - } - return cp; - } - - public static String getConfusables(String test, int choice) { - try { - - final Confusables confusables = new Confusables(test); - switch (choice) { - case 0: // none - break; - case 1: // IDNA2008 - confusables.setAllowedCharacters(Idna2003.SINGLETON.validSet_transitional); - confusables.setNormalizationCheck(Normalizer.NFC); - break; - case 2: // IDNA2008 - confusables.setAllowedCharacters(Idna2008.SINGLETON.validSet_transitional); - confusables.setNormalizationCheck(Normalizer.NFC); - break; - case 3: // UTS46/39 - confusables.setAllowedCharacters(new UnicodeSet(Uts46.SINGLETON.validSet_transitional).retainAll(XIDModifications.getAllowed())); - confusables.setNormalizationCheck(Normalizer.NFC); - confusables.setScriptCheck(Confusables.ScriptCheck.same); - break; - } - return getConfusablesCore(test, confusables); - } catch (final Exception e) { - return returnStackTrace(e); - } - } - - private static String returnStackTrace(Exception e) { - final StringWriter s = new StringWriter(); - final PrintWriter p = new PrintWriter(s); - e.printStackTrace(p); - String str = UnicodeUtilities.toHTML(s.toString()); - str = str.replace("\n", "
"); - return str; - } - - - public static String getConfusables(String test, boolean nfkcCheck, boolean scriptCheck, boolean idCheck, boolean xidCheck) { - try { - - final Confusables confusables = new Confusables(test); - if (nfkcCheck) { - confusables.setNormalizationCheck(Normalizer.NFKC); - } - if (scriptCheck) { - confusables.setScriptCheck(Confusables.ScriptCheck.same); - } - if (idCheck) { - confusables.setAllowedCharacters(new UnicodeSet("[\\-[:L:][:M:][:N:]]")); - } - if (xidCheck) { - confusables.setAllowedCharacters(XIDModifications.getAllowed()); - } - - return getConfusablesCore(test, confusables); - } catch (final Exception e) { - return returnStackTrace(e); - } - } - - private static String getConfusablesCore(String test, Confusables confusables) { - test = test.replaceAll("[\n\t]", " ").trim(); - final StringBuilder result = new StringBuilder(); - final double maxSize = confusables.getMaxSize(); - final List> alternates = confusables.getAlternates(); - if (alternates.size() > 0) { - int max = 0; - for (final Collection items : alternates) { - final int size = items.size(); - if (size > max) { - max = size; - } - } - final String topCell = ""; - final String underStart = " "; - final String underEnd = " "; - final UnicodeSet nsm = new UnicodeSet("[[:Mn:][:Me:]]"); - - result.append("\n"); - for (final Collection items : alternates) { - result.append(""); - for (final String item : items) { - result.append(topCell); - String htmlItem = UnicodeUtilities.toHTML(item); - if (nsm.containsAll(item)) { - htmlItem = " " + htmlItem + " "; - } - result.append(underStart).append(htmlItem).append(underEnd); - result.append(""); - } - for (int i = max - items.size(); i > 0; --i) { - result.append(""); - } - result.append("\n"); - - result.append(""); - for (final String item : items) { - result.append(""); - } - result.append("\n"); - - result.append(""); - for (final String item : items) { - result.append(""); - } - result.append("\n"); - } - result.append("

Confusable Characters

 
"); - result.append(com.ibm.icu.impl.Utility.hex(item)); - result.append("
"); - result.append(UCharacter.getName(item, " + ")); - result.append("
\n"); - } - - result.append("

Total raw values: " + nf.format(maxSize) + "

\n"); - if (maxSize > 1000000) { - result.append( "

Too many raw items to process.

\n"); - return result.toString(); - } - - result.append("

Confusable Results

"); - int count = 0; - result.append("
"); - for (final String item : confusables) { - ++count; - if (count > 1000) { - continue; - } - if (count != 1) { - result.append("\n"); - } - result.append(UnicodeUtilities.toHTML(item)); - } - if (count > 1000) { - result.append(" ...\n"); - } - result.append("
\n"); - result.append("

Total filtered values: " + nf.format(count) + "

\n"); - - if (count > 1000) { - result.append("

Too many filtered items to display; truncating to 1,000.

\n"); - } - return result.toString(); - } - public static String testIdnaLines(String lines, String filter) { - return UnicodeUtilities.testIdnaLines(lines, filter); - } - - public static String getIdentifier(String script) { - return UnicodeUtilities.getIdentifier(script); - } -} diff --git a/unicodetools/src/main/java/org/unicode/jsp/UnicodeRegex.java b/unicodetools/src/main/java/org/unicode/jsp/UnicodeRegex.java index 6b356ab22..a9a7eebbc 100644 --- a/unicodetools/src/main/java/org/unicode/jsp/UnicodeRegex.java +++ b/unicodetools/src/main/java/org/unicode/jsp/UnicodeRegex.java @@ -68,11 +68,10 @@ public UnicodeRegex setSymbolTable(SymbolTable symbolTable) { * @return A processed Java regex pattern, suitable for input to * Pattern.compile(). */ - @Override public String transform(String regex) { - final StringBuilder result = new StringBuilder(); - final UnicodeSet temp = new UnicodeSet(); - final ParsePosition pos = new ParsePosition(0); + StringBuilder result = new StringBuilder(); + UnicodeSet temp = new UnicodeSet(); + ParsePosition pos = new ParsePosition(0); int state = 0; // 1 = after \ // We add each character unmodified to the output, unless we have a @@ -81,7 +80,7 @@ public String transform(String regex) { for (int i = 0; i < regex.length(); ++i) { // look for UnicodeSets, allowing for quoting with \ and \Q - final char ch = regex.charAt(i); + char ch = regex.charAt(i); switch (state) { case 0: // we only care about \, and '['. if (ch == '\\') { @@ -172,7 +171,7 @@ public String compileBnf(String bnfLines) { * *

* Caveats: at this point the parsing is simple; for example, # cannot be - * quoted (use \\u0023); you can set it to null to disable. + * quoted (use \\u0023); you can set it to null to disable. * The equality sign and a few others can be reset with * setBnfX(). * @@ -184,26 +183,26 @@ public String compileBnf(String bnfLines) { * @return Pattern */ public String compileBnf(List lines) { - final Map variables = getVariables(lines); - final Set unused = new LinkedHashSet(variables.keySet()); + Map variables = getVariables(lines); + Set unused = new LinkedHashSet(variables.keySet()); // brute force replacement; do twice to allow for different order // later on can optimize for (int i = 0; i < 2; ++i) { - for (final String variable : variables.keySet()) { - final String definition = variables.get(variable); - for (final String variable2 : variables.keySet()) { + for (String variable : variables.keySet()) { + String definition = variables.get(variable); + for (String variable2 : variables.keySet()) { if (variable.equals(variable2)) { continue; } - final String definition2 = variables.get(variable2); - final String altered2 = definition2.replace(variable, definition); + String definition2 = variables.get(variable2); + String altered2 = definition2.replace(variable, definition); if (!altered2.equals(definition2)) { unused.remove(variable); variables.put(variable2, altered2); if (log != null) { try { log.append(variable2 + "=" + altered2 + ";"); - } catch (final IOException e) { + } catch (IOException e) { throw (IllegalArgumentException) new IllegalArgumentException().initCause(e); } } @@ -260,9 +259,9 @@ public void setBnfLineSeparator(String bnfLineSeparator) { * @throws IOException */ public static List loadFile(String file, List result) throws IOException { - final BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8")); + BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8")); while (true) { - final String line = in.readLine(); + String line = in.readLine(); if (line == null) { break; } @@ -275,12 +274,11 @@ public static List loadFile(String file, List result) throws IOE /* (non-Javadoc) * @see com.ibm.icu.util.Freezable#cloneAsThawed() */ - @Override public Object cloneAsThawed() { // TODO Auto-generated method stub try { return clone(); - } catch (final CloneNotSupportedException e) { + } catch (CloneNotSupportedException e) { throw new IllegalArgumentException(); // should never happen } } @@ -288,7 +286,6 @@ public Object cloneAsThawed() { /* (non-Javadoc) * @see com.ibm.icu.util.Freezable#freeze() */ - @Override public Object freeze() { // no action needed now. return this; @@ -297,7 +294,6 @@ public Object freeze() { /* (non-Javadoc) * @see com.ibm.icu.util.Freezable#isFrozen() */ - @Override public boolean isFrozen() { // at this point, always true return true; @@ -308,12 +304,12 @@ public boolean isFrozen() { private int processSet(String regex, int i, StringBuilder result, UnicodeSet temp, ParsePosition pos) { try { pos.setIndex(i); - final UnicodeSet x = temp.clear().applyPattern(regex, pos, symbolTable, 0); + UnicodeSet x = temp.clear().applyPattern(regex, pos, symbolTable, 0); x.complement().complement(); // hack to fix toPattern result.append(x.toPattern(false)); i = pos.getIndex() - 1; // allow for the loop increment return i; - } catch (final Exception e) { + } catch (Exception e) { throw (IllegalArgumentException) new IllegalArgumentException("Error in " + regex).initCause(e); } } @@ -322,13 +318,12 @@ private int processSet(String regex, int i, StringBuilder result, UnicodeSet tem private String bnfCommentString = "#"; private String bnfVariableInfix = "="; private String bnfLineSeparator = "\n"; - private final Appendable log = null; + private Appendable log = null; - private final Comparator LongestFirst = new Comparator () { - @Override + private Comparator LongestFirst = new Comparator () { public int compare(String arg0, String arg1) { - final int len0 = arg0.length(); - final int len1 = arg1.length(); + int len0 = arg0.length(); + int len1 = arg1.length(); if (len0 != len1) { return len1 - len0; } @@ -338,9 +333,9 @@ public int compare(String arg0, String arg1) { private Map getVariables(List lines) { - final Map variables = new TreeMap(LongestFirst); + Map variables = new TreeMap(LongestFirst); String variable = null; - final StringBuffer definition = new StringBuffer(); + StringBuffer definition = new StringBuffer(); int count = 0; for (String line : lines) { ++count; @@ -353,12 +348,12 @@ private Map getVariables(List lines) { } if (bnfCommentString != null) { - final int hashPos = line.indexOf(bnfCommentString); + int hashPos = line.indexOf(bnfCommentString); if (hashPos >= 0) { line = line.substring(0, hashPos); } } - final String trimline = line.trim(); + String trimline = line.trim(); if (trimline.length() == 0) { continue; } @@ -368,11 +363,11 @@ private Map getVariables(List lines) { if (linePart.trim().length() == 0) { continue; } - final boolean terminated = trimline.endsWith(";"); + boolean terminated = trimline.endsWith(";"); if (terminated) { linePart = linePart.substring(0,linePart.lastIndexOf(';')); } - final int equalsPos = linePart.indexOf(bnfVariableInfix); + int equalsPos = linePart.indexOf(bnfVariableInfix); if (equalsPos >= 0) { if (variable != null) { throw new IllegalArgumentException("Missing ';' before " + count + ") " + line); diff --git a/unicodetools/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java b/unicodetools/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java deleted file mode 100644 index 6d3bcbd9b..000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java +++ /dev/null @@ -1,32 +0,0 @@ -package org.unicode.jsp; - -import java.text.ParsePosition; - -import org.unicode.cldr.util.props.UnicodePropertySymbolTable; - -import com.ibm.icu.text.UnicodeSet; - -public class UnicodeSetUtilities { - private static UnicodeSet OK_AT_END = new UnicodeSet("[ \\]\t]").freeze(); - - public static UnicodeSet parseUnicodeSet(String input) { - input = input.trim() + "]]]]]"; - final String parseInput = "[" + input + "]]]]]"; - final ParsePosition parsePosition = new ParsePosition(0); - final UnicodeSet result = new UnicodeSet(parseInput, parsePosition, fullSymbolTable); - int parseEnd = parsePosition.getIndex(); - if (parseEnd != parseInput.length() && !UnicodeSetUtilities.OK_AT_END.containsAll(parseInput.substring(parseEnd))) { - parseEnd--; // get input offset - throw new IllegalArgumentException("Additional characters past the end of the set, at " - + parseEnd + ", ..." - + input.substring(Math.max(0, parseEnd - 10), parseEnd) - + "|" - + input.substring(parseEnd, Math.min(input.length(), parseEnd + 10)) - ); - } - return result; - } - - - static UnicodeSet.XSymbolTable fullSymbolTable = new UnicodePropertySymbolTable(XPropertyFactory.make()); -} diff --git a/unicodetools/src/main/java/org/unicode/jsp/UnicodeUtilities.java b/unicodetools/src/main/java/org/unicode/jsp/UnicodeUtilities.java deleted file mode 100644 index a9413e432..000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/UnicodeUtilities.java +++ /dev/null @@ -1,1701 +0,0 @@ -package org.unicode.jsp; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.PrintWriter; -import java.io.StringWriter; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Comparator; -import java.util.Enumeration; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; -import java.util.TreeSet; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.unicode.cldr.tool.TablePrinter; -import org.unicode.cldr.util.Predicate; -import org.unicode.cldr.util.UnicodeSetPrettyPrinter; -import org.unicode.cldr.util.props.UnicodeProperty; -import org.unicode.idna.GenerateIdnaTest; -import org.unicode.idna.Idna.IdnaType; -import org.unicode.idna.Idna2003; -import org.unicode.idna.Idna2008; -import org.unicode.idna.IdnaTypes; -import org.unicode.idna.Punycode; -import org.unicode.idna.Uts46; - -import com.ibm.icu.dev.util.UnicodeMap; -import com.ibm.icu.impl.Row.R4; -import com.ibm.icu.impl.Utility; -import com.ibm.icu.lang.UCharacter; -import com.ibm.icu.lang.UProperty; -import com.ibm.icu.text.Collator; -import com.ibm.icu.text.Normalizer; -import com.ibm.icu.text.NumberFormat; -import com.ibm.icu.text.RuleBasedCollator; -import com.ibm.icu.text.StringTransform; -import com.ibm.icu.text.Transliterator; -import com.ibm.icu.text.UTF16; -import com.ibm.icu.text.UnicodeSet; -import com.ibm.icu.text.UnicodeSetIterator; -import com.ibm.icu.util.ULocale; -import com.ibm.icu.util.VersionInfo; - -public class UnicodeUtilities { - - - static final UnicodeSet OFF_LIMITS = new UnicodeSet(UnicodeProperty.getUNASSIGNED()).addAll(UnicodeProperty.PRIVATE_USE).addAll(UnicodeProperty.SURROGATE).freeze(); - static final UnicodeSet NONCHAR = new UnicodeSet(OFF_LIMITS).addAll(new UnicodeSet("[:Cc:]")).removeAll(new UnicodeSet("[:whitespace:]")).freeze(); - - - private static Subheader subheader = null; - - static Transliterator toHTML; - static String HTML_RULES_CONTROLS; - static { - - final String BASE_RULES = "'<' > '<' ;" + "'<' < '&'[lL][Tt]';' ;" - + "'&' > '&' ;" + "'&' < '&'[aA][mM][pP]';' ;" - + "'>' < '&'[gG][tT]';' ;" + "'\"' < '&'[qQ][uU][oO][tT]';' ; " - + "'' < '&'[aA][pP][oO][sS]';' ; "; - - final String CONTENT_RULES = "'>' > '>' ;"; - - final String HTML_RULES = BASE_RULES + CONTENT_RULES + "'\"' > '"' ; "; - - HTML_RULES_CONTROLS = HTML_RULES - + "[[:di:]-[:cc:]-[:cs:]-[\\u200E\\u200F]] > ; " // remove, should ignore in rendering (but may not be in browser) - + "[[:nchar:][:cn:][:cs:][:co:][:cc:]-[:whitespace:]-[\\u200E\\u200F]] > \\uFFFD ; "; // should be missing glyph (but may not be in browser) - // + "([[:C:][:Z:][:whitespace:][:Default_Ignorable_Code_Point:]-[\\u0020]]) > &hex/xml($1) ; "; // [\\u0080-\\U0010FFFF] - - toHTML = Transliterator.createFromRules("any-xml", HTML_RULES_CONTROLS, - Transliterator.FORWARD); - } - - public static String toHTML(String input) { - return toHTML.transliterate(input); - } - - static Transliterator UNICODE = Transliterator.getInstance("hex-any"); - - static final int IDNA_TYPE_LIMIT = 4; - - // static final Map idnaTypeSet = new TreeMap(); - // static { - // for (IdnaType i : IdnaType.values()) { - // idnaTypeSet.put(i, new UnicodeSet()); - // } - // } - - public static UnicodeMap getIdnaDifferences(UnicodeSet remapped, UnicodeSet skip) { - final UnicodeMap result = new UnicodeMap(); - final UnicodeSet valid2008 = GenerateIdnaTest.getIdna2008Valid(); - - final VersionInfo empty = VersionInfo.getInstance(0); - - for (int i = 0; i <= 0x10FFFF; ++i) { - if ((i & 0xFFF) == 0) { - System.out.println(Utility.hex(i)); - } - if (i == 0x058F) { - System.out.println("debug"); - } - if (skip.contains(i)) { - continue; - } - final VersionInfo birth = UCharacter.getAge(i); - final boolean isNew = birth.compareTo(VersionInfo.UNICODE_3_2) > 0 || birth.equals(empty); - final String age = isNew ? ">3.2!!!" : "v3.2"; - final IdnaType idna2003 = Idna2003.getIDNA2003Type(i); - final IdnaType tr46 = Uts46.SINGLETON.getType(i); - if (isNew) {// skip - } else if ((tr46 == IdnaType.mapped || idna2003 == IdnaType.mapped) && tr46 != IdnaType.disallowed && idna2003 != IdnaType.disallowed) { - remapped.add(i); - } - //TestStatus testResult = valid2008.contains(i); - final IdnaType idna2008 = valid2008.contains(i) ? IdnaType.valid : IdnaType.disallowed; - final String iClass = age - + "\t" + getShortName(idna2003) - + "\t" + getShortName(tr46) - + "\t" + getShortName(idna2008) - ; - result.put(i, iClass); - } - return result.freeze(); - } - - - static String getShortName(IdnaType tr46) { - // TODO Auto-generated method stub - return UCharacter.toTitleCase( - tr46==IdnaType.valid ? "Valid" - : tr46==IdnaType.ignored || tr46==IdnaType.mapped ? "Mapped/Ignored" - : tr46.toString() - , null); - } - - - - static final UnicodeSet MARK = new UnicodeSet("[:M:]").freeze(); - - static String getXStringPropertyValue(int propertyEnum, int codepoint, int nameChoice, Normalizer.Mode compat) { - if (compat == null || Normalizer.isNormalized(codepoint, compat, 0)) { - return Common.getXStringPropertyValue(propertyEnum, codepoint, nameChoice); - } - final String s = Common.MyNormalize(codepoint, compat); - int cp; - String lastPart = null; - for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { - cp = UTF16.charAt(s, i); - final String part = Common.getXStringPropertyValue(propertyEnum, cp, nameChoice); - if (lastPart == null) { - lastPart = part; - } else if (!lastPart.equals(part)) { - if (propertyEnum == UProperty.SCRIPT && MARK.contains(cp)) { - continue; - } - return "Mixed"; - } - } - return lastPart; - } - - static UnicodeSet COMMON_USE_SCRIPTS = new UnicodeSet("[[:script=Zyyy:] [:script=Zinh:] [:script=Arab:] [:script=Armn:]" + - " [:script=Beng:] [:script=Bopo:] [:script=Cans:] [:script=Cyrl:] [:script=Deva:] [:script=Ethi:]" + - " [:script=Geor:] [:script=Grek:] [:script=Gujr:] [:script=Guru:] [:script=Hani:] [:script=Hang:]" + - " [:script=Hebr:] [:script=Hira:] [:script=Knda:] [:script=Kana:] [:script=Khmr:] [:script=Laoo:]" + - " [:script=Latn:] [:script=Mlym:] [:script=Mong:] [:script=Mymr:] [:script=Orya:] [:script=Sinh:] " + - "[:script=Taml:] [:script=Telu:] [:script=Tfng:] [:script=Thaa:] [:script=Thai:] [:script=Tibt:] [:script=Yiii:]]").freeze(); - - static UnicodeSet LITURGICAL = new UnicodeSet("[\u0615\u0617-\u061A\u0671\u06D6-\u06ED\u08F0-\u08F3[:sc=coptic:]" + - "\u1CD0-\u1CF2\u214F]"); - static UnicodeSet DEPRECATED = new UnicodeSet("[:deprecated:]").freeze(); - - static int getXPropertyEnum(String propertyAlias) { - final int extra = Common.XPROPERTY_NAMES.indexOf(propertyAlias - .toLowerCase(Locale.ENGLISH)); - if (extra != -1) { - return UProperty.STRING_LIMIT + extra; - } - return UCharacter.getPropertyEnum(propertyAlias); - } - - // protected static boolean getIdnaProperty(String propertyValue, - // UnicodeSet result) { - // try { - // String lowercase = propertyValue.toLowerCase(Locale.ENGLISH); - // IdnaType i = lowercase.equals("output") ? IdnaType.valid - // : lowercase.equals("remapped") ? IdnaType.mapped - // : IdnaType.valueOf(lowercase); - // result.clear().addAll(idnaTypeSet.get(i)); - // return true; - // } catch (Exception e) { - // throw new IllegalArgumentException("Error with <" + propertyValue + ">", e); - // } - // } - - static boolean getBinaryValue(String propertyValue) { - boolean invert; - if (propertyValue.length() == 0 || propertyValue.equalsIgnoreCase("true") - || propertyValue.equalsIgnoreCase("t") - || propertyValue.equalsIgnoreCase("yes") - || propertyValue.equalsIgnoreCase("y")) { - invert = false; - } else if (propertyValue.equalsIgnoreCase("false") - || propertyValue.equalsIgnoreCase("f") - || propertyValue.equalsIgnoreCase("no") - || propertyValue.equalsIgnoreCase("n")) { - invert = true; - } else { - throw new IllegalArgumentException( - "PropertyValue must be empty (= T) or one of: True, T, False, F"); - } - return invert; - } - - public static boolean equals(CharSequence inbuffer, CharSequence outbuffer) { - if (inbuffer.length() != outbuffer.length()) { - return false; - } - for (int i = inbuffer.length() - 1; i >= 0; --i) { - if (inbuffer.charAt(i) != outbuffer.charAt(i)) { - return false; - } - } - return true; - } - - static final int BLOCK_ENUM = UCharacter.getPropertyEnum("block"); - - static XPropertyFactory factory = XPropertyFactory.make(); - - static NumberFormat numberFormat = NumberFormat.getInstance(ULocale.ENGLISH, NumberFormat.NUMBERSTYLE); - static { - numberFormat.setGroupingUsed(true); - } - - public static void showSet(String grouping, UnicodeSet a, CodePointShower codePointShower, Appendable out) throws IOException { - grouping = grouping.trim(); - if (grouping.length() == 0) { - showSet(a, codePointShower, out); - return; - } - final String[] props = grouping.split("[;,\\s]\\s*"); - final int length = props.length; - - if (length > 5) { - out.append("

Too many groups: " + Arrays.asList(props) + "

"); - return; - } - final boolean getShortest = false; - final UnicodeProperty[] properties = new UnicodeProperty[length]; - final String[] names = new String[length]; - for (int i = 0; i < length; ++i) { - try { - properties[i] = factory.getProperty(props[i]); - names[i] = properties[i].getName(); - names[i].charAt(0); // trigger exception - properties[i].getValue(0, getShortest); - } catch (final Exception e) { - out.append("

Unknown 'Group by' property name: '" + props[i] + "'

"); - return; - } - } - final UnicodeMap map = new UnicodeMap(); - final StringBuilder builder = new StringBuilder(); - for (final UnicodeSetIterator it = new UnicodeSetIterator(a); it.next();) { - final int s = it.codepoint; - if (s == UnicodeSetIterator.IS_STRING) { - final String ss = it.string; - builder.setLength(0); - for (int i = 0; i < length; ++i) { - if (i != 0) { - builder.append("; "); - } - builder.append(names[i]).append("="); - builder.append(getStringProperties(properties[i], ss, ",", getShortest)); - } - map.put(ss, builder.toString()); - } else { - builder.setLength(0); - for (int i = 0; i < length; ++i) { - if (i != 0) { - builder.append("; "); - } - try { - builder.append(names[i]).append("="); - builder.append(properties[i].getValue(s, getShortest)); - } catch (final Exception e) { - builder.append("Internal error: " + names[i] + ", " + properties[i] + ", " + getHex(i,true)); - } - } - map.put(s, builder.toString()); - } - } - final TreeSet sorted = new TreeSet(Collator.getInstance(ULocale.ENGLISH)); - sorted.addAll(map.values()); - final String[] propsOld = new String[length]; - for (int i = 0; i < propsOld.length; ++i) { - propsOld[i] = ""; - } - int lastLevel = -1; - for (final String s : sorted) { - final String[] props2 = s.split("; "); - final int level = getFirstDiff(propsOld, props2); - //out.append("// level: " + level + ", lastLevel: " + lastLevel + "\n"); - // if higher, back off - if (lastLevel >= 0) { - for (int i = level; i < length; ++i) { - out.append("\n"); - } - } - lastLevel = level; - final UnicodeSet items = map.getSet(s); - - for (int i = lastLevel; i < length; ++i) { - out.append("

" + props2[i] + - (i == length - 1 ? "
items: " + numberFormat.format(items.size()) : "
") + - "

\n"); - } - showSet(items, codePointShower, out); - for (int i = 0; i < propsOld.length; ++i) { - propsOld[i] = props2[i]; - } - } - for (int i = 0; i <= lastLevel; ++i) { - out.append("
\n"); - } - } - - static int getFirstDiff(String[] a, String[] b) { - for (int i = 0; i < a.length; ++i) { - if (!a[i].equals(b[i])) { - return i; - } - } - return a.length; - } - - // static getPropNames() { - // return factory.getAvailableNames(); - // } - - public static String getStringProperties(UnicodeProperty prop, String s, String separator, boolean getShortest) { - final StringBuilder builder = new StringBuilder(); - int cp; - for (int i = 0; i < s.length(); i += Character.charCount(cp)) { - cp = s.codePointAt(i); - if (i != 0) { - builder.append(separator); - } - builder.append(prop.getValue(cp, getShortest)); - } - return builder.toString(); - } - - /*jsp*/ - public static void showSet(UnicodeSet inputSetRaw, CodePointShower codePointShower, Appendable out) throws IOException { - if (codePointShower.doTable) { - out.append(""); - } - if (inputSetRaw.getRangeCount() > 10000) { - if (codePointShower.doTable) { - out.append(""); - } - } else if (codePointShower.abbreviate) { - if (codePointShower.doTable) { - out.append(""); - } - } else { - final LinkedHashMap items = new LinkedHashMap(); - final String specials = "Unassigned, Private use, or Surrogates"; - - final UnicodeSet specialSet = new UnicodeSet(inputSetRaw).retainAll(UnicodeProperty.getSPECIALS()); - final UnicodeSet inputSet = specialSet.size() == 0 ? inputSetRaw : new UnicodeSet(inputSetRaw).removeAll(UnicodeProperty.getSPECIALS()); - if (specialSet.size() != 0) { - items.put(specials, specialSet); - } - - for (final UnicodeSetIterator it = new UnicodeSetIterator(inputSet); it.next();) { - final int s = it.codepoint; - if (s == UnicodeSetIterator.IS_STRING) { - final String newBlock = "Strings"; - UnicodeSet set = items.get(newBlock); - if (set == null) { - items.put(newBlock, set = new UnicodeSet()); - } - set.add(it.string); - } else { - final String block = UCharacter.getStringPropertyValue(BLOCK_ENUM, s, UProperty.NameChoice.LONG).replace('_', ' '); - String newBlock = "" + block + ""; - String newSubhead = getSubheader().getSubheader(s); - if (newSubhead == null) { - newSubhead = "no subhead"; - } else { - newSubhead = "" + newSubhead + ""; - } - newBlock = newBlock + " \u2014 " + newSubhead + ""; - UnicodeSet set = items.get(newBlock); - if (set == null) { - items.put(newBlock, set = new UnicodeSet()); - } - set.add(s); - } - } - - for (final String newBlock : items.keySet()) { - final UnicodeSet set = items.get(newBlock); - if (codePointShower.doTable) { - out.append(""); - } - - if (set.size() > 500 || newBlock == specials) { - codePointShower.showAbbreviated(set, out); - } else { - for (final UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) { - final int s = it.codepoint; - if (s == UnicodeSetIterator.IS_STRING) { - codePointShower.showString(it.string, ", ", out); - } else { - codePointShower.showCodePoint(s, out); - } - } - } - } - } - if (codePointShower.doTable) { - out.append("
"); - } - out.append("Too many to list individually\n"); - if (codePointShower.doTable) { - out.append("
"); - } - codePointShower.showAbbreviated(inputSetRaw, out); - if (codePointShower.doTable) { - out.append("
"); - } - out.append("

" + newBlock + "
items: " + numberFormat.format(set.size()) + "

\n"); - if (codePointShower.doTable) { - out.append("
"); - } - } - - public static String getIdentifier(String script) { - final StringBuilder result = new StringBuilder(); - final UnicodeProperty scriptProp = factory.getProperty("sc"); - UnicodeSet scriptSet; - scriptSet = scriptProp.getSet(script); - scriptSet.removeAll(NONCHAR); - if (scriptSet.size() == 0) { - result.append("

Illegal script: " + toHTML(script) + ". Please pick one of the following:

\n

"); - String last = null; - final TreeSet sorted = new TreeSet(col); - sorted.addAll(scriptProp.getAvailableValues()); - for (final String s : sorted) { - scriptSet = scriptProp.getSet(s); - scriptSet.removeAll(NONCHAR); - if (scriptSet.size() == 0) { - continue; - } - final String name = toHTML(s); - if (last == null) { - // nothing - } else if (last.charAt(0) == s.charAt(0)) { - result.append(' '); - } else { - result.append("

"); - } - result.append("" + name + ""); - last = s; - } - result.append("

\n"); - return result.toString(); - } - try { - final UnicodeSet allowed = new UnicodeSet(scriptSet).retainAll(XIDModifications.allowed); - final UnicodeSet restricted = new UnicodeSet(scriptSet).removeAll(XIDModifications.allowed); - result.append("

Allowed

"); - if (allowed.size() == 0) { - result.append("none"); - } else { - showSet(allowed, new CodePointShower(false, false, true), result); - } - - if (restricted.size() == 0) { - result.append("

Restricted

"); - result.append("none"); - } else { - for (final String reason : XIDModifications.reasons.values()) { - final UnicodeSet shard = XIDModifications.reasons.getSet(reason); - final UnicodeSet items = new UnicodeSet(restricted).retainAll(shard); - if (items.size() != 0) { - result.append("

Restricted - " + reason + "

"); - showSet(items, new CodePointShower(false, false, true).setRestricted(true), result); - } - } - } - return result.toString(); - } catch (final IOException e) { - return "Internal Error"; - } - } - - static private UnicodeSet RTL= new UnicodeSet("[[:bc=R:][:bc=AL:]]"); - - private static String showCodePoint(int codepoint) { - return showCodePoint(UTF16.valueOf(codepoint)); - } - - private static String showCodePoint(String s) { - final String literal = getLiteral(s); - return "\u00a0" + literal + "\u00a0"; - } - - private static String getLiteral(int codepoint) { - return getLiteral(UTF16.valueOf(codepoint)); - } - - private static String getLiteral(String s) { - String literal = toHTML.transliterate(s); - if (RTL.containsSome(literal)) { - literal = '\u200E' + literal + '\u200E'; - } - return literal; - } - - static class CodePointShower { - - public boolean doTable; - public boolean abbreviate; - public boolean ucdFormat; - public boolean identifierInfo; - public boolean restricted; - - public CodePointShower setRestricted(boolean restricted) { - this.restricted = restricted; - return this; - } - - public CodePointShower(boolean abbreviate, boolean ucdFormat, boolean identifierInfo) { - this.abbreviate = abbreviate; - this.ucdFormat = ucdFormat; - this.identifierInfo = doTable = identifierInfo; - } - - void showCodePoint(int codePoint, Appendable out) throws IOException { - final String string = UTF16.valueOf(codePoint); - final String separator = ", "; - showString(string, separator, out); - } - - private void showString(final String string, String separator, Appendable out) throws IOException { - if (doTable) { - out.append(""); - } - String literal = UnicodeUtilities.toHTML.transliterate(string); - if (UnicodeUtilities.RTL.containsSome(literal)) { - literal = '\u200E' + literal + '\u200E'; - } - String name = UnicodeUtilities.getName(string, separator, false); - if (name == null || name.length() == 0) { - name = "no name"; - } else { - final boolean special = name.indexOf('<') >= 0; - name = UnicodeUtilities.toHTML.transliterate(name); - if (special) { - name = "" + name + ""; - } - } - if (doTable) { - out.append(UnicodeUtilities.getHex(string, separator, ucdFormat) + "\u00A0" + literal + "\u00A0" + name); - } else { - out.append(UnicodeUtilities.getHex(string, separator, ucdFormat) + " " + (ucdFormat ? "\t;" : "(\u00A0" + literal + "\u00A0) ") + name); - } - if (identifierInfo) { - final int cp = string.codePointAt(0); - final StringBuilder confusableString = displayConfusables(cp); - if (doTable) { - out.append(""); - } else { - out.append("; "); - } - if (confusableString.length() == 0) { - out.append("none"); - } else { - out.append(confusableString.toString()); - } - } - if (doTable) { - out.append("\n"); - } else { - out.append("
\n"); - } - } - - private void showAbbreviated(UnicodeSet a, Appendable out) throws IOException { - final UnicodeUtilities.CodePointShower codePointShower = this; - - for (final UnicodeSetIterator it = new UnicodeSetIterator(a); it.nextRange();) { - final int s = it.codepoint; - if (s == UnicodeSetIterator.IS_STRING) { - out.append(UnicodeUtilities.showCodePoint(it.string)).append("
\n"); - } else { - final int end = it.codepointEnd; - if (end == s) { - codePointShower.showCodePoint(s, out); - } else if (end == s + 1) { - codePointShower.showCodePoint(s, out); - codePointShower.showCodePoint(end, out); - } else { - if (codePointShower.ucdFormat) { - out.append(UnicodeUtilities.getHex(s, codePointShower.ucdFormat)); - out.append(".."); - codePointShower.showCodePoint(end, out); - } else { - codePointShower.showCodePoint(s, out); - if (doTable) { - out.append("" + "\u2026{" + (end-s-1) + "}\u2026"); - } else { - out.append("\u2026{" + (end-s-1) + "}\u2026"); - } - codePointShower.showCodePoint(end, out); - } - } - } - } - } - - } - - private static String getName(String string, String separator, boolean andCode) { - final StringBuilder result = new StringBuilder(); - int cp; - for (int i = 0; i < string.length(); i += UTF16.getCharCount(cp)) { - cp = UTF16.charAt(string, i); - if (i != 0) { - result.append(separator); - } - if (andCode) { - result.append("U+").append(com.ibm.icu.impl.Utility.hex(cp, 4)).append(' '); - } - result.append(UCharacter.getExtendedName(cp)); - } - return result.toString(); - } - - private static String getHex(int codePoint, boolean ucdFormat) { - final String hex = com.ibm.icu.impl.Utility.hex(codePoint, 4); - final String string = "" + - ("") - + (ucdFormat ? "" : "U+") - + hex + ""; - return string; - } - - private static String getHex(String string, String separator, boolean ucdFormat) { - final StringBuilder result = new StringBuilder(); - int cp; - for (int i = 0; i < string.length(); i += UTF16.getCharCount(cp)) { - if (i != 0) { - result.append(separator); - } - result.append(getHex(cp = UTF16.charAt(string, i), ucdFormat)); - } - return result.toString(); - } - - // private static void showString(String s, String separator, boolean ucdFormat, Writer out) throws IOException { - // int cp; - // for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { - // if (i != 0) { - // out.write(separator); - // } - // showCodePoint(cp = UTF16.charAt(s, i), ucdFormat, out); - // } - // } - - static final UnicodeSet MAPPING_SET = new UnicodeSet("[:^c:]"); - - static { - Transliterator.registerInstance(getTransliteratorFromFile("en-IPA", "en-IPA.txt", Transliterator.FORWARD)); - Transliterator.registerInstance(getTransliteratorFromFile("IPA-en", "en-IPA.txt", Transliterator.REVERSE)); - - Transliterator.registerInstance(getTransliteratorFromFile("deva-ipa", "Deva-IPA.txt", Transliterator.FORWARD)); - Transliterator.registerInstance(getTransliteratorFromFile("ipa-deva", "Deva-IPA.txt", Transliterator.REVERSE)); - } - - public static Transliterator getTransliteratorFromFile(String ID, String file, int direction) { - try { - final BufferedReader br = FileUtilities.openFile(UnicodeUtilities.class, file); - final StringBuffer input = new StringBuffer(); - while (true) { - String line = br.readLine(); - if (line == null) { - break; - } - if (line.startsWith("\uFEFF")) { - line = line.substring(1); // remove BOM - } - input.append(line); - input.append('\n'); - } - return Transliterator.createFromRules(ID, input.toString(), direction); - } catch (final IOException e) { - throw (IllegalArgumentException) new IllegalArgumentException("Can't open transliterator file " + file).initCause(e); - } - } - - public static final Transliterator UNESCAPER = Transliterator.getInstance("hex-any"); - - - /*jsp*/ - public static String showTransform(String transform, String sample) { - // if (!haveCaseFold) { - // registerCaseFold(); - // } - Transliterator trans; - try { - trans = Transliterator.createFromRules("foo", transform, Transliterator.FORWARD); - } catch (final Exception e) { - try { - trans = Transliterator.getInstance(transform); - } catch (final Exception e2) { - return "Error: " + toHTML.transform(e.getMessage() + "; " + e2.getMessage()); - } - } - - UnicodeSet set = null; - // see if sample is a UnicodeSet - if (UnicodeSet.resemblesPattern(sample, 0)) { - try { - set = UnicodeSetUtilities.parseUnicodeSet(sample); - } catch (final Exception e) {} - } - if (set == null) { - sample = UNESCAPER.transform(sample); - return getLiteral(trans.transform(sample)).replace("\n", "
"); - } - - final UnicodeSetPrettyPrinter pp = new UnicodeSetPrettyPrinter().setOrdering(Collator.getInstance(ULocale.ROOT)).setSpaceComparator(Collator.getInstance(ULocale.ROOT).setStrength2(Collator.PRIMARY)).setSpaceComparator(new Comparator() { - @Override - public int compare(String o1, String o2) { - return 1; - } - }); - - final Map mapping = new TreeMap(pp.getOrdering()); - - for (final UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) { - final String s = it.getString(); - final String mapped = trans.transform(s); - if (!mapped.equals(s)) { - UnicodeSet x = mapping.get(mapped); - if (x == null) { - mapping.put(mapped, x = new UnicodeSet()); - } - x.add(s); - } - } - final StringBuilder result = new StringBuilder(); - for (final String mapped : mapping.keySet()) { - final UnicodeSet source = mapping.get(mapped); - result.append(showCodePoint(mapped)); - result.append("\t←\t"); - if (source.size() == 1) { - final UnicodeSetIterator it = new UnicodeSetIterator(source); - it.next(); - result.append(showCodePoint(it.getString())); - } else { - result.append(showCodePoint(pp.format(source))); - } - result.append("
\n"); - } - return result.toString(); - } - - public static class StringPair implements Comparable { - String first; - String second; - public StringPair(String first, String second) { - this.first = first; - this.second = second; - } - @Override - public int compareTo(StringPair o) { - final int result = first.compareTo(o.first); - if (result != 0) { - return result; - } - return second.compareTo(o.second); - } - } - - static String TRANSFORMLIST = null; - - public static String listTransforms() { - if (TRANSFORMLIST == null) { - final StringBuilder result = new StringBuilder(); - final Set pairs = new TreeSet(); - final Set sources = append(new TreeSet(col), Transliterator.getAvailableSources()); - for (final String source : sources) { - final Set targets = append(new TreeSet(col), Transliterator.getAvailableTargets(source)); - for (final String target : targets) { - final Set variants = append(new TreeSet(col), Transliterator.getAvailableVariants(source, target)); - for (final String variant : variants) { - final String id = toHTML.transform(source + "-" + target + (variant.length() == 0 ? "" : "/" + variant)); - pairs.add(new StringPair(target, id)); - } - } - } - result.append("
\n"); - String last = ""; - boolean first = true; - for (final StringPair pair : pairs) { - if (!last.equals(pair.first)) { - if (first) { - first = false; - } else { - result.append("\n"); - } - result.append("
ResultIDs
" + pair.first + ""); - } - result.append("" + pair.second + "\n"); - last = pair.first; - } - result.append("\t\t\n\t\n"); - result.append("
"); - TRANSFORMLIST = result.toString(); - } - return TRANSFORMLIST; - } - - private static > U append(U result, Enumeration sources) { - while (sources.hasMoreElements()) { - result.add(sources.nextElement()); - } - return result; - } - - // private static void registerCaseFold() { - // StringBuilder rules = new StringBuilder(); - // for (UnicodeSetIterator it = new UnicodeSetIterator(MAPPING_SET); it.nextRange();) { - // for (int i = it.codepoint; i <= it.codepointEnd; ++i) { - // String s = UTF16.valueOf(i); - // String caseFold = UCharacter.foldCase(s, true); - // String lower = UCharacter.toLowerCase(Locale.ENGLISH, s); - // if (!caseFold.equals(lower) || i == 'Σ') { - // rules.append(s + ">" + caseFold + " ;\n"); - // } - // } - // } - // rules.append("::Lower;"); - // Transliterator.registerInstance(Transliterator.createFromRules("Any-CaseFold", rules.toString(), Transliterator.FORWARD)); - // haveCaseFold = true; - // } - - static class FilteredStringTransform implements StringTransform { - final UnicodeSet toExclude; - final StringTransform trans; - public FilteredStringTransform(UnicodeSet toExclude, StringTransform trans) { - this.toExclude = toExclude; - this.trans = trans; - } - @Override - public String transform(String source) { - final StringBuilder result = new StringBuilder(); - int start = 0; - while (start < source.length()) { - final int end = toExclude.findIn(source, start, false); - result.append(trans.transform(source.substring(start,end))); - if (end == source.length()) { - break; - } - start = toExclude.findIn(source, end, true); - result.append(source.substring(end,start)); - } - return result.toString(); - } - } - - static String getPrettySet(UnicodeSet a, boolean abbreviate, boolean escape) { - String a_out; - if (a.size() < 10000 && !abbreviate) { - final UnicodeSetPrettyPrinter pp = new UnicodeSetPrettyPrinter().setOrdering(Collator.getInstance(ULocale.ROOT)).setSpaceComparator(Collator.getInstance(ULocale.ROOT).setStrength2(Collator.PRIMARY)); - if (escape) { - pp.setToQuote(new UnicodeSet("[^\\u0021-\\u007E]")); - } - a_out = toHTML(pp.format(a)); - } else { - a.complement().complement(); - a_out = toHTML(a.toPattern(escape)); - } - // insert spaces occasionally - int cp; - int oldCp = 0; - final StringBuffer out = new StringBuffer(); - int charCount = 0; - for (int i = 0; i < a_out.length(); i+= UTF16.getCharCount(cp)) { - cp = UTF16.charAt(a_out, i); - ++charCount; - if (charCount > 20) { - // add a space, but not in x-y, or \\uXXXX - if (cp == '-' || oldCp == '-') { - // do nothing - } else if (oldCp == '\\' || cp < 0x80) { - // do nothing - } else { - out.append(' '); - charCount = 0; - } - } - UTF16.append(out, cp); - oldCp = cp; - } - return out.toString(); - } - - public static UnicodeSet parseSimpleSet(String setA, String[] exceptionMessage) { - try { - exceptionMessage[0] = null; - setA = setA.replace("..U+", "-\\u"); - setA = setA.replace("U+", "\\u"); - return UnicodeSetUtilities.parseUnicodeSet(setA); - } catch (final Exception e) { - exceptionMessage[0] = e.getMessage(); - } - return null; - } - - public static void getDifferences(String setA, String setB, - boolean abbreviate, String[] abResults, int[] abSizes, String[] abLinks) { - final boolean escape = false; - - final String setAr = toHTML.transliterate(UtfParameters.fixQuery(setA)); - final String setBr = toHTML.transliterate(UtfParameters.fixQuery(setB)); - abLinks[0] = "http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[" + setAr + '-' + setBr + "]"; - abLinks[1] = "http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[" + setBr + '-' + setAr + "]"; - abLinks[2] = "http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[" + setAr + "%26" + setBr + "]"; - final String[] aMessage = new String[1]; - final String[] bMessage = new String[1]; - - final UnicodeSet a = UnicodeUtilities.parseSimpleSet(setA, aMessage); - final UnicodeSet b = UnicodeUtilities.parseSimpleSet(setB, bMessage); - - String a_b; - String b_a; - String ab; - - // try { - // setA = MyNormalize(setA, Normalizer.NFC); - // a = UnicodeUtilities.parseUnicodeSet(setA); - // } catch (Exception e) { - // a_b = e.getMessage(); - // } - // UnicodeSet b = null; - // try { - // setB = MyNormalize(setB, Normalizer.NFC); - // b = UnicodeUtilities.parseUnicodeSet(setB); - // } catch (Exception e) { - // b_a = e.getMessage(); - // } - int a_bSize = 0, b_aSize = 0, abSize = 0; - if (a == null || b == null) { - a_b = a == null ? aMessage[0] : "error" ; - b_a = b == null ? bMessage[0] : "error" ; - ab = "error"; - } else { - UnicodeSet temp = new UnicodeSet(a).removeAll(b); - a_bSize = temp.size(); - a_b = getPrettySet(temp, abbreviate, escape); - - temp = new UnicodeSet(b).removeAll(a); - b_aSize = temp.size(); - b_a = getPrettySet(temp, abbreviate, escape); - - temp = new UnicodeSet(a).retainAll(b); - abSize = temp.size(); - ab = getPrettySet(temp, abbreviate, escape); - } - abResults[0] = a_b; - abSizes[0] = a_bSize; - abResults[1] = b_a; - abSizes[1] = b_aSize; - abResults[2] = ab; - abSizes[2] = abSize; - } - - static int[][] ranges = { { UProperty.BINARY_START, UProperty.BINARY_LIMIT }, - { UProperty.INT_START, UProperty.INT_LIMIT }, - { UProperty.DOUBLE_START, UProperty.DOUBLE_LIMIT }, - { UProperty.STRING_START, UProperty.STRING_LIMIT }, }; - - static Collator col = Collator.getInstance(ULocale.ROOT); - static { - ((RuleBasedCollator) col).setNumericCollation(true); - } - - public static void showProperties(int cp, Appendable out) throws IOException { - final String text = UTF16.valueOf(cp); - - String name = factory.getProperty("Name").getValue(cp); - if (name != null) { - name = toHTML.transliterate(name); - } else { - name = "Unknown"; - } - final boolean allowed = XIDModifications.allowed.contains(cp); - - String scriptCat = factory.getProperty("script").getValue(cp).replace("_", " "); - if (scriptCat.equals("Common") || scriptCat.equals("Inherited")) { - scriptCat = factory.getProperty("gc").getValue(cp).replace("_", " "); - } else { - scriptCat += " Script"; - } - - final String hex = com.ibm.icu.impl.Utility.hex(cp, 4); - - out.append("
\n"); - out.append("\n"); - out.append("\n"); - out.append("\n"); - out.append("\n"); - out.append("\n"); - final StringBuilder confusableString = displayConfusables(cp); - out.append("\n"); - out.append("
\u00A0" + toHTML.transliterate(text) + "\u00A0
" + hex + "
" + name + "
" + scriptCat + "
id: "); - if (allowed) { - out.append("allowed"); - } else { - out.append("restricted"); - } - out.append("
confuse: "); - if (confusableString.length() == 0) { - out.append("none"); - } else { - out.append(confusableString.toString()); - } - out.append("
\n"); - - final List availableNames = factory.getAvailableNames(); - final TreeSet sortedProps = Builder - .with(new TreeSet(col)) - .addAll(availableNames) - .remove("Name") - .get(); - - out.append("" - + "" - + "" + - "
Properties for U+" + hex + "
With Non-Default ValuesWith Default Values
\n"); - out.append("\n"); - - for (final String propName : sortedProps) { - final UnicodeProperty prop = factory.getProperty(propName); - if (prop.getName().equals("confusable")) { - continue; - } - - final boolean isDefault = prop.isDefault(cp); - if (isDefault) { - continue; - } - final String propValue = prop.getValue(cp); - showPropertyValue(propName, propValue, isDefault, out); - } - out.append("
\n"); - - out.append("
\n"); - - out.append("\n"); - for (final String propName : sortedProps) { - final UnicodeProperty prop = factory.getProperty(propName); - if (prop.getName().equals("confusable")) { - continue; - } - - final boolean isDefault = prop.isDefault(cp); - if (!isDefault) { - continue; - } - final String propValue = prop.getValue(cp); - showPropertyValue(propName, propValue, isDefault, out); - } - out.append("
\n"); - - out.append("
\n"); - } - - private static StringBuilder displayConfusables(int codepoint) { - final StringBuilder confusableString = new StringBuilder(); - final Set skip = new HashSet(); - final String same = UTF16.valueOf(codepoint); - final String nfd = Normalizer.normalize(same, Normalizer.NFD); - - skip.add(same); - skip.add(nfd); - - // get basic confusables - final Set list = Confusables.getEquivalents(same); - if (list != null) { - for (final String s: list) { - if (same.equals(s)) { - continue; - } - if (confusableString.length() != 0) { - confusableString.append(", "); - } - getBoxedCharacters(s, confusableString); - skip.add(s); - final String nfd2 = Normalizer.normalize(same, Normalizer.NFD); - skip.add(nfd2); - } - } - - - // Now, get the combinations - if (UTF16.countCodePoint(nfd) > 1) { - if (confusableString.length() != 0) { - confusableString.append(", "); - } - - final List combos = new ArrayList(); - // get all the combinations - int cp; - for (int i = 0; i < nfd.length(); i += Character.charCount(cp)) { - if (i != 0) { - confusableString.append("+"); - } - cp = nfd.codePointAt(i); - final Confusables currentCombos = new Confusables(UTF16.valueOf(cp)).setNormalizationCheck(Normalizer.NFKC); - combos.add(currentCombos); - confusableString.append("
"); - for (final String s : currentCombos) { - getBoxedCharacters(s, confusableString); - } - confusableString.append("
"); - } - // now add them to the skip list - addToSkip("", 0, combos, skip); - } - - final Confusables confusables = new Confusables(same).setNormalizationCheck(Normalizer.NFKC); - for (final String s: confusables) { - if (skip.contains(s)) { - continue; - } - final String nfd2 = Normalizer.normalize(same, Normalizer.NFD); - if (skip.contains(nfd2)) { - continue; - } - if (confusableString.length() != 0) { - confusableString.append(", "); - } - getBoxedCharacters(s, confusableString); - } - - // - // // first, try the nfd - // skip.add(same); - // String nfd = Normalizer.normalize(same, Normalizer.NFD); - // // get all the confusables that are simple products - // int cp; - // for (int i = 0; i < nfd.length(); i += Character.charCount(cp)) { - // cp = nfd.codePointAt(i); - // } - // - // for (String s: confusables) { - // if (same.equals(s)) { - // continue; - // } - // getBoxedCharacters(s, confusableString); - // } - return confusableString; - } - - // add recursively, for simplicity - private static void addToSkip(String prefix, int i, List combos, Set skip) { - if (i >= combos.size()) { - skip.add(prefix); - } else { - for (final String s : combos.get(i)) { - addToSkip(prefix + s, i+1, combos, skip); - } - } - } - - private static void getBoxedCharacters(String s, StringBuilder confusableString) { - confusableString - .append("
"); - int cp; - for (int i = 0; i < s.length(); i += Character.charCount(cp)) { - cp = s.codePointAt(i); - if (i != 0) { - confusableString.append("+"); - } - confusableString - .append("" + " ") - .append(toHTML(UTF16.valueOf(cp))) - .append(" "); - } - confusableString.append("
"); - } - - private static void showPropertyValue(String propName, String propValue, boolean isDefault, Appendable out) throws IOException { - final String defaultClass = isDefault ? " class='default'" : ""; - if (propValue == null) { - out.append("" + propName + "null\n"); - return; - } - String hValue = toHTML.transliterate(propValue); - hValue = "" + hValue + ""; - - out.append("" + propName + "" + hValue + "\n"); - } - - /*jsp*/ - public static void showPropsTable(Appendable out, String propForValues, String myLink) throws IOException { - ((RuleBasedCollator)col).setNumericCollation(true); - final Map> alpha = new TreeMap>(col); - final Map longToShort = new HashMap(); - - final Set showLink = new HashSet(); - - final TablePrinter tablePrinter = new TablePrinter() - .setTableAttributes("style='border-collapse: collapse' border='1'") - .addColumn("Category").setSpanRows(true).setBreakSpans(true).setCellAttributes("class='propCategory'").setSortPriority(0) - .addColumn("Datatype").setSpanRows(true).setCellAttributes("class='propDatatype'").setSortPriority(1) - .addColumn("Source").setSpanRows(true).setCellAttributes("class='propSource'").setSortPriority(2) - .addColumn("Property").setSpanRows(false).setCellAttributes("class='propTitle'") - .addColumn("Values").setSpanRows(false).setCellAttributes("class='propValues'") - ; - //tablePrinter.addRows(data); - //tablePrinter.addRow().addCell("Foo").addCell(1.5d).addCell(99).finishRow(); - - //out.append("\n"); - // out.append("\n") - // .append("\n") - // .append("\n") - // .append("\n") - // .append("\n"); - - //for (String propName : Builder.with(new TreeSet(col)).addAll((List)factory.getAvailableNames()).get()) { - for (final R4 propData : PropertyMetadata.CategoryDatatypeSourceProperty) { - final String propName = propData.get3(); - final UnicodeProperty prop = factory.getProperty(propName); - if (prop == null) { - continue; - } - final String propHtml = toHTML.transform(propName); - String shortName; - try { - shortName = prop.getFirstNameAlias(); - } catch (final Exception e) { - throw new IllegalArgumentException(propData.toString(), e); - } - final String title = shortName == null || shortName.equals(propName) ? "" : " title='" + toHTML(shortName) + "'"; - String propInfo = "" + propHtml + ""; - if (shortName == null || shortName.equals(propName)) { - propInfo = "" + propInfo + ""; - } - // out.append("") - // .append("\n") - // .append("\n") - // .append("\n") - // .append("") - // .append(propHtml) - // .append("\n"); - // out.append("\n"); - } - //out.append("
SourceCategoryDatatypePropertyValues
").append(propData.get0()).append("").append(propData.get1()).append("").append(propData.get2()).append(""); - final StringBuilder propValues = new StringBuilder(); - // List availableValues = (List)prop.getAvailableValues(); - if (propName.equals(propForValues) ) { // || availableValues.size() < 10 - getHtmlPropValues(prop, propHtml, propValues); - } else { - propValues.append("Show Values"); - } - tablePrinter.addRow() - .addCell(propData.get0()) - .addCell(propData.get1()) - .addCell(propData.get2()) - .addCell(propInfo) - .addCell(propValues.toString()) - .finishRow(); - //out.append("
\n"); - out.append(tablePrinter.toTable()); - } - - private static void getHtmlPropValues(UnicodeProperty prop, String propHtml, StringBuilder propValues) { - final List availableValues = prop.getAvailableValues(); - final TreeSet sortedList = Builder.with(new TreeSet(col)).addAll(availableValues).get(); - int count = 500; - int lastFirstChar = 0; - for (final String valueName : sortedList) { - if (--count < 0) { - propValues.append("\ntoo many values to show"); - break; - } - final int firstChar = valueName.codePointAt(0); - if (lastFirstChar != 0) { - if (lastFirstChar != firstChar) { - propValues.append(",
\n"); - } else { - propValues.append(", "); - } - } - lastFirstChar = firstChar; - final String valueHtml = toHTML.transform(valueName); - final String shortValue = prop.getFirstValueAlias(valueName); - if (valueName.startsWith("<") && valueName.endsWith(">")) { - propValues.append(valueHtml); - } else { - propValues.append(getPropLink(propHtml, valueHtml, valueHtml, shortValue)); - } - } - } - - private static String getPropLink(String propName, String propValue, String linkText, String shortName) { - final String propExp = - propValue == "T" ? propName - : propValue == "F" ? "^" + propName - : propName + "=" + propValue; - final String title = shortName == null ? "" : " title='" + toHTML(shortName) + "'"; - return "" + linkText + ""; - } - - static Subheader getSubheader() { - if (subheader == null) { - // /home/users/jakarta/apache-tomcat-6.0.14/bin - // /home/users/jakarta/apache-tomcat-6.0.14/webapps/cldr/utility - subheader = new Subheader(UnicodeUtilities.class.getResourceAsStream("NamesList.txt")); - // try { - // final String unicodeDataDirectory = "../webapps/cldr/utility/"; - // //System.out.println(canonicalPath); - // subheader = new Subheader(unicodeDataDirectory); - // } catch (IOException e) { - // try { - // final String unicodeDataDirectory = "./jsp/"; - // subheader = new Subheader(unicodeDataDirectory); - // } catch (IOException e2) { - // final String[] list = new File("home").list(); - // String currentDirectory = list == null ? null : new TreeSet(Arrays.asList(list)).toString(); - // throw (RuntimeException) new IllegalArgumentException("Can't find file starting from: <" + currentDirectory + ">").initCause(e); - // } - // } - } - return subheader; - } - - //static IdnaLabelTester tester = null; - static String removals = new UnicodeSet("[\u1806[:di:]-[:cn:]]").complement().complement().toPattern(false); - static Matcher rem = Pattern.compile(removals).matcher(""); - // TODO use UnicodeRegex - - - // static IdnaLabelTester getIdna2008Tester() { - // if (tester == null) { - // try { - // URL path = UnicodeUtilities.class.getResource("idnaContextRules.txt"); - // String externalForm = path.toExternalForm(); - // if (externalForm.startsWith("file:")) { - // externalForm = externalForm.substring(5); - // } - // tester = new IdnaLabelTester(externalForm); - // } catch (IOException e) { - // throw new IllegalArgumentException(e); - // } - // } - // return tester; - // } - - static void addBlank(StringBuilder resultLines) { - resultLines.append(" \n"); - } - - static void addCell(StringBuilder resultLines, Transliterator hex, String tr46, String attributes, String confusableChoice) { - if (tr46 == null) { - resultLines.append("fails\n"); - } else { - final String escaped = showEscaped(tr46); - String linkStart = "", linkEnd = ""; - if (confusableChoice != null) { - linkStart = ""; - linkEnd = ""; - } - resultLines.append("") - .append(linkStart) - .append(escaped) - .append(linkEnd) - .append("\n"); - } - } - - public static final UnicodeSet TO_QUOTE = new UnicodeSet("[[:z:][:me:][:mn:][:di:][:c:]-[\u0020]]"); - - static final Transliterator ESCAPER = Transliterator.createFromRules("escaper", - "(" + TO_QUOTE + ") > ''&any-hex($1)'';" - + HTML_RULES_CONTROLS, Transliterator.FORWARD); - - public static final UnicodeSet SYMBOL = new UnicodeSet("[:s:]").freeze(); - public static final UnicodeSet PUNCTUATION = new UnicodeSet("[:p:]").freeze(); - - private static String showEscaped(String line) { - String toShow = toHTML.transform(line); - final String escaped = ESCAPER.transform(line); - if (!escaped.equals(toShow)) { - toShow += "
" + escaped + ""; - } - return toShow; - } - - public static String showBidi(String str, int baseDirection, boolean asciiHack) { - // warning, only BMP for now - final StringWriter stringWriter = new StringWriter(); - final PrintWriter writer = new PrintWriter(stringWriter); - - final BidiCharMap bidiCharMap = new BidiCharMap(asciiHack); - - final String[] parts = str.split("\\r\\n?|\\n"); - for (int i = 0; i < parts.length; ++i) { - writer.println("

Paragraph " + (i+1) + "

"); - if (parts[i] == null || parts[i].length() == 0) { - continue; - } - showBidiLine(parts[i], baseDirection, writer, bidiCharMap); - } - - if (asciiHack) { - writer.println("

ASCII Hack

"); - writer.println("

For testing the UBA with only ASCII characters, the following property values are used (<,> are RLM and LRM):

"); - writer.println(""); - for (byte i = 0; i < BidiReference.typenames.length; ++i) { - final UnicodeSet modifiedClass = BidiCharMap.getAsciiHack(i); - writer.println(""); - } - writer.println("
" + BidiReference.getHtmlTypename(i) + "" + getList(modifiedClass) + "
"); - } - - writer.flush(); - return stringWriter.toString(); - } - - private static String getList(final UnicodeSet uset) { - final StringBuffer codePointString = new StringBuffer(); - for (final UnicodeSetIterator it = new UnicodeSetIterator(uset); it.next();) { - if (codePointString.length() != 0) { - codePointString.append(" "); - } - final String literal = it.codepoint <= 0x20 ? "\u00AB" + getLiteral(UCharacter.getExtendedName(it.codepoint)) + "\u00BB" : getLiteral(it.codepoint); - codePointString.append(literal); - } - return codePointString.toString(); - } - - private static void showBidiLine(String str, int baseDirection, PrintWriter writer, BidiCharMap bidiCharMap) { - final byte[] codes = new byte[str.length()]; - for (int i = 0; i < str.length(); ++i) { - codes[i] = bidiCharMap.getBidiClass(str.charAt(i)); - } - final int[] linebreaks = new int[1]; - linebreaks[0] = str.length(); - - final BidiReference bidi = new BidiReference(codes, (byte)baseDirection); - final int[] reorder = bidi.getReordering(new int[] { codes.length }); - final byte[] levels = bidi.getLevels(linebreaks); - - writer.println(""); - final byte baseLevel = bidi.getBaseLevel(); - writer.println(""); - writer.println("
Base Level" + baseLevel + " = " + (baseLevel == 0 ? "LTR" : "RTL") + "" + (baseDirection >= 0 ? "explicit" : "heuristic") + "
"); - - // output original text - writer.println("

Source

"); - writer.println(""); - for (int i = 0; i < str.length(); ++i) { - writer.println(""); - } - writer.println(""); - for (int i = 0; i < str.length(); ++i) { - final String s = str.substring(i,i+1); - final String title = toHTML.transform(getName(s, "", true)); - writer.println(""); - } - writer.println(""); - for (int i = 0; i < str.length(); ++i) { - writer.println(""); - } - writer.println(""); - for (int i = 0; i < str.length(); ++i) { - writer.println(""); - } - writer.println(""); - for (int i = 0; i < str.length(); ++i) { - writer.println(""); - } - writer.println("
Memory Position" + i + "
Character " + getLiteral(getBidiChar(str, i, codes[i])) + "
Bidi Class" + BidiReference.getHtmlTypename(codes[i]) + "
Rules Applied" + bidi.getChanges(i).replace("\n", "
") + "
Resulting Level" + showLevel(levels[i]) + "
"); - - // output visually ordered text - writer.println("

Reordered

"); - writer.println(""); - for (int k = 0; k < str.length(); ++k) { - final int i = reorder[k]; - final String bidiChar = getBidiChar(str, i, codes[i]); - final String td = bidiChar.length() == 0 ? ""); - } - writer.println(""); - for (int k = 0; k < str.length(); ++k) { - final int i = reorder[k]; - final String bidiChar = getBidiChar(str, i, codes[i]); - final String td = bidiChar.length() == 0 ? ""); - } - writer.println(""); - for (int k = 0; k < str.length(); ++k) { - final int i = reorder[k]; - final String bidiChar = getBidiChar(str, i, codes[i]); - final String title = bidiChar.length() == 0 ? "deleted" : toHTML.transform(getName(bidiChar, "", true)); - final String td = bidiChar.length() == 0 ? "bxcell" : "bccell"; - writer.println(""); - } - writer.println("
Display Position" : ""; - writer.println(td + k + "
Memory Position" : ""; - writer.println(td + i + "
Character" + " " + getLiteral(bidiChar) +"
"); - - } - - private static String getBidiChar(String str, int i, byte b) { - if (b == BidiReference.PDF || b == BidiReference.RLE || b == BidiReference.LRE || b == BidiReference.LRO || b == BidiReference.RLO || b == BidiReference.BN) { - return ""; - } - final String substring = str.substring(i,i+1); - if ((substring.equals("<") || substring.equals(">")) && (b == BidiReference.L || b == BidiReference.R)) { - return ""; - } - return substring; - } - - private static String showLevel(int level) { - final StringBuffer result = new StringBuffer(); - for (int i = 0; i < level; ++i) { - result.append("
"); - } - result.append("L").append(level); - return result.toString(); - } - - public static String testIdnaLines(String lines, String filter) { - final Transliterator hex = Transliterator.getInstance("any-hex"); - try { - - lines = UnicodeJsp.UNESCAPER.transform(lines.trim()); - final StringBuilder resultLines = new StringBuilder(); - //UnicodeUtilities.getIdna2008Tester(); - - final Predicate verifier2008 = new Predicate() { - @Override - public boolean is(String item) { - return Idna2008.SINGLETON.isValid(item); - } - }; - - resultLines.append("\n"); - resultLines.append("\n"); - - boolean first = true; - final boolean[] errorOut = new boolean[1]; - - for (final String line : lines.split("\\s+")) { - if (first) { - first = false; - } else { - addBlank(resultLines); - } - - final String rawPunycode = UnicodeUtilities.processLabels(line, IdnaTypes.DOTS, true, new Predicate() { - @Override - public boolean is(Object item) { - return true; - }}); - - - // String tr46 = UnicodeUtilities.processLabels(tr46back, UnicodeUtilities.DOTS, true, new Predicate() { - // public boolean is(String item) { - // return Uts46.SINGLETON.transform(item).indexOf('\uFFFD') < 0; // Uts46.SINGLETON.Uts46Chars.containsAll(item); - // } - // }); - // String tr46display = Uts46.SINGLETON.toUnicode(line, errorOut); - // tr46display = UnicodeUtilities.processLabels(tr46display, UnicodeUtilities.DOTS, false, new Predicate() { - // public boolean is(String item) { - // return Uts46.SINGLETON.toUnicode(item).indexOf('\uFFFD') < 0; // Uts46.SINGLETON.Uts46Chars.containsAll(item); - // //return Uts46.SINGLETON.Uts46CharsDisplay.containsAll(item); - // } - // }); - - - // first lines - resultLines.append(""); - resultLines.append(""); - addCell(resultLines, hex, line, "class='cn ltgreen'", "None"); - final String idna2003unic = Idna2003.SINGLETON.toUnicode(line, errorOut, true); - addCell(resultLines, hex, idna2003unic, getIdnaClass("cn i2003", errorOut[0]), "IDNA2003"); - - final String uts46unic = Uts46.SINGLETON.toUnicode(line, errorOut, true); - addCell(resultLines, hex, uts46unic, getIdnaClass("cn i46", errorOut[0]), "UTS46%2BUTS39"); - - final String idna2008unic = UnicodeUtilities.processLabels(line, IdnaTypes.DOT, false, verifier2008); - addCell(resultLines, hex, idna2008unic, getIdnaClass("cn i2008", idna2008unic.contains("\uFFFD")), "IDNA2003"); - resultLines.append(""); - - resultLines.append(""); - addCell(resultLines, hex, rawPunycode, "class='cn ltgreen mono'", null); - final String idna2003puny = Idna2003.SINGLETON.toPunyCode(line, errorOut); - addCell(resultLines, hex, idna2003puny, getIdnaClass("cn mono i2003", errorOut[0]), null); - - final String uts46puny = Uts46.SINGLETON.toPunyCode(line, errorOut); - addCell(resultLines, hex, uts46puny, getIdnaClass("cn mono i46", errorOut[0]), null); - - final String idna2008puny = UnicodeUtilities.processLabels(line, IdnaTypes.DOT, true, verifier2008); - addCell(resultLines, hex, idna2008puny, getIdnaClass("cn mono i2008", idna2008puny.contains("\uFFFD")), null); - - // if (result == null) { - // resultLines.append(""); - // } else { - // resultLines.append(""); - // } - resultLines.append("\n"); - } - - resultLines.append("
InputIDNA2003UTS46IDNA2008
Display
Punycode\u00A0\u00A0") - // .append(toHTML.transform(IdnaLabelTester.ESCAPER.transform(normalized.substring(0, result.position))) - // + "\u2639" + toHTML.transform(IdnaLabelTester.ESCAPER.transform(normalized.substring(result.position))) - // + "" + result.title - // //+ "" + result.ruleLine - // + "
\n"); - return resultLines.toString(); - } catch (final Exception e) { - return toHTML.transform(e.getMessage()); - } - } - - private static String getIdnaClass(String classItems, boolean error) { - return "class='" + - classItems + (error ? " error" : "") + "'"; - } - - static String processLabels(String inputLabels, Pattern dotPattern, boolean punycode, Predicate verifier) { - final StringBuilder result = new StringBuilder(); - for (final String label : dotPattern.split(inputLabels)) { - if (result.length() != 0) { - result.append('.'); - } - try { - if (!verifier.is(label)) { - throw new IllegalArgumentException(); - } - if (!punycode || IdnaTypes.ASCII.containsAll(label)) { - result.append(label); - } else { - final StringBuffer puny = Punycode.encode(new StringBuffer(label), null); - if (puny.length() == 0) { - throw new IllegalArgumentException(); - } - result.append("xn--").append(puny); - } - } catch (final Exception e) { - result.append('\uFFFD'); - } - } - return result.toString(); - } - - -} - -/* - * <% http://www.devshed.com/c/a/Java/Developing-JavaServer-Pages/ Enumeration - * parameterNames = request.getParameterNames(); while - * (parameterNames.hasMoreElements()){ String parameterName = (String) - * parameterNames.nextElement(); String parameterValue = - * request.getParameter(parameterName); %> <%= parameterName %> has value <%= - * parameterValue %>.
<% } %> - */ \ No newline at end of file diff --git a/unicodetools/src/main/java/org/unicode/jsp/UtfParameters.java b/unicodetools/src/main/java/org/unicode/jsp/UtfParameters.java deleted file mode 100644 index 223d85bf2..000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/UtfParameters.java +++ /dev/null @@ -1,86 +0,0 @@ -/** - * - */ -package org.unicode.jsp; - -import java.io.UnsupportedEncodingException; -import java.net.URLDecoder; -import java.util.Collections; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.Map; - -import com.ibm.icu.text.UnicodeSet; - -public class UtfParameters implements Iterable { - - private Map map = new LinkedHashMap(); - - public UtfParameters(String query) { - if (query != null) { - final String[] queries = query.split("&"); - for (final String s : queries) { - final int pos = s.indexOf('='); - String key = pos == -1 ? s : s.substring(0,pos); - try { - key = URLDecoder.decode(key, "UTF-8"); - } catch (final Exception e) {} - String value = pos == -1 ? "" : s.substring(pos+1); - try { - value = URLDecoder.decode(value, "UTF-8"); - } catch (final Exception e) {} - map.put(key, value); - } - } - map = Collections.unmodifiableMap(map); - } - public String getParameter(String key) { - return map.get(key); - } - public String getParameter(String key, String nullReplacement) { - final String result = map.get(key); - if (result == null) { - return nullReplacement; - } - return result; - } - public String getParameter(String key, String nullReplacement, String emptyReplacement) { - final String result = map.get(key); - if (result == null) { - return nullReplacement; - } - if (result.length() == 0) { - return emptyReplacement; - } - return result; - } - @Override - public Iterator iterator() { - return map.keySet().iterator(); - } - - private static UnicodeSet okByte = new UnicodeSet("[A-Za-z0-9]"); - - public static String fixQuery(String input) { - try { - final StringBuilder result = new StringBuilder(); - final byte[] bytes = input.getBytes("utf-8"); - for (final byte b : bytes) { - final int ch = b & 0xFF; - if (okByte.contains(ch)) { - result.append((char)ch); - } else { - result.append('%'); - final String hex = Integer.toHexString(ch); - if (hex.length() == 1) { - result.append('0'); - } - result.append(hex); - } - } - return result.toString(); - } catch (final UnsupportedEncodingException e) { - return null; - } - } -} \ No newline at end of file diff --git a/unicodetools/src/main/java/org/unicode/jsp/XIDModifications.java b/unicodetools/src/main/java/org/unicode/jsp/XIDModifications.java index cf9171182..b2b2f3c6e 100644 --- a/unicodetools/src/main/java/org/unicode/jsp/XIDModifications.java +++ b/unicodetools/src/main/java/org/unicode/jsp/XIDModifications.java @@ -4,37 +4,52 @@ import com.ibm.icu.text.UnicodeSet; public class XIDModifications { - static UnicodeSet allowed = new UnicodeSet(); - static UnicodeMap reasons = new UnicodeMap(); + private static UnicodeMap allowed = new UnicodeMap(); // "[:XID_Continue:]"); + private static UnicodeMap reasons = new UnicodeMap(); static class MyReader extends FileUtilities.SemiFileReader { - // add other - // # @missing: 0000..10FFFF; restricted ; not-chars + @Override protected boolean handleLine(int start, int end, String[] items) { - final String type = items[1]; - if (type.equals("allowed")) { - allowed.add(start, end); - } else if (type.equals("restricted")) { - // allowed.remove(start, end); - } else { - throw new IllegalArgumentException(type); - } +// String type = items[1]; +// if (type.equalsIgnoreCase("allowed")) { +// reasons.putAll(start, end, items[2]); +// } else if (type.equalsIgnoreCase("restricted")) { +// // allowed.remove(start, end); +// } else { +// throw new IllegalArgumentException(type); +// } + allowed.putAll(start, end, items[1]); reasons.putAll(start, end, items[2]); return true; } } static { - reasons.putAll(0,0x10FFFF,"not-chars"); + //# @missing: 0000..10FFFF; Restricted ; Not-Characters + allowed.putAll(0,0x10FFFF,"Restricted"); + reasons.putAll(0,0x10FFFF,"Not-Characters"); //reasons.putAll(new UnicodeSet("[[:gc=cn:][:gc=co:][:gc=cs:][:gc=cc:]-[:whitespace:]]"),"not-char"); new MyReader().process(XIDModifications.class, "xidmodifications.txt"); allowed.freeze(); reasons.freeze(); } - public static UnicodeSet getAllowed() { - return allowed; + public static UnicodeMap getTypes() { + return reasons; } public static UnicodeMap getReasons() { return reasons; } + public static UnicodeMap getStatus() { + return allowed; + } + public static UnicodeSet getAllowed() { + return allowed.getSet("Restricted"); + } + + public static boolean isAllowed(int codePoint) { + return allowed.get(codePoint).equals("Restricted"); + } + public static String getType(int codePoint) { + return reasons.get(codePoint); + } } diff --git a/unicodetools/src/main/java/org/unicode/jsp/XPropertyFactory.java b/unicodetools/src/main/java/org/unicode/jsp/XPropertyFactory.java deleted file mode 100644 index 42d0078eb..000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/XPropertyFactory.java +++ /dev/null @@ -1,582 +0,0 @@ -package org.unicode.jsp; - -import java.nio.charset.Charset; -import java.util.ArrayList; -import java.util.List; -import java.util.Locale; -import java.util.SortedMap; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.unicode.cldr.util.props.ICUPropertyFactory; -import org.unicode.cldr.util.props.UnicodeProperty; -import org.unicode.cldr.util.props.UnicodeProperty.AliasAddAction; -import org.unicode.cldr.util.props.UnicodeProperty.SimpleProperty; -import org.unicode.cldr.util.props.UnicodeProperty.UnicodeSetProperty; -import org.unicode.idna.Idna.IdnaType; -import org.unicode.idna.Idna2003; -import org.unicode.idna.Idna2008; -import org.unicode.idna.Uts46; - -import com.ibm.icu.dev.util.UnicodeMap; -import com.ibm.icu.lang.UCharacter; -import com.ibm.icu.lang.UProperty.NameChoice; -import com.ibm.icu.text.Collator; -import com.ibm.icu.text.Normalizer; -import com.ibm.icu.text.RawCollationKey; -import com.ibm.icu.text.RuleBasedCollator; -import com.ibm.icu.text.StringTransform; -import com.ibm.icu.text.Transform; -import com.ibm.icu.text.UTF16; -import com.ibm.icu.text.UnicodeSet; -import com.ibm.icu.util.ULocale; -import com.ibm.icu.util.VersionInfo; - -public class XPropertyFactory extends UnicodeProperty.Factory { - - static final UnicodeSet ALL = new UnicodeSet("[[:^C:][:Cc:][:Cf:][:noncharactercodepoint:]]").freeze(); - - private static final boolean DEBUG_CHARSET_NAMES = false; - - private static XPropertyFactory singleton = null; - - public static synchronized XPropertyFactory make() { - if (singleton != null) { - return singleton; - } - singleton = new XPropertyFactory(); - return singleton; - } - - - { - final ICUPropertyFactory base = ICUPropertyFactory.make(); - for (final String propertyAlias : (List)base.getInternalAvailablePropertyAliases(new ArrayList())) { - add(base.getProperty(propertyAlias)); - } - for (int i = Common.XSTRING_START; i < Common.XSTRING_LIMIT; ++i) { - final XUnicodeProperty property = new XUnicodeProperty(i); - add(property); - } - - add(new IDNA2003()); - add(new UTS46()); - add(new IDNA2008()); - add(new IDNA2008c()); - add(new Usage()); - add(new HanType()); - add(new UnicodeProperty.UnicodeMapProperty().set(XIDModifications.getReasons()).setMain("identifier-restriction", "idr", UnicodeProperty.ENUMERATED, "1.1")); - add(new UnicodeProperty.UnicodeMapProperty().set(Confusables.getMap()).setMain("confusable", "confusable", UnicodeProperty.ENUMERATED, "1.1")); - add(new UnicodeProperty.UnicodeMapProperty().set(Idna2003.SINGLETON.mappings).setMain("toIdna2003", "toIdna2003", UnicodeProperty.STRING, "1.1")); - add(new UnicodeProperty.UnicodeMapProperty().set(Uts46.SINGLETON.mappings).setMain("toUts46t", "toUts46t", UnicodeProperty.STRING, "1.1")); - add(new UnicodeProperty.UnicodeMapProperty().set(Uts46.SINGLETON.getMappingsDisplay()).setMain("toUts46n", "toUts46n", UnicodeProperty.STRING, "1.1")); - - add(new StringTransformProperty(Common.NFKC_CF, false).setMain("NFKC_Casefold", "NFKC_CF", UnicodeProperty.STRING, "1.1").addName("toNFKC_CF")); - // add(new UnicodeSetProperty().set(Common.isNFKC_CF).setMain("isNFKC_Casefolded", "isNFKC_CF", UnicodeProperty.BINARY, "1.1")); - // - // add(new UnicodeSetProperty().set(Common.isCaseFolded).setMain("isCaseFolded", "caseFolded", UnicodeProperty.BINARY, "1.1")); - // add(new UnicodeSetProperty().set(Common.isUppercase).setMain("isUppercase", "uppercase", UnicodeProperty.BINARY, "1.1")); - // add(new UnicodeSetProperty().set(Common.isLowercase).setMain("isLowercase", "lowercase", UnicodeProperty.BINARY, "1.1")); - // add(new UnicodeSetProperty().set(Common.isTitlecase).setMain("isTitlecase", "titlecase", UnicodeProperty.BINARY, "1.1")); - // add(new UnicodeSetProperty().set(Common.isCased).setMain("isCased", "cased", UnicodeProperty.BINARY, "1.1")); - - add(new CodepointTransformProperty(new Transform() { - @Override - public String transform(Integer source) { - return Normalizer.normalize(source, Normalizer.NFC); - }}, false).setMain("toNFC", "toNFC", UnicodeProperty.STRING, "1.1")); - add(new CodepointTransformProperty(new Transform() { - @Override - public String transform(Integer source) { - return Normalizer.normalize(source, Normalizer.NFD); - }}, false).setMain("toNFD", "toNFD", UnicodeProperty.STRING, "1.1")); - add(new CodepointTransformProperty(new Transform() { - @Override - public String transform(Integer source) { - return Normalizer.normalize(source, Normalizer.NFKC); - }}, false).setMain("toNFKC", "toNFKC", UnicodeProperty.STRING, "1.1")); - add(new CodepointTransformProperty(new Transform() { - @Override - public String transform(Integer source) { - return Normalizer.normalize(source, Normalizer.NFKD); - }}, false).setMain("toNFKD", "toNFKD", UnicodeProperty.STRING, "1.1")); - - add(new StringTransformProperty(new StringTransform() { - @Override - public String transform(String source) { - return UCharacter.foldCase(source, true); - }}, false).setMain("toCasefold", "toCasefold", UnicodeProperty.STRING, "1.1")); - add(new StringTransformProperty(new StringTransform() { - @Override - public String transform(String source) { - return UCharacter.toLowerCase(ULocale.ROOT, source); - }}, false).setMain("toLowerCase", "toLowerCase", UnicodeProperty.STRING, "1.1")); - add(new StringTransformProperty(new StringTransform() { - @Override - public String transform(String source) { - return UCharacter.toUpperCase(ULocale.ROOT, source); - }}, false).setMain("toUpperCase", "toUpperCase", UnicodeProperty.STRING, "1.1")); - add(new StringTransformProperty(new StringTransform() { - @Override - public String transform(String source) { - return UCharacter.toTitleCase(ULocale.ROOT, source, null); - }}, false).setMain("toTitleCase", "toTitleCase", UnicodeProperty.STRING, "1.1")); - - add(new StringTransformProperty(new StringTransform() { - @Override - public String transform(String source) { - final String result = NFM.nfm.get(source); - return result == null ? source : result; - }}, false).setMain("toNFM", "toNFM", UnicodeProperty.STRING, "1.1")); - //add(new UnicodeProperty.UnicodeMapProperty().set(NFM.nfm).setMain("toNFM", "toNFM", UnicodeProperty.STRING, "1.1")); - add(new UnicodeSetProperty().set(NFM.nfm.getSet(null)).setMain("isNFM", "isNFM", UnicodeProperty.BINARY, "1.1")); - - add(new CodepointTransformProperty(new Transform() { - @Override - public String transform(Integer source) { - return UnicodeUtilities.getSubheader().getSubheader(source); - }}, false).setMain("Subheader", "subhead", UnicodeProperty.STRING, "1.1")); - - add(new UnicodeSetProperty().set("[:^nfcqc=n:]").setMain("isNFC", "isNFC", UnicodeProperty.BINARY, "1.1")); - add(new UnicodeSetProperty().set("[:^nfdqc=n:]").setMain("isNFD", "isNFD", UnicodeProperty.BINARY, "1.1")); - add(new UnicodeSetProperty().set("[:^nfkcqc=n:]").setMain("isNFKC", "isNFKC", UnicodeProperty.BINARY, "1.1")); - add(new UnicodeSetProperty().set("[:^nfkdqc=n:]").setMain("isNFKD", "isNFKD", UnicodeProperty.BINARY, "1.1")); - add(new UnicodeSetProperty().set("[\\u0000-\\u007F]").setMain("ASCII", "ASCII", UnicodeProperty.BINARY, "1.1")); - add(new UnicodeSetProperty().set("[\\u0000-\\U0010FFFF]").setMain("ANY", "ANY", UnicodeProperty.BINARY, "1.1")); - - final String emojiSource = "[\\u00A9\\u00AE\\u2002\\u2003\\u2005\\u203C\\u2049\\u2122\\u2139\\u2194-\\u2199\\u21A9\\u21AA\\u231A\\u231B\\u23E9-\\u23EC\\u23F0\\u23F3\\u24C2\\u25AA\\u25AB\\u25B6\\u25C0\\u25FB-\\u25FE\\u2600\\u2601\\u260E\\u2611\\u2614\\u2615\\u261D\\u263A\\u2648-\\u2653\\u2660\\u2663\\u2665\\u2666\\u2668\\u267B\\u267F\\u2693\\u26A0\\u26A1\\u26AA\\u26AB\\u26BD\\u26BE\\u26C4\\u26C5\\u26CE\\u26D4\\u26EA\\u26F2\\u26F3\\u26F5\\u26FA\\u26FD\\u2702\\u2705\\u2708-\\u270C\\u270F\\u2712\\u2714\\u2716\\u2728\\u2733\\u2734\\u2744\\u2747\\u274C\\u274E\\u2753-\\u2755\\u2757\\u2764\\u2795-\\u2797\\u27A1\\u27B0\\u27BF\\u2934\\u2935\\u2B05-\\u2B07\\u2B1B\\u2B1C\\u2B50\\u2B55\\u3030\\u303D\\u3297\\u3299\\U0001F004\\U0001F0CF\\U0001F170\\U0001F171\\U0001F17E\\U0001F17F\\U0001F18E\\U0001F191-\\U0001F19A\\U0001F1E6-\\U0001F1FF\\U0001F201\\U0001F202\\U0001F21A\\U0001F22F\\U0001F232-\\U0001F23A\\U0001F250\\U0001F251\\U0001F300-\\U0001F30C\\U0001F30F\\U0001F311\\U0001F313-\\U0001F315\\U0001F319\\U0001F31B\\U0001F31F\\U0001F320\\U0001F330\\U0001F331\\U0001F334\\U0001F335\\U0001F337-\\U0001F34A\\U0001F34C-\\U0001F34F\\U0001F351-\\U0001F37B\\U0001F380-\\U0001F393\\U0001F3A0-\\U0001F3C4\\U0001F3C6\\U0001F3C8\\U0001F3CA\\U0001F3E0-\\U0001F3E3\\U0001F3E5-\\U0001F3F0\\U0001F40C-\\U0001F40E\\U0001F411\\U0001F412\\U0001F414\\U0001F417-\\U0001F429\\U0001F42B-\\U0001F43E\\U0001F440\\U0001F442-\\U0001F464\\U0001F466-\\U0001F46B\\U0001F46E-\\U0001F4AC\\U0001F4AE-\\U0001F4B5\\U0001F4B8-\\U0001F4EB\\U0001F4EE\\U0001F4F0-\\U0001F4F4\\U0001F4F6\\U0001F4F7\\U0001F4F9-\\U0001F4FC\\U0001F503\\U0001F50A-\\U0001F514\\U0001F516-\\U0001F52B\\U0001F52E-\\U0001F53D\\U0001F550-\\U0001F55B\\U0001F5FB-\\U0001F5FF\\U0001F601-\\U0001F606\\U0001F609-\\U0001F60D\\U0001F60F\\U0001F612-\\U0001F614\\U0001F616\\U0001F618\\U0001F61A\\U0001F61C-\\U0001F61E\\U0001F620-\\U0001F625\\U0001F628-\\U0001F62B\\U0001F62D\\U0001F630-\\U0001F633\\U0001F635\\U0001F637-\\U0001F640\\U0001F645-\\U0001F64F\\U0001F680\\U0001F683-\\U0001F685\\U0001F687\\U0001F689\\U0001F68C\\U0001F68F\\U0001F691-\\U0001F693\\U0001F695\\U0001F697\\U0001F699\\U0001F69A\\U0001F6A2\\U0001F6A4\\U0001F6A5\\U0001F6A7-\\U0001F6AD\\U0001F6B2\\U0001F6B6\\U0001F6B9-\\U0001F6BE\\U0001F6C0" + - "\\u20E3" + - "\\U0001F1E8\\U0001F1F3 \\U0001F1E9\\U0001F1EA \\U0001F1EA\\U0001F1F8 \\U0001F1EB\\U0001F1F7 \\U0001F1EC\\U0001F1E7 \\U0001F1EE\\U0001F1F9 \\U0001F1EF\\U0001F1F5 \\U0001F1F0\\U0001F1F7 \\U0001F1F7\\U0001F1FA \\U0001F1FA\\U0001F1F8 ]"; - - add(new UnicodeSetProperty().set(emojiSource).setMain("emoji", "emoji", UnicodeProperty.BINARY, "6.0")); - - add(new UnicodeSetProperty().set(new UnicodeSet("[\\u0000-\\uFFFF]")).setMain("bmp", "bmp", UnicodeProperty.BINARY, "6.0")); - - addCollationProperty(); - - // set up the special script property - final UnicodeProperty scriptProp = base.getProperty("sc"); - final UnicodeMap specialMap = new UnicodeMap(); - specialMap.putAll(scriptProp.getUnicodeMap()); - specialMap.putAll(ScriptTester.getScriptSpecialsNames()); - add(new UnicodeProperty.UnicodeMapProperty() - .set(specialMap) - .setMain("Script_Specials", "scs", UnicodeProperty.ENUMERATED, "1.1") - .addValueAliases(ScriptTester.getScriptSpecialsAlternates(), AliasAddAction.IGNORE_IF_MISSING) - ); - - final SortedMap charsets = Charset.availableCharsets(); - if (DEBUG_CHARSET_NAMES) { - System.out.println(charsets.keySet()); - } - final Matcher charsetMatcher = Pattern.compile("ISO-8859-\\d*|GB2312|Shift_JIS|GBK|Big5|EUC-KR").matcher(""); - for (final String name : charsets.keySet()) { - if (!charsetMatcher.reset(name).matches()) { - continue; - } - final Charset charset = charsets.get(name); - final EncodingProperty prop = new EncodingProperty(charset); - prop._setType(UnicodeProperty.STRING); - prop._setName("enc_" + name); - - final EncodingPropertyBoolean isProp = new EncodingPropertyBoolean(charset); - isProp._setType(UnicodeProperty.BINARY); - isProp._setName("is_enc_" + name); - - for (final String alias : charset.aliases()) { - if (DEBUG_CHARSET_NAMES) { - System.out.println(name + " => " + alias); - } - prop.addName("enc_" + alias); - isProp.addName("isEnc_" + alias); - } - - add(prop); - add(isProp); - } - - // exemplars - // String[] typeName = {"", "aux_"}; - // for (ULocale locale : ULocale.getAvailableLocales()) { - // if (locale.getCountry().length() != 0 || locale.getVariant().length() != 0) { - // continue; - // } - // LocaleData localeData = LocaleData.getInstance(locale); - // for (int type = 0; type < LocaleData.ES_COUNT; ++type) { - // String name = "exemplars_" + typeName[type] + locale; - // UnicodeSet us = localeData.getExemplarSet(UnicodeSet.CASE, type).freeze(); - // add(new UnicodeSetProperty().set(us).setMain(name, name, UnicodeProperty.BINARY, "1.1")); - // } - // } - } - - private void addCollationProperty() { - final RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT); - - final UnicodeMap collationMap = new UnicodeMap(); - final UnicodeMap collationMap2 = new UnicodeMap(); - final RawCollationKey key = new RawCollationKey(); - final StringBuilder builder = new StringBuilder(); - final StringBuilder builder2 = new StringBuilder(); - final UnicodeSet contractions = new UnicodeSet(); - final UnicodeSet expansions = new UnicodeSet(); - try { - c.getContractionsAndExpansions(contractions, expansions, true); - } catch (final Exception e) { - throw new IllegalArgumentException(e); - } - final UnicodeSet stuff = new UnicodeSet(ALL).addAll(contractions).addAll(expansions); - for (final String s : stuff) { - c.getRawCollationKey(s, key); - builder.setLength(0); - builder2.setLength(0); - int oneCount = 0; - for (int i = 0; i < key.size; ++i) { - final byte b = key.bytes[i]; - if (b == 1) { - oneCount++; - } - if (oneCount > 1) { - break; - } - final String hex = com.ibm.icu.impl.Utility.hex(0xFF&b, 2); - // look at both - if (builder2.length() != 0) { - builder2.append(' '); - } - builder2.append(hex); - // only look at primary values - if (oneCount != 0) { - continue; - } - if (builder.length() != 0) { - builder.append(' '); - } - builder.append(hex); - } - collationMap.put(s, builder.toString()); - String builderString2 = builder2.toString(); - if (builderString2.endsWith("01")) { - builderString2 = builderString2.substring(0,builderString2.length() - 2).trim(); - } - collationMap2.put(s, builderString2); - } - add(new UnicodeProperty.UnicodeMapProperty().set(collationMap).setMain("uca", "uca1", UnicodeProperty.ENUMERATED, "1.1")); - add(new UnicodeProperty.UnicodeMapProperty().set(collationMap2).setMain("uca2", "uca2", UnicodeProperty.ENUMERATED, "1.1")); - } - - // public UnicodeProperty getInternalProperty(String propertyAlias) { - // UnicodeProperty result = props.get(propertyAlias.toLowerCase(Locale.ENGLISH)); - // if (result != null) { - // return result; - // } - // return base.getInternalProperty(propertyAlias); - // } - // - // public List getInternalAvailablePropertyAliases(List result) { - // base.getInternalAvailablePropertyAliases(result); - // result.addAll(UnicodeUtilities.XPROPERTY_NAMES); - // return result; - // } - - private static class XUnicodeProperty extends UnicodeProperty { - int fakeEnumValue; - - public XUnicodeProperty(int i) { - setName(Common.XPROPERTY_NAMES.get(i - Common.XSTRING_START)); - fakeEnumValue = i; - setType(UnicodeProperty.EXTENDED_STRING); - } - - @Override - protected List _getAvailableValues(List result) { - addUnique("", result); - return result; - } - - @Override - protected List _getNameAliases(List result) { - addUnique(getName(), result); - return result; - } - - @Override - protected String _getValue(int codepoint) { - return Common.getXStringPropertyValue(fakeEnumValue, codepoint, NameChoice.LONG); - } - - @Override - protected List _getValueAliases(String valueAlias, List result) { - addUnique("", result); - return result; - } - - @Override - protected String _getVersion() { - return VersionInfo.ICU_VERSION.toString(); - } - - } - - private static abstract class XEnumUnicodeProperty extends UnicodeProperty { - List values = new ArrayList(); - - public XEnumUnicodeProperty(String name, Object[] values) { - setName(name); - for (final Object item : values) { - this.values.add(item.toString()); - } - setType(UnicodeProperty.ENUMERATED); - } - - @Override - protected List _getAvailableValues(List result) { - for (final String s : values) { - addUnique(s, result); - } - return result; - } - - @Override - protected List _getNameAliases(List result) { - addUnique(getName(), result); - return result; - } - - @Override - protected List _getValueAliases(String valueAlias, List result) { - if (values.contains(valueAlias)) { - addUnique(valueAlias, result); - } - return result; - } - - @Override - protected String _getVersion() { - return VersionInfo.ICU_VERSION.toString(); - } - - } - - private static class IDNA2003 extends XEnumUnicodeProperty { - public IDNA2003() { - super("idna2003", IdnaType.values()); - } - - @Override - protected String _getValue(int codepoint) { - return Idna2003.SINGLETON.getType(codepoint).toString(); - } - @Override - protected List _getNameAliases(List result) { - super._getNameAliases(result); - result.add("idna"); - return result; - } - } - - private static class UTS46 extends XEnumUnicodeProperty { - public UTS46() { - super("uts46", IdnaType.values()); - } - - @Override - protected String _getValue(int codepoint) { - return Uts46.SINGLETON.getType(codepoint).toString(); - } - } - - private static class IDNA2008 extends XEnumUnicodeProperty { - public IDNA2008() { - super("idna2008", Idna2008.Idna2008Type.values()); - } - - @Override - protected String _getValue(int codepoint) { - return Idna2008.getTypeMapping().get(codepoint).toString(); - } - } - - private static class IDNA2008c extends XEnumUnicodeProperty { - public IDNA2008c() { - super("idna2008c", IdnaType.values()); - } - - @Override - protected String _getValue(int codepoint) { - return Idna2008.SINGLETON.getType(codepoint).toString(); - } - } - - private static class Usage extends XEnumUnicodeProperty { - enum UsageValues {common, historic, deprecated, liturgical, limited, symbol, punctuation, na; - public static UsageValues getValue(int codepoint) { - if (UnicodeProperty.getSPECIALS().contains(codepoint)) { - return na; - } - if (UnicodeUtilities.DEPRECATED.contains(codepoint)) { - return deprecated; - } - if (UnicodeUtilities.LITURGICAL.contains(codepoint)) { - return liturgical; - } - if (ScriptCategoriesCopy.ARCHAIC.contains(codepoint)) { - return historic; - } - //if (UnicodeUtilities.LIM.contains(codepoint)) return archaic; - if (UnicodeUtilities.COMMON_USE_SCRIPTS.contains(codepoint)) { - if (UnicodeUtilities.SYMBOL.contains(codepoint)) { - return symbol; - } - if (UnicodeUtilities.PUNCTUATION.contains(codepoint)) { - return punctuation; - } - return common; - } - return limited; - } - } - public Usage() { - super("Usage", UsageValues.values()); - setType(UnicodeProperty.EXTENDED_ENUMERATED); - } - - @Override - protected String _getValue(int codepoint) { - return UsageValues.getValue(codepoint).toString(); - } - } - - static class HanType extends XEnumUnicodeProperty { - enum HanTypeValues {na, Hans, Hant, Han} - public HanType() { - super("HanType", HanTypeValues.values()); - setType(UnicodeProperty.EXTENDED_ENUMERATED); - } - - @Override - protected String _getValue(int codepoint) { - return Common.getValue(codepoint).toString(); - } - } - - private static class StringTransformProperty extends SimpleProperty { - Transform transform; - - public StringTransformProperty(Transform transform, boolean hasUniformUnassigned) { - this.transform = transform; - setUniformUnassigned(hasUniformUnassigned); - } - @Override - protected String _getValue(int codepoint) { - return transform.transform(UTF16.valueOf(codepoint)); - } - } - - private static class CodepointTransformProperty extends SimpleProperty { - Transform transform; - - public CodepointTransformProperty(Transform transform, boolean hasUniformUnassigned) { - this.transform = transform; - setUniformUnassigned(hasUniformUnassigned); - } - @Override - protected String _getValue(int codepoint) { - return transform.transform(codepoint); - } - } - - public static class EncodingProperty extends SimpleProperty { - - public static final String ERROR = "\uFFFD"; - - CharEncoder encoder; - byte[] temp = new byte[32]; // any more than this and we don't care - - EncodingProperty(Charset charset) { - encoder = new CharEncoder(charset, false, false); - } - - @Override - protected String _getValue(int codepoint) { - final int len = encoder.getValue(codepoint, temp, 0); - if (len < 0) { - return ERROR; - } - final StringBuffer result = new StringBuffer(); - for (int i = 0; i < len; ++i) { - if (result.length() > 0) { - result.append(' '); - } - result.append(hex(temp[i])); - } - return result.toString(); - } - - @Override - public boolean isDefault(int codepoint) { - final int len = encoder.getValue(codepoint, temp, 0); - return len < 0; - } - - private Object hex(byte b) { - final String result = Integer.toHexString(0xFF&b).toUpperCase(Locale.ENGLISH); - return result.length() == 2 ? result : "0" + result; - } - - public void _setName(String string) { - super.setName(string); - } - - protected final void _setType(int i) { - super.setType(i); - } - } - - public static class EncodingPropertyBoolean extends SimpleProperty { - - CharEncoder encoder; - - EncodingPropertyBoolean(Charset charset) { - encoder = new CharEncoder(charset, true, true); - } - - @Override - protected String _getValue(int codepoint) { - return (encoder.getValue(codepoint, null, 0) > 0) ? "Yes" : "No"; - } - public void _setName(String string) { - super.setName(string); - } - - protected final void _setType(int i) { - super.setType(i); - } - } - - - // public static class UnicodeSetProperty extends BaseProperty { - // protected UnicodeSet unicodeSet; - // private static final String[] YESNO_ARRAY = new String[]{"Yes", "No"}; - // private static final List YESNO = Arrays.asList(YESNO_ARRAY); - // - // public UnicodeSetProperty set(UnicodeSet set) { - // unicodeSet = set; - // return this; - // } - // - // public UnicodeSetProperty set(String string) { - // // TODO Auto-generated method stub - // return set(new UnicodeSet(string).freeze()); - // } - // - // protected String _getValue(int codepoint) { - // return YESNO_ARRAY[unicodeSet.contains(codepoint) ? 0 : 1]; - // } - // - // protected List _getAvailableValues(List result) { - // return YESNO; - // } - // } - -} diff --git a/unicodetools/src/main/java/org/unicode/props/BagFormatter.java b/unicodetools/src/main/java/org/unicode/props/BagFormatter.java new file mode 100644 index 000000000..9ee80cd69 --- /dev/null +++ b/unicodetools/src/main/java/org/unicode/props/BagFormatter.java @@ -0,0 +1,1126 @@ +/* + ******************************************************************************* + * Copyright (C) 2002-2016, International Business Machines Corporation and + * others. All Rights Reserved. + ******************************************************************************* + */ +package org.unicode.props; + +import java.io.PrintWriter; +import java.io.StringWriter; +import java.text.MessageFormat; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Locale; +import java.util.Map; + +import org.unicode.cldr.draft.FileUtilities; +import org.unicode.cldr.util.Tabber; +import org.unicode.cldr.util.Visitor; +import org.unicode.cldr.util.props.UnicodeLabel; +import org.unicode.jsp.ICUPropertyFactory; + +import com.ibm.icu.impl.Utility; +import com.ibm.icu.text.NumberFormat; +import com.ibm.icu.text.Transliterator; +import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeSet; + +public class BagFormatter { + static final boolean DEBUG = false; + public static final boolean SHOW_FILES; + static { + boolean showFiles = false; + try { + showFiles = System.getProperty("SHOW_FILES") != null; + } + catch (SecurityException e) { + } + SHOW_FILES = showFiles; + } + + public static final PrintWriter CONSOLE = new PrintWriter(System.out,true); + + private static PrintWriter log = CONSOLE; + + private boolean abbreviated = false; + private String separator = ","; + private String prefix = "["; + private String suffix = "]"; + private UnicodeProperty.Factory source; + private UnicodeLabel nameSource; + private UnicodeLabel labelSource; + private UnicodeLabel rangeBreakSource; + private UnicodeLabel valueSource; + private String propName = ""; + private boolean showCount = true; + //private boolean suppressReserved = true; + private boolean hexValue = false; + private static final String NULL_VALUE = "_NULL_VALUE_"; + private int fullTotal = -1; + private boolean showTotal = true; + private String lineSeparator = System.lineSeparator(); + private Tabber tabber = new Tabber.MonoTabber(); + + /** + * Compare two UnicodeSets, and show the differences + * @param name1 name of first set to be compared + * @param set1 first set + * @param name2 name of second set to be compared + * @param set2 second set + * @return formatted string + */ + public String showSetDifferences( + String name1, + UnicodeSet set1, + String name2, + UnicodeSet set2) { + + StringWriter result = new StringWriter(); + showSetDifferences(new PrintWriter(result),name1,set1,name2,set2); + result.flush(); + return result.getBuffer().toString(); + } + + public String showSetDifferences( + String name1, + Collection set1, + String name2, + Collection set2) { + + StringWriter result = new StringWriter(); + showSetDifferences(new PrintWriter(result), name1, set1, name2, set2); + result.flush(); + return result.getBuffer().toString(); + } + + public void showSetDifferences( + PrintWriter pw, + String name1, + UnicodeSet set1, + String name2, + UnicodeSet set2) { + showSetDifferences(pw, name1, set1, name2, set2, -1); + } + /** + * Compare two UnicodeSets, and show the differences + * @param name1 name of first set to be compared + * @param set1 first set + * @param name2 name of second set to be compared + * @param set2 second set + */ + public void showSetDifferences( + PrintWriter pw, + String name1, + UnicodeSet set1, + String name2, + UnicodeSet set2, + int flags) + { + if (pw == null) pw = FileUtilities.CONSOLE; + String[] names = { name1, name2 }; + + UnicodeSet temp; + + if ((flags&1) != 0) { + temp = new UnicodeSet(set1).removeAll(set2); + pw.print(lineSeparator); + pw.print(inOut.format(names)); + pw.print(lineSeparator); + showSetNames(pw, temp); + } + + if ((flags&2) != 0) { + temp = new UnicodeSet(set2).removeAll(set1); + pw.print(lineSeparator); + pw.print(outIn.format(names)); + pw.print(lineSeparator); + showSetNames(pw, temp); + } + + if ((flags&4) != 0) { + temp = new UnicodeSet(set2).retainAll(set1); + pw.print(lineSeparator); + pw.print(inIn.format(names)); + pw.print(lineSeparator); + showSetNames(pw, temp); + } + pw.flush(); + } + + public void showSetDifferences( + PrintWriter pw, + String name1, + Collection set1, + String name2, + Collection set2) { + + if (pw == null) pw = FileUtilities.CONSOLE; + String[] names = { name1, name2 }; + // damn'd collection doesn't have a clone, so + // we go with Set, even though that + // may not preserve order and duplicates + Collection temp = new HashSet(set1); + temp.removeAll(set2); + pw.println(); + pw.println(inOut.format(names)); + showSetNames(pw, temp); + + temp.clear(); + temp.addAll(set2); + temp.removeAll(set1); + pw.println(); + pw.println(outIn.format(names)); + showSetNames(pw, temp); + + temp.clear(); + temp.addAll(set1); + temp.retainAll(set2); + pw.println(); + pw.println(inIn.format(names)); + showSetNames(pw, temp); + } + + /** + * Returns a list of items in the collection, with each separated by the separator. + * Each item must not be null; its toString() is called for a printable representation + * @param c source collection + * @return a String representation of the list + */ + public String showSetNames(Object c) { + StringWriter buffer = new StringWriter(); + PrintWriter output = new PrintWriter(buffer); + showSetNames(output,c); + return buffer.toString(); + } + + /** + * Returns a list of items in the collection, with each separated by the separator. + * Each item must not be null; its toString() is called for a printable representation + * @param output destination to which to write names + * @param c source collection + */ + public void showSetNames(PrintWriter output, Object c) { + mainVisitor.doAt(c, output); + output.flush(); + } + + public String getAbbreviatedName( + String src, + String pattern, + String substitute) { + + int matchEnd = NameIterator.findMatchingEnd(src, pattern); + int sdiv = src.length() - matchEnd; + int pdiv = pattern.length() - matchEnd; + StringBuffer result = new StringBuffer(); + addMatching( + src.substring(0, sdiv), + pattern.substring(0, pdiv), + substitute, + result); + addMatching( + src.substring(sdiv), + pattern.substring(pdiv), + substitute, + result); + return result.toString(); + } + + abstract public static class Relation { + abstract public String getRelation(String a, String b); + } + + static class NullRelation extends Relation { + @Override + public String getRelation(String a, String b) { return ""; } + } + + private Relation r = new NullRelation(); + + public BagFormatter setRelation(Relation r) { + this.r = r; + return this; // for chaining + } + + public Relation getRelation() { + return r; + } + + /* + r.getRelati on(last, s) + quote(s) + "\t#" + UnicodeSetFormatter.getResolvedName(s) + */ + /* + static final UnicodeSet NO_NAME = + new UnicodeSet("[\\u0080\\u0081\\u0084\\u0099\\p{Cn}\\p{Co}]"); + static final UnicodeSet HAS_NAME = new UnicodeSet(NO_NAME).complement(); + static final UnicodeSet NAME_CHARACTERS = + new UnicodeSet("[A-Za-z0-9\\<\\>\\-\\ ]"); + + public UnicodeSet getSetForName(String namePattern) { + UnicodeSet result = new UnicodeSet(); + Matcher m = Pattern.compile(namePattern).matcher(""); + // check for no-name items, and add in bulk + m.reset(""); + if (m.matches()) { + result.addAll(NO_NAME); + } + // check all others + UnicodeSetIterator usi = new UnicodeSetIterator(HAS_NAME); + while (usi.next()) { + String name = getName(usi.codepoint); + if (name == null) + continue; + m.reset(name); + if (m.matches()) { + result.add(usi.codepoint); + } + } + // Note: if Regex had some API so that if we could tell that + // an initial substring couldn't match, e.g. "CJK IDEOGRAPH-" + // then we could optimize by skipping whole swathes of characters + return result; + } + */ + + public BagFormatter setMergeRanges(boolean in) { + mergeRanges = in; + return this; + } + public BagFormatter setShowSetAlso(boolean b) { + showSetAlso = b; + return this; + } + + public String getName(int codePoint) { + return getName("", codePoint, codePoint); + } + + public String getName(String sep, int start, int end) { + if (getNameSource() == null || getNameSource() == UnicodeLabel.NULL) return ""; + String result = getName(start, false); + if (start == end) return sep + result; + String endString = getName(end, false); + if (result.length() == 0 && endString.length() == 0) return sep; + if (abbreviated) endString = getAbbreviatedName(endString,result,"~"); + return sep + result + ".." + endString; + } + + public String getName(String s) { + return getName(s, false); + } + + public static class NameLabel extends UnicodeLabel { + UnicodeProperty nameProp; + UnicodeSet control; + UnicodeSet private_use; + UnicodeSet noncharacter; + UnicodeSet surrogate; + + public NameLabel(UnicodeProperty.Factory source) { + nameProp = source.getProperty("Name"); + control = source.getSet("gc=Cc"); + private_use = source.getSet("gc=Co"); + surrogate = source.getSet("gc=Cs"); + noncharacter = source.getSet("noncharactercodepoint=yes"); + } + + @Override + public String getValue(int codePoint, boolean isShort) { + String hcp = !isShort + ? "U+" + Utility.hex(codePoint, 4) + " " + : ""; + String result = nameProp.getValue(codePoint); + if (result != null) + return hcp + result; + if (control.contains(codePoint)) { + return ""; + } + if (private_use.contains(codePoint)) { + return ""; + } + if (surrogate.contains(codePoint)) { + return ""; + } + if (noncharacter.contains(codePoint)) { + return ""; + } + //if (suppressReserved) return ""; + return hcp + ""; + } + + } + + // refactored + public String getName(int codePoint, boolean withCodePoint) { + String result = getNameSource().getValue(codePoint, !withCodePoint); + return fixName == null ? result : fixName.transliterate(result); + } + + public String getName(String s, boolean withCodePoint) { + String result = getNameSource().getValue(s, separator, !withCodePoint); + return fixName == null ? result : fixName.transliterate(result); + } + + public String hex(String s) { + return hex(s,separator); + } + + public String hex(String s, String sep) { + return UnicodeLabel.HEX.getValue(s, sep, true); + } + + public String hex(int start, int end) { + String s = Utility.hex(start,4); + if (start == end) return s; + return s + ".." + Utility.hex(end,4); + } + + public BagFormatter setUnicodePropertyFactory(UnicodeProperty.Factory source) { + this.source = source; + return this; + } + + private UnicodeProperty.Factory getUnicodePropertyFactory() { + if (source == null) source = ICUPropertyFactory.make(); + return source; + } + + public BagFormatter () { + } + + public BagFormatter (UnicodeProperty.Factory source) { + setUnicodePropertyFactory(source); + } + + public String join(Object o) { + return labelVisitor.join(o); + } + + // ===== PRIVATES ===== + + private Join labelVisitor = new Join(); + + private boolean mergeRanges = true; + private Transliterator showLiteral = null; + private Transliterator fixName = null; + private boolean showSetAlso = false; + + private RangeFinder rf = new RangeFinder(); + + private MessageFormat inOut = new MessageFormat("In {0}, but not in {1}:"); + private MessageFormat outIn = new MessageFormat("Not in {0}, but in {1}:"); + private MessageFormat inIn = new MessageFormat("In both {0}, and in {1}:"); + + private MyVisitor mainVisitor = new MyVisitor(); + + /* + private String getLabels(int start, int end) { + Set names = new TreeSet(); + for (int cp = start; cp <= end; ++cp) { + names.add(getLabel(cp)); + } + return labelVisitor.join(names); + } + */ + + private void addMatching( + String src, + String pattern, + String substitute, + StringBuffer result) { + NameIterator n1 = new NameIterator(src); + NameIterator n2 = new NameIterator(pattern); + boolean first = true; + while (true) { + String s1 = n1.next(); + if (s1 == null) + break; + String s2 = n2.next(); + if (!first) + result.append(" "); + first = false; + if (s1.equals(s2)) + result.append(substitute); + else + result.append(s1); + } + } + + private static NumberFormat nf = + NumberFormat.getIntegerInstance(Locale.ENGLISH); + static { + nf.setGroupingUsed(false); + } + + private int maxWidthOverride = -1; + private int maxLabelWidthOverride = -1; + + public BagFormatter setValueWidthOverride(int maxWidthOverride) { + this.maxWidthOverride = maxWidthOverride; + return this; + } + + public int getValueWidthOverride() { + return maxWidthOverride; + } + + public BagFormatter setLabelWidthOverride(int maxWidthOverride) { + this.maxLabelWidthOverride = maxWidthOverride; + return this; + } + + public int getLabelWidthOverride() { + return maxLabelWidthOverride; + } + + + private class MyVisitor extends Visitor { + private PrintWriter output; + String commentSeparator; + int counter; + int valueSize; + int labelSize; + boolean isHtml; + boolean inTable = false; + + public void toOutput(String s) { + if (isHtml) { + if (inTable) { + output.print(""); + inTable = false; + } + output.print("

"); + } + output.print(s); + if (isHtml) + output.println("

"); + else + output.print(lineSeparator); + } + + public void toTable(String s) { + if (isHtml && !inTable) { + output.print(""); + inTable = true; + } + output.print(tabber.process(s) + lineSeparator); + } + + public void doAt(Object c, PrintWriter out) { + output = out; + isHtml = tabber instanceof Tabber.HTMLTabber; + counter = 0; + + tabber.clear(); + // old: + // 0009..000D ; White_Space # Cc [5] .. + // new + // 0009..000D ; White_Space #Cc [5] .. + tabber.add(mergeRanges ? 14 : 6,Tabber.LEFT); + + if (propName.length() > 0) { + tabber.add(propName.length() + 2,Tabber.LEFT); + } + + valueSize = maxWidthOverride > 0 ? maxWidthOverride : getValueSource().getMaxWidth(shortValue); + + if (DEBUG) System.out.println("ValueSize: " + valueSize); + if (valueSize > 0) { + tabber.add(valueSize + 2,Tabber.LEFT); // value + } + + tabber.add(3,Tabber.LEFT); // comment character + + labelSize = maxLabelWidthOverride > 0 ? maxLabelWidthOverride : getLabelSource(true).getMaxWidth(shortLabel); + if (labelSize > 0) { + tabber.add(labelSize + 1,Tabber.LEFT); // value + } + + if (mergeRanges && showCount) { + tabber.add(5,Tabber.RIGHT); + } + + if (showLiteral != null) { + tabber.add(4,Tabber.LEFT); + } + //myTabber.add(7,Tabber.LEFT); + + commentSeparator = (showCount || showLiteral != null + || getLabelSource(true) != UnicodeLabel.NULL + || getNameSource() != UnicodeLabel.NULL) + ? "\t #" : ""; + + if (DEBUG) System.out.println("Tabber: " + tabber.toString()); + if (DEBUG) System.out.println("Tabber: " + tabber.process( + "200C..200D\t; White_Space\t #\tCf\t [2]\t ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER")); + doAt(c); + } + + @SuppressWarnings("unused") + public String format(Object o) { + StringWriter sw = new StringWriter(); + PrintWriter pw = new PrintWriter(sw); + doAt(o); + pw.flush(); + String result = sw.getBuffer().toString(); + pw.close(); + return result; + } + + @Override + protected void doBefore(Object container, Object o) { + if (showSetAlso && container instanceof UnicodeSet) { + toOutput("#" + container); + } + } + + @Override + protected void doBetween(Object container, Object lastItem, Object nextItem) { + } + + @Override + protected void doAfter(Object container, Object o) { + if (fullTotal != -1 && fullTotal != counter) { + if (showTotal) { + toOutput(""); + toOutput("# The above property value applies to " + nf.format(fullTotal-counter) + " code points not listed here."); + toOutput("# Total code points: " + nf.format(fullTotal)); + } + fullTotal = -1; + } else if (showTotal) { + toOutput(""); + toOutput("# Total code points: " + nf.format(counter)); + } + } + + @Override + protected void doSimpleAt(Object o) { + if (o instanceof Map.Entry) { + Map.Entry oo = (Map.Entry)o; + Object key = oo.getKey(); + Object value = oo.getValue(); + doBefore(o, key); + doAt(key); + output.println("\u2192"); + doAt(value); + doAfter(o, value); + counter++; + } else if (o instanceof Visitor.CodePointRange) { + doAt((Visitor.CodePointRange) o); + } else { + String thing = o.toString(); + String value = getValueSource() == UnicodeLabel.NULL ? "" : getValueSource().getValue(thing, ",", true); + if (getValueSource() != UnicodeLabel.NULL) value = "\t; " + value; + String label = getLabelSource(true) == UnicodeLabel.NULL ? "" : getLabelSource(true).getValue(thing, ",", true); + if (label.length() != 0) label = " " + label; + toTable( + hex(thing) + + value + + commentSeparator + + label + + insertLiteral(thing) + + "\t" + + getName(thing)); + counter++; + } + } + + protected void doAt(Visitor.CodePointRange usi) { + if (!mergeRanges) { + for (int cp = usi.codepoint; cp <= usi.codepointEnd; ++cp) { + showLine(cp, cp); + } + } else { + rf.reset(usi.codepoint, usi.codepointEnd + 1); + while (rf.next()) { + showLine(rf.start, rf.limit - 1); + } + } + } + + private void showLine(int start, int end) { + String label = getLabelSource(true).getValue(start, shortLabel); + String value = getValue(start, shortValue); + if (value == NULL_VALUE) return; + + counter += end - start + 1; + String pn = propName; + if (pn.length() != 0) { + pn = "\t; " + pn; + } + if (valueSize > 0) { + value = "\t; " + value; + } else if (value.length() > 0) { + throw new IllegalArgumentException("maxwidth bogus " + value + "," + getValueSource().getMaxWidth(shortValue)); + } + if (labelSize > 0) { + label = "\t" + label; + } else if (label.length() > 0) { + throw new IllegalArgumentException("maxwidth bogus " + label + ", " + getLabelSource(true).getMaxWidth(shortLabel)); + } + + String count = ""; + if (mergeRanges && showCount) { + if (end == start) count = "\t"; + else count = "\t ["+ nf.format(end - start + 1)+ "]"; + } + + toTable( + hex(start, end) + + pn + + value + + commentSeparator + + label + + count + + insertLiteral(start, end) + + getName("\t ", start, end)); + } + + private String insertLiteral(String thing) { + return (showLiteral == null ? "" + : " \t(" + showLiteral.transliterate(thing) + ") "); + } + + private String insertLiteral(int start, int end) { + return (showLiteral == null ? "" : + " \t(" + showLiteral.transliterate(UTF16.valueOf(start)) + + ((start != end) + ? (".." + showLiteral.transliterate(UTF16.valueOf(end))) + : "") + + ") "); + } + /* + private String insertLiteral(int cp) { + return (showLiteral == null ? "" + : " \t(" + showLiteral.transliterate(UTF16.valueOf(cp)) + ") "); + } + */ + } + + /** + * Iterate through a string, breaking at words. + * @author Davis + */ + private static class NameIterator { + String source; + int position; + int limit; + + NameIterator(String source) { + this.source = source; + this.limit = source.length(); + } + /** + * Find next word, including trailing spaces + * @return the next word + */ + String next() { + if (position >= limit) + return null; + int pos = source.indexOf(' ', position); + if (pos < 0 || pos >= limit) + pos = limit; + String result = source.substring(position, pos); + position = pos + 1; + return result; + } + + static int findMatchingEnd(String s1, String s2) { + int i = s1.length(); + int j = s2.length(); + try { + while (true) { + --i; // decrement both before calling function! + --j; + if (s1.charAt(i) != s2.charAt(j)) + break; + } + } catch (Exception e) {} // run off start + + ++i; // counteract increment + i = s1.indexOf(' ', i); // move forward to space + if (i < 0) + return 0; + return s1.length() - i; + } + } + + private class RangeFinder { + int start, limit; + private int veryLimit; + //String label, value; + void reset(int rangeStart, int rangeLimit) { + limit = rangeStart; + veryLimit = rangeLimit; + } + boolean next() { + if (limit >= veryLimit) + return false; + start = limit; // set to end of last + String label = getLabelSource(false).getValue(limit, true); + String value = getValue(limit, true); + String breaker = getRangeBreakSource().getValue(limit,true); + if (DEBUG && 0x3FFD < limit && limit < 0x9FD6) { + System.out.println(Utility.hex(limit) + ", Label: " + label + ", Value: " + value + ", Break: " + breaker); + } + limit++; + for (; limit < veryLimit; limit++) { + String s = getLabelSource(false).getValue(limit, true); + String v = getValue(limit, true); + String b = getRangeBreakSource().getValue(limit, true); + if (DEBUG && limit > 0x9FD4) { + System.out.println(Utility.hex(limit) + ", *Label: " + s + ", Value: " + v + ", Break: " + b); + } + if (!equalTo(s, label) + || !equalTo(v, value) + || !equalTo(b, breaker)) { + break; + } + } + // at this point, limit is the first item that has a different label than source + // OR, we got to the end, and limit == veryLimit + return true; + } + } + + boolean equalTo(Object a, Object b) { + if (a == b) return true; + if (a == null) return false; + return a.equals(b); + } + + boolean shortLabel = true; + boolean shortValue = true; + + public String getPrefix() { + return prefix; + } + + public String getSuffix() { + return suffix; + } + + public BagFormatter setPrefix(String string) { + prefix = string; + return this; + } + + public BagFormatter setSuffix(String string) { + suffix = string; + return this; + } + + public boolean isAbbreviated() { + return abbreviated; + } + + public BagFormatter setAbbreviated(boolean b) { + abbreviated = b; + return this; + } + + public UnicodeLabel getLabelSource(boolean visible) { + if (labelSource == null) { + Map labelMap = new HashMap(); + //labelMap.put("Lo","L&"); + labelMap.put("Lu","L&"); + labelMap.put("Lt","L&"); + labelMap.put("Ll","L&"); + labelSource = new UnicodeProperty.FilteredProperty( + getUnicodePropertyFactory().getProperty("General_Category"), + new UnicodeProperty.MapFilter(labelMap) + ).setAllowValueAliasCollisions(true); + } + return labelSource; + } + + /** + * @deprecated + */ + @Deprecated + public static void addAll(UnicodeSet source, Collection target) { + source.addAllTo(target); + } + + // UTILITIES + + public static final Transliterator hex = Transliterator.getInstance( + "[^\\u0009\\u0020-\\u007E\\u00A0-\\u00FF] hex"); + + public String getSeparator() { + return separator; + } + public BagFormatter setSeparator(String string) { + separator = string; + return this; + } + public Transliterator getShowLiteral() { + return showLiteral; + } + public BagFormatter setShowLiteral(Transliterator transliterator) { + showLiteral = transliterator; + return this; + } + + // ===== CONVENIENCES ===== + private class Join extends Visitor { + StringBuffer output = new StringBuffer(); + @SuppressWarnings("unused") + int depth = 0; + String join (Object o) { + output.setLength(0); + doAt(o); + return output.toString(); + } + @Override + protected void doBefore(Object container, Object item) { + ++depth; + output.append(prefix); + } + @Override + protected void doAfter(Object container, Object item) { + output.append(suffix); + --depth; + } + @Override + protected void doBetween(Object container, Object lastItem, Object nextItem) { + output.append(separator); + } + @Override + protected void doSimpleAt(Object o) { + if (o != null) output.append(o.toString()); + } + } + + /** + * @param label + */ + public BagFormatter setLabelSource(UnicodeLabel label) { + if (label == null) label = UnicodeLabel.NULL; + labelSource = label; + return this; + } + + /** + * @return the NameLable representing the source + */ + public UnicodeLabel getNameSource() { + if (nameSource == null) { + nameSource = new NameLabel(getUnicodePropertyFactory()); + } + return nameSource; + } + + /** + * @param label + */ + public BagFormatter setNameSource(UnicodeLabel label) { + if (label == null) label = UnicodeLabel.NULL; + nameSource = label; + return this; + } + + /** + * @return the UnicodeLabel representing the value + */ + public UnicodeLabel getValueSource() { + if (valueSource == null) valueSource = UnicodeLabel.NULL; + return valueSource; + } + + private String getValue(int cp, boolean shortVal) { + String result = getValueSource().getValue(cp, shortVal); + if (result == null) return NULL_VALUE; + if (hexValue) result = hex(result, " "); + return result; + } + + /** + * @param label + */ + public BagFormatter setValueSource(UnicodeLabel label) { + if (label == null) label = UnicodeLabel.NULL; + valueSource = label; + return this; + } + + public BagFormatter setValueSource(String label) { + return setValueSource(new UnicodeLabel.Constant(label)); + } + + /** + * @return true if showCount is true + */ + public boolean isShowCount() { + return showCount; + } + + /** + * @param b true to show the count + * @return this (for chaining) + */ + public BagFormatter setShowCount(boolean b) { + showCount = b; + return this; + } + + /** + * @return the property name + */ + public String getPropName() { + return propName; + } + + /** + * @param string + * @return this (for chaining) + */ + public BagFormatter setPropName(String string) { + if (string == null) string = ""; + propName = string; + return this; + } + + /** + * @return true if this is a hexValue + */ + public boolean isHexValue() { + return hexValue; + } + + /** + * @param b + * @return this (for chaining) + */ + public BagFormatter setHexValue(boolean b) { + hexValue = b; + return this; + } + + /** + * @return the full total + */ + public int getFullTotal() { + return fullTotal; + } + + /** + * @param i set the full total + * @return this (for chaining) + */ + public BagFormatter setFullTotal(int i) { + fullTotal = i; + return this; + } + + /** + * @return the line separator + */ + public String getLineSeparator() { + return lineSeparator; + } + + /** + * @param string + * @return this (for chaining) + */ + public BagFormatter setLineSeparator(String string) { + lineSeparator = string; + return this; + } + + /** + * @return the UnicodeLabel representing the range break source + */ + public UnicodeLabel getRangeBreakSource() { + if (rangeBreakSource == null) { + Map labelMap = new HashMap(); + // reflects the code point types on p 25 + labelMap.put("Lo", "G&"); + labelMap.put("Lm", "G&"); + labelMap.put("Lu", "G&"); + labelMap.put("Lt", "G&"); + labelMap.put("Ll", "G&"); + labelMap.put("Mn", "G&"); + labelMap.put("Me", "G&"); + labelMap.put("Mc", "G&"); + labelMap.put("Nd", "G&"); + labelMap.put("Nl", "G&"); + labelMap.put("No", "G&"); + labelMap.put("Zs", "G&"); + labelMap.put("Pd", "G&"); + labelMap.put("Ps", "G&"); + labelMap.put("Pe", "G&"); + labelMap.put("Pc", "G&"); + labelMap.put("Po", "G&"); + labelMap.put("Pi", "G&"); + labelMap.put("Pf", "G&"); + labelMap.put("Sm", "G&"); + labelMap.put("Sc", "G&"); + labelMap.put("Sk", "G&"); + labelMap.put("So", "G&"); + + labelMap.put("Zl", "Cf"); + labelMap.put("Zp", "Cf"); + + rangeBreakSource = + new UnicodeProperty + .FilteredProperty( + getUnicodePropertyFactory().getProperty( + "General_Category"), + new UnicodeProperty.MapFilter(labelMap)) + .setAllowValueAliasCollisions(true); + + /* + "Cn", // = Other, Not Assigned 0 + "Cc", // = Other, Control 15 + "Cf", // = Other, Format 16 + UnicodeProperty.UNUSED, // missing + "Co", // = Other, Private Use 18 + "Cs", // = Other, Surrogate 19 + */ + } + return rangeBreakSource; + } + + /** + * @param label + */ + public BagFormatter setRangeBreakSource(UnicodeLabel label) { + if (label == null) label = UnicodeLabel.NULL; + rangeBreakSource = label; + return this; + } + + /** + * @return Returns the fixName. + */ + public Transliterator getFixName() { + return fixName; + } + /** + * @param fixName The fixName to set. + */ + public BagFormatter setFixName(Transliterator fixName) { + this.fixName = fixName; + return this; + } + + public Tabber getTabber() { + return tabber; + } + + public void setTabber(Tabber tabber) { + this.tabber = tabber; + } + + public boolean isShowTotal() { + return showTotal; + } + + public void setShowTotal(boolean showTotal) { + this.showTotal = showTotal; + } +} diff --git a/unicodetools/src/main/java/org/unicode/props/IndexUnicodeProperties.java b/unicodetools/src/main/java/org/unicode/props/IndexUnicodeProperties.java index e319fd783..cf2f176cd 100644 --- a/unicodetools/src/main/java/org/unicode/props/IndexUnicodeProperties.java +++ b/unicodetools/src/main/java/org/unicode/props/IndexUnicodeProperties.java @@ -25,7 +25,6 @@ import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; -import org.unicode.cldr.util.props.UnicodeProperty; import org.unicode.draft.CldrUtility.VariableReplacer; import org.unicode.draft.UnicodeDataInput; import org.unicode.draft.UnicodeDataInput.ItemReader; diff --git a/unicodetools/src/main/java/org/unicode/props/PropNormalizationData.java b/unicodetools/src/main/java/org/unicode/props/PropNormalizationData.java index 21dd9b805..814ac6b68 100644 --- a/unicodetools/src/main/java/org/unicode/props/PropNormalizationData.java +++ b/unicodetools/src/main/java/org/unicode/props/PropNormalizationData.java @@ -3,7 +3,7 @@ import java.util.BitSet; import java.util.HashMap; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import org.unicode.props.UcdPropertyValues.Canonical_Combining_Class_Values; import org.unicode.text.UCD.NormalizationData; import org.unicode.text.utility.Utility; @@ -43,7 +43,7 @@ public PropNormalizationData(IndexUnicodeProperties properties) { System.out.println("Nulls: " + nullValues); } canonical.freeze(); - + UnicodeProperty dtp = properties.getProperty("dt"); UnicodeProperty dmp = properties.getProperty("dm"); UnicodeProperty ce = properties.getProperty("composition_exclusion"); @@ -72,10 +72,10 @@ public PropNormalizationData(IndexUnicodeProperties properties) { um2 = new HashMap<>(); pairwiseComposition.put(first, um2); } - um2.put(second, cp); + um2.put(second, cp); } } - + buffer.setLength(0); getRecursiveDecomposition2(cp, false, dtp, dmp, buffer); String nfdstr = buffer.toString(); @@ -91,7 +91,7 @@ public PropNormalizationData(IndexUnicodeProperties properties) { nfkd.freeze(); pairwiseComposition.freeze(); // later do deep freeze } - + public short getCcc(int cp) { return canonical.get(cp); } @@ -169,35 +169,35 @@ public boolean isNonSpacing(int cp) { // TODO Auto-generated method stub return false; } - + public enum Type {nfd, nfc, nfkd, nfkc} public String normalize(CharSequence source, Type type) { StringBuilder target = new StringBuilder(); switch (type) { - case nfd: + case nfd: internalDecompose(source, target, false); break; - case nfkd: + case nfkd: internalDecompose(source, target, true); break; - case nfc: + case nfc: internalDecompose(source, target, false); internalCompose(target); break; - case nfkc: + case nfkc: internalDecompose(source, target, true); internalCompose(target); break; } return target.toString(); } - + public String normalize(int source, Type type) { StringBuilder target = new StringBuilder(); - - String buffer = type == Type.nfkd || type == Type.nfkc - ? nfkd.get(source) + + String buffer = type == Type.nfkd || type == Type.nfkc + ? nfkd.get(source) : nfd.get(source); if (buffer == null) { target.append(source); @@ -206,10 +206,10 @@ public String normalize(int source, Type type) { } switch (type) { - case nfc: + case nfc: internalCompose(target); break; - case nfkc: + case nfkc: internalCompose(target); break; } @@ -297,7 +297,7 @@ private void internalCompose(StringBuilder target) { if (um2 != null) { Integer temp = um2.get(ch); if (temp != null) { - composite = temp; + composite = temp; } } if (composite != NormalizationData.NOT_COMPOSITE diff --git a/unicodetools/src/main/java/org/unicode/props/PropertyType.java b/unicodetools/src/main/java/org/unicode/props/PropertyType.java index 980d6cedc..11feb9f3e 100644 --- a/unicodetools/src/main/java/org/unicode/props/PropertyType.java +++ b/unicodetools/src/main/java/org/unicode/props/PropertyType.java @@ -1,13 +1,13 @@ package org.unicode.props; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty; public enum PropertyType { - Numeric(UnicodeProperty.NUMERIC), - String(UnicodeProperty.STRING), - Miscellaneous(UnicodeProperty.MISC), - Catalog(UnicodeProperty.CATALOG), - Enumerated(UnicodeProperty.ENUMERATED), + Numeric(UnicodeProperty.NUMERIC), + String(UnicodeProperty.STRING), + Miscellaneous(UnicodeProperty.MISC), + Catalog(UnicodeProperty.CATALOG), + Enumerated(UnicodeProperty.ENUMERATED), Binary(UnicodeProperty.BINARY), Unknown(-1) ; @@ -16,8 +16,8 @@ public enum PropertyType { private PropertyType(int oldNumber) { this.oldNumber = oldNumber; } - + public int getOldNumber() { return oldNumber; } -} \ No newline at end of file +} diff --git a/unicodetools/src/main/java/org/unicode/props/RandomStringGenerator.java b/unicodetools/src/main/java/org/unicode/props/RandomStringGenerator.java new file mode 100644 index 000000000..47ff90428 --- /dev/null +++ b/unicodetools/src/main/java/org/unicode/props/RandomStringGenerator.java @@ -0,0 +1,103 @@ +/* + ********************************************************************** + * Copyright (c) 2002-2004, International Business Machines + * Corporation and others. All Rights Reserved. + ********************************************************************** + * Author: Mark Davis + ********************************************************************** + */ +package org.unicode.props; + +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import com.ibm.icu.dev.util.UnicodeMap; +import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeSet; + +import org.unicode.jsp.ICUPropertyFactory; + +public class RandomStringGenerator { + + private static final UnicodeSet SUPPLEMENTARIES = new UnicodeSet(0x10000, 0x10FFFF); + + /** + * If not null, masks off the character properties so the UnicodeSets are easier to use when debugging. + */ + public static UnicodeSet DEBUG_REDUCE_SET_SIZE = null; // new + // UnicodeSet("[\\u0000-\\u00FF\\u0300-\\u03FF\\u2000-\\u20FF]"); + // // new UnicodeSet("[\\u0000-\\u00FF\\u2000-\\u20FF]"); // + // or null + + + private Random random = new Random(0); + private UnicodeSet[] sets; + private UnicodeMap map; + private UnicodeMap shortMap; + private UnicodeMap extendedMap; + + void init(UnicodeProperty.Factory factory) { + extendedMap = new UnicodeMap(); + UnicodeMap tempMap = factory.getProperty("GraphemeClusterBreak").getUnicodeMap(); + extendedMap.putAll(tempMap.keySet("CR"), "CR"); + extendedMap.putAll(tempMap.keySet("LF"), "LF"); + extendedMap.putAll(tempMap.keySet("Extend"), "GCExtend"); + extendedMap.putAll(tempMap.keySet("Control"), "GCControl"); + } + + public RandomStringGenerator(UnicodeProperty.Factory factory, String propertyName) { + this(factory, propertyName, false, false); + } + + public RandomStringGenerator(UnicodeProperty.Factory factory, String propertyName, boolean useShortName, + boolean addGCStuff) { + this(factory, factory.getProperty(propertyName).getUnicodeMap(), + useShortName ? ICUPropertyFactory.make().getProperty(propertyName).getUnicodeMap(true) : null, + addGCStuff); + } + + RandomStringGenerator(UnicodeProperty.Factory factory, UnicodeMap longNameMap, UnicodeMap shortNameMap, + boolean addGCStuff) { + init(factory); + map = !addGCStuff ? longNameMap + : longNameMap.composeWith(extendedMap, MyComposer); + shortMap = (shortNameMap == null ? longNameMap + : !addGCStuff ? shortNameMap + : shortNameMap.composeWith(extendedMap, MyComposer)); + List values = new ArrayList(map.getAvailableValues()); + sets = new UnicodeSet[values.size()]; + for (int i = 0; i < sets.length; ++i) { + sets[i] = map.keySet(values.get(i)); + sets[i].removeAll(SUPPLEMENTARIES); + if (DEBUG_REDUCE_SET_SIZE != null) { + int first = sets[i].charAt(0); + sets[i].retainAll(DEBUG_REDUCE_SET_SIZE); + if (sets[i].size() == 0) sets[i].add(first); + } + } + } + + static UnicodeMap.Composer MyComposer = new UnicodeMap.Composer() { + @Override + public Object compose(int codePoint, String string, Object a, Object b) { + if (a == null) return b; + if (b == null) return a; + return a + "_" + b; + } + }; + + public String getValue(int cp) { + return (String) shortMap.getValue(cp); + } + + public String next(int len) { + StringBuffer result = new StringBuffer(); + for (int i = 0; i < len; ++i) { + UnicodeSet us = sets[random.nextInt(sets.length)]; + int cp = us.charAt(random.nextInt(us.size())); + UTF16.append(result, cp); + } + return result.toString(); + } +} diff --git a/unicodetools/src/main/java/org/unicode/cldr/util/props/UnicodeProperty.java b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java similarity index 99% rename from unicodetools/src/main/java/org/unicode/cldr/util/props/UnicodeProperty.java rename to unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java index d3d0253ef..514c0d2af 100644 --- a/unicodetools/src/main/java/org/unicode/cldr/util/props/UnicodeProperty.java +++ b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java @@ -4,7 +4,7 @@ * others. All Rights Reserved. * ******************************************************************************* */ -package org.unicode.cldr.util.props; +package org.unicode.props; import java.io.PrintWriter; import java.io.StringWriter; @@ -32,6 +32,9 @@ import com.ibm.icu.text.UnicodeSet; import com.ibm.icu.text.UnicodeSetIterator; +import org.unicode.props.BagFormatter; +import org.unicode.cldr.util.props.UnicodeLabel; + public abstract class UnicodeProperty extends UnicodeLabel { public static final UnicodeSet NONCHARACTERS = new UnicodeSet("[:noncharactercodepoint:]").freeze(); @@ -59,6 +62,7 @@ public abstract class UnicodeProperty extends UnicodeLabel { private static UnicodeSet STUFF_TO_TEST; private static UnicodeSet STUFF_TO_TEST_WITH_UNASSIGNED; + public static synchronized UnicodeSet getUNASSIGNED() { if (UNASSIGNED == null) { UNASSIGNED = new UnicodeSet("[:gc=unassigned:]").freeze(); @@ -141,7 +145,7 @@ public static synchronized void ResetCacheProperties() { /* * Name: Unicode_1_Name Name: ISO_Comment Name: Name Name: Unicode_1_Name - * + * */ public static final int UNKNOWN = 0, BINARY = 2, EXTENDED_BINARY = 3, @@ -376,7 +380,7 @@ public UnicodeSet getSet(PatternMatcher matcher, UnicodeSet result) { /* * public UnicodeSet getMatchSet(UnicodeSet result) { if (result == null) * result = new UnicodeSet(); addAll(matchIterator, result); return result; } - * + * * public void setMatchSet(UnicodeSet set) { matchIterator = new * UnicodeSetIterator(set); } */ @@ -528,7 +532,7 @@ public int compare(Object o1, Object o2) { /** * Utility for managing property & non-string value aliases - * + * */ // TODO optimize public static boolean equalNames(String a, String b) { @@ -662,7 +666,7 @@ public static String regularize(String source, boolean titlecaseStart) { /** * Utility function for comparing codepoint to string without generating new * string. - * + * * @param codepoint * @param other * @return true if the codepoint equals the string @@ -690,7 +694,7 @@ public static final boolean equals(T a, T b) { /** * Utility that should be on UnicodeSet - * + * * @param source * @param result */ @@ -1165,7 +1169,7 @@ public static abstract class BaseProperty extends UnicodeProperty { private static final String[] YES_VALUES = {"Yes", "Y", "T", "True"}; /** - * + * */ private static final String[][] YES_NO_ALIASES = new String[][] {YES_VALUES, NO_VALUES}; @@ -1217,7 +1221,7 @@ public void addValueAlias(String value, String valueAlias, switch(aliasAddAction) { case IGNORE_IF_MISSING: return; case REQUIRE_MAIN_ALIAS: throw new IllegalArgumentException("Can't add alias for mising value: " + value); - case ADD_MAIN_ALIAS: + case ADD_MAIN_ALIAS: toValueAliases.put(value, result = new ArrayList(0)); break; } @@ -1497,7 +1501,7 @@ public boolean isDefault(int cp) { return equals(cp, value); } String defaultValue = getValue(getSAMPLE_UNASSIGNED()); - return defaultValue == null ? value == null : defaultValue.equals(value); + return defaultValue == null ? value == null : defaultValue.equals(value); } public boolean hasUniformUnassigned() { @@ -1555,5 +1559,10 @@ protected List _getAvailableValues(List result) { // return transform.transform(codepoint); // } // } + + // from the jsp version + public boolean isTrimable() { + return !isType(STRING_OR_MISC_MASK); + } } diff --git a/unicodetools/src/main/java/org/unicode/props/UnicodePropertySymbolTable.java b/unicodetools/src/main/java/org/unicode/props/UnicodePropertySymbolTable.java new file mode 100644 index 000000000..817f0c6f3 --- /dev/null +++ b/unicodetools/src/main/java/org/unicode/props/UnicodePropertySymbolTable.java @@ -0,0 +1,285 @@ +/* + ******************************************************************************* + * Copyright (C) 1996-2012, Google, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package org.unicode.props; + +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; + +import org.unicode.props.UnicodeProperty.PatternMatcher; + +import com.ibm.icu.impl.UnicodeRegex; +import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeSet; + +/** + * Allows for overriding the parsing of UnicodeSet property patterns. + *

+ * WARNING: If this UnicodePropertySymbolTable is used with {@code UnicodeSet.setDefaultXSymbolTable}, and the + * Unassigned characters (gc=Cn) are different than in ICU other than in ICU, you MUST call + * {@code UnicodeProperty.ResetCacheProperties} afterwards. If you then call {@code UnicodeSet.setDefaultXSymbolTable} + * with null to clear the value, you MUST also call {@code UnicodeProperty.ResetCacheProperties}. + * + * @author markdavis + */ +public class UnicodePropertySymbolTable extends UnicodeSet.XSymbolTable { + UnicodeRegex unicodeRegex; + final UnicodeProperty.Factory factory; + + public UnicodePropertySymbolTable(UnicodeProperty.Factory factory) { + unicodeRegex = new UnicodeRegex().setSymbolTable(this); + this.factory = factory; + } + + + // public boolean applyPropertyAlias0(String propertyName, + // String propertyValue, UnicodeSet result) { + // if (!propertyName.contains("*")) { + // return applyPropertyAlias(propertyName, propertyValue, result); + // } + // String[] propertyNames = propertyName.split("[*]"); + // for (int i = propertyNames.length - 1; i >= 0; ++i) { + // String pname = propertyNames[i]; + // + // } + // return null; + // } + + @Override + public boolean applyPropertyAlias(String propertyName, + String propertyValue, UnicodeSet result) { + boolean status = false; + boolean invert = false; + int posNotEqual = propertyName.indexOf('\u2260'); + int posColon = propertyName.indexOf(':'); + if (posNotEqual >= 0 || posColon >= 0) { + if (posNotEqual < 0) posNotEqual = propertyName.length(); + if (posColon < 0) posColon = propertyName.length(); + int opPos = posNotEqual < posColon ? posNotEqual : posColon; + propertyValue = propertyValue.length() == 0 ? propertyName.substring(opPos+1) + : propertyName.substring(opPos+1) + "=" + propertyValue; + propertyName = propertyName.substring(0,opPos); + if (posNotEqual < posColon) { + invert = true; + } + } + if (propertyName.endsWith("!")) { + propertyName = propertyName.substring(0, propertyName.length() - 1); + invert = !invert; + } + propertyValue = propertyValue.trim(); + if (propertyValue.length() != 0) { + status = applyPropertyAlias0(propertyName, propertyValue, result); + } else { + try { + status = applyPropertyAlias0("gc", propertyName, result); + } catch (Exception e) {} + if (!status) { + try { + status = applyPropertyAlias0("sc", propertyName, result); + } catch (Exception e) {} + if (!status) { + try { + status = applyPropertyAlias0(propertyName, "Yes", result); + } catch (Exception e) {} + if (!status) { + status = applyPropertyAlias0(propertyName, "", result); + } + } + } + } + if (status && invert) { + result.complement(); + } + return status; + } + + static final HashMap GC_REMAP = new HashMap(); + { + GC_REMAP.put("c", "Cc Cf Cn Co Cs".split(" ")); + GC_REMAP.put("other", GC_REMAP.get("c")); + + GC_REMAP.put("l", "Ll Lm Lo Lt Lu".split(" ")); + GC_REMAP.put("letter", GC_REMAP.get("l")); + + GC_REMAP.put("lc", "Ll Lt Lu".split(" ")); + GC_REMAP.put("casedletter", GC_REMAP.get("lc")); + + GC_REMAP.put("m", "Mc Me Mn".split(" ")); + GC_REMAP.put("mark", GC_REMAP.get("m")); + + GC_REMAP.put("n", "Nd Nl No".split(" ")); + GC_REMAP.put("number", GC_REMAP.get("n")); + + GC_REMAP.put("p", "Pc Pd Pe Pf Pi Po Ps".split(" ")); + GC_REMAP.put("punctuation", GC_REMAP.get("p")); + GC_REMAP.put("punct", GC_REMAP.get("p")); + + GC_REMAP.put("s", "Sc Sk Sm So".split(" ")); + GC_REMAP.put("symbol", GC_REMAP.get("s")); + + GC_REMAP.put("z", "Zl Zp Zs".split(" ")); + GC_REMAP.put("separator", GC_REMAP.get("z")); + } + + public boolean applyPropertyAlias0(String propertyName, + String propertyValue, UnicodeSet result) { + result.clear(); + UnicodeProperty prop = factory.getProperty(propertyName); + String canonicalName = prop.getName(); + boolean isAge = UnicodeProperty.equalNames("Age", canonicalName); + + // Hack for special GC values + if (canonicalName.equals("General_Category")) { + String[] parts = GC_REMAP.get(UnicodeProperty.toSkeleton(propertyValue)); + if (parts != null) { + for (String part : parts) { + prop.getSet(part, result); + } + return true; + } + } + + PatternMatcher patternMatcher = null; + if (propertyValue.length() > 1 && propertyValue.startsWith("/") && propertyValue.endsWith("/")) { + String fixedRegex = unicodeRegex.transform(propertyValue.substring(1, propertyValue.length() - 1)); + patternMatcher = new UnicodeProperty.RegexMatcher().set(fixedRegex); + } + UnicodeProperty otherProperty = null; + boolean testCp = false; + if (propertyValue.length() > 1 && propertyValue.startsWith("@") && propertyValue.endsWith("@")) { + String otherPropName = propertyValue.substring(1, propertyValue.length() - 1).trim(); + if ("cp".equalsIgnoreCase(otherPropName)) { + testCp = true; + } else { + otherProperty = factory.getProperty(otherPropName); + } + } + if (prop != null) { + UnicodeSet set; + if (testCp) { + set = new UnicodeSet(); + for (int i = 0; i <= 0x10FFFF; ++i) { + if (UnicodeProperty.equals(i, prop.getValue(i))) { + set.add(i); + } + } + } else if (otherProperty != null) { + set = new UnicodeSet(); + for (int i = 0; i <= 0x10FFFF; ++i) { + String v1 = prop.getValue(i); + String v2 = otherProperty.getValue(i); + if (UnicodeProperty.equals(v1, v2)) { + set.add(i); + } + } + } else if (patternMatcher == null) { + if (!isValid(prop, propertyValue)) { + throw new IllegalArgumentException("The value '" + propertyValue + "' is illegal. Values for " + propertyName + + " must be in " + + prop.getAvailableValues() + " or in " + prop.getValueAliases()); + } + if (isAge) { + set = prop.getSet(new ComparisonMatcher(propertyValue, Relation.geq, DOUBLE_STRING_COMPARATOR)); + } else { + set = prop.getSet(propertyValue); + } + } else if (isAge) { + set = new UnicodeSet(); + List values = prop.getAvailableValues(); + for (String value : values) { + if (patternMatcher.matches(value)) { + for (String other : values) { + if (other.compareTo(value) <= 0) { + set.addAll(prop.getSet(other)); + } + } + } + } + } else { + set = prop.getSet(patternMatcher); + } + result.addAll(set); + return true; + } + throw new IllegalArgumentException("Illegal property: " + propertyName); + } + + + + private boolean isValid(UnicodeProperty prop, String propertyValue) { +// if (prop.getName().equals("General_Category")) { +// if (propertyValue) +// } + return prop.isValidValue(propertyValue); + } + + public enum Relation {less, leq, equal, geq, greater} + + public static class ComparisonMatcher implements PatternMatcher { + final Relation relation; + final Comparator comparator; + String pattern; + + public ComparisonMatcher(String pattern, Relation relation) { + this(pattern, relation, new UTF16.StringComparator(true, false,0)); + } + + public ComparisonMatcher(String pattern, Relation relation, Comparator comparator) { + this.relation = relation; + this.pattern = pattern; + this.comparator = comparator; + } + + @Override + public boolean matches(Object value) { + int comp = comparator.compare(pattern, value.toString()); + switch (relation) { + case less: return comp < 0; + case leq: return comp <= 0; + default: return comp == 0; + case geq: return comp >= 0; + case greater: return comp > 0; + } + } + + @Override + public PatternMatcher set(String pattern) { + this.pattern = pattern; + return this; + } + } + + /** + * Special parser for doubles. Anything not parsable is higher than everything else. + */ + public static final Comparator DOUBLE_STRING_COMPARATOR = new Comparator(){ + + @Override + public int compare(String o1, String o2) { + int f1 = o1.codePointAt(0); + int f2 = o2.codePointAt(0); + boolean n1 = f1 < '0' || f1 > '9'; + boolean n2 = f2 < '0' || f2 > '9'; + if (n1) { + return n2 ? o1.compareTo(o2) : 1; + } else if (n2) { + return -1; + } + double d1 = Double.parseDouble(o1); + double d2 = Double.parseDouble(o2); + if (Double.isNaN(d1) || Double.isNaN(d2)) { + throw new IllegalArgumentException(); + } + + return d1 > d2 ? 1 + : d1 < d2 ? -1 + : 0; + } + + }; + } diff --git a/unicodetools/src/main/java/org/unicode/temp/UnicodePropertyX.java b/unicodetools/src/main/java/org/unicode/temp/UnicodePropertyX.java index d7a5c487c..54e307eb1 100644 --- a/unicodetools/src/main/java/org/unicode/temp/UnicodePropertyX.java +++ b/unicodetools/src/main/java/org/unicode/temp/UnicodePropertyX.java @@ -8,7 +8,7 @@ import java.util.List; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import com.ibm.icu.dev.util.UnicodeMap; import com.ibm.icu.text.UnicodeSet; diff --git a/unicodetools/src/main/java/org/unicode/text/UCA/Validity.java b/unicodetools/src/main/java/org/unicode/text/UCA/Validity.java index 5f2c8ae60..cbe2c7948 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCA/Validity.java +++ b/unicodetools/src/main/java/org/unicode/text/UCA/Validity.java @@ -16,8 +16,8 @@ import org.unicode.cldr.util.TransliteratorUtilities; import org.unicode.cldr.util.With; -import org.unicode.cldr.util.props.BagFormatter; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.BagFormatter; +import org.unicode.props.UnicodeProperty; import org.unicode.text.UCA.UCA.AppendToCe; import org.unicode.text.UCA.UCA.CollatorType; import org.unicode.text.UCA.UCA.UCAContents; diff --git a/unicodetools/src/main/java/org/unicode/text/UCA/WriteCharts.java b/unicodetools/src/main/java/org/unicode/text/UCA/WriteCharts.java index 1d0bc2006..40336359a 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCA/WriteCharts.java +++ b/unicodetools/src/main/java/org/unicode/text/UCA/WriteCharts.java @@ -30,7 +30,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import org.unicode.text.UCD.Default; import org.unicode.text.UCD.ToolUnicodePropertySource; import org.unicode.text.UCD.UCD; @@ -773,7 +773,7 @@ static void showCell(PrintWriter output, String s, String classType, String extr } } - final String outline = "" @@ -785,7 +785,7 @@ static void showCell(PrintWriter output, String s, String classType, String extr output.println(outline); } - + private static String showCell2( String sortKey, String s, @@ -810,7 +810,7 @@ private static String showCell2( classname = "new"; indexHasNew = true; } - + // TODO: merge with showCell final String outline = "

numbers = getMissing(fileName, rulesFound); if (!numbers.isEmpty()) { //throw new IllegalArgumentException - System.err.println("***Rules missing from TESTS for " + fileName + ": " + numbers + System.err.println("***Rules missing from TESTS for " + fileName + ": " + numbers + "You will need to add samples that trigger those rules. " + "See https://sites.google.com/site/unicodetools/home/changing-ucd-properties#TOC-Adding-Segmentation-Sample-Strings"); } @@ -1205,7 +1205,7 @@ String get(String value, int count) { return s; } } - throw new IllegalArgumentException(prop.getName() + ":" + value + throw new IllegalArgumentException(prop.getName() + ":" + value + " doesn't have " + count + " values"); } } @@ -1213,10 +1213,10 @@ String get(String value, int count) { static class GenerateGraphemeBreakTest extends XGenerateBreakTest { public GenerateGraphemeBreakTest(UCD ucd, Segmenter.Target target) { - super(ucd, + super(ucd, Segmenter.make( ToolUnicodePropertySource.make(ucd.getVersion()), - "GraphemeClusterBreak", target), + "GraphemeClusterBreak", target), "aa", "Grapheme", new String[]{unicodePropertySource.getSet("GC=Cn").iterator().next()}, @@ -1287,7 +1287,7 @@ public GenerateLineBreakTest(UCD ucd, Segmenter.Target target) { "LineBreak", target), "aa", "Line", // extraSamples - new String[]{}, + new String[]{}, // extraSingleSamples new String[]{ "\u000Bぁ", //4.0 @@ -1321,7 +1321,7 @@ public GenerateLineBreakTest(UCD ucd, Segmenter.Target target) { "☝🏻", //30.2 "final", //999.0 - "can't", + "can't", "can\u2019t", "'can' not", "can 'not'", @@ -1550,7 +1550,7 @@ public GenerateLineBreakTest(UCD ucd, Segmenter.Target target) { ToolUnicodePropertySource propSource = ToolUnicodePropertySource.make(ucd.getVersion()); UnicodeSet unassigned = propSource.getSet("gc=Cn"); UnicodeSet extPict = propSource.getSet("ExtPict=yes"); - // [\p{Extended_Pictographic}&\p{Cn}] + // [\p{Extended_Pictographic}&\p{Cn}] UnicodeSet extPictUnassigned = extPict.cloneAsThawed().retainAll(unassigned); String firstExtPictUnassigned = UTF16.valueOf(extPictUnassigned.charAt(0)); // [\p{Extended_Pictographic}&\p{Cn}] × EM @@ -1633,11 +1633,11 @@ static String[] getExtraSamples(UCD ucd, Segmenter.Target target) { static class GenerateWordBreakTest extends XGenerateBreakTest { public GenerateWordBreakTest(UCD ucd, Segmenter.Target target) { - super(ucd, + super(ucd, Segmenter.make( ToolUnicodePropertySource.make(ucd.getVersion()), - "WordBreak", target), - "aa", + "WordBreak", target), + "aa", "Word", new String[] { /*"\uFF70", "\uFF65", "\u30FD", */ "a\u2060", @@ -1664,8 +1664,8 @@ public GenerateWordBreakTest(UCD ucd, Segmenter.Target target) { WB.get("Numeric") + WB.get("MidNum") + WB.get("Numeric"), WB.get("Numeric") + WB.get("MidNum") + WB.get("MidNum") + WB.get("Numeric"), WB.get("Katakana") + WB.get("Katakana"), - WB.get("ALetter") + WB.get("ExtendNumLet") - + WB.get("Numeric") + WB.get("ExtendNumLet") + WB.get("ALetter") + WB.get("ExtendNumLet") + + WB.get("Numeric") + WB.get("ExtendNumLet") + WB.get("Katakana") + WB.get("ExtendNumLet"), WB.get("ALetter") + WB.get("ExtendNumLet") + WB.get("ExtendNumLet") + WB.get("ALetter"), WB.get("RI") + WB.get("RI",2) + WB.get("RI",3) + "b", @@ -1698,9 +1698,9 @@ public GenerateWordBreakTest(UCD ucd, Segmenter.Target target) { for (String mid : Arrays.asList(":", ".", ",")) { for (String mid2 : Arrays.asList(":", ".", ",")) { for (String numLet2 : Arrays.asList("1", "a")) { - extraTestSamples.add(numLet + mid + mid2 + numLet2); + extraTestSamples.add(numLet + mid + mid2 + numLet2); for (String numLet3 : Arrays.asList("1", "a")) { - extraTestSamples.add(numLet + "_" + numLet3 + mid + mid2 + numLet2); + extraTestSamples.add(numLet + "_" + numLet3 + mid + mid2 + numLet2); } } } @@ -1710,10 +1710,10 @@ public GenerateWordBreakTest(UCD ucd, Segmenter.Target target) { static String[] getExtraSamples(UCD ucd, Segmenter.Target target) { final GenerateBreakTest grapheme = new GenerateGraphemeBreakTest(ucd, target); final String [] temp = { - "can't", - "can\u2019t", - "ab\u00ADby", - "a$-34,567.14%b", + "can't", + "can\u2019t", + "ab\u00ADby", + "a$-34,567.14%b", "3a", "c.d", "C.d", @@ -2812,7 +2812,7 @@ public int previousBase() { } } /* - * + * * if (false) { PrintWriter log = Utility.openPrintWriter("Diff.txt", Utility.UTF8_WINDOWS); diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/GenerateConfusables.java b/unicodetools/src/main/java/org/unicode/text/UCD/GenerateConfusables.java index 9d6d5d06d..eae9715ad 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/GenerateConfusables.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/GenerateConfusables.java @@ -42,9 +42,9 @@ import org.unicode.cldr.util.With; import org.unicode.cldr.util.XEquivalenceClass; import org.unicode.cldr.util.XEquivalenceClass.Linkage; -import org.unicode.cldr.util.props.BagFormatter; +import org.unicode.props.BagFormatter; import org.unicode.cldr.util.props.UnicodeLabel; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import org.unicode.idna.Idna.IdnaType; import org.unicode.idna.Uts46; import org.unicode.props.IndexUnicodeProperties; @@ -157,7 +157,7 @@ public class GenerateConfusables { private static final UnicodeProperty SCRIPT_PROPERTY = ups.getProperty("sc"); static final UnicodeProperty AGE = ups.getProperty("age"); - private static final String EXCAPE_FUNNY_RULE = + private static final String EXCAPE_FUNNY_RULE = ":: [[:C:]-[:cn:][:Z:][:whitespace:][:Default_Ignorable_Code_Point:]] hex/unicode ; "; static final Transliterator EXCAPE_FUNNY = Transliterator.createFromRules( @@ -381,7 +381,7 @@ private static String fromHexLenient(String hexOrChars) { // } /** - * + * */ // private static UnicodeSet _Non_IICore; // @@ -478,7 +478,7 @@ private static String fromHexLenient(String hexOrChars) { private static Comparator codepointComparator = new UTF16.StringComparator(true,false,0); static Comparator UCAComparator = new org.unicode.cldr.util.MultiComparator(new Comparator[] { - Collator.getInstance(ULocale.ROOT), + Collator.getInstance(ULocale.ROOT), //UCA.buildCollator(null), codepointComparator}); @@ -526,7 +526,7 @@ private static String fromHexLenient(String hexOrChars) { /** * @throws IOException - * + * */ private static void generateIDN() throws IOException { final IdentifierInfo info = IdentifierInfo.getIdentifierInfo(); @@ -538,12 +538,12 @@ private static void generateIDN() throws IOException { // static final String UNPROHIBITED = "Allowed ; "; private static final boolean suppress_NFKC = true; /** - * + * */ /** - * + * */ static void generateDecompFile() throws IOException { final PrintWriter out = FileUtilities.openUTF8Writer(reformatedInternal, "decomps.txt"); @@ -592,7 +592,7 @@ public String getValue(int codepoint, boolean isShort) { } /** - * + * */ // private static void showRemapped(PrintWriter out, String title, UnicodeMap remap) { // out.println(""); @@ -607,7 +607,7 @@ public String getValue(int codepoint, boolean isShort) { // out.println("# Total code points: " + count); // } /** - * + * */ static UnicodeSet IDNOutputSet; static UnicodeSet IDNInputSet; @@ -745,7 +745,7 @@ public static int getSingleScript(String source) { } /** - * + * */ private static void generateConfusables() throws IOException { log = FileUtilities.openUTF8Writer(reformatedInternal, "log.txt"); @@ -793,7 +793,7 @@ public int compareTo(Object o) { /** * @param relation TODO - * + * */ private static void writeSourceTargetLine(PrintWriter out, String source, String tag, String target, String reason, String relation) { out.print( @@ -865,7 +865,7 @@ public boolean addCheck(String a, String b, String reason) { } /** - * + * */ private boolean checkForBad(String a, String b, String reason) { final Set equivalences = getEquivalences(b); @@ -958,7 +958,7 @@ public void close(String reason) { } /** - * + * */ private String mapString(String item, StringBuffer reasons, boolean onlyLowercase, boolean onlySameScript) { if (DEBUG && item.startsWith("\u03D2")) { @@ -1044,8 +1044,8 @@ public String getParadigm(String item, boolean onlyLowercase, boolean onlySameSc filteredSet.add(other); } // } - return CollectionUtilities.getBest(filteredSet, - // onlyLowercase || onlySameScript ? betterTargetIsLessFavorNeutral : + return CollectionUtilities.getBest(filteredSet, + // onlyLowercase || onlySameScript ? betterTargetIsLessFavorNeutral : betterTargetIsLess, -1); } @@ -1055,7 +1055,7 @@ public Set getOrderedExplicitItems() { return cloneForSafety; } /** - * + * */ // public void writeSource(PrintWriter out) { // final Set items = getOrderedExplicitItems(); @@ -1175,7 +1175,7 @@ public String toString() { /* *//** * @param errorLine TODO - * + * *//* private DataSet add(Data newData, String errorLine) { if (controls.containsSome(newData.source) || controls.containsSome(newData.target)) { @@ -1316,16 +1316,16 @@ public void writeSourceOrder(String directory, String filename, boolean appendFi Utility.appendFile(Settings.SRC_UCD_DIR + "confusablesHeader.txt", Utility.UTF8_WINDOWS, out, replacements); } - Relation, String> confusableMap - = Relation.of(new TreeMap(MyPairComparator), TreeSet.class); + Relation, String> confusableMap + = Relation.of(new TreeMap(MyPairComparator), TreeSet.class); if (true) { - // writeSourceOrder(out, dataMixedAnycase, "SL", "Single-Script, Lowercase Confusables", skipNFKEquivs, + // writeSourceOrder(out, dataMixedAnycase, "SL", "Single-Script, Lowercase Confusables", skipNFKEquivs, // true, true, confusableMap); - // writeSourceOrder(out, dataMixedAnycase, "SA", "Single-Script, Anycase Confusables", skipNFKEquivs, + // writeSourceOrder(out, dataMixedAnycase, "SA", "Single-Script, Anycase Confusables", skipNFKEquivs, // false, true, confusableMap); - // writeSourceOrder(out, dataMixedAnycase, "ML", "Mixed-Script, Lowercase Confusables", skipNFKEquivs, + // writeSourceOrder(out, dataMixedAnycase, "ML", "Mixed-Script, Lowercase Confusables", skipNFKEquivs, // true, false, confusableMap); - writeSourceOrder(out, dataMixedAnycase, "MA", "Mixed-Script, Anycase Confusables", skipNFKEquivs, + writeSourceOrder(out, dataMixedAnycase, "MA", "Mixed-Script, Anycase Confusables", skipNFKEquivs, false, false, confusableMap); Counter> counter = new Counter(); Map, Pair> examples = new HashMap, Pair>(); @@ -1358,11 +1358,11 @@ public int compare(Pair o1, Pair o2) { * @param skipNFKEquivs TODO * @param onlyLowercase TODO * @param onlySingleScript TODO - * @param confusableMap - * + * @param confusableMap + * */ private void writeSourceOrder(PrintWriter out, MyEquivalenceClass data, String tag, String title, - boolean skipNFKEquivs, boolean onlyLowercase, boolean onlySingleScript, + boolean skipNFKEquivs, boolean onlyLowercase, boolean onlySingleScript, Relation, String> confusableMap) { // first get all the sets. Then get the best paradigm from each. Then sort. // Set setOfSets = data.getEquivalenceSets(); @@ -1427,7 +1427,7 @@ private void writeSourceOrder(PrintWriter out, MyEquivalenceClass data, String t } /** - * + * */ // private String fixReason(List reasons) { // final List first = (List)reasons.get(0); @@ -1470,7 +1470,7 @@ public Set getEquivalences(String string) { return dataMixedAnycase.getEquivalences(string); } /* *//** - * + * *//* public DataSet clean() { // remove all skips @@ -1542,7 +1542,7 @@ public DataSet clean() { return s; } *//** - * + * *//* private void remove(Data already) { String[] key = {already.source, already.target}; @@ -1550,7 +1550,7 @@ private void remove(Data already) { dataSet.remove(already); }*/ /** - * + * */ public void close(String reason) { dataMixedAnycase.close(reason); @@ -1559,7 +1559,7 @@ public void close(String reason) { // dataSingleLowercase.close(reason); } /** - * + * */ public void addUnicodeMap(UnicodeMap decompMap, String type, String errorLine) { int count = 0; @@ -1595,7 +1595,7 @@ public boolean matches(Object o) { /** * @param script TODO * @throws IOException - * + * */ public void writeSummary(String outdir, String filename, boolean outputOnly, UnicodeSet script) throws IOException { final PrintWriter out = openAndWriteHeader(outdir, filename, "Summary: Recommended confusable mapping for IDN"); @@ -1763,7 +1763,7 @@ public void writeWholeScripts(String outdir, String filename) throws IOException out.close(); } /** - * + * */ // private String getStatus(String source) { // // TODO Auto-generated method stub @@ -1931,9 +1931,9 @@ void write(PrintWriter out) throws IOException { } } - final String sname = UCD.getScriptID_fromIndex(j, UCD_Types.SHORT) + "; " + final String sname = UCD.getScriptID_fromIndex(j, UCD_Types.SHORT) + "; " + UCD.getScriptID_fromIndex(k, UCD_Types.SHORT) + "; " + label; - final String name = getScriptIndexName(j, UCD_Types.LONG) + final String name = getScriptIndexName(j, UCD_Types.LONG) + "; " + getScriptIndexName(k, UCD_Types.LONG); StringWriter b = new StringWriter(); PrintWriter out2 = new PrintWriter(b); @@ -1959,7 +1959,7 @@ public String getScriptIndexName(short scriptIndex, byte length) { /** * @throws IOException - * + * */ // private static void fixMichel(String indir, String outdir) throws IOException { // final BufferedReader in = FileUtilities.openUTF8Reader(indir + "michel/", "tr36comments-annex.txt"); @@ -1983,7 +1983,7 @@ public String getScriptIndexName(short scriptIndex, byte length) { // out.close(); // } /** - * + * */ private static void generateSource() throws IOException { @@ -2236,7 +2236,7 @@ private static void gen() throws IOException { if (DEBUG) System.out.println("Done"); } /** - * + * */ // private static String formatLine(String source, String target, int count) { // return Utility.hex(source) + " ; " + Utility.hex(target," ") @@ -2247,7 +2247,7 @@ private static void gen() throws IOException { // + " " + ARROW + " " + DEFAULT_UCD.getName(target); // } /** - * + * */ /* private static void add(Map m, String source, String target, int count) { if (source.length() == 0 || target.length() == 0) return; @@ -2276,7 +2276,7 @@ private static void gen() throws IOException { private static boolean isXid(String x) { return XID.containsAll(x); } - + private static class _BetterTargetIsLess implements Comparator { IdentifierInfo info = IdentifierInfo.getIdentifierInfo(); private boolean favorNeutral; @@ -2297,9 +2297,9 @@ public int compare(String a, String b) { if (ca != cb) { return ca > cb ? -1 : 1; } - + // favor item with higher old last value, if there is one. - + long lasta = LAST_COUNT.get(a); long lastb = LAST_COUNT.get(b); long ldiff = lasta - lastb; diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/GenerateConfusablesCopy.java b/unicodetools/src/main/java/org/unicode/text/UCD/GenerateConfusablesCopy.java index 467abddcd..adb93b35b 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/GenerateConfusablesCopy.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/GenerateConfusablesCopy.java @@ -47,9 +47,9 @@ import org.unicode.cldr.util.With; import org.unicode.cldr.util.XEquivalenceClass; import org.unicode.cldr.util.XEquivalenceClass.Linkage; -import org.unicode.cldr.util.props.BagFormatter; +import org.unicode.props.BagFormatter; import org.unicode.cldr.util.props.UnicodeLabel; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import org.unicode.idna.Idna.IdnaType; import org.unicode.idna.Uts46; import org.unicode.props.IndexUnicodeProperties; @@ -129,7 +129,7 @@ public class GenerateConfusablesCopy { private static final UnicodeProperty SCRIPT_PROPERTY = ups.getProperty("sc"); private static final UnicodeProperty AGE = ups.getProperty("age"); - private static final String EXCAPE_FUNNY_RULE = + private static final String EXCAPE_FUNNY_RULE = ":: [[:C:]-[:cn:][:Z:][:whitespace:][:Default_Ignorable_Code_Point:]] hex/unicode ; "; private static final Transliterator EXCAPE_FUNNY = Transliterator.createFromRules( @@ -348,7 +348,7 @@ private static String fromHexLenient(String hexOrChars) { // } /** - * + * */ // private static UnicodeSet _Non_IICore; // @@ -445,7 +445,7 @@ private static String fromHexLenient(String hexOrChars) { private static Comparator codepointComparator = new UTF16.StringComparator(true,false,0); private static Comparator UCAComparator = new org.unicode.cldr.util.MultiComparator(new Comparator[] { - Collator.getInstance(ULocale.ROOT), + Collator.getInstance(ULocale.ROOT), //UCA.buildCollator(null), codepointComparator}); @@ -493,7 +493,7 @@ private static String fromHexLenient(String hexOrChars) { /** * @throws IOException - * + * */ private static void generateIDN() throws IOException { final IdentifierInfo info = IdentifierInfo.getIdentifierInfo(); @@ -715,7 +715,7 @@ public boolean replaceBy(Reason possibleReplacement) { } } /** - * + * */ private void loadFileData() throws IOException { BufferedReader br; @@ -880,7 +880,7 @@ void printIDNStuff() throws IOException { } /** - * + * */ private void writeIDReview() throws IOException { final BagFormatter bf = makeFormatter() @@ -936,7 +936,7 @@ private void writeIDReview() throws IOException { } /** - * + * */ private void writeIDChars() throws IOException { final BagFormatter bf = makeFormatter(); @@ -987,7 +987,7 @@ private void writeIDChars() throws IOException { /** - * + * */ private void showExtras(BagFormatter bf, UnicodeSet source, UnicodeSet letters) { final UnicodeSet extra = new UnicodeSet(source).removeAll(letters); @@ -1003,7 +1003,7 @@ private void showExtras(BagFormatter bf, UnicodeSet source, UnicodeSet letters) } /** - * + * */ private void printIDModifications() throws IOException { final BagFormatter bf = makeFormatter(); @@ -1159,8 +1159,8 @@ public Object compose(int codePoint, String string, Object a, Object b) { UnicodeSet newRecommended = new UnicodeSet(keySet).retainAll(current); for (String s : newRecommended) { // [:script=Phag:] ; historic # UAX31 T4 # Phags Pa - System.out.println(Utility.hex(s) - + "\t;\thistoric\t#\t" + System.out.println(Utility.hex(s) + + "\t;\thistoric\t#\t" + DEFAULT_UCD.getName(s)); } } @@ -1182,12 +1182,12 @@ public Object compose(int codePoint, String string, Object a, Object b) { private static final String NOT_IN_XID = "not in XID+"; private static final boolean suppress_NFKC = true; /** - * + * */ /** - * + * */ private static void generateDecompFile() throws IOException { final PrintWriter out = FileUtilities.openUTF8Writer(outdir, "decomps.txt"); @@ -1236,7 +1236,7 @@ public String getValue(int codepoint, boolean isShort) { } /** - * + * */ // private static void showRemapped(PrintWriter out, String title, UnicodeMap remap) { // out.println(""); @@ -1251,7 +1251,7 @@ public String getValue(int codepoint, boolean isShort) { // out.println("# Total code points: " + count); // } /** - * + * */ private static UnicodeSet IDNOutputSet, IDNInputSet, _preferredIDSet; @@ -1394,7 +1394,7 @@ public static int getSingleScript(String source) { } /** - * + * */ private static void generateConfusables() throws IOException { log = FileUtilities.openUTF8Writer(outdir, "log.txt"); @@ -1442,7 +1442,7 @@ public int compareTo(Object o) { /** * @param relation TODO - * + * */ private static void writeSourceTargetLine(PrintWriter out, String source, String tag, String target, String reason, String relation) { out.print( @@ -1514,7 +1514,7 @@ public boolean addCheck(String a, String b, String reason) { } /** - * + * */ private boolean checkForBad(String a, String b, String reason) { final Set equivalences = getEquivalences(b); @@ -1600,7 +1600,7 @@ public void close(String reason) { bestSelector.addAll(set); String baseItem = bestSelector.iterator().next(); bestSelector.clear(); - + for (Entry mapped : newItems.entrySet()) { String newItem = mapped.getKey(); String newReasion = mapped.getValue(); @@ -1624,10 +1624,10 @@ public void close(String reason) { } /** - * @param alreadyIn - * @param alreadyIn - * @param combinations - * + * @param alreadyIn + * @param alreadyIn + * @param combinations + * */ private Map mapString(String item, Set alreadyIn) { if (false && item.startsWith("\u03D2")) { @@ -1736,7 +1736,7 @@ public Set getOrderedExplicitItems() { return cloneForSafety; } /** - * + * */ // public void writeSource(PrintWriter out) { // final Set items = getOrderedExplicitItems(); @@ -1856,7 +1856,7 @@ public String toString() { /* *//** * @param errorLine TODO - * + * *//* private DataSet add(Data newData, String errorLine) { if (controls.containsSome(newData.source) || controls.containsSome(newData.target)) { @@ -1997,16 +1997,16 @@ public void writeSourceOrder(String directory, String filename, boolean appendFi Utility.appendFile(Settings.SRC_UCD_DIR + "confusablesHeader.txt", Utility.UTF8_WINDOWS, out, replacements); } - Relation, String> confusableMap - = Relation.of(new TreeMap(MyPairComparator), TreeSet.class); + Relation, String> confusableMap + = Relation.of(new TreeMap(MyPairComparator), TreeSet.class); if (true) { - writeSourceOrder(out, dataMixedAnycase, "SL", "Single-Script, Lowercase Confusables", skipNFKEquivs, + writeSourceOrder(out, dataMixedAnycase, "SL", "Single-Script, Lowercase Confusables", skipNFKEquivs, true, true, confusableMap); - writeSourceOrder(out, dataMixedAnycase, "SA", "Single-Script, Anycase Confusables", skipNFKEquivs, + writeSourceOrder(out, dataMixedAnycase, "SA", "Single-Script, Anycase Confusables", skipNFKEquivs, false, true, confusableMap); - writeSourceOrder(out, dataMixedAnycase, "ML", "Mixed-Script, Lowercase Confusables", skipNFKEquivs, + writeSourceOrder(out, dataMixedAnycase, "ML", "Mixed-Script, Lowercase Confusables", skipNFKEquivs, true, false, confusableMap); - writeSourceOrder(out, dataMixedAnycase, "MA", "Mixed-Script, Anycase Confusables", skipNFKEquivs, + writeSourceOrder(out, dataMixedAnycase, "MA", "Mixed-Script, Anycase Confusables", skipNFKEquivs, false, false, confusableMap); Counter> counter = new Counter(); Map, Pair> examples = new HashMap, Pair>(); @@ -2039,11 +2039,11 @@ public int compare(Pair o1, Pair o2) { * @param skipNFKEquivs TODO * @param onlyLowercase TODO * @param onlySingleScript TODO - * @param confusableMap - * + * @param confusableMap + * */ private void writeSourceOrder(PrintWriter out, MyEquivalenceClass data, String tag, String title, - boolean skipNFKEquivs, boolean onlyLowercase, boolean onlySingleScript, + boolean skipNFKEquivs, boolean onlyLowercase, boolean onlySingleScript, Relation, String> confusableMap) { // first get all the sets. Then get the best paradigm from each. Then sort. // Set setOfSets = data.getEquivalenceSets(); @@ -2104,7 +2104,7 @@ private void writeSourceOrder(PrintWriter out, MyEquivalenceClass data, String t } /** - * + * */ // private String fixReason(List reasons) { // final List first = (List)reasons.get(0); @@ -2147,7 +2147,7 @@ public Set getEquivalences(String string) { return dataMixedAnycase.getEquivalences(string); } /* *//** - * + * *//* public DataSet clean() { // remove all skips @@ -2219,7 +2219,7 @@ public DataSet clean() { return s; } *//** - * + * *//* private void remove(Data already) { String[] key = {already.source, already.target}; @@ -2227,7 +2227,7 @@ private void remove(Data already) { dataSet.remove(already); }*/ /** - * + * */ public void close(String reason) { dataMixedAnycase.close(reason); @@ -2236,7 +2236,7 @@ public void close(String reason) { // dataSingleLowercase.close(reason); } /** - * + * */ public void addUnicodeMap(UnicodeMap decompMap, String type, String errorLine) { int count = 0; @@ -2272,7 +2272,7 @@ public boolean matches(Object o) { /** * @param script TODO * @throws IOException - * + * */ public void writeSummary(String outdir, String filename, boolean outputOnly, UnicodeSet script) throws IOException { final PrintWriter out = openAndWriteHeader(outdir, filename, "Summary: Recommended confusable mapping for IDN"); @@ -2440,7 +2440,7 @@ public void writeWholeScripts(String outdir, String filename) throws IOException out.close(); } /** - * + * */ // private String getStatus(String source) { // // TODO Auto-generated method stub @@ -2608,9 +2608,9 @@ void write(PrintWriter out) throws IOException { } } - final String sname = UCD.getScriptID_fromIndex(j, UCD_Types.SHORT) + "; " + final String sname = UCD.getScriptID_fromIndex(j, UCD_Types.SHORT) + "; " + UCD.getScriptID_fromIndex(k, UCD_Types.SHORT) + "; " + label; - final String name = getScriptIndexName(j, UCD_Types.LONG) + final String name = getScriptIndexName(j, UCD_Types.LONG) + "; " + getScriptIndexName(k, UCD_Types.LONG); StringWriter b = new StringWriter(); PrintWriter out2 = new PrintWriter(b); @@ -2636,7 +2636,7 @@ public String getScriptIndexName(short scriptIndex, byte length) { /** * @throws IOException - * + * */ // private static void fixMichel(String indir, String outdir) throws IOException { // final BufferedReader in = FileUtilities.openUTF8Reader(indir + "michel/", "tr36comments-annex.txt"); @@ -2660,7 +2660,7 @@ public String getScriptIndexName(short scriptIndex, byte length) { // out.close(); // } /** - * + * */ private static void generateSource() throws IOException { @@ -2914,7 +2914,7 @@ private static void gen() throws IOException { System.out.println("Done"); } /** - * + * */ // private static String formatLine(String source, String target, int count) { // return Utility.hex(source) + " ; " + Utility.hex(target," ") @@ -2925,7 +2925,7 @@ private static void gen() throws IOException { // + " " + ARROW + " " + DEFAULT_UCD.getName(target); // } /** - * + * */ /* private static void add(Map m, String source, String target, int count) { if (source.length() == 0 || target.length() == 0) return; diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/GenerateStandardizedVariants.java b/unicodetools/src/main/java/org/unicode/text/UCD/GenerateStandardizedVariants.java index b2b7fd61b..adb5e4801 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/GenerateStandardizedVariants.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/GenerateStandardizedVariants.java @@ -16,7 +16,7 @@ import java.util.HashMap; import java.util.Map; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import org.unicode.text.utility.Settings; import org.unicode.text.utility.UnicodeDataFile; import org.unicode.text.utility.Utility; @@ -65,7 +65,7 @@ static public void generate() throws IOException { final String[] splits = new String[4]; final String[] codes = new String[2]; final String[] shapes = new String[4]; - + ToolUnicodePropertySource tups = ToolUnicodePropertySource.make(Default.ucdVersion()); final UnicodeProperty ui = tups.getProperty("Unified_Ideograph"); UnicodeSet uiSet = ui.getSet("Yes"); diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/GenerateStringPrep.java b/unicodetools/src/main/java/org/unicode/text/UCD/GenerateStringPrep.java index d297fd2ff..4ac8f5ba8 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/GenerateStringPrep.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/GenerateStringPrep.java @@ -11,7 +11,7 @@ import org.unicode.cldr.draft.FileUtilities; import org.unicode.cldr.util.TransliteratorUtilities; -import org.unicode.cldr.util.props.BagFormatter; +import org.unicode.props.BagFormatter; import org.unicode.cldr.util.props.UnicodeLabel; import org.unicode.text.UCD.TestData.RegexMatcher; import org.unicode.text.utility.Settings; @@ -202,7 +202,7 @@ void genStringPrep() throws IOException { } /** - * + * */ private void showScriptToBlock() { final UnicodeMap scripts = ToolUnicodePropertySource.make("").getProperty("script").getUnicodeMap(); @@ -297,7 +297,7 @@ public Object compose(int codepoint, String string, Object a, Object b) { } static final int OK = 0, DELETED = 1, ILLEGAL = 2, REMAPPED = 3, IDNA_TYPE_LIMIT = 4; /** - * + * */ static public int getIDNAType(int cp) { inbuffer.setLength(0); @@ -436,7 +436,7 @@ private void showCodes(PrintWriter htmlOut, PrintWriter textOut, UnicodeSet uset /** * @throws IOException - * + * */ private UnicodeMap getPositions() throws IOException { final UnicodeMap result = new UnicodeMap(); @@ -471,7 +471,7 @@ public int compare(Object o1, Object o2) { }; /** - * + * */ private UnicodeSet extract(UnicodeSet other, UnicodeSet core) { final UnicodeSet decomp = new UnicodeSet(core).retainAll(other); diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/IdentifierInfo.java b/unicodetools/src/main/java/org/unicode/text/UCD/IdentifierInfo.java index eda981450..2fe1a06b2 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/IdentifierInfo.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/IdentifierInfo.java @@ -19,9 +19,9 @@ import org.unicode.cldr.draft.ScriptMetadata; import org.unicode.cldr.draft.ScriptMetadata.IdUsage; import org.unicode.cldr.draft.ScriptMetadata.Info; -import org.unicode.cldr.util.props.BagFormatter; -import org.unicode.cldr.util.props.UnicodeProperty; -import org.unicode.cldr.util.props.UnicodeProperty.Factory; +import org.unicode.props.BagFormatter; +import org.unicode.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty.Factory; import org.unicode.props.IndexUnicodeProperties; import org.unicode.props.ScriptInfo; import org.unicode.props.UcdProperty; @@ -375,7 +375,7 @@ public static Set fromStringSet(String strings) { } } /** - * + * */ private void loadFileData() throws IOException { BufferedReader br; @@ -449,12 +449,12 @@ private void loadFileData() throws IOException { addToRemovalSets(s, reasons); - if (oldReason == Identifier_Type.inclusion + if (oldReason == Identifier_Type.inclusion || oldReason == reasons) { continue; // always ok } - if (override - || oldReason == null + if (override + || oldReason == null || oldReason.compareTo(reasons) < 0 || reasons == Identifier_Type.inclusion) { removals.put(s, reasons); @@ -502,8 +502,8 @@ private void loadFileData() throws IOException { for (final String script : scripts) { String shortName = UcdPropertyValues.Script_Values.forName(script).getShortName(); Info scriptInfo = ScriptMetadata.getInfo(shortName); - final IdUsage idUsage = scriptInfo != null - ? scriptInfo.idUsage + final IdUsage idUsage = scriptInfo != null + ? scriptInfo.idUsage : IdUsage.EXCLUSION; IdentifierInfo.Identifier_Type status; switch(idUsage) { @@ -547,10 +547,10 @@ private void loadFileData() throws IOException { if (DEBUG) System.out.println("*Removal Collision\t" + value + "\n\t" + removalCollision.getSet(value).toPattern(false)); } removals.freeze(); - + // pick up all the explict inclusions UnicodeSet inclusions = identifierTypesMap.getSet(SINGLETON_INCLUSION); - + // Clean up values by setting to singletons. ORDER is important!! identifierTypesMap.putAll(NOT_NFKC, Collections.singleton(Identifier_Type.not_nfkc)); identifierTypesMap.putAll(DEFAULT_IGNORABLE, Collections.singleton(Identifier_Type.default_ignorable)); @@ -568,7 +568,7 @@ private void loadFileData() throws IOException { identifierTypesMap.putAll(inclusions, SINGLETON_INCLUSION); identifierTypesMap.putAll(identifierTypesMap.getSet(null), Collections.singleton(Identifier_Type.recommended)); - + // make immutable // special hack for Exclusion + Obsolete!! for (Set value : identifierTypesMap.getAvailableValues()) { @@ -642,7 +642,7 @@ void printIDNStuff() throws IOException { } /** - * + * */ private void writeIDReviewInternal() throws IOException { final BagFormatter bf = GenerateConfusables.makeFormatter() @@ -698,7 +698,7 @@ private void writeIDReviewInternal() throws IOException { } /** - * + * */ private void writeIDCharsInternal() throws IOException { final BagFormatter bf = GenerateConfusables.makeFormatter(); @@ -749,7 +749,7 @@ private void writeIDCharsInternal() throws IOException { /** - * + * */ private void showExtras(BagFormatter bf, UnicodeSet source, UnicodeSet letters) { final UnicodeSet extra = new UnicodeSet(source).removeAll(letters); @@ -923,8 +923,8 @@ public String compose(int codePoint, String string, String a, String b) { UnicodeSet newRecommended = new UnicodeSet(keySet).retainAll(current); for (String s : newRecommended) { // [:script=Phag:] ; historic # UAX31 T4 # Phags Pa - if (DEBUG) System.out.println(Utility.hex(s) - + "\t;\thistoric\t#\t" + if (DEBUG) System.out.println(Utility.hex(s) + + "\t;\thistoric\t#\t" + DEFAULT_UCD.getName(s)); } } @@ -970,7 +970,7 @@ public int compare(String o1, String o2) { final String propName = "Identifier_Type"; final String filename = status == Style.byValue ? "IdentifierType.txt" : "IdentifierTypeFlat.txt"; - try (PrintWriter out2 = GenerateConfusables.openAndWriteHeader(GenerateConfusables.GEN_SECURITY_DIR, + try (PrintWriter out2 = GenerateConfusables.openAndWriteHeader(GenerateConfusables.GEN_SECURITY_DIR, filename, "Security Profile for General Identifiers: " + propName)) { out2.println("# Format" @@ -1005,11 +1005,11 @@ public int compare(String o1, String o2) { + propName + ":\t" + value); out2.println(""); - bf2.showSetNames(out2, tempMap.getSet(value)); + bf2.showSetNames(out2, tempMap.getSet(value)); } } else { out2.println(""); - bf2.showSetNames(out2, tempMap.keySet()); + bf2.showSetNames(out2, tempMap.keySet()); } } } @@ -1031,7 +1031,7 @@ private void printIdentifierStatus() throws IOException { bf2.setLabelSource(age); final String propName = "Identifier_Status"; - try (PrintWriter out2 = GenerateConfusables.openAndWriteHeader(GenerateConfusables.GEN_SECURITY_DIR, + try (PrintWriter out2 = GenerateConfusables.openAndWriteHeader(GenerateConfusables.GEN_SECURITY_DIR, "IdentifierStatus.txt", "Security Profile for General Identifiers: " + propName)) { out2.println("# Format" + "\n#" @@ -1065,4 +1065,4 @@ private void printIdentifierStatus() throws IOException { } } } -} \ No newline at end of file +} diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java index b46827f5b..28f498969 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java @@ -23,9 +23,9 @@ import org.unicode.cldr.draft.FileUtilities; import org.unicode.cldr.util.Tabber; -import org.unicode.cldr.util.props.BagFormatter; +import org.unicode.props.BagFormatter; import org.unicode.cldr.util.props.UnicodeLabel; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import org.unicode.text.UCD.MakeUnicodeFiles.Format.PrintStyle; import org.unicode.text.utility.ChainException; import org.unicode.text.utility.Settings; diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/QuickTest.java b/unicodetools/src/main/java/org/unicode/text/UCD/QuickTest.java index 785187c96..9231b0b0d 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/QuickTest.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/QuickTest.java @@ -36,8 +36,8 @@ import org.unicode.cldr.util.Counter; import org.unicode.cldr.util.Tabber; import org.unicode.cldr.util.UnicodeSetPrettyPrinter; -import org.unicode.cldr.util.props.BagFormatter; -import org.unicode.cldr.util.props.UnicodeProperty.UnicodeMapProperty; +import org.unicode.props.BagFormatter; +import org.unicode.props.UnicodeProperty.UnicodeMapProperty; import org.unicode.text.utility.Settings; import com.ibm.icu.dev.util.UnicodeMap; @@ -537,7 +537,7 @@ void add(int codePoint, int cuLen, int processedUnitLength, String processedStri static final int skip = (1< scriptCount = new Counter(); diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/TestNameUniqueness.java b/unicodetools/src/main/java/org/unicode/text/UCD/TestNameUniqueness.java index 7ebad76cf..02d9fc644 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/TestNameUniqueness.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/TestNameUniqueness.java @@ -18,7 +18,7 @@ import java.util.Map; import org.unicode.cldr.draft.FileUtilities; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import org.unicode.text.utility.Settings; import org.unicode.text.utility.Utility; diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/TestNormalization.java b/unicodetools/src/main/java/org/unicode/text/UCD/TestNormalization.java index 4f4fc4562..1ed5265e9 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/TestNormalization.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/TestNormalization.java @@ -22,7 +22,7 @@ import java.util.Map; import java.util.TreeMap; -import org.unicode.cldr.util.props.BagFormatter; +import org.unicode.props.BagFormatter; import org.unicode.text.utility.ChainException; import org.unicode.text.utility.Settings; import org.unicode.text.utility.UTF32; diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java b/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java index 149e40076..e4d566609 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java @@ -16,12 +16,12 @@ import org.unicode.cldr.draft.FileUtilities; import org.unicode.cldr.util.Tabber; import org.unicode.cldr.util.Tabber.HTMLTabber; -import org.unicode.cldr.util.props.BagFormatter; -import org.unicode.cldr.util.props.ICUPropertyFactory; +import org.unicode.props.BagFormatter; +import org.unicode.jsp.ICUPropertyFactory; import org.unicode.cldr.util.props.UnicodeLabel; -import org.unicode.cldr.util.props.UnicodeProperty; -import org.unicode.cldr.util.props.UnicodeProperty.Factory; -import org.unicode.cldr.util.props.UnicodeProperty.PatternMatcher; +import org.unicode.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty.Factory; +import org.unicode.props.UnicodeProperty.PatternMatcher; import org.unicode.props.IndexUnicodeProperties; import org.unicode.text.utility.Settings; @@ -546,13 +546,13 @@ public static void checkRelation(ParsePosition pp, char relation) private static void checkExpected(Expected expected, UnicodeSet segment, String rightStatus, String rightSide, String leftStatus, String leftSide) { switch (expected) { - case empty: + case empty: if (segment.size() == 0) { return; } else { break; } - case not_empty: + case not_empty: if (segment.size() != 0) { return; } else { @@ -572,10 +572,10 @@ private static void checkExpected(Expected expected, UnicodeSet segment, String println(); } - static UnicodeMapParser UMP = UnicodeMapParser.create(UnicodeMapParser.STRING_VALUE_PARSER, + static UnicodeMapParser UMP = UnicodeMapParser.create(UnicodeMapParser.STRING_VALUE_PARSER, new UnicodeMapParser.ChainedFactory( - getProperties(Settings.latestVersion), - IndexUnicodeProperties.make(Settings.latestVersion)), + getProperties(Settings.latestVersion), + IndexUnicodeProperties.make(Settings.latestVersion)), new UnicodeMapParser.ChainedFactory( getProperties(Settings.lastVersion), IndexUnicodeProperties.make(Settings.lastVersion))); @@ -643,13 +643,13 @@ private static void testMapLine(String line, ParsePosition pp) throws ParseExcep private static void checkExpected(Expected expected, UnicodeMap segment, String rightStatus, String rightSide, String leftStatus, String leftSide) { switch (expected) { - case empty: + case empty: if (segment.size() == 0) { return; } else { break; } - case not_empty: + case not_empty: if (segment.size() != 0) { return; } else { @@ -817,7 +817,7 @@ private static Factory getProperties(final String version) { ? ICUPropertyFactory.make() : ToolUnicodePropertySource.make(version); } - + private static Factory getIndexedProperties(String version2) { return IndexUnicodeProperties.make(version2); } @@ -931,7 +931,7 @@ public VersionedProperty set(String xPropertyName) { } return this; } - + public UnicodeSet getSet(String propertyValue) { UnicodeSet set; if (propertyValue.length() == 0) { @@ -957,7 +957,7 @@ public UnicodeSet getSet(String propertyValue) { return set; } } - + static final UnicodeProperty.PatternMatcher NULL_MATCHER = new UnicodeProperty.PatternMatcher() { @Override public boolean matches(Object o) { diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java b/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java index 2b71b9fd6..ddf391c1c 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java @@ -16,11 +16,11 @@ import java.util.TreeSet; import java.util.regex.Pattern; -import org.unicode.cldr.util.props.UnicodeProperty; -import org.unicode.cldr.util.props.UnicodeProperty.AliasAddAction; -import org.unicode.cldr.util.props.UnicodeProperty.BaseProperty; -import org.unicode.cldr.util.props.UnicodeProperty.SimpleProperty; -import org.unicode.cldr.util.props.UnicodeProperty.UnicodeMapProperty; +import org.unicode.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty.AliasAddAction; +import org.unicode.props.UnicodeProperty.BaseProperty; +import org.unicode.props.UnicodeProperty.SimpleProperty; +import org.unicode.props.UnicodeProperty.UnicodeMapProperty; import org.unicode.props.IndexUnicodeProperties; import org.unicode.props.UcdProperty; import org.unicode.props.UcdPropertyValues; @@ -47,9 +47,9 @@ * Class that provides all of the properties for formatting in the Unicode * standard data files. Note that many of these are generated directly from UCD, * and many from {@link DerivedProperty}. So fixes to some will go there. - * + * * @author markdavis - * + * */ public class ToolUnicodePropertySource extends UnicodeProperty.Factory { @@ -107,7 +107,7 @@ private ToolUnicodePropertySource(String version) { UnicodeSet tags = new UnicodeSet(0xE0020,0xE007f).freeze(); VersionInfo versionInfo = VersionInfo.getInstance(version); - + IndexUnicodeProperties iup = IndexUnicodeProperties.make(versionInfo); final UnicodeSet E_Modifier = iup.loadEnum(UcdProperty.Emoji_Modifier, Binary.class).getSet(Binary.Yes); @@ -775,12 +775,12 @@ public int getMaxWidth(boolean isShort) { /* Virama = Indic_Syllabic_Category = Virama, or -Indic_Syllabic_Category = Invisible_Stacker -and not General_Category = Spacing_Mark +Indic_Syllabic_Category = Invisible_Stacker +and not General_Category = Spacing_Mark LinkingConsonant = Indic_Syllabic_Category = Consonant -extend -and not GCB = Virama +extend -and not GCB = Virama */ //IndexUnicodeProperties iup = IndexUnicodeProperties.make(GenerateEnums.ENUM_VERSION); @@ -854,12 +854,12 @@ public int getMaxWidth(boolean isShort) { // unicodeMap.putAll(E_Modifier, "E_Modifier"); /* Virama = Indic_Syllabic_Category = Virama, or -Indic_Syllabic_Category = Invisible_Stacker -and not General_Category = Spacing_Mark +Indic_Syllabic_Category = Invisible_Stacker +and not General_Category = Spacing_Mark LinkingConsonant = Indic_Syllabic_Category = Consonant -extend -and not GCB = Virama +extend -and not GCB = Virama Glue_After_Zwj empty E_Base_GAZ empty E_Base Emoji characters listed as Emoji_Modifier_Base=Yes in emoji-data.txt @@ -990,7 +990,7 @@ public int getMaxWidth(boolean isShort) { .add(0xFF0C).add(0xFF1B).remove(0x002E).remove(0x003A).remove(0xFE13), "MidNum"); /* * 066C ( ٬ ) ARABIC THOUSANDS SEPARATOR - * + * * FE50 ( ﹐ ) SMALL COMMA FE54 ( ﹔ ) SMALL SEMICOLON FF0C ( , ) * FULLWIDTH COMMA FF1B ( ; ) FULLWIDTH SEMICOLON */ @@ -1258,12 +1258,12 @@ public String _getValue(int codepoint) { return ""; } }; - if (unicodePropertyType == UnicodeProperty.BINARY) { - item.addValueAliases( + if (unicodePropertyType == UnicodeProperty.BINARY) { + item.addValueAliases( new String[][] { { "No", "N" }, { "Yes", "Y" }, - }, + }, AliasAddAction.IGNORE_IF_MISSING); } else { item.setValues(defaultValue); @@ -1394,7 +1394,7 @@ protected List _getAvailableValues(List result) { /* * (non-Javadoc) - * + * * @see * com.ibm.icu.dev.test.util.UnicodePropertySource#getPropertyAliases(java * .util.Collection) @@ -1695,7 +1695,7 @@ public String getAge(int codePoint) { /* * (non-Javadoc) - * + * * @see com.ibm.icu.dev.test.util.UnicodePropertySource#getPropertyType() */ private int getPropertyTypeInternal() { @@ -1781,22 +1781,22 @@ static List lookup(String valueAlias, String[] main, String[] aux, /* * static class DerivedPropertyWrapper extends UnicodeProperty { UCDProperty * derivedProperty; UCD ucd; - * + * * DerivedPropertyWrapper(int derivedPropertyID, UCD ucd) { this.ucd = ucd; * derivedProperty = DerivedProperty.make(derivedPropertyID, ucd); } protected * String _getVersion() { return ucd.getVersion(); } - * + * * protected String _getValue(int codepoint) { return * derivedProperty.getValue(codepoint, UCD_Types.LONG); } protected List * _getNameAliases(List result) { if (result != null) result = new * ArrayList(1); addUnique(derivedProperty.getName(UCD_Types.SHORT), result); * addUnique(derivedProperty.getName(UCD_Types.LONG), result); return null; } - * + * * protected List _getValueAliases(String valueAlias, List result) { // TODO * Auto-generated method stub return null; } protected List * _getAvailableValues(List result) { // TODO Auto-generated method stub * return null; } - * + * * } */ diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java index 941b613bd..1299454f4 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java @@ -16,7 +16,7 @@ import java.util.Set; import java.util.TreeMap; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import org.unicode.text.utility.Utility; import com.ibm.icu.impl.Relation; @@ -1458,7 +1458,7 @@ static void titlecase (String[] array) { UNASSIGNED_INFO.mirrored = NO; } */ - + static { if (LIMIT_CATEGORY != GENERAL_CATEGORY.length || LIMIT_CATEGORY != LONG_GENERAL_CATEGORY.length) { throw new IllegalArgumentException("!! ERROR !! Enums and Names out of sync: category"); diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UnicodeMapParser.java b/unicodetools/src/main/java/org/unicode/text/UCD/UnicodeMapParser.java index dc7afb3d0..65bc91bbb 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UnicodeMapParser.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UnicodeMapParser.java @@ -5,8 +5,8 @@ import java.util.regex.Pattern; import org.unicode.cldr.util.InternalCldrException; -import org.unicode.cldr.util.props.UnicodeProperty; -import org.unicode.cldr.util.props.UnicodeProperty.Factory; +import org.unicode.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty.Factory; import com.ibm.icu.dev.util.UnicodeMap; import com.ibm.icu.dev.util.UnicodeMap.EntryRange; @@ -260,7 +260,7 @@ private void parsePropertyAndAdd(String source, ParsePosition pos, Operation op, } boolean oldVersion = source.charAt(current+3) == '*'; UnicodeProperty prop = (oldVersion ? - oldUnicodePropertyFactory.getProperty(source.substring(current+4, term)) : + oldUnicodePropertyFactory.getProperty(source.substring(current+4, term)) : unicodePropertyFactory.getProperty(source.substring(current+3, term))); if (prop == null) { pos.setErrorIndex(current+4); @@ -290,12 +290,12 @@ public void addUnicodeMapString(Operation op, UnicodeMap um, public void addUnicodeMap(Operation op, UnicodeMap um, UnicodeMap result) { switch(op) { - case REMOVE_KEYS: - result.putAll(um.keySet(), null); + case REMOVE_KEYS: + result.putAll(um.keySet(), null); break; - case RETAIN_KEYS: + case RETAIN_KEYS: UnicodeSet toRemove = new UnicodeSet(result.keySet()).removeAll(um.keySet()); - result.putAll(toRemove, null); + result.putAll(toRemove, null); break; case ADD: throw new InternalCldrException("Should never happen"); } @@ -440,8 +440,8 @@ public String parse(String s) { // */ // public static int getContainmentRelation(UnicodeMap a, UnicodeMap b) { // if (a.size() == 0) { - // return (b.size() == 0) - // ? CollectionUtilities.ALL_EMPTY + // return (b.size() == 0) + // ? CollectionUtilities.ALL_EMPTY // : CollectionUtilities.NOT_A_SUPERSET_B; // } else if (b.size() == 0) { // return CollectionUtilities.NOT_A_SUBSET_B; @@ -457,13 +457,13 @@ public String parse(String s) { // } // for (EntryRange ae : a.entryRanges()) { // if (ae.string != null) { - // result |= (Objects.equals(ae.value, b.get(ae.string))) - // ? CollectionUtilities.NOT_A_DISJOINT_B + // result |= (Objects.equals(ae.value, b.get(ae.string))) + // ? CollectionUtilities.NOT_A_DISJOINT_B // : CollectionUtilities.NOT_A_SUBSET_B; // } else { // for (int i = ae.codepoint; i <= ae.codepointEnd; ++i) { - // result |= (Objects.equals(ae.value, b.get(i))) - // ? CollectionUtilities.NOT_A_DISJOINT_B + // result |= (Objects.equals(ae.value, b.get(i))) + // ? CollectionUtilities.NOT_A_DISJOINT_B // : CollectionUtilities.NOT_A_SUBSET_B; // } // } diff --git a/unicodetools/src/main/java/org/unicode/text/tools/CharsByAgeAndCategory.java b/unicodetools/src/main/java/org/unicode/text/tools/CharsByAgeAndCategory.java index 5e8b62c4e..06af42a97 100644 --- a/unicodetools/src/main/java/org/unicode/text/tools/CharsByAgeAndCategory.java +++ b/unicodetools/src/main/java/org/unicode/text/tools/CharsByAgeAndCategory.java @@ -4,7 +4,7 @@ import java.util.TreeMap; import org.unicode.cldr.util.Counter; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import org.unicode.text.UCD.Default; import org.unicode.text.UCD.ToolUnicodePropertySource; import org.unicode.text.UCD.UCD; diff --git a/unicodetools/src/main/java/org/unicode/text/tools/CompareProperties.java b/unicodetools/src/main/java/org/unicode/text/tools/CompareProperties.java index b087be232..415d68619 100644 --- a/unicodetools/src/main/java/org/unicode/text/tools/CompareProperties.java +++ b/unicodetools/src/main/java/org/unicode/text/tools/CompareProperties.java @@ -3,7 +3,7 @@ import java.util.HashMap; import java.util.List; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import org.unicode.text.UCD.ToolUnicodePropertySource; import com.ibm.icu.dev.util.UnicodeMap; diff --git a/unicodetools/src/main/java/org/unicode/text/tools/CompareScriptExtensions.java b/unicodetools/src/main/java/org/unicode/text/tools/CompareScriptExtensions.java index c9c60a41f..e7e7a56f4 100644 --- a/unicodetools/src/main/java/org/unicode/text/tools/CompareScriptExtensions.java +++ b/unicodetools/src/main/java/org/unicode/text/tools/CompareScriptExtensions.java @@ -2,7 +2,7 @@ import java.util.TreeSet; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import org.unicode.text.UCD.Default; import org.unicode.text.UCD.ToolUnicodePropertySource; import org.unicode.text.utility.Utility; diff --git a/unicodetools/src/main/java/org/unicode/text/tools/ShowCharacters.java b/unicodetools/src/main/java/org/unicode/text/tools/ShowCharacters.java index 41189ebc4..8e1a82af0 100644 --- a/unicodetools/src/main/java/org/unicode/text/tools/ShowCharacters.java +++ b/unicodetools/src/main/java/org/unicode/text/tools/ShowCharacters.java @@ -10,8 +10,8 @@ import org.unicode.cldr.util.CldrUtility; import org.unicode.cldr.util.With; -import org.unicode.cldr.util.props.UnicodeProperty; -import org.unicode.cldr.util.props.UnicodeProperty.RegexMatcher; +import org.unicode.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty.RegexMatcher; import org.unicode.props.IndexUnicodeProperties; import org.unicode.props.UcdProperty; import org.unicode.props.UcdPropertyValues; @@ -37,7 +37,7 @@ public class ShowCharacters { static UnicodeMap names = iup.load(UcdProperty.Name); static UnicodeMap ages = iup.loadEnum(UcdProperty.Age, UcdPropertyValues.Age_Values.class); static CandidateData CD = CandidateData.getInstance(); - + public static void main(String[] args) { show("New", CD.keySet()); System.out.println(); @@ -48,7 +48,7 @@ public static void main(String[] args) { show("Emoji_Gender_Base", GenerateEmojiData.genderBase); show("Emoji_Hair_Base", GenerateEmojiData.hairBase); show("Emoji_Direction_Base", GenerateEmojiData.directionBase); - + UnicodeSet mods = EmojiData.EMOJI_DATA.getModifierBases(); UnicodeSet modsWithVS = new UnicodeSet(mods).retainAll(EmojiData.EMOJI_DATA.getEmojiWithVariants()); UnicodeSet modsWithVSAndDefaultEmoji = new UnicodeSet(modsWithVS) @@ -59,14 +59,14 @@ public static void main(String[] args) { .retainAll(EmojiData.EMOJI_DATA.getTextPresentationSet()); show("basesWithVSAndDefaultText", modsWithVSAndDefaultText); } - + private static void show(String prop, UnicodeSet unicodeSet) { System.out.println("# All omitted code points have " + prop + "=No\n" + "# @missing: 0000..10FFFF ; " + prop + " ; No\n"); for (String s : unicodeSet) { - System.out.println(Utility.hex(s) + " ;\t" + prop + System.out.println(Utility.hex(s) + " ;\t" + prop + "\t# " + getAge(s) - + " (" + s + ") " + + " (" + s + ") " + getName(s)); } System.out.println("# total:\t" + unicodeSet.size() + "\n# uset: \t" + unicodeSet.toPattern(false) + "\n"); @@ -80,7 +80,7 @@ private static String getAge(String s) { private static String getName(String s) { return CldrUtility.ifNull(CD.getName(s),names.get(s)); } - + public void test(String[] args) { final ToolUnicodePropertySource pSource = ToolUnicodePropertySource.make(null); final Map,UnicodeSet> data = new TreeMap(); diff --git a/unicodetools/src/main/java/org/unicode/text/tools/ShowPatternSyntax.java b/unicodetools/src/main/java/org/unicode/text/tools/ShowPatternSyntax.java index a36ebb761..8186db510 100644 --- a/unicodetools/src/main/java/org/unicode/text/tools/ShowPatternSyntax.java +++ b/unicodetools/src/main/java/org/unicode/text/tools/ShowPatternSyntax.java @@ -1,6 +1,6 @@ package org.unicode.text.tools; -import org.unicode.cldr.util.props.BagFormatter; +import org.unicode.props.BagFormatter; import org.unicode.text.UCD.ToolUnicodePropertySource; import com.ibm.icu.text.UnicodeSet; diff --git a/unicodetools/src/main/java/org/unicode/text/tools/VerifyIdna.java b/unicodetools/src/main/java/org/unicode/text/tools/VerifyIdna.java index c19ba0cfa..0c40a1a4f 100644 --- a/unicodetools/src/main/java/org/unicode/text/tools/VerifyIdna.java +++ b/unicodetools/src/main/java/org/unicode/text/tools/VerifyIdna.java @@ -9,9 +9,9 @@ import java.util.regex.Pattern; import org.unicode.cldr.draft.FileUtilities; -import org.unicode.cldr.util.props.BagFormatter; -import org.unicode.cldr.util.props.ICUPropertyFactory; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.BagFormatter; +import org.unicode.jsp.ICUPropertyFactory; +import org.unicode.props.UnicodeProperty; import org.unicode.text.UCD.Default; import org.unicode.text.UCD.ToolUnicodePropertySource; import org.unicode.text.UCD.UCD_Types; diff --git a/unicodetools/src/main/java/org/unicode/text/tools/VerifyUCD.java b/unicodetools/src/main/java/org/unicode/text/tools/VerifyUCD.java index 982a13ff6..74a9d1691 100644 --- a/unicodetools/src/main/java/org/unicode/text/tools/VerifyUCD.java +++ b/unicodetools/src/main/java/org/unicode/text/tools/VerifyUCD.java @@ -13,9 +13,9 @@ import org.unicode.cldr.util.Log; import org.unicode.cldr.util.Tabber; import org.unicode.cldr.util.TransliteratorUtilities; -import org.unicode.cldr.util.props.BagFormatter; +import org.unicode.props.BagFormatter; import org.unicode.cldr.util.props.UnicodeLabel; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import org.unicode.text.UCD.Default; import org.unicode.text.UCD.Normalizer; import org.unicode.text.UCD.ToolUnicodePropertySource; diff --git a/unicodetools/src/main/java/org/unicode/text/tools/VerifyXmlUcd.java b/unicodetools/src/main/java/org/unicode/text/tools/VerifyXmlUcd.java index 9cb671745..a8bdb1de1 100644 --- a/unicodetools/src/main/java/org/unicode/text/tools/VerifyXmlUcd.java +++ b/unicodetools/src/main/java/org/unicode/text/tools/VerifyXmlUcd.java @@ -17,9 +17,9 @@ import org.unicode.cldr.util.XMLFileReader; import org.unicode.cldr.util.XPathParts; -import org.unicode.cldr.util.props.ICUPropertyFactory; -import org.unicode.cldr.util.props.UnicodeProperty; -import org.unicode.cldr.util.props.UnicodeProperty.Factory; +import org.unicode.jsp.ICUPropertyFactory; +import org.unicode.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty.Factory; import org.unicode.text.UCD.Default; import org.unicode.text.UCD.ToolUnicodePropertySource; import org.unicode.text.utility.Settings; diff --git a/unicodetools/src/main/java/org/unicode/text/utility/CallArgs.java b/unicodetools/src/main/java/org/unicode/text/utility/CallArgs.java index 55d14cc58..76be36b18 100644 --- a/unicodetools/src/main/java/org/unicode/text/utility/CallArgs.java +++ b/unicodetools/src/main/java/org/unicode/text/utility/CallArgs.java @@ -3,7 +3,7 @@ import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; -import org.unicode.cldr.util.props.BagFormatter; +import org.unicode.props.BagFormatter; public class CallArgs { static BagFormatter bf = new BagFormatter(); @@ -71,4 +71,4 @@ private static Method tryMethod(String className, String methodName, String[] me return null; } } -} \ No newline at end of file +} diff --git a/unicodetools/src/main/java/org/unicode/text/utility/TestUtility.java b/unicodetools/src/main/java/org/unicode/text/utility/TestUtility.java index 7f04d1127..bd353465c 100644 --- a/unicodetools/src/main/java/org/unicode/text/utility/TestUtility.java +++ b/unicodetools/src/main/java/org/unicode/text/utility/TestUtility.java @@ -32,8 +32,8 @@ import java.util.zip.GZIPOutputStream; import org.unicode.cldr.util.Counter; -import org.unicode.cldr.util.props.ICUPropertyFactory; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.jsp.ICUPropertyFactory; +import org.unicode.props.UnicodeProperty; import org.unicode.props.IndexUnicodeProperties; import org.unicode.props.UcdProperty; import org.unicode.text.UCD.Default; @@ -166,7 +166,7 @@ static void check() throws IOException, ClassNotFoundException { } /** - * + * */ private static boolean equals(int i, String value) { final int len = value.length(); @@ -183,7 +183,7 @@ private static boolean equals(int i, String value) { } /** - * + * */ private static void testHanProp(int iterations, int total, String pname, String type) throws IOException, ClassNotFoundException { System.out.println(); @@ -199,7 +199,7 @@ private static void testHanProp(int iterations, int total, String pname, String static String outdir = outdircore + "4.1.0/"; /** * @param pname - * + * */ private static int testUnicodeMapSerialization(int iterations, int total, String pname, UnicodeMap umap) throws IOException, ClassNotFoundException { System.out.print("\tValue Count:\t" + umap.getAvailableValues().size()); @@ -315,7 +315,7 @@ private static int testUnicodeMapSerialization(int iterations, int total, String } /** - * + * */ private static String showBuffer(byte[] buffer, long size) { final StringBuffer result = new StringBuffer(); @@ -329,7 +329,7 @@ private static String showBuffer(byte[] buffer, long size) { } /** - * + * */ private static void testStreamCompressor() throws IOException { final Object[] tests = { @@ -385,7 +385,7 @@ private static void testStreamCompressor() throws IOException { } /** - * + * */ private static void showBytes(byte[] buffer, int len) { for (int i = 0; i < len; ++i) { @@ -394,7 +394,7 @@ private static void showBytes(byte[] buffer, int len) { } /** - * + * */ private static UnicodeMap fixNameMap(BreakIterator bk, UnicodeMap umap) { final UnicodeMap temp = new UnicodeMap(); @@ -446,7 +446,7 @@ private static UnicodeMap fixNameMap(BreakIterator bk, UnicodeMap umap) { } /** - * + * */ private static void tryFileUnicodeProperty() { final UnicodeProperty.Factory factory = FileUnicodeProperty.Factory.make("4.1.0"); @@ -508,7 +508,7 @@ protected String _getValue(int codepoint) { } /** - * + * */ private void make() { try { diff --git a/unicodetools/src/main/java/org/unicode/text/utility/Utility.java b/unicodetools/src/main/java/org/unicode/text/utility/Utility.java index 8764c14c5..356e32da6 100644 --- a/unicodetools/src/main/java/org/unicode/text/utility/Utility.java +++ b/unicodetools/src/main/java/org/unicode/text/utility/Utility.java @@ -36,7 +36,7 @@ import java.util.Set; import java.util.TreeSet; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import org.unicode.text.UCD.Default; import org.unicode.text.UCD.UCD; import org.unicode.text.UCD.UCD_Types; diff --git a/unicodetools/src/main/java/org/unicode/tools/ListProps.java b/unicodetools/src/main/java/org/unicode/tools/ListProps.java index 8ea5baaaa..a715bb498 100644 --- a/unicodetools/src/main/java/org/unicode/tools/ListProps.java +++ b/unicodetools/src/main/java/org/unicode/tools/ListProps.java @@ -8,7 +8,7 @@ import java.util.LinkedHashSet; import java.util.Set; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import org.unicode.props.IndexUnicodeProperties; import org.unicode.props.PropertyStatus; import org.unicode.props.PropertyStatus.PropertyScope; @@ -34,7 +34,7 @@ public class ListProps { static final boolean ONLY_JSP = true; public static final Set SKIP_JSP_STATUS = ImmutableSet.of( - PropertyStatus.Deprecated, + PropertyStatus.Deprecated, PropertyStatus.Obsolete, PropertyStatus.Stabilized, PropertyStatus.Contributory, @@ -59,7 +59,7 @@ public static void main(String[] args) { // UnicodeSet combined = new UnicodeSet(ep).addAll(em).freeze(); // PropertyLister pl = new PropertyLister(latest); // System.out.println( -// pl.listSet(combined, +// pl.listSet(combined, // UcdProperty.Extended_Pictographic.toString(), // new StringBuilder())); // return; @@ -86,8 +86,8 @@ public static void main(String[] args) { Set values = map.values(); PropertyScope scope = PropertyStatus.getScope(propName); - String itemInfo = item - + "\tType:\t" + type + String itemInfo = item + + "\tType:\t" + type + "\tStatus:\t"+ CollectionUtilities.join(status, ", ") + "\tCard:\t" + cardinality + "\tDefVal:\t" + IndexUnicodeProperties.getDefaultValue(item) @@ -113,8 +113,8 @@ public static void main(String[] args) { if (propName.startsWith("k")) { switch (type) { case Miscellaneous: - case String: - if (item == UcdProperty.kSimplifiedVariant + case String: + if (item == UcdProperty.kSimplifiedVariant || item == UcdProperty.kTraditionalVariant) { break; } diff --git a/unicodetools/src/main/java/org/unicode/tools/Segmenter.java b/unicodetools/src/main/java/org/unicode/tools/Segmenter.java index 6b246c992..83722fdc0 100644 --- a/unicodetools/src/main/java/org/unicode/tools/Segmenter.java +++ b/unicodetools/src/main/java/org/unicode/tools/Segmenter.java @@ -27,7 +27,7 @@ import org.unicode.cldr.draft.FileUtilities; import org.unicode.cldr.util.RegexUtilities; import org.unicode.cldr.util.TransliteratorUtilities; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import org.unicode.text.UCD.Default; import org.unicode.text.UCD.ToolUnicodePropertySource; import org.unicode.text.utility.Settings; @@ -815,7 +815,7 @@ public UnicodeMap getSamples() { "9.3) $LinkingConsonant $ExtCccZwj* $Virama $ExtCccZwj* \u00D7 $LinkingConsonant", "# Do not break within emoji modifier sequences or emoji zwj sequences.", //"10) $E_Base $Extend* × $E_Modifier", - "11) $ExtPict $Extend* $ZWJ × $ExtPict", + "11) $ExtPict $Extend* $ZWJ × $ExtPict", "# Do not break within emoji flag sequences. That is, do not break between regional indicator (RI) symbols if there is an odd number of RI characters before the break point.", "12) ^ ($RI $RI)* $RI × $RI", "13) [^$RI] ($RI $RI)* $RI × $RI", diff --git a/unicodetools/src/main/java/org/unicode/tools/TestSegments.java b/unicodetools/src/main/java/org/unicode/tools/TestSegments.java index 1653df1ca..41e768a14 100644 --- a/unicodetools/src/main/java/org/unicode/tools/TestSegments.java +++ b/unicodetools/src/main/java/org/unicode/tools/TestSegments.java @@ -16,9 +16,8 @@ import org.unicode.cldr.util.CLDRPaths; import org.unicode.cldr.util.CldrUtility; import org.unicode.cldr.util.Log; -import org.unicode.cldr.util.props.ICUPropertyFactory; -import org.unicode.cldr.util.props.RandomStringGenerator; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.RandomStringGenerator; +import org.unicode.props.UnicodeProperty; import org.unicode.tools.Segmenter.Rule.Breaks; import com.ibm.icu.text.BreakIterator; @@ -26,6 +25,8 @@ import com.ibm.icu.text.UTF16; import com.ibm.icu.text.UnicodeSet; +import org.unicode.jsp.ICUPropertyFactory; + /** * Quick class for testing proposed syntax for Segments. * TODO doesn't yet handle supplementaries. It looks like even Java 5 won't help, since it doesn't have syntax for them. diff --git a/unicodetools/src/test/java/org/unicode/draft/TestCompressed.java b/unicodetools/src/test/java/org/unicode/draft/TestCompressed.java index 2a37681e8..f06653d95 100644 --- a/unicodetools/src/test/java/org/unicode/draft/TestCompressed.java +++ b/unicodetools/src/test/java/org/unicode/draft/TestCompressed.java @@ -14,8 +14,8 @@ import java.util.zip.GZIPOutputStream; import org.junit.jupiter.api.Test; -import org.unicode.cldr.util.props.ICUPropertyFactory; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.jsp.ICUPropertyFactory; +import org.unicode.props.UnicodeProperty; import org.unicode.draft.UnicodeDataInput.StringReader; import org.unicode.draft.UnicodeDataOutput.StringWriter; import org.unicode.unittest.TestFmwkMinusMinus; diff --git a/unicodetools/src/test/java/org/unicode/propstest/CheckProperties.java b/unicodetools/src/test/java/org/unicode/propstest/CheckProperties.java index 4dd70d2a2..e1687aecf 100644 --- a/unicodetools/src/test/java/org/unicode/propstest/CheckProperties.java +++ b/unicodetools/src/test/java/org/unicode/propstest/CheckProperties.java @@ -19,8 +19,8 @@ import org.unicode.cldr.util.Tabber; import org.unicode.cldr.util.Timer; import org.unicode.cldr.util.With; -import org.unicode.cldr.util.props.ICUPropertyFactory; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.jsp.ICUPropertyFactory; +import org.unicode.props.UnicodeProperty; import org.unicode.draft.UnicodeDataOutput; import org.unicode.draft.UnicodeDataOutput.ItemWriter; import org.unicode.props.IndexUnicodeProperties; @@ -550,7 +550,7 @@ public static List checkFiles(Set latestFiles, File dir, List summary) { UnicodeMap lastMap; UnicodeMap latestMap; @@ -619,7 +619,7 @@ public static void showChanges(UcdProperty prop, UnicodeSet retain, + "\t" + getHexAndName(chars.iterator().next())); continue; } - currentOut.print(prop + currentOut.print(prop + "\t" + value //+ "\t" + FIX_INVISIBLES.transform(chars.toPattern(false)) + "\t" + chars.size() @@ -638,11 +638,11 @@ public static void showChanges(UcdProperty prop, UnicodeSet retain, } if (others.size() != 0) { //indent = "\t\t\t\t\t\t#\t"; - currentOut.println(prop + currentOut.println(prop + "\t" + "OTHERS\t\t" //+ "\t" + FIX_INVISIBLES.transform(chars.toPattern(false)) + "\t" + others.size() - + "\t" + abbreviate(others, 200, false) // others.toPattern(false) // + + "\t" + abbreviate(others, 200, false) // others.toPattern(false) // ); } } diff --git a/unicodetools/src/test/java/org/unicode/propstest/CheckXmlProperties.java b/unicodetools/src/test/java/org/unicode/propstest/CheckXmlProperties.java index dfca15abe..3ddf96470 100644 --- a/unicodetools/src/test/java/org/unicode/propstest/CheckXmlProperties.java +++ b/unicodetools/src/test/java/org/unicode/propstest/CheckXmlProperties.java @@ -1,7 +1,7 @@ package org.unicode.propstest; import org.unicode.cldr.util.Timer; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import org.unicode.props.IndexUnicodeProperties; import org.unicode.props.UcdProperty; import org.unicode.props.ValueCardinality; @@ -13,7 +13,7 @@ public class CheckXmlProperties { /** * TODO Known problems - * + * Property cp xml unicodetools Numeric_Value 109F7 [2/12] [1/6] ... Formatting issue with rationals @@ -111,12 +111,12 @@ public static void main(String[] args) { System.out.println("\nProperty\t cp\t xml\t unicodetools"); } if (++errors < 11) { - System.out.println(prop - + "\t" + Utility.hex(i) - + "\t" + XMLProperties.show(xval) + System.out.println(prop + + "\t" + Utility.hex(i) + + "\t" + XMLProperties.show(xval) + "\t" + XMLProperties.show(ival)); } - errorMap.put(i, XMLProperties.show(xval) + errorMap.put(i, XMLProperties.show(xval) + "\t" + XMLProperties.show(ival)); } } diff --git a/unicodetools/src/test/java/org/unicode/propstest/FileUnicodeProperty.java b/unicodetools/src/test/java/org/unicode/propstest/FileUnicodeProperty.java index 874612ae9..443c30ebe 100644 --- a/unicodetools/src/test/java/org/unicode/propstest/FileUnicodeProperty.java +++ b/unicodetools/src/test/java/org/unicode/propstest/FileUnicodeProperty.java @@ -1,7 +1,7 @@ package org.unicode.propstest; -import org.unicode.cldr.util.props.UnicodeProperty; -import org.unicode.cldr.util.props.UnicodeProperty.UnicodeMapProperty; +import org.unicode.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty.UnicodeMapProperty; public class FileUnicodeProperty extends UnicodeMapProperty { public static class Factory extends UnicodeProperty.Factory { diff --git a/unicodetools/src/test/java/org/unicode/propstest/ShowDifferences.java b/unicodetools/src/test/java/org/unicode/propstest/ShowDifferences.java index e3fcf9c55..2cf5ee96b 100644 --- a/unicodetools/src/test/java/org/unicode/propstest/ShowDifferences.java +++ b/unicodetools/src/test/java/org/unicode/propstest/ShowDifferences.java @@ -7,7 +7,7 @@ import java.util.Map; import java.util.Map.Entry; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import org.unicode.props.GenerateEnums; import org.unicode.props.IndexUnicodeProperties; import org.unicode.props.UcdProperty; @@ -37,9 +37,9 @@ public int hashCode() { } @Override public boolean equals(Object obj) { - return obj != null - && obj.getClass() == DPair.class - && Objects.equal(first, ((DPair) obj).first) + return obj != null + && obj.getClass() == DPair.class + && Objects.equal(first, ((DPair) obj).first) && Objects.equal(second, ((DPair) obj).second); } @Override @@ -49,14 +49,14 @@ public String toString() { } /** - * Computes differences between two versions. The args should either be [old] [new] or empty + * Computes differences between two versions. The args should either be [old] [new] or empty * (for the most recent two versions). * @param args */ public static void main(String[] args) { final String OLD_VERSION = args.length > 0 ? args[0] : Settings.lastVersion; final String NEW_VERSION = args.length > 1 ? args[1] : Settings.latestVersion; - + final IndexUnicodeProperties latestVersion = IndexUnicodeProperties.make(NEW_VERSION); final IndexUnicodeProperties lastVersion = IndexUnicodeProperties.make(OLD_VERSION); @@ -84,7 +84,7 @@ public static void main(String[] args) { try { latestMap = latestVersion.load(prop); } catch (Exception e) {} - + UnicodeMap> diff = new UnicodeMap<>(); UnicodeMap> newDiff = new UnicodeMap<>(); @@ -186,8 +186,8 @@ private static int displayDiff(int count, UcdProperty prop, UnicodeMap propUmOld = oldProp.getUnicodeMap(); // UnicodeMap propUmNew = prop.getUnicodeMap(); -// String message = UnicodeProperty.getTypeName(prop.getType()) -// + "\t" + CollectionUtilities.join(prop.getNameAliases(), ", ") +// String message = UnicodeProperty.getTypeName(prop.getType()) +// + "\t" + CollectionUtilities.join(prop.getNameAliases(), ", ") // + "\tvalues: " + availableValues.size(); // if (propUmOld.equals(propUmNew)) { // System.out.println(message); @@ -198,7 +198,7 @@ private static int displayDiff(int count, UcdProperty prop, UnicodeMap> remainder = it2.hasNext() ? it2 : it1; // while (remainder.hasNext()) { // EntryRange er = remainder.next(); diff --git a/unicodetools/src/test/java/org/unicode/propstest/ShowTirhuta.java b/unicodetools/src/test/java/org/unicode/propstest/ShowTirhuta.java index a10336607..87895a211 100644 --- a/unicodetools/src/test/java/org/unicode/propstest/ShowTirhuta.java +++ b/unicodetools/src/test/java/org/unicode/propstest/ShowTirhuta.java @@ -5,7 +5,7 @@ import java.util.Map.Entry; import java.util.TreeMap; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import org.unicode.props.IndexUnicodeProperties; import org.unicode.text.UCD.Default; @@ -18,12 +18,12 @@ public class ShowTirhuta { public static void main(String[] args) { - + Transform t = Transliterator.createFromRules("id", "([:di:]) > &hex($1);", Transliterator.FORWARD); String source = "abc\u00ADd\u034Fe"; String formatted = t.transform(source); System.out.println(source + " => " + formatted); - + UnicodeSet us = new UnicodeSet("[:di:]").freeze(); UnicodeSet x = new UnicodeSet().addAll(source).retainAll(us); StringBuilder b = new StringBuilder(); @@ -38,9 +38,9 @@ public static void main(String[] args) { } System.out.println(b); // abc­d => abc\u00ADd - - - + + + if (true) return; IndexUnicodeProperties latest = IndexUnicodeProperties.make(Default.ucdVersion()); UnicodeProperty scriptProp = latest.getProperty("sc"); diff --git a/unicodetools/src/test/java/org/unicode/propstest/TestScriptMetadata.java b/unicodetools/src/test/java/org/unicode/propstest/TestScriptMetadata.java index ff694d774..74c7f0813 100644 --- a/unicodetools/src/test/java/org/unicode/propstest/TestScriptMetadata.java +++ b/unicodetools/src/test/java/org/unicode/propstest/TestScriptMetadata.java @@ -8,7 +8,7 @@ import org.unicode.cldr.draft.ScriptMetadata; import org.unicode.cldr.draft.ScriptMetadata.Info; import org.unicode.cldr.draft.ScriptMetadata.Trinary; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import org.unicode.props.IndexUnicodeProperties; import org.unicode.props.UcdProperty; import org.unicode.props.UcdPropertyValues; diff --git a/unicodetools/src/test/java/org/unicode/propstest/TestXUnicodeSet.java b/unicodetools/src/test/java/org/unicode/propstest/TestXUnicodeSet.java index 3b54e71c9..485775f58 100644 --- a/unicodetools/src/test/java/org/unicode/propstest/TestXUnicodeSet.java +++ b/unicodetools/src/test/java/org/unicode/propstest/TestXUnicodeSet.java @@ -2,11 +2,13 @@ import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import org.unicode.props.IndexUnicodeProperties; import org.unicode.text.UCD.Default; import org.unicode.unittest.TestFmwkMinusMinus; +import org.unicode.jsp.MySymbolTable; + import com.ibm.icu.text.UnicodeSet; public class TestXUnicodeSet extends TestFmwkMinusMinus { @@ -28,7 +30,7 @@ public class TestXUnicodeSet extends TestFmwkMinusMinus { @Test public void TestAge() { try { - org.unicode.jsp.MySymbolTable.setDefaultXSymbolTable(IUP); + MySymbolTable.setDefaultXSymbolTable(IUP); UnicodeSet v70 = new UnicodeSet("[:age=7.0:]").complement().complement(); UnicodeSet v63 = new UnicodeSet("[:age=6.3:]").complement().complement(); @@ -42,7 +44,7 @@ public void TestAge() { // System.out.println(Utility.hex(s) + "\t" + age.getValue(s.codePointAt(0)) + "\t" + name.getValue(s.codePointAt(0))); // } } finally { - org.unicode.jsp.MySymbolTable.setDefaultXSymbolTable(null); + MySymbolTable.setDefaultXSymbolTable(null); } } } diff --git a/unicodetools/src/test/java/org/unicode/test/TestBreaks.java b/unicodetools/src/test/java/org/unicode/test/TestBreaks.java index 2cc70219f..ede5e49e3 100644 --- a/unicodetools/src/test/java/org/unicode/test/TestBreaks.java +++ b/unicodetools/src/test/java/org/unicode/test/TestBreaks.java @@ -1,7 +1,7 @@ package org.unicode.test; import org.unicode.cldr.draft.FileUtilities; -import org.unicode.cldr.util.props.UnicodePropertySymbolTable; +import org.unicode.props.UnicodePropertySymbolTable; import org.unicode.parse.EBNF; import org.unicode.parse.EBNF.Position; import org.unicode.props.IndexUnicodeProperties; @@ -12,13 +12,13 @@ public class TestBreaks { static final boolean DEBUG = true; - + static final IndexUnicodeProperties IUP = IndexUnicodeProperties.make(Settings.latestVersion); static final XSymbolTable toolUPS = new UnicodePropertySymbolTable(IUP); static { UnicodeSet.setDefaultXSymbolTable(toolUPS); } - + public static void main(String[] args) { EBNF bnf = new EBNF(); @@ -74,7 +74,7 @@ public static void main(String[] args) { System.out.println(p); bnfValue = bnf.match(line, 0, p.clear()); } - } + } } } } diff --git a/unicodetools/src/test/java/org/unicode/test/TestSegment.java b/unicodetools/src/test/java/org/unicode/test/TestSegment.java index 9f2907c93..02e7c7389 100644 --- a/unicodetools/src/test/java/org/unicode/test/TestSegment.java +++ b/unicodetools/src/test/java/org/unicode/test/TestSegment.java @@ -15,7 +15,7 @@ import org.unicode.cldr.draft.FileUtilities; import org.unicode.cldr.util.BNF; import org.unicode.cldr.util.MapComparator; -import org.unicode.cldr.util.props.UnicodePropertySymbolTable; +import org.unicode.props.UnicodePropertySymbolTable; import org.unicode.jsp.UnicodeRegex; import org.unicode.props.IndexUnicodeProperties; import org.unicode.props.UcdProperty; @@ -47,8 +47,8 @@ public class TestSegment { TestSegment(String testBnf) { BNF foo; - - + + StringBuilder generationRules = new StringBuilder(); for (String line : FileUtilities.in(TestSegment.class, testBnf)) { if (line.startsWith("#")) { @@ -120,7 +120,7 @@ public static List getbreaks(Pattern bnf, List results, CharSe end = matcher.end(); } else { end = Character.offsetByCodePoints(charSequence, end, 1); - } + } results.add(end); if (end == length) { break; @@ -171,19 +171,19 @@ static UnicodeMap pickBestExemplars(UcdProperty firstProp, Map mainMap = new UnicodeMap<>(iup.load(firstProp)); - + UnicodeMap partition = getPartition(extras); UnicodeMap check = new UnicodeMap(mainMap).composeWith(partition, PROP_COMPOSER); // now pick single values Set rawValues = mainMap.values(); UnicodeMap result = new UnicodeMap<>(); - - for (String value : rawValues) { + + for (String value : rawValues) { Enum propValue2 = firstProp.getEnum(value); UnicodeSet uset = mainMap.getSet(value); for (String partitionValue : partition.values()) { UnicodeSet oSet = partition.getSet(partitionValue); - if (oSet.containsSome(uset) + if (oSet.containsSome(uset) && !oSet.containsAll(uset)) { String positive = getBestExemplar(new UnicodeSet(uset).retainAll(oSet)); String negative = getBestExemplar(new UnicodeSet(uset).removeAll(oSet)); @@ -195,7 +195,7 @@ static UnicodeMap pickBestExemplars(UcdProperty firstProp, Map pickBestExemplars(UcdProperty firstProp, Map getPartition(Map nameToSet) { UnicodeMap result = null; - + for (Entry entry : nameToSet.entrySet()) { String name = entry.getKey(); UnicodeSet uset = entry.getValue(); @@ -226,7 +226,7 @@ private static UnicodeMap getPartition(Map nameToSet } static Splitter D_BAR = Splitter.on(Pattern.compile("__")); - + private static String getBestExemplar(UnicodeSet uset) { Set set = uset.addAllTo(new TreeSet(BestExemplarLess)); return set.iterator().next(); @@ -250,7 +250,7 @@ public int compare(String o1, String o2) { } private int bestGc(int cp, General_Category_Values gc) { - return gc == General_Category_Values.Control && !WHITESPACE.contains(cp) + return gc == General_Category_Values.Control && !WHITESPACE.contains(cp) ? gcMap.getNumericOrder(General_Category_Values.Unassigned) : gcMap.getNumericOrder(gc); @@ -302,7 +302,7 @@ private int bestGc(int cp, General_Category_Values gc) { @Override public String compose(int codePoint, String string, String a, String b) { return a == null ? (b == null ? null : "__" + b) - : b == null ? a + "__" + : b == null ? a + "__" : a + "__" + b; } }; @@ -331,27 +331,27 @@ private static void testSegments() { UnicodeSet.setDefaultXSymbolTable(toolUPS); TestSegment gc = new TestSegment("SegmentBnf" + "Ccs" + ".txt"); - gc.test("Ccs", Arrays.asList("÷ 0020 ÷ 0020 ÷", - "÷ 0020 0308 ÷", - "÷ 0020 0308 ÷ 0061 ÷", - "÷ 0020 0301 0301 ÷ 0061 ÷", + gc.test("Ccs", Arrays.asList("÷ 0020 ÷ 0020 ÷", + "÷ 0020 0308 ÷", + "÷ 0020 0308 ÷ 0061 ÷", + "÷ 0020 0301 0301 ÷ 0061 ÷", "÷ 0061 ÷ 0062 ÷")); TestSegment gc2 = new TestSegment("SegmentBnf" + "GraphemeBreakSimple" + ".txt"); - gc2.test("GBS", Arrays.asList("÷ 0020 ÷ 0020 ÷", - "÷ 0020 0308 ÷", - "÷ 0020 0308 ÷ 0061 ÷", - "÷ 0020 0301 0301 ÷ 0061 ÷", + gc2.test("GBS", Arrays.asList("÷ 0020 ÷ 0020 ÷", + "÷ 0020 0308 ÷", + "÷ 0020 0308 ÷ 0061 ÷", + "÷ 0020 0301 0301 ÷ 0061 ÷", "÷ 0061 ÷ 0062 ÷", "÷ 000D 000A ÷", - "÷ 1F1E6 1F1E7 ÷", - "÷ 1F1E6 1F1E7 ÷ 1F1E8 ÷", + "÷ 1F1E6 1F1E7 ÷", + "÷ 1F1E6 1F1E7 ÷ 1F1E8 ÷", "÷ 1F1E6 1F1E7 ÷ 1F1E8 1F1E9 ÷")); TestSegment gc3 = new TestSegment("SegmentBnf" + "GraphemeBreak" + ".txt"); gc3.test("Gb", "/Users/markdavis/Documents/workspace/unicode-draft/Public/UCD/auxiliary/","GraphemeBreak"); - gc3.test("GB+", Arrays.asList("÷ 1F1E6 1F1E7 ÷", - "÷ 1F1E6 1F1E7 ÷ 1F1E8 ÷", + gc3.test("GB+", Arrays.asList("÷ 1F1E6 1F1E7 ÷", + "÷ 1F1E6 1F1E7 ÷ 1F1E8 ÷", "÷ 1F1E6 1F1E7 ÷ 1F1E8 1F1E9 ÷")); // TestSegment gc4 = new TestSegment("SegmentBnf" + "WordBreak" + ".txt"); @@ -368,7 +368,7 @@ private static void checkExemplars() { makesDifference); show(exemplars); show(makesDifference); - + Builder segmenter = Segmenter.make(ToolUnicodePropertySource.make(Default.ucd().getVersion()),"GraphemeClusterBreak"); getExemplarStrings(exemplars, segmenter); @@ -379,7 +379,7 @@ private static void getExemplarStrings(UnicodeMap exemplars, Builder segm for (Entry entry : srules.entrySet()) { System.out.println(entry.getKey() + "\t\t" + entry.getValue()); } - + } } diff --git a/unicodetools/src/test/java/org/unicode/test/TestUnicodeMapParser.java b/unicodetools/src/test/java/org/unicode/test/TestUnicodeMapParser.java index c60e21e04..9ccf92351 100644 --- a/unicodetools/src/test/java/org/unicode/test/TestUnicodeMapParser.java +++ b/unicodetools/src/test/java/org/unicode/test/TestUnicodeMapParser.java @@ -4,7 +4,7 @@ import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; -import org.unicode.cldr.util.props.UnicodeProperty; +import org.unicode.props.UnicodeProperty; import org.unicode.props.IndexUnicodeProperties; import org.unicode.text.UCD.UnicodeMapParser; import org.unicode.text.UCD.UnicodeMapParser.ValueParser;