From a60805cacf18d3aedb1259c56bdb25106b9462c2 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 25 Mar 2025 15:28:57 +0100 Subject: [PATCH 01/38] aaaaa --- .../text/UCD/VersionedSymbolTable.java | 155 ++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java new file mode 100644 index 000000000..405110799 --- /dev/null +++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java @@ -0,0 +1,155 @@ +package org.unicode.text.UCD; + +import java.util.Map; +import java.util.TreeMap; + +import org.unicode.props.IndexUnicodeProperties; +import org.unicode.props.UcdProperty; +import org.unicode.props.UnicodeProperty; +import org.unicode.text.utility.Settings; + +import com.ibm.icu.text.UnicodeSet; +import com.ibm.icu.util.VersionInfo; + +/** + * This class implements the semantics of property-query as defined in the + * UnicodeSet specification. + */ +public class VersionedSymbolTable extends UnicodeSet.XSymbolTable { + private VersionedSymbolTable() {} + public static VersionedSymbolTable forReview() { + var result = new VersionedSymbolTable(); + result.requireSuffixForLatest = true; + result.implicitVersion = Settings.LAST_VERSION_INFO; + result.previousVersion = Settings.LAST2_VERSION_INFO; + return result; + } + public static VersionedSymbolTable forDevelopment() { + var result = new VersionedSymbolTable(); + result.requireSuffixForLatest = false; + result.implicitVersion = Settings.LATEST_VERSION_INFO; + result.previousVersion = Settings.LAST_VERSION_INFO; + return result; + } + + /** Parses a string prefixed with an optional-version-qualifier. + * If there is a version-qualifier, returns the corresponding VersionInfo and + * removes the prefix from the given StringBuilder. + */ + private VersionInfo parseVersionQualifier(StringBuilder qualified) { + int posColon = qualified.indexOf(":", 0); + if (posColon < 0) { + return null; + } else { + final String versionQualifier = qualified.substring(0, posColon + 1); + qualified.delete(0, posColon + 1); + if (versionQualifier.equals("U-1")) { + return previousVersion; + } else { + switch (versionQualifier.charAt(0)) { + case 'R': + // Extension: we allow a version-qualifier starting with R for retroactive properties, that + // is, property derivations applied before the property existed. + case 'U': + break; + default: + throw new IllegalArgumentException("Invalid version-qualifier " + versionQualifier); + } + String versionNumber = versionQualifier.substring(1, posColon + 1); + if (versionNumber.endsWith("dev")) { + versionNumber = versionNumber.substring(0, versionNumber.length() - 3); + if (!versionNumber.isEmpty() && + VersionInfo.getInstance(versionNumber) != Settings.LATEST_VERSION_INFO) { + throw new IllegalArgumentException("Invalid version-qualifier " + versionQualifier + " with version-suffix dev: the current dev version is " + Settings.latestVersion); + } + return Settings.LATEST_VERSION_INFO; + } else if (versionNumber.endsWith("α") || versionNumber.endsWith("β")) { + final String versionSuffix = versionNumber.substring(versionNumber.length() - 1); + versionNumber = versionNumber.substring(0, versionNumber.length() - 1); + if (versionSuffix != Settings.latestVersionPhase.toString()) { + throw new IllegalArgumentException("Invalid version-qualifier " + versionQualifier + " with version-suffix " + versionSuffix + ": the current stage is " + Settings.latestVersionPhase); + } + if (!versionNumber.isEmpty() && + VersionInfo.getInstance(versionNumber) != Settings.LATEST_VERSION_INFO) { + throw new IllegalArgumentException("Invalid version-qualifier " + versionQualifier + " with version-suffix " + versionNumber + ": the current " + versionSuffix + " version is " + Settings.latestVersion); + } + return Settings.LATEST_VERSION_INFO; + } else { + var result = VersionInfo.getInstance(versionNumber); + if (result == Settings.LATEST_VERSION_INFO && requireSuffixForLatest) { + throw new IllegalArgumentException("Invalid version-qualifier " + versionQualifier + " version-suffix " + Settings.latestVersionPhase + " required for unpublished version"); + } + return result; + } + } + } + } + + @Override + public boolean applyPropertyAlias(String beforeEquals, String afterEquals, UnicodeSet result) { + String leftHandSide = beforeEquals; + String propertyPredicate = afterEquals; + boolean interiorlyNegated = false; + int posNotEqual = beforeEquals.indexOf('≠'); + // TODO(egg): We cannot distinguish \p{X=} from \p{X} in this API, both give us an empty string + // as afterEquals. + if (posNotEqual >= 0) { + propertyPredicate = afterEquals.length() == 0 + ? beforeEquals.substring(posNotEqual + 1) + : beforeEquals.substring(posNotEqual + 1) + "=" + afterEquals; + leftHandSide = beforeEquals.substring(0, posNotEqual); + interiorlyNegated = true; + } + + final var queriedPropertyName = new StringBuilder(leftHandSide); + final var queriedVersion = parseVersionQualifier(queriedPropertyName); + final var deducedQueriedVersion = queriedVersion == null ? implicitVersion : queriedVersion; + + final var queriedUcd = IndexUnicodeProperties.make(deducedQueriedVersion); + + var generalCategory = queriedUcd.getProperty(UcdProperty.General_Category); + var script = queriedUcd.getProperty(UcdProperty.Script); + + UnicodeProperty queriedProperty = queriedUcd.getProperty(queriedPropertyName.toString()); + if (propertyPredicate.length() != 0) { + if (queriedProperty == null) { + propertyValue = propertyValue.trim(); + } else if (prop.isTrimmable()) { + propertyValue = propertyValue.trim(); + } else { + int debug = 0; + } + status = applyPropertyAlias0(prop, propertyValue, result, invert); + } else { + try { + status = applyPropertyAlias0(gcProp, versionlessPropertyName, result, invert); + } catch (Exception e) { + } + ; + if (!status) { + try { + status = applyPropertyAlias0( + scProp, versionlessPropertyName, result, invert); + } catch (Exception e) { + } + if (!status) { + if (prop.isType(UnicodeProperty.BINARY_OR_ENUMERATED_OR_CATALOG_MASK)) { + try { + status = applyPropertyAlias0(prop, "No", result, !invert); + } catch (Exception e) { + } + } + if (!status) { + status = applyPropertyAlias0(prop, "", result, invert); + } + } + } + } + //TODO(egg):Something about a factory as a fallback; + return status; + } + + private VersionInfo implicitVersion; + private VersionInfo previousVersion; + private boolean requireSuffixForLatest; +} From cebd1b5712aeeedc30f7944ca6122ae24331a669 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 25 Mar 2025 22:23:53 +0100 Subject: [PATCH 02/38] meow --- .../text/UCD/VersionedSymbolTable.java | 331 +++++++++++------- 1 file changed, 204 insertions(+), 127 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java index 405110799..c1acd2326 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java @@ -1,155 +1,232 @@ package org.unicode.text.UCD; +import com.ibm.icu.text.UnicodeSet; +import com.ibm.icu.util.VersionInfo; import java.util.Map; -import java.util.TreeMap; - import org.unicode.props.IndexUnicodeProperties; import org.unicode.props.UcdProperty; +import org.unicode.props.UcdPropertyValues; import org.unicode.props.UnicodeProperty; import org.unicode.text.utility.Settings; -import com.ibm.icu.text.UnicodeSet; -import com.ibm.icu.util.VersionInfo; - /** - * This class implements the semantics of property-query as defined in the - * UnicodeSet specification. + * This class implements the semantics of property-query as defined in the UnicodeSet specification. */ public class VersionedSymbolTable extends UnicodeSet.XSymbolTable { - private VersionedSymbolTable() {} - public static VersionedSymbolTable forReview() { - var result = new VersionedSymbolTable(); - result.requireSuffixForLatest = true; - result.implicitVersion = Settings.LAST_VERSION_INFO; - result.previousVersion = Settings.LAST2_VERSION_INFO; - return result; - } - public static VersionedSymbolTable forDevelopment() { - var result = new VersionedSymbolTable(); - result.requireSuffixForLatest = false; - result.implicitVersion = Settings.LATEST_VERSION_INFO; - result.previousVersion = Settings.LAST_VERSION_INFO; - return result; - } + private VersionedSymbolTable() {} - /** Parses a string prefixed with an optional-version-qualifier. - * If there is a version-qualifier, returns the corresponding VersionInfo and - * removes the prefix from the given StringBuilder. - */ - private VersionInfo parseVersionQualifier(StringBuilder qualified) { - int posColon = qualified.indexOf(":", 0); - if (posColon < 0) { - return null; - } else { - final String versionQualifier = qualified.substring(0, posColon + 1); - qualified.delete(0, posColon + 1); - if (versionQualifier.equals("U-1")) { - return previousVersion; - } else { - switch (versionQualifier.charAt(0)) { - case 'R': - // Extension: we allow a version-qualifier starting with R for retroactive properties, that - // is, property derivations applied before the property existed. - case 'U': - break; - default: - throw new IllegalArgumentException("Invalid version-qualifier " + versionQualifier); - } - String versionNumber = versionQualifier.substring(1, posColon + 1); - if (versionNumber.endsWith("dev")) { - versionNumber = versionNumber.substring(0, versionNumber.length() - 3); - if (!versionNumber.isEmpty() && - VersionInfo.getInstance(versionNumber) != Settings.LATEST_VERSION_INFO) { - throw new IllegalArgumentException("Invalid version-qualifier " + versionQualifier + " with version-suffix dev: the current dev version is " + Settings.latestVersion); - } - return Settings.LATEST_VERSION_INFO; - } else if (versionNumber.endsWith("α") || versionNumber.endsWith("β")) { - final String versionSuffix = versionNumber.substring(versionNumber.length() - 1); - versionNumber = versionNumber.substring(0, versionNumber.length() - 1); - if (versionSuffix != Settings.latestVersionPhase.toString()) { - throw new IllegalArgumentException("Invalid version-qualifier " + versionQualifier + " with version-suffix " + versionSuffix + ": the current stage is " + Settings.latestVersionPhase); - } - if (!versionNumber.isEmpty() && - VersionInfo.getInstance(versionNumber) != Settings.LATEST_VERSION_INFO) { - throw new IllegalArgumentException("Invalid version-qualifier " + versionQualifier + " with version-suffix " + versionNumber + ": the current " + versionSuffix + " version is " + Settings.latestVersion); - } - return Settings.LATEST_VERSION_INFO; + public static VersionedSymbolTable forReview() { + var result = new VersionedSymbolTable(); + result.requireSuffixForLatest = true; + result.implicitVersion = Settings.LAST_VERSION_INFO; + result.previousVersion = Settings.LAST2_VERSION_INFO; + return result; + } + + public static VersionedSymbolTable forDevelopment() { + var result = new VersionedSymbolTable(); + result.requireSuffixForLatest = false; + result.implicitVersion = Settings.LATEST_VERSION_INFO; + result.previousVersion = Settings.LAST_VERSION_INFO; + return result; + } + + /** + * Parses a string prefixed with an optional-version-qualifier. If there is a version-qualifier, + * returns the corresponding VersionInfo and removes the prefix from the given StringBuilder. + */ + private VersionInfo parseVersionQualifier(StringBuilder qualified) { + int posColon = qualified.indexOf(":", 0); + if (posColon < 0) { + return null; } else { - var result = VersionInfo.getInstance(versionNumber); - if (result == Settings.LATEST_VERSION_INFO && requireSuffixForLatest) { - throw new IllegalArgumentException("Invalid version-qualifier " + versionQualifier + " version-suffix " + Settings.latestVersionPhase + " required for unpublished version"); - } - return result; + final String versionQualifier = qualified.substring(0, posColon + 1); + qualified.delete(0, posColon + 1); + if (versionQualifier.equals("U-1")) { + return previousVersion; + } else { + switch (versionQualifier.charAt(0)) { + case 'R': + // Extension: we allow a version-qualifier starting with R for retroactive + // properties, that + // is, property derivations applied before the property existed. + case 'U': + break; + default: + throw new IllegalArgumentException( + "Invalid version-qualifier " + versionQualifier); + } + String versionNumber = versionQualifier.substring(1, posColon + 1); + if (versionNumber.endsWith("dev")) { + versionNumber = versionNumber.substring(0, versionNumber.length() - 3); + if (!versionNumber.isEmpty() + && VersionInfo.getInstance(versionNumber) + != Settings.LATEST_VERSION_INFO) { + throw new IllegalArgumentException( + "Invalid version-qualifier " + + versionQualifier + + " with version-suffix dev: the current dev version is " + + Settings.latestVersion); + } + return Settings.LATEST_VERSION_INFO; + } else if (versionNumber.endsWith("α") || versionNumber.endsWith("β")) { + final String versionSuffix = + versionNumber.substring(versionNumber.length() - 1); + versionNumber = versionNumber.substring(0, versionNumber.length() - 1); + if (versionSuffix != Settings.latestVersionPhase.toString()) { + throw new IllegalArgumentException( + "Invalid version-qualifier " + + versionQualifier + + " with version-suffix " + + versionSuffix + + ": the current stage is " + + Settings.latestVersionPhase); + } + if (!versionNumber.isEmpty() + && VersionInfo.getInstance(versionNumber) + != Settings.LATEST_VERSION_INFO) { + throw new IllegalArgumentException( + "Invalid version-qualifier " + + versionQualifier + + " with version-suffix " + + versionNumber + + ": the current " + + versionSuffix + + " version is " + + Settings.latestVersion); + } + return Settings.LATEST_VERSION_INFO; + } else { + var result = VersionInfo.getInstance(versionNumber); + if (result == Settings.LATEST_VERSION_INFO && requireSuffixForLatest) { + throw new IllegalArgumentException( + "Invalid version-qualifier " + + versionQualifier + + " version-suffix " + + Settings.latestVersionPhase + + " required for unpublished version"); + } + return result; + } + } } - } } - } - @Override - public boolean applyPropertyAlias(String beforeEquals, String afterEquals, UnicodeSet result) { - String leftHandSide = beforeEquals; - String propertyPredicate = afterEquals; - boolean interiorlyNegated = false; - int posNotEqual = beforeEquals.indexOf('≠'); - // TODO(egg): We cannot distinguish \p{X=} from \p{X} in this API, both give us an empty string - // as afterEquals. - if (posNotEqual >= 0) { - propertyPredicate = afterEquals.length() == 0 - ? beforeEquals.substring(posNotEqual + 1) - : beforeEquals.substring(posNotEqual + 1) + "=" + afterEquals; - leftHandSide = beforeEquals.substring(0, posNotEqual); - interiorlyNegated = true; + private static Map + COARSE_GENERAL_CATEGORIES = + Map.of( + UcdPropertyValues.General_Category_Values.Other, + new String[] {"Cc", "Cf", "Cn", "Co", "Cs"}, + UcdPropertyValues.General_Category_Values.Letter, + new String[] {"Ll", "Lm", "Lo", "Lt", "Lu"}, + UcdPropertyValues.General_Category_Values.Cased_Letter, + new String[] {"Ll", "Lt", "Lu"}, + UcdPropertyValues.General_Category_Values.Mark, + new String[] {"Mc", "Me", "Mn"}, + UcdPropertyValues.General_Category_Values.Number, + new String[] {"Nd", "Nl", "No"}, + UcdPropertyValues.General_Category_Values.Punctuation, + new String[] {"Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps"}, + UcdPropertyValues.General_Category_Values.Symbol, + new String[] {"Sc", "Sk", "Sm", "So"}, + UcdPropertyValues.General_Category_Values.Separator, + new String[] {"Zl", "Zp", "Zs"}); + + /** + * Similar to iup.getProperty(UcdProperty.General_Category).getSet(propertyValue), but takes the + * groupings into account. Implements both unary-query for a General_Category alias and + * binary-query with a property-value where the queried property is General_Category. + */ + private UnicodeSet getGeneralCategorySet(IndexUnicodeProperties iup, String propertyValue) { + var gc = iup.getProperty(UcdProperty.General_Category); + for (var entry : COARSE_GENERAL_CATEGORIES.entrySet()) { + final var aliases = entry.getKey().getNames().getAllNames(); + if (aliases.stream().anyMatch(a -> UnicodeProperty.equalNames(propertyValue, a))) { + UnicodeSet result = new UnicodeSet(); + for (var value : entry.getValue()) { + gc.getSet(value, result); + } + return result; + } + } + return gc.getSet(propertyValue); } - final var queriedPropertyName = new StringBuilder(leftHandSide); - final var queriedVersion = parseVersionQualifier(queriedPropertyName); - final var deducedQueriedVersion = queriedVersion == null ? implicitVersion : queriedVersion; + @Override + public boolean applyPropertyAlias(String beforeEquals, String afterEquals, UnicodeSet result) { + String leftHandSide = beforeEquals; + String propertyPredicate = afterEquals; + boolean interiorlyNegated = false; + int posNotEqual = beforeEquals.indexOf('≠'); + // TODO(egg): We cannot distinguish \p{X=} from \p{X} in this API, both give us an empty + // string + // as afterEquals. This is an @internal API, so we could change it to pass null in the + // unary + // case. + if (posNotEqual >= 0) { + propertyPredicate = + afterEquals.length() == 0 + ? beforeEquals.substring(posNotEqual + 1) + : beforeEquals.substring(posNotEqual + 1) + "=" + afterEquals; + leftHandSide = beforeEquals.substring(0, posNotEqual); + interiorlyNegated = true; + } + if (interiorlyNegated) { + final var complement = getNonNegatedPropertyQuerySet(leftHandSide, propertyPredicate); + result.addAll(complement.complement().removeAllStrings()); + } else { + result.addAll(getNonNegatedPropertyQuerySet(leftHandSide, propertyPredicate)); + } + return true; + } - final var queriedUcd = IndexUnicodeProperties.make(deducedQueriedVersion); + private UnicodeSet getNonNegatedPropertyQuerySet( + String leftHandSide, String propertyPredicate) { + final var unqualifiedLeftHandSide = new StringBuilder(leftHandSide); + final var queriedVersion = parseVersionQualifier(unqualifiedLeftHandSide); + final var deducedQueriedVersion = queriedVersion == null ? implicitVersion : queriedVersion; - var generalCategory = queriedUcd.getProperty(UcdProperty.General_Category); - var script = queriedUcd.getProperty(UcdProperty.Script); + final var queriedProperties = IndexUnicodeProperties.make(deducedQueriedVersion); - UnicodeProperty queriedProperty = queriedUcd.getProperty(queriedPropertyName.toString()); - if (propertyPredicate.length() != 0) { - if (queriedProperty == null) { - propertyValue = propertyValue.trim(); - } else if (prop.isTrimmable()) { - propertyValue = propertyValue.trim(); - } else { - int debug = 0; - } - status = applyPropertyAlias0(prop, propertyValue, result, invert); - } else { - try { - status = applyPropertyAlias0(gcProp, versionlessPropertyName, result, invert); - } catch (Exception e) { - } - ; - if (!status) { - try { - status = applyPropertyAlias0( - scProp, versionlessPropertyName, result, invert); - } catch (Exception e) { - } - if (!status) { - if (prop.isType(UnicodeProperty.BINARY_OR_ENUMERATED_OR_CATALOG_MASK)) { + if (propertyPredicate.length() == 0) { + // Either unary-property-query, or binary-property-query with an empty property-value. try { - status = applyPropertyAlias0(prop, "No", result, !invert); + return queriedProperties + .getProperty(UcdProperty.Script) + .getSet(unqualifiedLeftHandSide.toString()); } catch (Exception e) { } - } - if (!status) { - status = applyPropertyAlias0(prop, "", result, invert); - } + try { + return getGeneralCategorySet(queriedProperties, unqualifiedLeftHandSide.toString()); + } catch (Exception e) { + } + UnicodeProperty queriedProperty = + queriedProperties.getProperty(unqualifiedLeftHandSide.toString()); + if (queriedProperty != null) { + if (!queriedProperty.isType(UnicodeProperty.BINARY)) { + if (queriedProperty.isType(UnicodeProperty.STRING_OR_MISC_MASK)) { + return queriedProperty.getSet(""); + } + throw new IllegalArgumentException( + "Invalid unary-query-expression for non-binary property " + + queriedProperty.getName()); + } + return queriedProperty.getSet(UcdPropertyValues.Binary.Yes); + } + } else { + if (queriedProperty == null) { + propertyValue = propertyValue.trim(); + } else if (prop.isTrimmable()) { + propertyValue = propertyValue.trim(); + } else { + int debug = 0; + } + status = applyPropertyAlias0(prop, propertyValue, result, invert); } - } + // TODO(egg):Something about a factory as a fallback; } - //TODO(egg):Something about a factory as a fallback; - return status; - } - private VersionInfo implicitVersion; - private VersionInfo previousVersion; - private boolean requireSuffixForLatest; + private VersionInfo implicitVersion; + private VersionInfo previousVersion; + private boolean requireSuffixForLatest; } From d79bdffa8406de976eada36ff7ba6e75fec8395a Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 25 Mar 2025 22:25:40 +0100 Subject: [PATCH 03/38] wrap --- .../org/unicode/text/UCD/VersionedSymbolTable.java | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java index c1acd2326..d3e07183a 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java @@ -48,8 +48,8 @@ private VersionInfo parseVersionQualifier(StringBuilder qualified) { switch (versionQualifier.charAt(0)) { case 'R': // Extension: we allow a version-qualifier starting with R for retroactive - // properties, that - // is, property derivations applied before the property existed. + // properties, that is, property derivations applied before the property + // existed. case 'U': break; default: @@ -159,10 +159,8 @@ public boolean applyPropertyAlias(String beforeEquals, String afterEquals, Unico boolean interiorlyNegated = false; int posNotEqual = beforeEquals.indexOf('≠'); // TODO(egg): We cannot distinguish \p{X=} from \p{X} in this API, both give us an empty - // string - // as afterEquals. This is an @internal API, so we could change it to pass null in the - // unary - // case. + // string as afterEquals. This is an @internal API, so we could change it to pass null in + // the unary case. if (posNotEqual >= 0) { propertyPredicate = afterEquals.length() == 0 From 186afada1501c8e2f68ec5f9281eec9878005c57 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 26 Mar 2025 15:58:51 +0100 Subject: [PATCH 04/38] fallback --- .../text/UCD/VersionedSymbolTable.java | 40 +++++++++++-------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java index d3e07183a..2743bb776 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java @@ -200,26 +200,31 @@ private UnicodeSet getNonNegatedPropertyQuerySet( } UnicodeProperty queriedProperty = queriedProperties.getProperty(unqualifiedLeftHandSide.toString()); - if (queriedProperty != null) { - if (!queriedProperty.isType(UnicodeProperty.BINARY)) { - if (queriedProperty.isType(UnicodeProperty.STRING_OR_MISC_MASK)) { - return queriedProperty.getSet(""); - } - throw new IllegalArgumentException( - "Invalid unary-query-expression for non-binary property " - + queriedProperty.getName()); - } - return queriedProperty.getSet(UcdPropertyValues.Binary.Yes); + if (queriedProperty == null && unversionedExtensions != null) { + queriedProperty = unversionedExtensions.getProperty(unqualifiedLeftHandSide.toString()); } - } else { if (queriedProperty == null) { - propertyValue = propertyValue.trim(); - } else if (prop.isTrimmable()) { - propertyValue = propertyValue.trim(); - } else { - int debug = 0; + throw new IllegalArgumentException( + "Invalid unary-query-expression; could not find property " + + unqualifiedLeftHandSide); } - status = applyPropertyAlias0(prop, propertyValue, result, invert); + if (!queriedProperty.isType(UnicodeProperty.BINARY_MASK)) { + // TODO(egg): Remove when we can tell this is a unary query. + if (queriedProperty.isType(UnicodeProperty.STRING_OR_MISC_MASK)) { + return queriedProperty.getSet(""); + } + throw new IllegalArgumentException( + "Invalid unary-query-expression for non-binary property " + + queriedProperty.getName()); + } + return queriedProperty.getSet(UcdPropertyValues.Binary.Yes); + } else { + // We have a binary-property-query. + UnicodeProperty queriedProperty = + queriedProperties.getProperty(unqualifiedLeftHandSide.toString()); + if (queriedProperty == null && unversionedExtensions != null) { + queriedProperty = unversionedExtensions.getProperty(unqualifiedLeftHandSide.toString()); + } } // TODO(egg):Something about a factory as a fallback; } @@ -227,4 +232,5 @@ private UnicodeSet getNonNegatedPropertyQuerySet( private VersionInfo implicitVersion; private VersionInfo previousVersion; private boolean requireSuffixForLatest; + private UnicodeProperty.Factory unversionedExtensions; } From da5eb68ee8f6a4eb7cfba2c979b5a57cb1c5cc66 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 26 Mar 2025 18:00:06 +0100 Subject: [PATCH 05/38] meow --- .../text/UCD/VersionedSymbolTable.java | 82 ++++++++++++++++--- 1 file changed, 71 insertions(+), 11 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java index 2743bb776..92a292a08 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java @@ -2,11 +2,14 @@ import com.ibm.icu.text.UnicodeSet; import com.ibm.icu.util.VersionInfo; + +import java.util.Comparator; import java.util.Map; import org.unicode.props.IndexUnicodeProperties; import org.unicode.props.UcdProperty; import org.unicode.props.UcdPropertyValues; import org.unicode.props.UnicodeProperty; +import org.unicode.props.UnicodePropertySymbolTable; import org.unicode.text.utility.Settings; /** @@ -201,12 +204,13 @@ private UnicodeSet getNonNegatedPropertyQuerySet( UnicodeProperty queriedProperty = queriedProperties.getProperty(unqualifiedLeftHandSide.toString()); if (queriedProperty == null && unversionedExtensions != null) { - queriedProperty = unversionedExtensions.getProperty(unqualifiedLeftHandSide.toString()); + queriedProperty = + unversionedExtensions.getProperty(unqualifiedLeftHandSide.toString()); } if (queriedProperty == null) { - throw new IllegalArgumentException( - "Invalid unary-query-expression; could not find property " - + unqualifiedLeftHandSide); + throw new IllegalArgumentException( + "Invalid unary-query-expression; could not find property " + + unqualifiedLeftHandSide); } if (!queriedProperty.isType(UnicodeProperty.BINARY_MASK)) { // TODO(egg): Remove when we can tell this is a unary query. @@ -219,14 +223,70 @@ private UnicodeSet getNonNegatedPropertyQuerySet( } return queriedProperty.getSet(UcdPropertyValues.Binary.Yes); } else { - // We have a binary-property-query. - UnicodeProperty queriedProperty = - queriedProperties.getProperty(unqualifiedLeftHandSide.toString()); - if (queriedProperty == null && unversionedExtensions != null) { - queriedProperty = unversionedExtensions.getProperty(unqualifiedLeftHandSide.toString()); - } + // We have a binary-property-query. + UnicodeProperty queriedProperty = + queriedProperties.getProperty(unqualifiedLeftHandSide.toString()); + if (queriedProperty == null && unversionedExtensions != null) { + queriedProperty = + unversionedExtensions.getProperty(unqualifiedLeftHandSide.toString()); + } + if (queriedProperty == null) { + throw new IllegalArgumentException( + "Invalid binary-query-expression; could not find property " + + unqualifiedLeftHandSide); + } + final boolean isAge = queriedProperty.getName().equals("Age"); + final boolean isGeneralCategory = queriedProperty.getName().equals("General_Category"); + final boolean isName = queriedProperty.getName().equals("Name"); + final boolean isPropertyComparison = propertyPredicate.startsWith("@") && propertyPredicate.endsWith("@"); + final boolean isRegularExpressionMatch = propertyPredicate.startsWith("/") && propertyPredicate.endsWith("/"); + if (isPropertyComparison) { + if (isAge) { + throw new IllegalArgumentException( + "Invalid binary-query-expression with property-comparison for Age"); + } + final var unqualifiedRightHandSide = new StringBuilder(propertyPredicate.substring(1, propertyPredicate.length() - 1)); + final var comparisonVersion = parseVersionQualifier(unqualifiedRightHandSide); + if (UnicodeProperty.equalNames(unqualifiedRightHandSide.toString(), "code point")) { + if (comparisonVersion != null) { + throw new IllegalArgumentException( + "Invalid binary-query-expression with comparison version on identity query"); + } + // TODO(egg): Return. + } else if (UnicodeProperty.equalNames(unqualifiedRightHandSide.toString(), "none")) { + if (comparisonVersion != null) { + throw new IllegalArgumentException( + "Invalid binary-query-expression with comparison version on null query"); + } + // TODO(egg): Return. + } else { + // TODO(egg): comparison. + } + } else if (isRegularExpressionMatch) { + if (isAge) { + throw new IllegalArgumentException( + "Invalid binary-query-expression with regular-expression-match for Age"); + } + } else { + // TODO(egg): Validate against unescaped @ or : etc. + // TODO(egg): Unescape. + String propertyValue = propertyPredicate; + if (isAge) { + return queriedProperty.getSet( + new UnicodePropertySymbolTable.ComparisonMatcher< + VersionInfo>( + UnicodePropertySymbolTable.parseVersionInfoOrMax( + propertyValue), + UnicodePropertySymbolTable.Relation.geq, + Comparator.nullsFirst(Comparator.naturalOrder()), + UnicodePropertySymbolTable::parseVersionInfoOrMax)); + } + if (isGeneralCategory) { + return getGeneralCategorySet(queriedProperties, propertyValue); + } + } + return null; } - // TODO(egg):Something about a factory as a fallback; } private VersionInfo implicitVersion; From 9ee1c94aa98542cbd9a520cca3cfac98774ece82 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 26 Mar 2025 21:38:56 +0100 Subject: [PATCH 06/38] Identity and null queries --- .../text/UCD/VersionedSymbolTable.java | 106 +++++++++++------- 1 file changed, 63 insertions(+), 43 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java index 92a292a08..dabbc6f3e 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java @@ -2,7 +2,6 @@ import com.ibm.icu.text.UnicodeSet; import com.ibm.icu.util.VersionInfo; - import java.util.Comparator; import java.util.Map; import org.unicode.props.IndexUnicodeProperties; @@ -236,54 +235,75 @@ private UnicodeSet getNonNegatedPropertyQuerySet( + unqualifiedLeftHandSide); } final boolean isAge = queriedProperty.getName().equals("Age"); - final boolean isGeneralCategory = queriedProperty.getName().equals("General_Category"); final boolean isName = queriedProperty.getName().equals("Name"); - final boolean isPropertyComparison = propertyPredicate.startsWith("@") && propertyPredicate.endsWith("@"); - final boolean isRegularExpressionMatch = propertyPredicate.startsWith("/") && propertyPredicate.endsWith("/"); + final boolean isPropertyComparison = + propertyPredicate.startsWith("@") && propertyPredicate.endsWith("@"); + final boolean isRegularExpressionMatch = + propertyPredicate.startsWith("/") && propertyPredicate.endsWith("/"); if (isPropertyComparison) { - if (isAge) { - throw new IllegalArgumentException( - "Invalid binary-query-expression with property-comparison for Age"); - } - final var unqualifiedRightHandSide = new StringBuilder(propertyPredicate.substring(1, propertyPredicate.length() - 1)); - final var comparisonVersion = parseVersionQualifier(unqualifiedRightHandSide); - if (UnicodeProperty.equalNames(unqualifiedRightHandSide.toString(), "code point")) { - if (comparisonVersion != null) { - throw new IllegalArgumentException( - "Invalid binary-query-expression with comparison version on identity query"); + if (isAge) { + throw new IllegalArgumentException( + "Invalid binary-query-expression with property-comparison for Age"); } - // TODO(egg): Return. - } else if (UnicodeProperty.equalNames(unqualifiedRightHandSide.toString(), "none")) { - if (comparisonVersion != null) { - throw new IllegalArgumentException( - "Invalid binary-query-expression with comparison version on null query"); + final var unqualifiedRightHandSide = + new StringBuilder( + propertyPredicate.substring(1, propertyPredicate.length() - 1)); + final var comparisonVersion = parseVersionQualifier(unqualifiedRightHandSide); + if (UnicodeProperty.equalNames(unqualifiedRightHandSide.toString(), "code point")) { + if (comparisonVersion != null) { + throw new IllegalArgumentException( + "Invalid binary-query-expression with comparison version on identity query"); + } + if (!queriedProperty.isType(UnicodeProperty.STRING_MASK)) { + throw new IllegalArgumentException( + "Invalid binary-query-expression with identity query for " + + queriedProperty.getTypeName() + + " property"); + } + final var result = new UnicodeSet(); + for (int cp = 0; cp <= 0x10FFFF; ++cp) { + if (UnicodeProperty.equals(cp, prop.getValue(cp))) { + result.add(cp); + } + } + return result; + } else if (UnicodeProperty.equalNames( + unqualifiedRightHandSide.toString(), "none")) { + if (comparisonVersion != null) { + throw new IllegalArgumentException( + "Invalid binary-query-expression with comparison version on null query"); + } + if (!queriedProperty.isType(UnicodeProperty.STRING_OR_MISC_MASK)) { + throw new IllegalArgumentException( + "Invalid binary-query-expression with null query for " + + queriedProperty.getTypeName() + + " property"); + } + return queriedProperty.getSet((String) null); + } else { + // TODO(egg): comparison. } - // TODO(egg): Return. - } else { - // TODO(egg): comparison. - } } else if (isRegularExpressionMatch) { - if (isAge) { - throw new IllegalArgumentException( - "Invalid binary-query-expression with regular-expression-match for Age"); - } + if (isAge) { + throw new IllegalArgumentException( + "Invalid binary-query-expression with regular-expression-match for Age"); + } } else { - // TODO(egg): Validate against unescaped @ or : etc. - // TODO(egg): Unescape. - String propertyValue = propertyPredicate; - if (isAge) { - return queriedProperty.getSet( - new UnicodePropertySymbolTable.ComparisonMatcher< - VersionInfo>( - UnicodePropertySymbolTable.parseVersionInfoOrMax( - propertyValue), - UnicodePropertySymbolTable.Relation.geq, - Comparator.nullsFirst(Comparator.naturalOrder()), - UnicodePropertySymbolTable::parseVersionInfoOrMax)); - } - if (isGeneralCategory) { - return getGeneralCategorySet(queriedProperties, propertyValue); - } + // TODO(egg): Validate against unescaped @ or : etc. + // TODO(egg): Check for valid values. + // TODO(egg): Unescape. + String propertyValue = propertyPredicate; + if (isAge) { + return queriedProperty.getSet( + new UnicodePropertySymbolTable.ComparisonMatcher( + UnicodePropertySymbolTable.parseVersionInfoOrMax(propertyValue), + UnicodePropertySymbolTable.Relation.geq, + Comparator.nullsFirst(Comparator.naturalOrder()), + UnicodePropertySymbolTable::parseVersionInfoOrMax)); + } + if (queriedProperty.getName().equals("General_Category")) { + return getGeneralCategorySet(queriedProperties, propertyValue); + } } return null; } From 92b3be1a5f8379aca60bd8e77b6f118100ed4e0e Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 27 Mar 2025 01:26:43 +0100 Subject: [PATCH 07/38] comparison --- .../org/unicode/props/UnicodeProperty.java | 1 + .../text/UCD/VersionedSymbolTable.java | 47 ++++++++++++++++++- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java index 4e8c06d5f..602ab70a1 100644 --- a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java +++ b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java @@ -198,6 +198,7 @@ public UnicodeProperty setDelimiter(String value) { EXTENDED_MASK = 1, CORE_MASK = ~EXTENDED_MASK, BINARY_MASK = (1 << BINARY) | (1 << EXTENDED_BINARY), + NUMERIC_MASK = (1 << NUMERIC) | (1 << EXTENDED_NUMERIC), STRING_MASK = (1 << STRING) | (1 << EXTENDED_STRING), STRING_OR_MISC_MASK = (1 << STRING) | (1 << EXTENDED_STRING) | (1 << MISC) | (1 << EXTENDED_MISC), diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java index dabbc6f3e..dc323a7fa 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java @@ -262,7 +262,7 @@ private UnicodeSet getNonNegatedPropertyQuerySet( } final var result = new UnicodeSet(); for (int cp = 0; cp <= 0x10FFFF; ++cp) { - if (UnicodeProperty.equals(cp, prop.getValue(cp))) { + if (UnicodeProperty.equals(cp, queriedProperty.getValue(cp))) { result.add(cp); } } @@ -281,7 +281,50 @@ private UnicodeSet getNonNegatedPropertyQuerySet( } return queriedProperty.getSet((String) null); } else { - // TODO(egg): comparison. + UnicodeProperty comparisonProperty = + IndexUnicodeProperties.make(comparisonVersion) + .getProperty(unqualifiedRightHandSide.toString()); + if (comparisonProperty == null && unversionedExtensions != null) { + comparisonProperty = + unversionedExtensions.getProperty( + unqualifiedRightHandSide.toString()); + } + if (comparisonProperty == null) { + throw new IllegalArgumentException( + "Invalid binary-query-expression; could not find comparison property " + + unqualifiedRightHandSide); + } + if (!((queriedProperty.isType(UnicodeProperty.BINARY_MASK) + && comparisonProperty.isType(UnicodeProperty.BINARY_MASK)) + || (queriedProperty.isType(UnicodeProperty.NUMERIC_MASK) + && comparisonProperty.isType(UnicodeProperty.NUMERIC_MASK)) + || (queriedProperty.isType(UnicodeProperty.STRING_MASK) + && comparisonProperty.isType(UnicodeProperty.STRING_MASK)) + || (queriedProperty.isType(UnicodeProperty.ENUMERATED_OR_CATALOG_MASK) + && comparisonProperty.isType( + UnicodeProperty.ENUMERATED_OR_CATALOG_MASK) + && queriedProperty + .getAvailableValues() + .equals(comparisonProperty.getAvailableValues())) + || queriedProperty.getName().equals(comparisonProperty.getName()))) { + throw new IllegalArgumentException( + "Invalid property comparison between " + + queriedProperty.getTypeName() + + " property " + + queriedProperty.getName() + + " and " + + comparisonProperty.getTypeName() + + " property " + + comparisonProperty.getName()); + } + final var result = new UnicodeSet(); + for (int cp = 0; cp <= 0x10FFFF; ++cp) { + if (UnicodeProperty.equals( + queriedProperty.getValue(cp), comparisonProperty.getValue(cp))) { + result.add(cp); + } + } + return result; } } else if (isRegularExpressionMatch) { if (isAge) { From 4a8998aefb67929d3adaa77bdf84d0eefe66f11f Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 27 Mar 2025 13:55:03 +0100 Subject: [PATCH 08/38] Factor things out and write the other path --- .../text/UCD/VersionedSymbolTable.java | 145 ++++++++++++------ 1 file changed, 102 insertions(+), 43 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java index dc323a7fa..3294992fb 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java @@ -4,6 +4,7 @@ import com.ibm.icu.util.VersionInfo; import java.util.Comparator; import java.util.Map; +import java.util.regex.Pattern; import org.unicode.props.IndexUnicodeProperties; import org.unicode.props.UcdProperty; import org.unicode.props.UcdPropertyValues; @@ -260,13 +261,7 @@ private UnicodeSet getNonNegatedPropertyQuerySet( + queriedProperty.getTypeName() + " property"); } - final var result = new UnicodeSet(); - for (int cp = 0; cp <= 0x10FFFF; ++cp) { - if (UnicodeProperty.equals(cp, queriedProperty.getValue(cp))) { - result.add(cp); - } - } - return result; + return getIdentitySet(queriedProperty); } else if (UnicodeProperty.equalNames( unqualifiedRightHandSide.toString(), "none")) { if (comparisonVersion != null) { @@ -282,7 +277,10 @@ private UnicodeSet getNonNegatedPropertyQuerySet( return queriedProperty.getSet((String) null); } else { UnicodeProperty comparisonProperty = - IndexUnicodeProperties.make(comparisonVersion) + IndexUnicodeProperties.make( + comparisonVersion == null + ? implicitVersion + : comparisonVersion) .getProperty(unqualifiedRightHandSide.toString()); if (comparisonProperty == null && unversionedExtensions != null) { comparisonProperty = @@ -294,48 +292,64 @@ private UnicodeSet getNonNegatedPropertyQuerySet( "Invalid binary-query-expression; could not find comparison property " + unqualifiedRightHandSide); } - if (!((queriedProperty.isType(UnicodeProperty.BINARY_MASK) - && comparisonProperty.isType(UnicodeProperty.BINARY_MASK)) - || (queriedProperty.isType(UnicodeProperty.NUMERIC_MASK) - && comparisonProperty.isType(UnicodeProperty.NUMERIC_MASK)) - || (queriedProperty.isType(UnicodeProperty.STRING_MASK) - && comparisonProperty.isType(UnicodeProperty.STRING_MASK)) - || (queriedProperty.isType(UnicodeProperty.ENUMERATED_OR_CATALOG_MASK) - && comparisonProperty.isType( - UnicodeProperty.ENUMERATED_OR_CATALOG_MASK) - && queriedProperty - .getAvailableValues() - .equals(comparisonProperty.getAvailableValues())) - || queriedProperty.getName().equals(comparisonProperty.getName()))) { - throw new IllegalArgumentException( - "Invalid property comparison between " - + queriedProperty.getTypeName() - + " property " - + queriedProperty.getName() - + " and " - + comparisonProperty.getTypeName() - + " property " - + comparisonProperty.getName()); - } - final var result = new UnicodeSet(); - for (int cp = 0; cp <= 0x10FFFF; ++cp) { - if (UnicodeProperty.equals( - queriedProperty.getValue(cp), comparisonProperty.getValue(cp))) { - result.add(cp); - } - } - return result; + return compareProperties(queriedProperty, comparisonProperty); } } else if (isRegularExpressionMatch) { if (isAge) { throw new IllegalArgumentException( "Invalid binary-query-expression with regular-expression-match for Age"); } + return queriedProperty.getSet( + new UnicodeProperty.RegexMatcher() + .set( + propertyPredicate.substring( + 1, propertyPredicate.length() - 1))); } else { - // TODO(egg): Validate against unescaped @ or : etc. - // TODO(egg): Check for valid values. - // TODO(egg): Unescape. String propertyValue = propertyPredicate; + // Validation. For Name, validation entails computing the query, so we return here. + if (isName) { + var result = queriedProperty.getSet(propertyValue); + if (result.isEmpty()) { + result = + queriedProperties + .getProperty(UcdProperty.Name_Alias) + .getSet(propertyValue); + } + if (result.isEmpty()) { + throw new IllegalArgumentException( + "No character name nor name alias matches " + propertyValue); + } + } else if (queriedProperty.getName().equals("Name_Alias")) { + var result = queriedProperty.getSet(propertyValue); + if (result.isEmpty()) { + throw new IllegalArgumentException( + "No name alias matches " + propertyValue); + } + return result; + } else if (queriedProperty.isType(UnicodeProperty.NUMERIC_MASK)) { + if (!RATIONAL_PATTERN.matcher(propertyValue).matches()) { + throw new IllegalArgumentException( + "Invalid value '" + + propertyValue + + "' for numeric property " + + queriedProperty.getName()); + } + } else if (queriedProperty.isType( + UnicodeProperty.BINARY_OR_ENUMERATED_OR_CATALOG_MASK)) { + if (!queriedProperty.isValidValue(propertyValue)) { + throw new IllegalArgumentException( + "The value '" + + propertyValue + + "' is illegal. Values for " + + queriedProperty.getName() + + " must be in " + + queriedProperty.getAvailableValues() + + " or in " + + queriedProperty.getValueAliases()); + } + } else { + // TODO(egg): Check for unescaped :, @, =, etc. and unescape. + } if (isAge) { return queriedProperty.getSet( new UnicodePropertySymbolTable.ComparisonMatcher( @@ -347,13 +361,58 @@ private UnicodeSet getNonNegatedPropertyQuerySet( if (queriedProperty.getName().equals("General_Category")) { return getGeneralCategorySet(queriedProperties, propertyValue); } + return queriedProperty.getSet(propertyValue); + } + } + } + + private static UnicodeSet getIdentitySet(UnicodeProperty queriedProperty) { + final var result = new UnicodeSet(); + for (int cp = 0; cp <= 0x10FFFF; ++cp) { + if (UnicodeProperty.equals(cp, queriedProperty.getValue(cp))) { + result.add(cp); + } + } + return result; + } + + private static UnicodeSet compareProperties( + UnicodeProperty queriedProperty, UnicodeProperty comparisonProperty) { + if (!((queriedProperty.isType(UnicodeProperty.BINARY_MASK) + && comparisonProperty.isType(UnicodeProperty.BINARY_MASK)) + || (queriedProperty.isType(UnicodeProperty.NUMERIC_MASK) + && comparisonProperty.isType(UnicodeProperty.NUMERIC_MASK)) + || (queriedProperty.isType(UnicodeProperty.STRING_MASK) + && comparisonProperty.isType(UnicodeProperty.STRING_MASK)) + || (queriedProperty.isType(UnicodeProperty.ENUMERATED_OR_CATALOG_MASK) + && comparisonProperty.isType(UnicodeProperty.ENUMERATED_OR_CATALOG_MASK) + && queriedProperty + .getAvailableValues() + .equals(comparisonProperty.getAvailableValues())) + || queriedProperty.getName().equals(comparisonProperty.getName()))) { + throw new IllegalArgumentException( + "Invalid property comparison between " + + queriedProperty.getTypeName() + + " property " + + queriedProperty.getName() + + " and " + + comparisonProperty.getTypeName() + + " property " + + comparisonProperty.getName()); + } + final var result = new UnicodeSet(); + for (int cp = 0; cp <= 0x10FFFF; ++cp) { + if (UnicodeProperty.equals( + queriedProperty.getValue(cp), comparisonProperty.getValue(cp))) { + result.add(cp); } - return null; } + return result; } private VersionInfo implicitVersion; private VersionInfo previousVersion; private boolean requireSuffixForLatest; private UnicodeProperty.Factory unversionedExtensions; + private static Pattern RATIONAL_PATTERN = Pattern.compile("[+-]?[0-9]+(/[0-9]*[1-9][0-9]*)?"); } From 0c3036169be863f7ac36ed0ad0207901543797ce Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 27 Mar 2025 19:14:37 +0100 Subject: [PATCH 09/38] =?UTF-8?q?=E2=84=9A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../org/unicode/props/UnicodeProperty.java | 29 +++++++++++++++---- .../text/UCD/VersionedSymbolTable.java | 4 ++- 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java index 602ab70a1..68a6c2d6e 100644 --- a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java +++ b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java @@ -32,6 +32,7 @@ import java.util.TreeMap; import java.util.function.Predicate; import java.util.regex.Pattern; +import org.unicode.cldr.util.Rational.RationalParser; import org.unicode.cldr.util.props.UnicodeLabel; public abstract class UnicodeProperty extends UnicodeLabel { @@ -450,11 +451,14 @@ public final UnicodeSet getSet(String propertyValue, UnicodeSet result) { ? NULL_MATCHER : new SimpleMatcher( propertyValue, - getName().equals("Name") || getName().equals("Name_Alias") - ? CHARACTER_NAME_COMPARATOR - : isType(STRING_OR_MISC_MASK) - ? null - : PROPERTY_COMPARATOR), + isType(NUMERIC_MASK) + ? RATIONAL_COMPARATOR + : getName().equals("Name") + || getName().equals("Name_Alias") + ? CHARACTER_NAME_COMPARATOR + : isType(STRING_OR_MISC_MASK) + ? null + : PROPERTY_COMPARATOR), result); } } @@ -726,6 +730,21 @@ public static String toSkeleton(String source) { return skeletonBuffer.toString(); } + public static final Comparator RATIONAL_COMPARATOR = + new Comparator() { + @Override + public int compare(String x, String y) { + return compareRationals(x, y); + } + }; + + public static int compareRationals(String a, String b) { + if (a == b) return 0; + if (a == null) return -1; + if (b == null) return 1; + return RationalParser.BASIC.parse(a).compareTo(RationalParser.BASIC.parse(b)); + } + public static final Comparator CHARACTER_NAME_COMPARATOR = new Comparator() { @Override diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java index 3294992fb..9d6dd0063 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java @@ -327,7 +327,9 @@ private UnicodeSet getNonNegatedPropertyQuerySet( } return result; } else if (queriedProperty.isType(UnicodeProperty.NUMERIC_MASK)) { - if (!RATIONAL_PATTERN.matcher(propertyValue).matches()) { + if (UnicodeProperty.equalNames(propertyValue, "NaN")) { + propertyValue = "NaN"; + } else if (!RATIONAL_PATTERN.matcher(propertyValue).matches()) { throw new IllegalArgumentException( "Invalid value '" + propertyValue From 830399b8a64502654f6652a356aee655fc6f9f76 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 28 Mar 2025 02:56:02 +0100 Subject: [PATCH 10/38] A test --- .../text/UCD/TestVersionedSymbolTable.java | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java diff --git a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java new file mode 100644 index 000000000..0d9a2512c --- /dev/null +++ b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java @@ -0,0 +1,70 @@ +package org.unicode.text.UCD; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.Arrays; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.unicode.props.IndexUnicodeProperties; +import org.unicode.props.UcdProperty; +import org.unicode.props.UcdPropertyValues; + +import com.ibm.icu.text.UnicodeSet; + +public class TestVersionedSymbolTable { + @BeforeEach + void setUp() { + UnicodeSet.setDefaultXSymbolTable(VersionedSymbolTable.forDevelopment()); + } + + @AfterEach + void tearDown() { + UnicodeSet.setDefaultXSymbolTable(NO_PROPS); + } + + @Test + void testIntroductionExamples() { + assertThatUnicodeSet("\\p{XID_Continue}").containsAll("a", "α", "𒀀").containsNone("'", ","); + } + + /** + * Helper class for testing multiple properties of the same UnicodeSet. + */ + static class UnicodeSetTestFluent { + UnicodeSetTestFluent(UnicodeSet set) {} + public UnicodeSetTestFluent isEqualTo(UnicodeSet collection) { + assertTrue(set.equals(collection)); + return this; + } + public UnicodeSetTestFluent containsNone(CharSequence... collection) { + assertTrue(set.containsNone(Arrays.asList(collection))); + return this; + } + public UnicodeSetTestFluent containsAll(CharSequence... collection) { + assertTrue(set.containsAll(Arrays.asList(collection))); + return this; + } + private UnicodeSet set; + } + + private UnicodeSetTestFluent assertThatUnicodeSet(String expression) { + return new UnicodeSetTestFluent(new UnicodeSet(expression)); + } + + static UnicodeSet.XSymbolTable NO_PROPS = + new UnicodeSet.XSymbolTable() { + @Override + public boolean applyPropertyAlias( + String propertyName, String propertyValue, UnicodeSet result) { + throw new IllegalArgumentException( + "Don't use any ICU Unicode Properties! " + + propertyName + + "=" + + propertyValue); + } + ; + }; +} From 4817cae2457b587e8dda8b8825937f610b799458 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 28 Mar 2025 03:15:15 +0100 Subject: [PATCH 11/38] The first test that passes --- .../text/UCD/VersionedSymbolTable.java | 12 +- .../text/UCD/TestVersionedSymbolTable.java | 118 ++++++++++-------- 2 files changed, 69 insertions(+), 61 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java index 9d6dd0063..7075d8c83 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java @@ -191,15 +191,13 @@ private UnicodeSet getNonNegatedPropertyQuerySet( if (propertyPredicate.length() == 0) { // Either unary-property-query, or binary-property-query with an empty property-value. - try { - return queriedProperties - .getProperty(UcdProperty.Script) - .getSet(unqualifiedLeftHandSide.toString()); - } catch (Exception e) { + final var script = queriedProperties.getProperty(UcdProperty.Script); + final var generalCategory = queriedProperties.getProperty(UcdProperty.General_Category); + if (script.isValidValue(unqualifiedLeftHandSide.toString())) { + return script.getSet(unqualifiedLeftHandSide.toString()); } - try { + if (generalCategory.isValidValue(unqualifiedLeftHandSide.toString())) { return getGeneralCategorySet(queriedProperties, unqualifiedLeftHandSide.toString()); - } catch (Exception e) { } UnicodeProperty queriedProperty = queriedProperties.getProperty(unqualifiedLeftHandSide.toString()); diff --git a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java index 0d9a2512c..8430e3f81 100644 --- a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java +++ b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java @@ -1,70 +1,80 @@ package org.unicode.text.UCD; -import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; -import java.util.Arrays; - +import com.ibm.icu.text.UnicodeSet; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import org.unicode.props.IndexUnicodeProperties; -import org.unicode.props.UcdProperty; -import org.unicode.props.UcdPropertyValues; - -import com.ibm.icu.text.UnicodeSet; public class TestVersionedSymbolTable { - @BeforeEach - void setUp() { - UnicodeSet.setDefaultXSymbolTable(VersionedSymbolTable.forDevelopment()); - } - - @AfterEach - void tearDown() { - UnicodeSet.setDefaultXSymbolTable(NO_PROPS); - } - - @Test - void testIntroductionExamples() { - assertThatUnicodeSet("\\p{XID_Continue}").containsAll("a", "α", "𒀀").containsNone("'", ","); - } + @BeforeEach + void setUp() { + UnicodeSet.setDefaultXSymbolTable(VersionedSymbolTable.forDevelopment()); + } - /** - * Helper class for testing multiple properties of the same UnicodeSet. - */ - static class UnicodeSetTestFluent { - UnicodeSetTestFluent(UnicodeSet set) {} - public UnicodeSetTestFluent isEqualTo(UnicodeSet collection) { - assertTrue(set.equals(collection)); - return this; + @AfterEach + void tearDown() { + UnicodeSet.setDefaultXSymbolTable(NO_PROPS); } - public UnicodeSetTestFluent containsNone(CharSequence... collection) { - assertTrue(set.containsNone(Arrays.asList(collection))); - return this; + + @Test + void testIntroductionExamples() { + assertThatUnicodeSet("\\p{XID_Continue}") + .contains("a") + .contains("α") + .contains("𒀀") + .doesNotContain("'") + .doesNotContain(","); } - public UnicodeSetTestFluent containsAll(CharSequence... collection) { - assertTrue(set.containsAll(Arrays.asList(collection))); - return this; + + /** Helper class for testing multiple properties of the same UnicodeSet. */ + private static class UnicodeSetTestFluent { + UnicodeSetTestFluent(String expression) { + this.expression = expression; + set = new UnicodeSet(expression); + set.complement().complement(); + } + + public UnicodeSetTestFluent isEqualTo(UnicodeSet collection) { + assertTrue(set.equals(collection)); + return this; + } + + public UnicodeSetTestFluent doesNotContain(CharSequence element) { + assertFalse( + set.contains(element), + element + " ∉ " + expression + " = " + set.toPattern(true)); + return this; + } + + public UnicodeSetTestFluent contains(CharSequence element) { + assertTrue( + set.contains(element), + element + " ∈ " + expression + " = " + set.toPattern(true)); + return this; + } + + private UnicodeSet set; + private String expression; } - private UnicodeSet set; - } - private UnicodeSetTestFluent assertThatUnicodeSet(String expression) { - return new UnicodeSetTestFluent(new UnicodeSet(expression)); - } + private UnicodeSetTestFluent assertThatUnicodeSet(String expression) { + return new UnicodeSetTestFluent(expression); + } - static UnicodeSet.XSymbolTable NO_PROPS = - new UnicodeSet.XSymbolTable() { - @Override - public boolean applyPropertyAlias( - String propertyName, String propertyValue, UnicodeSet result) { - throw new IllegalArgumentException( - "Don't use any ICU Unicode Properties! " - + propertyName - + "=" - + propertyValue); - } - ; - }; + static UnicodeSet.XSymbolTable NO_PROPS = + new UnicodeSet.XSymbolTable() { + @Override + public boolean applyPropertyAlias( + String propertyName, String propertyValue, UnicodeSet result) { + throw new IllegalArgumentException( + "Don't use any ICU Unicode Properties! " + + propertyName + + "=" + + propertyValue); + } + ; + }; } From 9fcab458922c1713e615efaefefa606b48f57090 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 28 Mar 2025 04:12:41 +0100 Subject: [PATCH 12/38] Basic debugging --- .../text/UCD/VersionedSymbolTable.java | 1 + .../text/UCD/TestVersionedSymbolTable.java | 23 ++++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java index 7075d8c83..681e4522a 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java @@ -157,6 +157,7 @@ private UnicodeSet getGeneralCategorySet(IndexUnicodeProperties iup, String prop @Override public boolean applyPropertyAlias(String beforeEquals, String afterEquals, UnicodeSet result) { + result.clear(); String leftHandSide = beforeEquals; String propertyPredicate = afterEquals; boolean interiorlyNegated = false; diff --git a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java index 8430e3f81..9879b3fb5 100644 --- a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java +++ b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java @@ -4,6 +4,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import com.ibm.icu.text.UnicodeSet; +import java.text.ParsePosition; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -27,13 +28,33 @@ void testIntroductionExamples() { .contains("𒀀") .doesNotContain("'") .doesNotContain(","); + assertThatUnicodeSet("[\\p{lb=OP}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]") + .contains("(") + .doesNotContain("【"); + assertThatUnicodeSet( + "[\\p{Other_ID_Start}\\p{Other_ID_Continue}" + + "\\p{L}\\p{Nl}\\p{Mn}\\p{Mc}\\p{Nd}\\p{Pc}" + + "-\\p{Pattern_Syntax}" + + "-\\p{Pattern_White_Space}]") + .contains("A") + .contains("_") + .contains("᧚") + .doesNotContain("\u2E2F") + .doesNotContain("$"); + assertThatUnicodeSet("[\\p{L}\\p{Nl}\\p{Mn}\\p{Mc}\\p{Nd}\\p{Pc}-[\\u2E2F]]") + .contains("A") + .contains("_") + .doesNotContain("᧚") + .doesNotContain("\u2E2F") + .doesNotContain("$"); } /** Helper class for testing multiple properties of the same UnicodeSet. */ private static class UnicodeSetTestFluent { UnicodeSetTestFluent(String expression) { this.expression = expression; - set = new UnicodeSet(expression); + ParsePosition parsePosition = new ParsePosition(0); + set = new UnicodeSet(expression, parsePosition, VersionedSymbolTable.forDevelopment()); set.complement().complement(); } From 7048be38555a52726a12e4231366a2a17b1a9857 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 28 Mar 2025 04:44:51 +0100 Subject: [PATCH 13/38] Intro tests --- .../text/UCD/VersionedSymbolTable.java | 2 +- .../text/UCD/TestVersionedSymbolTable.java | 33 +++++++++++++++++-- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java index 681e4522a..2f55ab0d9 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java @@ -59,7 +59,7 @@ private VersionInfo parseVersionQualifier(StringBuilder qualified) { throw new IllegalArgumentException( "Invalid version-qualifier " + versionQualifier); } - String versionNumber = versionQualifier.substring(1, posColon + 1); + String versionNumber = versionQualifier.substring(1, posColon); if (versionNumber.endsWith("dev")) { versionNumber = versionNumber.substring(0, versionNumber.length() - 3); if (!versionNumber.isEmpty() diff --git a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java index 9879b3fb5..7547e2d3e 100644 --- a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java +++ b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java @@ -21,7 +21,7 @@ void tearDown() { } @Test - void testIntroductionExamples() { + void testIntroductionBasicExamples() { assertThatUnicodeSet("\\p{XID_Continue}") .contains("a") .contains("α") @@ -49,12 +49,30 @@ void testIntroductionExamples() { .doesNotContain("$"); } + @Test + void testIntroductionQueryLanguageExamples() { + assertThatUnicodeSet("\\p{Uppercase_Mapping≠@Simple_Uppercase_Mapping@}") + .contains("ß") + .doesNotContain("ſ"); + assertThatUnicodeSet("\\p{U15.1:Simple_Case_Folding≠@U15.0:Simple_Case_Folding@}") + .consistsOf("ſt", "ΐ", "ΰ"); + assertThatUnicodeSet("[\\p{cjkDefinition=/\\bcat\\b/} \\p{kEH_Desc=/\\bcat\\b/}]") + .contains("貓") + .contains("𓃠") + .doesNotContain("犬") + .doesNotContain("𓃡"); + assertThatUnicodeSet("[\\p{Case_Folding≠@code point@}-\\p{Changes_When_Casefolded}]") + .contains("ǰ") + .doesNotContain("š") + .doesNotContain("ß"); + } + /** Helper class for testing multiple properties of the same UnicodeSet. */ private static class UnicodeSetTestFluent { UnicodeSetTestFluent(String expression) { this.expression = expression; ParsePosition parsePosition = new ParsePosition(0); - set = new UnicodeSet(expression, parsePosition, VersionedSymbolTable.forDevelopment()); + set = new UnicodeSet(expression); set.complement().complement(); } @@ -77,6 +95,17 @@ public UnicodeSetTestFluent contains(CharSequence element) { return this; } + public UnicodeSetTestFluent consistsOf(CharSequence... elements) { + for (CharSequence element : elements) { + contains(element); + } + final var expectedElements = new UnicodeSet().addAll(elements); + assertTrue( + expectedElements.containsAll(set), + expectedElements + " ⊇ " + expression + " = " + set.toPattern(true)); + return this; + } + private UnicodeSet set; private String expression; } From 75e4250bfef8e7744baeed52438e7d1380e12e43 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 28 Mar 2025 15:57:57 +0100 Subject: [PATCH 14/38] More tests, more bugs --- .../text/UCD/VersionedSymbolTable.java | 1 + .../text/UCD/TestVersionedSymbolTable.java | 77 ++++++++++++++++++- 2 files changed, 74 insertions(+), 4 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java index 2f55ab0d9..e989d011e 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java @@ -318,6 +318,7 @@ private UnicodeSet getNonNegatedPropertyQuerySet( throw new IllegalArgumentException( "No character name nor name alias matches " + propertyValue); } + return result; } else if (queriedProperty.getName().equals("Name_Alias")) { var result = queriedProperty.getSet(propertyValue); if (result.isEmpty()) { diff --git a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java index 7547e2d3e..af90106b8 100644 --- a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java +++ b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java @@ -1,6 +1,7 @@ package org.unicode.text.UCD; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; import com.ibm.icu.text.UnicodeSet; @@ -67,17 +68,84 @@ void testIntroductionQueryLanguageExamples() { .doesNotContain("ß"); } + @Test + void testNegations() { + assertThatUnicodeSet("\\P{Cn}").contains("a").doesNotContain("\uFFFF"); + assertThatUnicodeSet("[:^Cn:]").contains("a").doesNotContain("\uFFFF"); + assertThatUnicodeSet("\\P{General_Category=Cn}").contains("a").doesNotContain("\uFFFF"); + assertThatUnicodeSet("[:^General_Category=Cn:]").contains("a").doesNotContain("\uFFFF"); + assertThatUnicodeSet("\\p{General_Category≠Cn}").contains("a").doesNotContain("\uFFFF"); + assertThatUnicodeSet("[:General_Category≠Cn:]").contains("a").doesNotContain("\uFFFF"); + assertThatUnicodeSet("[:^General_Category≠Cn:]").doesNotContain("a").contains("\uFFFF"); + assertThatUnicodeSet("[:^General_Category≠Cn:]").doesNotContain("a").contains("\uFFFF"); + assertThatUnicodeSet("[:^General_Category≠Cn:]").doesNotContain("a").contains("\uFFFF"); + + assertThatUnicodeSet("\\P{Decomposition_Type≠compat}") + .contains("∯") + .doesNotContain("∮") + .isEqualToUnicodeSet("\\p{Decomposition_Type=compat}"); + } + + @Test + void testNamedSingleton() { + assertThatUnicodeSet("\\N{SPACE}").consistsOf(" "); + assertThatUnicodeSet("\\N{THIS IS NOT A CHARACTER}") + .isIllFormed("No character name nor name alias matches"); + assertThatUnicodeSet("\\N{PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET}") + .isEqualToUnicodeSet( + "\\N{PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET}") + .consistsOf("︘"); + assertThatUnicodeSet("\\N{Latin small ligature o-e}").consistsOf("œ"); + assertThatUnicodeSet("\\N{Hangul jungseong O-E}").consistsOf("ᆀ"); + assertThatUnicodeSet("\\N{Hangul jungseong OE}").consistsOf("ᅬ"); + } + + @Test + void testAge() { + assertThatUnicodeSet("\\p{Age=6.0}") + .contains("U") + .contains("𒌋") + .doesNotContain("𒎙") + .isEqualToUnicodeSet("[ \\P{U6:Cn} \\p{U6:Noncharacter_Code_Point} ]"); + assertThatUnicodeSet("\\p{Age=@U6:Age@}").isIllFormed("property-comparison for Age"); + assertThatUnicodeSet("\\p{Age=/1/}").isIllFormed("regular-expression-match for Age"); + } + /** Helper class for testing multiple properties of the same UnicodeSet. */ private static class UnicodeSetTestFluent { UnicodeSetTestFluent(String expression) { this.expression = expression; ParsePosition parsePosition = new ParsePosition(0); - set = new UnicodeSet(expression); - set.complement().complement(); + try { + set = new UnicodeSet(expression); + set.complement().complement(); + } catch (Exception e) { + exception = e; + } + } + + public void isIllFormed(String messageSubstring) { + assertNotNull(exception, expression + " is ill-formed"); + assertTrue( + exception.getMessage().contains(messageSubstring), + "Error message '" + + exception.getMessage() + + "' for " + + expression + + " contains '" + + messageSubstring + + "'"); } - public UnicodeSetTestFluent isEqualTo(UnicodeSet collection) { - assertTrue(set.equals(collection)); + public UnicodeSetTestFluent isEqualToUnicodeSet( + String expectedExpression) { + final var expected = new UnicodeSet(expectedExpression); + assertTrue( + set.containsAll(expected), + expected + " ⊆ " + expression + " = " + set.toPattern(true)); + assertTrue( + expected.containsAll(set), + expected + " ⊇ " + expression + " = " + set.toPattern(true)); return this; } @@ -108,6 +176,7 @@ public UnicodeSetTestFluent consistsOf(CharSequence... elements) { private UnicodeSet set; private String expression; + private Exception exception; } private UnicodeSetTestFluent assertThatUnicodeSet(String expression) { From 73fd0d0927ffd2e978c6a15525120e395fcee82d Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 28 Mar 2025 21:02:39 +0100 Subject: [PATCH 15/38] More examples --- .../text/UCD/TestVersionedSymbolTable.java | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java index af90106b8..aa4d7742f 100644 --- a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java +++ b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java @@ -111,6 +111,39 @@ void testAge() { assertThatUnicodeSet("\\p{Age=/1/}").isIllFormed("regular-expression-match for Age"); } + @Test + void testPropertyComparisons() { + assertThatUnicodeSet("\\p{scf=@lc@}").contains("Σ").contains("σ").doesNotContain("ς"); + assertThatUnicodeSet("\\p{U15.1:scf=@U15.1:lc@}") + .contains("Σ") + .contains("σ") + .doesNotContain("ς"); + assertThatUnicodeSet("\\p{U15.0:Line_Break≠@U15.1:Line_Break@}") + .contains("ᯤ") + .doesNotContain("i"); + assertThatUnicodeSet("\\p{kIRG_GSource=@none@}").contains("𒇽").doesNotContain("人"); + assertThatUnicodeSet("\\p{case folding=@code point@}") + .contains("s") + .doesNotContain("S") + .doesNotContain("ſ") + .doesNotContain("ß"); + assertThatUnicodeSet("\\p{kIRG_GSource=@U16:none@}") + .isIllFormed("comparison version on null query"); + assertThatUnicodeSet("\\p{case folding=@U16:code point@}") + .isIllFormed("comparison version on identity query"); + + assertThatUnicodeSet("\\p{Decomposition_Mapping=@Ideographic@}") + .isIllFormed( + "comparison between String property Decomposition_Mapping and" + + " Binary property Ideographic"); + assertThatUnicodeSet("\\p{Uppercase≠@Changes_When_Lowercased@}") + .isEqualToUnicodeSet( + "[[\\p{Uppercase}\\p{Changes_When_Lowercased}]" + + "-[\\p{Uppercase}&\\p{Changes_When_Lowercased}]]") + .contains("𝔄") + .doesNotContain("A"); + } + /** Helper class for testing multiple properties of the same UnicodeSet. */ private static class UnicodeSetTestFluent { UnicodeSetTestFluent(String expression) { From aae8ef82a8f7912c98e314e4eaa974775a9dbf92 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 28 Mar 2025 21:17:08 +0100 Subject: [PATCH 16/38] More tests! --- .../unicode/text/UCD/TestVersionedSymbolTable.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java index aa4d7742f..360ff0d19 100644 --- a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java +++ b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java @@ -113,6 +113,7 @@ void testAge() { @Test void testPropertyComparisons() { + // From the first set of examples in the section. assertThatUnicodeSet("\\p{scf=@lc@}").contains("Σ").contains("σ").doesNotContain("ς"); assertThatUnicodeSet("\\p{U15.1:scf=@U15.1:lc@}") .contains("Σ") @@ -132,6 +133,7 @@ void testPropertyComparisons() { assertThatUnicodeSet("\\p{case folding=@U16:code point@}") .isIllFormed("comparison version on identity query"); + // From the third set of examples in the section. assertThatUnicodeSet("\\p{Decomposition_Mapping=@Ideographic@}") .isIllFormed( "comparison between String property Decomposition_Mapping and" @@ -142,6 +144,17 @@ void testPropertyComparisons() { + "-[\\p{Uppercase}&\\p{Changes_When_Lowercased}]]") .contains("𝔄") .doesNotContain("A"); + assertThatUnicodeSet("\\p{scf≠@cf@}").contains("ß").doesNotContain("ς"); + assertThatUnicodeSet("\\p{Numeric_Value=@kPrimaryNumeric@}") + .contains("A") + .contains("喵") + .contains("一") + .contains("五") + .doesNotContain("1") + .doesNotContain("伍"); + // \p{U15.0:Line_Break≠@U15.1:Line_Break@} covered above. + assertThatUnicodeSet("\\p{U16.0:kPrimaryNumeric≠@U17.0:kPrimaryNumeric@}").consistsOf("兆"); + assertThatUnicodeSet("\\p{Script_Extensions=@Script@}").contains("A").doesNotContain("।"); } /** Helper class for testing multiple properties of the same UnicodeSet. */ From 52e273cb6408de0dd145cf1d8195659ac0a1b64c Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Fri, 28 Mar 2025 22:47:06 +0100 Subject: [PATCH 17/38] More tests! --- .../org/unicode/text/UCD/TestVersionedSymbolTable.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java index 360ff0d19..284883ec5 100644 --- a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java +++ b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java @@ -157,6 +157,14 @@ void testPropertyComparisons() { assertThatUnicodeSet("\\p{Script_Extensions=@Script@}").contains("A").doesNotContain("।"); } + @Test + void testIdentityAndNullQueries() { + assertThatUnicodeSet("\\p{scf=@code point@}").contains("a").doesNotContain("A"); + assertThatUnicodeSet("[:^kIRG_GSource=@none@:]").contains("喵").doesNotContain("𓃠"); + assertThatUnicodeSet("\\p{Bidi_Paired_Bracket=@none@}") + .isEqualToUnicodeSet("\\p{Bidi_Paired_Bracket_Type=None}"); + } + /** Helper class for testing multiple properties of the same UnicodeSet. */ private static class UnicodeSetTestFluent { UnicodeSetTestFluent(String expression) { From 4ea9a5c35a3c37ee9e8f006bd73e439374f2f781 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Sat, 29 Mar 2025 02:34:29 +0100 Subject: [PATCH 18/38] Notice --- .../java/org/unicode/text/UCD/TestVersionedSymbolTable.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java index 284883ec5..2d59a5e60 100644 --- a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java +++ b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java @@ -10,6 +10,12 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +/** + * Notice to the maintainer: These tests check that the UnicodeSet property queries are correctly + * parsed. They are not here to test property assignments. Mostly they check, for every valid + * expression, that the set is nonempty, not equal to the entire code space, and that it appears + * reasonable. If they are broken by changes to property assignments, feel free to update them. + */ public class TestVersionedSymbolTable { @BeforeEach void setUp() { From f00fa5f05ecf7d632835469f5766bed6dfe1a4f8 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 31 Mar 2025 10:06:14 +0200 Subject: [PATCH 19/38] More tests, bugfix in RATIONAL_COMPARATOR --- .../org/unicode/props/UnicodeProperty.java | 4 +- .../text/UCD/TestVersionedSymbolTable.java | 46 +++++++++++++++++++ 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java index 68a6c2d6e..e331ab1d6 100644 --- a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java +++ b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java @@ -740,8 +740,8 @@ public int compare(String x, String y) { public static int compareRationals(String a, String b) { if (a == b) return 0; - if (a == null) return -1; - if (b == null) return 1; + if (a == null || a.equalsIgnoreCase("NaN")) return -1; + if (b == null || b.equalsIgnoreCase("NaN")) return 1; return RationalParser.BASIC.parse(a).compareTo(RationalParser.BASIC.parse(b)); } diff --git a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java index 2d59a5e60..046b60913 100644 --- a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java +++ b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java @@ -171,6 +171,47 @@ void testIdentityAndNullQueries() { .isEqualToUnicodeSet("\\p{Bidi_Paired_Bracket_Type=None}"); } + @Test + void testValidValues() { + assertThatUnicodeSet("\\p{Name=THIS IS NOT A CHARACTER}") + .isIllFormed("No character name nor name alias matches"); + assertThatUnicodeSet("\\p{Name =CUNEIFORM SIGN A}").consistsOf("𒀀"); + assertThatUnicodeSet("\\p{Name_Alias=CUNEIFORM SIGN A}") + .isIllFormed("No name alias matches"); + assertThatUnicodeSet("\\p{Line_Break=Meow}").isIllFormed("The value 'Meow' is illegal."); + assertThatUnicodeSet("\\p{kDefinition=meow}").isEmpty(); + assertThatUnicodeSet("\\p{Uppercase_Mapping=meow}").isEmpty(); + assertThatUnicodeSet("\\p{Numeric_Value=MDCCXXIX}") + .isIllFormed("Invalid value 'MDCCXXIX' for numeric property"); + assertThatUnicodeSet("\\p{Numeric_Value=1729}").isEmpty(); + } + + @Test + void testPropertyValueQueries() { + assertThatUnicodeSet("\\p{Uppercase=True}") + .isEqualToUnicodeSet("\\p{Uppercase}") + .contains("A") + .doesNotContain("a"); + assertThatUnicodeSet("\\p{Uppercase=NO}") + .isEqualToUnicodeSet("\\P{Uppercase}") + .contains("a") + .doesNotContain("A"); + assertThatUnicodeSet("\\p{Script_Extensions=Latin}") + .contains("A") + .contains("·") + .doesNotContain("𓃠") + .doesNotContain("।"); + assertThatUnicodeSet("\\p{nv=2/12}") + .isEqualToUnicodeSet("\\p{Numeric_Value=1/6}") + .contains("⅙") + .contains("𐧷") + .doesNotContain("½") + .doesNotContain("X"); + assertThatUnicodeSet("\\p{Name_Alias=New Line}") + .isEqualToUnicodeSet("\\p{Name=New Line}") + .consistsOf("\n"); + } + /** Helper class for testing multiple properties of the same UnicodeSet. */ private static class UnicodeSetTestFluent { UnicodeSetTestFluent(String expression) { @@ -234,6 +275,11 @@ public UnicodeSetTestFluent consistsOf(CharSequence... elements) { return this; } + public UnicodeSetTestFluent isEmpty() { + assertTrue(set.isEmpty(), expression + " = " + set.toPattern(true) + " is empty"); + return this; + } + private UnicodeSet set; private String expression; private Exception exception; From 3cc1272f885c6e18a4d859cc8f98c800f6784d2b Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 31 Mar 2025 10:23:20 +0200 Subject: [PATCH 20/38] Test regular expression queries --- .../text/UCD/TestVersionedSymbolTable.java | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java index 046b60913..7e8193b7a 100644 --- a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java +++ b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java @@ -212,6 +212,38 @@ void testPropertyValueQueries() { .consistsOf("\n"); } + @Test + void testRegularExpressionQueries() { + assertThatUnicodeSet("\\p{Name=/CAPITAL LETTER/}").contains("A").doesNotContain("a"); + assertThatUnicodeSet("\\p{Block=/^Cyrillic/}") + .contains("и") + .contains("\u1C8B") + .contains("\u1C8F") + .contains("ꙮ") + .doesNotContain("k"); + assertThatUnicodeSet("\\p{scx=/Gondi/}") + .isEqualToUnicodeSet("[\\p{scx=Gunjala_Gondi}\\p{scx=Masaram_Gondi}]") + .contains("𑴀") + .contains("𑵠") + .contains("।") + .doesNotContain("a"); + assertThatUnicodeSet("\\p{gc=/^P/}") + .isEqualToUnicodeSet("[\\p{Punctuation} \\p{Private Use} \\u2029]"); + + assertThatUnicodeSet("\\p{Name=/NO BREAK SPACE/}").isEmpty(); + assertThatUnicodeSet("\\p{Name=/NO-BREAK SPACE/}") + .contains("\u00A0") + .contains("\u202F") + .contains("\uFEFF"); + assertThatUnicodeSet("\\p{Script=/ Gondi/}").isEmpty(); + assertThatUnicodeSet("\\p{Script=/_Gondi/}").contains("𑴀").contains("𑵠"); + assertThatUnicodeSet("\\p{gc=/Cased_Letter/}").isEmpty(); + assertThatUnicodeSet("\\p{gc=/Cased_Letter/}") + .contains("a") + .contains("A") + .doesNotContain("𒀀"); + } + /** Helper class for testing multiple properties of the same UnicodeSet. */ private static class UnicodeSetTestFluent { UnicodeSetTestFluent(String expression) { From 1acb083be24ae9d8f72ecf751ce8b1a568bf3624 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 31 Mar 2025 10:36:01 +0200 Subject: [PATCH 21/38] a bug --- .../main/java/org/unicode/text/UCD/VersionedSymbolTable.java | 5 +++-- .../java/org/unicode/text/UCD/TestVersionedSymbolTable.java | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java index e989d011e..b8e2b1604 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java @@ -53,6 +53,7 @@ private VersionInfo parseVersionQualifier(StringBuilder qualified) { // Extension: we allow a version-qualifier starting with R for retroactive // properties, that is, property derivations applied before the property // existed. + // TODO(egg): Actually support that. case 'U': break; default: @@ -137,8 +138,8 @@ private VersionInfo parseVersionQualifier(StringBuilder qualified) { /** * Similar to iup.getProperty(UcdProperty.General_Category).getSet(propertyValue), but takes the - * groupings into account. Implements both unary-query for a General_Category alias and - * binary-query with a property-value where the queried property is General_Category. + * groupings into account. Implements both unary-query-expression for a General_Category alias and + * binary-query-expression with a property-value where the queried property is General_Category. */ private UnicodeSet getGeneralCategorySet(IndexUnicodeProperties iup, String propertyValue) { var gc = iup.getProperty(UcdProperty.General_Category); diff --git a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java index 7e8193b7a..f17480b2c 100644 --- a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java +++ b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java @@ -238,7 +238,7 @@ void testRegularExpressionQueries() { assertThatUnicodeSet("\\p{Script=/ Gondi/}").isEmpty(); assertThatUnicodeSet("\\p{Script=/_Gondi/}").contains("𑴀").contains("𑵠"); assertThatUnicodeSet("\\p{gc=/Cased_Letter/}").isEmpty(); - assertThatUnicodeSet("\\p{gc=/Cased_Letter/}") + assertThatUnicodeSet("\\p{gc=Cased_Letter}") .contains("a") .contains("A") .doesNotContain("𒀀"); From 60ea2d374e9f3d3c5ada1a2c7d20ff26c98f57d4 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 31 Mar 2025 10:39:20 +0200 Subject: [PATCH 22/38] A setter --- .../main/java/org/unicode/text/UCD/VersionedSymbolTable.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java index b8e2b1604..47d081c84 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java @@ -34,6 +34,11 @@ public static VersionedSymbolTable forDevelopment() { return result; } + public VersionedSymbolTable setUnversionedExtensions(UnicodeProperty.Factory factory) { + unversionedExtensions = factory; + return this; + } + /** * Parses a string prefixed with an optional-version-qualifier. If there is a version-qualifier, * returns the corresponding VersionInfo and removes the prefix from the given StringBuilder. From 40677ee2c92650213e4361b5d37b50f9c82bdc2a Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 1 Apr 2025 12:26:40 +0200 Subject: [PATCH 23/38] Fix NaN comparison and thereby MakeUnicodeFiles --- .../src/main/java/org/unicode/props/UnicodeProperty.java | 9 +++++++-- .../java/org/unicode/text/UCD/VersionedSymbolTable.java | 5 ++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java index e331ab1d6..11347d6c2 100644 --- a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java +++ b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java @@ -740,8 +740,13 @@ public int compare(String x, String y) { public static int compareRationals(String a, String b) { if (a == b) return 0; - if (a == null || a.equalsIgnoreCase("NaN")) return -1; - if (b == null || b.equalsIgnoreCase("NaN")) return 1; + if (a == null) return -1; + if (b == null) return 1; + final boolean aIsNaN = equalNames(a, "NaN"); + final boolean bIsNaN = equalNames(b, "NaN"); + if (aIsNaN && bIsNaN) return 0; + if (aIsNaN) return -1; + if (bIsNaN) return 1; return RationalParser.BASIC.parse(a).compareTo(RationalParser.BASIC.parse(b)); } diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java index 47d081c84..f0871c3bb 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java @@ -333,9 +333,8 @@ private UnicodeSet getNonNegatedPropertyQuerySet( } return result; } else if (queriedProperty.isType(UnicodeProperty.NUMERIC_MASK)) { - if (UnicodeProperty.equalNames(propertyValue, "NaN")) { - propertyValue = "NaN"; - } else if (!RATIONAL_PATTERN.matcher(propertyValue).matches()) { + if (UnicodeProperty.equalNames(propertyValue, "NaN") || + !RATIONAL_PATTERN.matcher(propertyValue).matches()) { throw new IllegalArgumentException( "Invalid value '" + propertyValue From 21d72180a823053e1cc462a72b9f0f925e6b2379 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 1 Apr 2025 12:26:56 +0200 Subject: [PATCH 24/38] spots --- .../java/org/unicode/text/UCD/VersionedSymbolTable.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java index f0871c3bb..8ed3aeec4 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java @@ -143,8 +143,9 @@ private VersionInfo parseVersionQualifier(StringBuilder qualified) { /** * Similar to iup.getProperty(UcdProperty.General_Category).getSet(propertyValue), but takes the - * groupings into account. Implements both unary-query-expression for a General_Category alias and - * binary-query-expression with a property-value where the queried property is General_Category. + * groupings into account. Implements both unary-query-expression for a General_Category alias + * and binary-query-expression with a property-value where the queried property is + * General_Category. */ private UnicodeSet getGeneralCategorySet(IndexUnicodeProperties iup, String propertyValue) { var gc = iup.getProperty(UcdProperty.General_Category); @@ -333,8 +334,8 @@ private UnicodeSet getNonNegatedPropertyQuerySet( } return result; } else if (queriedProperty.isType(UnicodeProperty.NUMERIC_MASK)) { - if (UnicodeProperty.equalNames(propertyValue, "NaN") || - !RATIONAL_PATTERN.matcher(propertyValue).matches()) { + if (UnicodeProperty.equalNames(propertyValue, "NaN") + || !RATIONAL_PATTERN.matcher(propertyValue).matches()) { throw new IllegalArgumentException( "Invalid value '" + propertyValue From 3ce4d2bfdbdf16a181ac782b0f3ce7aab625cda4 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 1 Apr 2025 12:38:30 +0200 Subject: [PATCH 25/38] comments about strings --- .../java/org/unicode/text/UCD/VersionedSymbolTable.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java index 8ed3aeec4..38c4837fa 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java @@ -376,6 +376,10 @@ private UnicodeSet getNonNegatedPropertyQuerySet( private static UnicodeSet getIdentitySet(UnicodeProperty queriedProperty) { final var result = new UnicodeSet(); + // Note that while UnicodeProperty, can return strings from getSet, which is an extension of + // the UnicodeSet property-query specification, identity queries exclude any strings of + // length other than 1, otherwise we would end up with infinite sets, e.g., the set of all + // strings that normalize to themselves. for (int cp = 0; cp <= 0x10FFFF; ++cp) { if (UnicodeProperty.equals(cp, queriedProperty.getValue(cp))) { result.add(cp); @@ -409,6 +413,10 @@ private static UnicodeSet compareProperties( + comparisonProperty.getName()); } final var result = new UnicodeSet(); + // Note that while UnicodeProperty, can return strings from getSet, which is an extension of + // the UnicodeSet property-query specification, property comparisons exclude any strings of + // length other than 1. Extending them to include those leads to messy questions of + // defining the value of character properties for string (null?) and avoiding infinite sets. for (int cp = 0; cp <= 0x10FFFF; ++cp) { if (UnicodeProperty.equals( queriedProperty.getValue(cp), comparisonProperty.getValue(cp))) { From b0da269865db401c990c436c3b460bff235591fe Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 1 Apr 2025 12:40:34 +0200 Subject: [PATCH 26/38] Set.of --- .../text/UCD/VersionedSymbolTable.java | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java index 38c4837fa..737babb2d 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java @@ -4,6 +4,7 @@ import com.ibm.icu.util.VersionInfo; import java.util.Comparator; import java.util.Map; +import java.util.Set; import java.util.regex.Pattern; import org.unicode.props.IndexUnicodeProperties; import org.unicode.props.UcdProperty; @@ -121,25 +122,25 @@ private VersionInfo parseVersionQualifier(StringBuilder qualified) { } } - private static Map + private static Map> COARSE_GENERAL_CATEGORIES = Map.of( UcdPropertyValues.General_Category_Values.Other, - new String[] {"Cc", "Cf", "Cn", "Co", "Cs"}, + Set.of("Cc", "Cf", "Cn", "Co", "Cs"), UcdPropertyValues.General_Category_Values.Letter, - new String[] {"Ll", "Lm", "Lo", "Lt", "Lu"}, + Set.of("Ll", "Lm", "Lo", "Lt", "Lu"), UcdPropertyValues.General_Category_Values.Cased_Letter, - new String[] {"Ll", "Lt", "Lu"}, + Set.of("Ll", "Lt", "Lu"), UcdPropertyValues.General_Category_Values.Mark, - new String[] {"Mc", "Me", "Mn"}, + Set.of("Mc", "Me", "Mn"), UcdPropertyValues.General_Category_Values.Number, - new String[] {"Nd", "Nl", "No"}, + Set.of("Nd", "Nl", "No"), UcdPropertyValues.General_Category_Values.Punctuation, - new String[] {"Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps"}, + Set.of("Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps"), UcdPropertyValues.General_Category_Values.Symbol, - new String[] {"Sc", "Sk", "Sm", "So"}, + Set.of("Sc", "Sk", "Sm", "So"), UcdPropertyValues.General_Category_Values.Separator, - new String[] {"Zl", "Zp", "Zs"}); + Set.of("Zl", "Zp", "Zs")); /** * Similar to iup.getProperty(UcdProperty.General_Category).getSet(propertyValue), but takes the From 63a5d9fc08d98b084bbde4571f439fa7e4afd16e Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 1 Apr 2025 12:41:27 +0200 Subject: [PATCH 27/38] length() == 0 to isEmpty() passim --- .../main/java/org/unicode/text/UCD/VersionedSymbolTable.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java index 737babb2d..0584d4405 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java @@ -175,7 +175,7 @@ public boolean applyPropertyAlias(String beforeEquals, String afterEquals, Unico // the unary case. if (posNotEqual >= 0) { propertyPredicate = - afterEquals.length() == 0 + afterEquals.isEmpty() ? beforeEquals.substring(posNotEqual + 1) : beforeEquals.substring(posNotEqual + 1) + "=" + afterEquals; leftHandSide = beforeEquals.substring(0, posNotEqual); @@ -198,7 +198,7 @@ private UnicodeSet getNonNegatedPropertyQuerySet( final var queriedProperties = IndexUnicodeProperties.make(deducedQueriedVersion); - if (propertyPredicate.length() == 0) { + if (propertyPredicate.isEmpty()) { // Either unary-property-query, or binary-property-query with an empty property-value. final var script = queriedProperties.getProperty(UcdProperty.Script); final var generalCategory = queriedProperties.getProperty(UcdProperty.General_Category); From 6d724904f246014db21775587ea9c30eb6bec5e3 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 1 Apr 2025 12:51:55 +0200 Subject: [PATCH 28/38] Giant conditional --- .../org/unicode/props/UnicodeProperty.java | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java index 11347d6c2..7d48d614f 100644 --- a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java +++ b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java @@ -446,19 +446,24 @@ public final UnicodeSet getSet(String propertyValue, UnicodeSet result) { throw new IllegalArgumentException( "Multivalued property values can't contain the delimiter."); } else { + Comparator comparator; + if (isType(NUMERIC_MASK)) { + // UAX44-LM1. + comparator = RATIONAL_COMPARATOR; + } else if (getName().equals("Name") || getName().equals("Name_Alias")) { + // UAX44-LM2. + comparator = CHARACTER_NAME_COMPARATOR; + } else if (isType(BINARY_OR_ENUMERATED_OR_CATALOG_MASK)) { + // UAX44-LM3 + comparator = PROPERTY_COMPARATOR; + } else { + // String-valued or Miscellaneous property. + comparator = null; + } return getSet( propertyValue == null ? NULL_MATCHER - : new SimpleMatcher( - propertyValue, - isType(NUMERIC_MASK) - ? RATIONAL_COMPARATOR - : getName().equals("Name") - || getName().equals("Name_Alias") - ? CHARACTER_NAME_COMPARATOR - : isType(STRING_OR_MISC_MASK) - ? null - : PROPERTY_COMPARATOR), + : new SimpleMatcher(propertyValue, comparator), result); } } From bb56f44b04f0ab42c9b06c64cf09af5e070eef96 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 1 Apr 2025 12:54:31 +0200 Subject: [PATCH 29/38] Needless else --- .../org/unicode/props/UnicodeProperty.java | 35 +++++++++---------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java index 7d48d614f..67450ee88 100644 --- a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java +++ b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java @@ -445,27 +445,24 @@ public final UnicodeSet getSet(String propertyValue, UnicodeSet result) { if (isMultivalued && propertyValue != null && propertyValue.contains(delimiter)) { throw new IllegalArgumentException( "Multivalued property values can't contain the delimiter."); + } + Comparator comparator; + if (isType(NUMERIC_MASK)) { + // UAX44-LM1. + comparator = RATIONAL_COMPARATOR; + } else if (getName().equals("Name") || getName().equals("Name_Alias")) { + // UAX44-LM2. + comparator = CHARACTER_NAME_COMPARATOR; + } else if (isType(BINARY_OR_ENUMERATED_OR_CATALOG_MASK)) { + // UAX44-LM3 + comparator = PROPERTY_COMPARATOR; } else { - Comparator comparator; - if (isType(NUMERIC_MASK)) { - // UAX44-LM1. - comparator = RATIONAL_COMPARATOR; - } else if (getName().equals("Name") || getName().equals("Name_Alias")) { - // UAX44-LM2. - comparator = CHARACTER_NAME_COMPARATOR; - } else if (isType(BINARY_OR_ENUMERATED_OR_CATALOG_MASK)) { - // UAX44-LM3 - comparator = PROPERTY_COMPARATOR; - } else { - // String-valued or Miscellaneous property. - comparator = null; - } - return getSet( - propertyValue == null - ? NULL_MATCHER - : new SimpleMatcher(propertyValue, comparator), - result); + // String-valued or Miscellaneous property. + comparator = null; } + return getSet( + propertyValue == null ? NULL_MATCHER : new SimpleMatcher(propertyValue, comparator), + result); } private UnicodeMap unicodeMap = null; From f8e7e4cc802b8097e4ee3cf54b151698460cd8e7 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 1 Apr 2025 13:19:49 +0200 Subject: [PATCH 30/38] Split function --- .../text/UCD/VersionedSymbolTable.java | 329 +++++++++--------- 1 file changed, 167 insertions(+), 162 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java index 0584d4405..0cc7ebdf9 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java @@ -148,7 +148,8 @@ private VersionInfo parseVersionQualifier(StringBuilder qualified) { * and binary-query-expression with a property-value where the queried property is * General_Category. */ - private UnicodeSet getGeneralCategorySet(IndexUnicodeProperties iup, String propertyValue) { + private static UnicodeSet getGeneralCategorySet( + IndexUnicodeProperties iup, String propertyValue) { var gc = iup.getProperty(UcdProperty.General_Category); for (var entry : COARSE_GENERAL_CATEGORIES.entrySet()) { final var aliases = entry.getKey().getNames().getAllNames(); @@ -192,186 +193,190 @@ public boolean applyPropertyAlias(String beforeEquals, String afterEquals, Unico private UnicodeSet getNonNegatedPropertyQuerySet( String leftHandSide, String propertyPredicate) { - final var unqualifiedLeftHandSide = new StringBuilder(leftHandSide); - final var queriedVersion = parseVersionQualifier(unqualifiedLeftHandSide); + final var mutableLeftHandSide = new StringBuilder(leftHandSide); + final var queriedVersion = parseVersionQualifier(mutableLeftHandSide); + final String unqualifiedLeftHandSide = mutableLeftHandSide.toString(); final var deducedQueriedVersion = queriedVersion == null ? implicitVersion : queriedVersion; final var queriedProperties = IndexUnicodeProperties.make(deducedQueriedVersion); if (propertyPredicate.isEmpty()) { - // Either unary-property-query, or binary-property-query with an empty property-value. - final var script = queriedProperties.getProperty(UcdProperty.Script); - final var generalCategory = queriedProperties.getProperty(UcdProperty.General_Category); - if (script.isValidValue(unqualifiedLeftHandSide.toString())) { - return script.getSet(unqualifiedLeftHandSide.toString()); - } - if (generalCategory.isValidValue(unqualifiedLeftHandSide.toString())) { - return getGeneralCategorySet(queriedProperties, unqualifiedLeftHandSide.toString()); - } - UnicodeProperty queriedProperty = - queriedProperties.getProperty(unqualifiedLeftHandSide.toString()); - if (queriedProperty == null && unversionedExtensions != null) { - queriedProperty = - unversionedExtensions.getProperty(unqualifiedLeftHandSide.toString()); - } - if (queriedProperty == null) { - throw new IllegalArgumentException( - "Invalid unary-query-expression; could not find property " - + unqualifiedLeftHandSide); - } - if (!queriedProperty.isType(UnicodeProperty.BINARY_MASK)) { - // TODO(egg): Remove when we can tell this is a unary query. - if (queriedProperty.isType(UnicodeProperty.STRING_OR_MISC_MASK)) { - return queriedProperty.getSet(""); - } - throw new IllegalArgumentException( - "Invalid unary-query-expression for non-binary property " - + queriedProperty.getName()); - } - return queriedProperty.getSet(UcdPropertyValues.Binary.Yes); + return computeUnaryQuery(queriedProperties, unqualifiedLeftHandSide); } else { - // We have a binary-property-query. - UnicodeProperty queriedProperty = - queriedProperties.getProperty(unqualifiedLeftHandSide.toString()); - if (queriedProperty == null && unversionedExtensions != null) { - queriedProperty = - unversionedExtensions.getProperty(unqualifiedLeftHandSide.toString()); + return computeBinaryQuery( + queriedProperties, unqualifiedLeftHandSide, propertyPredicate); + } + } + + private UnicodeSet computeUnaryQuery( + IndexUnicodeProperties queriedProperties, String unqualifiedQuery) { + // Either unary-property-query, or binary-property-query with an empty property-value. + final var script = queriedProperties.getProperty(UcdProperty.Script); + final var generalCategory = queriedProperties.getProperty(UcdProperty.General_Category); + if (script.isValidValue(unqualifiedQuery)) { + return script.getSet(unqualifiedQuery); + } + if (generalCategory.isValidValue(unqualifiedQuery)) { + return getGeneralCategorySet(queriedProperties, unqualifiedQuery); + } + UnicodeProperty queriedProperty = queriedProperties.getProperty(unqualifiedQuery); + if (queriedProperty == null && unversionedExtensions != null) { + queriedProperty = unversionedExtensions.getProperty(unqualifiedQuery); + } + if (queriedProperty == null) { + throw new IllegalArgumentException( + "Invalid unary-query-expression; could not find property " + unqualifiedQuery); + } + if (!queriedProperty.isType(UnicodeProperty.BINARY_MASK)) { + // TODO(egg): Remove when we can tell this is a unary query. + if (queriedProperty.isType(UnicodeProperty.STRING_OR_MISC_MASK)) { + return queriedProperty.getSet(""); } - if (queriedProperty == null) { + throw new IllegalArgumentException( + "Invalid unary-query-expression for non-binary property " + + queriedProperty.getName()); + } + return queriedProperty.getSet(UcdPropertyValues.Binary.Yes); + } + + private UnicodeSet computeBinaryQuery( + IndexUnicodeProperties queriedProperties, + String unqualifiedLeftHandSide, + String propertyPredicate) { + // We have a binary-property-query. + UnicodeProperty queriedProperty = queriedProperties.getProperty(unqualifiedLeftHandSide); + if (queriedProperty == null && unversionedExtensions != null) { + queriedProperty = unversionedExtensions.getProperty(unqualifiedLeftHandSide); + } + if (queriedProperty == null) { + throw new IllegalArgumentException( + "Invalid binary-query-expression; could not find property " + + unqualifiedLeftHandSide); + } + final boolean isAge = queriedProperty.getName().equals("Age"); + final boolean isName = queriedProperty.getName().equals("Name"); + final boolean isPropertyComparison = + propertyPredicate.startsWith("@") && propertyPredicate.endsWith("@"); + final boolean isRegularExpressionMatch = + propertyPredicate.startsWith("/") && propertyPredicate.endsWith("/"); + if (isPropertyComparison) { + if (isAge) { throw new IllegalArgumentException( - "Invalid binary-query-expression; could not find property " - + unqualifiedLeftHandSide); + "Invalid binary-query-expression with property-comparison for Age"); } - final boolean isAge = queriedProperty.getName().equals("Age"); - final boolean isName = queriedProperty.getName().equals("Name"); - final boolean isPropertyComparison = - propertyPredicate.startsWith("@") && propertyPredicate.endsWith("@"); - final boolean isRegularExpressionMatch = - propertyPredicate.startsWith("/") && propertyPredicate.endsWith("/"); - if (isPropertyComparison) { - if (isAge) { + final var unqualifiedRightHandSide = + new StringBuilder( + propertyPredicate.substring(1, propertyPredicate.length() - 1)); + final var comparisonVersion = parseVersionQualifier(unqualifiedRightHandSide); + if (UnicodeProperty.equalNames(unqualifiedRightHandSide.toString(), "code point")) { + if (comparisonVersion != null) { throw new IllegalArgumentException( - "Invalid binary-query-expression with property-comparison for Age"); + "Invalid binary-query-expression with comparison version on identity query"); } - final var unqualifiedRightHandSide = - new StringBuilder( - propertyPredicate.substring(1, propertyPredicate.length() - 1)); - final var comparisonVersion = parseVersionQualifier(unqualifiedRightHandSide); - if (UnicodeProperty.equalNames(unqualifiedRightHandSide.toString(), "code point")) { - if (comparisonVersion != null) { - throw new IllegalArgumentException( - "Invalid binary-query-expression with comparison version on identity query"); - } - if (!queriedProperty.isType(UnicodeProperty.STRING_MASK)) { - throw new IllegalArgumentException( - "Invalid binary-query-expression with identity query for " - + queriedProperty.getTypeName() - + " property"); - } - return getIdentitySet(queriedProperty); - } else if (UnicodeProperty.equalNames( - unqualifiedRightHandSide.toString(), "none")) { - if (comparisonVersion != null) { - throw new IllegalArgumentException( - "Invalid binary-query-expression with comparison version on null query"); - } - if (!queriedProperty.isType(UnicodeProperty.STRING_OR_MISC_MASK)) { - throw new IllegalArgumentException( - "Invalid binary-query-expression with null query for " - + queriedProperty.getTypeName() - + " property"); - } - return queriedProperty.getSet((String) null); - } else { - UnicodeProperty comparisonProperty = - IndexUnicodeProperties.make( - comparisonVersion == null - ? implicitVersion - : comparisonVersion) - .getProperty(unqualifiedRightHandSide.toString()); - if (comparisonProperty == null && unversionedExtensions != null) { - comparisonProperty = - unversionedExtensions.getProperty( - unqualifiedRightHandSide.toString()); - } - if (comparisonProperty == null) { - throw new IllegalArgumentException( - "Invalid binary-query-expression; could not find comparison property " - + unqualifiedRightHandSide); - } - return compareProperties(queriedProperty, comparisonProperty); + if (!queriedProperty.isType(UnicodeProperty.STRING_MASK)) { + throw new IllegalArgumentException( + "Invalid binary-query-expression with identity query for " + + queriedProperty.getTypeName() + + " property"); } - } else if (isRegularExpressionMatch) { - if (isAge) { + return getIdentitySet(queriedProperty); + } else if (UnicodeProperty.equalNames(unqualifiedRightHandSide.toString(), "none")) { + if (comparisonVersion != null) { throw new IllegalArgumentException( - "Invalid binary-query-expression with regular-expression-match for Age"); + "Invalid binary-query-expression with comparison version on null query"); } - return queriedProperty.getSet( - new UnicodeProperty.RegexMatcher() - .set( - propertyPredicate.substring( - 1, propertyPredicate.length() - 1))); + if (!queriedProperty.isType(UnicodeProperty.STRING_OR_MISC_MASK)) { + throw new IllegalArgumentException( + "Invalid binary-query-expression with null query for " + + queriedProperty.getTypeName() + + " property"); + } + return queriedProperty.getSet((String) null); } else { - String propertyValue = propertyPredicate; - // Validation. For Name, validation entails computing the query, so we return here. - if (isName) { - var result = queriedProperty.getSet(propertyValue); - if (result.isEmpty()) { - result = - queriedProperties - .getProperty(UcdProperty.Name_Alias) - .getSet(propertyValue); - } - if (result.isEmpty()) { - throw new IllegalArgumentException( - "No character name nor name alias matches " + propertyValue); - } - return result; - } else if (queriedProperty.getName().equals("Name_Alias")) { - var result = queriedProperty.getSet(propertyValue); - if (result.isEmpty()) { - throw new IllegalArgumentException( - "No name alias matches " + propertyValue); - } - return result; - } else if (queriedProperty.isType(UnicodeProperty.NUMERIC_MASK)) { - if (UnicodeProperty.equalNames(propertyValue, "NaN") - || !RATIONAL_PATTERN.matcher(propertyValue).matches()) { - throw new IllegalArgumentException( - "Invalid value '" - + propertyValue - + "' for numeric property " - + queriedProperty.getName()); - } - } else if (queriedProperty.isType( - UnicodeProperty.BINARY_OR_ENUMERATED_OR_CATALOG_MASK)) { - if (!queriedProperty.isValidValue(propertyValue)) { - throw new IllegalArgumentException( - "The value '" - + propertyValue - + "' is illegal. Values for " - + queriedProperty.getName() - + " must be in " - + queriedProperty.getAvailableValues() - + " or in " - + queriedProperty.getValueAliases()); - } - } else { - // TODO(egg): Check for unescaped :, @, =, etc. and unescape. + UnicodeProperty comparisonProperty = + IndexUnicodeProperties.make( + comparisonVersion == null + ? implicitVersion + : comparisonVersion) + .getProperty(unqualifiedRightHandSide.toString()); + if (comparisonProperty == null && unversionedExtensions != null) { + comparisonProperty = + unversionedExtensions.getProperty(unqualifiedRightHandSide.toString()); + } + if (comparisonProperty == null) { + throw new IllegalArgumentException( + "Invalid binary-query-expression; could not find comparison property " + + unqualifiedRightHandSide); } - if (isAge) { - return queriedProperty.getSet( - new UnicodePropertySymbolTable.ComparisonMatcher( - UnicodePropertySymbolTable.parseVersionInfoOrMax(propertyValue), - UnicodePropertySymbolTable.Relation.geq, - Comparator.nullsFirst(Comparator.naturalOrder()), - UnicodePropertySymbolTable::parseVersionInfoOrMax)); + return compareProperties(queriedProperty, comparisonProperty); + } + } else if (isRegularExpressionMatch) { + if (isAge) { + throw new IllegalArgumentException( + "Invalid binary-query-expression with regular-expression-match for Age"); + } + return queriedProperty.getSet( + new UnicodeProperty.RegexMatcher() + .set(propertyPredicate.substring(1, propertyPredicate.length() - 1))); + } else { + String propertyValue = propertyPredicate; + // Validation. For Name, validation entails computing the query, so we return here. + if (isName) { + var result = queriedProperty.getSet(propertyValue); + if (result.isEmpty()) { + result = + queriedProperties + .getProperty(UcdProperty.Name_Alias) + .getSet(propertyValue); } - if (queriedProperty.getName().equals("General_Category")) { - return getGeneralCategorySet(queriedProperties, propertyValue); + if (result.isEmpty()) { + throw new IllegalArgumentException( + "No character name nor name alias matches " + propertyValue); + } + return result; + } else if (queriedProperty.getName().equals("Name_Alias")) { + var result = queriedProperty.getSet(propertyValue); + if (result.isEmpty()) { + throw new IllegalArgumentException("No name alias matches " + propertyValue); } - return queriedProperty.getSet(propertyValue); + return result; + } else if (queriedProperty.isType(UnicodeProperty.NUMERIC_MASK)) { + if (UnicodeProperty.equalNames(propertyValue, "NaN") + || !RATIONAL_PATTERN.matcher(propertyValue).matches()) { + throw new IllegalArgumentException( + "Invalid value '" + + propertyValue + + "' for numeric property " + + queriedProperty.getName()); + } + } else if (queriedProperty.isType( + UnicodeProperty.BINARY_OR_ENUMERATED_OR_CATALOG_MASK)) { + if (!queriedProperty.isValidValue(propertyValue)) { + throw new IllegalArgumentException( + "The value '" + + propertyValue + + "' is illegal. Values for " + + queriedProperty.getName() + + " must be in " + + queriedProperty.getAvailableValues() + + " or in " + + queriedProperty.getValueAliases()); + } + } else { + // TODO(egg): Check for unescaped :, @, =, etc. and unescape. + } + if (isAge) { + return queriedProperty.getSet( + new UnicodePropertySymbolTable.ComparisonMatcher( + UnicodePropertySymbolTable.parseVersionInfoOrMax(propertyValue), + UnicodePropertySymbolTable.Relation.geq, + Comparator.nullsFirst(Comparator.naturalOrder()), + UnicodePropertySymbolTable::parseVersionInfoOrMax)); + } + if (queriedProperty.getName().equals("General_Category")) { + return getGeneralCategorySet(queriedProperties, propertyValue); } + return queriedProperty.getSet(propertyValue); } } From a227b018a782c583aec07ea3831657be07bf38a9 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 1 Apr 2025 13:21:18 +0200 Subject: [PATCH 31/38] code motion --- .../text/UCD/VersionedSymbolTable.java | 248 +++++++++--------- 1 file changed, 124 insertions(+), 124 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java index 0cc7ebdf9..e4625c7b2 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java @@ -40,130 +40,6 @@ public VersionedSymbolTable setUnversionedExtensions(UnicodeProperty.Factory fac return this; } - /** - * Parses a string prefixed with an optional-version-qualifier. If there is a version-qualifier, - * returns the corresponding VersionInfo and removes the prefix from the given StringBuilder. - */ - private VersionInfo parseVersionQualifier(StringBuilder qualified) { - int posColon = qualified.indexOf(":", 0); - if (posColon < 0) { - return null; - } else { - final String versionQualifier = qualified.substring(0, posColon + 1); - qualified.delete(0, posColon + 1); - if (versionQualifier.equals("U-1")) { - return previousVersion; - } else { - switch (versionQualifier.charAt(0)) { - case 'R': - // Extension: we allow a version-qualifier starting with R for retroactive - // properties, that is, property derivations applied before the property - // existed. - // TODO(egg): Actually support that. - case 'U': - break; - default: - throw new IllegalArgumentException( - "Invalid version-qualifier " + versionQualifier); - } - String versionNumber = versionQualifier.substring(1, posColon); - if (versionNumber.endsWith("dev")) { - versionNumber = versionNumber.substring(0, versionNumber.length() - 3); - if (!versionNumber.isEmpty() - && VersionInfo.getInstance(versionNumber) - != Settings.LATEST_VERSION_INFO) { - throw new IllegalArgumentException( - "Invalid version-qualifier " - + versionQualifier - + " with version-suffix dev: the current dev version is " - + Settings.latestVersion); - } - return Settings.LATEST_VERSION_INFO; - } else if (versionNumber.endsWith("α") || versionNumber.endsWith("β")) { - final String versionSuffix = - versionNumber.substring(versionNumber.length() - 1); - versionNumber = versionNumber.substring(0, versionNumber.length() - 1); - if (versionSuffix != Settings.latestVersionPhase.toString()) { - throw new IllegalArgumentException( - "Invalid version-qualifier " - + versionQualifier - + " with version-suffix " - + versionSuffix - + ": the current stage is " - + Settings.latestVersionPhase); - } - if (!versionNumber.isEmpty() - && VersionInfo.getInstance(versionNumber) - != Settings.LATEST_VERSION_INFO) { - throw new IllegalArgumentException( - "Invalid version-qualifier " - + versionQualifier - + " with version-suffix " - + versionNumber - + ": the current " - + versionSuffix - + " version is " - + Settings.latestVersion); - } - return Settings.LATEST_VERSION_INFO; - } else { - var result = VersionInfo.getInstance(versionNumber); - if (result == Settings.LATEST_VERSION_INFO && requireSuffixForLatest) { - throw new IllegalArgumentException( - "Invalid version-qualifier " - + versionQualifier - + " version-suffix " - + Settings.latestVersionPhase - + " required for unpublished version"); - } - return result; - } - } - } - } - - private static Map> - COARSE_GENERAL_CATEGORIES = - Map.of( - UcdPropertyValues.General_Category_Values.Other, - Set.of("Cc", "Cf", "Cn", "Co", "Cs"), - UcdPropertyValues.General_Category_Values.Letter, - Set.of("Ll", "Lm", "Lo", "Lt", "Lu"), - UcdPropertyValues.General_Category_Values.Cased_Letter, - Set.of("Ll", "Lt", "Lu"), - UcdPropertyValues.General_Category_Values.Mark, - Set.of("Mc", "Me", "Mn"), - UcdPropertyValues.General_Category_Values.Number, - Set.of("Nd", "Nl", "No"), - UcdPropertyValues.General_Category_Values.Punctuation, - Set.of("Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps"), - UcdPropertyValues.General_Category_Values.Symbol, - Set.of("Sc", "Sk", "Sm", "So"), - UcdPropertyValues.General_Category_Values.Separator, - Set.of("Zl", "Zp", "Zs")); - - /** - * Similar to iup.getProperty(UcdProperty.General_Category).getSet(propertyValue), but takes the - * groupings into account. Implements both unary-query-expression for a General_Category alias - * and binary-query-expression with a property-value where the queried property is - * General_Category. - */ - private static UnicodeSet getGeneralCategorySet( - IndexUnicodeProperties iup, String propertyValue) { - var gc = iup.getProperty(UcdProperty.General_Category); - for (var entry : COARSE_GENERAL_CATEGORIES.entrySet()) { - final var aliases = entry.getKey().getNames().getAllNames(); - if (aliases.stream().anyMatch(a -> UnicodeProperty.equalNames(propertyValue, a))) { - UnicodeSet result = new UnicodeSet(); - for (var value : entry.getValue()) { - gc.getSet(value, result); - } - return result; - } - } - return gc.getSet(propertyValue); - } - @Override public boolean applyPropertyAlias(String beforeEquals, String afterEquals, UnicodeSet result) { result.clear(); @@ -380,6 +256,130 @@ private UnicodeSet computeBinaryQuery( } } + /** + * Parses a string prefixed with an optional-version-qualifier. If there is a version-qualifier, + * returns the corresponding VersionInfo and removes the prefix from the given StringBuilder. + */ + private VersionInfo parseVersionQualifier(StringBuilder qualified) { + int posColon = qualified.indexOf(":", 0); + if (posColon < 0) { + return null; + } else { + final String versionQualifier = qualified.substring(0, posColon + 1); + qualified.delete(0, posColon + 1); + if (versionQualifier.equals("U-1")) { + return previousVersion; + } else { + switch (versionQualifier.charAt(0)) { + case 'R': + // Extension: we allow a version-qualifier starting with R for retroactive + // properties, that is, property derivations applied before the property + // existed. + // TODO(egg): Actually support that. + case 'U': + break; + default: + throw new IllegalArgumentException( + "Invalid version-qualifier " + versionQualifier); + } + String versionNumber = versionQualifier.substring(1, posColon); + if (versionNumber.endsWith("dev")) { + versionNumber = versionNumber.substring(0, versionNumber.length() - 3); + if (!versionNumber.isEmpty() + && VersionInfo.getInstance(versionNumber) + != Settings.LATEST_VERSION_INFO) { + throw new IllegalArgumentException( + "Invalid version-qualifier " + + versionQualifier + + " with version-suffix dev: the current dev version is " + + Settings.latestVersion); + } + return Settings.LATEST_VERSION_INFO; + } else if (versionNumber.endsWith("α") || versionNumber.endsWith("β")) { + final String versionSuffix = + versionNumber.substring(versionNumber.length() - 1); + versionNumber = versionNumber.substring(0, versionNumber.length() - 1); + if (versionSuffix != Settings.latestVersionPhase.toString()) { + throw new IllegalArgumentException( + "Invalid version-qualifier " + + versionQualifier + + " with version-suffix " + + versionSuffix + + ": the current stage is " + + Settings.latestVersionPhase); + } + if (!versionNumber.isEmpty() + && VersionInfo.getInstance(versionNumber) + != Settings.LATEST_VERSION_INFO) { + throw new IllegalArgumentException( + "Invalid version-qualifier " + + versionQualifier + + " with version-suffix " + + versionNumber + + ": the current " + + versionSuffix + + " version is " + + Settings.latestVersion); + } + return Settings.LATEST_VERSION_INFO; + } else { + var result = VersionInfo.getInstance(versionNumber); + if (result == Settings.LATEST_VERSION_INFO && requireSuffixForLatest) { + throw new IllegalArgumentException( + "Invalid version-qualifier " + + versionQualifier + + " version-suffix " + + Settings.latestVersionPhase + + " required for unpublished version"); + } + return result; + } + } + } + } + + private static Map> + COARSE_GENERAL_CATEGORIES = + Map.of( + UcdPropertyValues.General_Category_Values.Other, + Set.of("Cc", "Cf", "Cn", "Co", "Cs"), + UcdPropertyValues.General_Category_Values.Letter, + Set.of("Ll", "Lm", "Lo", "Lt", "Lu"), + UcdPropertyValues.General_Category_Values.Cased_Letter, + Set.of("Ll", "Lt", "Lu"), + UcdPropertyValues.General_Category_Values.Mark, + Set.of("Mc", "Me", "Mn"), + UcdPropertyValues.General_Category_Values.Number, + Set.of("Nd", "Nl", "No"), + UcdPropertyValues.General_Category_Values.Punctuation, + Set.of("Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps"), + UcdPropertyValues.General_Category_Values.Symbol, + Set.of("Sc", "Sk", "Sm", "So"), + UcdPropertyValues.General_Category_Values.Separator, + Set.of("Zl", "Zp", "Zs")); + + /** + * Similar to iup.getProperty(UcdProperty.General_Category).getSet(propertyValue), but takes the + * groupings into account. Implements both unary-query-expression for a General_Category alias + * and binary-query-expression with a property-value where the queried property is + * General_Category. + */ + private static UnicodeSet getGeneralCategorySet( + IndexUnicodeProperties iup, String propertyValue) { + var gc = iup.getProperty(UcdProperty.General_Category); + for (var entry : COARSE_GENERAL_CATEGORIES.entrySet()) { + final var aliases = entry.getKey().getNames().getAllNames(); + if (aliases.stream().anyMatch(a -> UnicodeProperty.equalNames(propertyValue, a))) { + UnicodeSet result = new UnicodeSet(); + for (var value : entry.getValue()) { + gc.getSet(value, result); + } + return result; + } + } + return gc.getSet(propertyValue); + } + private static UnicodeSet getIdentitySet(UnicodeProperty queriedProperty) { final var result = new UnicodeSet(); // Note that while UnicodeProperty, can return strings from getSet, which is an extension of From e205fa4157309f980307b3b3b2c322d090a7b925 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 1 Apr 2025 13:28:50 +0200 Subject: [PATCH 32/38] Redundant else --- .../text/UCD/VersionedSymbolTable.java | 130 +++++++++--------- 1 file changed, 63 insertions(+), 67 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java index e4625c7b2..e64e26c3d 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java @@ -264,76 +264,72 @@ private VersionInfo parseVersionQualifier(StringBuilder qualified) { int posColon = qualified.indexOf(":", 0); if (posColon < 0) { return null; + } + final String versionQualifier = qualified.substring(0, posColon + 1); + qualified.delete(0, posColon + 1); + if (versionQualifier.equals("U-1")) { + return previousVersion; } else { - final String versionQualifier = qualified.substring(0, posColon + 1); - qualified.delete(0, posColon + 1); - if (versionQualifier.equals("U-1")) { - return previousVersion; - } else { - switch (versionQualifier.charAt(0)) { - case 'R': - // Extension: we allow a version-qualifier starting with R for retroactive - // properties, that is, property derivations applied before the property - // existed. - // TODO(egg): Actually support that. - case 'U': - break; - default: - throw new IllegalArgumentException( - "Invalid version-qualifier " + versionQualifier); + switch (versionQualifier.charAt(0)) { + case 'R': + // Extension: we allow a version-qualifier starting with R for retroactive + // properties, that is, property derivations applied before the property + // existed. + // TODO(egg): Actually support that. + case 'U': + break; + default: + throw new IllegalArgumentException( + "Invalid version-qualifier " + versionQualifier); + } + String versionNumber = versionQualifier.substring(1, posColon); + if (versionNumber.endsWith("dev")) { + versionNumber = versionNumber.substring(0, versionNumber.length() - 3); + if (!versionNumber.isEmpty() + && VersionInfo.getInstance(versionNumber) != Settings.LATEST_VERSION_INFO) { + throw new IllegalArgumentException( + "Invalid version-qualifier " + + versionQualifier + + " with version-suffix dev: the current dev version is " + + Settings.latestVersion); + } + return Settings.LATEST_VERSION_INFO; + } else if (versionNumber.endsWith("α") || versionNumber.endsWith("β")) { + final String versionSuffix = versionNumber.substring(versionNumber.length() - 1); + versionNumber = versionNumber.substring(0, versionNumber.length() - 1); + if (versionSuffix != Settings.latestVersionPhase.toString()) { + throw new IllegalArgumentException( + "Invalid version-qualifier " + + versionQualifier + + " with version-suffix " + + versionSuffix + + ": the current stage is " + + Settings.latestVersionPhase); + } + if (!versionNumber.isEmpty() + && VersionInfo.getInstance(versionNumber) != Settings.LATEST_VERSION_INFO) { + throw new IllegalArgumentException( + "Invalid version-qualifier " + + versionQualifier + + " with version-suffix " + + versionNumber + + ": the current " + + versionSuffix + + " version is " + + Settings.latestVersion); } - String versionNumber = versionQualifier.substring(1, posColon); - if (versionNumber.endsWith("dev")) { - versionNumber = versionNumber.substring(0, versionNumber.length() - 3); - if (!versionNumber.isEmpty() - && VersionInfo.getInstance(versionNumber) - != Settings.LATEST_VERSION_INFO) { - throw new IllegalArgumentException( - "Invalid version-qualifier " - + versionQualifier - + " with version-suffix dev: the current dev version is " - + Settings.latestVersion); - } - return Settings.LATEST_VERSION_INFO; - } else if (versionNumber.endsWith("α") || versionNumber.endsWith("β")) { - final String versionSuffix = - versionNumber.substring(versionNumber.length() - 1); - versionNumber = versionNumber.substring(0, versionNumber.length() - 1); - if (versionSuffix != Settings.latestVersionPhase.toString()) { - throw new IllegalArgumentException( - "Invalid version-qualifier " - + versionQualifier - + " with version-suffix " - + versionSuffix - + ": the current stage is " - + Settings.latestVersionPhase); - } - if (!versionNumber.isEmpty() - && VersionInfo.getInstance(versionNumber) - != Settings.LATEST_VERSION_INFO) { - throw new IllegalArgumentException( - "Invalid version-qualifier " - + versionQualifier - + " with version-suffix " - + versionNumber - + ": the current " - + versionSuffix - + " version is " - + Settings.latestVersion); - } - return Settings.LATEST_VERSION_INFO; - } else { - var result = VersionInfo.getInstance(versionNumber); - if (result == Settings.LATEST_VERSION_INFO && requireSuffixForLatest) { - throw new IllegalArgumentException( - "Invalid version-qualifier " - + versionQualifier - + " version-suffix " - + Settings.latestVersionPhase - + " required for unpublished version"); - } - return result; + return Settings.LATEST_VERSION_INFO; + } else { + var result = VersionInfo.getInstance(versionNumber); + if (result == Settings.LATEST_VERSION_INFO && requireSuffixForLatest) { + throw new IllegalArgumentException( + "Invalid version-qualifier " + + versionQualifier + + " version-suffix " + + Settings.latestVersionPhase + + " required for unpublished version"); } + return result; } } } From c6c00b40a0d69283275c7283e7b912805536339a Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 1 Apr 2025 13:32:55 +0200 Subject: [PATCH 33/38] Better error messages --- .../text/UCD/TestVersionedSymbolTable.java | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java index f17480b2c..fa84a5a87 100644 --- a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java +++ b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java @@ -275,10 +275,28 @@ public UnicodeSetTestFluent isEqualToUnicodeSet( final var expected = new UnicodeSet(expectedExpression); assertTrue( set.containsAll(expected), - expected + " ⊆ " + expression + " = " + set.toPattern(true)); + "Expected " + + expected + + " ⊆ " + + expression + + " = " + + set.toPattern(true) + + " but " + + expression + + " is missing " + + expected.removeAll(set)); assertTrue( expected.containsAll(set), - expected + " ⊇ " + expression + " = " + set.toPattern(true)); + "Expected " + + expected + + " ⊇ " + + expression + + " = " + + set.toPattern(true) + + " but " + + expression + + " contains unexpected " + + set.removeAll(expected)); return this; } From 710aedb2d763268fa269e72f2401482c2a8eb8a5 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 1 Apr 2025 13:34:36 +0200 Subject: [PATCH 34/38] Sonst null. --- .../src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java index e64e26c3d..806c56810 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedSymbolTable.java @@ -259,6 +259,7 @@ private UnicodeSet computeBinaryQuery( /** * Parses a string prefixed with an optional-version-qualifier. If there is a version-qualifier, * returns the corresponding VersionInfo and removes the prefix from the given StringBuilder. + * Otherwise returns null. */ private VersionInfo parseVersionQualifier(StringBuilder qualified) { int posColon = qualified.indexOf(":", 0); From a9666e5cdbe787df43d7e874de1bd75938e1e764 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 1 Apr 2025 13:54:29 +0200 Subject: [PATCH 35/38] =?UTF-8?q?Don=E2=80=99t=20mutate=20the=20expected?= =?UTF-8?q?=20set?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../java/org/unicode/text/UCD/TestVersionedSymbolTable.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java index fa84a5a87..3b15a0779 100644 --- a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java +++ b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java @@ -284,7 +284,7 @@ public UnicodeSetTestFluent isEqualToUnicodeSet( + " but " + expression + " is missing " - + expected.removeAll(set)); + + expected.cloneAsThawed().removeAll(set)); assertTrue( expected.containsAll(set), "Expected " @@ -296,7 +296,7 @@ public UnicodeSetTestFluent isEqualToUnicodeSet( + " but " + expression + " contains unexpected " - + set.removeAll(expected)); + + set.cloneAsThawed().removeAll(expected)); return this; } From 04c6f21d789d76c58799add6822a5f3bdde852d9 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 1 Apr 2025 14:10:54 +0200 Subject: [PATCH 36/38] =?UTF-8?q?Don=E2=80=99t=20break=20other=20tests=20e?= =?UTF-8?q?ven=20if=20they=20are=20doing=20evil=20things?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../text/UCD/TestVersionedSymbolTable.java | 21 ++++++------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java index 3b15a0779..1c76bf40a 100644 --- a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java +++ b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java @@ -5,6 +5,8 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import com.ibm.icu.text.UnicodeSet; +import com.ibm.icu.text.UnicodeSet.XSymbolTable; + import java.text.ParsePosition; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -17,14 +19,17 @@ * reasonable. If they are broken by changes to property assignments, feel free to update them. */ public class TestVersionedSymbolTable { + XSymbolTable old_default; + @BeforeEach void setUp() { + old_default = UnicodeSet.getDefaultXSymbolTable(); UnicodeSet.setDefaultXSymbolTable(VersionedSymbolTable.forDevelopment()); } @AfterEach void tearDown() { - UnicodeSet.setDefaultXSymbolTable(NO_PROPS); + UnicodeSet.setDefaultXSymbolTable(old_default); } @Test @@ -338,18 +343,4 @@ public UnicodeSetTestFluent isEmpty() { private UnicodeSetTestFluent assertThatUnicodeSet(String expression) { return new UnicodeSetTestFluent(expression); } - - static UnicodeSet.XSymbolTable NO_PROPS = - new UnicodeSet.XSymbolTable() { - @Override - public boolean applyPropertyAlias( - String propertyName, String propertyValue, UnicodeSet result) { - throw new IllegalArgumentException( - "Don't use any ICU Unicode Properties! " - + propertyName - + "=" - + propertyValue); - } - ; - }; } From 27c490f4ad2c0dd4f604e1ff60698a43a43e1e38 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 1 Apr 2025 14:16:06 +0200 Subject: [PATCH 37/38] spot --- .../java/org/unicode/text/UCD/TestVersionedSymbolTable.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java index 1c76bf40a..b657c863a 100644 --- a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java +++ b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java @@ -6,7 +6,6 @@ import com.ibm.icu.text.UnicodeSet; import com.ibm.icu.text.UnicodeSet.XSymbolTable; - import java.text.ParsePosition; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -19,7 +18,7 @@ * reasonable. If they are broken by changes to property assignments, feel free to update them. */ public class TestVersionedSymbolTable { - XSymbolTable old_default; + XSymbolTable old_default; @BeforeEach void setUp() { From ae22201361439a94f331e0cd5376793efc58bd7e Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Tue, 1 Apr 2025 23:48:10 +0200 Subject: [PATCH 38/38] Some more review comments --- .../src/main/java/org/unicode/props/UnicodeProperty.java | 7 ++++--- .../org/unicode/text/UCD/TestVersionedSymbolTable.java | 7 +++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java index 67450ee88..c5c3f2433 100644 --- a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java +++ b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java @@ -446,6 +446,9 @@ public final UnicodeSet getSet(String propertyValue, UnicodeSet result) { throw new IllegalArgumentException( "Multivalued property values can't contain the delimiter."); } + if (propertyValue == null) { + return getSet(NULL_MATCHER, result); + } Comparator comparator; if (isType(NUMERIC_MASK)) { // UAX44-LM1. @@ -460,9 +463,7 @@ public final UnicodeSet getSet(String propertyValue, UnicodeSet result) { // String-valued or Miscellaneous property. comparator = null; } - return getSet( - propertyValue == null ? NULL_MATCHER : new SimpleMatcher(propertyValue, comparator), - result); + return getSet(new SimpleMatcher(propertyValue, comparator), result); } private UnicodeMap unicodeMap = null; diff --git a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java index b657c863a..ac05aa9fb 100644 --- a/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java +++ b/unicodetools/src/test/java/org/unicode/text/UCD/TestVersionedSymbolTable.java @@ -5,7 +5,6 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import com.ibm.icu.text.UnicodeSet; -import com.ibm.icu.text.UnicodeSet.XSymbolTable; import java.text.ParsePosition; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -18,17 +17,17 @@ * reasonable. If they are broken by changes to property assignments, feel free to update them. */ public class TestVersionedSymbolTable { - XSymbolTable old_default; + UnicodeSet.XSymbolTable oldDefault; @BeforeEach void setUp() { - old_default = UnicodeSet.getDefaultXSymbolTable(); + oldDefault = UnicodeSet.getDefaultXSymbolTable(); UnicodeSet.setDefaultXSymbolTable(VersionedSymbolTable.forDevelopment()); } @AfterEach void tearDown() { - UnicodeSet.setDefaultXSymbolTable(old_default); + UnicodeSet.setDefaultXSymbolTable(oldDefault); } @Test