From ad8c1998485313f63cc181bb8cfab3942a991107 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 9 Apr 2025 13:48:59 +0200 Subject: [PATCH 1/2] Do not fall back when looking for 4.1 data or later --- .../src/main/java/org/unicode/text/utility/Utility.java | 5 +++++ .../resources/org/unicode/text/UCD/UnicodeInvariantTest.txt | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/unicodetools/src/main/java/org/unicode/text/utility/Utility.java b/unicodetools/src/main/java/org/unicode/text/utility/Utility.java index a17af1efd..541f01d59 100644 --- a/unicodetools/src/main/java/org/unicode/text/utility/Utility.java +++ b/unicodetools/src/main/java/org/unicode/text/utility/Utility.java @@ -1451,6 +1451,11 @@ public static String getMostRecentUnicodeDataFile( if (version != null && version.compareTo(currentVersion) < compValue) { continue; } + if (version != null && version.compareTo(VersionInfo.UNICODE_4_1) >= 0 && currentVersion.compareTo(version) < 0) { + // Do not look at earlier versions if we want Unicode 4.1 data or later. + // Unicode 4.0.1 is the last version for which unmodified files were not republished. + return null; + } // check the standard ucd directory if (filename.contains("/*/")) { // check the idna directory diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index 85ff75d66..f1edbd273 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -1295,6 +1295,10 @@ Let $japaneseSimplifiedRadicals := \p{Name=/CJK RADICAL J-SIMPLIFIED/} In $chineseSimplifiedRadicals, Equivalent_Unified_Ideograph ∈ [\p{kRSUnicode=/^[0-9]+'\.0$/} $radicalsWithUnifiableSimplifications] In $japaneseSimplifiedRadicals, Equivalent_Unified_Ideograph ∈ \p{kRSUnicode=/^[0-9]+''\.0$/} +# Check that removed properties do not persist. +\p{U16:kGB7≠@none@}=[垯屃慭梾疭瘆筼臜荙诐跶轪辌酦镈镋镕饳馉鱽鱾鲃鲉鲌鲏鲪鲬鲯鲹鲾鳂鳈鳉鳑鳚鳡鳤鸻鸼鹟鹮鹲] +\p{kGB7≠@none@}=[] + # Tangut invariants Let $tangutSourcesScope := [\p{Block=/^Tangut(.Supplement)?$/} - \p{gc=Cn}] From 5207537885fdcd3179a40c623aad2e2aa0b6055c Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 9 Apr 2025 13:51:25 +0200 Subject: [PATCH 2/2] spots --- .../src/main/java/org/unicode/text/utility/Utility.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/text/utility/Utility.java b/unicodetools/src/main/java/org/unicode/text/utility/Utility.java index 541f01d59..d519e132b 100644 --- a/unicodetools/src/main/java/org/unicode/text/utility/Utility.java +++ b/unicodetools/src/main/java/org/unicode/text/utility/Utility.java @@ -1451,9 +1451,12 @@ public static String getMostRecentUnicodeDataFile( if (version != null && version.compareTo(currentVersion) < compValue) { continue; } - if (version != null && version.compareTo(VersionInfo.UNICODE_4_1) >= 0 && currentVersion.compareTo(version) < 0) { + if (version != null + && version.compareTo(VersionInfo.UNICODE_4_1) >= 0 + && currentVersion.compareTo(version) < 0) { // Do not look at earlier versions if we want Unicode 4.1 data or later. - // Unicode 4.0.1 is the last version for which unmodified files were not republished. + // Unicode 4.0.1 is the last version for which unmodified files were not + // republished. return null; } // check the standard ucd directory