From 069bed44e3a25593b752a41518aa89e6c9206178 Mon Sep 17 00:00:00 2001 From: Josh Hadley Date: Mon, 11 Aug 2025 11:26:39 -0700 Subject: [PATCH 1/2] Confusables data for Devanagari UE and UUE - confusables-source.txt --- .../data/security/dev/data/source/confusables-source.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/unicodetools/data/security/dev/data/source/confusables-source.txt b/unicodetools/data/security/dev/data/source/confusables-source.txt index 2b6d86128..5a4e2ac68 100644 --- a/unicodetools/data/security/dev/data/source/confusables-source.txt +++ b/unicodetools/data/security/dev/data/source/confusables-source.txt @@ -5724,3 +5724,7 @@ A7F1 ; 02E2 # ( ꟱ → ˢ ) MODIFIER LETTER CAPITAL S → MODIFIER LETTER SMAL # Confusable Katakana-Han pair (PAG ref #442) 1B122 ; 4E8E + +# Confusables for Devanagari UE and UUE (PAG ref #449) +0956 ; 032E +0957 ; 032E 032E From 3643357eaf230d2fc2881973c9763ce14f1c111e Mon Sep 17 00:00:00 2001 From: Josh Hadley Date: Mon, 11 Aug 2025 11:27:00 -0700 Subject: [PATCH 2/2] Confusables data for Devanagari UE and UUE - generated data --- .../data/security/dev/confusables.txt | 21 ++++++------ .../data/security/dev/confusablesSummary.txt | 33 +++++++++---------- .../dev/data/confusablesSummaryIdentifier.txt | 18 +++++----- .../dev/data/source/formatted-source.txt | 5 ++- 4 files changed, 40 insertions(+), 37 deletions(-) diff --git a/unicodetools/data/security/dev/confusables.txt b/unicodetools/data/security/dev/confusables.txt index d52b6278a..8bf714fef 100644 --- a/unicodetools/data/security/dev/confusables.txt +++ b/unicodetools/data/security/dev/confusables.txt @@ -1,5 +1,5 @@ # confusables.txt -# Date: 2025-07-22, 05:49:37 GMT +# Date: 2025-08-11, 18:24:57 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -180,6 +180,13 @@ A6F1 ; 0304 ; MA # ( ꛱ → ̄ ) BAMUM COMBINING MARK TUKWENTIS → COMBINING M 1CD9 ; 032D ; MA # ( ᳙ → ̭ ) VEDIC TONE YAJURVEDIC KATHAKA INDEPENDENT SVARITA SCHROEDER → COMBINING CIRCUMFLEX ACCENT BELOW # 1CD8 ; 032E ; MA # ( ᳘ → ̮ ) VEDIC TONE CANDRA BELOW → COMBINING BREVE BELOW # +0956 ; 032E ; MA # ( ॖ → ̮ ) DEVANAGARI VOWEL SIGN UE → COMBINING BREVE BELOW # +0A41 ; 032E ; MA # ( ੁ → ̮ ) GURMUKHI VOWEL SIGN U → COMBINING BREVE BELOW # →ॖ→ +11B62 ; 032E ; MA # ( 𑭢 → ̮ ) SHARADA VOWEL SIGN UE → COMBINING BREVE BELOW # →ॖ→ + +0957 ; 032E 032E ; MA # ( ॗ → ̮̮ ) DEVANAGARI VOWEL SIGN UUE → COMBINING BREVE BELOW, COMBINING BREVE BELOW # +0A42 ; 032E 032E ; MA # ( ੂ → ̮̮ ) GURMUKHI VOWEL SIGN UU → COMBINING BREVE BELOW, COMBINING BREVE BELOW # →ॗ→ +11B63 ; 032E 032E ; MA # ( 𑭣 → ̮̮ ) SHARADA VOWEL SIGN UUE → COMBINING BREVE BELOW, COMBINING BREVE BELOW # →ॗ→ 0952 ; 0331 ; MA # ( ॒ → ̱ ) DEVANAGARI STRESS SIGN ANUDATTA → COMBINING MACRON BELOW # 0320 ; 0331 ; MA # ( ̠ → ̱ ) COMBINING MINUS SIGN BELOW → COMBINING MACRON BELOW # @@ -5869,9 +5876,9 @@ FE19 ; 2D57 ; MA #* ( ︙ → ⵗ ) PRESENTATION FORM FOR VERTICAL HORIZONTAL EL 114BD ; 09D7 ; MA # ( 𑒽 → ৗ ) TIRHUTA VOWEL SIGN SHORT O → BENGALI AU LENGTH MARK # -0A09 ; 0A73 11B62 ; MA # ( ਉ → ੳ𑭢 ) GURMUKHI LETTER U → GURMUKHI URA, SHARADA VOWEL SIGN UE # →ੳੁ→ +0A09 ; 0A73 032E ; MA # ( ਉ → ੳ̮ ) GURMUKHI LETTER U → GURMUKHI URA, COMBINING BREVE BELOW # →ੳੁ→ -0A0A ; 0A73 11B63 ; MA # ( ਊ → ੳ𑭣 ) GURMUKHI LETTER UU → GURMUKHI URA, SHARADA VOWEL SIGN UUE # →ੳੂ→ +0A0A ; 0A73 032E 032E ; MA # ( ਊ → ੳ̮̮ ) GURMUKHI LETTER UU → GURMUKHI URA, COMBINING BREVE BELOW, COMBINING BREVE BELOW # →ੳੂ→ 0A10 ; 0A05 0948 ; MA # ( ਐ → ਅै ) GURMUKHI LETTER AI → GURMUKHI LETTER A, DEVANAGARI VOWEL SIGN AI # →ਅੈ→ @@ -9946,12 +9953,6 @@ A7CF ; A7CE ; MA # ( ꟏ → ꟎ ) LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIV 0348 ; 10EFA ; MA # ( ͈ → 𐻺 ) COMBINING DOUBLE VERTICAL LINE BELOW → ARABIC DOUBLE VERTICAL BAR BELOW # -0956 ; 11B62 ; MA # ( ॖ → 𑭢 ) DEVANAGARI VOWEL SIGN UE → SHARADA VOWEL SIGN UE # -0A41 ; 11B62 ; MA # ( ੁ → 𑭢 ) GURMUKHI VOWEL SIGN U → SHARADA VOWEL SIGN UE # →ॖ→ - -0957 ; 11B63 ; MA # ( ॗ → 𑭣 ) DEVANAGARI VOWEL SIGN UUE → SHARADA VOWEL SIGN UUE # -0A42 ; 11B63 ; MA # ( ੂ → 𑭣 ) GURMUKHI VOWEL SIGN UU → SHARADA VOWEL SIGN UUE # →ॗ→ - 0947 ; 11B64 ; MA # ( े → 𑭤 ) DEVANAGARI VOWEL SIGN E → SHARADA VOWEL SIGN SHORT E # 0A47 ; 11B64 ; MA # ( ੇ → 𑭤 ) GURMUKHI VOWEL SIGN EE → SHARADA VOWEL SIGN SHORT E # →े→ @@ -9990,5 +9991,5 @@ A7CF ; A7CE ; MA # ( ꟏ → ꟎ ) LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIV 6138 ; 2B73F ; MA # ( 愸 → 𫜿 ) CJK UNIFIED IDEOGRAPH-6138 → CJK UNIFIED IDEOGRAPH-2B73F # -# total: 6565 +# total: 6567 diff --git a/unicodetools/data/security/dev/confusablesSummary.txt b/unicodetools/data/security/dev/confusablesSummary.txt index 803fd067f..c5986be48 100644 --- a/unicodetools/data/security/dev/confusablesSummary.txt +++ b/unicodetools/data/security/dev/confusablesSummary.txt @@ -1,5 +1,5 @@ # confusablesSummary.txt -# Date: 2025-07-22, 05:49:37 GMT +# Date: 2025-08-11, 18:24:57 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -5754,10 +5754,19 @@ (‎ ̭ ‎) 032D COMBINING CIRCUMFLEX ACCENT BELOW ← (‎ ᳙ ‎) 1CD9 VEDIC TONE YAJURVEDIC KATHAKA INDEPENDENT SVARITA SCHROEDER -# ̮ ᳘ +# ̮ 𑭢 ॖ ੁ ᳘ (‎ ̮ ‎) 032E COMBINING BREVE BELOW +← (‎ 𑭢 ‎) 11B62 SHARADA VOWEL SIGN UE # →ॖ→ +← (‎ ॖ ‎) 0956 DEVANAGARI VOWEL SIGN UE +← (‎ ੁ ‎) 0A41 GURMUKHI VOWEL SIGN U # →ॖ→ ← (‎ ᳘ ‎) 1CD8 VEDIC TONE CANDRA BELOW +# ̮̮ 𑭣 ॗ ੂ + (‎ ̮̮ ‎) 032E 032E COMBINING BREVE BELOW, COMBINING BREVE BELOW +← (‎ 𑭣 ‎) 11B63 SHARADA VOWEL SIGN UUE # →ॗ→ +← (‎ ॗ ‎) 0957 DEVANAGARI VOWEL SIGN UUE +← (‎ ੂ ‎) 0A42 GURMUKHI VOWEL SIGN UU # →ॗ→ + # ̳ ͇ (‎ ̳ ‎) 0333 COMBINING DOUBLE LOW LINE ← (‎ ͇ ‎) 0347 COMBINING EQUALS SIGN BELOW @@ -8690,16 +8699,6 @@ ← (‎ ੍ ‎) 0A4D GURMUKHI SIGN VIRAMA ← (‎ ્ ‎) 0ACD GUJARATI SIGN VIRAMA -# 𑭢 ॖ ੁ - (‎ ॖ ‎) 0956 DEVANAGARI VOWEL SIGN UE -← (‎ 𑭢 ‎) 11B62 SHARADA VOWEL SIGN UE -← (‎ ੁ ‎) 0A41 GURMUKHI VOWEL SIGN U - -# 𑭣 ॗ ੂ - (‎ ॗ ‎) 0957 DEVANAGARI VOWEL SIGN UUE -← (‎ 𑭣 ‎) 11B63 SHARADA VOWEL SIGN UUE -← (‎ ੂ ‎) 0A42 GURMUKHI VOWEL SIGN UU - # । ꠰ (‎ । ‎) 0964 DEVANAGARI DANDA ← (‎ ꠰ ‎) A830 NORTH INDIC FRACTION ONE QUARTER @@ -8882,15 +8881,15 @@ (‎ ਅੌ ‎) 0A05 0A4C GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AU ← (‎ ਔ ‎) 0A14 GURMUKHI LETTER AU -# ੳੁ ੳ𑭢 ਉ +# ੳੁ ੳ̮ ਉ (‎ ਉ ‎) 0A09 GURMUKHI LETTER U ← (‎ ੳੁ ‎) 0A73 0A41 GURMUKHI URA, GURMUKHI VOWEL SIGN U -← (‎ ੳ𑭢 ‎) 0A73 11B62 GURMUKHI URA, SHARADA VOWEL SIGN UE # →ੳੁ→ +← (‎ ੳ̮ ‎) 0A73 032E GURMUKHI URA, COMBINING BREVE BELOW # →ੳੁ→ -# ੳੂ ੳ𑭣 ਊ +# ੳ̮̮ ੳੂ ਊ (‎ ਊ ‎) 0A0A GURMUKHI LETTER UU +← (‎ ੳ̮̮ ‎) 0A73 032E 032E GURMUKHI URA, COMBINING BREVE BELOW, COMBINING BREVE BELOW # →ੳੂ→ ← (‎ ੳੂ ‎) 0A73 0A42 GURMUKHI URA, GURMUKHI VOWEL SIGN UU -← (‎ ੳ𑭣 ‎) 0A73 11B63 GURMUKHI URA, SHARADA VOWEL SIGN UUE # →ੳੂ→ # અા આ (‎ અા ‎) 0A85 0ABE GUJARATI LETTER A, GUJARATI VOWEL SIGN AA @@ -17836,5 +17835,5 @@ (‎ 𪘀 ‎) 2A600 CJK UNIFIED IDEOGRAPH-2A600 ← (‎ 𪘀 ‎) 2FA1D CJK COMPATIBILITY IDEOGRAPH-2FA1D -# total : 7575 +# total : 7577 diff --git a/unicodetools/data/security/dev/data/confusablesSummaryIdentifier.txt b/unicodetools/data/security/dev/data/confusablesSummaryIdentifier.txt index 1d8f230b9..4b153e881 100644 --- a/unicodetools/data/security/dev/data/confusablesSummaryIdentifier.txt +++ b/unicodetools/data/security/dev/data/confusablesSummaryIdentifier.txt @@ -1,5 +1,5 @@ # confusablesSummaryIdentifier.txt -# Date: 2025-07-22, 05:49:37 GMT +# Date: 2025-08-11, 18:24:57 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -546,6 +546,14 @@ ← (‎ ઼ ‎) 0ABC GUJARATI SIGN NUKTA ← (‎ ଼ ‎) 0B3C ORIYA SIGN NUKTA +# ॖ ੁ + (‎ ॖ ‎) 0956 DEVANAGARI VOWEL SIGN UE +← (‎ ੁ ‎) 0A41 GURMUKHI VOWEL SIGN U + +# ॗ ੂ + (‎ ॗ ‎) 0957 DEVANAGARI VOWEL SIGN UUE +← (‎ ੂ ‎) 0A42 GURMUKHI VOWEL SIGN UU + # Γ Г (‎ Γ ‎) 0393 GREEK CAPITAL LETTER GAMMA ← (‎ Г ‎) 0413 CYRILLIC CAPITAL LETTER GHE @@ -919,14 +927,6 @@ ← (‎ ੍ ‎) 0A4D GURMUKHI SIGN VIRAMA ← (‎ ્ ‎) 0ACD GUJARATI SIGN VIRAMA -# ॖ ੁ - (‎ ॖ ‎) 0956 DEVANAGARI VOWEL SIGN UE -← (‎ ੁ ‎) 0A41 GURMUKHI VOWEL SIGN U - -# ॗ ੂ - (‎ ॗ ‎) 0957 DEVANAGARI VOWEL SIGN UUE -← (‎ ੂ ‎) 0A42 GURMUKHI VOWEL SIGN UU - # २ ર ૨ (‎ २ ‎) 0968 DEVANAGARI DIGIT TWO ← (‎ ર ‎) 0AB0 GUJARATI LETTER RA # →૨→ diff --git a/unicodetools/data/security/dev/data/source/formatted-source.txt b/unicodetools/data/security/dev/data/source/formatted-source.txt index 52e384773..0c9e53e36 100644 --- a/unicodetools/data/security/dev/data/source/formatted-source.txt +++ b/unicodetools/data/security/dev/data/source/formatted-source.txt @@ -1,5 +1,5 @@ # formatted-source.txt -# Date: 2025-07-22, 05:49:36 GMT +# Date: 2025-08-11, 18:24:56 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1265,8 +1265,11 @@ 032D ; 1CD9 # ( ̭ ~ ᳙ ) COMBINING CIRCUMFLEX ACCENT BELOW ~ VEDIC TONE YAJURVEDIC KATHAKA INDEPENDENT SVARITA SCHROEDER +032E ; 0956 # ( ̮ ~ ॖ ) COMBINING BREVE BELOW ~ DEVANAGARI VOWEL SIGN UE 032E ; 1CD8 # ( ̮ ~ ᳘ ) COMBINING BREVE BELOW ~ VEDIC TONE CANDRA BELOW +032E 032E ; 0957 # ( ̮̮ ~ ॗ ) COMBINING BREVE BELOW, COMBINING BREVE BELOW ~ DEVANAGARI VOWEL SIGN UUE + 0331 ; 0320 # ( ̱ ~ ̠ ) COMBINING MACRON BELOW ~ COMBINING MINUS SIGN BELOW 0331 ; 0952 # ( ̱ ~ ॒ ) COMBINING MACRON BELOW ~ DEVANAGARI STRESS SIGN ANUDATTA