diff --git a/collate/build/contract_test.go b/collate/build/contract_test.go index 2e0eaecd..569af42d 100644 --- a/collate/build/contract_test.go +++ b/collate/build/contract_test.go @@ -247,7 +247,6 @@ func TestPrintContractionTrieSet(t *testing.T) { print(&testdata, buf, "test") if contractTrieOutput != buf.String() { t.Errorf("output differs; found\n%s", buf.String()) - println(string(buf.Bytes())) } } diff --git a/go.mod b/go.mod index f421d92c..2d2fc9d2 100644 --- a/go.mod +++ b/go.mod @@ -1,9 +1,9 @@ module golang.org/x/text -go 1.18 +go 1.23.0 require golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // tagx:ignore require golang.org/x/mod v0.17.0 // indirect; tagx:ignore -require golang.org/x/sync v0.11.0 // indirect +require golang.org/x/sync v0.12.0 // indirect diff --git a/go.sum b/go.sum index 4370a676..534abe7c 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,6 @@ golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= -golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= -golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw= +golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg= golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= diff --git a/internal/export/unicode/gen.go b/internal/export/unicode/gen.go index 4e8295cc..caeb0711 100644 --- a/internal/export/unicode/gen.go +++ b/internal/export/unicode/gen.go @@ -13,8 +13,10 @@ import ( "flag" "fmt" "log" + "maps" "os" "regexp" + "slices" "sort" "strings" "unicode" @@ -90,13 +92,15 @@ func println(args ...interface{}) { var category = map[string]bool{ // Nd Lu etc. // We use one-character names to identify merged categories - "L": true, // Lu Ll Lt Lm Lo - "P": true, // Pc Pd Ps Pe Pu Pf Po - "M": true, // Mn Mc Me - "N": true, // Nd Nl No - "S": true, // Sm Sc Sk So - "Z": true, // Zs Zl Zp - "C": true, // Cc Cf Cs Co Cn + "L": true, // Lu Ll Lt Lm Lo + "LC": true, // Lu Ll Lt + "P": true, // Pc Pd Ps Pe Pu Pf Po + "M": true, // Mn Mc Me + "N": true, // Nd Nl No + "S": true, // Sm Sc Sk So + "Z": true, // Zs Zl Zp + "C": true, // Cc Cf Cs Co Cn + "Cn": true, // unassigned } // This contains only the properties we're interested in. @@ -149,6 +153,9 @@ func categoryOp(code rune, class uint8) bool { } func loadChars() { + for code := range chars { + chars[code].category = "Cn" // unassigned + } ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) { c := Char{codePoint: p.Rune(0)} @@ -201,6 +208,7 @@ func loadCasefold() { } var categoryMapping = map[string]string{ + "LC": "Letter, cased: Ll | Lt | Lu", "Lu": "Letter, uppercase", "Ll": "Letter, lowercase", "Lt": "Letter, titlecase", @@ -257,6 +265,7 @@ func printCategories() { printf("\t%q: %s,\n", k, k) } print("}\n\n") + printCategoryAliases() } decl := make(sort.StringSlice, len(list)) @@ -272,7 +281,7 @@ func printCategories() { varDecl := "" switch name { case "C": - varDecl = "\tOther = _C; // Other/C is the set of Unicode control and special characters, category C.\n" + varDecl = "\tOther = _C; // Other/C is the set of Unicode control, special, and unassigned code points, category C.\n" varDecl += "\tC = _C\n" case "L": varDecl = "\tLetter = _L; // Letter/L is the set of Unicode letters, category L.\n" @@ -315,14 +324,14 @@ func printCategories() { } decl[ndecl] = varDecl ndecl++ + match := func(cat string) bool { return cat == name } if len(name) == 1 { // unified categories - dumpRange( - "_"+name, - func(code rune) bool { return categoryOp(code, name[0]) }) - continue + match = func(cat string) bool { return strings.HasPrefix(cat, name) } } - dumpRange("_"+name, - func(code rune) bool { return chars[code].category == name }) + if name == "LC" { // special unified category + match = func(cat string) bool { return cat == "Ll" || cat == "Lt" || cat == "Lu" } + } + dumpRange("_"+name, func(code rune) bool { return match(chars[code].category) }) } decl.Sort() println("// These variables have type *RangeTable.") @@ -333,6 +342,35 @@ func printCategories() { print(")\n\n") } +func printCategoryAliases() { + known := make(map[string]bool) + for _, name := range allCategories() { + known[name] = true + } + + table := make(map[string]string) + ucd.Parse(gen.OpenUCDFile("PropertyValueAliases.txt"), func(p *ucd.Parser) { + if p.String(0) != "gc" { + return + } + name := p.String(1) + if !known[name] { + logger.Print("unknown category: ", name) + } + table[p.String(2)] = name + if a := p.String(3); a != "" { + table[a] = name + } + }) + + println("// CategoryAliases maps category aliases to standard category names.") + println("var CategoryAliases = map[string]string{") + for _, name := range slices.Sorted(maps.Keys(table)) { + printf("\t%q: %q,\n", name, table[name]) + } + print("}\n\n") +} + type Op func(code rune) bool func dumpRange(name string, inCategory Op) { diff --git a/internal/number/format.go b/internal/number/format.go index cd94c5dc..1aadcf40 100644 --- a/internal/number/format.go +++ b/internal/number/format.go @@ -394,9 +394,7 @@ func appendScientific(dst []byte, f *Formatter, n *Digits) (b []byte, postPre, p exp := n.Exp - int32(n.Comma) exponential := f.Symbol(SymExponential) if exponential == "E" { - dst = append(dst, "\u202f"...) // NARROW NO-BREAK SPACE dst = append(dst, f.Symbol(SymSuperscriptingExponent)...) - dst = append(dst, "\u202f"...) // NARROW NO-BREAK SPACE dst = f.AppendDigit(dst, 1) dst = f.AppendDigit(dst, 0) switch { diff --git a/internal/number/format_test.go b/internal/number/format_test.go index 01a08943..77ccdaa1 100644 --- a/internal/number/format_test.go +++ b/internal/number/format_test.go @@ -271,63 +271,63 @@ func TestAppendDecimal(t *testing.T) { }, { pattern: "#E0", test: pairs{ - "0": "0\u202f×\u202f10⁰", - "1": "1\u202f×\u202f10⁰", - "123.456": "1\u202f×\u202f10²", + "0": "0×10⁰", + "1": "1×10⁰", + "123.456": "1×10²", }, }, { pattern: "#E+0", test: pairs{ - "0": "0\u202f×\u202f10⁺⁰", - "1000": "1\u202f×\u202f10⁺³", - "1E100": "1\u202f×\u202f10⁺¹⁰⁰", - "1E-100": "1\u202f×\u202f10⁻¹⁰⁰", + "0": "0×10⁺⁰", + "1000": "1×10⁺³", + "1E100": "1×10⁺¹⁰⁰", + "1E-100": "1×10⁻¹⁰⁰", "NaN": "NaN", "-Inf": "-∞", }, }, { pattern: "##0E00", test: pairs{ - "100": "100\u202f×\u202f10⁰⁰", - "12345": "12\u202f×\u202f10⁰³", - "123.456": "123\u202f×\u202f10⁰⁰", + "100": "100×10⁰⁰", + "12345": "12×10⁰³", + "123.456": "123×10⁰⁰", }, }, { pattern: "##0.###E00", test: pairs{ - "100": "100\u202f×\u202f10⁰⁰", - "12345": "12.345\u202f×\u202f10⁰³", - "123456": "123.456\u202f×\u202f10⁰³", - "123.456": "123.456\u202f×\u202f10⁰⁰", - "123.4567": "123.457\u202f×\u202f10⁰⁰", + "100": "100×10⁰⁰", + "12345": "12.345×10⁰³", + "123456": "123.456×10⁰³", + "123.456": "123.456×10⁰⁰", + "123.4567": "123.457×10⁰⁰", }, }, { pattern: "##0.000E00", test: pairs{ - "100": "100.000\u202f×\u202f10⁰⁰", - "12345": "12.345\u202f×\u202f10⁰³", - "123.456": "123.456\u202f×\u202f10⁰⁰", - "12.3456": "12.346\u202f×\u202f10⁰⁰", + "100": "100.000×10⁰⁰", + "12345": "12.345×10⁰³", + "123.456": "123.456×10⁰⁰", + "12.3456": "12.346×10⁰⁰", }, }, { pattern: "@@E0", test: pairs{ - "0": "0.0\u202f×\u202f10⁰", - "99": "9.9\u202f×\u202f10¹", - "0.99": "9.9\u202f×\u202f10⁻¹", + "0": "0.0×10⁰", + "99": "9.9×10¹", + "0.99": "9.9×10⁻¹", }, }, { pattern: "@###E00", test: pairs{ - "0": "0\u202f×\u202f10⁰⁰", - "1": "1\u202f×\u202f10⁰⁰", - "11": "1.1\u202f×\u202f10⁰¹", - "111": "1.11\u202f×\u202f10⁰²", - "1111": "1.111\u202f×\u202f10⁰³", - "11111": "1.111\u202f×\u202f10⁰⁴", - "0.1": "1\u202f×\u202f10⁻⁰¹", - "0.11": "1.1\u202f×\u202f10⁻⁰¹", - "0.001": "1\u202f×\u202f10⁻⁰³", + "0": "0×10⁰⁰", + "1": "1×10⁰⁰", + "11": "1.1×10⁰¹", + "111": "1.11×10⁰²", + "1111": "1.111×10⁰³", + "11111": "1.111×10⁰⁴", + "0.1": "1×10⁻⁰¹", + "0.11": "1.1×10⁻⁰¹", + "0.001": "1×10⁻⁰³", }, }, { pattern: "*x##0", @@ -354,18 +354,18 @@ func TestAppendDecimal(t *testing.T) { "1234.567": "1234.567", }, }, { - pattern: "**0.0#######E00", + pattern: "**0.0#####E00", test: pairs{ - "0": "***0.0\u202f×\u202f10⁰⁰", - "10": "***1.0\u202f×\u202f10⁰¹", - "11": "***1.1\u202f×\u202f10⁰¹", - "111": "**1.11\u202f×\u202f10⁰²", - "1111": "*1.111\u202f×\u202f10⁰³", - "11111": "1.1111\u202f×\u202f10⁰⁴", - "11110": "*1.111\u202f×\u202f10⁰⁴", - "11100": "**1.11\u202f×\u202f10⁰⁴", - "11000": "***1.1\u202f×\u202f10⁰⁴", - "10000": "***1.0\u202f×\u202f10⁰⁴", + "0": "***0.0×10⁰⁰", + "10": "***1.0×10⁰¹", + "11": "***1.1×10⁰¹", + "111": "**1.11×10⁰²", + "1111": "*1.111×10⁰³", + "11111": "1.1111×10⁰⁴", + "11110": "*1.111×10⁰⁴", + "11100": "**1.11×10⁰⁴", + "11000": "***1.1×10⁰⁴", + "10000": "***1.0×10⁰⁴", }, }, { pattern: "*xpre0suf", @@ -500,9 +500,9 @@ func TestFormatters(t *testing.T) { want string }{ {f.InitDecimal, "123456.78", "123,456.78"}, - {f.InitScientific, "123456.78", "1.23\u202f×\u202f10⁵"}, - {f.InitEngineering, "123456.78", "123.46\u202f×\u202f10³"}, - {f.InitEngineering, "1234", "1.23\u202f×\u202f10³"}, + {f.InitScientific, "123456.78", "1.23×10⁵"}, + {f.InitEngineering, "123456.78", "123.46×10³"}, + {f.InitEngineering, "1234", "1.23×10³"}, {f.InitPercent, "0.1234", "12.34%"}, {f.InitPerMille, "0.1234", "123.40‰"}, diff --git a/language/parse.go b/language/parse.go index 4d57222e..053336e2 100644 --- a/language/parse.go +++ b/language/parse.go @@ -59,7 +59,7 @@ func (c CanonType) Parse(s string) (t Tag, err error) { if changed { tt.RemakeString() } - return makeTag(tt), err + return makeTag(tt), nil } // Compose creates a Tag from individual parts, which may be of type Tag, Base, diff --git a/message/fmt_test.go b/message/fmt_test.go index 42de5936..a8fde6f8 100644 --- a/message/fmt_test.go +++ b/message/fmt_test.go @@ -421,8 +421,8 @@ var fmtTests = []struct { {"%#.68U", '日', zeroFill("U+", 68, "65E5") + " '日'"}, // floats - {"%+.3e", 0.0, "+0.000\u202f×\u202f10⁰⁰"}, - {"%+.3e", 1.0, "+1.000\u202f×\u202f10⁰⁰"}, + {"%+.3e", 0.0, "+0.000×10⁰⁰"}, + {"%+.3e", 1.0, "+1.000×10⁰⁰"}, {"%+.3f", -1.0, "-1.000"}, {"%+.3F", -1.0, "-1.000"}, {"%+.3F", float32(-1.0), "-1.000"}, @@ -436,8 +436,8 @@ var fmtTests = []struct { {"%-+07.2f", -1.0, "-1.00 "}, {"%+10.2f", +1.0, " +1.00"}, {"%+10.2f", -1.0, " -1.00"}, - {"% .3E", -1.0, "-1.000\u202f×\u202f10⁰⁰"}, - {"% .3e", 1.0, " 1.000\u202f×\u202f10⁰⁰"}, + {"% .3E", -1.0, "-1.000×10⁰⁰"}, + {"% .3e", 1.0, " 1.000×10⁰⁰"}, {"%+.3g", 0.0, "+0"}, {"%+.3g", 1.0, "+1"}, {"%+.3g", -1.0, "-1"}, @@ -501,24 +501,24 @@ var fmtTests = []struct { {"% .f", 0i, "( 0+0i)"}, {"%+.f", 0i, "(+0+0i)"}, {"% +.f", 0i, "(+0+0i)"}, - {"%+.3e", 0i, "(+0.000\u202f×\u202f10⁰⁰+0.000\u202f×\u202f10⁰⁰i)"}, + {"%+.3e", 0i, "(+0.000×10⁰⁰+0.000×10⁰⁰i)"}, {"%+.3f", 0i, "(+0.000+0.000i)"}, {"%+.3g", 0i, "(+0+0i)"}, - {"%+.3e", 1 + 2i, "(+1.000\u202f×\u202f10⁰⁰+2.000\u202f×\u202f10⁰⁰i)"}, + {"%+.3e", 1 + 2i, "(+1.000×10⁰⁰+2.000×10⁰⁰i)"}, {"%+.3f", 1 + 2i, "(+1.000+2.000i)"}, {"%+.3g", 1 + 2i, "(+1+2i)"}, - {"%.3e", 0i, "(0.000\u202f×\u202f10⁰⁰+0.000\u202f×\u202f10⁰⁰i)"}, + {"%.3e", 0i, "(0.000×10⁰⁰+0.000×10⁰⁰i)"}, {"%.3f", 0i, "(0.000+0.000i)"}, {"%.3F", 0i, "(0.000+0.000i)"}, {"%.3F", complex64(0i), "(0.000+0.000i)"}, {"%.3g", 0i, "(0+0i)"}, - {"%.3e", 1 + 2i, "(1.000\u202f×\u202f10⁰⁰+2.000\u202f×\u202f10⁰⁰i)"}, + {"%.3e", 1 + 2i, "(1.000×10⁰⁰+2.000×10⁰⁰i)"}, {"%.3f", 1 + 2i, "(1.000+2.000i)"}, {"%.3g", 1 + 2i, "(1+2i)"}, - {"%.3e", -1 - 2i, "(-1.000\u202f×\u202f10⁰⁰-2.000\u202f×\u202f10⁰⁰i)"}, + {"%.3e", -1 - 2i, "(-1.000×10⁰⁰-2.000×10⁰⁰i)"}, {"%.3f", -1 - 2i, "(-1.000-2.000i)"}, {"%.3g", -1 - 2i, "(-1-2i)"}, - {"% .3E", -1 - 2i, "(-1.000\u202f×\u202f10⁰⁰-2.000\u202f×\u202f10⁰⁰i)"}, + {"% .3E", -1 - 2i, "(-1.000×10⁰⁰-2.000×10⁰⁰i)"}, {"%+.3g", 1 + 2i, "(+1+2i)"}, {"%+.3g", complex64(1 + 2i), "(+1+2i)"}, {"%#g", 1 + 2i, "(1.00000+2.00000i)"}, @@ -562,42 +562,42 @@ var fmtTests = []struct { {"%-08G", complex(NaN, NaN), "(NaN +NaN i)"}, // old test/fmt_test.go - {"%e", 1.0, "1.000000\u202f×\u202f10⁰⁰"}, - {"%e", 1234.5678e3, "1.234568\u202f×\u202f10⁰⁶"}, - {"%e", 1234.5678e-8, "1.234568\u202f×\u202f10⁻⁰⁵"}, - {"%e", -7.0, "-7.000000\u202f×\u202f10⁰⁰"}, - {"%e", -1e-9, "-1.000000\u202f×\u202f10⁻⁰⁹"}, + {"%e", 1.0, "1.000000×10⁰⁰"}, + {"%e", 1234.5678e3, "1.234568×10⁰⁶"}, + {"%e", 1234.5678e-8, "1.234568×10⁻⁰⁵"}, + {"%e", -7.0, "-7.000000×10⁰⁰"}, + {"%e", -1e-9, "-1.000000×10⁻⁰⁹"}, {"%f", 1234.5678e3, "1,234,567.800000"}, {"%f", 1234.5678e-8, "0.000012"}, {"%f", -7.0, "-7.000000"}, {"%f", -1e-9, "-0.000000"}, - {"%g", 1234.5678e3, "1.2345678\u202f×\u202f10⁰⁶"}, - {"%g", float32(1234.5678e3), "1.2345678\u202f×\u202f10⁰⁶"}, - {"%g", 1234.5678e-8, "1.2345678\u202f×\u202f10⁻⁰⁵"}, + {"%g", 1234.5678e3, "1.2345678×10⁰⁶"}, + {"%g", float32(1234.5678e3), "1.2345678×10⁰⁶"}, + {"%g", 1234.5678e-8, "1.2345678×10⁻⁰⁵"}, {"%g", -7.0, "-7"}, - {"%g", -1e-9, "-1\u202f×\u202f10⁻⁰⁹"}, - {"%g", float32(-1e-9), "-1\u202f×\u202f10⁻⁰⁹"}, - {"%E", 1.0, "1.000000\u202f×\u202f10⁰⁰"}, - {"%E", 1234.5678e3, "1.234568\u202f×\u202f10⁰⁶"}, - {"%E", 1234.5678e-8, "1.234568\u202f×\u202f10⁻⁰⁵"}, - {"%E", -7.0, "-7.000000\u202f×\u202f10⁰⁰"}, - {"%E", -1e-9, "-1.000000\u202f×\u202f10⁻⁰⁹"}, - {"%G", 1234.5678e3, "1.2345678\u202f×\u202f10⁰⁶"}, - {"%G", float32(1234.5678e3), "1.2345678\u202f×\u202f10⁰⁶"}, - {"%G", 1234.5678e-8, "1.2345678\u202f×\u202f10⁻⁰⁵"}, + {"%g", -1e-9, "-1×10⁻⁰⁹"}, + {"%g", float32(-1e-9), "-1×10⁻⁰⁹"}, + {"%E", 1.0, "1.000000×10⁰⁰"}, + {"%E", 1234.5678e3, "1.234568×10⁰⁶"}, + {"%E", 1234.5678e-8, "1.234568×10⁻⁰⁵"}, + {"%E", -7.0, "-7.000000×10⁰⁰"}, + {"%E", -1e-9, "-1.000000×10⁻⁰⁹"}, + {"%G", 1234.5678e3, "1.2345678×10⁰⁶"}, + {"%G", float32(1234.5678e3), "1.2345678×10⁰⁶"}, + {"%G", 1234.5678e-8, "1.2345678×10⁻⁰⁵"}, {"%G", -7.0, "-7"}, - {"%G", -1e-9, "-1\u202f×\u202f10⁻⁰⁹"}, - {"%G", float32(-1e-9), "-1\u202f×\u202f10⁻⁰⁹"}, + {"%G", -1e-9, "-1×10⁻⁰⁹"}, + {"%G", float32(-1e-9), "-1×10⁻⁰⁹"}, {"%20.5s", "qwertyuiop", " qwert"}, {"%.5s", "qwertyuiop", "qwert"}, {"%-20.5s", "qwertyuiop", "qwert "}, {"%20c", 'x', " x"}, {"%-20c", 'x', "x "}, - {"%20.6e", 1.2345e3, " 1.234500\u202f×\u202f10⁰³"}, - {"%20.6e", 1.2345e-3, " 1.234500\u202f×\u202f10⁻⁰³"}, - {"%20e", 1.2345e3, " 1.234500\u202f×\u202f10⁰³"}, - {"%20e", 1.2345e-3, " 1.234500\u202f×\u202f10⁻⁰³"}, - {"%20.8e", 1.2345e3, " 1.23450000\u202f×\u202f10⁰³"}, + {"%20.6e", 1.2345e3, " 1.234500×10⁰³"}, + {"%20.6e", 1.2345e-3, " 1.234500×10⁻⁰³"}, + {"%20e", 1.2345e3, " 1.234500×10⁰³"}, + {"%20e", 1.2345e-3, " 1.234500×10⁻⁰³"}, + {"%20.8e", 1.2345e3, " 1.23450000×10⁰³"}, {"%20f", 1.23456789e3, " 1,234.567890"}, {"%20f", 1.23456789e-3, " 0.001235"}, {"%20f", 12345678901.23456789, "12,345,678,901.234568"}, @@ -606,7 +606,7 @@ var fmtTests = []struct { {"%20.8f", 1.23456789e-3, " 0.00123457"}, {"%g", 1.23456789e3, "1,234.56789"}, {"%g", 1.23456789e-3, "0.00123456789"}, - {"%g", 1.23456789e20, "1.23456789\u202f×\u202f10²⁰"}, + {"%g", 1.23456789e20, "1.23456789×10²⁰"}, // arrays {"%v", array, "[1 2 3 4 5]"}, diff --git a/number/number_test.go b/number/number_test.go index 3dcac364..e79c2741 100644 --- a/number/number_test.go +++ b/number/number_test.go @@ -129,31 +129,31 @@ func TestFormatter(t *testing.T) { }, { desc: "scientific", f: Scientific(3.00), - want: "3\u202f×\u202f10⁰", + want: "3×10⁰", }, { desc: "scientific", f: Scientific(1234), - want: "1.234\u202f×\u202f10³", + want: "1.234×10³", }, { desc: "scientific", f: Scientific(1234, Scale(2)), - want: "1.23\u202f×\u202f10³", + want: "1.23×10³", }, { desc: "engineering", f: Engineering(12345), - want: "12.345\u202f×\u202f10³", + want: "12.345×10³", }, { desc: "engineering scale", f: Engineering(12345, Scale(2)), - want: "12.34\u202f×\u202f10³", + want: "12.34×10³", }, { desc: "engineering precision(4)", f: Engineering(12345, Precision(4)), - want: "12.34\u202f×\u202f10³", + want: "12.34×10³", }, { desc: "engineering precision(2)", f: Engineering(1234.5, Precision(2)), - want: "1.2\u202f×\u202f10³", + want: "1.2×10³", }, { desc: "percent", f: Percent(0.12),