diff --git a/AUTHORS b/AUTHORS deleted file mode 100644 index 15167cd74..000000000 --- a/AUTHORS +++ /dev/null @@ -1,3 +0,0 @@ -# This source code refers to The Go Authors for copyright purposes. -# The master list of authors is in the main Go distribution, -# visible at http://tip.golang.org/AUTHORS. diff --git a/CONTRIBUTORS b/CONTRIBUTORS deleted file mode 100644 index 1c4577e96..000000000 --- a/CONTRIBUTORS +++ /dev/null @@ -1,3 +0,0 @@ -# This source code was written by the Go contributors. -# The master list of contributors is in the main Go distribution, -# visible at http://tip.golang.org/CONTRIBUTORS. diff --git a/cases/gen.go b/cases/gen.go index a041fede1..2e1d95cc5 100644 --- a/cases/gen.go +++ b/cases/gen.go @@ -16,7 +16,6 @@ import ( "bytes" "fmt" "io" - "io/ioutil" "log" "reflect" "strconv" @@ -635,9 +634,9 @@ func genTablesTest() { // We discard the output as we know we have perfect functions. We run them // just to verify the properties are correct. - n := printProperties(ioutil.Discard, "DerivedCoreProperties.txt", "Cased", verifyCased) - n += printProperties(ioutil.Discard, "DerivedCoreProperties.txt", "Lowercase", verifyLower) - n += printProperties(ioutil.Discard, "DerivedCoreProperties.txt", "Uppercase", verifyUpper) + n := printProperties(io.Discard, "DerivedCoreProperties.txt", "Cased", verifyCased) + n += printProperties(io.Discard, "DerivedCoreProperties.txt", "Lowercase", verifyLower) + n += printProperties(io.Discard, "DerivedCoreProperties.txt", "Uppercase", verifyUpper) if n > 0 { log.Fatalf("One of the discarded properties does not have a perfect filter.") } diff --git a/cases/gen_trieval.go b/cases/gen_trieval.go index 6c7222a73..14dd33b1d 100644 --- a/cases/gen_trieval.go +++ b/cases/gen_trieval.go @@ -19,19 +19,19 @@ package main // // The per-rune values have the following format: // -// if (exception) { -// 15..4 unsigned exception index -// } else { -// 15..8 XOR pattern or index to XOR pattern for case mapping -// Only 13..8 
are used for XOR patterns. -// 7 inverseFold (fold to upper, not to lower) -// 6 index: interpret the XOR pattern as an index -// or isMid if case mode is cIgnorableUncased. -// 5..4 CCC: zero (normal or break), above or other -// } -// 3 exception: interpret this value as an exception index -// (TODO: is this bit necessary? Probably implied from case mode.) -// 2..0 case mode +// if (exception) { +// 15..4 unsigned exception index +// } else { +// 15..8 XOR pattern or index to XOR pattern for case mapping +// Only 13..8 are used for XOR patterns. +// 7 inverseFold (fold to upper, not to lower) +// 6 index: interpret the XOR pattern as an index +// or isMid if case mode is cIgnorableUncased. +// 5..4 CCC: zero (normal or break), above or other +// } +// 3 exception: interpret this value as an exception index +// (TODO: is this bit necessary? Probably implied from case mode.) +// 2..0 case mode // // For the non-exceptional cases, a rune must be either uncased, lowercase or // uppercase. If the rune is cased, the XOR pattern maps either a lowercase @@ -133,37 +133,40 @@ const ( // The entry is pointed to by the exception index in an entry. It has the // following format: // -// Header -// byte 0: -// 7..6 unused -// 5..4 CCC type (same bits as entry) -// 3 unused -// 2..0 length of fold +// Header: // -// byte 1: -// 7..6 unused -// 5..3 length of 1st mapping of case type -// 2..0 length of 2nd mapping of case type +// byte 0: +// 7..6 unused +// 5..4 CCC type (same bits as entry) +// 3 unused +// 2..0 length of fold // -// case 1st 2nd -// lower -> upper, title -// upper -> lower, title -// title -> lower, upper +// byte 1: +// 7..6 unused +// 5..3 length of 1st mapping of case type +// 2..0 length of 2nd mapping of case type +// +// case 1st 2nd +// lower -> upper, title +// upper -> lower, title +// title -> lower, upper // // Lengths with the value 0x7 indicate no value and implies no change. // A length of 0 indicates a mapping to zero-length string. 
// // Body bytes: -// case folding bytes -// lowercase mapping bytes -// uppercase mapping bytes -// titlecase mapping bytes -// closure mapping bytes (for NFKC_Casefold). (TODO) +// +// case folding bytes +// lowercase mapping bytes +// uppercase mapping bytes +// titlecase mapping bytes +// closure mapping bytes (for NFKC_Casefold). (TODO) // // Fallbacks: -// missing fold -> lower -// missing title -> upper -// all missing -> original rune +// +// missing fold -> lower +// missing title -> upper +// all missing -> original rune // // exceptions starts with a dummy byte to enforce that there is no zero index // value. diff --git a/cases/trieval.go b/cases/trieval.go index 99e039628..4e4d13fe5 100644 --- a/cases/trieval.go +++ b/cases/trieval.go @@ -14,19 +14,19 @@ package cases // // The per-rune values have the following format: // -// if (exception) { -// 15..4 unsigned exception index -// } else { -// 15..8 XOR pattern or index to XOR pattern for case mapping -// Only 13..8 are used for XOR patterns. -// 7 inverseFold (fold to upper, not to lower) -// 6 index: interpret the XOR pattern as an index -// or isMid if case mode is cIgnorableUncased. -// 5..4 CCC: zero (normal or break), above or other -// } -// 3 exception: interpret this value as an exception index -// (TODO: is this bit necessary? Probably implied from case mode.) -// 2..0 case mode +// if (exception) { +// 15..4 unsigned exception index +// } else { +// 15..8 XOR pattern or index to XOR pattern for case mapping +// Only 13..8 are used for XOR patterns. +// 7 inverseFold (fold to upper, not to lower) +// 6 index: interpret the XOR pattern as an index +// or isMid if case mode is cIgnorableUncased. +// 5..4 CCC: zero (normal or break), above or other +// } +// 3 exception: interpret this value as an exception index +// (TODO: is this bit necessary? Probably implied from case mode.) +// 2..0 case mode // // For the non-exceptional cases, a rune must be either uncased, lowercase or // uppercase. 
If the rune is cased, the XOR pattern maps either a lowercase @@ -128,37 +128,40 @@ const ( // The entry is pointed to by the exception index in an entry. It has the // following format: // -// Header -// byte 0: -// 7..6 unused -// 5..4 CCC type (same bits as entry) -// 3 unused -// 2..0 length of fold +// Header: // -// byte 1: -// 7..6 unused -// 5..3 length of 1st mapping of case type -// 2..0 length of 2nd mapping of case type +// byte 0: +// 7..6 unused +// 5..4 CCC type (same bits as entry) +// 3 unused +// 2..0 length of fold // -// case 1st 2nd -// lower -> upper, title -// upper -> lower, title -// title -> lower, upper +// byte 1: +// 7..6 unused +// 5..3 length of 1st mapping of case type +// 2..0 length of 2nd mapping of case type +// +// case 1st 2nd +// lower -> upper, title +// upper -> lower, title +// title -> lower, upper // // Lengths with the value 0x7 indicate no value and implies no change. // A length of 0 indicates a mapping to zero-length string. // // Body bytes: -// case folding bytes -// lowercase mapping bytes -// uppercase mapping bytes -// titlecase mapping bytes -// closure mapping bytes (for NFKC_Casefold). (TODO) +// +// case folding bytes +// lowercase mapping bytes +// uppercase mapping bytes +// titlecase mapping bytes +// closure mapping bytes (for NFKC_Casefold). (TODO) // // Fallbacks: -// missing fold -> lower -// missing title -> upper -// all missing -> original rune +// +// missing fold -> lower +// missing title -> upper +// all missing -> original rune // // exceptions starts with a dummy byte to enforce that there is no zero index // value. 
diff --git a/cmd/gotext/doc.go b/cmd/gotext/doc.go index fa247c6d3..d363ae25e 100644 --- a/cmd/gotext/doc.go +++ b/cmd/gotext/doc.go @@ -4,60 +4,47 @@ // // Usage: // -// gotext command [arguments] +// gotext command [arguments] // // The commands are: // -// update merge translations and generate catalog -// extract extracts strings to be translated from code -// rewrite rewrites fmt functions to use a message Printer -// generate generates code to insert translated messages +// update merge translations and generate catalog +// extract extracts strings to be translated from code +// rewrite rewrites fmt functions to use a message Printer +// generate generates code to insert translated messages // // Use "gotext help [command]" for more information about a command. // // Additional help topics: // -// // Use "gotext help [topic]" for more information about that topic. // -// -// Merge translations and generate catalog +// # Merge translations and generate catalog // // Usage: // -// gotext update * [-out ] -// -// +// gotext update * [-out ] // -// -// Extracts strings to be translated from code +// # Extracts strings to be translated from code // // Usage: // -// gotext extract * -// -// +// gotext extract * // -// -// Rewrites fmt functions to use a message Printer +// # Rewrites fmt functions to use a message Printer // // Usage: // -// gotext rewrite +// gotext rewrite // // rewrite is typically done once for a project. It rewrites all usages of // fmt to use x/text's message package whenever a message.Printer is in scope. // It rewrites Print and Println calls with constant strings to the equivalent // using Printf to allow translators to reorder arguments. 
// -// -// Generates code to insert translated messages +// # Generates code to insert translated messages // // Usage: // -// gotext generate -// -// -// -// +// gotext generate package main diff --git a/cmd/gotext/main.go b/cmd/gotext/main.go index f31dd4fbd..aad1d4a14 100644 --- a/cmd/gotext/main.go +++ b/cmd/gotext/main.go @@ -16,7 +16,6 @@ import ( "go/build" "go/format" "io" - "io/ioutil" "log" "os" "strings" @@ -48,7 +47,7 @@ func config() (*pipeline.Config, error) { return &pipeline.Config{ SourceLanguage: tag, Supported: getLangs(), - TranslationsPattern: `messages\.(.*)\.json`, + TranslationsPattern: `messages\.(.*)\.json$`, GenFile: *out, }, nil } @@ -310,7 +309,7 @@ func help(args []string) { if err != nil { logf("Could not format generated docs: %v\n", err) } - if err := ioutil.WriteFile("doc.go", b, 0666); err != nil { + if err := os.WriteFile("doc.go", b, 0666); err != nil { logf("Could not create file alldocs.go: %v\n", err) } } else { diff --git a/collate/build/builder.go b/collate/build/builder.go index 092a4b506..3efb7387d 100644 --- a/collate/build/builder.go +++ b/collate/build/builder.go @@ -225,26 +225,37 @@ func (t *Tailoring) SetAnchorBefore(anchor string) error { // // Examples: create a tailoring for Swedish, where "ä" is ordered after "z" // at the primary sorting level: -// t := b.Tailoring("se") -// t.SetAnchor("z") -// t.Insert(colltab.Primary, "ä", "") +// +// t := b.Tailoring("se") +// t.SetAnchor("z") +// t.Insert(colltab.Primary, "ä", "") +// // Order "ü" after "ue" at the secondary sorting level: -// t.SetAnchor("ue") -// t.Insert(colltab.Secondary, "ü","") +// +// t.SetAnchor("ue") +// t.Insert(colltab.Secondary, "ü","") +// // or -// t.SetAnchor("u") -// t.Insert(colltab.Secondary, "ü", "e") +// +// t.SetAnchor("u") +// t.Insert(colltab.Secondary, "ü", "e") +// // Order "q" afer "ab" at the secondary level and "Q" after "q" // at the tertiary level: -// t.SetAnchor("ab") -// t.Insert(colltab.Secondary, "q", "") -// 
t.Insert(colltab.Tertiary, "Q", "") +// +// t.SetAnchor("ab") +// t.Insert(colltab.Secondary, "q", "") +// t.Insert(colltab.Tertiary, "Q", "") +// // Order "b" before "a": -// t.SetAnchorBefore("a") -// t.Insert(colltab.Primary, "b", "") +// +// t.SetAnchorBefore("a") +// t.Insert(colltab.Primary, "b", "") +// // Order "0" after the last primary ignorable: -// t.SetAnchor("") -// t.Insert(colltab.Primary, "0", "") +// +// t.SetAnchor("") +// t.Insert(colltab.Primary, "0", "") func (t *Tailoring) Insert(level colltab.Level, str, extend string) error { if t.anchor == nil { return fmt.Errorf("%s:Insert: no anchor point set for tailoring of %s", t.id, str) diff --git a/collate/build/colelem.go b/collate/build/colelem.go index 04fc3bfb7..1aaa062c5 100644 --- a/collate/build/colelem.go +++ b/collate/build/colelem.go @@ -51,6 +51,7 @@ func makeCE(ce rawCE) (uint32, error) { // - n* is the size of the first node in the contraction trie. // - i* is the index of the first node in the contraction trie. // - b* is the offset into the contraction collation element table. +// // See contract.go for details on the contraction trie. const ( contractID = 0xC0000000 @@ -103,7 +104,8 @@ func makeExpansionHeader(n int) (uint32, error) { // The collation element, in this case, is of the form // 11110000 00000000 wwwwwwww vvvvvvvv, where // - v* is the replacement tertiary weight for the first rune, -// - w* is the replacement tertiary weight for the second rune, +// - w* is the replacement tertiary weight for the second rune. +// // Tertiary weights of subsequent runes should be replaced with maxTertiary. // See https://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details. const ( @@ -162,7 +164,9 @@ func implicitPrimary(r rune) int { // primaries (either double primaries or for illegal runes) // to our own representation. 
// A CJK character C is represented in the DUCET as -// [.FBxx.0020.0002.C][.BBBB.0000.0000.C] +// +// [.FBxx.0020.0002.C][.BBBB.0000.0000.C] +// // We will rewrite these characters to a single CE. // We assume the CJK values start at 0x8000. // See https://unicode.org/reports/tr10/#Implicit_Weights diff --git a/collate/build/contract.go b/collate/build/contract.go index e2df64f0c..5d79eb8bf 100644 --- a/collate/build/contract.go +++ b/collate/build/contract.go @@ -56,19 +56,22 @@ const ( // entry might still resemble a completed suffix. // Examples: // The suffix strings "ab" and "ac" can be represented as: -// []ctEntry{ -// {'a', 1, 1, noIndex}, // 'a' by itself does not match, so i is 0xFF. -// {'b', 'c', 0, 1}, // "ab" -> 1, "ac" -> 2 -// } +// +// []ctEntry{ +// {'a', 1, 1, noIndex}, // 'a' by itself does not match, so i is 0xFF. +// {'b', 'c', 0, 1}, // "ab" -> 1, "ac" -> 2 +// } // // The suffix strings "ab", "abc", "abd", and "abcd" can be represented as: -// []ctEntry{ -// {'a', 1, 1, noIndex}, // 'a' must be followed by 'b'. -// {'b', 1, 2, 1}, // "ab" -> 1, may be followed by 'c' or 'd'. -// {'d', 'd', final, 3}, // "abd" -> 3 -// {'c', 4, 1, 2}, // "abc" -> 2, may be followed by 'd'. -// {'d', 'd', final, 4}, // "abcd" -> 4 -// } +// +// []ctEntry{ +// {'a', 1, 1, noIndex}, // 'a' must be followed by 'b'. +// {'b', 1, 2, 1}, // "ab" -> 1, may be followed by 'c' or 'd'. +// {'d', 'd', final, 3}, // "abd" -> 3 +// {'c', 4, 1, 2}, // "abc" -> 2, may be followed by 'd'. +// {'d', 'd', final, 4}, // "abcd" -> 4 +// } +// // See genStateTests in contract_test.go for more examples. type ctEntry struct { L uint8 // non-final: byte value to match; final: lowest match in range. diff --git a/collate/example_sort_test.go b/collate/example_sort_test.go new file mode 100644 index 000000000..e86c02a7a --- /dev/null +++ b/collate/example_sort_test.go @@ -0,0 +1,56 @@ +// Copyright 2021 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package collate_test + +import ( + "fmt" + + "golang.org/x/text/collate" + "golang.org/x/text/language" +) + +type book struct { + title string +} + +type bookcase struct { + books []book +} + +func (bc bookcase) Len() int { + return len(bc.books) +} + +func (bc bookcase) Swap(i, j int) { + temp := bc.books[i] + bc.books[i] = bc.books[j] + bc.books[j] = temp +} + +func (bc bookcase) Bytes(i int) []byte { + // returns the bytes of text at index i + return []byte(bc.books[i].title) +} + +func ExampleCollator_Sort() { + bc := bookcase{ + books: []book{ + {title: "If Cats Disappeared from the World"}, + {title: "The Guest Cat"}, + {title: "Catwings"}, + }, + } + + cc := collate.New(language.English) + cc.Sort(bc) + + for _, b := range bc.books { + fmt.Println(b.title) + } + // Output: + // Catwings + // If Cats Disappeared from the World + // The Guest Cat +} diff --git a/collate/examples_test.go b/collate/examples_test.go new file mode 100644 index 000000000..0a42a6d21 --- /dev/null +++ b/collate/examples_test.go @@ -0,0 +1,79 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package collate_test + +import ( + "fmt" + + "golang.org/x/text/collate" + "golang.org/x/text/language" +) + +func ExampleNew() { + letters := []string{"ä", "å", "ö", "o", "a"} + + ec := collate.New(language.English) + ec.SortStrings(letters) + fmt.Printf("English Sorting: %v\n", letters) + + sc := collate.New(language.Swedish) + sc.SortStrings(letters) + fmt.Printf("Swedish Sorting: %v\n", letters) + + numbers := []string{"0", "11", "01", "2", "3", "23"} + + ec.SortStrings(numbers) + fmt.Printf("Alphabetic Sorting: %v\n", numbers) + + nc := collate.New(language.English, collate.Numeric) + nc.SortStrings(numbers) + fmt.Printf("Numeric Sorting: %v\n", numbers) + // Output: + // English Sorting: [a å ä o ö] + // Swedish Sorting: [a o å ä ö] + // Alphabetic Sorting: [0 01 11 2 23 3] + // Numeric Sorting: [0 01 2 3 11 23] +} + +func ExampleCollator_SortStrings() { + c := collate.New(language.English) + words := []string{"meow", "woof", "bark", "moo"} + c.SortStrings(words) + fmt.Println(words) + // Output: + // [bark meow moo woof] +} + +func ExampleCollator_CompareString() { + c := collate.New(language.English) + r := c.CompareString("meow", "woof") + fmt.Println(r) + + r = c.CompareString("woof", "meow") + fmt.Println(r) + + r = c.CompareString("meow", "meow") + fmt.Println(r) + // Output: + // -1 + // 1 + // 0 +} + +func ExampleCollator_Compare() { + c := collate.New(language.English) + r := c.Compare([]byte("meow"), []byte("woof")) + fmt.Println(r) + + r = c.Compare([]byte("woof"), []byte("meow")) + fmt.Println(r) + + r = c.Compare([]byte("meow"), []byte("meow")) + fmt.Println(r) + // Output: + // -1 + // 1 + // 0 +} diff --git a/collate/maketables.go b/collate/maketables.go index 59afeb9c2..4175143e3 100644 --- a/collate/maketables.go +++ b/collate/maketables.go @@ -17,7 +17,6 @@ import ( "flag" "fmt" "io" - "io/ioutil" "log" "os" "regexp" @@ -187,7 +186,7 @@ func failOnError(e error) { func openArchive() *zip.Reader { f := gen.OpenCLDRCoreZip() - buffer, err 
:= ioutil.ReadAll(f) + buffer, err := io.ReadAll(f) f.Close() failOnError(err) archive, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer))) diff --git a/collate/reg_test.go b/collate/reg_test.go index 4d3598485..f9c192cb3 100644 --- a/collate/reg_test.go +++ b/collate/reg_test.go @@ -10,7 +10,6 @@ import ( "bytes" "flag" "io" - "io/ioutil" "log" "path" "regexp" @@ -133,7 +132,7 @@ func convHex(line int, s string) int { func loadTestData() []Test { f := gen.OpenUnicodeFile("UCA", "", "CollationTest.zip") - buffer, err := ioutil.ReadAll(f) + buffer, err := io.ReadAll(f) f.Close() Error(err) archive, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer))) diff --git a/currency/format.go b/currency/format.go index 11152632a..cc4570d3b 100644 --- a/currency/format.go +++ b/currency/format.go @@ -6,11 +6,13 @@ package currency import ( "fmt" - "io" "sort" "golang.org/x/text/internal/format" "golang.org/x/text/internal/language/compact" + "golang.org/x/text/internal/number" + + "golang.org/x/text/language" ) // Amount is an amount-currency unit pair. @@ -34,8 +36,6 @@ func (a Amount) Currency() Unit { return a.currency } // // Add/Sub/Div/Mul/Round. -var space = []byte(" ") - // Format implements fmt.Formatter. It accepts format.State for // language-specific rendering. func (a Amount) Format(s fmt.State, verb rune) { @@ -58,9 +58,11 @@ type formattedValue struct { // Format implements fmt.Formatter. It accepts format.State for // language-specific rendering. func (v formattedValue) Format(s fmt.State, verb rune) { + var tag language.Tag var lang compact.ID if state, ok := s.(format.State); ok { - lang, _ = compact.RegionalID(compact.Tag(state.Language())) + tag = state.Language() + lang, _ = compact.RegionalID(compact.Tag(tag)) } // Get the options. Use DefaultFormat if not present. @@ -73,18 +75,22 @@ func (v formattedValue) Format(s fmt.State, verb rune) { cur = opt.currency } - // TODO: use pattern. 
- io.WriteString(s, opt.symbol(lang, cur)) + sym := opt.symbol(lang, cur) if v.amount != nil { - s.Write(space) - - // TODO: apply currency-specific rounding - scale, _ := opt.kind.Rounding(cur) - if _, ok := s.Precision(); !ok { - fmt.Fprintf(s, "%.*f", scale, v.amount) - } else { - fmt.Fprint(s, v.amount) - } + var f number.Formatter + f.InitDecimal(tag) + + scale, increment := opt.kind.Rounding(cur) + f.RoundingContext.SetScale(scale) + f.RoundingContext.Increment = uint32(increment) + f.RoundingContext.IncrementScale = uint8(scale) + f.RoundingContext.Mode = number.ToNearestAway + + d := f.Append(nil, v.amount) + + fmt.Fprint(s, sym, " ", string(d)) + } else { + fmt.Fprint(s, sym) } } diff --git a/currency/format_test.go b/currency/format_test.go index 0aa0d58af..5cb11ebc9 100644 --- a/currency/format_test.go +++ b/currency/format_test.go @@ -12,8 +12,10 @@ import ( ) var ( + de = language.German en = language.English fr = language.French + de_CH = language.MustParse("de-CH") en_US = language.AmericanEnglish en_GB = language.BritishEnglish en_AU = language.MustParse("en-AU") @@ -42,20 +44,35 @@ func TestFormatting(t *testing.T) { 9: {en, 9.0, Symbol.Default(EUR), "€ 9.00"}, 10: {en, 10.123, Symbol.Default(KRW), "₩ 10"}, - 11: {fr, 11.52, Symbol.Default(TWD), "TWD 11.52"}, + 11: {fr, 11.52, Symbol.Default(TWD), "TWD 11,52"}, 12: {en, 12.123, Symbol.Default(czk), "CZK 12.12"}, 13: {en, 13.123, Symbol.Default(czk).Kind(Cash), "CZK 13"}, 14: {en, 14.12345, ISO.Default(MustParseISO("CLF")), "CLF 14.1235"}, 15: {en, USD.Amount(15.00), ISO.Default(TWD), "USD 15.00"}, 16: {en, KRW.Amount(16.00), ISO.Kind(Cash), "KRW 16"}, - // TODO: support integers as well. 
- 17: {en, USD, nil, "USD"}, 18: {en, USD, ISO, "USD"}, 19: {en, USD, Symbol, "$"}, 20: {en_GB, USD, Symbol, "US$"}, 21: {en_AU, USD, NarrowSymbol, "$"}, + + // https://en.wikipedia.org/wiki/Decimal_separator + 22: {de, EUR.Amount(1234567.89), nil, "EUR 1.234.567,89"}, + 23: {fr, EUR.Amount(1234567.89), nil, "EUR 1\u00a0234\u00a0567,89"}, + 24: {en_AU, EUR.Amount(1234567.89), nil, "EUR 1,234,567.89"}, + 25: {de_CH, EUR.Amount(1234567.89), nil, "EUR 1’234’567.89"}, + + // https://en.wikipedia.org/wiki/Cash_rounding + 26: {de, NOK.Amount(2.49), ISO.Kind(Cash), "NOK 2"}, + 27: {de, NOK.Amount(2.50), ISO.Kind(Cash), "NOK 3"}, + 28: {de, DKK.Amount(0.24), ISO.Kind(Cash), "DKK 0,00"}, + 29: {de, DKK.Amount(0.25), ISO.Kind(Cash), "DKK 0,50"}, + + // integers + 30: {de, EUR.Amount(1234567), nil, "EUR 1.234.567,00"}, + 31: {en, CNY.Amount(0), NarrowSymbol, "¥ 0.00"}, + 32: {en, CNY.Amount(0), Symbol, "CN¥ 0.00"}, } for i, tc := range testCases { p := message.NewPrinter(tc.tag) diff --git a/encoding/encoding_test.go b/encoding/encoding_test.go index 173814762..e459fa230 100644 --- a/encoding/encoding_test.go +++ b/encoding/encoding_test.go @@ -5,7 +5,7 @@ package encoding_test import ( - "io/ioutil" + "io" "strings" "testing" @@ -62,7 +62,7 @@ func TestReplacement(t *testing.T) { want = "AB\x00CD\ufffdYZ" } sr := strings.NewReader("AB\x00CD\x80YZ") - g, err := ioutil.ReadAll(transform.NewReader(sr, enc)) + g, err := io.ReadAll(transform.NewReader(sr, enc)) if err != nil { t.Errorf("%s: ReadAll: %v", direction, err) continue diff --git a/encoding/htmlindex/tables.go b/encoding/htmlindex/tables.go index f074e2c6d..9e6daa896 100644 --- a/encoding/htmlindex/tables.go +++ b/encoding/htmlindex/tables.go @@ -93,8 +93,11 @@ var canonical = [numEncodings]string{ var nameMap = map[string]htmlEncoding{ "unicode-1-1-utf-8": utf8, + "unicode11utf8": utf8, + "unicode20utf8": utf8, "utf-8": utf8, "utf8": utf8, + "x-unicode20utf8": utf8, "866": ibm866, "cp866": ibm866, "csibm866": ibm866, 
@@ -307,7 +310,13 @@ var nameMap = map[string]htmlEncoding{ "iso-2022-cn-ext": replacement, "iso-2022-kr": replacement, "replacement": replacement, + "unicodefffe": utf16be, "utf-16be": utf16be, + "csunicode": utf16le, + "iso-10646-ucs-2": utf16le, + "ucs-2": utf16le, + "unicode": utf16le, + "unicodefeff": utf16le, "utf-16": utf16le, "utf-16le": utf16le, "x-user-defined": xUserDefined, diff --git a/encoding/ianaindex/tables.go b/encoding/ianaindex/tables.go index cec6a0407..921bb3b4b 100644 --- a/encoding/ianaindex/tables.go +++ b/encoding/ianaindex/tables.go @@ -141,6 +141,7 @@ const ( enc1018 enc1019 enc1020 + enc1021 enc2000 enc2001 enc2002 @@ -265,7 +266,7 @@ const ( numIANA ) -var ianaToMIB = []identifier.MIB{ // 257 elements +var ianaToMIB = []identifier.MIB{ // 258 elements // Entry 0 - 3F 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011, 0x0012, @@ -286,27 +287,27 @@ var ianaToMIB = []identifier.MIB{ // 257 elements 0x03ed, 0x03ee, 0x03ef, 0x03f0, 0x03f1, 0x03f2, 0x03f3, 0x03f4, // Entry 80 - BF 0x03f5, 0x03f6, 0x03f7, 0x03f8, 0x03f9, 0x03fa, 0x03fb, 0x03fc, - 0x07d0, 0x07d1, 0x07d2, 0x07d3, 0x07d4, 0x07d5, 0x07d6, 0x07d7, - 0x07d8, 0x07d9, 0x07da, 0x07db, 0x07dc, 0x07dd, 0x07de, 0x07df, - 0x07e0, 0x07e1, 0x07e2, 0x07e3, 0x07e4, 0x07e5, 0x07e6, 0x07e7, - 0x07e8, 0x07e9, 0x07ea, 0x07eb, 0x07ec, 0x07ed, 0x07ee, 0x07ef, - 0x07f0, 0x07f1, 0x07f2, 0x07f3, 0x07f4, 0x07f5, 0x07f6, 0x07f7, - 0x07f8, 0x07f9, 0x07fa, 0x07fb, 0x07fc, 0x07fd, 0x07fe, 0x07ff, - 0x0800, 0x0801, 0x0802, 0x0803, 0x0804, 0x0805, 0x0806, 0x0807, + 0x03fd, 0x07d0, 0x07d1, 0x07d2, 0x07d3, 0x07d4, 0x07d5, 0x07d6, + 0x07d7, 0x07d8, 0x07d9, 0x07da, 0x07db, 0x07dc, 0x07dd, 0x07de, + 0x07df, 0x07e0, 0x07e1, 0x07e2, 0x07e3, 0x07e4, 0x07e5, 0x07e6, + 0x07e7, 0x07e8, 0x07e9, 0x07ea, 0x07eb, 0x07ec, 0x07ed, 0x07ee, + 0x07ef, 0x07f0, 0x07f1, 0x07f2, 0x07f3, 0x07f4, 0x07f5, 0x07f6, + 0x07f7, 0x07f8, 0x07f9, 0x07fa, 0x07fb, 0x07fc, 
0x07fd, 0x07fe, + 0x07ff, 0x0800, 0x0801, 0x0802, 0x0803, 0x0804, 0x0805, 0x0806, // Entry C0 - FF - 0x0808, 0x0809, 0x080a, 0x080b, 0x080c, 0x080d, 0x080e, 0x080f, - 0x0810, 0x0811, 0x0812, 0x0813, 0x0814, 0x0815, 0x0816, 0x0817, - 0x0818, 0x0819, 0x081a, 0x081b, 0x081c, 0x081d, 0x081e, 0x081f, - 0x0820, 0x0821, 0x0822, 0x0823, 0x0824, 0x0825, 0x0826, 0x0827, - 0x0828, 0x0829, 0x082a, 0x082b, 0x082c, 0x082d, 0x082e, 0x082f, - 0x0830, 0x0831, 0x0832, 0x0833, 0x0834, 0x0835, 0x0836, 0x0837, - 0x0838, 0x0839, 0x083a, 0x083b, 0x083c, 0x083d, 0x08ca, 0x08cb, - 0x08cc, 0x08cd, 0x08ce, 0x08cf, 0x08d0, 0x08d1, 0x08d2, 0x08d3, + 0x0807, 0x0808, 0x0809, 0x080a, 0x080b, 0x080c, 0x080d, 0x080e, + 0x080f, 0x0810, 0x0811, 0x0812, 0x0813, 0x0814, 0x0815, 0x0816, + 0x0817, 0x0818, 0x0819, 0x081a, 0x081b, 0x081c, 0x081d, 0x081e, + 0x081f, 0x0820, 0x0821, 0x0822, 0x0823, 0x0824, 0x0825, 0x0826, + 0x0827, 0x0828, 0x0829, 0x082a, 0x082b, 0x082c, 0x082d, 0x082e, + 0x082f, 0x0830, 0x0831, 0x0832, 0x0833, 0x0834, 0x0835, 0x0836, + 0x0837, 0x0838, 0x0839, 0x083a, 0x083b, 0x083c, 0x083d, 0x08ca, + 0x08cb, 0x08cc, 0x08cd, 0x08ce, 0x08cf, 0x08d0, 0x08d1, 0x08d2, // Entry 100 - 13F - 0x08d4, -} // Size: 538 bytes + 0x08d3, 0x08d4, +} // Size: 540 bytes -var ianaNames = []string{ // 257 elements +var ianaNames = []string{ // 258 elements "US-ASCII", "\vISO-8859-1ISO_8859-1:1987", "\vISO-8859-2ISO_8859-2:1987", @@ -443,6 +444,7 @@ var ianaNames = []string{ // 257 elements "UTF-32BE", "UTF-32LE", "BOCU-1", + "UTF-7-IMAP", "ISO-8859-1-Windows-3.0-Latin-1", "ISO-8859-1-Windows-3.1-Latin-1", "ISO-8859-2-Windows-Latin-2", @@ -564,9 +566,9 @@ var ianaNames = []string{ // 257 elements "windows-1258", "TIS-620", "CP50220", -} // Size: 7088 bytes +} // Size: 7114 bytes -var mibNames = []string{ // 257 elements +var mibNames = []string{ // 258 elements "ASCII", "ISOLatin1", "ISOLatin2", @@ -703,6 +705,7 @@ var mibNames = []string{ // 257 elements "UTF32BE", "UTF32LE", "BOCU-1", + "UTF7IMAP", 
"Windows30Latin1", "Windows31Latin1", "Windows31Latin2", @@ -824,7 +827,7 @@ var mibNames = []string{ // 257 elements "windows1258", "TIS620", "CP50220", -} // Size: 6776 bytes +} // Size: 6800 bytes // TODO: Instead of using a map, we could use binary search strings doing // on-the fly lower-casing per character. This allows to always avoid @@ -1692,6 +1695,10 @@ var ianaAliases = map[string]int{ "csbocu1": enc1020, "csBOCU-1": enc1020, "csbocu-1": enc1020, + "UTF-7-IMAP": enc1021, + "utf-7-imap": enc1021, + "csUTF7IMAP": enc1021, + "csutf7imap": enc1021, "ISO-8859-1-Windows-3.0-Latin-1": enc2000, "iso-8859-1-windows-3.0-latin-1": enc2000, "csWindows30Latin1": enc2000, @@ -2345,4 +2352,4 @@ var ianaAliases = map[string]int{ "cscp50220": enc2260, } -// Total table size 14402 bytes (14KiB); checksum: CEBAA10C +// Total table size 14454 bytes (14KiB); checksum: 9095144D diff --git a/encoding/internal/enctest/enctest.go b/encoding/internal/enctest/enctest.go index 0cccae044..cf5da2675 100644 --- a/encoding/internal/enctest/enctest.go +++ b/encoding/internal/enctest/enctest.go @@ -8,7 +8,7 @@ import ( "bytes" "fmt" "io" - "io/ioutil" + "os" "strings" "testing" @@ -105,7 +105,7 @@ func Benchmark(b *testing.B, enc encoding.Encoding) { b.ResetTimer() for i := 0; i < b.N; i++ { r := transform.NewReader(bytes.NewReader(src), transformer) - io.Copy(ioutil.Discard, r) + io.Copy(io.Discard, r) } }) } @@ -157,15 +157,15 @@ func load(direction string, enc encoding.Encoding) ([]byte, []byte, Transcoder, dstFile, srcFile = srcFile, dstFile coder = enc.NewDecoder() } - dst, err := ioutil.ReadFile(dstFile) + dst, err := os.ReadFile(dstFile) if err != nil { - if dst, err = ioutil.ReadFile("../" + dstFile); err != nil { + if dst, err = os.ReadFile("../" + dstFile); err != nil { return nil, nil, nil, err } } - src, err := ioutil.ReadFile(srcFile) + src, err := os.ReadFile(srcFile) if err != nil { - if src, err = ioutil.ReadFile("../" + srcFile); err != nil { + if src, err = 
os.ReadFile("../" + srcFile); err != nil { return nil, nil, nil, err } } diff --git a/encoding/internal/identifier/mib.go b/encoding/internal/identifier/mib.go index fc7df1bc7..351fb86e2 100644 --- a/encoding/internal/identifier/mib.go +++ b/encoding/internal/identifier/mib.go @@ -905,6 +905,14 @@ const ( // https://www.unicode.org/notes/tn6/ BOCU1 MIB = 1020 + // UTF7IMAP is the MIB identifier with IANA name UTF-7-IMAP. + // + // Note: This charset is used to encode Unicode in IMAP mailbox names; + // see section 5.1.3 of rfc3501 . It should never be used + // outside this context. A name has been assigned so that charset processing + // implementations can refer to it in a consistent way. + UTF7IMAP MIB = 1021 + // Windows30Latin1 is the MIB identifier with IANA name ISO-8859-1-Windows-3.0-Latin-1. // // Extended ISO 8859-1 Latin-1 for Windows 3.0. diff --git a/encoding/simplifiedchinese/all_test.go b/encoding/simplifiedchinese/all_test.go index a556c94dd..fbb623cd2 100644 --- a/encoding/simplifiedchinese/all_test.go +++ b/encoding/simplifiedchinese/all_test.go @@ -40,7 +40,9 @@ func TestNonRepertoire(t *testing.T) { {enc, HZGB2312, "a갂", "a"}, {enc, HZGB2312, "\u6cf5갂", "~{1C~}"}, + {dec, GBK, "\xa2\xe3", "€"}, {dec, GB18030, "\x80", "€"}, + {dec, GB18030, "\x81", "\ufffd"}, {dec, GB18030, "\x81\x20", "\ufffd "}, {dec, GB18030, "\xfe\xfe", "\ufffd"}, @@ -125,6 +127,14 @@ func TestBasics(t *testing.T) { encPrefix: "~{", encoded: ";(F#,6@WCN^O`GW!#", utf8: "花间一壶酒,独酌无相亲。", + }, { + e: GBK, + encoded: "\x80", + utf8: "€", + }, { + e: GB18030, + encoded: "\xa2\xe3", + utf8: "€", }} for _, tc := range testCases { diff --git a/encoding/simplifiedchinese/gbk.go b/encoding/simplifiedchinese/gbk.go index b89c45b03..0e0fabfd6 100644 --- a/encoding/simplifiedchinese/gbk.go +++ b/encoding/simplifiedchinese/gbk.go @@ -55,6 +55,8 @@ loop: // Microsoft's Code Page 936 extends GBK 1.0 to encode the euro sign U+20AC // as 0x80. 
The HTML5 specification at http://encoding.spec.whatwg.org/#gbk // says to treat "gbk" as Code Page 936. + // GBK’s decoder is gb18030’s decoder. https://encoding.spec.whatwg.org/#gbk-decoder + // If byte is 0x80, return code point U+20AC. https://encoding.spec.whatwg.org/#gb18030-decoder case c0 == 0x80: r, size = '€', 1 @@ -180,7 +182,9 @@ func (e gbkEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err // Microsoft's Code Page 936 extends GBK 1.0 to encode the euro sign U+20AC // as 0x80. The HTML5 specification at http://encoding.spec.whatwg.org/#gbk // says to treat "gbk" as Code Page 936. - if r == '€' { + // GBK’s encoder is gb18030’s encoder with its _is GBK_ set to true. https://encoding.spec.whatwg.org/#gbk-encoder + // If _is GBK_ is true and code point is U+20AC, return byte 0x80. https://encoding.spec.whatwg.org/#gb18030-encoder + if !e.gb18030 && r == '€' { r = 0x80 goto write1 } diff --git a/encoding/traditionalchinese/all_test.go b/encoding/traditionalchinese/all_test.go index 3825c7672..f804e83e1 100644 --- a/encoding/traditionalchinese/all_test.go +++ b/encoding/traditionalchinese/all_test.go @@ -6,7 +6,7 @@ package traditionalchinese import ( "fmt" - "io/ioutil" + "io" "strings" "testing" @@ -103,7 +103,7 @@ func TestBig5CircumflexAndMacron(t *testing.T) { "\x88\xa2\x88\xa3\x88\xa4\x88\xa5\x88\xa6" want := "ÓǑÒ\u00ca\u0304Ế\u00ca\u030cỀÊ " + "ü\u00ea\u0304ế\u00ea\u030cề" - dst, err := ioutil.ReadAll(transform.NewReader( + dst, err := io.ReadAll(transform.NewReader( strings.NewReader(src), Big5.NewDecoder())) if err != nil { t.Fatal(err) diff --git a/feature/plural/gen.go b/feature/plural/gen.go index b9c5f2493..5f8f375fb 100644 --- a/feature/plural/gen.go +++ b/feature/plural/gen.go @@ -359,15 +359,16 @@ var operandIndex = map[string]opID{ // the resulting or conditions to conds. 
// // Example rules: -// // Category "one" in English: only allow 1 with no visible fraction -// i = 1 and v = 0 @integer 1 // -// // Category "few" in Czech: all numbers with visible fractions -// v != 0 @decimal ... +// // Category "one" in English: only allow 1 with no visible fraction +// i = 1 and v = 0 @integer 1 // -// // Category "zero" in Latvian: all multiples of 10 or the numbers 11-19 or -// // numbers with a fraction 11..19 and no trailing zeros. -// n % 10 = 0 or n % 100 = 11..19 or v = 2 and f % 100 = 11..19 @integer ... +// // Category "few" in Czech: all numbers with visible fractions +// v != 0 @decimal ... +// +// // Category "zero" in Latvian: all multiples of 10 or the numbers 11-19 or +// // numbers with a fraction 11..19 and no trailing zeros. +// n % 10 = 0 or n % 100 = 11..19 or v = 2 and f % 100 = 11..19 @integer ... // // @integer and @decimal are followed by examples and are not relevant for the // rule itself. The are used here to signal the termination of the rule. diff --git a/feature/plural/message.go b/feature/plural/message.go index f931f8a6a..56d518cc3 100644 --- a/feature/plural/message.go +++ b/feature/plural/message.go @@ -6,7 +6,7 @@ package plural import ( "fmt" - "io/ioutil" + "io" "reflect" "strconv" @@ -35,13 +35,13 @@ type Interface interface { // The cases argument are pairs of selectors and messages. Selectors are of type // string or Form. Messages are of type string or catalog.Message. A selector // matches an argument if: -// - it is "other" or Other -// - it matches the plural form of the argument: "zero", "one", "two", "few", -// or "many", or the equivalent Form -// - it is of the form "=x" where x is an integer that matches the value of -// the argument. -// - it is of the form " 1. -// - Otherwise the result is i % 10^nMod. +// - Let i be asInt(digits[start:end]), where out-of-range digits are assumed +// to be zero. +// - Result n is big if i / 10^nMod > 1. +// - Otherwise the result is i % 10^nMod. 
// // For example, if digits is {1, 2, 3} and start:end is 0:5, then the result // for various values of nMod is: -// - when nMod == 2, n == big -// - when nMod == 3, n == big -// - when nMod == 4, n == big -// - when nMod == 5, n == 12300 -// - when nMod == 6, n == 12300 -// - when nMod == 7, n == 12300 +// - when nMod == 2, n == big +// - when nMod == 3, n == big +// - when nMod == 4, n == big +// - when nMod == 5, n == 12300 +// - when nMod == 6, n == 12300 +// - when nMod == 7, n == 12300 func getIntApprox(digits []byte, start, end, nMod, big int) (n int) { // Leading 0 digits just result in 0. p := start @@ -107,12 +106,13 @@ func getIntApprox(digits []byte, start, end, nMod, big int) (n int) { // // The following table contains examples of possible arguments to represent // the given numbers. -// decimal digits exp scale -// 123 []byte{1, 2, 3} 3 0 -// 123.4 []byte{1, 2, 3, 4} 3 1 -// 123.40 []byte{1, 2, 3, 4} 3 2 -// 100000 []byte{1} 6 0 -// 100000.00 []byte{1} 6 3 +// +// decimal digits exp scale +// 123 []byte{1, 2, 3} 3 0 +// 123.4 []byte{1, 2, 3, 4} 3 1 +// 123.40 []byte{1, 2, 3, 4} 3 2 +// 100000 []byte{1} 6 0 +// 100000.00 []byte{1} 6 3 func (p *Rules) MatchDigits(t language.Tag, digits []byte, exp, scale int) Form { index := tagToID(t) @@ -152,14 +152,15 @@ func (p *Rules) matchComponents(t language.Tag, n, f, scale int) Form { // MatchPlural returns the plural form for the given language and plural // operands (as defined in // https://unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules): -// where -// n absolute value of the source number (integer and decimals) -// input -// i integer digits of n. -// v number of visible fraction digits in n, with trailing zeros. -// w number of visible fraction digits in n, without trailing zeros. -// f visible fractional digits in n, with trailing zeros (f = t * 10^(v-w)) -// t visible fractional digits in n, without trailing zeros. 
+// +// where +// n absolute value of the source number (integer and decimals) +// input +// i integer digits of n. +// v number of visible fraction digits in n, with trailing zeros. +// w number of visible fraction digits in n, without trailing zeros. +// f visible fractional digits in n, with trailing zeros (f = t * 10^(v-w)) +// t visible fractional digits in n, without trailing zeros. // // If any of the operand values is too large to fit in an int, it is okay to // pass the value modulo 10,000,000. diff --git a/gen.go b/gen.go index 04ff6035c..566a30355 100644 --- a/gen.go +++ b/gen.go @@ -14,7 +14,6 @@ import ( "flag" "fmt" "go/format" - "io/ioutil" "os" "os/exec" "path" @@ -256,13 +255,7 @@ func copyPackage(dirSrc, dirDst, search, replace string) { filepath.Dir(file) != dirSrc { return nil } - if strings.HasPrefix(base, "tables") { - if !strings.HasSuffix(base, gen.UnicodeVersion()+".go") { - return nil - } - base = "tables.go" - } - b, err := ioutil.ReadFile(file) + b, err := os.ReadFile(file) if err != nil || bytes.Contains(b, []byte("\n// +build ignore")) { return err } @@ -281,7 +274,7 @@ func copyPackage(dirSrc, dirDst, search, replace string) { } file = filepath.Join(dirDst, base) vprintf("=== COPY %s\n", file) - return ioutil.WriteFile(file, b, 0666) + return os.WriteFile(file, b, 0666) }) if err != nil { fmt.Println("Copying exported files failed:", err) diff --git a/go.mod b/go.mod index 63bc05f20..7afa1eb19 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,10 @@ module golang.org/x/text -require golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e +require golang.org/x/tools v0.1.12 // tagx:ignore + +require ( + golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 // indirect + golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f // indirect +) go 1.17 diff --git a/go.sum b/go.sum index 6a308d730..3ddae732d 100644 --- a/go.sum +++ b/go.sum @@ -1,2 +1,26 @@ -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e 
h1:FDhOuMEY4JVRztM/gsbk+IKUQ8kj74bxZrgw87eMMVc= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 h1:6zppjxzCulZykYSLyVDYbneBfbaBIQPYMevg0bEwv2s= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f h1:v4INt8xihDGvnrfjMDVXGxw9wrfxYyCjk0KbXjhR55s= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/text 
v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12 h1:VveCTK38A2rkS8ZqFY25HIDFscX5X9OoEhJd3quQmXU= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/internal/catmsg/catmsg.go b/internal/catmsg/catmsg.go index c0bf86f09..1b257a7b4 100644 --- a/internal/catmsg/catmsg.go +++ b/internal/catmsg/catmsg.go @@ -9,8 +9,7 @@ // own. For instance, the plural package provides functionality for selecting // translation strings based on the plural category of substitution arguments. // -// -// Encoding and Decoding +// # Encoding and Decoding // // Catalogs store Messages encoded as a single string. Compiling a message into // a string both results in compacter representation and speeds up evaluation. @@ -25,8 +24,7 @@ // the message. This decoder takes a Decoder argument which provides the // counterparts for the decoding. // -// -// Renderers +// # Renderers // // A Decoder must be initialized with a Renderer implementation. These // implementations must be provided by packages that use Catalogs, typically @@ -38,22 +36,22 @@ // as sequence of substrings passed to the Renderer. The following snippet shows // how to express the above example using the message package. 
// -// message.Set(language.English, "You are %d minute(s) late.", -// catalog.Var("minutes", plural.Select(1, "one", "minute")), -// catalog.String("You are %[1]d ${minutes} late.")) +// message.Set(language.English, "You are %d minute(s) late.", +// catalog.Var("minutes", plural.Select(1, "one", "minute")), +// catalog.String("You are %[1]d ${minutes} late.")) // -// p := message.NewPrinter(language.English) -// p.Printf("You are %d minute(s) late.", 5) // always 5 minutes late. +// p := message.NewPrinter(language.English) +// p.Printf("You are %d minute(s) late.", 5) // always 5 minutes late. // // To evaluate the Printf, package message wraps the arguments in a Renderer // that is passed to the catalog for message decoding. The call sequence that // results from evaluating the above message, assuming the person is rather // tardy, is: // -// Render("You are %[1]d ") -// Arg(1) -// Render("minutes") -// Render(" late.") +// Render("You are %[1]d ") +// Arg(1) +// Render("minutes") +// Render(" late.") // // The calls to Arg is caused by the plural.Select execution, which evaluates // the argument to determine whether the singular or plural message form should @@ -267,10 +265,12 @@ func (s FirstOf) Compile(e *Encoder) error { // Var defines a message that can be substituted for a placeholder of the same // name. If an expression does not result in a string after evaluation, Name is // used as the substitution. For example: -// Var{ -// Name: "minutes", -// Message: plural.Select(1, "one", "minute"), -// } +// +// Var{ +// Name: "minutes", +// Message: plural.Select(1, "one", "minute"), +// } +// // will resolve to minute for singular and minutes for plural forms. type Var struct { Name string @@ -318,13 +318,15 @@ func (r Raw) Compile(e *Encoder) (err error) { // calls for each placeholder and interstitial string. For example, for the // message: "%[1]v ${invites} %[2]v to ${their} party." 
The sequence of calls // is: -// d.Render("%[1]v ") -// d.Arg(1) -// d.Render(resultOfInvites) -// d.Render(" %[2]v to ") -// d.Arg(2) -// d.Render(resultOfTheir) -// d.Render(" party.") +// +// d.Render("%[1]v ") +// d.Arg(1) +// d.Render(resultOfInvites) +// d.Render(" %[2]v to ") +// d.Arg(2) +// d.Render(resultOfTheir) +// d.Render(" party.") +// // where the messages for "invites" and "their" both use a plural.Select // referring to the first argument. // diff --git a/internal/cldrtree/cldrtree.go b/internal/cldrtree/cldrtree.go index 7530831d6..cc2714e99 100644 --- a/internal/cldrtree/cldrtree.go +++ b/internal/cldrtree/cldrtree.go @@ -4,7 +4,6 @@ // Package cldrtree builds and generates a CLDR index file, including all // inheritance. -// package cldrtree //go:generate go test -gen diff --git a/internal/cldrtree/cldrtree_test.go b/internal/cldrtree/cldrtree_test.go index 06c366a05..c42eba30a 100644 --- a/internal/cldrtree/cldrtree_test.go +++ b/internal/cldrtree/cldrtree_test.go @@ -7,9 +7,9 @@ package cldrtree import ( "bytes" "flag" - "io/ioutil" "log" "math/rand" + "os" "path/filepath" "reflect" "regexp" @@ -303,11 +303,11 @@ func TestGen(t *testing.T) { file := filepath.Join("testdata", tc, "output.go") if *genOutput { - ioutil.WriteFile(file, got, 0700) + os.WriteFile(file, got, 0700) t.SkipNow() } - b, err := ioutil.ReadFile(file) + b, err := os.ReadFile(file) if err != nil { t.Fatalf("failed to open file: %v", err) } diff --git a/internal/colltab/collelem.go b/internal/colltab/collelem.go index 396cebda2..0c23c8a48 100644 --- a/internal/colltab/collelem.go +++ b/internal/colltab/collelem.go @@ -78,24 +78,27 @@ func (ce Elem) ctype() ceType { // For normal collation elements, we assume that a collation element either has // a primary or non-default secondary value, not both. 
// Collation elements with a primary value are of the form -// 01pppppp pppppppp ppppppp0 ssssssss -// - p* is primary collation value -// - s* is the secondary collation value -// 00pppppp pppppppp ppppppps sssttttt, where -// - p* is primary collation value -// - s* offset of secondary from default value. -// - t* is the tertiary collation value -// 100ttttt cccccccc pppppppp pppppppp -// - t* is the tertiar collation value -// - c* is the canonical combining class -// - p* is the primary collation value +// +// 01pppppp pppppppp ppppppp0 ssssssss +// - p* is primary collation value +// - s* is the secondary collation value +// 00pppppp pppppppp ppppppps sssttttt, where +// - p* is primary collation value +// - s* offset of secondary from default value. +// - t* is the tertiary collation value +// 100ttttt cccccccc pppppppp pppppppp +// - t* is the tertiar collation value +// - c* is the canonical combining class +// - p* is the primary collation value +// // Collation elements with a secondary value are of the form -// 1010cccc ccccssss ssssssss tttttttt, where -// - c* is the canonical combining class -// - s* is the secondary collation value -// - t* is the tertiary collation value -// 11qqqqqq qqqqqqqq qqqqqqq0 00000000 -// - q* quaternary value +// +// 1010cccc ccccssss ssssssss tttttttt, where +// - c* is the canonical combining class +// - s* is the secondary collation value +// - t* is the tertiary collation value +// 11qqqqqq qqqqqqqq qqqqqqq0 00000000 +// - q* quaternary value const ( ceTypeMask = 0xC0000000 ceTypeMaskExt = 0xE0000000 @@ -296,6 +299,7 @@ func (ce Elem) Weight(l Level) int { // - n* is the size of the first node in the contraction trie. // - i* is the index of the first node in the contraction trie. // - b* is the offset into the contraction collation element table. +// // See contract.go for details on the contraction trie. 
const ( maxNBits = 4 @@ -326,6 +330,7 @@ func splitExpandIndex(ce Elem) (index int) { // The Elem, in this case, is of the form 11110000 00000000 wwwwwwww vvvvvvvv, where // - v* is the replacement tertiary weight for the first rune, // - w* is the replacement tertiary weight for the second rune, +// // Tertiary weights of subsequent runes should be replaced with maxTertiary. // See https://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details. func splitDecompose(ce Elem) (t1, t2 uint8) { diff --git a/internal/export/idna/conformance_test.go b/internal/export/idna/conformance_test.go index 1cdf43ca9..3e0e87518 100644 --- a/internal/export/idna/conformance_test.go +++ b/internal/export/idna/conformance_test.go @@ -2,6 +2,9 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build go1.10 +// +build go1.10 + package idna import ( diff --git a/internal/export/idna/example_test.go b/internal/export/idna/example_test.go index 6e6b8727c..4c7352bdc 100644 --- a/internal/export/idna/example_test.go +++ b/internal/export/idna/example_test.go @@ -13,27 +13,26 @@ import ( func ExampleProfile() { // Raw Punycode has no restrictions and does no mappings. fmt.Println(idna.ToASCII("")) - fmt.Println(idna.ToASCII("*.faß.com")) - fmt.Println(idna.Punycode.ToASCII("*.faß.com")) + fmt.Println(idna.ToASCII("*.GÖPHER.com")) + fmt.Println(idna.Punycode.ToASCII("*.GÖPHER.com")) - // Rewrite IDN for lookup. This (currently) uses transitional mappings to - // find a balance between IDNA2003 and IDNA2008 compatibility. + // Rewrite IDN for lookup. fmt.Println(idna.Lookup.ToASCII("")) - fmt.Println(idna.Lookup.ToASCII("www.faß.com")) + fmt.Println(idna.Lookup.ToASCII("www.GÖPHER.com")) - // Convert an IDN to ASCII for registration purposes. This changes the - // encoding, but reports an error if the input was illformed. 
- fmt.Println(idna.Registration.ToASCII("")) - fmt.Println(idna.Registration.ToASCII("www.faß.com")) + // Convert an IDN to ASCII for registration purposes. + // This reports an error if the input was illformed. + fmt.Println(idna.Registration.ToASCII("www.GÖPHER.com")) + fmt.Println(idna.Registration.ToASCII("www.göpher.com")) // Output: // - // *.xn--fa-hia.com - // *.xn--fa-hia.com + // *.xn--GPHER-1oa.com + // *.xn--GPHER-1oa.com // - // www.fass.com - // idna: invalid label "" - // www.xn--fa-hia.com + // www.xn--gpher-jua.com + // www.xn--GPHER-1oa.com idna: disallowed rune U+0047 + // www.xn--gpher-jua.com } func ExampleNew() { diff --git a/internal/export/idna/gen_trieval.go b/internal/export/idna/gen_trieval.go index 9d92407f2..501bfabed 100644 --- a/internal/export/idna/gen_trieval.go +++ b/internal/export/idna/gen_trieval.go @@ -22,23 +22,23 @@ package main // // The per-rune values have the following format: // -// if mapped { -// if inlinedXOR { -// 15..13 inline XOR marker -// 12..11 unused -// 10..3 inline XOR mask -// } else { -// 15..3 index into xor or mapping table -// } -// } else { -// 15..14 unused -// 13 mayNeedNorm -// 12..11 attributes -// 10..8 joining type -// 7..3 category type -// } -// 2 use xor pattern -// 1..0 mapped category +// if mapped { +// if inlinedXOR { +// 15..13 inline XOR marker +// 12..11 unused +// 10..3 inline XOR mask +// } else { +// 15..3 index into xor or mapping table +// } +// } else { +// 15..14 unused +// 13 mayNeedNorm +// 12..11 attributes +// 10..8 joining type +// 7..3 category type +// } +// 2 use xor pattern +// 1..0 mapped category // // See the definitions below for a more detailed description of the various // bits. diff --git a/internal/export/idna/go118.go b/internal/export/idna/go118.go new file mode 100644 index 000000000..941a7aaff --- /dev/null +++ b/internal/export/idna/go118.go @@ -0,0 +1,12 @@ +// Copyright 2021 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build go1.18 +// +build go1.18 + +package idna + +// Transitional processing is disabled by default in Go 1.18. +// https://golang.org/issue/47510 +const transitionalLookup = false diff --git a/internal/export/idna/idna10.0.0.go b/internal/export/idna/idna10.0.0.go index 3e7bac3cb..0e7571d16 100644 --- a/internal/export/idna/idna10.0.0.go +++ b/internal/export/idna/idna10.0.0.go @@ -59,10 +59,10 @@ type Option func(*options) // Transitional sets a Profile to use the Transitional mapping as defined in UTS // #46. This will cause, for example, "ß" to be mapped to "ss". Using the // transitional mapping provides a compromise between IDNA2003 and IDNA2008 -// compatibility. It is used by most browsers when resolving domain names. This +// compatibility. It is used by some browsers when resolving domain names. This // option is only meaningful if combined with MapForLookup. func Transitional(transitional bool) Option { - return func(o *options) { o.transitional = true } + return func(o *options) { o.transitional = transitional } } // VerifyDNSLength sets whether a Profile should fail if any of the IDN parts @@ -284,7 +284,7 @@ var ( punycode = &Profile{} lookup = &Profile{options{ - transitional: true, + transitional: transitionalLookup, useSTD3Rules: true, checkHyphens: true, checkJoiners: true, diff --git a/internal/export/idna/idna10.0.0_test.go b/internal/export/idna/idna10.0.0_test.go index 4142bfa84..c3365bc6a 100644 --- a/internal/export/idna/idna10.0.0_test.go +++ b/internal/export/idna/idna10.0.0_test.go @@ -34,6 +34,10 @@ func TestLabelErrors(t *testing.T) { std3 := kind{"STD3", p.ToASCII} p = New(MapForLookup(), CheckHyphens(false)) hyphens := kind{"CheckHyphens", p.ToASCII} + p = New(MapForLookup(), Transitional(true)) + transitional := kind{"Transitional", p.ToASCII} + p = New(MapForLookup(), Transitional(false)) + nontransitional := 
kind{"Nontransitional", p.ToASCII} testCases := []struct { kind @@ -95,24 +99,26 @@ func TestLabelErrors(t *testing.T) { {hyphens, "-label-.com", "-label-.com", ""}, // Don't map U+2490 (DIGIT NINE FULL STOP). This is the behavior of - // Chrome, Safari, and IE. Firefox will first map ⒐ to 9. and return - // lab9.be. + // Chrome, modern Firefox, Safari, and IE. {resolve, "lab⒐be", "xn--labbe-zh9b", "P1"}, // encode("lab⒐be") {display, "lab⒐be", "lab⒐be", "P1"}, - - {resolve, "plan⒐faß.de", "xn--planfass-c31e.de", "P1"}, // encode("plan⒐fass") + ".de" + {transitional, "plan⒐faß.de", "xn--planfass-c31e.de", "P1"}, // encode("plan⒐fass") + ".de" {display, "Plan⒐faß.de", "plan⒐faß.de", "P1"}, + // Transitional vs Nontransitional processing + {transitional, "Plan9faß.de", "plan9fass.de", ""}, + {nontransitional, "Plan9faß.de", "xn--plan9fa-6va.de", ""}, + // Chrome 54.0 recognizes the error and treats this input verbatim as a // search string. // Safari 10.0 (non-conform spec) decomposes "⒈" and computes the // punycode on the result using transitional mapping. // Firefox 49.0.1 goes haywire on this string and prints a bunch of what // seems to be nested punycode encodings. 
- {resolve, "日本⒈co.ßßß.de", "xn--co-wuw5954azlb.ssssss.de", "P1"}, + {transitional, "日本⒈co.ßßß.de", "xn--co-wuw5954azlb.ssssss.de", "P1"}, {display, "日本⒈co.ßßß.de", "日本⒈co.ßßß.de", "P1"}, - {resolve, "a\u200Cb", "ab", ""}, + {transitional, "a\u200Cb", "ab", ""}, {display, "a\u200Cb", "a\u200Cb", "C"}, {resolve, encode("a\u200Cb"), encode("a\u200Cb"), "C"}, @@ -147,3 +153,11 @@ func TestLabelErrors(t *testing.T) { doTest(t, tc.f, tc.name, tc.input, tc.want, tc.wantErr) } } + +func TestTransitionalDefault(t *testing.T) { + want := "xn--strae-oqa.de" + if transitionalLookup { + want = "strasse.de" + } + doTest(t, Lookup.ToASCII, "Lookup", "straße.de", want, "") +} diff --git a/internal/export/idna/idna9.0.0.go b/internal/export/idna/idna9.0.0.go index 7acecb800..4979fdc17 100644 --- a/internal/export/idna/idna9.0.0.go +++ b/internal/export/idna/idna9.0.0.go @@ -58,10 +58,10 @@ type Option func(*options) // Transitional sets a Profile to use the Transitional mapping as defined in UTS // #46. This will cause, for example, "ß" to be mapped to "ss". Using the // transitional mapping provides a compromise between IDNA2003 and IDNA2008 -// compatibility. It is used by most browsers when resolving domain names. This +// compatibility. It is used by some browsers when resolving domain names. This // option is only meaningful if combined with MapForLookup. 
func Transitional(transitional bool) Option { - return func(o *options) { o.transitional = true } + return func(o *options) { o.transitional = transitional } } // VerifyDNSLength sets whether a Profile should fail if any of the IDN parts diff --git a/internal/export/idna/idna9.0.0_test.go b/internal/export/idna/idna9.0.0_test.go index b76b79628..524727b1b 100644 --- a/internal/export/idna/idna9.0.0_test.go +++ b/internal/export/idna/idna9.0.0_test.go @@ -34,6 +34,10 @@ func TestLabelErrors(t *testing.T) { std3 := kind{"STD3", p.ToASCII} p = New(MapForLookup(), CheckHyphens(false)) hyphens := kind{"CheckHyphens", p.ToASCII} + p = New(MapForLookup(), Transitional(true)) + transitional := kind{"Transitional", p.ToASCII} + p = New(MapForLookup(), Transitional(false)) + nontransitional := kind{"Nontransitional", p.ToASCII} testCases := []struct { kind @@ -91,14 +95,16 @@ func TestLabelErrors(t *testing.T) { {hyphens, "-label-.com", "-label-.com", ""}, // Don't map U+2490 (DIGIT NINE FULL STOP). This is the behavior of - // Chrome, Safari, and IE. Firefox will first map ⒐ to 9. and return - // lab9.be. + // Chrome, modern Firefox, Safari, and IE. {resolve, "lab⒐be", "xn--labbe-zh9b", "P1"}, // encode("lab⒐be") {display, "lab⒐be", "lab⒐be", "P1"}, - {resolve, "plan⒐faß.de", "xn--planfass-c31e.de", "P1"}, // encode("plan⒐fass") + ".de" {display, "Plan⒐faß.de", "plan⒐faß.de", "P1"}, + // Transitional vs Nontransitional processing + {transitional, "Plan9faß.de", "plan9fass.de", ""}, + {nontransitional, "Plan9faß.de", "xn--plan9fa-6va.de", ""}, + // Chrome 54.0 recognizes the error and treats this input verbatim as a // search string. 
// Safari 10.0 (non-conform spec) decomposes "⒈" and computes the diff --git a/internal/export/idna/idna_test.go b/internal/export/idna/idna_test.go index 7235452c2..a13b67348 100644 --- a/internal/export/idna/idna_test.go +++ b/internal/export/idna/idna_test.go @@ -5,7 +5,9 @@ package idna import ( + "encoding/hex" "fmt" + "regexp" "strconv" "strings" "testing" @@ -43,7 +45,7 @@ func TestProfiles(t *testing.T) { VerifyDNSLength(true), BidiRule(), )}, - {"Lookup", lookup, New(MapForLookup(), BidiRule(), Transitional(true))}, + {"Lookup", lookup, New(MapForLookup(), BidiRule(), Transitional(transitionalLookup))}, {"Display", display, New(MapForLookup(), BidiRule())}, } for _, tc := range testCases { @@ -99,12 +101,14 @@ func doTest(t *testing.T, f func(string) (string, error), name, input, want, err }) } +var unescapeRE = regexp.MustCompile(`\\u([0-9a-zA-Z]{4})`) + func unescape(s string) string { - s, err := strconv.Unquote(`"` + s + `"`) - if err != nil { - panic(err) - } - return s + return unescapeRE.ReplaceAllStringFunc(s, func(v string) string { + var d [2]byte + hex.Decode(d[:], []byte(v[2:])) + return string(rune(d[0])<<8 | rune(d[1])) + }) } func BenchmarkProfile(b *testing.B) { diff --git a/internal/export/idna/pre_go118.go b/internal/export/idna/pre_go118.go new file mode 100644 index 000000000..ab3fa2e8c --- /dev/null +++ b/internal/export/idna/pre_go118.go @@ -0,0 +1,10 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +//go:build !go1.18 +// +build !go1.18 + +package idna + +const transitionalLookup = true diff --git a/internal/export/idna/punycode.go b/internal/export/idna/punycode.go index f0cbd487b..7e96febf1 100644 --- a/internal/export/idna/punycode.go +++ b/internal/export/idna/punycode.go @@ -47,6 +47,7 @@ func decode(encoded string) (string, error) { } } i, n, bias := int32(0), initialN, initialBias + overflow := false for pos < len(encoded) { oldI, w := i, int32(1) for k := base; ; k += base { @@ -58,29 +59,32 @@ func decode(encoded string) (string, error) { return "", punyError(encoded) } pos++ - i += digit * w - if i < 0 { + i, overflow = madd(i, digit, w) + if overflow { return "", punyError(encoded) } t := k - bias - if t < tmin { + if k <= bias { t = tmin - } else if t > tmax { + } else if k >= bias+tmax { t = tmax } if digit < t { break } - w *= base - t - if w >= math.MaxInt32/base { + w, overflow = madd(0, w, base-t) + if overflow { return "", punyError(encoded) } } + if len(output) >= 1024 { + return "", punyError(encoded) + } x := int32(len(output) + 1) bias = adapt(i-oldI, x, oldI == 0) n += i / x i %= x - if n > utf8.MaxRune || len(output) >= 1024 { + if n < 0 || n > utf8.MaxRune { return "", punyError(encoded) } output = append(output, 0) @@ -113,6 +117,7 @@ func encode(prefix, s string) (string, error) { if b > 0 { output = append(output, '-') } + overflow := false for remaining != 0 { m := int32(0x7fffffff) for _, r := range s { @@ -120,8 +125,8 @@ func encode(prefix, s string) (string, error) { m = r } } - delta += (m - n) * (h + 1) - if delta < 0 { + delta, overflow = madd(delta, m-n, h+1) + if overflow { return "", punyError(s) } n = m @@ -139,9 +144,9 @@ func encode(prefix, s string) (string, error) { q := delta for k := base; ; k += base { t := k - bias - if t < tmin { + if k <= bias { t = tmin - } else if t > tmax { + } else if k >= bias+tmax { t = tmax } if q < t { @@ -162,6 +167,15 @@ func encode(prefix, s string) (string, error) { return 
string(output), nil } +// madd computes a + (b * c), detecting overflow. +func madd(a, b, c int32) (next int32, overflow bool) { + p := int64(b) * int64(c) + if p > math.MaxInt32-int64(a) { + return 0, true + } + return a + int32(p), false +} + func decodeDigit(x byte) (digit int32, ok bool) { switch { case '0' <= x && x <= '9': diff --git a/internal/export/idna/punycode_test.go b/internal/export/idna/punycode_test.go index 2d99239ec..5cf0c968a 100644 --- a/internal/export/idna/punycode_test.go +++ b/internal/export/idna/punycode_test.go @@ -177,6 +177,7 @@ var punycodeErrorTestCases = [...]string{ "decode 9999999999a", // "9999999999a" overflows the int32 calculation. "encode " + strings.Repeat("x", 65536) + "\uff00", // int32 overflow. + "encode " + strings.Repeat("x", 65666) + "\uffff", // int32 overflow. issue #28233 } func TestPunycodeErrors(t *testing.T) { diff --git a/internal/export/idna/trieval.go b/internal/export/idna/trieval.go index 7a8cf889b..9c070a44b 100644 --- a/internal/export/idna/trieval.go +++ b/internal/export/idna/trieval.go @@ -17,23 +17,23 @@ package idna // // The per-rune values have the following format: // -// if mapped { -// if inlinedXOR { -// 15..13 inline XOR marker -// 12..11 unused -// 10..3 inline XOR mask -// } else { -// 15..3 index into xor or mapping table -// } -// } else { -// 15..14 unused -// 13 mayNeedNorm -// 12..11 attributes -// 10..8 joining type -// 7..3 category type -// } -// 2 use xor pattern -// 1..0 mapped category +// if mapped { +// if inlinedXOR { +// 15..13 inline XOR marker +// 12..11 unused +// 10..3 inline XOR mask +// } else { +// 15..3 index into xor or mapping table +// } +// } else { +// 15..14 unused +// 13 mayNeedNorm +// 12..11 attributes +// 10..8 joining type +// 7..3 category type +// } +// 2 use xor pattern +// 1..0 mapped category // // See the definitions below for a more detailed description of the various // bits. 
diff --git a/internal/gen/bitfield/bitfield_test.go b/internal/gen/bitfield/bitfield_test.go index 789f86d1a..a43dda962 100644 --- a/internal/gen/bitfield/bitfield_test.go +++ b/internal/gen/bitfield/bitfield_test.go @@ -7,7 +7,7 @@ package bitfield import ( "bytes" "fmt" - "io/ioutil" + "os" "testing" ) @@ -222,7 +222,7 @@ func (t test1) Baz() int8 { ` func mustRead(filename string) string { - b, err := ioutil.ReadFile(filename) + b, err := os.ReadFile(filename) if err != nil { panic(err) } diff --git a/internal/gen/gen.go b/internal/gen/gen.go index fa289534a..268258dd3 100644 --- a/internal/gen/gen.go +++ b/internal/gen/gen.go @@ -25,7 +25,6 @@ import ( "go/build" "go/format" "io" - "io/ioutil" "log" "net/http" "os" @@ -175,7 +174,7 @@ func getLocalDir() string { if err := os.MkdirAll(dir, permissions); err != nil { log.Fatalf("Could not create directory: %v", err) } - ioutil.WriteFile(readme, []byte(readmeTxt), permissions) + os.WriteFile(readme, []byte(readmeTxt), permissions) } return dir } @@ -213,15 +212,15 @@ func open(file, urlRoot, path string) io.ReadCloser { } r := get(urlRoot, path) defer r.Close() - b, err := ioutil.ReadAll(r) + b, err := io.ReadAll(r) if err != nil { log.Fatalf("Could not download file: %v", err) } os.MkdirAll(filepath.Dir(file), permissions) - if err := ioutil.WriteFile(file, b, permissions); err != nil { + if err := os.WriteFile(file, b, permissions); err != nil { log.Fatalf("Could not create file: %v", err) } - return ioutil.NopCloser(bytes.NewReader(b)) + return io.NopCloser(bytes.NewReader(b)) } func get(root, path string) io.ReadCloser { @@ -280,13 +279,13 @@ func fileToPattern(filename string) string { func updateBuildTags(pattern string) { for _, t := range tags { oldFile := fmt.Sprintf(pattern, t.version) - b, err := ioutil.ReadFile(oldFile) + b, err := os.ReadFile(oldFile) if err != nil { continue } build := fmt.Sprintf("// +build %s", t.buildTags) b = regexp.MustCompile(`// \+build .*`).ReplaceAll(b, []byte(build)) - err 
= ioutil.WriteFile(oldFile, b, 0644) + err = os.WriteFile(oldFile, b, 0644) if err != nil { log.Fatal(err) } @@ -334,7 +333,7 @@ func WriteGo(w io.Writer, pkg, tags string, b []byte) (n int, err error) { // Repackage rewrites a Go file from belonging to package main to belonging to // the given package. func Repackage(inFile, outFile, pkg string) { - src, err := ioutil.ReadFile(inFile) + src, err := os.ReadFile(inFile) if err != nil { log.Fatalf("reading %s: %v", inFile, err) } diff --git a/internal/language/compact/parse_test.go b/internal/language/compact/parse_test.go index abe3a58c0..2db200b88 100644 --- a/internal/language/compact/parse_test.go +++ b/internal/language/compact/parse_test.go @@ -122,6 +122,11 @@ func parseTests() []parseTest { {in: "en-t-nl-abcd", lang: "en", ext: "t-nl", invalid: true}, {in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"}, {in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}}, + {in: "en_t_pt_MLt", lang: "en", ext: "t-pt-mlt", changed: true}, + {in: "en-t-fr-est", lang: "en", ext: "t-fr-est", changed: false}, + {in: "fr-est", lang: "et", changed: true}, + {in: "fr-est-t-fr-est", lang: "et", ext: "t-fr-est", changed: true}, + {in: "fr-est-Cyrl", lang: "et", script: "Cyrl", changed: true}, // invalid {in: "", lang: "und", invalid: true}, {in: "-", lang: "und", invalid: true}, diff --git a/internal/language/compact/tables.go b/internal/language/compact/tables.go index fe7ad9ea7..32af9de59 100644 --- a/internal/language/compact/tables.go +++ b/internal/language/compact/tables.go @@ -966,7 +966,7 @@ var coreTags = []language.CompactCoreInfo{ // 773 elements 0x3fd00000, 0x3fd00072, 0x3fd000da, 0x3fd0010c, 0x3ff00000, 0x3ff000d1, 0x40100000, 0x401000c3, 0x40200000, 0x4020004c, 0x40700000, 0x40800000, - 0x4085a000, 0x4085a0ba, 0x408e3000, 0x408e30ba, + 0x4085a000, 0x4085a0ba, 0x408e8000, 0x408e80ba, 0x40c00000, 0x40c000b3, 0x41200000, 0x41200111, 0x41600000, 0x4160010f, 0x41c00000, 0x41d00000, // Entry 280 - 29F @@ 
-994,7 +994,7 @@ var coreTags = []language.CompactCoreInfo{ // 773 elements 0x4ae00130, 0x4b400000, 0x4b400099, 0x4b4000e8, 0x4bc00000, 0x4bc05000, 0x4bc05024, 0x4bc20000, 0x4bc20137, 0x4bc5a000, 0x4bc5a137, 0x4be00000, - 0x4be5a000, 0x4be5a0b4, 0x4beeb000, 0x4beeb0b4, + 0x4be5a000, 0x4be5a0b4, 0x4bef1000, 0x4bef10b4, 0x4c000000, 0x4c300000, 0x4c30013e, 0x4c900000, // Entry 2E0 - 2FF 0x4c900001, 0x4cc00000, 0x4cc0012f, 0x4ce00000, @@ -1012,4 +1012,4 @@ var coreTags = []language.CompactCoreInfo{ // 773 elements const specialTagsStr string = "ca-ES-valencia en-US-u-va-posix" -// Total table size 3147 bytes (3KiB); checksum: BE816D44 +// Total table size 3147 bytes (3KiB); checksum: 6772C83C diff --git a/internal/language/gen.go b/internal/language/gen.go index 27c43dc96..520f1596f 100644 --- a/internal/language/gen.go +++ b/internal/language/gen.go @@ -15,7 +15,6 @@ import ( "flag" "fmt" "io" - "io/ioutil" "log" "math" "reflect" @@ -921,7 +920,7 @@ func (b *builder) writeRegion() { r := gen.OpenIANAFile("domains/root/db") defer r.Close() - buf, err := ioutil.ReadAll(r) + buf, err := io.ReadAll(r) failOnError(err) re := regexp.MustCompile(`"/domains/root/db/([a-z]{2}).html"`) for _, m := range re.FindAllSubmatch(buf, -1) { @@ -1209,12 +1208,12 @@ func (b *builder) writeLikelyData() { type ( // generated types likelyScriptRegion struct { region uint16 - script uint8 + script uint16 flags uint8 } likelyLangScript struct { lang uint16 - script uint8 + script uint16 flags uint8 } likelyLangRegion struct { @@ -1226,7 +1225,7 @@ func (b *builder) writeLikelyData() { likelyTag struct { lang uint16 region uint16 - script uint8 + script uint16 } ) var ( // generated variables @@ -1279,7 +1278,7 @@ func (b *builder) writeLikelyData() { log.Fatalf("region changed unexpectedly: %s -> %s", from, to) } likelyRegionGroup[id].lang = uint16(b.langIndex(to[0])) - likelyRegionGroup[id].script = uint8(b.script.index(to[1])) + likelyRegionGroup[id].script = uint16(b.script.index(to[1])) 
likelyRegionGroup[id].region = uint16(b.region.index(to[2])) } else { regionToOther[r] = append(regionToOther[r], fromTo{from, to}) @@ -1293,11 +1292,11 @@ func (b *builder) writeLikelyData() { list := langToOther[id] if len(list) == 1 { likelyLang[id].region = uint16(b.region.index(list[0].to[2])) - likelyLang[id].script = uint8(b.script.index(list[0].to[1])) + likelyLang[id].script = uint16(b.script.index(list[0].to[1])) } else if len(list) > 1 { likelyLang[id].flags = isList likelyLang[id].region = uint16(len(likelyLangList)) - likelyLang[id].script = uint8(len(list)) + likelyLang[id].script = uint16(len(list)) for _, x := range list { flags := uint8(0) if len(x.from) > 1 { @@ -1309,7 +1308,7 @@ func (b *builder) writeLikelyData() { } likelyLangList = append(likelyLangList, likelyScriptRegion{ region: uint16(b.region.index(x.to[2])), - script: uint8(b.script.index(x.to[1])), + script: uint16(b.script.index(x.to[1])), flags: flags, }) } @@ -1324,21 +1323,21 @@ func (b *builder) writeLikelyData() { list := regionToOther[id] if len(list) == 1 { likelyRegion[id].lang = uint16(b.langIndex(list[0].to[0])) - likelyRegion[id].script = uint8(b.script.index(list[0].to[1])) + likelyRegion[id].script = uint16(b.script.index(list[0].to[1])) if len(list[0].from) > 2 { likelyRegion[id].flags = scriptInFrom } } else if len(list) > 1 { likelyRegion[id].flags = isList likelyRegion[id].lang = uint16(len(likelyRegionList)) - likelyRegion[id].script = uint8(len(list)) + likelyRegion[id].script = uint16(len(list)) for i, x := range list { if len(x.from) == 2 && i != 0 || i > 0 && len(x.from) != 3 { log.Fatalf("unspecified script must be first in list: %v at %d", x.from, i) } x := likelyLangScript{ lang: uint16(b.langIndex(x.to[0])), - script: uint8(b.script.index(x.to[1])), + script: uint16(b.script.index(x.to[1])), } if len(list[0].from) > 2 { x.flags = scriptInFrom @@ -1453,8 +1452,8 @@ func (b *builder) writeRegionInclusionData() { type parentRel struct { lang uint16 - script 
uint8 - maxScript uint8 + script uint16 + maxScript uint16 toRegion uint16 fromRegion []uint16 } @@ -1477,10 +1476,10 @@ func (b *builder) writeParents() { if len(sub) == 2 { // TODO: check that all undefined scripts are indeed Latn in these // cases. - parent.maxScript = uint8(b.script.index("Latn")) + parent.maxScript = uint16(b.script.index("Latn")) parent.toRegion = uint16(b.region.index(sub[1])) } else { - parent.script = uint8(b.script.index(sub[1])) + parent.script = uint16(b.script.index(sub[1])) parent.maxScript = parent.script parent.toRegion = uint16(b.region.index(sub[2])) } diff --git a/internal/language/language_test.go b/internal/language/language_test.go index 8244c1c8a..07be42c84 100644 --- a/internal/language/language_test.go +++ b/internal/language/language_test.go @@ -14,8 +14,8 @@ import ( func TestTagSize(t *testing.T) { id := Tag{} typ := reflect.TypeOf(id) - if typ.Size() > 24 { - t.Errorf("size of Tag was %d; want 24", typ.Size()) + if typ.Size() > 32 { + t.Errorf("size of Tag was %d; want <= 32", typ.Size()) } } @@ -681,6 +681,8 @@ var ( "en-t-t0-abcd", "en-t-nl-latn", "en-t-t0-abcd-x-a", + "en_t_pt_MLt", + "en-t-fr-est", } // Change, but not memory allocation required. benchSimpleChange = []string{ diff --git a/internal/language/lookup.go b/internal/language/lookup.go index 6294b8152..9309dc276 100644 --- a/internal/language/lookup.go +++ b/internal/language/lookup.go @@ -328,7 +328,7 @@ func (r Region) IsPrivateUse() bool { return r.typ()&iso3166UserAssigned != 0 } -type Script uint8 +type Script uint16 // getScriptID returns the script id for string s. It assumes that s // is of the format [A-Z][a-z]{3}. 
diff --git a/internal/language/parse.go b/internal/language/parse.go index 47ee0fed1..aad1e0acf 100644 --- a/internal/language/parse.go +++ b/internal/language/parse.go @@ -270,7 +270,7 @@ func parse(scan *scanner, s string) (t Tag, err error) { } else if n >= 4 { return Und, ErrSyntax } else { // the usual case - t, end = parseTag(scan) + t, end = parseTag(scan, true) if n := len(scan.token); n == 1 { t.pExt = uint16(end) end = parseExtensions(scan) @@ -296,7 +296,8 @@ func parse(scan *scanner, s string) (t Tag, err error) { // parseTag parses language, script, region and variants. // It returns a Tag and the end position in the input that was parsed. -func parseTag(scan *scanner) (t Tag, end int) { +// If doNorm is true, then <lang>-<extlang> will be normalized to <extlang>. +func parseTag(scan *scanner, doNorm bool) (t Tag, end int) { var e error // TODO: set an error if an unknown lang, script or region is encountered. t.LangID, e = getLangID(scan.token) @@ -307,14 +308,17 @@ func parseTag(scan *scanner) (t Tag, end int) { for len(scan.token) == 3 && isAlpha(scan.token[0]) { // From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent // to a tag of the form <extlang>. 
- lang, e := getLangID(scan.token) - if lang != 0 { - t.LangID = lang - copy(scan.b[langStart:], lang.String()) - scan.b[langStart+3] = '-' - scan.start = langStart + 4 + if doNorm { + lang, e := getLangID(scan.token) + if lang != 0 { + t.LangID = lang + langStr := lang.String() + copy(scan.b[langStart:], langStr) + scan.b[langStart+len(langStr)] = '-' + scan.start = langStart + len(langStr) + 1 + } + scan.gobble(e) } - scan.gobble(e) end = scan.scan() } if len(scan.token) == 4 && isAlpha(scan.token[0]) { @@ -559,7 +563,7 @@ func parseExtension(scan *scanner) int { case 't': // https://www.ietf.org/rfc/rfc6497.txt scan.scan() if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) { - _, end = parseTag(scan) + _, end = parseTag(scan, false) scan.toLower(start, end) } for len(scan.token) == 2 && !isAlpha(scan.token[1]) { diff --git a/internal/language/parse_test.go b/internal/language/parse_test.go index e1d428aa6..0af9e8a25 100644 --- a/internal/language/parse_test.go +++ b/internal/language/parse_test.go @@ -192,6 +192,14 @@ func parseTests() []parseTest { {in: "en-t-nl-abcd", lang: "en", ext: "t-nl", invalid: true}, {in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"}, {in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}}, + {in: "en_t_pt_MLt", lang: "en", ext: "t-pt-mlt", changed: true}, + {in: "en-t-fr-est", lang: "en", ext: "t-fr-est", changed: false}, + {in: "fr-est", lang: "et", changed: false}, + {in: "fr-est-Cyrl", lang: "et", script: "Cyrl", changed: false}, + // The same input here is used in both TestParse and TestParseExtensions. + // changed should be true for this input in TestParse but changed should be false for this input in TestParseExtensions + // because the entire input has been reformatted but the extension part hasn't. 
+ // {in: "fr-est-t-fr-est", lang: "et", ext: "t-fr-est", changed: true}, // invalid {in: "", lang: "und", invalid: true}, {in: "-", lang: "und", invalid: true}, @@ -299,7 +307,7 @@ func TestParseTag(t *testing.T) { return Tag{}, true } scan := makeScannerString(tt.in) - id, end := parseTag(&scan) + id, end := parseTag(&scan, true) id.str = string(scan.b[:end]) tt.ext = "" tt.extList = []string{} diff --git a/internal/language/tables.go b/internal/language/tables.go index a19480c5b..fb6b58378 100644 --- a/internal/language/tables.go +++ b/internal/language/tables.go @@ -7,9 +7,9 @@ import "golang.org/x/text/internal/tag" // CLDRVersion is the CLDR version from which the tables in this package are derived. const CLDRVersion = "32" -const NumLanguages = 8717 +const NumLanguages = 8752 -const NumScripts = 251 +const NumScripts = 258 const NumRegions = 357 @@ -121,9 +121,10 @@ const langPrivateEnd = 0x3179 // lang holds an alphabetically sorted list of ISO-639 language identifiers. // All entries are 4 bytes. The index of the identifier (divided by 4) is the language tag. // For 2-byte language identifiers, the two successive bytes have the following meaning: -// - if the first letter of the 2- and 3-letter ISO codes are the same: -// the second and third letter of the 3-letter ISO code. -// - otherwise: a 0 and a by 2 bits right-shifted index into altLangISO3. +// - if the first letter of the 2- and 3-letter ISO codes are the same: +// the second and third letter of the 3-letter ISO code. +// - otherwise: a 0 and a by 2 bits right-shifted index into altLangISO3. +// // For 3-byte language identifiers the 4th byte is 0. 
const lang tag.Index = "" + // Size: 5324 bytes "---\x00aaaraai\x00aak\x00aau\x00abbkabi\x00abq\x00abr\x00abt\x00aby\x00a" + @@ -265,7 +266,7 @@ var langNoIndex = [2197]uint8{ 0xad, 0x03, 0xff, 0xff, 0xcf, 0x05, 0x84, 0x62, 0xe9, 0xbf, 0xfd, 0xbf, 0xbf, 0xf7, 0xfd, 0x77, 0x0f, 0xff, 0xef, 0x6f, 0xff, 0xfb, 0xdf, 0xe2, - 0xc9, 0xf8, 0x7f, 0x7e, 0x4d, 0xb8, 0x0a, 0x6a, + 0xc9, 0xf8, 0x7f, 0x7e, 0x4d, 0xbc, 0x0a, 0x6a, 0x7c, 0xea, 0xe3, 0xfa, 0x7a, 0xbf, 0x67, 0xff, // Entry 40 - 7F 0xff, 0xff, 0xff, 0xdf, 0x2a, 0x54, 0x91, 0xc0, @@ -277,7 +278,7 @@ var langNoIndex = [2197]uint8{ 0xa8, 0xff, 0x1f, 0x67, 0x7d, 0xeb, 0xef, 0xce, 0xff, 0xff, 0x9f, 0xff, 0xb7, 0xef, 0xfe, 0xcf, // Entry 80 - BF - 0xdb, 0xff, 0xf3, 0xcd, 0xfb, 0x2f, 0xff, 0xff, + 0xdb, 0xff, 0xf3, 0xcd, 0xfb, 0x6f, 0xff, 0xff, 0xbb, 0xee, 0xf7, 0xbd, 0xdb, 0xff, 0x5f, 0xf7, 0xfd, 0xf2, 0xfd, 0xff, 0x5e, 0x2f, 0x3b, 0xba, 0x7e, 0xff, 0xff, 0xfe, 0xf7, 0xff, 0xdd, 0xff, @@ -290,15 +291,15 @@ var langNoIndex = [2197]uint8{ 0x1b, 0x14, 0x08, 0xf3, 0x2b, 0xe7, 0x17, 0x56, 0x05, 0x7d, 0x0e, 0x1c, 0x37, 0x7b, 0xf3, 0xef, 0x97, 0xff, 0x5d, 0x38, 0x64, 0x08, 0x00, 0x10, - 0xbc, 0x85, 0xaf, 0xdf, 0xff, 0xff, 0x73, 0x35, - 0x3e, 0x87, 0xc7, 0xdf, 0xff, 0x01, 0x81, 0x00, + 0xbc, 0x85, 0xaf, 0xdf, 0xff, 0xff, 0x7b, 0x35, + 0x3e, 0xc7, 0xc7, 0xdf, 0xff, 0x01, 0x81, 0x00, 0xb0, 0x05, 0x80, 0x00, 0x00, 0x00, 0x00, 0x03, 0x40, 0x00, 0x40, 0x92, 0x21, 0x50, 0xb1, 0x5d, // Entry 100 - 13F 0xfd, 0xdc, 0xbe, 0x5e, 0x00, 0x00, 0x02, 0x64, 0x0d, 0x19, 0x41, 0xdf, 0x79, 0x22, 0x00, 0x00, 0x00, 0x5e, 0x64, 0xdc, 0x24, 0xe5, 0xd9, 0xe3, - 0xfe, 0xff, 0xfd, 0xcb, 0x9f, 0x14, 0x01, 0x0c, + 0xfe, 0xff, 0xfd, 0xcb, 0x9f, 0x14, 0x41, 0x0c, 0x86, 0x00, 0xd1, 0x00, 0xf0, 0xc7, 0x67, 0x5f, 0x56, 0x99, 0x5e, 0xb5, 0x6c, 0xaf, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0xc0, 0x37, 0xda, 0x56, @@ -309,9 +310,9 @@ var langNoIndex = [2197]uint8{ 0x0a, 0x00, 0x01, 0x00, 0x00, 0x10, 0x11, 0x09, 0x00, 0x00, 0x60, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 
0x44, 0x00, 0x00, 0x10, 0x00, 0x04, - 0x08, 0x00, 0x00, 0x04, 0x00, 0x80, 0x28, 0x04, + 0x08, 0x00, 0x00, 0x05, 0x00, 0x80, 0x28, 0x04, 0x00, 0x00, 0x40, 0xd5, 0x2d, 0x00, 0x64, 0x35, - 0x24, 0x52, 0xf4, 0xd4, 0xbd, 0x62, 0xc9, 0x03, + 0x24, 0x52, 0xf4, 0xd5, 0xbf, 0x62, 0xc9, 0x03, // Entry 180 - 1BF 0x00, 0x80, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x13, 0x39, 0x01, 0xdd, 0x57, 0x98, @@ -333,20 +334,20 @@ var langNoIndex = [2197]uint8{ // Entry 200 - 23F 0xdf, 0xc3, 0x83, 0x82, 0xc0, 0xfb, 0x57, 0x27, 0xed, 0x55, 0xe7, 0x01, 0x00, 0x20, 0xb2, 0xc5, - 0xa4, 0x45, 0x25, 0x9b, 0x02, 0xdf, 0xe0, 0xdf, - 0x03, 0x44, 0x08, 0x90, 0x01, 0x04, 0x01, 0xe3, + 0xa4, 0x45, 0x25, 0x9b, 0x02, 0xdf, 0xe1, 0xdf, + 0x03, 0x44, 0x08, 0x90, 0x01, 0x04, 0x81, 0xe3, 0x92, 0x54, 0xdb, 0x28, 0xd3, 0x5f, 0xfe, 0x6d, 0x79, 0xed, 0x1c, 0x7d, 0x04, 0x08, 0x00, 0x01, 0x21, 0x12, 0x64, 0x5f, 0xdd, 0x0e, 0x85, 0x4f, 0x40, 0x40, 0x00, 0x04, 0xf1, 0xfd, 0x3d, 0x54, // Entry 240 - 27F 0xe8, 0x03, 0xb4, 0x27, 0x23, 0x0d, 0x00, 0x00, - 0x20, 0x7b, 0x78, 0x02, 0x05, 0x84, 0x00, 0xf0, + 0x20, 0x7b, 0x78, 0x02, 0x07, 0x84, 0x00, 0xf0, 0xbb, 0x7e, 0x5a, 0x00, 0x18, 0x04, 0x81, 0x00, 0x00, 0x00, 0x80, 0x10, 0x90, 0x1c, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x40, 0x00, 0x04, 0x08, 0xa0, 0x70, 0xa5, 0x0c, 0x40, 0x00, 0x00, - 0x11, 0x24, 0x04, 0x68, 0x00, 0x20, 0x70, 0xff, + 0x91, 0x24, 0x04, 0x68, 0x00, 0x20, 0x70, 0xff, 0x7b, 0x7f, 0x70, 0x00, 0x05, 0x9b, 0xdd, 0x66, // Entry 280 - 2BF 0x03, 0x00, 0x11, 0x00, 0x00, 0x00, 0x40, 0x05, @@ -365,12 +366,12 @@ var langNoIndex = [2197]uint8{ 0xa7, 0x81, 0x47, 0x97, 0xfb, 0x00, 0x10, 0x00, 0x08, 0x00, 0x80, 0x00, 0x40, 0x04, 0x00, 0x01, 0x02, 0x00, 0x01, 0x40, 0x80, 0x00, 0x00, 0x08, - 0xd8, 0xeb, 0xf6, 0x39, 0xc4, 0x89, 0x12, 0x00, + 0xd8, 0xeb, 0xf6, 0x39, 0xc4, 0x8d, 0x12, 0x00, // Entry 300 - 33F 0x00, 0x0c, 0x04, 0x01, 0x20, 0x20, 0xdd, 0xa0, 0x01, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x04, 0x10, 0xd0, 0x9d, 0x95, 0x13, 0x04, 0x80, - 
0x00, 0x01, 0xd0, 0x12, 0x40, 0x00, 0x10, 0xb0, + 0x00, 0x01, 0xd0, 0x16, 0x40, 0x00, 0x10, 0xb0, 0x10, 0x62, 0x4c, 0xd2, 0x02, 0x01, 0x4a, 0x00, 0x46, 0x04, 0x00, 0x08, 0x02, 0x00, 0x20, 0x80, 0x00, 0x80, 0x06, 0x00, 0x08, 0x00, 0x00, 0x00, @@ -397,9 +398,9 @@ var langNoIndex = [2197]uint8{ 0x02, 0x30, 0x9f, 0x7a, 0x16, 0xbd, 0x7f, 0x57, 0xf2, 0xff, 0x31, 0xff, 0xf2, 0x1e, 0x90, 0xf7, 0xf1, 0xf9, 0x45, 0x80, 0x01, 0x02, 0x00, 0x00, - 0x40, 0x54, 0x9f, 0x8a, 0xd9, 0xf9, 0x2e, 0x11, + 0x40, 0x54, 0x9f, 0x8a, 0xdb, 0xf9, 0x2e, 0x11, 0x86, 0x51, 0xc0, 0xf3, 0xfb, 0x47, 0x40, 0x01, - 0x05, 0xd1, 0x50, 0x5c, 0x00, 0x00, 0x00, 0x10, + 0x05, 0xd1, 0x50, 0x5c, 0x00, 0x40, 0x00, 0x10, 0x04, 0x02, 0x00, 0x00, 0x0a, 0x00, 0x17, 0xd2, 0xb9, 0xfd, 0xfc, 0xba, 0xfe, 0xef, 0xc7, 0xbe, // Entry 400 - 43F @@ -421,19 +422,19 @@ var langNoIndex = [2197]uint8{ 0xcd, 0xff, 0xfb, 0xff, 0xdf, 0xd7, 0xea, 0xff, 0xe5, 0x5f, 0x6d, 0x0f, 0xa7, 0x51, 0x06, 0xc4, // Entry 480 - 4BF - 0x13, 0x50, 0x5d, 0xaf, 0xa6, 0xff, 0x99, 0xfb, + 0x93, 0x50, 0x5d, 0xaf, 0xa6, 0xff, 0x99, 0xfb, 0x63, 0x1d, 0x53, 0xff, 0xef, 0xb7, 0x35, 0x20, 0x14, 0x00, 0x55, 0x51, 0x82, 0x65, 0xf5, 0x41, 0xe2, 0xff, 0xfc, 0xdf, 0x02, 0x05, 0xc5, 0x05, - 0x00, 0x22, 0x00, 0x74, 0x69, 0x10, 0x08, 0x04, + 0x00, 0x22, 0x00, 0x74, 0x69, 0x10, 0x08, 0x05, 0x41, 0x00, 0x01, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x51, 0x20, 0x05, 0x04, 0x01, 0x00, 0x00, - 0x06, 0x01, 0x20, 0x00, 0x18, 0x01, 0x92, 0xb1, + 0x06, 0x01, 0x20, 0x00, 0x18, 0x01, 0x92, 0xf1, // Entry 4C0 - 4FF - 0xfd, 0x47, 0x49, 0x06, 0x95, 0x06, 0x57, 0xed, - 0xfb, 0x4c, 0x1c, 0x6b, 0x83, 0x04, 0x62, 0x40, + 0xfd, 0x47, 0x69, 0x06, 0x95, 0x06, 0x57, 0xed, + 0xfb, 0x4d, 0x1c, 0x6b, 0x83, 0x04, 0x62, 0x40, 0x00, 0x11, 0x42, 0x00, 0x00, 0x00, 0x54, 0x83, - 0xb8, 0x4f, 0x10, 0x8c, 0x89, 0x46, 0xde, 0xf7, + 0xb8, 0x4f, 0x10, 0x8e, 0x89, 0x46, 0xde, 0xf7, 0x13, 0x31, 0x00, 0x20, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x10, 0x00, 0x01, 0x00, 0x00, 0xf0, 
0x5b, 0xf4, 0xbe, 0x3d, @@ -470,7 +471,7 @@ var langNoIndex = [2197]uint8{ 0xaa, 0x10, 0x5d, 0x98, 0x52, 0x00, 0x80, 0x20, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x02, 0x02, 0x19, 0x00, 0x10, 0x02, 0x10, 0x61, 0x5a, 0x9d, - 0x31, 0x00, 0x00, 0x00, 0x01, 0x10, 0x02, 0x20, + 0x31, 0x00, 0x00, 0x00, 0x01, 0x18, 0x02, 0x20, 0x00, 0x00, 0x01, 0x00, 0x42, 0x00, 0x20, 0x00, 0x00, 0x1f, 0xdf, 0xd2, 0xb9, 0xff, 0xfd, 0x3f, 0x1f, 0x98, 0xcf, 0x9c, 0xff, 0xaf, 0x5f, 0xfe, @@ -479,9 +480,9 @@ var langNoIndex = [2197]uint8{ 0xb7, 0xf6, 0xfb, 0xb3, 0xc7, 0xff, 0x6f, 0xf1, 0x73, 0xb1, 0x7f, 0x9f, 0x7f, 0xbd, 0xfc, 0xb7, 0xee, 0x1c, 0xfa, 0xcb, 0xef, 0xdd, 0xf9, 0xbd, - 0x6e, 0xae, 0x55, 0xfd, 0x6e, 0x81, 0x76, 0x1f, + 0x6e, 0xae, 0x55, 0xfd, 0x6e, 0x81, 0x76, 0x9f, 0xd4, 0x77, 0xf5, 0x7d, 0xfb, 0xff, 0xeb, 0xfe, - 0xbe, 0x5f, 0x46, 0x1b, 0xe9, 0x5f, 0x50, 0x18, + 0xbe, 0x5f, 0x46, 0x5b, 0xe9, 0x5f, 0x50, 0x18, 0x02, 0xfa, 0xf7, 0x9d, 0x15, 0x97, 0x05, 0x0f, // Entry 640 - 67F 0x75, 0xc4, 0x7d, 0x81, 0x92, 0xf5, 0x57, 0x6c, @@ -495,14 +496,14 @@ var langNoIndex = [2197]uint8{ // Entry 680 - 6BF 0x97, 0x9d, 0xbf, 0x9f, 0xf7, 0xc7, 0xfd, 0x37, 0xce, 0x7f, 0x04, 0x1d, 0x73, 0x7f, 0xf8, 0xda, - 0x5d, 0xce, 0x7d, 0x06, 0xb9, 0xea, 0x69, 0xa0, + 0x5d, 0xce, 0x7d, 0x06, 0xb9, 0xea, 0x79, 0xa0, 0x1a, 0x20, 0x00, 0x30, 0x02, 0x04, 0x24, 0x08, 0x04, 0x00, 0x00, 0x40, 0xd4, 0x02, 0x04, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x20, 0x01, 0x06, 0x50, 0x00, 0x08, 0x00, 0x00, 0x00, 0x24, 0x00, 0x04, 0x00, 0x10, 0xdc, 0x58, 0xd7, 0x0d, 0x0f, // Entry 6C0 - 6FF - 0x14, 0x4d, 0xf1, 0x16, 0x44, 0xd1, 0x42, 0x08, + 0x14, 0x4d, 0xf1, 0x16, 0x44, 0xd5, 0x42, 0x08, 0x40, 0x00, 0x00, 0x40, 0x00, 0x08, 0x00, 0x00, 0x00, 0xdc, 0xfb, 0xcb, 0x0e, 0x58, 0x48, 0x41, 0x24, 0x20, 0x04, 0x00, 0x30, 0x12, 0x40, 0x00, @@ -513,7 +514,7 @@ var langNoIndex = [2197]uint8{ // Entry 700 - 73F 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x80, 0x86, 0xc2, 0x00, 0x00, 0x00, 0x00, 0x01, - 0xdf, 0x18, 0x00, 0x00, 0x02, 0xf0, 
0xfd, 0x79, + 0xff, 0x18, 0x02, 0x00, 0x02, 0xf0, 0xfd, 0x79, 0x3b, 0x00, 0x25, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x03, 0x00, 0x09, 0x20, 0x00, 0x00, 0x01, 0x00, @@ -525,7 +526,7 @@ var langNoIndex = [2197]uint8{ 0xcd, 0xf9, 0x5c, 0x00, 0x01, 0x00, 0x30, 0x04, 0x04, 0x55, 0x00, 0x01, 0x04, 0xf4, 0x3f, 0x4a, 0x01, 0x00, 0x00, 0xb0, 0x80, 0x20, 0x55, 0x75, - 0x97, 0x7c, 0x9f, 0x31, 0xcc, 0x68, 0xd1, 0x03, + 0x97, 0x7c, 0xdf, 0x31, 0xcc, 0x68, 0xd1, 0x03, 0xd5, 0x57, 0x27, 0x14, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0xf7, 0xcb, 0x1f, 0x14, 0x60, // Entry 780 - 7BF @@ -538,7 +539,7 @@ var langNoIndex = [2197]uint8{ 0xe8, 0x30, 0x90, 0x6a, 0x92, 0x00, 0x00, 0x02, 0xff, 0xef, 0xff, 0x4b, 0x85, 0x53, 0xf4, 0xed, // Entry 7C0 - 7FF - 0xdd, 0xbf, 0x72, 0x1d, 0xc7, 0x0c, 0xd5, 0x42, + 0xdd, 0xbf, 0xf2, 0x5d, 0xc7, 0x0c, 0xd5, 0x42, 0xfc, 0xff, 0xf7, 0x1f, 0x00, 0x80, 0x40, 0x56, 0xcc, 0x16, 0x9e, 0xea, 0x35, 0x7d, 0xef, 0xff, 0xbd, 0xa4, 0xaf, 0x01, 0x44, 0x18, 0x01, 0x4d, @@ -552,15 +553,15 @@ var langNoIndex = [2197]uint8{ 0x40, 0x9c, 0x44, 0xdf, 0xf5, 0x8f, 0x66, 0xb3, 0x55, 0x20, 0xd4, 0xc1, 0xd8, 0x30, 0x3d, 0x80, 0x00, 0x00, 0x00, 0x04, 0xd4, 0x11, 0xc5, 0x84, - 0x2e, 0x50, 0x00, 0x22, 0x50, 0x6e, 0xbd, 0x93, + 0x2f, 0x50, 0x00, 0x22, 0x50, 0x6e, 0xbd, 0x93, 0x07, 0x00, 0x20, 0x10, 0x84, 0xb2, 0x45, 0x10, 0x06, 0x44, 0x00, 0x00, 0x12, 0x02, 0x11, 0x00, // Entry 840 - 87F - 0xf0, 0xfb, 0xfd, 0x7f, 0x05, 0x00, 0x12, 0x81, + 0xf0, 0xfb, 0xfd, 0x7f, 0x05, 0x00, 0x16, 0x81, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x02, 0x00, 0x00, 0x00, 0x00, 0x03, 0x30, 0x02, 0x28, 0x84, 0x00, 0x21, 0xc0, 0x23, 0x24, 0x00, 0x00, - 0x00, 0xcb, 0xe4, 0x3a, 0x42, 0x88, 0x14, 0xf1, + 0x00, 0xcb, 0xe4, 0x3a, 0x46, 0x88, 0x14, 0xf1, 0xef, 0xff, 0x7f, 0x12, 0x01, 0x01, 0x84, 0x50, 0x07, 0xfc, 0xff, 0xff, 0x0f, 0x01, 0x00, 0x40, 0x10, 0x38, 0x01, 0x01, 0x1c, 0x12, 0x40, 0xe1, @@ -582,8 +583,8 @@ var altLangIndex = [6]uint16{ } // AliasMap maps 
langIDs to their suggested replacements. -// Size: 704 bytes, 176 elements -var AliasMap = [176]FromTo{ +// Size: 716 bytes, 179 elements +var AliasMap = [179]FromTo{ 0: {From: 0x82, To: 0x88}, 1: {From: 0x187, To: 0x1ae}, 2: {From: 0x1f3, To: 0x1e1}, @@ -612,172 +613,176 @@ var AliasMap = [176]FromTo{ 25: {From: 0x80c, To: 0x5a}, 26: {From: 0x815, To: 0x8d}, 27: {From: 0x87e, To: 0x810}, - 28: {From: 0x8c3, To: 0xee3}, - 29: {From: 0x9ef, To: 0x331}, - 30: {From: 0xa36, To: 0x2c5}, - 31: {From: 0xa3d, To: 0xbf}, - 32: {From: 0xabe, To: 0x3322}, - 33: {From: 0xb38, To: 0x529}, - 34: {From: 0xb75, To: 0x265a}, - 35: {From: 0xb7e, To: 0xbc3}, - 36: {From: 0xb9b, To: 0x44e}, - 37: {From: 0xbbc, To: 0x4229}, - 38: {From: 0xbbf, To: 0x529}, - 39: {From: 0xbfe, To: 0x2da7}, - 40: {From: 0xc2e, To: 0x3181}, - 41: {From: 0xcb9, To: 0xf3}, - 42: {From: 0xd08, To: 0xfa}, - 43: {From: 0xdc8, To: 0x11a}, - 44: {From: 0xdd7, To: 0x32d}, - 45: {From: 0xdf8, To: 0xdfb}, - 46: {From: 0xdfe, To: 0x531}, - 47: {From: 0xe01, To: 0xdf3}, - 48: {From: 0xedf, To: 0x205a}, - 49: {From: 0xee9, To: 0x222e}, - 50: {From: 0xeee, To: 0x2e9a}, - 51: {From: 0xf39, To: 0x367}, - 52: {From: 0x10d0, To: 0x140}, - 53: {From: 0x1104, To: 0x2d0}, - 54: {From: 0x11a0, To: 0x1ec}, - 55: {From: 0x1279, To: 0x21}, - 56: {From: 0x1424, To: 0x15e}, - 57: {From: 0x1470, To: 0x14e}, - 58: {From: 0x151f, To: 0xd9b}, - 59: {From: 0x1523, To: 0x390}, - 60: {From: 0x1532, To: 0x19f}, - 61: {From: 0x1580, To: 0x210}, - 62: {From: 0x1583, To: 0x10d}, - 63: {From: 0x15a3, To: 0x3caf}, - 64: {From: 0x1630, To: 0x222e}, - 65: {From: 0x166a, To: 0x19b}, - 66: {From: 0x16c8, To: 0x136}, - 67: {From: 0x1700, To: 0x29f8}, - 68: {From: 0x1718, To: 0x194}, - 69: {From: 0x1727, To: 0xf3f}, - 70: {From: 0x177a, To: 0x178}, - 71: {From: 0x1809, To: 0x17b6}, - 72: {From: 0x1816, To: 0x18f3}, - 73: {From: 0x188a, To: 0x436}, - 74: {From: 0x1979, To: 0x1d01}, - 75: {From: 0x1a74, To: 0x2bb0}, - 76: {From: 0x1a8a, To: 0x1f8}, - 
77: {From: 0x1b5a, To: 0x1fa}, - 78: {From: 0x1b86, To: 0x1515}, - 79: {From: 0x1d64, To: 0x2c9b}, - 80: {From: 0x2038, To: 0x37b1}, - 81: {From: 0x203d, To: 0x20dd}, - 82: {From: 0x205a, To: 0x30b}, - 83: {From: 0x20e3, To: 0x274}, - 84: {From: 0x20ee, To: 0x263}, - 85: {From: 0x20f2, To: 0x22d}, - 86: {From: 0x20f9, To: 0x256}, - 87: {From: 0x210f, To: 0x21eb}, - 88: {From: 0x2135, To: 0x27d}, - 89: {From: 0x2160, To: 0x913}, - 90: {From: 0x2199, To: 0x121}, - 91: {From: 0x21ce, To: 0x1561}, - 92: {From: 0x21e6, To: 0x504}, - 93: {From: 0x21f4, To: 0x49f}, - 94: {From: 0x21fb, To: 0x269}, - 95: {From: 0x222d, To: 0x121}, - 96: {From: 0x2237, To: 0x121}, - 97: {From: 0x2262, To: 0x92a}, - 98: {From: 0x2316, To: 0x3226}, - 99: {From: 0x236a, To: 0x2835}, - 100: {From: 0x2382, To: 0x3365}, - 101: {From: 0x2472, To: 0x2c7}, - 102: {From: 0x24e4, To: 0x2ff}, - 103: {From: 0x24f0, To: 0x2fa}, - 104: {From: 0x24fa, To: 0x31f}, - 105: {From: 0x2550, To: 0xb5b}, - 106: {From: 0x25a9, To: 0xe2}, - 107: {From: 0x263e, To: 0x2d0}, - 108: {From: 0x26c9, To: 0x26b4}, - 109: {From: 0x26f9, To: 0x3c8}, - 110: {From: 0x2727, To: 0x3caf}, - 111: {From: 0x2755, To: 0x6a4}, - 112: {From: 0x2765, To: 0x26b4}, - 113: {From: 0x2789, To: 0x4358}, - 114: {From: 0x27c9, To: 0x2001}, - 115: {From: 0x28ea, To: 0x27b1}, - 116: {From: 0x28ef, To: 0x2837}, - 117: {From: 0x2914, To: 0x351}, - 118: {From: 0x2986, To: 0x2da7}, - 119: {From: 0x29f0, To: 0x96b}, - 120: {From: 0x2b1a, To: 0x38d}, - 121: {From: 0x2bfc, To: 0x395}, - 122: {From: 0x2c3f, To: 0x3caf}, - 123: {From: 0x2cfc, To: 0x3be}, - 124: {From: 0x2d13, To: 0x597}, - 125: {From: 0x2d47, To: 0x148}, - 126: {From: 0x2d48, To: 0x148}, - 127: {From: 0x2dff, To: 0x2f1}, - 128: {From: 0x2e08, To: 0x19cc}, - 129: {From: 0x2e1a, To: 0x2d95}, - 130: {From: 0x2e21, To: 0x292}, - 131: {From: 0x2e54, To: 0x7d}, - 132: {From: 0x2e65, To: 0x2282}, - 133: {From: 0x2ea0, To: 0x2e9b}, - 134: {From: 0x2eef, To: 0x2ed7}, - 135: {From: 0x3193, To: 
0x3c4}, - 136: {From: 0x3366, To: 0x338e}, - 137: {From: 0x342a, To: 0x3dc}, - 138: {From: 0x34ee, To: 0x18d0}, - 139: {From: 0x35c8, To: 0x2c9b}, - 140: {From: 0x35e6, To: 0x412}, - 141: {From: 0x3658, To: 0x246}, - 142: {From: 0x3676, To: 0x3f4}, - 143: {From: 0x36fd, To: 0x445}, - 144: {From: 0x37c0, To: 0x121}, - 145: {From: 0x3816, To: 0x38f2}, - 146: {From: 0x382a, To: 0x2b48}, - 147: {From: 0x382b, To: 0x2c9b}, - 148: {From: 0x382f, To: 0xa9}, - 149: {From: 0x3832, To: 0x3228}, - 150: {From: 0x386c, To: 0x39a6}, - 151: {From: 0x3892, To: 0x3fc0}, - 152: {From: 0x38a5, To: 0x39d7}, - 153: {From: 0x38b4, To: 0x1fa4}, - 154: {From: 0x38b5, To: 0x2e9a}, - 155: {From: 0x395c, To: 0x47e}, - 156: {From: 0x3b4e, To: 0xd91}, - 157: {From: 0x3b78, To: 0x137}, - 158: {From: 0x3c99, To: 0x4bc}, - 159: {From: 0x3fbd, To: 0x100}, - 160: {From: 0x4208, To: 0xa91}, - 161: {From: 0x42be, To: 0x573}, - 162: {From: 0x42f9, To: 0x3f60}, - 163: {From: 0x4378, To: 0x25a}, - 164: {From: 0x43b8, To: 0xe6c}, - 165: {From: 0x43cd, To: 0x10f}, - 166: {From: 0x44af, To: 0x3322}, - 167: {From: 0x44e3, To: 0x512}, - 168: {From: 0x45ca, To: 0x2409}, - 169: {From: 0x45dd, To: 0x26dc}, - 170: {From: 0x4610, To: 0x48ae}, - 171: {From: 0x46ae, To: 0x46a0}, - 172: {From: 0x473e, To: 0x4745}, - 173: {From: 0x4817, To: 0x3503}, - 174: {From: 0x4916, To: 0x31f}, - 175: {From: 0x49a7, To: 0x523}, + 28: {From: 0x8a8, To: 0x8b7}, + 29: {From: 0x8c3, To: 0xee3}, + 30: {From: 0x8fa, To: 0x1dc}, + 31: {From: 0x9ef, To: 0x331}, + 32: {From: 0xa36, To: 0x2c5}, + 33: {From: 0xa3d, To: 0xbf}, + 34: {From: 0xabe, To: 0x3322}, + 35: {From: 0xb38, To: 0x529}, + 36: {From: 0xb75, To: 0x265a}, + 37: {From: 0xb7e, To: 0xbc3}, + 38: {From: 0xb9b, To: 0x44e}, + 39: {From: 0xbbc, To: 0x4229}, + 40: {From: 0xbbf, To: 0x529}, + 41: {From: 0xbfe, To: 0x2da7}, + 42: {From: 0xc2e, To: 0x3181}, + 43: {From: 0xcb9, To: 0xf3}, + 44: {From: 0xd08, To: 0xfa}, + 45: {From: 0xdc8, To: 0x11a}, + 46: {From: 0xdd7, To: 0x32d}, + 
47: {From: 0xdf8, To: 0xdfb}, + 48: {From: 0xdfe, To: 0x531}, + 49: {From: 0xe01, To: 0xdf3}, + 50: {From: 0xedf, To: 0x205a}, + 51: {From: 0xee9, To: 0x222e}, + 52: {From: 0xeee, To: 0x2e9a}, + 53: {From: 0xf39, To: 0x367}, + 54: {From: 0x10d0, To: 0x140}, + 55: {From: 0x1104, To: 0x2d0}, + 56: {From: 0x11a0, To: 0x1ec}, + 57: {From: 0x1279, To: 0x21}, + 58: {From: 0x1424, To: 0x15e}, + 59: {From: 0x1470, To: 0x14e}, + 60: {From: 0x151f, To: 0xd9b}, + 61: {From: 0x1523, To: 0x390}, + 62: {From: 0x1532, To: 0x19f}, + 63: {From: 0x1580, To: 0x210}, + 64: {From: 0x1583, To: 0x10d}, + 65: {From: 0x15a3, To: 0x3caf}, + 66: {From: 0x1630, To: 0x222e}, + 67: {From: 0x166a, To: 0x19b}, + 68: {From: 0x16c8, To: 0x136}, + 69: {From: 0x1700, To: 0x29f8}, + 70: {From: 0x1718, To: 0x194}, + 71: {From: 0x1727, To: 0xf3f}, + 72: {From: 0x177a, To: 0x178}, + 73: {From: 0x1809, To: 0x17b6}, + 74: {From: 0x1816, To: 0x18f3}, + 75: {From: 0x188a, To: 0x436}, + 76: {From: 0x1979, To: 0x1d01}, + 77: {From: 0x1a74, To: 0x2bb0}, + 78: {From: 0x1a8a, To: 0x1f8}, + 79: {From: 0x1b5a, To: 0x1fa}, + 80: {From: 0x1b86, To: 0x1515}, + 81: {From: 0x1d64, To: 0x2c9b}, + 82: {From: 0x2038, To: 0x37b1}, + 83: {From: 0x203d, To: 0x20dd}, + 84: {From: 0x205a, To: 0x30b}, + 85: {From: 0x20e3, To: 0x274}, + 86: {From: 0x20ee, To: 0x263}, + 87: {From: 0x20f2, To: 0x22d}, + 88: {From: 0x20f9, To: 0x256}, + 89: {From: 0x210f, To: 0x21eb}, + 90: {From: 0x2135, To: 0x27d}, + 91: {From: 0x2160, To: 0x913}, + 92: {From: 0x2199, To: 0x121}, + 93: {From: 0x21ce, To: 0x1561}, + 94: {From: 0x21e6, To: 0x504}, + 95: {From: 0x21f4, To: 0x49f}, + 96: {From: 0x21fb, To: 0x269}, + 97: {From: 0x222d, To: 0x121}, + 98: {From: 0x2237, To: 0x121}, + 99: {From: 0x2262, To: 0x92a}, + 100: {From: 0x2316, To: 0x3226}, + 101: {From: 0x236a, To: 0x2835}, + 102: {From: 0x2382, To: 0x3365}, + 103: {From: 0x2472, To: 0x2c7}, + 104: {From: 0x24e4, To: 0x2ff}, + 105: {From: 0x24f0, To: 0x2fa}, + 106: {From: 0x24fa, To: 0x31f}, + 
107: {From: 0x2550, To: 0xb5b}, + 108: {From: 0x25a9, To: 0xe2}, + 109: {From: 0x263e, To: 0x2d0}, + 110: {From: 0x26c9, To: 0x26b4}, + 111: {From: 0x26f9, To: 0x3c8}, + 112: {From: 0x2727, To: 0x3caf}, + 113: {From: 0x2755, To: 0x6a4}, + 114: {From: 0x2765, To: 0x26b4}, + 115: {From: 0x2789, To: 0x4358}, + 116: {From: 0x27c9, To: 0x2001}, + 117: {From: 0x28ea, To: 0x27b1}, + 118: {From: 0x28ef, To: 0x2837}, + 119: {From: 0x2914, To: 0x351}, + 120: {From: 0x2986, To: 0x2da7}, + 121: {From: 0x29f0, To: 0x96b}, + 122: {From: 0x2b1a, To: 0x38d}, + 123: {From: 0x2bfc, To: 0x395}, + 124: {From: 0x2c3f, To: 0x3caf}, + 125: {From: 0x2ce1, To: 0x2201}, + 126: {From: 0x2cfc, To: 0x3be}, + 127: {From: 0x2d13, To: 0x597}, + 128: {From: 0x2d47, To: 0x148}, + 129: {From: 0x2d48, To: 0x148}, + 130: {From: 0x2dff, To: 0x2f1}, + 131: {From: 0x2e08, To: 0x19cc}, + 132: {From: 0x2e1a, To: 0x2d95}, + 133: {From: 0x2e21, To: 0x292}, + 134: {From: 0x2e54, To: 0x7d}, + 135: {From: 0x2e65, To: 0x2282}, + 136: {From: 0x2ea0, To: 0x2e9b}, + 137: {From: 0x2eef, To: 0x2ed7}, + 138: {From: 0x3193, To: 0x3c4}, + 139: {From: 0x3366, To: 0x338e}, + 140: {From: 0x342a, To: 0x3dc}, + 141: {From: 0x34ee, To: 0x18d0}, + 142: {From: 0x35c8, To: 0x2c9b}, + 143: {From: 0x35e6, To: 0x412}, + 144: {From: 0x3658, To: 0x246}, + 145: {From: 0x3676, To: 0x3f4}, + 146: {From: 0x36fd, To: 0x445}, + 147: {From: 0x37c0, To: 0x121}, + 148: {From: 0x3816, To: 0x38f2}, + 149: {From: 0x382a, To: 0x2b48}, + 150: {From: 0x382b, To: 0x2c9b}, + 151: {From: 0x382f, To: 0xa9}, + 152: {From: 0x3832, To: 0x3228}, + 153: {From: 0x386c, To: 0x39a6}, + 154: {From: 0x3892, To: 0x3fc0}, + 155: {From: 0x38a5, To: 0x39d7}, + 156: {From: 0x38b4, To: 0x1fa4}, + 157: {From: 0x38b5, To: 0x2e9a}, + 158: {From: 0x395c, To: 0x47e}, + 159: {From: 0x3b4e, To: 0xd91}, + 160: {From: 0x3b78, To: 0x137}, + 161: {From: 0x3c99, To: 0x4bc}, + 162: {From: 0x3fbd, To: 0x100}, + 163: {From: 0x4208, To: 0xa91}, + 164: {From: 0x42be, To: 0x573}, + 
165: {From: 0x42f9, To: 0x3f60}, + 166: {From: 0x4378, To: 0x25a}, + 167: {From: 0x43b8, To: 0xe6c}, + 168: {From: 0x43cd, To: 0x10f}, + 169: {From: 0x44af, To: 0x3322}, + 170: {From: 0x44e3, To: 0x512}, + 171: {From: 0x45ca, To: 0x2409}, + 172: {From: 0x45dd, To: 0x26dc}, + 173: {From: 0x4610, To: 0x48ae}, + 174: {From: 0x46ae, To: 0x46a0}, + 175: {From: 0x473e, To: 0x4745}, + 176: {From: 0x4817, To: 0x3503}, + 177: {From: 0x4916, To: 0x31f}, + 178: {From: 0x49a7, To: 0x523}, } -// Size: 176 bytes, 176 elements -var AliasTypes = [176]AliasType{ +// Size: 179 bytes, 179 elements +var AliasTypes = [179]AliasType{ // Entry 0 - 3F 1, 0, 0, 0, 0, 0, 0, 1, 2, 2, 0, 1, 0, 0, 1, 2, - 1, 1, 2, 0, 0, 1, 0, 1, 2, 1, 1, 0, 0, 2, 1, 1, - 0, 2, 0, 0, 1, 0, 1, 0, 0, 1, 2, 1, 1, 1, 1, 0, - 0, 0, 0, 2, 1, 1, 1, 1, 2, 1, 0, 1, 1, 2, 2, 0, + 1, 1, 2, 0, 0, 1, 0, 1, 2, 1, 1, 0, 0, 0, 0, 2, + 1, 1, 0, 2, 0, 0, 1, 0, 1, 0, 0, 1, 2, 1, 1, 1, + 1, 0, 0, 0, 0, 2, 1, 1, 1, 1, 2, 1, 0, 1, 1, 2, // Entry 40 - 7F - 0, 1, 2, 0, 1, 0, 1, 1, 1, 1, 0, 0, 2, 1, 0, 0, - 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 0, 0, 0, 1, 2, 2, 2, 0, 1, 1, 0, 1, 0, 0, - 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 2, 1, 1, + 2, 0, 0, 1, 2, 0, 1, 0, 1, 1, 1, 1, 0, 0, 2, 1, + 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 0, 0, 0, 1, 2, 2, 2, 0, 1, 1, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, // Entry 80 - BF - 0, 0, 1, 0, 0, 0, 0, 1, 1, 2, 0, 0, 2, 1, 1, 1, - 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, - 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, + 2, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 2, 0, 0, 2, + 1, 1, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 1, + 0, 1, 2, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, + 0, 1, 1, } const ( @@ -785,17 +790,17 @@ const ( _Hani = 57 _Hans = 59 _Hant = 60 - _Qaaa = 143 - _Qaai = 151 - _Qabx = 192 - _Zinh = 245 - _Zyyy = 250 - _Zzzz = 251 + _Qaaa = 147 + _Qaai = 155 + _Qabx = 196 + _Zinh = 252 + _Zyyy = 257 + _Zzzz = 258 ) // script is an 
alphabetically sorted list of ISO 15924 codes. The index // of the script in the string, divided by 4, is the internal scriptID. -const script tag.Index = "" + // Size: 1012 bytes +const script tag.Index = "" + // Size: 1040 bytes "----AdlmAfakAghbAhomArabAranArmiArmnAvstBaliBamuBassBatkBengBhksBlisBopo" + "BrahBraiBugiBuhdCakmCansCariChamCherChrsCirtCoptCpmnCprtCyrlCyrsDevaDiak" + "DogrDsrtDuplEgydEgyhEgypElbaElymEthiGeokGeorGlagGongGonmGothGranGrekGujr" + @@ -803,14 +808,14 @@ const script tag.Index = "" + // Size: 1012 bytes "JavaJpanJurcKaliKanaKharKhmrKhojKitlKitsKndaKoreKpelKthiLanaLaooLatfLatg" + "LatnLekeLepcLimbLinaLinbLisuLomaLyciLydiMahjMakaMandManiMarcMayaMedfMend" + "MercMeroMlymModiMongMoonMrooMteiMultMymrNandNarbNbatNewaNkdbNkgbNkooNshu" + - "OgamOlckOrkhOryaOsgeOsmaPalmPaucPermPhagPhliPhlpPhlvPhnxPiqdPlrdPrtiQaaa" + - "QaabQaacQaadQaaeQaafQaagQaahQaaiQaajQaakQaalQaamQaanQaaoQaapQaaqQaarQaas" + - "QaatQaauQaavQaawQaaxQaayQaazQabaQabbQabcQabdQabeQabfQabgQabhQabiQabjQabk" + - "QablQabmQabnQaboQabpQabqQabrQabsQabtQabuQabvQabwQabxRjngRohgRoroRunrSamr" + - "SaraSarbSaurSgnwShawShrdShuiSiddSindSinhSogdSogoSoraSoyoSundSyloSyrcSyre" + - "SyrjSyrnTagbTakrTaleTaluTamlTangTavtTeluTengTfngTglgThaaThaiTibtTirhToto" + - "UgarVaiiVispWaraWchoWoleXpeoXsuxYeziYiiiZanbZinhZmthZsyeZsymZxxxZyyyZzzz" + - "\xff\xff\xff\xff" + "OgamOlckOrkhOryaOsgeOsmaOugrPalmPaucPcunPelmPermPhagPhliPhlpPhlvPhnxPiqd" + + "PlrdPrtiPsinQaaaQaabQaacQaadQaaeQaafQaagQaahQaaiQaajQaakQaalQaamQaanQaao" + + "QaapQaaqQaarQaasQaatQaauQaavQaawQaaxQaayQaazQabaQabbQabcQabdQabeQabfQabg" + + "QabhQabiQabjQabkQablQabmQabnQaboQabpQabqQabrQabsQabtQabuQabvQabwQabxRanj" + + "RjngRohgRoroRunrSamrSaraSarbSaurSgnwShawShrdShuiSiddSindSinhSogdSogoSora" + + "SoyoSundSyloSyrcSyreSyrjSyrnTagbTakrTaleTaluTamlTangTavtTeluTengTfngTglg" + + "ThaaThaiTibtTirhTnsaTotoUgarVaiiVispVithWaraWchoWoleXpeoXsuxYeziYiiiZanb" + + "ZinhZmthZsyeZsymZxxxZyyyZzzz\xff\xff\xff\xff" // suppressScript is an index from langID to the dominant 
script for that language, // if it exists. If a script is given, it should be suppressed from the language tag. @@ -858,7 +863,7 @@ var suppressScript = [1330]uint8{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xe5, 0x00, 0x00, 0x00, 0x00, 0xe7, 0x00, 0x00, + 0xea, 0x00, 0x00, 0x00, 0x00, 0xec, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x5a, 0x00, 0x00, 0x5a, 0x00, 0x5a, 0x00, // Entry 140 - 17F @@ -962,7 +967,7 @@ var suppressScript = [1330]uint8{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Entry 400 - 43F 0x00, 0x00, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xcf, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xd4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5a, 0x00, 0x00, 0x00, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -972,10 +977,10 @@ var suppressScript = [1330]uint8{ // Entry 440 - 47F 0x00, 0x00, 0x00, 0x00, 0x5a, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xe3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0xe1, 0x00, 0x5a, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0xe6, 0x00, 0x00, 0x00, 0x2c, + 0x00, 0xe6, 0x00, 0x5a, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xeb, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5a, 0x00, 0x00, 0x5a, 0x00, 0x00, 0x00, 0x5a, 0x00, // Entry 480 - 4BF @@ -1086,9 +1091,9 @@ var regionTypes = [358]uint8{ // regionISO holds a list of alphabetically sorted 2-letter ISO region codes. // Each 2-letter codes is followed by two bytes with the following meaning: -// - [A-Z}{2}: the first letter of the 2-letter code plus these two -// letters form the 3-letter ISO code. -// - 0, n: index into altRegionISO3. 
+// - [A-Z}{2}: the first letter of the 2-letter code plus these two +// letters form the 3-letter ISO code. +// - 0, n: index into altRegionISO3. const regionISO tag.Index = "" + // Size: 1308 bytes "AAAAACSCADNDAEREAFFGAGTGAIIAALLBAMRMANNTAOGOAQTAARRGASSMATUTAUUSAWBWAXLA" + "AZZEBAIHBBRBBDGDBEELBFFABGGRBHHRBIDIBJENBLLMBMMUBNRNBOOLBQESBRRABSHSBTTN" + @@ -1206,7 +1211,9 @@ var m49 = [358]int16{ // m49Index gives indexes into fromM49 based on the three most significant bits // of a 10-bit UN.M49 code. To search an UN.M49 code in fromM49, search in -// fromM49[m49Index[msb39(code)]:m49Index[msb3(code)+1]] +// +// fromM49[m49Index[msb39(code)]:m49Index[msb3(code)+1]] +// // for an entry where the first 7 bits match the 7 lsb of the UN.M49 code. // The region code is stored in the 9 lsb of the indexed value. // Size: 18 bytes, 9 elements @@ -1268,117 +1275,118 @@ var fromM49 = [333]uint16{ 0xc759, 0xc95a, 0xcb5b, 0xcd5c, 0xcf65, } -// Size: 1995 bytes +// Size: 2014 bytes var variantIndex = map[string]uint8{ "1606nict": 0x0, "1694acad": 0x1, "1901": 0x2, "1959acad": 0x3, - "1994": 0x60, + "1994": 0x61, "1996": 0x4, "abl1943": 0x5, "akuapem": 0x6, - "alalc97": 0x62, + "alalc97": 0x63, "aluku": 0x7, "ao1990": 0x8, "aranes": 0x9, "arevela": 0xa, "arevmda": 0xb, - "asante": 0xc, - "auvern": 0xd, - "baku1926": 0xe, - "balanka": 0xf, - "barla": 0x10, - "basiceng": 0x11, - "bauddha": 0x12, - "biscayan": 0x13, - "biske": 0x5b, - "bohoric": 0x14, - "boont": 0x15, - "bornholm": 0x16, - "cisaup": 0x17, - "colb1945": 0x18, - "cornu": 0x19, - "creiss": 0x1a, - "dajnko": 0x1b, - "ekavsk": 0x1c, - "emodeng": 0x1d, - "fonipa": 0x63, - "fonkirsh": 0x64, - "fonnapa": 0x65, - "fonupa": 0x66, - "fonxsamp": 0x67, - "gascon": 0x1e, - "grclass": 0x1f, - "grital": 0x20, - "grmistr": 0x21, - "hepburn": 0x22, - "heploc": 0x61, - "hognorsk": 0x23, - "hsistemo": 0x24, - "ijekavsk": 0x25, - "itihasa": 0x26, - "ivanchov": 0x27, - "jauer": 0x28, - "jyutping": 0x29, - "kkcor": 0x2a, - "kociewie": 
0x2b, - "kscor": 0x2c, - "laukika": 0x2d, - "lemosin": 0x2e, - "lengadoc": 0x2f, - "lipaw": 0x5c, - "luna1918": 0x30, - "metelko": 0x31, - "monoton": 0x32, - "ndyuka": 0x33, - "nedis": 0x34, - "newfound": 0x35, - "nicard": 0x36, - "njiva": 0x5d, - "nulik": 0x37, - "osojs": 0x5e, - "oxendict": 0x38, - "pahawh2": 0x39, - "pahawh3": 0x3a, - "pahawh4": 0x3b, - "pamaka": 0x3c, - "peano": 0x3d, - "petr1708": 0x3e, - "pinyin": 0x3f, - "polyton": 0x40, - "provenc": 0x41, - "puter": 0x42, - "rigik": 0x43, - "rozaj": 0x44, - "rumgr": 0x45, - "scotland": 0x46, - "scouse": 0x47, - "simple": 0x68, - "solba": 0x5f, - "sotav": 0x48, - "spanglis": 0x49, - "surmiran": 0x4a, - "sursilv": 0x4b, - "sutsilv": 0x4c, - "tarask": 0x4d, - "tongyong": 0x4e, - "tunumiit": 0x4f, - "uccor": 0x50, - "ucrcor": 0x51, - "ulster": 0x52, - "unifon": 0x53, - "vaidika": 0x54, - "valencia": 0x55, - "vallader": 0x56, - "vecdruka": 0x57, - "vivaraup": 0x58, - "wadegile": 0x59, - "xsistemo": 0x5a, + "arkaika": 0xc, + "asante": 0xd, + "auvern": 0xe, + "baku1926": 0xf, + "balanka": 0x10, + "barla": 0x11, + "basiceng": 0x12, + "bauddha": 0x13, + "biscayan": 0x14, + "biske": 0x5c, + "bohoric": 0x15, + "boont": 0x16, + "bornholm": 0x17, + "cisaup": 0x18, + "colb1945": 0x19, + "cornu": 0x1a, + "creiss": 0x1b, + "dajnko": 0x1c, + "ekavsk": 0x1d, + "emodeng": 0x1e, + "fonipa": 0x64, + "fonkirsh": 0x65, + "fonnapa": 0x66, + "fonupa": 0x67, + "fonxsamp": 0x68, + "gascon": 0x1f, + "grclass": 0x20, + "grital": 0x21, + "grmistr": 0x22, + "hepburn": 0x23, + "heploc": 0x62, + "hognorsk": 0x24, + "hsistemo": 0x25, + "ijekavsk": 0x26, + "itihasa": 0x27, + "ivanchov": 0x28, + "jauer": 0x29, + "jyutping": 0x2a, + "kkcor": 0x2b, + "kociewie": 0x2c, + "kscor": 0x2d, + "laukika": 0x2e, + "lemosin": 0x2f, + "lengadoc": 0x30, + "lipaw": 0x5d, + "luna1918": 0x31, + "metelko": 0x32, + "monoton": 0x33, + "ndyuka": 0x34, + "nedis": 0x35, + "newfound": 0x36, + "nicard": 0x37, + "njiva": 0x5e, + "nulik": 0x38, + "osojs": 0x5f, + 
"oxendict": 0x39, + "pahawh2": 0x3a, + "pahawh3": 0x3b, + "pahawh4": 0x3c, + "pamaka": 0x3d, + "peano": 0x3e, + "petr1708": 0x3f, + "pinyin": 0x40, + "polyton": 0x41, + "provenc": 0x42, + "puter": 0x43, + "rigik": 0x44, + "rozaj": 0x45, + "rumgr": 0x46, + "scotland": 0x47, + "scouse": 0x48, + "simple": 0x69, + "solba": 0x60, + "sotav": 0x49, + "spanglis": 0x4a, + "surmiran": 0x4b, + "sursilv": 0x4c, + "sutsilv": 0x4d, + "tarask": 0x4e, + "tongyong": 0x4f, + "tunumiit": 0x50, + "uccor": 0x51, + "ucrcor": 0x52, + "ulster": 0x53, + "unifon": 0x54, + "vaidika": 0x55, + "valencia": 0x56, + "vallader": 0x57, + "vecdruka": 0x58, + "vivaraup": 0x59, + "wadegile": 0x5a, + "xsistemo": 0x5b, } // variantNumSpecialized is the number of specialized variants in variants. -const variantNumSpecialized = 98 +const variantNumSpecialized = 99 // nRegionGroups is the number of region groups. const nRegionGroups = 33 @@ -1390,8 +1398,8 @@ type likelyLangRegion struct { // likelyScript is a lookup table, indexed by scriptID, for the most likely // languages and regions given a script. 
-// Size: 1012 bytes, 253 elements -var likelyScript = [253]likelyLangRegion{ +// Size: 1040 bytes, 260 elements +var likelyScript = [260]likelyLangRegion{ 1: {lang: 0x14e, region: 0x84}, 3: {lang: 0x2a2, region: 0x106}, 4: {lang: 0x1f, region: 0x99}, @@ -1489,57 +1497,57 @@ var likelyScript = [253]likelyLangRegion{ 129: {lang: 0x395, region: 0x99}, 130: {lang: 0x399, region: 0x135}, 131: {lang: 0x429, region: 0x115}, - 132: {lang: 0x3b, region: 0x11c}, - 133: {lang: 0xfd, region: 0xc4}, - 134: {lang: 0x27d, region: 0x106}, - 135: {lang: 0x2c9, region: 0x53}, - 136: {lang: 0x39f, region: 0x9c}, - 137: {lang: 0x39f, region: 0x53}, - 139: {lang: 0x3ad, region: 0xb0}, - 141: {lang: 0x1c6, region: 0x53}, - 142: {lang: 0x4fd, region: 0x9c}, - 193: {lang: 0x3cb, region: 0x95}, - 196: {lang: 0x372, region: 0x10c}, - 197: {lang: 0x420, region: 0x97}, - 199: {lang: 0x4ff, region: 0x15e}, - 200: {lang: 0x3f0, region: 0x99}, - 201: {lang: 0x45, region: 0x135}, - 202: {lang: 0x139, region: 0x7b}, - 203: {lang: 0x3e9, region: 0x99}, - 205: {lang: 0x3e9, region: 0x99}, - 206: {lang: 0x3fa, region: 0x99}, - 207: {lang: 0x40c, region: 0xb3}, - 210: {lang: 0x433, region: 0x99}, - 211: {lang: 0xef, region: 0xc5}, - 212: {lang: 0x43e, region: 0x95}, - 213: {lang: 0x44d, region: 0x35}, - 214: {lang: 0x44e, region: 0x9b}, - 218: {lang: 0x45a, region: 0xe7}, - 219: {lang: 0x11a, region: 0x99}, - 220: {lang: 0x45e, region: 0x53}, - 221: {lang: 0x232, region: 0x53}, - 222: {lang: 0x450, region: 0x99}, - 223: {lang: 0x4a5, region: 0x53}, - 224: {lang: 0x9f, region: 0x13e}, - 225: {lang: 0x461, region: 0x99}, - 227: {lang: 0x528, region: 0xba}, - 228: {lang: 0x153, region: 0xe7}, - 229: {lang: 0x128, region: 0xcd}, - 230: {lang: 0x46b, region: 0x123}, - 231: {lang: 0xa9, region: 0x53}, - 232: {lang: 0x2ce, region: 0x99}, - 234: {lang: 0x4ad, region: 0x11c}, - 235: {lang: 0x4be, region: 0xb4}, - 237: {lang: 0x1ce, region: 0x99}, - 240: {lang: 0x3a9, region: 0x9c}, - 241: {lang: 0x22, region: 
0x9b}, - 243: {lang: 0x1ea, region: 0x53}, - 244: {lang: 0xef, region: 0xc5}, + 133: {lang: 0x3b, region: 0x11c}, + 134: {lang: 0xfd, region: 0xc4}, + 137: {lang: 0x27d, region: 0x106}, + 138: {lang: 0x2c9, region: 0x53}, + 139: {lang: 0x39f, region: 0x9c}, + 140: {lang: 0x39f, region: 0x53}, + 142: {lang: 0x3ad, region: 0xb0}, + 144: {lang: 0x1c6, region: 0x53}, + 145: {lang: 0x4fd, region: 0x9c}, + 198: {lang: 0x3cb, region: 0x95}, + 201: {lang: 0x372, region: 0x10c}, + 202: {lang: 0x420, region: 0x97}, + 204: {lang: 0x4ff, region: 0x15e}, + 205: {lang: 0x3f0, region: 0x99}, + 206: {lang: 0x45, region: 0x135}, + 207: {lang: 0x139, region: 0x7b}, + 208: {lang: 0x3e9, region: 0x99}, + 210: {lang: 0x3e9, region: 0x99}, + 211: {lang: 0x3fa, region: 0x99}, + 212: {lang: 0x40c, region: 0xb3}, + 215: {lang: 0x433, region: 0x99}, + 216: {lang: 0xef, region: 0xc5}, + 217: {lang: 0x43e, region: 0x95}, + 218: {lang: 0x44d, region: 0x35}, + 219: {lang: 0x44e, region: 0x9b}, + 223: {lang: 0x45a, region: 0xe7}, + 224: {lang: 0x11a, region: 0x99}, + 225: {lang: 0x45e, region: 0x53}, + 226: {lang: 0x232, region: 0x53}, + 227: {lang: 0x450, region: 0x99}, + 228: {lang: 0x4a5, region: 0x53}, + 229: {lang: 0x9f, region: 0x13e}, + 230: {lang: 0x461, region: 0x99}, + 232: {lang: 0x528, region: 0xba}, + 233: {lang: 0x153, region: 0xe7}, + 234: {lang: 0x128, region: 0xcd}, + 235: {lang: 0x46b, region: 0x123}, + 236: {lang: 0xa9, region: 0x53}, + 237: {lang: 0x2ce, region: 0x99}, + 240: {lang: 0x4ad, region: 0x11c}, + 241: {lang: 0x4be, region: 0xb4}, + 244: {lang: 0x1ce, region: 0x99}, + 247: {lang: 0x3a9, region: 0x9c}, + 248: {lang: 0x22, region: 0x9b}, + 250: {lang: 0x1ea, region: 0x53}, + 251: {lang: 0xef, region: 0xc5}, } type likelyScriptRegion struct { region uint16 - script uint8 + script uint16 flags uint8 } @@ -1547,7 +1555,7 @@ type likelyScriptRegion struct { // scripts and regions given incomplete information. 
If more entries exist for a // given language, region and script are the index and size respectively // of the list in likelyLangList. -// Size: 5320 bytes, 1330 elements +// Size: 7980 bytes, 1330 elements var likelyLang = [1330]likelyScriptRegion{ 0: {region: 0x135, script: 0x5a, flags: 0x0}, 1: {region: 0x6f, script: 0x5a, flags: 0x0}, @@ -1583,7 +1591,7 @@ var likelyLang = [1330]likelyScriptRegion{ 31: {region: 0x99, script: 0x4, flags: 0x0}, 32: {region: 0x165, script: 0x5a, flags: 0x0}, 33: {region: 0x80, script: 0x5a, flags: 0x0}, - 34: {region: 0x9b, script: 0xf1, flags: 0x0}, + 34: {region: 0x9b, script: 0xf8, flags: 0x0}, 35: {region: 0x165, script: 0x5a, flags: 0x0}, 36: {region: 0x165, script: 0x5a, flags: 0x0}, 37: {region: 0x14d, script: 0x5a, flags: 0x0}, @@ -1616,7 +1624,7 @@ var likelyLang = [1330]likelyScriptRegion{ 66: {region: 0x6b, script: 0x5, flags: 0x0}, 67: {region: 0x99, script: 0xe, flags: 0x0}, 68: {region: 0x12f, script: 0x5a, flags: 0x0}, - 69: {region: 0x135, script: 0xc9, flags: 0x0}, + 69: {region: 0x135, script: 0xce, flags: 0x0}, 70: {region: 0x165, script: 0x5a, flags: 0x0}, 71: {region: 0x165, script: 0x5a, flags: 0x0}, 72: {region: 0x6e, script: 0x5a, flags: 0x0}, @@ -1670,7 +1678,7 @@ var likelyLang = [1330]likelyScriptRegion{ 120: {region: 0x165, script: 0x5a, flags: 0x0}, 121: {region: 0x12f, script: 0x5a, flags: 0x0}, 122: {region: 0x52, script: 0x5a, flags: 0x0}, - 123: {region: 0x99, script: 0xde, flags: 0x0}, + 123: {region: 0x99, script: 0xe3, flags: 0x0}, 124: {region: 0xe8, script: 0x5, flags: 0x0}, 125: {region: 0x99, script: 0x22, flags: 0x0}, 126: {region: 0x38, script: 0x20, flags: 0x0}, @@ -1705,7 +1713,7 @@ var likelyLang = [1330]likelyScriptRegion{ 156: {region: 0x165, script: 0x5a, flags: 0x0}, 157: {region: 0xe7, script: 0x5a, flags: 0x0}, 158: {region: 0x165, script: 0x5a, flags: 0x0}, - 159: {region: 0x13e, script: 0xe0, flags: 0x0}, + 159: {region: 0x13e, script: 0xe5, flags: 0x0}, 160: {region: 0xc3, 
script: 0x5a, flags: 0x0}, 161: {region: 0x165, script: 0x5a, flags: 0x0}, 162: {region: 0x165, script: 0x5a, flags: 0x0}, @@ -1715,7 +1723,7 @@ var likelyLang = [1330]likelyScriptRegion{ 166: {region: 0x165, script: 0x5a, flags: 0x0}, 167: {region: 0x165, script: 0x5a, flags: 0x0}, 168: {region: 0x165, script: 0x5a, flags: 0x0}, - 169: {region: 0x53, script: 0xe7, flags: 0x0}, + 169: {region: 0x53, script: 0xec, flags: 0x0}, 170: {region: 0x165, script: 0x5a, flags: 0x0}, 171: {region: 0x165, script: 0x5a, flags: 0x0}, 172: {region: 0x165, script: 0x5a, flags: 0x0}, @@ -1785,7 +1793,7 @@ var likelyLang = [1330]likelyScriptRegion{ 236: {region: 0x165, script: 0x5a, flags: 0x0}, 237: {region: 0x165, script: 0x5a, flags: 0x0}, 238: {region: 0x165, script: 0x5a, flags: 0x0}, - 239: {region: 0xc5, script: 0xd3, flags: 0x0}, + 239: {region: 0xc5, script: 0xd8, flags: 0x0}, 240: {region: 0x78, script: 0x5a, flags: 0x0}, 241: {region: 0x6b, script: 0x1d, flags: 0x0}, 242: {region: 0xe7, script: 0x5a, flags: 0x0}, @@ -1799,7 +1807,7 @@ var likelyLang = [1330]likelyScriptRegion{ 250: {region: 0x5e, script: 0x5a, flags: 0x0}, 251: {region: 0xe9, script: 0x5a, flags: 0x0}, 252: {region: 0x49, script: 0x17, flags: 0x0}, - 253: {region: 0xc4, script: 0x85, flags: 0x0}, + 253: {region: 0xc4, script: 0x86, flags: 0x0}, 254: {region: 0x8, script: 0x2, flags: 0x1}, 255: {region: 0x106, script: 0x20, flags: 0x0}, 256: {region: 0x7b, script: 0x5a, flags: 0x0}, @@ -1842,12 +1850,12 @@ var likelyLang = [1330]likelyScriptRegion{ 293: {region: 0x165, script: 0x5a, flags: 0x0}, 294: {region: 0x165, script: 0x5a, flags: 0x0}, 295: {region: 0x165, script: 0x5a, flags: 0x0}, - 296: {region: 0xcd, script: 0xe5, flags: 0x0}, + 296: {region: 0xcd, script: 0xea, flags: 0x0}, 297: {region: 0x165, script: 0x5a, flags: 0x0}, 298: {region: 0x165, script: 0x5a, flags: 0x0}, 299: {region: 0x114, script: 0x5a, flags: 0x0}, 300: {region: 0x37, script: 0x5a, flags: 0x0}, - 301: {region: 0x43, script: 
0xe7, flags: 0x0}, + 301: {region: 0x43, script: 0xec, flags: 0x0}, 302: {region: 0x165, script: 0x5a, flags: 0x0}, 303: {region: 0xa4, script: 0x5a, flags: 0x0}, 304: {region: 0x80, script: 0x5a, flags: 0x0}, @@ -1957,7 +1965,7 @@ var likelyLang = [1330]likelyScriptRegion{ 408: {region: 0x165, script: 0x2c, flags: 0x0}, 409: {region: 0x165, script: 0x5a, flags: 0x0}, 410: {region: 0x99, script: 0x22, flags: 0x0}, - 411: {region: 0x99, script: 0xe1, flags: 0x0}, + 411: {region: 0x99, script: 0xe6, flags: 0x0}, 412: {region: 0x95, script: 0x5a, flags: 0x0}, 413: {region: 0xd9, script: 0x5a, flags: 0x0}, 414: {region: 0x130, script: 0x32, flags: 0x0}, @@ -2000,7 +2008,7 @@ var likelyLang = [1330]likelyScriptRegion{ 451: {region: 0xe7, script: 0x5a, flags: 0x0}, 452: {region: 0x165, script: 0x5a, flags: 0x0}, 453: {region: 0x12b, script: 0x40, flags: 0x0}, - 454: {region: 0x53, script: 0x8d, flags: 0x0}, + 454: {region: 0x53, script: 0x90, flags: 0x0}, 455: {region: 0x165, script: 0x5a, flags: 0x0}, 456: {region: 0xe8, script: 0x5, flags: 0x0}, 457: {region: 0x99, script: 0x22, flags: 0x0}, @@ -2035,7 +2043,7 @@ var likelyLang = [1330]likelyScriptRegion{ 487: {region: 0xd6, script: 0x5a, flags: 0x0}, 488: {region: 0x165, script: 0x5a, flags: 0x0}, 489: {region: 0x165, script: 0x5a, flags: 0x0}, - 490: {region: 0x53, script: 0xf3, flags: 0x0}, + 490: {region: 0x53, script: 0xfa, flags: 0x0}, 491: {region: 0x165, script: 0x5a, flags: 0x0}, 492: {region: 0x135, script: 0x5a, flags: 0x0}, 493: {region: 0x165, script: 0x5a, flags: 0x0}, @@ -2095,7 +2103,7 @@ var likelyLang = [1330]likelyScriptRegion{ 547: {region: 0x12f, script: 0x5a, flags: 0x0}, 548: {region: 0x122, script: 0x5, flags: 0x0}, 549: {region: 0x165, script: 0x5a, flags: 0x0}, - 550: {region: 0x123, script: 0xe6, flags: 0x0}, + 550: {region: 0x123, script: 0xeb, flags: 0x0}, 551: {region: 0x5a, script: 0x5a, flags: 0x0}, 552: {region: 0x52, script: 0x5a, flags: 0x0}, 553: {region: 0x165, script: 0x5a, flags: 
0x0}, @@ -2107,7 +2115,7 @@ var likelyLang = [1330]likelyScriptRegion{ 559: {region: 0x165, script: 0x5a, flags: 0x0}, 560: {region: 0x41, script: 0x5a, flags: 0x0}, 561: {region: 0x99, script: 0x5a, flags: 0x0}, - 562: {region: 0x53, script: 0xdd, flags: 0x0}, + 562: {region: 0x53, script: 0xe2, flags: 0x0}, 563: {region: 0x99, script: 0x22, flags: 0x0}, 564: {region: 0xc3, script: 0x5a, flags: 0x0}, 565: {region: 0x165, script: 0x5a, flags: 0x0}, @@ -2191,7 +2199,7 @@ var likelyLang = [1330]likelyScriptRegion{ 643: {region: 0x165, script: 0x5a, flags: 0x0}, 644: {region: 0x165, script: 0x5a, flags: 0x0}, 645: {region: 0x165, script: 0x2c, flags: 0x0}, - 646: {region: 0x123, script: 0xe6, flags: 0x0}, + 646: {region: 0x123, script: 0xeb, flags: 0x0}, 647: {region: 0xe8, script: 0x5, flags: 0x0}, 648: {region: 0x165, script: 0x5a, flags: 0x0}, 649: {region: 0x165, script: 0x5a, flags: 0x0}, @@ -2211,7 +2219,7 @@ var likelyLang = [1330]likelyScriptRegion{ 663: {region: 0x165, script: 0x5a, flags: 0x0}, 664: {region: 0x95, script: 0x5a, flags: 0x0}, 665: {region: 0x165, script: 0x5a, flags: 0x0}, - 666: {region: 0x53, script: 0xe6, flags: 0x0}, + 666: {region: 0x53, script: 0xeb, flags: 0x0}, 667: {region: 0x165, script: 0x5a, flags: 0x0}, 668: {region: 0x165, script: 0x5a, flags: 0x0}, 669: {region: 0x165, script: 0x5a, flags: 0x0}, @@ -2235,7 +2243,7 @@ var likelyLang = [1330]likelyScriptRegion{ 687: {region: 0x135, script: 0x5a, flags: 0x0}, 688: {region: 0x165, script: 0x5a, flags: 0x0}, 689: {region: 0x165, script: 0x5a, flags: 0x0}, - 690: {region: 0x99, script: 0xe1, flags: 0x0}, + 690: {region: 0x99, script: 0xe6, flags: 0x0}, 691: {region: 0x9e, script: 0x5a, flags: 0x0}, 692: {region: 0x165, script: 0x5a, flags: 0x0}, 693: {region: 0x4b, script: 0x5a, flags: 0x0}, @@ -2257,7 +2265,7 @@ var likelyLang = [1330]likelyScriptRegion{ 709: {region: 0xa4, script: 0x5a, flags: 0x0}, 710: {region: 0x9c, script: 0x5, flags: 0x0}, 711: {region: 0xb8, script: 0x5a, 
flags: 0x0}, - 712: {region: 0x123, script: 0xe6, flags: 0x0}, + 712: {region: 0x123, script: 0xeb, flags: 0x0}, 713: {region: 0x53, script: 0x3b, flags: 0x0}, 714: {region: 0x12b, script: 0x5a, flags: 0x0}, 715: {region: 0x95, script: 0x5a, flags: 0x0}, @@ -2424,7 +2432,7 @@ var likelyLang = [1330]likelyScriptRegion{ 879: {region: 0xda, script: 0x5a, flags: 0x0}, 880: {region: 0x123, script: 0x56, flags: 0x0}, 881: {region: 0x99, script: 0x22, flags: 0x0}, - 882: {region: 0x10c, script: 0xc4, flags: 0x0}, + 882: {region: 0x10c, script: 0xc9, flags: 0x0}, 883: {region: 0x165, script: 0x5a, flags: 0x0}, 884: {region: 0x165, script: 0x5a, flags: 0x0}, 885: {region: 0x84, script: 0x7c, flags: 0x0}, @@ -2478,11 +2486,11 @@ var likelyLang = [1330]likelyScriptRegion{ 934: {region: 0x135, script: 0x5a, flags: 0x0}, 935: {region: 0x49, script: 0x5a, flags: 0x0}, 936: {region: 0x165, script: 0x5a, flags: 0x0}, - 937: {region: 0x9c, script: 0xf0, flags: 0x0}, + 937: {region: 0x9c, script: 0xf7, flags: 0x0}, 938: {region: 0x165, script: 0x5a, flags: 0x0}, 939: {region: 0x60, script: 0x5a, flags: 0x0}, 940: {region: 0x165, script: 0x5, flags: 0x0}, - 941: {region: 0xb0, script: 0x8b, flags: 0x0}, + 941: {region: 0xb0, script: 0x8e, flags: 0x0}, 943: {region: 0x165, script: 0x5a, flags: 0x0}, 944: {region: 0x165, script: 0x5a, flags: 0x0}, 945: {region: 0x99, script: 0x12, flags: 0x0}, @@ -2548,7 +2556,7 @@ var likelyLang = [1330]likelyScriptRegion{ 1005: {region: 0x95, script: 0x5a, flags: 0x0}, 1006: {region: 0x99, script: 0x5a, flags: 0x0}, 1007: {region: 0x114, script: 0x5a, flags: 0x0}, - 1008: {region: 0x99, script: 0xc8, flags: 0x0}, + 1008: {region: 0x99, script: 0xcd, flags: 0x0}, 1009: {region: 0x165, script: 0x5a, flags: 0x0}, 1010: {region: 0x165, script: 0x5a, flags: 0x0}, 1011: {region: 0x12f, script: 0x5a, flags: 0x0}, @@ -2571,11 +2579,11 @@ var likelyLang = [1330]likelyScriptRegion{ 1028: {region: 0xb6, script: 0x5a, flags: 0x0}, 1029: {region: 0x165, script: 
0x2c, flags: 0x0}, 1030: {region: 0x165, script: 0x5a, flags: 0x0}, - 1032: {region: 0xba, script: 0xe3, flags: 0x0}, + 1032: {region: 0xba, script: 0xe8, flags: 0x0}, 1033: {region: 0x165, script: 0x5a, flags: 0x0}, 1034: {region: 0xc4, script: 0x75, flags: 0x0}, 1035: {region: 0x165, script: 0x5, flags: 0x0}, - 1036: {region: 0xb3, script: 0xcf, flags: 0x0}, + 1036: {region: 0xb3, script: 0xd4, flags: 0x0}, 1037: {region: 0x6f, script: 0x5a, flags: 0x0}, 1038: {region: 0x165, script: 0x5a, flags: 0x0}, 1039: {region: 0x165, script: 0x5a, flags: 0x0}, @@ -2594,7 +2602,7 @@ var likelyLang = [1330]likelyScriptRegion{ 1052: {region: 0x10c, script: 0x5a, flags: 0x0}, 1054: {region: 0x10c, script: 0x5a, flags: 0x0}, 1055: {region: 0x72, script: 0x5a, flags: 0x0}, - 1056: {region: 0x97, script: 0xc5, flags: 0x0}, + 1056: {region: 0x97, script: 0xca, flags: 0x0}, 1057: {region: 0x165, script: 0x5a, flags: 0x0}, 1058: {region: 0x72, script: 0x5a, flags: 0x0}, 1059: {region: 0x164, script: 0x5a, flags: 0x0}, @@ -2606,14 +2614,14 @@ var likelyLang = [1330]likelyScriptRegion{ 1065: {region: 0x115, script: 0x5a, flags: 0x0}, 1066: {region: 0x165, script: 0x5a, flags: 0x0}, 1067: {region: 0x165, script: 0x5a, flags: 0x0}, - 1068: {region: 0x123, script: 0xe6, flags: 0x0}, + 1068: {region: 0x123, script: 0xeb, flags: 0x0}, 1069: {region: 0x165, script: 0x5a, flags: 0x0}, 1070: {region: 0x165, script: 0x5a, flags: 0x0}, 1071: {region: 0x165, script: 0x5a, flags: 0x0}, 1072: {region: 0x165, script: 0x5a, flags: 0x0}, 1073: {region: 0x27, script: 0x5a, flags: 0x0}, 1074: {region: 0x37, script: 0x5, flags: 0x1}, - 1075: {region: 0x99, script: 0xd2, flags: 0x0}, + 1075: {region: 0x99, script: 0xd7, flags: 0x0}, 1076: {region: 0x116, script: 0x5a, flags: 0x0}, 1077: {region: 0x114, script: 0x5a, flags: 0x0}, 1078: {region: 0x99, script: 0x22, flags: 0x0}, @@ -2640,9 +2648,9 @@ var likelyLang = [1330]likelyScriptRegion{ 1099: {region: 0x95, script: 0x5a, flags: 0x0}, 1100: {region: 
0x165, script: 0x5a, flags: 0x0}, 1101: {region: 0x35, script: 0xe, flags: 0x0}, - 1102: {region: 0x9b, script: 0xd6, flags: 0x0}, + 1102: {region: 0x9b, script: 0xdb, flags: 0x0}, 1103: {region: 0xe9, script: 0x5a, flags: 0x0}, - 1104: {region: 0x99, script: 0xde, flags: 0x0}, + 1104: {region: 0x99, script: 0xe3, flags: 0x0}, 1105: {region: 0xdb, script: 0x22, flags: 0x0}, 1106: {region: 0x165, script: 0x5a, flags: 0x0}, 1107: {region: 0x165, script: 0x5a, flags: 0x0}, @@ -2656,10 +2664,10 @@ var likelyLang = [1330]likelyScriptRegion{ 1115: {region: 0x165, script: 0x5a, flags: 0x0}, 1116: {region: 0x165, script: 0x5a, flags: 0x0}, 1117: {region: 0x99, script: 0x52, flags: 0x0}, - 1118: {region: 0x53, script: 0xdc, flags: 0x0}, + 1118: {region: 0x53, script: 0xe1, flags: 0x0}, 1119: {region: 0xdb, script: 0x22, flags: 0x0}, 1120: {region: 0xdb, script: 0x22, flags: 0x0}, - 1121: {region: 0x99, script: 0xe1, flags: 0x0}, + 1121: {region: 0x99, script: 0xe6, flags: 0x0}, 1122: {region: 0x165, script: 0x5a, flags: 0x0}, 1123: {region: 0x112, script: 0x5a, flags: 0x0}, 1124: {region: 0x131, script: 0x5a, flags: 0x0}, @@ -2669,7 +2677,7 @@ var likelyLang = [1330]likelyScriptRegion{ 1128: {region: 0x165, script: 0x5a, flags: 0x0}, 1129: {region: 0x165, script: 0x5a, flags: 0x0}, 1130: {region: 0x165, script: 0x5a, flags: 0x0}, - 1131: {region: 0x123, script: 0xe6, flags: 0x0}, + 1131: {region: 0x123, script: 0xeb, flags: 0x0}, 1132: {region: 0xdb, script: 0x22, flags: 0x0}, 1133: {region: 0xdb, script: 0x22, flags: 0x0}, 1134: {region: 0xdb, script: 0x22, flags: 0x0}, @@ -2708,14 +2716,14 @@ var likelyLang = [1330]likelyScriptRegion{ 1167: {region: 0x87, script: 0x34, flags: 0x0}, 1168: {region: 0xdb, script: 0x22, flags: 0x0}, 1169: {region: 0xe7, script: 0x5a, flags: 0x0}, - 1170: {region: 0x43, script: 0xe7, flags: 0x0}, + 1170: {region: 0x43, script: 0xec, flags: 0x0}, 1171: {region: 0x165, script: 0x5a, flags: 0x0}, 1172: {region: 0x106, script: 0x20, flags: 0x0}, 
1173: {region: 0x165, script: 0x5a, flags: 0x0}, 1174: {region: 0x165, script: 0x5a, flags: 0x0}, 1175: {region: 0x131, script: 0x5a, flags: 0x0}, 1176: {region: 0x165, script: 0x5a, flags: 0x0}, - 1177: {region: 0x123, script: 0xe6, flags: 0x0}, + 1177: {region: 0x123, script: 0xeb, flags: 0x0}, 1178: {region: 0x32, script: 0x5a, flags: 0x0}, 1179: {region: 0x165, script: 0x5a, flags: 0x0}, 1180: {region: 0x165, script: 0x5a, flags: 0x0}, @@ -2726,7 +2734,7 @@ var likelyLang = [1330]likelyScriptRegion{ 1185: {region: 0x165, script: 0x5a, flags: 0x0}, 1187: {region: 0x165, script: 0x5a, flags: 0x0}, 1188: {region: 0xd4, script: 0x5a, flags: 0x0}, - 1189: {region: 0x53, script: 0xdf, flags: 0x0}, + 1189: {region: 0x53, script: 0xe4, flags: 0x0}, 1190: {region: 0xe5, script: 0x5a, flags: 0x0}, 1191: {region: 0x165, script: 0x5a, flags: 0x0}, 1192: {region: 0x106, script: 0x20, flags: 0x0}, @@ -2734,7 +2742,7 @@ var likelyLang = [1330]likelyScriptRegion{ 1194: {region: 0x165, script: 0x5a, flags: 0x0}, 1195: {region: 0x106, script: 0x20, flags: 0x0}, 1196: {region: 0x3f, script: 0x4, flags: 0x1}, - 1197: {region: 0x11c, script: 0xea, flags: 0x0}, + 1197: {region: 0x11c, script: 0xf0, flags: 0x0}, 1198: {region: 0x130, script: 0x20, flags: 0x0}, 1199: {region: 0x75, script: 0x5a, flags: 0x0}, 1200: {region: 0x2a, script: 0x5a, flags: 0x0}, @@ -2750,7 +2758,7 @@ var likelyLang = [1330]likelyScriptRegion{ 1211: {region: 0x165, script: 0x5a, flags: 0x0}, 1212: {region: 0x46, script: 0x4, flags: 0x1}, 1213: {region: 0x165, script: 0x5a, flags: 0x0}, - 1214: {region: 0xb4, script: 0xeb, flags: 0x0}, + 1214: {region: 0xb4, script: 0xf1, flags: 0x0}, 1215: {region: 0x165, script: 0x5a, flags: 0x0}, 1216: {region: 0x161, script: 0x5a, flags: 0x0}, 1217: {region: 0x9e, script: 0x5a, flags: 0x0}, @@ -2773,7 +2781,7 @@ var likelyLang = [1330]likelyScriptRegion{ 1234: {region: 0x165, script: 0x5a, flags: 0x0}, 1235: {region: 0xe7, script: 0x5a, flags: 0x0}, 1236: {region: 0x2f, 
script: 0x5a, flags: 0x0}, - 1237: {region: 0x99, script: 0xe1, flags: 0x0}, + 1237: {region: 0x99, script: 0xe6, flags: 0x0}, 1238: {region: 0x99, script: 0x22, flags: 0x0}, 1239: {region: 0x165, script: 0x5a, flags: 0x0}, 1240: {region: 0x165, script: 0x5a, flags: 0x0}, @@ -2813,9 +2821,9 @@ var likelyLang = [1330]likelyScriptRegion{ 1274: {region: 0x99, script: 0x22, flags: 0x0}, 1275: {region: 0x131, script: 0x5a, flags: 0x0}, 1276: {region: 0x165, script: 0x5a, flags: 0x0}, - 1277: {region: 0x9c, script: 0x8e, flags: 0x0}, + 1277: {region: 0x9c, script: 0x91, flags: 0x0}, 1278: {region: 0x165, script: 0x5a, flags: 0x0}, - 1279: {region: 0x15e, script: 0xc7, flags: 0x0}, + 1279: {region: 0x15e, script: 0xcc, flags: 0x0}, 1280: {region: 0x165, script: 0x5a, flags: 0x0}, 1281: {region: 0x165, script: 0x5a, flags: 0x0}, 1282: {region: 0xdb, script: 0x22, flags: 0x0}, @@ -2855,7 +2863,7 @@ var likelyLang = [1330]likelyScriptRegion{ 1316: {region: 0x10b, script: 0x5a, flags: 0x0}, 1318: {region: 0xa8, script: 0x5, flags: 0x0}, 1319: {region: 0xd9, script: 0x5a, flags: 0x0}, - 1320: {region: 0xba, script: 0xe3, flags: 0x0}, + 1320: {region: 0xba, script: 0xe8, flags: 0x0}, 1321: {region: 0x4d, script: 0x14, flags: 0x1}, 1322: {region: 0x53, script: 0x7d, flags: 0x0}, 1323: {region: 0x165, script: 0x5a, flags: 0x0}, @@ -2867,11 +2875,11 @@ var likelyLang = [1330]likelyScriptRegion{ } // likelyLangList holds lists info associated with likelyLang. 
-// Size: 388 bytes, 97 elements +// Size: 582 bytes, 97 elements var likelyLangList = [97]likelyScriptRegion{ 0: {region: 0x9c, script: 0x7, flags: 0x0}, 1: {region: 0xa1, script: 0x78, flags: 0x2}, - 2: {region: 0x11c, script: 0x84, flags: 0x2}, + 2: {region: 0x11c, script: 0x85, flags: 0x2}, 3: {region: 0x32, script: 0x5a, flags: 0x0}, 4: {region: 0x9b, script: 0x5, flags: 0x4}, 5: {region: 0x9c, script: 0x5, flags: 0x4}, @@ -2880,7 +2888,7 @@ var likelyLangList = [97]likelyScriptRegion{ 8: {region: 0x106, script: 0x20, flags: 0x0}, 9: {region: 0x38, script: 0x2f, flags: 0x2}, 10: {region: 0x135, script: 0x5a, flags: 0x0}, - 11: {region: 0x7b, script: 0xca, flags: 0x2}, + 11: {region: 0x7b, script: 0xcf, flags: 0x2}, 12: {region: 0x114, script: 0x5a, flags: 0x0}, 13: {region: 0x84, script: 0x1, flags: 0x2}, 14: {region: 0x5d, script: 0x1f, flags: 0x0}, @@ -2916,14 +2924,14 @@ var likelyLangList = [97]likelyScriptRegion{ 44: {region: 0x99, script: 0x36, flags: 0x0}, 45: {region: 0xe8, script: 0x5, flags: 0x4}, 46: {region: 0xe8, script: 0x5, flags: 0x2}, - 47: {region: 0x9c, script: 0x88, flags: 0x0}, - 48: {region: 0x53, script: 0x89, flags: 0x2}, - 49: {region: 0xba, script: 0xe3, flags: 0x0}, + 47: {region: 0x9c, script: 0x8b, flags: 0x0}, + 48: {region: 0x53, script: 0x8c, flags: 0x2}, + 49: {region: 0xba, script: 0xe8, flags: 0x0}, 50: {region: 0xd9, script: 0x5a, flags: 0x4}, 51: {region: 0xe8, script: 0x5, flags: 0x0}, 52: {region: 0x99, script: 0x22, flags: 0x2}, 53: {region: 0x99, script: 0x4f, flags: 0x2}, - 54: {region: 0x99, script: 0xce, flags: 0x2}, + 54: {region: 0x99, script: 0xd3, flags: 0x2}, 55: {region: 0x105, script: 0x20, flags: 0x0}, 56: {region: 0xbd, script: 0x5a, flags: 0x4}, 57: {region: 0x104, script: 0x5a, flags: 0x4}, @@ -2970,7 +2978,7 @@ var likelyLangList = [97]likelyScriptRegion{ type likelyLangScript struct { lang uint16 - script uint8 + script uint16 flags uint8 } @@ -2979,7 +2987,7 @@ type likelyLangScript struct { // for a 
given regionID, lang and script are the index and size respectively // of the list in likelyRegionList. // TODO: exclude containers and user-definable regions from the list. -// Size: 1432 bytes, 358 elements +// Size: 2148 bytes, 358 elements var likelyRegion = [358]likelyLangScript{ 34: {lang: 0xd7, script: 0x5a, flags: 0x0}, 35: {lang: 0x3a, script: 0x5, flags: 0x0}, @@ -3086,7 +3094,7 @@ var likelyRegion = [358]likelyLangScript{ 175: {lang: 0x27, script: 0x2, flags: 0x1}, 176: {lang: 0x3a, script: 0x5, flags: 0x0}, 178: {lang: 0x10d, script: 0x5a, flags: 0x0}, - 179: {lang: 0x40c, script: 0xcf, flags: 0x0}, + 179: {lang: 0x40c, script: 0xd4, flags: 0x0}, 181: {lang: 0x43b, script: 0x5a, flags: 0x0}, 182: {lang: 0x2c0, script: 0x5a, flags: 0x0}, 183: {lang: 0x15e, script: 0x5a, flags: 0x0}, @@ -3107,7 +3115,7 @@ var likelyRegion = [358]likelyLangScript{ 201: {lang: 0x35, script: 0x2, flags: 0x1}, 203: {lang: 0x320, script: 0x5a, flags: 0x0}, 204: {lang: 0x37, script: 0x3, flags: 0x1}, - 205: {lang: 0x128, script: 0xe5, flags: 0x0}, + 205: {lang: 0x128, script: 0xea, flags: 0x0}, 207: {lang: 0x13e, script: 0x5a, flags: 0x0}, 208: {lang: 0x31f, script: 0x5a, flags: 0x0}, 209: {lang: 0x3c0, script: 0x5a, flags: 0x0}, @@ -3185,7 +3193,7 @@ var likelyRegion = [358]likelyLangScript{ } // likelyRegionList holds lists info associated with likelyRegion. 
-// Size: 372 bytes, 93 elements +// Size: 558 bytes, 93 elements var likelyRegionList = [93]likelyLangScript{ 0: {lang: 0x148, script: 0x5, flags: 0x0}, 1: {lang: 0x476, script: 0x5a, flags: 0x0}, @@ -3195,12 +3203,12 @@ var likelyRegionList = [93]likelyLangScript{ 5: {lang: 0x274, script: 0x5a, flags: 0x0}, 6: {lang: 0xb7, script: 0x5a, flags: 0x0}, 7: {lang: 0x432, script: 0x20, flags: 0x0}, - 8: {lang: 0x12d, script: 0xe7, flags: 0x0}, + 8: {lang: 0x12d, script: 0xec, flags: 0x0}, 9: {lang: 0x351, script: 0x22, flags: 0x0}, 10: {lang: 0x529, script: 0x3b, flags: 0x0}, 11: {lang: 0x4ac, script: 0x5, flags: 0x0}, 12: {lang: 0x523, script: 0x5a, flags: 0x0}, - 13: {lang: 0x29a, script: 0xe6, flags: 0x0}, + 13: {lang: 0x29a, script: 0xeb, flags: 0x0}, 14: {lang: 0x136, script: 0x34, flags: 0x0}, 15: {lang: 0x48a, script: 0x5a, flags: 0x0}, 16: {lang: 0x3a, script: 0x5, flags: 0x0}, @@ -3223,11 +3231,11 @@ var likelyRegionList = [93]likelyLangScript{ 33: {lang: 0x476, script: 0x5a, flags: 0x0}, 34: {lang: 0x24a, script: 0x4e, flags: 0x0}, 35: {lang: 0xe6, script: 0x5, flags: 0x0}, - 36: {lang: 0x226, script: 0xe6, flags: 0x0}, + 36: {lang: 0x226, script: 0xeb, flags: 0x0}, 37: {lang: 0x3a, script: 0x5, flags: 0x0}, 38: {lang: 0x15e, script: 0x5a, flags: 0x0}, 39: {lang: 0x2b8, script: 0x57, flags: 0x0}, - 40: {lang: 0x226, script: 0xe6, flags: 0x0}, + 40: {lang: 0x226, script: 0xeb, flags: 0x0}, 41: {lang: 0x3a, script: 0x5, flags: 0x0}, 42: {lang: 0x15e, script: 0x5a, flags: 0x0}, 43: {lang: 0x3dc, script: 0x5a, flags: 0x0}, @@ -3260,7 +3268,7 @@ var likelyRegionList = [93]likelyLangScript{ 70: {lang: 0x15e, script: 0x5a, flags: 0x0}, 71: {lang: 0x15e, script: 0x5a, flags: 0x0}, 72: {lang: 0x35, script: 0x5, flags: 0x0}, - 73: {lang: 0x46b, script: 0xe6, flags: 0x0}, + 73: {lang: 0x46b, script: 0xeb, flags: 0x0}, 74: {lang: 0x2ec, script: 0x5, flags: 0x0}, 75: {lang: 0x30f, script: 0x75, flags: 0x0}, 76: {lang: 0x467, script: 0x20, flags: 0x0}, @@ -3285,7 +3293,7 
@@ var likelyRegionList = [93]likelyLangScript{ type likelyTag struct { lang uint16 region uint16 - script uint8 + script uint16 } // Size: 198 bytes, 33 elements @@ -3446,8 +3454,8 @@ var regionInclusionNext = [73]uint8{ type parentRel struct { lang uint16 - script uint8 - maxScript uint8 + script uint16 + maxScript uint16 toRegion uint16 fromRegion []uint16 } @@ -3461,4 +3469,4 @@ var parents = [5]parentRel{ 4: {lang: 0x529, script: 0x3c, maxScript: 0x3c, toRegion: 0x8d, fromRegion: []uint16{0xc6}}, } -// Total table size 26398 bytes (25KiB); checksum: 1C859EA7 +// Total table size 30244 bytes (29KiB); checksum: B6B15F30 diff --git a/internal/number/decimal.go b/internal/number/decimal.go index cb656db6c..37e0c4b98 100644 --- a/internal/number/decimal.go +++ b/internal/number/decimal.go @@ -33,13 +33,14 @@ const maxIntDigits = 20 // may point outside a valid position in Digits. // // Examples: -// Number Decimal -// 12345 Digits: [1, 2, 3, 4, 5], Exp: 5 -// 12.345 Digits: [1, 2, 3, 4, 5], Exp: 2 -// 12000 Digits: [1, 2], Exp: 5 -// 12000.00 Digits: [1, 2], Exp: 5 -// 0.00123 Digits: [1, 2, 3], Exp: -2 -// 0 Digits: [], Exp: 0 +// +// Number Decimal +// 12345 Digits: [1, 2, 3, 4, 5], Exp: 5 +// 12.345 Digits: [1, 2, 3, 4, 5], Exp: 2 +// 12000 Digits: [1, 2], Exp: 5 +// 12000.00 Digits: [1, 2], Exp: 5 +// 0.00123 Digits: [1, 2, 3], Exp: -2 +// 0 Digits: [], Exp: 0 type Decimal struct { digits @@ -60,22 +61,23 @@ type digits struct { // engineering notation. Digits must have at least one digit. 
// // Examples: -// Number Decimal -// decimal -// 12345 Digits: [1, 2, 3, 4, 5], Exp: 5 End: 5 -// 12.345 Digits: [1, 2, 3, 4, 5], Exp: 2 End: 5 -// 12000 Digits: [1, 2], Exp: 5 End: 5 -// 12000.00 Digits: [1, 2], Exp: 5 End: 7 -// 0.00123 Digits: [1, 2, 3], Exp: -2 End: 3 -// 0 Digits: [], Exp: 0 End: 1 -// scientific (actual exp is Exp - Comma) -// 0e0 Digits: [0], Exp: 1, End: 1, Comma: 1 -// .0e0 Digits: [0], Exp: 0, End: 1, Comma: 0 -// 0.0e0 Digits: [0], Exp: 1, End: 2, Comma: 1 -// 1.23e4 Digits: [1, 2, 3], Exp: 5, End: 3, Comma: 1 -// .123e5 Digits: [1, 2, 3], Exp: 5, End: 3, Comma: 0 -// engineering -// 12.3e3 Digits: [1, 2, 3], Exp: 5, End: 3, Comma: 2 +// +// Number Decimal +// decimal +// 12345 Digits: [1, 2, 3, 4, 5], Exp: 5 End: 5 +// 12.345 Digits: [1, 2, 3, 4, 5], Exp: 2 End: 5 +// 12000 Digits: [1, 2], Exp: 5 End: 5 +// 12000.00 Digits: [1, 2], Exp: 5 End: 7 +// 0.00123 Digits: [1, 2, 3], Exp: -2 End: 3 +// 0 Digits: [], Exp: 0 End: 1 +// scientific (actual exp is Exp - Comma) +// 0e0 Digits: [0], Exp: 1, End: 1, Comma: 1 +// .0e0 Digits: [0], Exp: 0, End: 1, Comma: 0 +// 0.0e0 Digits: [0], Exp: 1, End: 2, Comma: 1 +// 1.23e4 Digits: [1, 2, 3], Exp: 5, End: 3, Comma: 1 +// .123e5 Digits: [1, 2, 3], Exp: 5, End: 3, Comma: 0 +// engineering +// 12.3e3 Digits: [1, 2, 3], Exp: 5, End: 3, Comma: 2 type Digits struct { digits // End indicates the end position of the number. diff --git a/internal/testtext/codesize.go b/internal/testtext/codesize.go index 5fc5eaec7..cdaed81d5 100644 --- a/internal/testtext/codesize.go +++ b/internal/testtext/codesize.go @@ -7,7 +7,6 @@ package testtext import ( "bytes" "fmt" - "io/ioutil" "os" "os/exec" "path/filepath" @@ -16,21 +15,23 @@ import ( // CodeSize builds the given code sample and returns the binary size or en error // if an error occurred. 
The code sample typically will look like this: -// package main -// import "golang.org/x/text/somepackage" -// func main() { -// somepackage.Func() // reference Func to cause it to be linked in. -// } +// +// package main +// import "golang.org/x/text/somepackage" +// func main() { +// somepackage.Func() // reference Func to cause it to be linked in. +// } +// // See dict_test.go in the display package for an example. func CodeSize(s string) (int, error) { // Write the file. - tmpdir, err := ioutil.TempDir(os.TempDir(), "testtext") + tmpdir, err := os.MkdirTemp(os.TempDir(), "testtext") if err != nil { return 0, fmt.Errorf("testtext: failed to create tmpdir: %v", err) } defer os.RemoveAll(tmpdir) filename := filepath.Join(tmpdir, "main.go") - if err := ioutil.WriteFile(filename, []byte(s), 0644); err != nil { + if err := os.WriteFile(filename, []byte(s), 0644); err != nil { return 0, fmt.Errorf("testtext: failed to write main.go: %v", err) } diff --git a/internal/triegen/example_compact_test.go b/internal/triegen/example_compact_test.go index 7cf604ca4..95486d1ff 100644 --- a/internal/triegen/example_compact_test.go +++ b/internal/triegen/example_compact_test.go @@ -7,7 +7,6 @@ package triegen_test import ( "fmt" "io" - "io/ioutil" "golang.org/x/text/internal/triegen" ) @@ -17,12 +16,12 @@ func ExampleCompacter() { for r := rune(0); r < 10000; r += 64 { t.Insert(r, 0x9015BADA55^uint64(r)) } - sz, _ := t.Gen(ioutil.Discard) + sz, _ := t.Gen(io.Discard) fmt.Printf("Size normal: %5d\n", sz) var c myCompacter - sz, _ = t.Gen(ioutil.Discard, triegen.Compact(&c)) + sz, _ = t.Gen(io.Discard, triegen.Compact(&c)) fmt.Printf("Size compacted: %5d\n", sz) diff --git a/internal/triegen/example_test.go b/internal/triegen/example_test.go index 557a152e7..5823a2c34 100644 --- a/internal/triegen/example_test.go +++ b/internal/triegen/example_test.go @@ -6,7 +6,7 @@ package triegen_test import ( "fmt" - "io/ioutil" + "io" "math/rand" "unicode" @@ -15,7 +15,7 @@ import ( const seed 
= 0x12345 -var genWriter = ioutil.Discard +var genWriter = io.Discard func randomRunes() map[rune]uint8 { rnd := rand.New(rand.NewSource(seed)) diff --git a/internal/triegen/triegen.go b/internal/triegen/triegen.go index 51d218a30..de54a8075 100644 --- a/internal/triegen/triegen.go +++ b/internal/triegen/triegen.go @@ -34,23 +34,24 @@ // triegen generates both tables and code. The code is optimized to use the // automatically chosen data types. The following code is generated for a Trie // or multiple Tries named "foo": -// - type fooTrie -// The trie type. // -// - func newFooTrie(x int) *fooTrie -// Trie constructor, where x is the index of the trie passed to Gen. +// - type fooTrie +// The trie type. // -// - func (t *fooTrie) lookup(s []byte) (v uintX, sz int) -// The lookup method, where uintX is automatically chosen. +// - func newFooTrie(x int) *fooTrie +// Trie constructor, where x is the index of the trie passed to Gen. // -// - func lookupString, lookupUnsafe and lookupStringUnsafe -// Variants of the above. +// - func (t *fooTrie) lookup(s []byte) (v uintX, sz int) +// The lookup method, where uintX is automatically chosen. // -// - var fooValues and fooIndex and any tables generated by Compacters. -// The core trie data. +// - func lookupString, lookupUnsafe and lookupStringUnsafe +// Variants of the above. // -// - var fooTrieHandles -// Indexes of starter blocks in case of multiple trie roots. +// - var fooValues and fooIndex and any tables generated by Compacters. +// The core trie data. +// +// - var fooTrieHandles +// Indexes of starter blocks in case of multiple trie roots. // // It is recommended that users test the generated trie by checking the returned // value for every rune. 
Such exhaustive tests are possible as the number of diff --git a/internal/utf8internal/utf8internal.go b/internal/utf8internal/utf8internal.go index 575cea870..e5c53b1b3 100644 --- a/internal/utf8internal/utf8internal.go +++ b/internal/utf8internal/utf8internal.go @@ -74,7 +74,7 @@ type AcceptRange struct { // AcceptRanges is a slice of AcceptRange values. For a given byte sequence b // -// AcceptRanges[First[b[0]]>>AcceptShift] +// AcceptRanges[First[b[0]]>>AcceptShift] // // will give the value of AcceptRange for the multi-byte UTF-8 sequence starting // at b[0]. diff --git a/language/display/display.go b/language/display/display.go index d043da907..31bdd6c6b 100644 --- a/language/display/display.go +++ b/language/display/display.go @@ -85,7 +85,7 @@ func Script(script interface{}) Formatter { return Formatter{scriptFunc, script} } -// Script returns a Formatter that renders the name for tag in the current +// Tag returns a Formatter that renders the name for tag in the current // language. tag may be a language.Tag. // It renders tag in the default language if no translation for the current // language is supported. diff --git a/language/display/lookup.go b/language/display/lookup.go index e6dc0e016..88307753d 100644 --- a/language/display/lookup.go +++ b/language/display/lookup.go @@ -92,10 +92,10 @@ func nameTag(langN, scrN, regN namer, x interface{}) string { // offsets for a string in data. For example, consider a header that defines // strings for the languages de, el, en, fi, and nl: // -// header{ -// data: "GermanGreekEnglishDutch", -// index: []uint16{ 0, 6, 11, 18, 18, 23 }, -// } +// header{ +// data: "GermanGreekEnglishDutch", +// index: []uint16{0, 6, 11, 18, 18, 23}, +// } // // For a language with index i, the string is defined by // data[index[i]:index[i+1]]. 
So the number of elements in index is always one @@ -204,9 +204,11 @@ func supportedRegions() []language.Region { // for each length, which can be used in combination with binary search to get // the index associated with a tag. // For example, a tagIndex{ -// "arenesfrruzh", // 6 2-byte tags. -// "barwae", // 2 3-byte tags. -// "", +// +// "arenesfrruzh", // 6 2-byte tags. +// "barwae", // 2 3-byte tags. +// "", +// // } // would mean that the 2-byte tag "fr" had an index of 3, and the 3-byte tag // "wae" had an index of 7. diff --git a/language/doc.go b/language/doc.go index 8afecd50e..212b77c90 100644 --- a/language/doc.go +++ b/language/doc.go @@ -10,18 +10,17 @@ // and provides the user with the best experience // (see https://blog.golang.org/matchlang). // -// -// Matching preferred against supported languages +// # Matching preferred against supported languages // // A Matcher for an application that supports English, Australian English, // Danish, and standard Mandarin can be created as follows: // -// var matcher = language.NewMatcher([]language.Tag{ -// language.English, // The first language is used as fallback. -// language.MustParse("en-AU"), -// language.Danish, -// language.Chinese, -// }) +// var matcher = language.NewMatcher([]language.Tag{ +// language.English, // The first language is used as fallback. +// language.MustParse("en-AU"), +// language.Danish, +// language.Chinese, +// }) // // This list of supported languages is typically implied by the languages for // which there exists translations of the user interface. @@ -30,14 +29,14 @@ // language tags. 
// The MatchString finds best matches for such strings: // -// handler(w http.ResponseWriter, r *http.Request) { -// lang, _ := r.Cookie("lang") -// accept := r.Header.Get("Accept-Language") -// tag, _ := language.MatchStrings(matcher, lang.String(), accept) +// handler(w http.ResponseWriter, r *http.Request) { +// lang, _ := r.Cookie("lang") +// accept := r.Header.Get("Accept-Language") +// tag, _ := language.MatchStrings(matcher, lang.String(), accept) // -// // tag should now be used for the initialization of any -// // locale-specific service. -// } +// // tag should now be used for the initialization of any +// // locale-specific service. +// } // // The Matcher's Match method can be used to match Tags directly. // @@ -48,8 +47,7 @@ // For instance, it will know that a reader of Bokmål Danish can read Norwegian // and will know that Cantonese ("yue") is a good match for "zh-HK". // -// -// Using match results +// # Using match results // // To guarantee a consistent user experience to the user it is important to // use the same language tag for the selection of any locale-specific services. @@ -58,9 +56,9 @@ // More subtly confusing is using the wrong sorting order or casing // algorithm for a certain language. // -// All the packages in x/text that provide locale-specific services -// (e.g. collate, cases) should be initialized with the tag that was -// obtained at the start of an interaction with the user. +// All the packages in x/text that provide locale-specific services +// (e.g. collate, cases) should be initialized with the tag that was +// obtained at the start of an interaction with the user. // // Note that Tag that is returned by Match and MatchString may differ from any // of the supported languages, as it may contain carried over settings from @@ -70,8 +68,7 @@ // Match and MatchString both return the index of the matched supported tag // to simplify associating such data with the matched tag. 
// -// -// Canonicalization +// # Canonicalization // // If one uses the Matcher to compare languages one does not need to // worry about canonicalization. @@ -92,10 +89,9 @@ // equivalence relations. The CanonType type can be used to alter the // canonicalization form. // -// References +// # References // // BCP 47 - Tags for Identifying Languages http://tools.ietf.org/html/bcp47 -// package language // import "golang.org/x/text/language" // TODO: explanation on how to match languages for your own locale-specific diff --git a/language/examples_test.go b/language/examples_test.go index ad089a3f4..61401f252 100644 --- a/language/examples_test.go +++ b/language/examples_test.go @@ -247,24 +247,28 @@ func ExampleMatcher() { // This is the set of tags from which we want to pick the best match. These // can be, for example, the supported languages for some package. tags := []language.Tag{ - language.English, - language.BritishEnglish, - language.French, - language.Afrikaans, - language.BrazilianPortuguese, - language.EuropeanPortuguese, - language.Croatian, - language.SimplifiedChinese, - language.Raw.Make("iw-IL"), - language.Raw.Make("iw"), - language.Raw.Make("he"), + language.English, // en + language.BritishEnglish, // en-GB + language.French, // fr + language.Afrikaans, // af + language.BrazilianPortuguese, // pt-BR + language.EuropeanPortuguese, // pt-PT + language.SimplifiedChinese, // zh-Hans + language.Raw.Make("iw-IL"), // Hebrew from Israel + language.Raw.Make("iw"), // Hebrew + language.Raw.Make("he"), // Hebrew } m := language.NewMatcher(tags) // A simple match. fmt.Println(m.Match(language.Make("fr"))) - // Australian English is closer to British than American English. + // Australian English is closer to British English than American English. + // The resulting match is "en-GB-u-rg-auzzzz". The first language listed, + // "en-GB", is the matched language. 
Next is the region override prefix + // "-u-rg-", the region override "au", and the region override suffix "zzzz". + // The region override is for things like currency, dates, and measurement + // systems. fmt.Println(m.Match(language.Make("en-AU"))) // Default to the first tag passed to the Matcher if there is no match. @@ -275,15 +279,12 @@ func ExampleMatcher() { fmt.Println("----") - // Someone specifying sr-Latn is probably fine with getting Croatian. - fmt.Println(m.Match(language.Make("sr-Latn"))) - // We match SimplifiedChinese, but with Low confidence. fmt.Println(m.Match(language.TraditionalChinese)) - // Serbian in Latin script is a closer match to Croatian than Traditional - // Chinese to Simplified Chinese. - fmt.Println(m.Match(language.TraditionalChinese, language.Make("sr-Latn"))) + // British English is closer to Australian English than Traditional Chinese + // to Simplified Chinese. + fmt.Println(m.Match(language.TraditionalChinese, language.Make("en-AU"))) fmt.Println("----") @@ -297,7 +298,7 @@ func ExampleMatcher() { fmt.Println("----") - // If a Matcher is initialized with a language and it's deprecated version, + // If a Matcher is initialized with a language and its deprecated version, // it will distinguish between them. 
fmt.Println(m.Match(language.Raw.Make("iw"))) @@ -319,26 +320,23 @@ func ExampleMatcher() { // Output: // fr 2 Exact - // en-GB 1 High + // en-GB-u-rg-auzzzz 1 High // en 0 No // en 0 No // ---- - // hr 6 High - // zh-Hans 7 Low - // hr 6 High + // zh-Hans 6 Low + // en-GB-u-rg-auzzzz 1 High // ---- - // pt-BR 4 High - // fr 2 High - // af 3 High + // pt-BR 4 Exact + // fr-u-rg-bezzzz 2 High + // af-u-rg-nazzzz 3 High // ---- - // iw 9 Exact - // he 10 Exact + // iw-IL 7 Exact + // he-u-rg-ilzzzz 9 Exact // ---- // fr-u-cu-frf 2 Exact // fr-u-cu-frf 2 High // en-u-co-phonebk 0 No - - // TODO: "he" should be "he-u-rg-IL High" } func ExampleMatchStrings() { diff --git a/language/language_test.go b/language/language_test.go index b2e3ce3c5..d45706c98 100644 --- a/language/language_test.go +++ b/language/language_test.go @@ -723,6 +723,8 @@ var ( "en-t-t0-abcd", "en-t-nl-latn", "en-t-t0-abcd-x-a", + "en_t_pt_MLt", + "en-t-fr-est", } // Change, but not memory allocation required. benchSimpleChange = []string{ diff --git a/language/match.go b/language/match.go index f73492134..ee45f4947 100644 --- a/language/match.go +++ b/language/match.go @@ -545,7 +545,7 @@ type bestMatch struct { // match as the preferred match. // // If pin is true and have and tag are a strong match, it will henceforth only -// consider matches for this language. This corresponds to the nothing that most +// consider matches for this language. This corresponds to the idea that most // users have a strong preference for the first defined language. A user can // still prefer a second language over a dialect of the preferred language by // explicitly specifying dialects, e.g. "en, nl, en-GB". 
In this case pin should diff --git a/language/parse.go b/language/parse.go index 59b041008..b982d9e42 100644 --- a/language/parse.go +++ b/language/parse.go @@ -147,6 +147,7 @@ func update(b *language.Builder, part ...interface{}) (err error) { } var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight") +var errTagListTooLarge = errors.New("tag list exceeds max length") // ParseAcceptLanguage parses the contents of an Accept-Language header as // defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and @@ -164,6 +165,10 @@ func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) { } }() + if strings.Count(s, "-") > 1000 { + return nil, nil, errTagListTooLarge + } + var entry string for s != "" { if entry, s = split(s, ','); entry == "" { diff --git a/language/parse_test.go b/language/parse_test.go index 4b7e64db3..0eee033e6 100644 --- a/language/parse_test.go +++ b/language/parse_test.go @@ -129,6 +129,11 @@ func parseTests() []parseTest { {in: "en-t-nl-abcd", lang: "en", ext: "t-nl", invalid: true}, {in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"}, {in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}}, + {in: "en_t_pt_MLt", lang: "en", ext: "t-pt-mlt", changed: true}, + {in: "en-t-fr-est", lang: "en", ext: "t-fr-est", changed: false}, + {in: "fr-est", lang: "et", changed: true}, + {in: "fr-est-t-fr-est", lang: "et", ext: "t-fr-est", changed: true}, + {in: "fr-est-Cyrl", lang: "et", script: "Cyrl", changed: true}, // invalid {in: "", lang: "und", invalid: true}, {in: "-", lang: "und", invalid: true}, @@ -389,3 +394,16 @@ func TestParseAcceptLanguage(t *testing.T) { } } } + +func TestParseAcceptLanguageTooBig(t *testing.T) { + s := strings.Repeat("en-x-a-", 333) + _, _, err := ParseAcceptLanguage(s) + if err != language.ErrSyntax { + t.Errorf("ParseAcceptLanguage() unexpected error: got %v, want %v", err, language.ErrSyntax) + } + s += "en-x-a" + _, _, err = ParseAcceptLanguage(s) + if err != 
errTagListTooLarge { + t.Errorf("ParseAcceptLanguage() unexpected error: got %v, want %v", err, errTagListTooLarge) + } +} diff --git a/language/tables.go b/language/tables.go index 96b57f610..34a732b69 100644 --- a/language/tables.go +++ b/language/tables.go @@ -39,12 +39,12 @@ const ( _Hani = 57 _Hans = 59 _Hant = 60 - _Qaaa = 143 - _Qaai = 151 - _Qabx = 192 - _Zinh = 245 - _Zyyy = 250 - _Zzzz = 251 + _Qaaa = 147 + _Qaai = 155 + _Qabx = 196 + _Zinh = 252 + _Zyyy = 257 + _Zzzz = 258 ) var regionToGroups = []uint8{ // 358 elements @@ -265,9 +265,9 @@ var matchScript = []scriptIntelligibility{ // 26 elements 13: {wantLang: 0x39d, haveLang: 0x139, wantScript: 0x36, haveScript: 0x5a, distance: 0xa}, 14: {wantLang: 0x3be, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5a, distance: 0xa}, 15: {wantLang: 0x3fa, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5a, distance: 0xa}, - 16: {wantLang: 0x40c, haveLang: 0x139, wantScript: 0xcf, haveScript: 0x5a, distance: 0xa}, - 17: {wantLang: 0x450, haveLang: 0x139, wantScript: 0xde, haveScript: 0x5a, distance: 0xa}, - 18: {wantLang: 0x461, haveLang: 0x139, wantScript: 0xe1, haveScript: 0x5a, distance: 0xa}, + 16: {wantLang: 0x40c, haveLang: 0x139, wantScript: 0xd4, haveScript: 0x5a, distance: 0xa}, + 17: {wantLang: 0x450, haveLang: 0x139, wantScript: 0xe3, haveScript: 0x5a, distance: 0xa}, + 18: {wantLang: 0x461, haveLang: 0x139, wantScript: 0xe6, haveScript: 0x5a, distance: 0xa}, 19: {wantLang: 0x46f, haveLang: 0x139, wantScript: 0x2c, haveScript: 0x5a, distance: 0xa}, 20: {wantLang: 0x476, haveLang: 0x3e2, wantScript: 0x5a, haveScript: 0x20, distance: 0xa}, 21: {wantLang: 0x4b4, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5a, distance: 0xa}, diff --git a/message/catalog/catalog.go b/message/catalog/catalog.go index de595b510..96955d075 100644 --- a/message/catalog/catalog.go +++ b/message/catalog/catalog.go @@ -13,8 +13,7 @@ // language. The Loader interface defines a source of dictionaries. 
A // translation of a format string is represented by a Message. // -// -// Catalogs +// # Catalogs // // A Catalog defines a programmatic interface for setting message translations. // It maintains a set of per-language dictionaries with translations for a set @@ -24,8 +23,7 @@ // the key. For example, a Dictionary for "en-GB" could leave out entries that // are identical to those in a dictionary for "en". // -// -// Messages +// # Messages // // A Message is a format string which varies on the value of substitution // variables. For instance, to indicate the number of results one could want "no @@ -39,8 +37,7 @@ // to selected string. This separation of concerns allows Catalog to be used to // store any kind of formatting strings. // -// -// Selecting messages based on linguistic features of substitution arguments +// # Selecting messages based on linguistic features of substitution arguments // // Messages may vary based on any linguistic features of the argument values. // The most common one is plural form, but others exist. @@ -48,10 +45,10 @@ // Selection messages are provided in packages that provide support for a // specific linguistic feature. The following snippet uses plural.Selectf: // -// catalog.Set(language.English, "You are %d minute(s) late.", -// plural.Selectf(1, "", -// plural.One, "You are 1 minute late.", -// plural.Other, "You are %d minutes late.")) +// catalog.Set(language.English, "You are %d minute(s) late.", +// plural.Selectf(1, "", +// plural.One, "You are 1 minute late.", +// plural.Other, "You are %d minutes late.")) // // In this example, a message is stored in the Catalog where one of two messages // is selected based on the first argument, a number. The first message is @@ -64,47 +61,46 @@ // Selects can be nested. This allows selecting sentences based on features of // multiple arguments or multiple linguistic properties of a single argument. 
// -// -// String interpolation +// # String interpolation // // There is often a lot of commonality between the possible variants of a // message. For instance, in the example above the word "minute" varies based on // the plural catogory of the argument, but the rest of the sentence is // identical. Using interpolation the above message can be rewritten as: // -// catalog.Set(language.English, "You are %d minute(s) late.", -// catalog.Var("minutes", -// plural.Selectf(1, "", plural.One, "minute", plural.Other, "minutes")), -// catalog.String("You are %[1]d ${minutes} late.")) +// catalog.Set(language.English, "You are %d minute(s) late.", +// catalog.Var("minutes", +// plural.Selectf(1, "", plural.One, "minute", plural.Other, "minutes")), +// catalog.String("You are %[1]d ${minutes} late.")) // // Var is defined to return the variable name if the message does not yield a // match. This allows us to further simplify this snippet to // -// catalog.Set(language.English, "You are %d minute(s) late.", -// catalog.Var("minutes", plural.Selectf(1, "", plural.One, "minute")), -// catalog.String("You are %d ${minutes} late.")) +// catalog.Set(language.English, "You are %d minute(s) late.", +// catalog.Var("minutes", plural.Selectf(1, "", plural.One, "minute")), +// catalog.String("You are %d ${minutes} late.")) // // Overall this is still only a minor improvement, but things can get a lot more // unwieldy if more than one linguistic feature is used to determine a message // variant. 
Consider the following example: // -// // argument 1: list of hosts, argument 2: list of guests -// catalog.Set(language.English, "%[1]v invite(s) %[2]v to their party.", -// catalog.Var("their", -// plural.Selectf(1, "" -// plural.One, gender.Select(1, "female", "her", "other", "his"))), -// catalog.Var("invites", plural.Selectf(1, "", plural.One, "invite")) -// catalog.String("%[1]v ${invites} %[2]v to ${their} party.")), +// // argument 1: list of hosts, argument 2: list of guests +// catalog.Set(language.English, "%[1]v invite(s) %[2]v to their party.", +// catalog.Var("their", +// plural.Selectf(1, "" +// plural.One, gender.Select(1, "female", "her", "other", "his"))), +// catalog.Var("invites", plural.Selectf(1, "", plural.One, "invite")) +// catalog.String("%[1]v ${invites} %[2]v to ${their} party.")), // // Without variable substitution, this would have to be written as // -// // argument 1: list of hosts, argument 2: list of guests -// catalog.Set(language.English, "%[1]v invite(s) %[2]v to their party.", -// plural.Selectf(1, "", -// plural.One, gender.Select(1, -// "female", "%[1]v invites %[2]v to her party." -// "other", "%[1]v invites %[2]v to his party."), -// plural.Other, "%[1]v invites %[2]v to their party.") +// // argument 1: list of hosts, argument 2: list of guests +// catalog.Set(language.English, "%[1]v invite(s) %[2]v to their party.", +// plural.Selectf(1, "", +// plural.One, gender.Select(1, +// "female", "%[1]v invites %[2]v to her party." +// "other", "%[1]v invites %[2]v to his party."), +// plural.Other, "%[1]v invites %[2]v to their party.")) // // Not necessarily shorter, but using variables there is less duplication and // the messages are more maintenance friendly. Moreover, languages may have up @@ -113,33 +109,32 @@ // Different messages using the same inflections can reuse variables by moving // them to macros. 
Using macros we can rewrite the message as: // -// // argument 1: list of hosts, argument 2: list of guests -// catalog.SetString(language.English, "%[1]v invite(s) %[2]v to their party.", -// "%[1]v ${invites(1)} %[2]v to ${their(1)} party.") +// // argument 1: list of hosts, argument 2: list of guests +// catalog.SetString(language.English, "%[1]v invite(s) %[2]v to their party.", +// "%[1]v ${invites(1)} %[2]v to ${their(1)} party.") // // Where the following macros were defined separately. // -// catalog.SetMacro(language.English, "invites", plural.Selectf(1, "", -// plural.One, "invite")) -// catalog.SetMacro(language.English, "their", plural.Selectf(1, "", -// plural.One, gender.Select(1, "female", "her", "other", "his"))), +// catalog.SetMacro(language.English, "invites", plural.Selectf(1, "", +// plural.One, "invite")) +// catalog.SetMacro(language.English, "their", plural.Selectf(1, "", +// plural.One, gender.Select(1, "female", "her", "other", "his"))), // // Placeholders use parentheses and the arguments to invoke a macro. // -// -// Looking up messages +// # Looking up messages // // Message lookup using Catalogs is typically only done by specialized packages // and is not something the user should be concerned with. For instance, to // express the tardiness of a user using the related message we defined earlier, // the user may use the package message like so: // -// p := message.NewPrinter(language.English) -// p.Printf("You are %d minute(s) late.", 5) +// p := message.NewPrinter(language.English) +// p.Printf("You are %d minute(s) late.", 5) // // Which would print: -// You are 5 minutes late. // +// You are 5 minutes late. // // This package is UNDER CONSTRUCTION and its API may change. 
package catalog // import "golang.org/x/text/message/catalog" diff --git a/message/doc.go b/message/doc.go index 72e8fde71..4bf7bdcac 100644 --- a/message/doc.go +++ b/message/doc.go @@ -5,22 +5,21 @@ // Package message implements formatted I/O for localized strings with functions // analogous to the fmt's print functions. It is a drop-in replacement for fmt. // -// -// Localized Formatting +// # Localized Formatting // // A format string can be localized by replacing any of the print functions of // fmt with an equivalent call to a Printer. // -// p := message.NewPrinter(message.MatchLanguage("en")) -// p.Println(123456.78) // Prints 123,456.78 +// p := message.NewPrinter(message.MatchLanguage("en")) +// p.Println(123456.78) // Prints 123,456.78 // -// p.Printf("%d ducks in a row", 4331) // Prints 4,331 ducks in a row +// p.Printf("%d ducks in a row", 4331) // Prints 4,331 ducks in a row // -// p := message.NewPrinter(message.MatchLanguage("nl")) -// p.Printf("Hoogte: %.1f meter", 1244.9) // Prints Hoogte: 1,244.9 meter +// p := message.NewPrinter(message.MatchLanguage("nl")) +// p.Printf("Hoogte: %.1f meter", 1244.9) // Prints Hoogte: 1,244.9 meter // -// p := message.NewPrinter(message.MatchLanguage("bn")) -// p.Println(123456.78) // Prints ১,২৩,৪৫৬.৭৮ +// p := message.NewPrinter(message.MatchLanguage("bn")) +// p.Println(123456.78) // Prints ১,২৩,৪৫৬.৭৮ // // Printer currently supports numbers and specialized types for which packages // exist in x/text. Other builtin types such as time.Time and slices are @@ -35,8 +34,7 @@ // // See package fmt for more options. // -// -// Translation +// # Translation // // The format strings that are passed to Printf, Sprintf, Fprintf, or Errorf // are used as keys to look up translations for the specified languages. 
@@ -44,34 +42,36 @@ // // One can use arbitrary keys to distinguish between otherwise ambiguous // strings: -// p := message.NewPrinter(language.English) -// p.Printf("archive(noun)") // Prints "archive" -// p.Printf("archive(verb)") // Prints "archive" // -// p := message.NewPrinter(language.German) -// p.Printf("archive(noun)") // Prints "Archiv" -// p.Printf("archive(verb)") // Prints "archivieren" +// p := message.NewPrinter(language.English) +// p.Printf("archive(noun)") // Prints "archive" +// p.Printf("archive(verb)") // Prints "archive" +// +// p := message.NewPrinter(language.German) +// p.Printf("archive(noun)") // Prints "Archiv" +// p.Printf("archive(verb)") // Prints "archivieren" // // To retain the fallback functionality, use Key: -// p.Printf(message.Key("archive(noun)", "archive")) -// p.Printf(message.Key("archive(verb)", "archive")) // +// p.Printf(message.Key("archive(noun)", "archive")) +// p.Printf(message.Key("archive(verb)", "archive")) // -// Translation Pipeline +// # Translation Pipeline // // Format strings that contain text need to be translated to support different // locales. The first step is to extract strings that need to be translated. // // 1. Install gotext -// go get -u golang.org/x/text/cmd/gotext -// gotext -help +// +// go get -u golang.org/x/text/cmd/gotext +// gotext -help // // 2. Mark strings in your source to be translated by using message.Printer, // instead of the functions of the fmt package. // // 3. Extract the strings from your source // -// gotext extract +// gotext extract // // The output will be written to the textdata directory. // @@ -89,13 +89,11 @@ // see also package golang.org/x/text/message/catalog can be used to implement // either dynamic or static loading of messages. // -// -// Plural and Gender Forms +// # Plural and Gender Forms // // Translated messages can vary based on the plural and gender forms of // substitution values. 
In general, it is up to the translators to provide // alternative translations for such forms. See the packages in // golang.org/x/text/feature and golang.org/x/text/message/catalog for more // information. -// package message diff --git a/message/pipeline/pipeline_test.go b/message/pipeline/pipeline_test.go index fe3b5daff..f4cd42a69 100644 --- a/message/pipeline/pipeline_test.go +++ b/message/pipeline/pipeline_test.go @@ -11,7 +11,6 @@ import ( "flag" "fmt" "go/build" - "io/ioutil" "os" "os/exec" "path" @@ -33,11 +32,14 @@ func TestFullCycle(t *testing.T) { if runtime.GOOS == "android" { t.Skip("cannot load outside packages on android") } + if b := os.Getenv("GO_BUILDER_NAME"); b == "plan9-arm" { + t.Skipf("skipping: test frequently times out on %s", b) + } if _, err := exec.LookPath("go"); err != nil { t.Skipf("skipping because 'go' command is unavailable: %v", err) } - GOPATH, err := ioutil.TempDir("", "pipeline_test") + GOPATH, err := os.MkdirTemp("", "pipeline_test") if err != nil { t.Fatal(err) } @@ -63,7 +65,7 @@ func TestFullCycle(t *testing.T) { wd, _ := os.Getwd() defer os.Chdir(wd) - dirs, err := ioutil.ReadDir(testdata) + dirs, err := os.ReadDir(testdata) if err != nil { t.Fatal(err) } @@ -121,11 +123,11 @@ func copyTestdata(t *testing.T, dst string) { return os.MkdirAll(filepath.Join(dst, rel), 0755) } - data, err := ioutil.ReadFile(p) + data, err := os.ReadFile(p) if err != nil { return err } - return ioutil.WriteFile(filepath.Join(dst, rel), data, 0644) + return os.WriteFile(filepath.Join(dst, rel), data, 0644) }) if err != nil { t.Fatal(err) @@ -139,16 +141,42 @@ func initTestdataModule(t *testing.T, dst string) { } goMod := fmt.Sprintf(`module testdata -go 1.11 -require golang.org/x/text v0.0.0-00010101000000-000000000000 -replace golang.org/x/text v0.0.0-00010101000000-000000000000 => %s + +replace golang.org/x/text => %s `, xTextDir) - if err := ioutil.WriteFile(filepath.Join(dst, "go.mod"), []byte(goMod), 0644); err != nil { + if err := 
os.WriteFile(filepath.Join(dst, "go.mod"), []byte(goMod), 0644); err != nil { + t.Fatal(err) + } + + // Copy in the checksums from the parent module so that we won't + // need to re-fetch them from the checksum database. + data, err := os.ReadFile(filepath.Join(xTextDir, "go.sum")) + if err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(dst, "go.sum"), data, 0644); err != nil { t.Fatal(err) } - data, err := ioutil.ReadFile(filepath.Join(xTextDir, "go.sum")) - if err := ioutil.WriteFile(filepath.Join(dst, "go.sum"), data, 0644); err != nil { + // We've added a replacement for the parent version of x/text, + // but now we need to populate the correct version. + // (We can't just replace the zero-version because x/text + // may indirectly depend on some nonzero version of itself.) + // + // We use 'go get' instead of 'go mod tidy' to avoid the old-release + // compatibility check when graph pruning is enabled, and to avoid doing + // more work than necessary for test dependencies of imported packages + // (we're not going to run those tests here anyway). + // + // We 'go get' the packages in the testdata module — not specific dependencies + // of those packages — so that they will resolve to whatever version is + // already required in the (replaced) x/text go.mod file. 
+ + getCmd := exec.Command("go", "get", "-d", "./...") + getCmd.Dir = dst + getCmd.Env = append(os.Environ(), "PWD="+dst, "GOPROXY=off", "GOFLAGS=-mod=mod") + if out, err := getCmd.CombinedOutput(); err != nil { + t.Logf("%s", out) t.Fatal(err) } } @@ -165,17 +193,17 @@ func checkOutput(t *testing.T, gen string, testdataDir string) { return nil } - got, err := ioutil.ReadFile(gotFile) + got, err := os.ReadFile(gotFile) if err != nil { t.Errorf("failed to read %q", gotFile) return nil } if *genFiles { - if err := ioutil.WriteFile(wantFile, got, 0644); err != nil { + if err := os.WriteFile(wantFile, got, 0644); err != nil { t.Fatal(err) } } - want, err := ioutil.ReadFile(wantFile) + want, err := os.ReadFile(wantFile) if err != nil { t.Errorf("failed to read %q", wantFile) } else { @@ -213,7 +241,7 @@ func writeJSON(t *testing.T, path string, x interface{}) { if err != nil { t.Fatal(err) } - if err := ioutil.WriteFile(path, data, 0644); err != nil { + if err := os.WriteFile(path, data, 0644); err != nil { t.Fatal(err) } } diff --git a/number/doc.go b/number/doc.go index 925383acc..876623086 100644 --- a/number/doc.go +++ b/number/doc.go @@ -9,19 +9,18 @@ // builtin Go types and anything that implements the Convert interface // (currently internal). // -// p := message.NewPrinter(language.English) +// p := message.NewPrinter(language.English) // -// p.Printf("%v bottles of beer on the wall.", number.Decimal(1234)) -// // Prints: 1,234 bottles of beer on the wall. +// p.Printf("%v bottles of beer on the wall.", number.Decimal(1234)) +// // Prints: 1,234 bottles of beer on the wall. // -// p.Printf("%v of gophers lose too much fur", number.Percent(0.12)) -// // Prints: 12% of gophers lose too much fur. +// p.Printf("%v of gophers lose too much fur", number.Percent(0.12)) +// // Prints: 12% of gophers lose too much fur.
// -// p := message.NewPrinter(language.Dutch) -// -// p.Printf("Er zijn %v fietsen per huishouden.", number.Decimal(1.2)) -// // Prints: Er zijn 1,2 fietsen per huishouden. +// p := message.NewPrinter(language.Dutch) // +// p.Printf("Er zijn %v fietsen per huishouden.", number.Decimal(1.2)) +// // Prints: Er zijn 1,2 fietsen per huishouden. // // The width and scale specified in the formatting directives override the // configuration of the formatter. diff --git a/secure/precis/profile.go b/secure/precis/profile.go index 35bd6f0a5..bdd991bb9 100644 --- a/secure/precis/profile.go +++ b/secure/precis/profile.go @@ -316,7 +316,7 @@ func (p *Profile) Compare(a, b string) bool { return false } - return bytes.Compare(akey, bkey) == 0 + return bytes.Equal(akey, bkey) } // Allowed returns a runes.Set containing every rune that is a member of the diff --git a/transform/transform_test.go b/transform/transform_test.go index 273abfa52..62fad2bc9 100644 --- a/transform/transform_test.go +++ b/transform/transform_test.go @@ -8,7 +8,7 @@ import ( "bytes" "errors" "fmt" - "io/ioutil" + "io" "strconv" "strings" "testing" @@ -648,7 +648,7 @@ func TestReader(t *testing.T) { // exported API. We override them manually. r.dst = make([]byte, tc.dstSize) r.src = make([]byte, tc.srcSize) - got, err := ioutil.ReadAll(r) + got, err := io.ReadAll(r) str := string(got) if str != tc.wantStr || err != tc.wantErr { t.Errorf("\ngot %q, %v\nwant %q, %v", str, err, tc.wantStr, tc.wantErr) diff --git a/unicode/bidi/core.go b/unicode/bidi/core.go index e4c081101..9d2ae547b 100644 --- a/unicode/bidi/core.go +++ b/unicode/bidi/core.go @@ -193,14 +193,14 @@ func (p *paragraph) run() { // // At the end of this function: // -// - The member variable matchingPDI is set to point to the index of the -// matching PDI character for each isolate initiator character. If there is -// no matching PDI, it is set to the length of the input text. For other -// characters, it is set to -1.
-// - The member variable matchingIsolateInitiator is set to point to the -// index of the matching isolate initiator character for each PDI character. -// If there is no matching isolate initiator, or the character is not a PDI, -// it is set to -1. +// - The member variable matchingPDI is set to point to the index of the +// matching PDI character for each isolate initiator character. If there is +// no matching PDI, it is set to the length of the input text. For other +// characters, it is set to -1. +// - The member variable matchingIsolateInitiator is set to point to the +// index of the matching isolate initiator character for each PDI character. +// If there is no matching isolate initiator, or the character is not a PDI, +// it is set to -1. func (p *paragraph) determineMatchingIsolates() { p.matchingPDI = make([]int, p.Len()) p.matchingIsolateInitiator = make([]int, p.Len()) @@ -435,7 +435,7 @@ func maxLevel(a, b level) level { } // Rule X10, second bullet: Determine the start-of-sequence (sos) and end-of-sequence (eos) types, -// either L or R, for each isolating run sequence. +// either L or R, for each isolating run sequence. func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence { length := len(indexes) types := make([]Class, length) @@ -495,9 +495,9 @@ func (s *isolatingRunSequence) resolveWeakTypes() { if t == NSM { s.types[i] = precedingCharacterType } else { - if t.in(LRI, RLI, FSI, PDI) { - precedingCharacterType = ON - } + // if t.in(LRI, RLI, FSI, PDI) { + // precedingCharacterType = ON + // } precedingCharacterType = t } } @@ -905,7 +905,7 @@ func (p *paragraph) getLevels(linebreaks []int) []level { // Lines are concatenated from left to right. 
So for example, the fifth // character from the left on the third line is // -// getReordering(linebreaks)[linebreaks[1] + 4] +// getReordering(linebreaks)[linebreaks[1] + 4] // // (linebreaks[1] is the position after the last character of the second // line, which is also the index of the first character on the third line, diff --git a/unicode/cldr/collate.go b/unicode/cldr/collate.go index 27c5bac9a..056fe7f7f 100644 --- a/unicode/cldr/collate.go +++ b/unicode/cldr/collate.go @@ -98,9 +98,13 @@ func processRules(p RuleProcessor, s string) (err error) { } // parseSpecialAnchor parses the anchor syntax which is either of the form -// ['before' <level>] <anchor> +// +// ['before' <level>] <anchor> +// // or -// [