From 2ca5a52dcb2f184d71fbbf2a28372c5882d990a8 Mon Sep 17 00:00:00 2001 From: Rich Hong Date: Mon, 28 Sep 2020 16:32:38 -0400 Subject: [PATCH 01/27] internal/language: fix canonicalization of extlang parseTag tries to replace <lang>-<extlang> with <extlang>, but <extlang> itself can also be replaced with its canonical form which can be a different length than the original <extlang>. The existing implementation assumes that the length of <extlang> is 3 and would leave scanner positions in an incorrect state if the length of <extlang> is not 3. Fixes golang/go#41617 Change-Id: Ie0da320530e2545f9b521e7b8cf503d854c50b45 Reviewed-on: https://go-review.googlesource.com/c/text/+/260177 Run-TryBot: Alberto Donizetti TryBot-Result: Go Bot Trust: Alberto Donizetti Trust: Cherry Mui Trust: Marcel van Lohuizen Reviewed-by: Marcel van Lohuizen --- internal/language/compact/parse_test.go | 5 +++++ internal/language/language_test.go | 2 ++ internal/language/parse.go | 24 ++++++++++++++---------- internal/language/parse_test.go | 10 +++++++++- language/language_test.go | 2 ++ language/parse_test.go | 5 +++++ 6 files changed, 37 insertions(+), 11 deletions(-) diff --git a/internal/language/compact/parse_test.go b/internal/language/compact/parse_test.go index abe3a58c0..2db200b88 100644 --- a/internal/language/compact/parse_test.go +++ b/internal/language/compact/parse_test.go @@ -122,6 +122,11 @@ func parseTests() []parseTest { {in: "en-t-nl-abcd", lang: "en", ext: "t-nl", invalid: true}, {in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"}, {in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}}, + {in: "en_t_pt_MLt", lang: "en", ext: "t-pt-mlt", changed: true}, + {in: "en-t-fr-est", lang: "en", ext: "t-fr-est", changed: false}, + {in: "fr-est", lang: "et", changed: true}, + {in: "fr-est-t-fr-est", lang: "et", ext: "t-fr-est", changed: true}, + {in: "fr-est-Cyrl", lang: "et", script: "Cyrl", changed: true}, // invalid {in: "", lang: "und", invalid: true}, {in: "-", lang: "und", invalid: true}, diff --git 
a/internal/language/language_test.go b/internal/language/language_test.go index 8244c1c8a..668034d03 100644 --- a/internal/language/language_test.go +++ b/internal/language/language_test.go @@ -681,6 +681,8 @@ var ( "en-t-t0-abcd", "en-t-nl-latn", "en-t-t0-abcd-x-a", + "en_t_pt_MLt", + "en-t-fr-est", } // Change, but not memory allocation required. benchSimpleChange = []string{ diff --git a/internal/language/parse.go b/internal/language/parse.go index 47ee0fed1..aad1e0acf 100644 --- a/internal/language/parse.go +++ b/internal/language/parse.go @@ -270,7 +270,7 @@ func parse(scan *scanner, s string) (t Tag, err error) { } else if n >= 4 { return Und, ErrSyntax } else { // the usual case - t, end = parseTag(scan) + t, end = parseTag(scan, true) if n := len(scan.token); n == 1 { t.pExt = uint16(end) end = parseExtensions(scan) @@ -296,7 +296,8 @@ func parse(scan *scanner, s string) (t Tag, err error) { // parseTag parses language, script, region and variants. // It returns a Tag and the end position in the input that was parsed. -func parseTag(scan *scanner) (t Tag, end int) { +// If doNorm is true, then <lang>-<extlang> will be normalized to <extlang>. +func parseTag(scan *scanner, doNorm bool) (t Tag, end int) { var e error // TODO: set an error if an unknown lang, script or region is encountered. t.LangID, e = getLangID(scan.token) @@ -307,14 +308,17 @@ func parseTag(scan *scanner) (t Tag, end int) { for len(scan.token) == 3 && isAlpha(scan.token[0]) { // From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent // to a tag of the form <extlang>. 
- lang, e := getLangID(scan.token) - if lang != 0 { - t.LangID = lang - copy(scan.b[langStart:], lang.String()) - scan.b[langStart+3] = '-' - scan.start = langStart + 4 + if doNorm { + lang, e := getLangID(scan.token) + if lang != 0 { + t.LangID = lang + langStr := lang.String() + copy(scan.b[langStart:], langStr) + scan.b[langStart+len(langStr)] = '-' + scan.start = langStart + len(langStr) + 1 + } + scan.gobble(e) } - scan.gobble(e) end = scan.scan() } if len(scan.token) == 4 && isAlpha(scan.token[0]) { @@ -559,7 +563,7 @@ func parseExtension(scan *scanner) int { case 't': // https://www.ietf.org/rfc/rfc6497.txt scan.scan() if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) { - _, end = parseTag(scan) + _, end = parseTag(scan, false) scan.toLower(start, end) } for len(scan.token) == 2 && !isAlpha(scan.token[1]) { diff --git a/internal/language/parse_test.go b/internal/language/parse_test.go index e1d428aa6..0af9e8a25 100644 --- a/internal/language/parse_test.go +++ b/internal/language/parse_test.go @@ -192,6 +192,14 @@ func parseTests() []parseTest { {in: "en-t-nl-abcd", lang: "en", ext: "t-nl", invalid: true}, {in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"}, {in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}}, + {in: "en_t_pt_MLt", lang: "en", ext: "t-pt-mlt", changed: true}, + {in: "en-t-fr-est", lang: "en", ext: "t-fr-est", changed: false}, + {in: "fr-est", lang: "et", changed: false}, + {in: "fr-est-Cyrl", lang: "et", script: "Cyrl", changed: false}, + // The same input here is used in both TestParse and TestParseExtensions. + // changed should be true for this input in TestParse but changed should be false for this input in TestParseExtensions + // because the entire input has been reformatted but the extension part hasn't. 
+ // {in: "fr-est-t-fr-est", lang: "et", ext: "t-fr-est", changed: true}, // invalid {in: "", lang: "und", invalid: true}, {in: "-", lang: "und", invalid: true}, @@ -299,7 +307,7 @@ func TestParseTag(t *testing.T) { return Tag{}, true } scan := makeScannerString(tt.in) - id, end := parseTag(&scan) + id, end := parseTag(&scan, true) id.str = string(scan.b[:end]) tt.ext = "" tt.extList = []string{} diff --git a/language/language_test.go b/language/language_test.go index b2e3ce3c5..d45706c98 100644 --- a/language/language_test.go +++ b/language/language_test.go @@ -723,6 +723,8 @@ var ( "en-t-t0-abcd", "en-t-nl-latn", "en-t-t0-abcd-x-a", + "en_t_pt_MLt", + "en-t-fr-est", } // Change, but not memory allocation required. benchSimpleChange = []string{ diff --git a/language/parse_test.go b/language/parse_test.go index 4b7e64db3..e1e5653ad 100644 --- a/language/parse_test.go +++ b/language/parse_test.go @@ -129,6 +129,11 @@ func parseTests() []parseTest { {in: "en-t-nl-abcd", lang: "en", ext: "t-nl", invalid: true}, {in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"}, {in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}}, + {in: "en_t_pt_MLt", lang: "en", ext: "t-pt-mlt", changed: true}, + {in: "en-t-fr-est", lang: "en", ext: "t-fr-est", changed: false}, + {in: "fr-est", lang: "et", changed: true}, + {in: "fr-est-t-fr-est", lang: "et", ext: "t-fr-est", changed: true}, + {in: "fr-est-Cyrl", lang: "et", script: "Cyrl", changed: true}, // invalid {in: "", lang: "und", invalid: true}, {in: "-", lang: "und", invalid: true}, From 29bf0102df0c3c8844ae296826d1a9e7302f26df Mon Sep 17 00:00:00 2001 From: Marcel van Lohuizen Date: Mon, 3 May 2021 20:14:09 +0000 Subject: [PATCH 02/27] Revert "number: match input example to be Dutch as in the output" This reverts commit c2d28a6ddf6cb833e996ccb00cbb4206394958d2. Reason for revert: This change was incorrect. The examples were supposed to demonstrate that the API can provide translations. 
Change-Id: I247d5509136d34ce4c82a8ac2de50dad3f652a78 Reviewed-on: https://go-review.googlesource.com/c/text/+/316529 Reviewed-by: Emmanuel Odeke Reviewed-by: Alberto Donizetti Run-TryBot: Emmanuel Odeke Run-TryBot: Alberto Donizetti Trust: Cherry Mui --- number/doc.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/number/doc.go b/number/doc.go index 925383acc..2ad8d431a 100644 --- a/number/doc.go +++ b/number/doc.go @@ -19,7 +19,7 @@ // // p := message.NewPrinter(language.Dutch) // -// p.Printf("Er zijn %v fietsen per huishouden.", number.Decimal(1.2)) +// p.Printf("There are %v bikes per household.", number.Decimal(1.2)) // // Prints: Er zijn 1,2 fietsen per huishouden. // // From 22bfdb67013842b9fed978a6f362b3edee086066 Mon Sep 17 00:00:00 2001 From: Amelia Downs Date: Mon, 13 Sep 2021 16:17:04 -0400 Subject: [PATCH 03/27] language: fix ExampleMatcher output This removes the "TODO" after the output section, which was preventing this test from running properly and from running in pkg.go.dev. Fixing this revealed that many of the outputs were out of date and needed to be updated as well. Fixes: golang/go#48361 Change-Id: I495e4eb83f071d4d9810bf44bfabe3258a6220c8 Reviewed-on: https://go-review.googlesource.com/c/text/+/349552 Trust: Alberto Donizetti Trust: Marcel van Lohuizen Run-TryBot: Alberto Donizetti Run-TryBot: Marcel van Lohuizen TryBot-Result: Go Bot Reviewed-by: Alberto Donizetti Reviewed-by: Amelia Downs Reviewed-by: Marcel van Lohuizen --- language/examples_test.go | 58 +++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/language/examples_test.go b/language/examples_test.go index ad089a3f4..61401f252 100644 --- a/language/examples_test.go +++ b/language/examples_test.go @@ -247,24 +247,28 @@ func ExampleMatcher() { // This is the set of tags from which we want to pick the best match. These // can be, for example, the supported languages for some package. 
tags := []language.Tag{ - language.English, - language.BritishEnglish, - language.French, - language.Afrikaans, - language.BrazilianPortuguese, - language.EuropeanPortuguese, - language.Croatian, - language.SimplifiedChinese, - language.Raw.Make("iw-IL"), - language.Raw.Make("iw"), - language.Raw.Make("he"), + language.English, // en + language.BritishEnglish, // en-GB + language.French, // fr + language.Afrikaans, // af + language.BrazilianPortuguese, // pt-BR + language.EuropeanPortuguese, // pt-PT + language.SimplifiedChinese, // zh-Hans + language.Raw.Make("iw-IL"), // Hebrew from Israel + language.Raw.Make("iw"), // Hebrew + language.Raw.Make("he"), // Hebrew } m := language.NewMatcher(tags) // A simple match. fmt.Println(m.Match(language.Make("fr"))) - // Australian English is closer to British than American English. + // Australian English is closer to British English than American English. + // The resulting match is "en-GB-u-rg-auzzzz". The first language listed, + // "en-GB", is the matched language. Next is the region override prefix + // "-u-rg-", the region override "au", and the region override suffix "zzzz". + // The region override is for things like currency, dates, and measurement + // systems. fmt.Println(m.Match(language.Make("en-AU"))) // Default to the first tag passed to the Matcher if there is no match. @@ -275,15 +279,12 @@ func ExampleMatcher() { fmt.Println("----") - // Someone specifying sr-Latn is probably fine with getting Croatian. - fmt.Println(m.Match(language.Make("sr-Latn"))) - // We match SimplifiedChinese, but with Low confidence. fmt.Println(m.Match(language.TraditionalChinese)) - // Serbian in Latin script is a closer match to Croatian than Traditional - // Chinese to Simplified Chinese. - fmt.Println(m.Match(language.TraditionalChinese, language.Make("sr-Latn"))) + // British English is closer to Australian English than Traditional Chinese + // to Simplified Chinese. 
+ fmt.Println(m.Match(language.TraditionalChinese, language.Make("en-AU"))) fmt.Println("----") @@ -297,7 +298,7 @@ func ExampleMatcher() { fmt.Println("----") - // If a Matcher is initialized with a language and it's deprecated version, + // If a Matcher is initialized with a language and its deprecated version, // it will distinguish between them. fmt.Println(m.Match(language.Raw.Make("iw"))) @@ -319,26 +320,23 @@ func ExampleMatcher() { // Output: // fr 2 Exact - // en-GB 1 High + // en-GB-u-rg-auzzzz 1 High // en 0 No // en 0 No // ---- - // hr 6 High - // zh-Hans 7 Low - // hr 6 High + // zh-Hans 6 Low + // en-GB-u-rg-auzzzz 1 High // ---- - // pt-BR 4 High - // fr 2 High - // af 3 High + // pt-BR 4 Exact + // fr-u-rg-bezzzz 2 High + // af-u-rg-nazzzz 3 High // ---- - // iw 9 Exact - // he 10 Exact + // iw-IL 7 Exact + // he-u-rg-ilzzzz 9 Exact // ---- // fr-u-cu-frf 2 Exact // fr-u-cu-frf 2 High // en-u-co-phonebk 0 No - - // TODO: "he" should be "he-u-rg-IL High" } func ExampleMatchStrings() { From 1b993004bb667a0b60e9bff6aa6dd173d4103d42 Mon Sep 17 00:00:00 2001 From: "Bryan C. Mills" Date: Thu, 30 Sep 2021 11:59:03 -0400 Subject: [PATCH 04/27] all: upgrade x/tools to latest Apparently the x/text/message/pipeline test uses x/tools/go/ssa to introspect something, and as of CL 352953 the 'runtime' package needs a newer version of x/tools/go/ssa (probably to pick up CL 333110). Fixes golang/go#48701 Updates golang/go#47091 Change-Id: I78474f5ad037744906f5c771a7af7e0195f65173 Reviewed-on: https://go-review.googlesource.com/c/text/+/353350 Trust: Bryan C. Mills Trust: Josh Bleecher Snyder Run-TryBot: Bryan C. 
Mills TryBot-Result: Go Bot Reviewed-by: Josh Bleecher Snyder --- go.mod | 8 +++++++- go.sum | 27 +++++++++++++++++++++++++- message/pipeline/pipeline_test.go | 32 ++++++++++++++++++++++++++++--- 3 files changed, 62 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index 63bc05f20..32d3aa025 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,11 @@ module golang.org/x/text -require golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e +require golang.org/x/tools v0.1.7 + +require ( + golang.org/x/mod v0.4.2 // indirect + golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e // indirect + golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect +) go 1.17 diff --git a/go.sum b/go.sum index 6a308d730..13ba81505 100644 --- a/go.sum +++ b/go.sum @@ -1,2 +1,27 @@ -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e h1:FDhOuMEY4JVRztM/gsbk+IKUQ8kj74bxZrgw87eMMVc= +github.com/yuin/goldmark v1.4.0/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/mod v0.4.2 h1:Gz96sIWK3OalVv/I/qNygP42zyoKp3xptRVCWRFEBvo= +golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e h1:WUoyKPm6nCo1BnNUvPGnFG3T5DUVem42yDJZZ4CNxMA= +golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.7 h1:6j8CgantCy3yc8JGBqkDLMKWqZ0RDU2g1HVgacojGWQ= +golang.org/x/tools v0.1.7/go.mod h1:LGqMHiF4EqQNHR1JncWGqT5BVaXmza+X+BDGol+dOxo= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/message/pipeline/pipeline_test.go b/message/pipeline/pipeline_test.go index fe3b5daff..51c14a50b 100644 --- a/message/pipeline/pipeline_test.go +++ b/message/pipeline/pipeline_test.go @@ -139,18 +139,44 @@ func initTestdataModule(t *testing.T, dst string) { } goMod := fmt.Sprintf(`module testdata -go 1.11 -require golang.org/x/text 
v0.0.0-00010101000000-000000000000 -replace golang.org/x/text v0.0.0-00010101000000-000000000000 => %s + +replace golang.org/x/text => %s `, xTextDir) if err := ioutil.WriteFile(filepath.Join(dst, "go.mod"), []byte(goMod), 0644); err != nil { t.Fatal(err) } + // Copy in the checksums from the parent module so that we won't + // need to re-fetch them from the checksum database. data, err := ioutil.ReadFile(filepath.Join(xTextDir, "go.sum")) + if err != nil { + t.Fatal(err) + } if err := ioutil.WriteFile(filepath.Join(dst, "go.sum"), data, 0644); err != nil { t.Fatal(err) } + + // We've added a replacement for the parent version of x/text, + // but now we need to populate the correct version. + // (We can't just replace the zero-version because x/text + // may indirectly depend on some nonzero version of itself.) + // + // We use 'go get' instead of 'go mod tidy' to avoid the old-release + // compatibility check when graph pruning is enabled, and to avoid doing + // more work than necessary for test dependencies of imported packages + // (we're not going to run those tests here anyway). + // + // We 'go get' the packages in the testdata module — not specific dependencies + // of those packages — so that they will resolve to whatever version is + // already required in the (replaced) x/text go.mod file. 
+ + getCmd := exec.Command("go", "get", "-d", "./...") + getCmd.Dir = dst + getCmd.Env = append(os.Environ(), "PWD="+dst, "GOPROXY=off", "GOCACHE=off") + if out, err := getCmd.CombinedOutput(); err != nil { + t.Logf("%s", out) + t.Fatal(err) + } } func checkOutput(t *testing.T, gen string, testdataDir string) { From 5bd84dd9b33bd2bdebd8a6a6477920a8e492d47f Mon Sep 17 00:00:00 2001 From: Alexander Yastrebov Date: Sun, 3 Oct 2021 03:13:22 +0000 Subject: [PATCH 05/27] =?UTF-8?q?encoding/simplifiedchinese:=20Fixes=20?= =?UTF-8?q?=E2=82=AC=20encoding=20in=20GB18030?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The euro sign is an exception which is given a single byte code of 0x80 in Microsoft's later versions of CP936/GBK and a two byte code of A2 E3 in GB18030. https://en.wikipedia.org/wiki/GB_18030#cite_note-4 Fixes golang/go#48691 Change-Id: I6a4460274d4313ad1d03bcd8070373af674691eb GitHub-Last-Rev: acbbc50f20d663452f8da77cf2a66d8d893bec1d GitHub-Pull-Request: golang/text#26 Reviewed-on: https://go-review.googlesource.com/c/text/+/353712 Reviewed-by: Nigel Tao Trust: Nigel Tao Trust: Alberto Donizetti Run-TryBot: Nigel Tao TryBot-Result: Go Bot --- encoding/simplifiedchinese/all_test.go | 10 ++++++++++ encoding/simplifiedchinese/gbk.go | 6 +++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/encoding/simplifiedchinese/all_test.go b/encoding/simplifiedchinese/all_test.go index a556c94dd..fbb623cd2 100644 --- a/encoding/simplifiedchinese/all_test.go +++ b/encoding/simplifiedchinese/all_test.go @@ -40,7 +40,9 @@ func TestNonRepertoire(t *testing.T) { {enc, HZGB2312, "a갂", "a"}, {enc, HZGB2312, "\u6cf5갂", "~{1C~}"}, + {dec, GBK, "\xa2\xe3", "€"}, {dec, GB18030, "\x80", "€"}, + {dec, GB18030, "\x81", "\ufffd"}, {dec, GB18030, "\x81\x20", "\ufffd "}, {dec, GB18030, "\xfe\xfe", "\ufffd"}, @@ -125,6 +127,14 @@ func TestBasics(t *testing.T) { encPrefix: "~{", encoded: ";(F#,6@WCN^O`GW!#", utf8: "花间一壶酒,独酌无相亲。", + 
}, { + e: GBK, + encoded: "\x80", + utf8: "€", + }, { + e: GB18030, + encoded: "\xa2\xe3", + utf8: "€", }} for _, tc := range testCases { diff --git a/encoding/simplifiedchinese/gbk.go b/encoding/simplifiedchinese/gbk.go index b89c45b03..0e0fabfd6 100644 --- a/encoding/simplifiedchinese/gbk.go +++ b/encoding/simplifiedchinese/gbk.go @@ -55,6 +55,8 @@ loop: // Microsoft's Code Page 936 extends GBK 1.0 to encode the euro sign U+20AC // as 0x80. The HTML5 specification at http://encoding.spec.whatwg.org/#gbk // says to treat "gbk" as Code Page 936. + // GBK’s decoder is gb18030’s decoder. https://encoding.spec.whatwg.org/#gbk-decoder + // If byte is 0x80, return code point U+20AC. https://encoding.spec.whatwg.org/#gb18030-decoder case c0 == 0x80: r, size = '€', 1 @@ -180,7 +182,9 @@ func (e gbkEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err // Microsoft's Code Page 936 extends GBK 1.0 to encode the euro sign U+20AC // as 0x80. The HTML5 specification at http://encoding.spec.whatwg.org/#gbk // says to treat "gbk" as Code Page 936. - if r == '€' { + // GBK’s encoder is gb18030’s encoder with its _is GBK_ set to true. https://encoding.spec.whatwg.org/#gbk-encoder + // If _is GBK_ is true and code point is U+20AC, return byte 0x80. https://encoding.spec.whatwg.org/#gb18030-encoder + if !e.gb18030 && r == '€' { r = 0x80 goto write1 } From bb1c79828956f0f6753be0920efcecf32ba55f93 Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Thu, 6 May 2021 12:39:25 -0700 Subject: [PATCH 06/27] internal/export/idna: make Transitional an actual toggle Previously, it always enabled transitional processing instead of toggling, despite the fact that it took a boolean argument. For golang/go#30940. 
Change-Id: I00ad51ec55abfb2de28deb8c98f949989ece1099 Reviewed-on: https://go-review.googlesource.com/c/text/+/317729 Reviewed-by: Ian Lance Taylor Reviewed-by: Damien Neil Trust: Damien Neil Run-TryBot: Damien Neil TryBot-Result: Go Bot --- internal/export/idna/conformance_test.go | 3 +++ internal/export/idna/idna10.0.0.go | 4 ++-- internal/export/idna/idna10.0.0_test.go | 12 +++++++++--- internal/export/idna/idna9.0.0.go | 4 ++-- internal/export/idna/idna9.0.0_test.go | 12 +++++++++--- 5 files changed, 25 insertions(+), 10 deletions(-) diff --git a/internal/export/idna/conformance_test.go b/internal/export/idna/conformance_test.go index 1cdf43ca9..3e0e87518 100644 --- a/internal/export/idna/conformance_test.go +++ b/internal/export/idna/conformance_test.go @@ -2,6 +2,9 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build go1.10 +// +build go1.10 + package idna import ( diff --git a/internal/export/idna/idna10.0.0.go b/internal/export/idna/idna10.0.0.go index 3e7bac3cb..e6b62a287 100644 --- a/internal/export/idna/idna10.0.0.go +++ b/internal/export/idna/idna10.0.0.go @@ -59,10 +59,10 @@ type Option func(*options) // Transitional sets a Profile to use the Transitional mapping as defined in UTS // #46. This will cause, for example, "ß" to be mapped to "ss". Using the // transitional mapping provides a compromise between IDNA2003 and IDNA2008 -// compatibility. It is used by most browsers when resolving domain names. This +// compatibility. It is used by some browsers when resolving domain names. This // option is only meaningful if combined with MapForLookup. 
func Transitional(transitional bool) Option { - return func(o *options) { o.transitional = true } + return func(o *options) { o.transitional = transitional } } // VerifyDNSLength sets whether a Profile should fail if any of the IDN parts diff --git a/internal/export/idna/idna10.0.0_test.go b/internal/export/idna/idna10.0.0_test.go index 4142bfa84..0b9f7a862 100644 --- a/internal/export/idna/idna10.0.0_test.go +++ b/internal/export/idna/idna10.0.0_test.go @@ -34,6 +34,10 @@ func TestLabelErrors(t *testing.T) { std3 := kind{"STD3", p.ToASCII} p = New(MapForLookup(), CheckHyphens(false)) hyphens := kind{"CheckHyphens", p.ToASCII} + p = New(MapForLookup(), Transitional(true)) + transitional := kind{"Transitional", p.ToASCII} + p = New(MapForLookup(), Transitional(false)) + nontransitional := kind{"Nontransitional", p.ToASCII} testCases := []struct { kind @@ -95,14 +99,16 @@ func TestLabelErrors(t *testing.T) { {hyphens, "-label-.com", "-label-.com", ""}, // Don't map U+2490 (DIGIT NINE FULL STOP). This is the behavior of - // Chrome, Safari, and IE. Firefox will first map ⒐ to 9. and return - // lab9.be. + // Chrome, modern Firefox, Safari, and IE. {resolve, "lab⒐be", "xn--labbe-zh9b", "P1"}, // encode("lab⒐be") {display, "lab⒐be", "lab⒐be", "P1"}, - {resolve, "plan⒐faß.de", "xn--planfass-c31e.de", "P1"}, // encode("plan⒐fass") + ".de" {display, "Plan⒐faß.de", "plan⒐faß.de", "P1"}, + // Transitional vs Nontransitional processing + {transitional, "Plan9faß.de", "plan9fass.de", ""}, + {nontransitional, "Plan9faß.de", "xn--plan9fa-6va.de", ""}, + // Chrome 54.0 recognizes the error and treats this input verbatim as a // search string. 
// Safari 10.0 (non-conform spec) decomposes "⒈" and computes the diff --git a/internal/export/idna/idna9.0.0.go b/internal/export/idna/idna9.0.0.go index 7acecb800..4979fdc17 100644 --- a/internal/export/idna/idna9.0.0.go +++ b/internal/export/idna/idna9.0.0.go @@ -58,10 +58,10 @@ type Option func(*options) // Transitional sets a Profile to use the Transitional mapping as defined in UTS // #46. This will cause, for example, "ß" to be mapped to "ss". Using the // transitional mapping provides a compromise between IDNA2003 and IDNA2008 -// compatibility. It is used by most browsers when resolving domain names. This +// compatibility. It is used by some browsers when resolving domain names. This // option is only meaningful if combined with MapForLookup. func Transitional(transitional bool) Option { - return func(o *options) { o.transitional = true } + return func(o *options) { o.transitional = transitional } } // VerifyDNSLength sets whether a Profile should fail if any of the IDN parts diff --git a/internal/export/idna/idna9.0.0_test.go b/internal/export/idna/idna9.0.0_test.go index b76b79628..524727b1b 100644 --- a/internal/export/idna/idna9.0.0_test.go +++ b/internal/export/idna/idna9.0.0_test.go @@ -34,6 +34,10 @@ func TestLabelErrors(t *testing.T) { std3 := kind{"STD3", p.ToASCII} p = New(MapForLookup(), CheckHyphens(false)) hyphens := kind{"CheckHyphens", p.ToASCII} + p = New(MapForLookup(), Transitional(true)) + transitional := kind{"Transitional", p.ToASCII} + p = New(MapForLookup(), Transitional(false)) + nontransitional := kind{"Nontransitional", p.ToASCII} testCases := []struct { kind @@ -91,14 +95,16 @@ func TestLabelErrors(t *testing.T) { {hyphens, "-label-.com", "-label-.com", ""}, // Don't map U+2490 (DIGIT NINE FULL STOP). This is the behavior of - // Chrome, Safari, and IE. Firefox will first map ⒐ to 9. and return - // lab9.be. + // Chrome, modern Firefox, Safari, and IE. 
{resolve, "lab⒐be", "xn--labbe-zh9b", "P1"}, // encode("lab⒐be") {display, "lab⒐be", "lab⒐be", "P1"}, - {resolve, "plan⒐faß.de", "xn--planfass-c31e.de", "P1"}, // encode("plan⒐fass") + ".de" {display, "Plan⒐faß.de", "plan⒐faß.de", "P1"}, + // Transitional vs Nontransitional processing + {transitional, "Plan9faß.de", "plan9fass.de", ""}, + {nontransitional, "Plan9faß.de", "xn--plan9fa-6va.de", ""}, + // Chrome 54.0 recognizes the error and treats this input verbatim as a // search string. // Safari 10.0 (non-conform spec) decomposes "⒈" and computes the From 86e65b86426d37ada82226be2e8c1afae6bdc178 Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Thu, 6 May 2021 14:25:55 -0700 Subject: [PATCH 07/27] internal/export/idna: fix int32 overflows Prefer multiplication (int64(b)*int64(c) > MaxInt32) over division (b > MaxInt32/c) for overflow checking as it is a little faster on 386, and a LOT faster on amd64. For golang/go#28233. Change-Id: Ibf42529b93b699417781adc7eca6e66474f00bbf Reviewed-on: https://go-review.googlesource.com/c/text/+/317731 Run-TryBot: Ian Lance Taylor TryBot-Result: Go Bot Reviewed-by: Ian Lance Taylor Trust: Damien Neil --- internal/export/idna/punycode.go | 36 +++++++++++++++++++-------- internal/export/idna/punycode_test.go | 1 + 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/internal/export/idna/punycode.go b/internal/export/idna/punycode.go index f0cbd487b..7e96febf1 100644 --- a/internal/export/idna/punycode.go +++ b/internal/export/idna/punycode.go @@ -47,6 +47,7 @@ func decode(encoded string) (string, error) { } } i, n, bias := int32(0), initialN, initialBias + overflow := false for pos < len(encoded) { oldI, w := i, int32(1) for k := base; ; k += base { @@ -58,29 +59,32 @@ func decode(encoded string) (string, error) { return "", punyError(encoded) } pos++ - i += digit * w - if i < 0 { + i, overflow = madd(i, digit, w) + if overflow { return "", punyError(encoded) } t := k - bias - if t < tmin { + if k <= bias { t = tmin - } 
else if t > tmax { + } else if k >= bias+tmax { t = tmax } if digit < t { break } - w *= base - t - if w >= math.MaxInt32/base { + w, overflow = madd(0, w, base-t) + if overflow { return "", punyError(encoded) } } + if len(output) >= 1024 { + return "", punyError(encoded) + } x := int32(len(output) + 1) bias = adapt(i-oldI, x, oldI == 0) n += i / x i %= x - if n > utf8.MaxRune || len(output) >= 1024 { + if n < 0 || n > utf8.MaxRune { return "", punyError(encoded) } output = append(output, 0) @@ -113,6 +117,7 @@ func encode(prefix, s string) (string, error) { if b > 0 { output = append(output, '-') } + overflow := false for remaining != 0 { m := int32(0x7fffffff) for _, r := range s { @@ -120,8 +125,8 @@ func encode(prefix, s string) (string, error) { m = r } } - delta += (m - n) * (h + 1) - if delta < 0 { + delta, overflow = madd(delta, m-n, h+1) + if overflow { return "", punyError(s) } n = m @@ -139,9 +144,9 @@ func encode(prefix, s string) (string, error) { q := delta for k := base; ; k += base { t := k - bias - if t < tmin { + if k <= bias { t = tmin - } else if t > tmax { + } else if k >= bias+tmax { t = tmax } if q < t { @@ -162,6 +167,15 @@ func encode(prefix, s string) (string, error) { return string(output), nil } +// madd computes a + (b * c), detecting overflow. +func madd(a, b, c int32) (next int32, overflow bool) { + p := int64(b) * int64(c) + if p > math.MaxInt32-int64(a) { + return 0, true + } + return a + int32(p), false +} + func decodeDigit(x byte) (digit int32, ok bool) { switch { case '0' <= x && x <= '9': diff --git a/internal/export/idna/punycode_test.go b/internal/export/idna/punycode_test.go index 2d99239ec..5cf0c968a 100644 --- a/internal/export/idna/punycode_test.go +++ b/internal/export/idna/punycode_test.go @@ -177,6 +177,7 @@ var punycodeErrorTestCases = [...]string{ "decode 9999999999a", // "9999999999a" overflows the int32 calculation. "encode " + strings.Repeat("x", 65536) + "\uff00", // int32 overflow. 
+ "encode " + strings.Repeat("x", 65666) + "\uffff", // int32 overflow. issue #28233 } func TestPunycodeErrors(t *testing.T) { From 593da8d90fd448917a9ef0ca582e8d2bbe50ab2b Mon Sep 17 00:00:00 2001 From: Damien Neil Date: Thu, 28 Oct 2021 17:50:34 -0700 Subject: [PATCH 08/27] internal/export/idna: avoid strconv.Unquote errors on surrogate halves The IDNA test data includes surrogate halves, which strconv.Unquote reports an error for as of Go 1.18. Change-Id: I9eb954aa3ab3a177ab0984d0da7caee7a47920a5 Reviewed-on: https://go-review.googlesource.com/c/text/+/359554 Trust: Damien Neil Run-TryBot: Damien Neil TryBot-Result: Go Bot Reviewed-by: Ian Lance Taylor --- internal/export/idna/idna_test.go | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/internal/export/idna/idna_test.go b/internal/export/idna/idna_test.go index 7235452c2..e568d3b90 100644 --- a/internal/export/idna/idna_test.go +++ b/internal/export/idna/idna_test.go @@ -5,7 +5,9 @@ package idna import ( + "encoding/hex" "fmt" + "regexp" "strconv" "strings" "testing" @@ -99,12 +101,14 @@ func doTest(t *testing.T, f func(string) (string, error), name, input, want, err }) } +var unescapeRE = regexp.MustCompile(`\\u([0-9a-zA-Z]{4})`) + func unescape(s string) string { - s, err := strconv.Unquote(`"` + s + `"`) - if err != nil { - panic(err) - } - return s + return unescapeRE.ReplaceAllStringFunc(s, func(v string) string { + var d [2]byte + hex.Decode(d[:], []byte(v[2:])) + return string(rune(d[0])<<8 | rune(d[1])) + }) } func BenchmarkProfile(b *testing.B) { From 835dae61a3f3b1dd4f513e19a270937411e179f5 Mon Sep 17 00:00:00 2001 From: Damien Neil Date: Thu, 28 Oct 2021 17:47:46 -0700 Subject: [PATCH 09/27] internal/export/idna: use nontransitional processing in Go 1.18 Updates golang/go#46001 Updates golang/go#47510 Change-Id: I1e978a3c6230abfd0b1aaab0c7343b33dda1ba64 Reviewed-on: https://go-review.googlesource.com/c/text/+/359634 Trust: Damien Neil Run-TryBot: Damien Neil 
TryBot-Result: Go Bot Reviewed-by: Timothy Gu Reviewed-by: Ian Lance Taylor --- internal/export/idna/example_test.go | 27 ++++++++++++------------- internal/export/idna/go118.go | 12 +++++++++++ internal/export/idna/idna10.0.0.go | 2 +- internal/export/idna/idna10.0.0_test.go | 14 ++++++++++--- internal/export/idna/idna_test.go | 2 +- internal/export/idna/pre_go118.go | 10 +++++++++ 6 files changed, 48 insertions(+), 19 deletions(-) create mode 100644 internal/export/idna/go118.go create mode 100644 internal/export/idna/pre_go118.go diff --git a/internal/export/idna/example_test.go b/internal/export/idna/example_test.go index 6e6b8727c..4c7352bdc 100644 --- a/internal/export/idna/example_test.go +++ b/internal/export/idna/example_test.go @@ -13,27 +13,26 @@ import ( func ExampleProfile() { // Raw Punycode has no restrictions and does no mappings. fmt.Println(idna.ToASCII("")) - fmt.Println(idna.ToASCII("*.faß.com")) - fmt.Println(idna.Punycode.ToASCII("*.faß.com")) + fmt.Println(idna.ToASCII("*.GÖPHER.com")) + fmt.Println(idna.Punycode.ToASCII("*.GÖPHER.com")) - // Rewrite IDN for lookup. This (currently) uses transitional mappings to - // find a balance between IDNA2003 and IDNA2008 compatibility. + // Rewrite IDN for lookup. fmt.Println(idna.Lookup.ToASCII("")) - fmt.Println(idna.Lookup.ToASCII("www.faß.com")) + fmt.Println(idna.Lookup.ToASCII("www.GÖPHER.com")) - // Convert an IDN to ASCII for registration purposes. This changes the - // encoding, but reports an error if the input was illformed. - fmt.Println(idna.Registration.ToASCII("")) - fmt.Println(idna.Registration.ToASCII("www.faß.com")) + // Convert an IDN to ASCII for registration purposes. + // This reports an error if the input was illformed. 
+ fmt.Println(idna.Registration.ToASCII("www.GÖPHER.com")) + fmt.Println(idna.Registration.ToASCII("www.göpher.com")) // Output: // - // *.xn--fa-hia.com - // *.xn--fa-hia.com + // *.xn--GPHER-1oa.com + // *.xn--GPHER-1oa.com // - // www.fass.com - // idna: invalid label "" - // www.xn--fa-hia.com + // www.xn--gpher-jua.com + // www.xn--GPHER-1oa.com idna: disallowed rune U+0047 + // www.xn--gpher-jua.com } func ExampleNew() { diff --git a/internal/export/idna/go118.go b/internal/export/idna/go118.go new file mode 100644 index 000000000..941a7aaff --- /dev/null +++ b/internal/export/idna/go118.go @@ -0,0 +1,12 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build go1.18 +// +build go1.18 + +package idna + +// Transitional processing is disabled by default in Go 1.18. +// https://golang.org/issue/47510 +const transitionalLookup = false diff --git a/internal/export/idna/idna10.0.0.go b/internal/export/idna/idna10.0.0.go index e6b62a287..0e7571d16 100644 --- a/internal/export/idna/idna10.0.0.go +++ b/internal/export/idna/idna10.0.0.go @@ -284,7 +284,7 @@ var ( punycode = &Profile{} lookup = &Profile{options{ - transitional: true, + transitional: transitionalLookup, useSTD3Rules: true, checkHyphens: true, checkJoiners: true, diff --git a/internal/export/idna/idna10.0.0_test.go b/internal/export/idna/idna10.0.0_test.go index 0b9f7a862..c3365bc6a 100644 --- a/internal/export/idna/idna10.0.0_test.go +++ b/internal/export/idna/idna10.0.0_test.go @@ -102,7 +102,7 @@ func TestLabelErrors(t *testing.T) { // Chrome, modern Firefox, Safari, and IE. 
{resolve, "lab⒐be", "xn--labbe-zh9b", "P1"}, // encode("lab⒐be") {display, "lab⒐be", "lab⒐be", "P1"}, - {resolve, "plan⒐faß.de", "xn--planfass-c31e.de", "P1"}, // encode("plan⒐fass") + ".de" + {transitional, "plan⒐faß.de", "xn--planfass-c31e.de", "P1"}, // encode("plan⒐fass") + ".de" {display, "Plan⒐faß.de", "plan⒐faß.de", "P1"}, // Transitional vs Nontransitional processing @@ -115,10 +115,10 @@ func TestLabelErrors(t *testing.T) { // punycode on the result using transitional mapping. // Firefox 49.0.1 goes haywire on this string and prints a bunch of what // seems to be nested punycode encodings. - {resolve, "日本⒈co.ßßß.de", "xn--co-wuw5954azlb.ssssss.de", "P1"}, + {transitional, "日本⒈co.ßßß.de", "xn--co-wuw5954azlb.ssssss.de", "P1"}, {display, "日本⒈co.ßßß.de", "日本⒈co.ßßß.de", "P1"}, - {resolve, "a\u200Cb", "ab", ""}, + {transitional, "a\u200Cb", "ab", ""}, {display, "a\u200Cb", "a\u200Cb", "C"}, {resolve, encode("a\u200Cb"), encode("a\u200Cb"), "C"}, @@ -153,3 +153,11 @@ func TestLabelErrors(t *testing.T) { doTest(t, tc.f, tc.name, tc.input, tc.want, tc.wantErr) } } + +func TestTransitionalDefault(t *testing.T) { + want := "xn--strae-oqa.de" + if transitionalLookup { + want = "strasse.de" + } + doTest(t, Lookup.ToASCII, "Lookup", "straße.de", want, "") +} diff --git a/internal/export/idna/idna_test.go b/internal/export/idna/idna_test.go index e568d3b90..a13b67348 100644 --- a/internal/export/idna/idna_test.go +++ b/internal/export/idna/idna_test.go @@ -45,7 +45,7 @@ func TestProfiles(t *testing.T) { VerifyDNSLength(true), BidiRule(), )}, - {"Lookup", lookup, New(MapForLookup(), BidiRule(), Transitional(true))}, + {"Lookup", lookup, New(MapForLookup(), BidiRule(), Transitional(transitionalLookup))}, {"Display", display, New(MapForLookup(), BidiRule())}, } for _, tc := range testCases { diff --git a/internal/export/idna/pre_go118.go b/internal/export/idna/pre_go118.go new file mode 100644 index 000000000..ab3fa2e8c --- /dev/null +++ 
b/internal/export/idna/pre_go118.go @@ -0,0 +1,10 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !go1.18 +// +build !go1.18 + +package idna + +const transitionalLookup = true From 8da7c0fd2b032cc0b7be90fcb2d361c5ebc40fef Mon Sep 17 00:00:00 2001 From: Damien Neil Date: Mon, 1 Nov 2021 13:27:11 -0700 Subject: [PATCH 10/27] gen.go: copy all tablesXX.X.X.go versions to golang.org/x/net Remove logic that copies only the most current tablesXX.X.X.go, renaming it to tables.go. The golang.org/x/net/idna package currently contains all the versioned files and no tables.go, so this change makes gen.go consistent with the last export. Change-Id: Ic2797b45ec998873651eda04ed56ada29788f0f0 Reviewed-on: https://go-review.googlesource.com/c/text/+/360380 Trust: Damien Neil Trust: Marcel van Lohuizen Run-TryBot: Damien Neil Run-TryBot: Marcel van Lohuizen TryBot-Result: Go Bot Reviewed-by: Marcel van Lohuizen --- gen.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/gen.go b/gen.go index 04ff6035c..fb2fb94a7 100644 --- a/gen.go +++ b/gen.go @@ -256,12 +256,6 @@ func copyPackage(dirSrc, dirDst, search, replace string) { filepath.Dir(file) != dirSrc { return nil } - if strings.HasPrefix(base, "tables") { - if !strings.HasSuffix(base, gen.UnicodeVersion()+".go") { - return nil - } - base = "tables.go" - } b, err := ioutil.ReadFile(file) if err != nil || bytes.Contains(b, []byte("\n// +build ignore")) { return err From 85a1c56496a61b2c4e607faaf3369d473cf2589d Mon Sep 17 00:00:00 2001 From: Amelia Downs Date: Mon, 1 Nov 2021 16:41:32 -0400 Subject: [PATCH 11/27] text/collate: add testable examples Change-Id: Id3915137c4d365ec82ce74d8212e7b6cfb6fb200 Reviewed-on: https://go-review.googlesource.com/c/text/+/360494 Run-TryBot: Ian Lance Taylor TryBot-Result: Go Bot Reviewed-by: Ian Lance Taylor Reviewed-by: Marcel van Lohuizen Trust: Marcel van 
Lohuizen --- collate/example_sort_test.go | 56 +++++++++++++++++++++++++ collate/examples_test.go | 79 ++++++++++++++++++++++++++++++++++++ 2 files changed, 135 insertions(+) create mode 100644 collate/example_sort_test.go create mode 100644 collate/examples_test.go diff --git a/collate/example_sort_test.go b/collate/example_sort_test.go new file mode 100644 index 000000000..e86c02a7a --- /dev/null +++ b/collate/example_sort_test.go @@ -0,0 +1,56 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package collate_test + +import ( + "fmt" + + "golang.org/x/text/collate" + "golang.org/x/text/language" +) + +type book struct { + title string +} + +type bookcase struct { + books []book +} + +func (bc bookcase) Len() int { + return len(bc.books) +} + +func (bc bookcase) Swap(i, j int) { + temp := bc.books[i] + bc.books[i] = bc.books[j] + bc.books[j] = temp +} + +func (bc bookcase) Bytes(i int) []byte { + // returns the bytes of text at index i + return []byte(bc.books[i].title) +} + +func ExampleCollator_Sort() { + bc := bookcase{ + books: []book{ + {title: "If Cats Disappeared from the World"}, + {title: "The Guest Cat"}, + {title: "Catwings"}, + }, + } + + cc := collate.New(language.English) + cc.Sort(bc) + + for _, b := range bc.books { + fmt.Println(b.title) + } + // Output: + // Catwings + // If Cats Disappeared from the World + // The Guest Cat +} diff --git a/collate/examples_test.go b/collate/examples_test.go new file mode 100644 index 000000000..0a42a6d21 --- /dev/null +++ b/collate/examples_test.go @@ -0,0 +1,79 @@ +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package collate_test + +import ( + "fmt" + + "golang.org/x/text/collate" + "golang.org/x/text/language" +) + +func ExampleNew() { + letters := []string{"ä", "å", "ö", "o", "a"} + + ec := collate.New(language.English) + ec.SortStrings(letters) + fmt.Printf("English Sorting: %v\n", letters) + + sc := collate.New(language.Swedish) + sc.SortStrings(letters) + fmt.Printf("Swedish Sorting: %v\n", letters) + + numbers := []string{"0", "11", "01", "2", "3", "23"} + + ec.SortStrings(numbers) + fmt.Printf("Alphabetic Sorting: %v\n", numbers) + + nc := collate.New(language.English, collate.Numeric) + nc.SortStrings(numbers) + fmt.Printf("Numeric Sorting: %v\n", numbers) + // Output: + // English Sorting: [a å ä o ö] + // Swedish Sorting: [a o å ä ö] + // Alphabetic Sorting: [0 01 11 2 23 3] + // Numeric Sorting: [0 01 2 3 11 23] +} + +func ExampleCollator_SortStrings() { + c := collate.New(language.English) + words := []string{"meow", "woof", "bark", "moo"} + c.SortStrings(words) + fmt.Println(words) + // Output: + // [bark meow moo woof] +} + +func ExampleCollator_CompareString() { + c := collate.New(language.English) + r := c.CompareString("meow", "woof") + fmt.Println(r) + + r = c.CompareString("woof", "meow") + fmt.Println(r) + + r = c.CompareString("meow", "meow") + fmt.Println(r) + // Output: + // -1 + // 1 + // 0 +} + +func ExampleCollator_Compare() { + c := collate.New(language.English) + r := c.Compare([]byte("meow"), []byte("woof")) + fmt.Println(r) + + r = c.Compare([]byte("woof"), []byte("meow")) + fmt.Println(r) + + r = c.Compare([]byte("meow"), []byte("meow")) + fmt.Println(r) + // Output: + // -1 + // 1 + // 0 +} From 459fa287ae002e3df243c0bd10ea915f5c64f687 Mon Sep 17 00:00:00 2001 From: Alexander Yastrebov Date: Mon, 18 Oct 2021 14:56:52 +0000 Subject: [PATCH 12/27] text/currency: format currency amount according to the locale Fixes golang/go#47623 Change-Id: Ie6be9db93bf58f597f1ea4d864fcb507235b1018 GitHub-Last-Rev: 
4c8f3557daf5440390c0775ed6e71ec80f8c11e8 GitHub-Pull-Request: golang/text#27 Reviewed-on: https://go-review.googlesource.com/c/text/+/353935 Reviewed-by: Marcel van Lohuizen Trust: Marcel van Lohuizen Trust: Ian Lance Taylor Run-TryBot: Marcel van Lohuizen TryBot-Result: Go Bot --- currency/format.go | 36 +++++++++++++++++++++--------------- currency/format_test.go | 23 ++++++++++++++++++++--- 2 files changed, 41 insertions(+), 18 deletions(-) diff --git a/currency/format.go b/currency/format.go index 11152632a..cc4570d3b 100644 --- a/currency/format.go +++ b/currency/format.go @@ -6,11 +6,13 @@ package currency import ( "fmt" - "io" "sort" "golang.org/x/text/internal/format" "golang.org/x/text/internal/language/compact" + "golang.org/x/text/internal/number" + + "golang.org/x/text/language" ) // Amount is an amount-currency unit pair. @@ -34,8 +36,6 @@ func (a Amount) Currency() Unit { return a.currency } // // Add/Sub/Div/Mul/Round. -var space = []byte(" ") - // Format implements fmt.Formatter. It accepts format.State for // language-specific rendering. func (a Amount) Format(s fmt.State, verb rune) { @@ -58,9 +58,11 @@ type formattedValue struct { // Format implements fmt.Formatter. It accepts format.State for // language-specific rendering. func (v formattedValue) Format(s fmt.State, verb rune) { + var tag language.Tag var lang compact.ID if state, ok := s.(format.State); ok { - lang, _ = compact.RegionalID(compact.Tag(state.Language())) + tag = state.Language() + lang, _ = compact.RegionalID(compact.Tag(tag)) } // Get the options. Use DefaultFormat if not present. @@ -73,18 +75,22 @@ func (v formattedValue) Format(s fmt.State, verb rune) { cur = opt.currency } - // TODO: use pattern. 
- io.WriteString(s, opt.symbol(lang, cur)) + sym := opt.symbol(lang, cur) if v.amount != nil { - s.Write(space) - - // TODO: apply currency-specific rounding - scale, _ := opt.kind.Rounding(cur) - if _, ok := s.Precision(); !ok { - fmt.Fprintf(s, "%.*f", scale, v.amount) - } else { - fmt.Fprint(s, v.amount) - } + var f number.Formatter + f.InitDecimal(tag) + + scale, increment := opt.kind.Rounding(cur) + f.RoundingContext.SetScale(scale) + f.RoundingContext.Increment = uint32(increment) + f.RoundingContext.IncrementScale = uint8(scale) + f.RoundingContext.Mode = number.ToNearestAway + + d := f.Append(nil, v.amount) + + fmt.Fprint(s, sym, " ", string(d)) + } else { + fmt.Fprint(s, sym) } } diff --git a/currency/format_test.go b/currency/format_test.go index 0aa0d58af..5cb11ebc9 100644 --- a/currency/format_test.go +++ b/currency/format_test.go @@ -12,8 +12,10 @@ import ( ) var ( + de = language.German en = language.English fr = language.French + de_CH = language.MustParse("de-CH") en_US = language.AmericanEnglish en_GB = language.BritishEnglish en_AU = language.MustParse("en-AU") @@ -42,20 +44,35 @@ func TestFormatting(t *testing.T) { 9: {en, 9.0, Symbol.Default(EUR), "€ 9.00"}, 10: {en, 10.123, Symbol.Default(KRW), "₩ 10"}, - 11: {fr, 11.52, Symbol.Default(TWD), "TWD 11.52"}, + 11: {fr, 11.52, Symbol.Default(TWD), "TWD 11,52"}, 12: {en, 12.123, Symbol.Default(czk), "CZK 12.12"}, 13: {en, 13.123, Symbol.Default(czk).Kind(Cash), "CZK 13"}, 14: {en, 14.12345, ISO.Default(MustParseISO("CLF")), "CLF 14.1235"}, 15: {en, USD.Amount(15.00), ISO.Default(TWD), "USD 15.00"}, 16: {en, KRW.Amount(16.00), ISO.Kind(Cash), "KRW 16"}, - // TODO: support integers as well. 
- 17: {en, USD, nil, "USD"}, 18: {en, USD, ISO, "USD"}, 19: {en, USD, Symbol, "$"}, 20: {en_GB, USD, Symbol, "US$"}, 21: {en_AU, USD, NarrowSymbol, "$"}, + + // https://en.wikipedia.org/wiki/Decimal_separator + 22: {de, EUR.Amount(1234567.89), nil, "EUR 1.234.567,89"}, + 23: {fr, EUR.Amount(1234567.89), nil, "EUR 1\u00a0234\u00a0567,89"}, + 24: {en_AU, EUR.Amount(1234567.89), nil, "EUR 1,234,567.89"}, + 25: {de_CH, EUR.Amount(1234567.89), nil, "EUR 1’234’567.89"}, + + // https://en.wikipedia.org/wiki/Cash_rounding + 26: {de, NOK.Amount(2.49), ISO.Kind(Cash), "NOK 2"}, + 27: {de, NOK.Amount(2.50), ISO.Kind(Cash), "NOK 3"}, + 28: {de, DKK.Amount(0.24), ISO.Kind(Cash), "DKK 0,00"}, + 29: {de, DKK.Amount(0.25), ISO.Kind(Cash), "DKK 0,50"}, + + // integers + 30: {de, EUR.Amount(1234567), nil, "EUR 1.234.567,00"}, + 31: {en, CNY.Amount(0), NarrowSymbol, "¥ 0.00"}, + 32: {en, CNY.Amount(0), Symbol, "CN¥ 0.00"}, } for i, tc := range testCases { p := message.NewPrinter(tc.tag) From 310d592b71f764a238114c372e0ce7db4ecb560a Mon Sep 17 00:00:00 2001 From: Sean Liao Date: Thu, 21 Oct 2021 19:49:33 +0200 Subject: [PATCH 13/27] cmd/gotext: only match files ending with pattern Exclude files with other extensions like .json.swp from editors. 
Fixes golang/go#48983 Change-Id: Id74ca7ae208688cf900661d641e5403d453da33c Reviewed-on: https://go-review.googlesource.com/c/text/+/357734 Reviewed-by: Marcel van Lohuizen Trust: Marcel van Lohuizen Trust: Ian Lance Taylor Run-TryBot: Marcel van Lohuizen TryBot-Result: Go Bot --- cmd/gotext/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/gotext/main.go b/cmd/gotext/main.go index f31dd4fbd..c8dc7990a 100644 --- a/cmd/gotext/main.go +++ b/cmd/gotext/main.go @@ -48,7 +48,7 @@ func config() (*pipeline.Config, error) { return &pipeline.Config{ SourceLanguage: tag, Supported: getLangs(), - TranslationsPattern: `messages\.(.*)\.json`, + TranslationsPattern: `messages\.(.*)\.json$`, GenFile: *out, }, nil } From 7d8748685ddeb50953764cb62aa1064f0d928d61 Mon Sep 17 00:00:00 2001 From: Shengyu Zhang Date: Tue, 2 Nov 2021 09:32:53 +0000 Subject: [PATCH 14/27] text/unicod/bidi: remove duplicate assignment Fixes golang/go#43623 Change-Id: I4da134cccaf6a9e5331229a0ac2a60e5cb711e92 GitHub-Last-Rev: ae0f2d91f86dfd045bb403f5906ba80d2b9092de GitHub-Pull-Request: golang/text#29 Reviewed-on: https://go-review.googlesource.com/c/text/+/358834 Reviewed-by: Marcel van Lohuizen Trust: Marcel van Lohuizen Trust: Ian Lance Taylor --- unicode/bidi/core.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unicode/bidi/core.go b/unicode/bidi/core.go index e4c081101..fde188a33 100644 --- a/unicode/bidi/core.go +++ b/unicode/bidi/core.go @@ -495,9 +495,9 @@ func (s *isolatingRunSequence) resolveWeakTypes() { if t == NSM { s.types[i] = precedingCharacterType } else { - if t.in(LRI, RLI, FSI, PDI) { - precedingCharacterType = ON - } + // if t.in(LRI, RLI, FSI, PDI) { + // precedingCharacterType = ON + // } precedingCharacterType = t } } From 18b340fc7af22495828ffbe71e9f9e22583bc7a9 Mon Sep 17 00:00:00 2001 From: Amelia Downs Date: Fri, 29 Oct 2021 13:32:13 -0400 Subject: [PATCH 15/27] language: fix typo in update docs Change-Id: 
Ied188b87f0a9a1a4fb160b2a7ba239ed70b843a6 Reviewed-on: https://go-review.googlesource.com/c/text/+/359715 Reviewed-by: Ian Lance Taylor Reviewed-by: Amelia Downs Trust: Cherry Mui --- language/match.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/language/match.go b/language/match.go index f73492134..ee45f4947 100644 --- a/language/match.go +++ b/language/match.go @@ -545,7 +545,7 @@ type bestMatch struct { // match as the preferred match. // // If pin is true and have and tag are a strong match, it will henceforth only -// consider matches for this language. This corresponds to the nothing that most +// consider matches for this language. This corresponds to the idea that most // users have a strong preference for the first defined language. A user can // still prefer a second language over a dialect of the preferred language by // explicitly specifying dialects, e.g. "en, nl, en-GB". In this case pin should From d1c84af989ab0f62cd853b5ae33b1b4db4f1e88b Mon Sep 17 00:00:00 2001 From: "Bryan C. Mills" Date: Sun, 23 Jan 2022 16:23:19 -0500 Subject: [PATCH 16/27] message/pipeline: skip TestFullCycle on plan9-arm This test has timed out several times recently on this builder. The test is pretty slow even on Linux, and may be especially filesystem-intensive. 
For golang/go#49338 Change-Id: Ife2bb399b10f369f815055bb0ad44bb007f606b7 Reviewed-on: https://go-review.googlesource.com/c/text/+/380414 Trust: Bryan Mills Run-TryBot: Bryan Mills TryBot-Result: Gopher Robot Reviewed-by: Ian Lance Taylor --- message/pipeline/pipeline_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/message/pipeline/pipeline_test.go b/message/pipeline/pipeline_test.go index 51c14a50b..2adb11fd2 100644 --- a/message/pipeline/pipeline_test.go +++ b/message/pipeline/pipeline_test.go @@ -33,6 +33,9 @@ func TestFullCycle(t *testing.T) { if runtime.GOOS == "android" { t.Skip("cannot load outside packages on android") } + if b := os.Getenv("GO_BUILDER_NAME"); b == "plan9-arm" { + t.Skipf("skipping: test frequently times out on %s", b) + } if _, err := exec.LookPath("go"); err != nil { t.Skipf("skipping because 'go' command is unavailable: %v", err) } From 8db23f83d6d6c5a1bcecede55281db3c11fc7c60 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Mon, 11 Apr 2022 13:13:21 -0400 Subject: [PATCH 17/27] all: gofmt Gofmt to update doc comments to the new formatting. For golang/go#51082. 
Change-Id: I3e3c5666d5e901f2c5303911ddb548e3dd567fce Reviewed-on: https://go-review.googlesource.com/c/text/+/399603 Run-TryBot: Russ Cox TryBot-Result: Gopher Robot Auto-Submit: Russ Cox Reviewed-by: Ian Lance Taylor --- cases/gen_trieval.go | 73 ++++++++++++----------- cases/trieval.go | 73 ++++++++++++----------- cmd/gotext/doc.go | 39 ++++-------- collate/build/builder.go | 39 +++++++----- collate/build/colelem.go | 8 ++- collate/build/contract.go | 25 ++++---- feature/plural/gen.go | 15 ++--- feature/plural/message.go | 14 ++--- feature/plural/plural.go | 51 ++++++++-------- internal/catmsg/catmsg.go | 50 ++++++++-------- internal/cldrtree/cldrtree.go | 1 - internal/colltab/collelem.go | 39 ++++++------ internal/export/idna/gen_trieval.go | 34 +++++------ internal/export/idna/trieval.go | 34 +++++------ internal/language/tables.go | 17 +++--- internal/number/decimal.go | 48 +++++++-------- internal/testtext/codesize.go | 12 ++-- internal/triegen/triegen.go | 25 ++++---- internal/utf8internal/utf8internal.go | 2 +- language/display/lookup.go | 16 ++--- language/doc.go | 44 +++++++------- message/catalog/catalog.go | 85 +++++++++++++-------------- message/doc.go | 50 ++++++++-------- number/doc.go | 17 +++--- unicode/bidi/core.go | 20 +++---- unicode/cldr/collate.go | 8 ++- unicode/norm/forminfo.go | 9 +-- unicode/norm/maketables.go | 2 + unicode/norm/normalize.go | 11 ++-- width/tables10.0.0.go | 24 +++++--- width/tables11.0.0.go | 24 +++++--- width/tables12.0.0.go | 24 +++++--- width/tables13.0.0.go | 24 +++++--- width/tables9.0.0.go | 24 +++++--- 34 files changed, 528 insertions(+), 453 deletions(-) diff --git a/cases/gen_trieval.go b/cases/gen_trieval.go index 6c7222a73..14dd33b1d 100644 --- a/cases/gen_trieval.go +++ b/cases/gen_trieval.go @@ -19,19 +19,19 @@ package main // // The per-rune values have the following format: // -// if (exception) { -// 15..4 unsigned exception index -// } else { -// 15..8 XOR pattern or index to XOR pattern for case mapping 
-// Only 13..8 are used for XOR patterns. -// 7 inverseFold (fold to upper, not to lower) -// 6 index: interpret the XOR pattern as an index -// or isMid if case mode is cIgnorableUncased. -// 5..4 CCC: zero (normal or break), above or other -// } -// 3 exception: interpret this value as an exception index -// (TODO: is this bit necessary? Probably implied from case mode.) -// 2..0 case mode +// if (exception) { +// 15..4 unsigned exception index +// } else { +// 15..8 XOR pattern or index to XOR pattern for case mapping +// Only 13..8 are used for XOR patterns. +// 7 inverseFold (fold to upper, not to lower) +// 6 index: interpret the XOR pattern as an index +// or isMid if case mode is cIgnorableUncased. +// 5..4 CCC: zero (normal or break), above or other +// } +// 3 exception: interpret this value as an exception index +// (TODO: is this bit necessary? Probably implied from case mode.) +// 2..0 case mode // // For the non-exceptional cases, a rune must be either uncased, lowercase or // uppercase. If the rune is cased, the XOR pattern maps either a lowercase @@ -133,37 +133,40 @@ const ( // The entry is pointed to by the exception index in an entry. It has the // following format: // -// Header -// byte 0: -// 7..6 unused -// 5..4 CCC type (same bits as entry) -// 3 unused -// 2..0 length of fold +// Header: // -// byte 1: -// 7..6 unused -// 5..3 length of 1st mapping of case type -// 2..0 length of 2nd mapping of case type +// byte 0: +// 7..6 unused +// 5..4 CCC type (same bits as entry) +// 3 unused +// 2..0 length of fold // -// case 1st 2nd -// lower -> upper, title -// upper -> lower, title -// title -> lower, upper +// byte 1: +// 7..6 unused +// 5..3 length of 1st mapping of case type +// 2..0 length of 2nd mapping of case type +// +// case 1st 2nd +// lower -> upper, title +// upper -> lower, title +// title -> lower, upper // // Lengths with the value 0x7 indicate no value and implies no change. 
// A length of 0 indicates a mapping to zero-length string. // // Body bytes: -// case folding bytes -// lowercase mapping bytes -// uppercase mapping bytes -// titlecase mapping bytes -// closure mapping bytes (for NFKC_Casefold). (TODO) +// +// case folding bytes +// lowercase mapping bytes +// uppercase mapping bytes +// titlecase mapping bytes +// closure mapping bytes (for NFKC_Casefold). (TODO) // // Fallbacks: -// missing fold -> lower -// missing title -> upper -// all missing -> original rune +// +// missing fold -> lower +// missing title -> upper +// all missing -> original rune // // exceptions starts with a dummy byte to enforce that there is no zero index // value. diff --git a/cases/trieval.go b/cases/trieval.go index 99e039628..4e4d13fe5 100644 --- a/cases/trieval.go +++ b/cases/trieval.go @@ -14,19 +14,19 @@ package cases // // The per-rune values have the following format: // -// if (exception) { -// 15..4 unsigned exception index -// } else { -// 15..8 XOR pattern or index to XOR pattern for case mapping -// Only 13..8 are used for XOR patterns. -// 7 inverseFold (fold to upper, not to lower) -// 6 index: interpret the XOR pattern as an index -// or isMid if case mode is cIgnorableUncased. -// 5..4 CCC: zero (normal or break), above or other -// } -// 3 exception: interpret this value as an exception index -// (TODO: is this bit necessary? Probably implied from case mode.) -// 2..0 case mode +// if (exception) { +// 15..4 unsigned exception index +// } else { +// 15..8 XOR pattern or index to XOR pattern for case mapping +// Only 13..8 are used for XOR patterns. +// 7 inverseFold (fold to upper, not to lower) +// 6 index: interpret the XOR pattern as an index +// or isMid if case mode is cIgnorableUncased. +// 5..4 CCC: zero (normal or break), above or other +// } +// 3 exception: interpret this value as an exception index +// (TODO: is this bit necessary? Probably implied from case mode.) 
+// 2..0 case mode // // For the non-exceptional cases, a rune must be either uncased, lowercase or // uppercase. If the rune is cased, the XOR pattern maps either a lowercase @@ -128,37 +128,40 @@ const ( // The entry is pointed to by the exception index in an entry. It has the // following format: // -// Header -// byte 0: -// 7..6 unused -// 5..4 CCC type (same bits as entry) -// 3 unused -// 2..0 length of fold +// Header: // -// byte 1: -// 7..6 unused -// 5..3 length of 1st mapping of case type -// 2..0 length of 2nd mapping of case type +// byte 0: +// 7..6 unused +// 5..4 CCC type (same bits as entry) +// 3 unused +// 2..0 length of fold // -// case 1st 2nd -// lower -> upper, title -// upper -> lower, title -// title -> lower, upper +// byte 1: +// 7..6 unused +// 5..3 length of 1st mapping of case type +// 2..0 length of 2nd mapping of case type +// +// case 1st 2nd +// lower -> upper, title +// upper -> lower, title +// title -> lower, upper // // Lengths with the value 0x7 indicate no value and implies no change. // A length of 0 indicates a mapping to zero-length string. // // Body bytes: -// case folding bytes -// lowercase mapping bytes -// uppercase mapping bytes -// titlecase mapping bytes -// closure mapping bytes (for NFKC_Casefold). (TODO) +// +// case folding bytes +// lowercase mapping bytes +// uppercase mapping bytes +// titlecase mapping bytes +// closure mapping bytes (for NFKC_Casefold). (TODO) // // Fallbacks: -// missing fold -> lower -// missing title -> upper -// all missing -> original rune +// +// missing fold -> lower +// missing title -> upper +// all missing -> original rune // // exceptions starts with a dummy byte to enforce that there is no zero index // value. 
diff --git a/cmd/gotext/doc.go b/cmd/gotext/doc.go index fa247c6d3..d363ae25e 100644 --- a/cmd/gotext/doc.go +++ b/cmd/gotext/doc.go @@ -4,60 +4,47 @@ // // Usage: // -// gotext command [arguments] +// gotext command [arguments] // // The commands are: // -// update merge translations and generate catalog -// extract extracts strings to be translated from code -// rewrite rewrites fmt functions to use a message Printer -// generate generates code to insert translated messages +// update merge translations and generate catalog +// extract extracts strings to be translated from code +// rewrite rewrites fmt functions to use a message Printer +// generate generates code to insert translated messages // // Use "gotext help [command]" for more information about a command. // // Additional help topics: // -// // Use "gotext help [topic]" for more information about that topic. // -// -// Merge translations and generate catalog +// # Merge translations and generate catalog // // Usage: // -// gotext update * [-out ] -// -// +// gotext update * [-out ] // -// -// Extracts strings to be translated from code +// # Extracts strings to be translated from code // // Usage: // -// gotext extract * -// -// +// gotext extract * // -// -// Rewrites fmt functions to use a message Printer +// # Rewrites fmt functions to use a message Printer // // Usage: // -// gotext rewrite +// gotext rewrite // // rewrite is typically done once for a project. It rewrites all usages of // fmt to use x/text's message package whenever a message.Printer is in scope. // It rewrites Print and Println calls with constant strings to the equivalent // using Printf to allow translators to reorder arguments. 
// -// -// Generates code to insert translated messages +// # Generates code to insert translated messages // // Usage: // -// gotext generate -// -// -// -// +// gotext generate package main diff --git a/collate/build/builder.go b/collate/build/builder.go index 092a4b506..3efb7387d 100644 --- a/collate/build/builder.go +++ b/collate/build/builder.go @@ -225,26 +225,37 @@ func (t *Tailoring) SetAnchorBefore(anchor string) error { // // Examples: create a tailoring for Swedish, where "ä" is ordered after "z" // at the primary sorting level: -// t := b.Tailoring("se") -// t.SetAnchor("z") -// t.Insert(colltab.Primary, "ä", "") +// +// t := b.Tailoring("se") +// t.SetAnchor("z") +// t.Insert(colltab.Primary, "ä", "") +// // Order "ü" after "ue" at the secondary sorting level: -// t.SetAnchor("ue") -// t.Insert(colltab.Secondary, "ü","") +// +// t.SetAnchor("ue") +// t.Insert(colltab.Secondary, "ü","") +// // or -// t.SetAnchor("u") -// t.Insert(colltab.Secondary, "ü", "e") +// +// t.SetAnchor("u") +// t.Insert(colltab.Secondary, "ü", "e") +// // Order "q" afer "ab" at the secondary level and "Q" after "q" // at the tertiary level: -// t.SetAnchor("ab") -// t.Insert(colltab.Secondary, "q", "") -// t.Insert(colltab.Tertiary, "Q", "") +// +// t.SetAnchor("ab") +// t.Insert(colltab.Secondary, "q", "") +// t.Insert(colltab.Tertiary, "Q", "") +// // Order "b" before "a": -// t.SetAnchorBefore("a") -// t.Insert(colltab.Primary, "b", "") +// +// t.SetAnchorBefore("a") +// t.Insert(colltab.Primary, "b", "") +// // Order "0" after the last primary ignorable: -// t.SetAnchor("") -// t.Insert(colltab.Primary, "0", "") +// +// t.SetAnchor("") +// t.Insert(colltab.Primary, "0", "") func (t *Tailoring) Insert(level colltab.Level, str, extend string) error { if t.anchor == nil { return fmt.Errorf("%s:Insert: no anchor point set for tailoring of %s", t.id, str) diff --git a/collate/build/colelem.go b/collate/build/colelem.go index 04fc3bfb7..1aaa062c5 100644 --- 
a/collate/build/colelem.go +++ b/collate/build/colelem.go @@ -51,6 +51,7 @@ func makeCE(ce rawCE) (uint32, error) { // - n* is the size of the first node in the contraction trie. // - i* is the index of the first node in the contraction trie. // - b* is the offset into the contraction collation element table. +// // See contract.go for details on the contraction trie. const ( contractID = 0xC0000000 @@ -103,7 +104,8 @@ func makeExpansionHeader(n int) (uint32, error) { // The collation element, in this case, is of the form // 11110000 00000000 wwwwwwww vvvvvvvv, where // - v* is the replacement tertiary weight for the first rune, -// - w* is the replacement tertiary weight for the second rune, +// - w* is the replacement tertiary weight for the second rune. +// // Tertiary weights of subsequent runes should be replaced with maxTertiary. // See https://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details. const ( @@ -162,7 +164,9 @@ func implicitPrimary(r rune) int { // primaries (either double primaries or for illegal runes) // to our own representation. // A CJK character C is represented in the DUCET as -// [.FBxx.0020.0002.C][.BBBB.0000.0000.C] +// +// [.FBxx.0020.0002.C][.BBBB.0000.0000.C] +// // We will rewrite these characters to a single CE. // We assume the CJK values start at 0x8000. // See https://unicode.org/reports/tr10/#Implicit_Weights diff --git a/collate/build/contract.go b/collate/build/contract.go index e2df64f0c..5d79eb8bf 100644 --- a/collate/build/contract.go +++ b/collate/build/contract.go @@ -56,19 +56,22 @@ const ( // entry might still resemble a completed suffix. // Examples: // The suffix strings "ab" and "ac" can be represented as: -// []ctEntry{ -// {'a', 1, 1, noIndex}, // 'a' by itself does not match, so i is 0xFF. -// {'b', 'c', 0, 1}, // "ab" -> 1, "ac" -> 2 -// } +// +// []ctEntry{ +// {'a', 1, 1, noIndex}, // 'a' by itself does not match, so i is 0xFF. 
+// {'b', 'c', 0, 1}, // "ab" -> 1, "ac" -> 2 +// } // // The suffix strings "ab", "abc", "abd", and "abcd" can be represented as: -// []ctEntry{ -// {'a', 1, 1, noIndex}, // 'a' must be followed by 'b'. -// {'b', 1, 2, 1}, // "ab" -> 1, may be followed by 'c' or 'd'. -// {'d', 'd', final, 3}, // "abd" -> 3 -// {'c', 4, 1, 2}, // "abc" -> 2, may be followed by 'd'. -// {'d', 'd', final, 4}, // "abcd" -> 4 -// } +// +// []ctEntry{ +// {'a', 1, 1, noIndex}, // 'a' must be followed by 'b'. +// {'b', 1, 2, 1}, // "ab" -> 1, may be followed by 'c' or 'd'. +// {'d', 'd', final, 3}, // "abd" -> 3 +// {'c', 4, 1, 2}, // "abc" -> 2, may be followed by 'd'. +// {'d', 'd', final, 4}, // "abcd" -> 4 +// } +// // See genStateTests in contract_test.go for more examples. type ctEntry struct { L uint8 // non-final: byte value to match; final: lowest match in range. diff --git a/feature/plural/gen.go b/feature/plural/gen.go index b9c5f2493..5f8f375fb 100644 --- a/feature/plural/gen.go +++ b/feature/plural/gen.go @@ -359,15 +359,16 @@ var operandIndex = map[string]opID{ // the resulting or conditions to conds. // // Example rules: -// // Category "one" in English: only allow 1 with no visible fraction -// i = 1 and v = 0 @integer 1 // -// // Category "few" in Czech: all numbers with visible fractions -// v != 0 @decimal ... +// // Category "one" in English: only allow 1 with no visible fraction +// i = 1 and v = 0 @integer 1 // -// // Category "zero" in Latvian: all multiples of 10 or the numbers 11-19 or -// // numbers with a fraction 11..19 and no trailing zeros. -// n % 10 = 0 or n % 100 = 11..19 or v = 2 and f % 100 = 11..19 @integer ... +// // Category "few" in Czech: all numbers with visible fractions +// v != 0 @decimal ... +// +// // Category "zero" in Latvian: all multiples of 10 or the numbers 11-19 or +// // numbers with a fraction 11..19 and no trailing zeros. +// n % 10 = 0 or n % 100 = 11..19 or v = 2 and f % 100 = 11..19 @integer ... 
// // @integer and @decimal are followed by examples and are not relevant for the // rule itself. The are used here to signal the termination of the rule. diff --git a/feature/plural/message.go b/feature/plural/message.go index f931f8a6a..6248d01cc 100644 --- a/feature/plural/message.go +++ b/feature/plural/message.go @@ -35,13 +35,13 @@ type Interface interface { // The cases argument are pairs of selectors and messages. Selectors are of type // string or Form. Messages are of type string or catalog.Message. A selector // matches an argument if: -// - it is "other" or Other -// - it matches the plural form of the argument: "zero", "one", "two", "few", -// or "many", or the equivalent Form -// - it is of the form "=x" where x is an integer that matches the value of -// the argument. -// - it is of the form " 1. -// - Otherwise the result is i % 10^nMod. +// - Let i be asInt(digits[start:end]), where out-of-range digits are assumed +// to be zero. +// - Result n is big if i / 10^nMod > 1. +// - Otherwise the result is i % 10^nMod. // // For example, if digits is {1, 2, 3} and start:end is 0:5, then the result // for various values of nMod is: -// - when nMod == 2, n == big -// - when nMod == 3, n == big -// - when nMod == 4, n == big -// - when nMod == 5, n == 12300 -// - when nMod == 6, n == 12300 -// - when nMod == 7, n == 12300 +// - when nMod == 2, n == big +// - when nMod == 3, n == big +// - when nMod == 4, n == big +// - when nMod == 5, n == 12300 +// - when nMod == 6, n == 12300 +// - when nMod == 7, n == 12300 func getIntApprox(digits []byte, start, end, nMod, big int) (n int) { // Leading 0 digits just result in 0. p := start @@ -107,12 +106,13 @@ func getIntApprox(digits []byte, start, end, nMod, big int) (n int) { // // The following table contains examples of possible arguments to represent // the given numbers. 
-// decimal digits exp scale -// 123 []byte{1, 2, 3} 3 0 -// 123.4 []byte{1, 2, 3, 4} 3 1 -// 123.40 []byte{1, 2, 3, 4} 3 2 -// 100000 []byte{1} 6 0 -// 100000.00 []byte{1} 6 3 +// +// decimal digits exp scale +// 123 []byte{1, 2, 3} 3 0 +// 123.4 []byte{1, 2, 3, 4} 3 1 +// 123.40 []byte{1, 2, 3, 4} 3 2 +// 100000 []byte{1} 6 0 +// 100000.00 []byte{1} 6 3 func (p *Rules) MatchDigits(t language.Tag, digits []byte, exp, scale int) Form { index := tagToID(t) @@ -152,14 +152,15 @@ func (p *Rules) matchComponents(t language.Tag, n, f, scale int) Form { // MatchPlural returns the plural form for the given language and plural // operands (as defined in // https://unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules): -// where -// n absolute value of the source number (integer and decimals) -// input -// i integer digits of n. -// v number of visible fraction digits in n, with trailing zeros. -// w number of visible fraction digits in n, without trailing zeros. -// f visible fractional digits in n, with trailing zeros (f = t * 10^(v-w)) -// t visible fractional digits in n, without trailing zeros. +// +// where +// n absolute value of the source number (integer and decimals) +// input +// i integer digits of n. +// v number of visible fraction digits in n, with trailing zeros. +// w number of visible fraction digits in n, without trailing zeros. +// f visible fractional digits in n, with trailing zeros (f = t * 10^(v-w)) +// t visible fractional digits in n, without trailing zeros. // // If any of the operand values is too large to fit in an int, it is okay to // pass the value modulo 10,000,000. diff --git a/internal/catmsg/catmsg.go b/internal/catmsg/catmsg.go index c0bf86f09..1b257a7b4 100644 --- a/internal/catmsg/catmsg.go +++ b/internal/catmsg/catmsg.go @@ -9,8 +9,7 @@ // own. For instance, the plural package provides functionality for selecting // translation strings based on the plural category of substitution arguments. 
// -// -// Encoding and Decoding +// # Encoding and Decoding // // Catalogs store Messages encoded as a single string. Compiling a message into // a string both results in compacter representation and speeds up evaluation. @@ -25,8 +24,7 @@ // the message. This decoder takes a Decoder argument which provides the // counterparts for the decoding. // -// -// Renderers +// # Renderers // // A Decoder must be initialized with a Renderer implementation. These // implementations must be provided by packages that use Catalogs, typically @@ -38,22 +36,22 @@ // as sequence of substrings passed to the Renderer. The following snippet shows // how to express the above example using the message package. // -// message.Set(language.English, "You are %d minute(s) late.", -// catalog.Var("minutes", plural.Select(1, "one", "minute")), -// catalog.String("You are %[1]d ${minutes} late.")) +// message.Set(language.English, "You are %d minute(s) late.", +// catalog.Var("minutes", plural.Select(1, "one", "minute")), +// catalog.String("You are %[1]d ${minutes} late.")) // -// p := message.NewPrinter(language.English) -// p.Printf("You are %d minute(s) late.", 5) // always 5 minutes late. +// p := message.NewPrinter(language.English) +// p.Printf("You are %d minute(s) late.", 5) // always 5 minutes late. // // To evaluate the Printf, package message wraps the arguments in a Renderer // that is passed to the catalog for message decoding. 
The call sequence that // results from evaluating the above message, assuming the person is rather // tardy, is: // -// Render("You are %[1]d ") -// Arg(1) -// Render("minutes") -// Render(" late.") +// Render("You are %[1]d ") +// Arg(1) +// Render("minutes") +// Render(" late.") // // The calls to Arg is caused by the plural.Select execution, which evaluates // the argument to determine whether the singular or plural message form should @@ -267,10 +265,12 @@ func (s FirstOf) Compile(e *Encoder) error { // Var defines a message that can be substituted for a placeholder of the same // name. If an expression does not result in a string after evaluation, Name is // used as the substitution. For example: -// Var{ -// Name: "minutes", -// Message: plural.Select(1, "one", "minute"), -// } +// +// Var{ +// Name: "minutes", +// Message: plural.Select(1, "one", "minute"), +// } +// // will resolve to minute for singular and minutes for plural forms. type Var struct { Name string @@ -318,13 +318,15 @@ func (r Raw) Compile(e *Encoder) (err error) { // calls for each placeholder and interstitial string. For example, for the // message: "%[1]v ${invites} %[2]v to ${their} party." The sequence of calls // is: -// d.Render("%[1]v ") -// d.Arg(1) -// d.Render(resultOfInvites) -// d.Render(" %[2]v to ") -// d.Arg(2) -// d.Render(resultOfTheir) -// d.Render(" party.") +// +// d.Render("%[1]v ") +// d.Arg(1) +// d.Render(resultOfInvites) +// d.Render(" %[2]v to ") +// d.Arg(2) +// d.Render(resultOfTheir) +// d.Render(" party.") +// // where the messages for "invites" and "their" both use a plural.Select // referring to the first argument. // diff --git a/internal/cldrtree/cldrtree.go b/internal/cldrtree/cldrtree.go index 7530831d6..cc2714e99 100644 --- a/internal/cldrtree/cldrtree.go +++ b/internal/cldrtree/cldrtree.go @@ -4,7 +4,6 @@ // Package cldrtree builds and generates a CLDR index file, including all // inheritance. 
-// package cldrtree //go:generate go test -gen diff --git a/internal/colltab/collelem.go b/internal/colltab/collelem.go index 396cebda2..0c23c8a48 100644 --- a/internal/colltab/collelem.go +++ b/internal/colltab/collelem.go @@ -78,24 +78,27 @@ func (ce Elem) ctype() ceType { // For normal collation elements, we assume that a collation element either has // a primary or non-default secondary value, not both. // Collation elements with a primary value are of the form -// 01pppppp pppppppp ppppppp0 ssssssss -// - p* is primary collation value -// - s* is the secondary collation value -// 00pppppp pppppppp ppppppps sssttttt, where -// - p* is primary collation value -// - s* offset of secondary from default value. -// - t* is the tertiary collation value -// 100ttttt cccccccc pppppppp pppppppp -// - t* is the tertiar collation value -// - c* is the canonical combining class -// - p* is the primary collation value +// +// 01pppppp pppppppp ppppppp0 ssssssss +// - p* is primary collation value +// - s* is the secondary collation value +// 00pppppp pppppppp ppppppps sssttttt, where +// - p* is primary collation value +// - s* offset of secondary from default value. 
+// - t* is the tertiary collation value +// 100ttttt cccccccc pppppppp pppppppp +// - t* is the tertiary collation value +// - c* is the canonical combining class +// - p* is the primary collation value +// // Collation elements with a secondary value are of the form -// 1010cccc ccccssss ssssssss tttttttt, where -// - c* is the canonical combining class -// - s* is the secondary collation value -// - t* is the tertiary collation value -// 11qqqqqq qqqqqqqq qqqqqqq0 00000000 -// - q* quaternary value +// +// 1010cccc ccccssss ssssssss tttttttt, where +// - c* is the canonical combining class +// - s* is the secondary collation value +// - t* is the tertiary collation value +// 11qqqqqq qqqqqqqq qqqqqqq0 00000000 +// - q* quaternary value const ( ceTypeMask = 0xC0000000 ceTypeMaskExt = 0xE0000000 @@ -296,6 +299,7 @@ func (ce Elem) Weight(l Level) int { // - n* is the size of the first node in the contraction trie. // - i* is the index of the first node in the contraction trie. // - b* is the offset into the contraction collation element table. +// // See contract.go for details on the contraction trie. const ( maxNBits = 4 @@ -326,6 +330,7 @@ func splitExpandIndex(ce Elem) (index int) { // The Elem, in this case, is of the form 11110000 00000000 wwwwwwww vvvvvvvv, where // - v* is the replacement tertiary weight for the first rune, // - w* is the replacement tertiary weight for the second rune, +// // Tertiary weights of subsequent runes should be replaced with maxTertiary. // See https://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details. 
func splitDecompose(ce Elem) (t1, t2 uint8) { diff --git a/internal/export/idna/gen_trieval.go b/internal/export/idna/gen_trieval.go index 9d92407f2..501bfabed 100644 --- a/internal/export/idna/gen_trieval.go +++ b/internal/export/idna/gen_trieval.go @@ -22,23 +22,23 @@ package main // // The per-rune values have the following format: // -// if mapped { -// if inlinedXOR { -// 15..13 inline XOR marker -// 12..11 unused -// 10..3 inline XOR mask -// } else { -// 15..3 index into xor or mapping table -// } -// } else { -// 15..14 unused -// 13 mayNeedNorm -// 12..11 attributes -// 10..8 joining type -// 7..3 category type -// } -// 2 use xor pattern -// 1..0 mapped category +// if mapped { +// if inlinedXOR { +// 15..13 inline XOR marker +// 12..11 unused +// 10..3 inline XOR mask +// } else { +// 15..3 index into xor or mapping table +// } +// } else { +// 15..14 unused +// 13 mayNeedNorm +// 12..11 attributes +// 10..8 joining type +// 7..3 category type +// } +// 2 use xor pattern +// 1..0 mapped category // // See the definitions below for a more detailed description of the various // bits. 
diff --git a/internal/export/idna/trieval.go b/internal/export/idna/trieval.go index 7a8cf889b..9c070a44b 100644 --- a/internal/export/idna/trieval.go +++ b/internal/export/idna/trieval.go @@ -17,23 +17,23 @@ package idna // // The per-rune values have the following format: // -// if mapped { -// if inlinedXOR { -// 15..13 inline XOR marker -// 12..11 unused -// 10..3 inline XOR mask -// } else { -// 15..3 index into xor or mapping table -// } -// } else { -// 15..14 unused -// 13 mayNeedNorm -// 12..11 attributes -// 10..8 joining type -// 7..3 category type -// } -// 2 use xor pattern -// 1..0 mapped category +// if mapped { +// if inlinedXOR { +// 15..13 inline XOR marker +// 12..11 unused +// 10..3 inline XOR mask +// } else { +// 15..3 index into xor or mapping table +// } +// } else { +// 15..14 unused +// 13 mayNeedNorm +// 12..11 attributes +// 10..8 joining type +// 7..3 category type +// } +// 2 use xor pattern +// 1..0 mapped category // // See the definitions below for a more detailed description of the various // bits. diff --git a/internal/language/tables.go b/internal/language/tables.go index a19480c5b..3552e1afc 100644 --- a/internal/language/tables.go +++ b/internal/language/tables.go @@ -121,9 +121,10 @@ const langPrivateEnd = 0x3179 // lang holds an alphabetically sorted list of ISO-639 language identifiers. // All entries are 4 bytes. The index of the identifier (divided by 4) is the language tag. // For 2-byte language identifiers, the two successive bytes have the following meaning: -// - if the first letter of the 2- and 3-letter ISO codes are the same: -// the second and third letter of the 3-letter ISO code. -// - otherwise: a 0 and a by 2 bits right-shifted index into altLangISO3. +// - if the first letter of the 2- and 3-letter ISO codes are the same: +// the second and third letter of the 3-letter ISO code. +// - otherwise: a 0 and a by 2 bits right-shifted index into altLangISO3. +// // For 3-byte language identifiers the 4th byte is 0. 
const lang tag.Index = "" + // Size: 5324 bytes "---\x00aaaraai\x00aak\x00aau\x00abbkabi\x00abq\x00abr\x00abt\x00aby\x00a" + @@ -1086,9 +1087,9 @@ var regionTypes = [358]uint8{ // regionISO holds a list of alphabetically sorted 2-letter ISO region codes. // Each 2-letter codes is followed by two bytes with the following meaning: -// - [A-Z}{2}: the first letter of the 2-letter code plus these two -// letters form the 3-letter ISO code. -// - 0, n: index into altRegionISO3. +// - [A-Z]{2}: the first letter of the 2-letter code plus these two +// letters form the 3-letter ISO code. +// - 0, n: index into altRegionISO3. const regionISO tag.Index = "" + // Size: 1308 bytes "AAAAACSCADNDAEREAFFGAGTGAIIAALLBAMRMANNTAOGOAQTAARRGASSMATUTAUUSAWBWAXLA" + "AZZEBAIHBBRBBDGDBEELBFFABGGRBHHRBIDIBJENBLLMBMMUBNRNBOOLBQESBRRABSHSBTTN" + @@ -1206,7 +1207,9 @@ var m49 = [358]int16{ // m49Index gives indexes into fromM49 based on the three most significant bits // of a 10-bit UN.M49 code. To search an UN.M49 code in fromM49, search in -// fromM49[m49Index[msb39(code)]:m49Index[msb3(code)+1]] +// +// fromM49[m49Index[msb39(code)]:m49Index[msb3(code)+1]] +// // for an entry where the first 7 bits match the 7 lsb of the UN.M49 code. // The region code is stored in the 9 lsb of the indexed value. // Size: 18 bytes, 9 elements diff --git a/internal/number/decimal.go b/internal/number/decimal.go index cb656db6c..37e0c4b98 100644 --- a/internal/number/decimal.go +++ b/internal/number/decimal.go @@ -33,13 +33,14 @@ const maxIntDigits = 20 // may point outside a valid position in Digits. 
// // Examples: -// Number Decimal -// 12345 Digits: [1, 2, 3, 4, 5], Exp: 5 -// 12.345 Digits: [1, 2, 3, 4, 5], Exp: 2 -// 12000 Digits: [1, 2], Exp: 5 -// 12000.00 Digits: [1, 2], Exp: 5 -// 0.00123 Digits: [1, 2, 3], Exp: -2 -// 0 Digits: [], Exp: 0 +// +// Number Decimal +// 12345 Digits: [1, 2, 3, 4, 5], Exp: 5 +// 12.345 Digits: [1, 2, 3, 4, 5], Exp: 2 +// 12000 Digits: [1, 2], Exp: 5 +// 12000.00 Digits: [1, 2], Exp: 5 +// 0.00123 Digits: [1, 2, 3], Exp: -2 +// 0 Digits: [], Exp: 0 type Decimal struct { digits @@ -60,22 +61,23 @@ type digits struct { // engineering notation. Digits must have at least one digit. // // Examples: -// Number Decimal -// decimal -// 12345 Digits: [1, 2, 3, 4, 5], Exp: 5 End: 5 -// 12.345 Digits: [1, 2, 3, 4, 5], Exp: 2 End: 5 -// 12000 Digits: [1, 2], Exp: 5 End: 5 -// 12000.00 Digits: [1, 2], Exp: 5 End: 7 -// 0.00123 Digits: [1, 2, 3], Exp: -2 End: 3 -// 0 Digits: [], Exp: 0 End: 1 -// scientific (actual exp is Exp - Comma) -// 0e0 Digits: [0], Exp: 1, End: 1, Comma: 1 -// .0e0 Digits: [0], Exp: 0, End: 1, Comma: 0 -// 0.0e0 Digits: [0], Exp: 1, End: 2, Comma: 1 -// 1.23e4 Digits: [1, 2, 3], Exp: 5, End: 3, Comma: 1 -// .123e5 Digits: [1, 2, 3], Exp: 5, End: 3, Comma: 0 -// engineering -// 12.3e3 Digits: [1, 2, 3], Exp: 5, End: 3, Comma: 2 +// +// Number Decimal +// decimal +// 12345 Digits: [1, 2, 3, 4, 5], Exp: 5 End: 5 +// 12.345 Digits: [1, 2, 3, 4, 5], Exp: 2 End: 5 +// 12000 Digits: [1, 2], Exp: 5 End: 5 +// 12000.00 Digits: [1, 2], Exp: 5 End: 7 +// 0.00123 Digits: [1, 2, 3], Exp: -2 End: 3 +// 0 Digits: [], Exp: 0 End: 1 +// scientific (actual exp is Exp - Comma) +// 0e0 Digits: [0], Exp: 1, End: 1, Comma: 1 +// .0e0 Digits: [0], Exp: 0, End: 1, Comma: 0 +// 0.0e0 Digits: [0], Exp: 1, End: 2, Comma: 1 +// 1.23e4 Digits: [1, 2, 3], Exp: 5, End: 3, Comma: 1 +// .123e5 Digits: [1, 2, 3], Exp: 5, End: 3, Comma: 0 +// engineering +// 12.3e3 Digits: [1, 2, 3], Exp: 5, End: 3, Comma: 2 type Digits struct { digits // End 
indicates the end position of the number. diff --git a/internal/testtext/codesize.go b/internal/testtext/codesize.go index 5fc5eaec7..88df48747 100644 --- a/internal/testtext/codesize.go +++ b/internal/testtext/codesize.go @@ -16,11 +16,13 @@ import ( // CodeSize builds the given code sample and returns the binary size or en error // if an error occurred. The code sample typically will look like this: -// package main -// import "golang.org/x/text/somepackage" -// func main() { -// somepackage.Func() // reference Func to cause it to be linked in. -// } +// +// package main +// import "golang.org/x/text/somepackage" +// func main() { +// somepackage.Func() // reference Func to cause it to be linked in. +// } +// // See dict_test.go in the display package for an example. func CodeSize(s string) (int, error) { // Write the file. diff --git a/internal/triegen/triegen.go b/internal/triegen/triegen.go index 51d218a30..de54a8075 100644 --- a/internal/triegen/triegen.go +++ b/internal/triegen/triegen.go @@ -34,23 +34,24 @@ // triegen generates both tables and code. The code is optimized to use the // automatically chosen data types. The following code is generated for a Trie // or multiple Tries named "foo": -// - type fooTrie -// The trie type. // -// - func newFooTrie(x int) *fooTrie -// Trie constructor, where x is the index of the trie passed to Gen. +// - type fooTrie +// The trie type. // -// - func (t *fooTrie) lookup(s []byte) (v uintX, sz int) -// The lookup method, where uintX is automatically chosen. +// - func newFooTrie(x int) *fooTrie +// Trie constructor, where x is the index of the trie passed to Gen. // -// - func lookupString, lookupUnsafe and lookupStringUnsafe -// Variants of the above. +// - func (t *fooTrie) lookup(s []byte) (v uintX, sz int) +// The lookup method, where uintX is automatically chosen. // -// - var fooValues and fooIndex and any tables generated by Compacters. -// The core trie data. 
+// - func lookupString, lookupUnsafe and lookupStringUnsafe +// Variants of the above. // -// - var fooTrieHandles -// Indexes of starter blocks in case of multiple trie roots. +// - var fooValues and fooIndex and any tables generated by Compacters. +// The core trie data. +// +// - var fooTrieHandles +// Indexes of starter blocks in case of multiple trie roots. // // It is recommended that users test the generated trie by checking the returned // value for every rune. Such exhaustive tests are possible as the number of diff --git a/internal/utf8internal/utf8internal.go b/internal/utf8internal/utf8internal.go index 575cea870..e5c53b1b3 100644 --- a/internal/utf8internal/utf8internal.go +++ b/internal/utf8internal/utf8internal.go @@ -74,7 +74,7 @@ type AcceptRange struct { // AcceptRanges is a slice of AcceptRange values. For a given byte sequence b // -// AcceptRanges[First[b[0]]>>AcceptShift] +// AcceptRanges[First[b[0]]>>AcceptShift] // // will give the value of AcceptRange for the multi-byte UTF-8 sequence starting // at b[0]. diff --git a/language/display/lookup.go b/language/display/lookup.go index e6dc0e016..88307753d 100644 --- a/language/display/lookup.go +++ b/language/display/lookup.go @@ -92,10 +92,10 @@ func nameTag(langN, scrN, regN namer, x interface{}) string { // offsets for a string in data. For example, consider a header that defines // strings for the languages de, el, en, fi, and nl: // -// header{ -// data: "GermanGreekEnglishDutch", -// index: []uint16{ 0, 6, 11, 18, 18, 23 }, -// } +// header{ +// data: "GermanGreekEnglishDutch", +// index: []uint16{0, 6, 11, 18, 18, 23}, +// } // // For a language with index i, the string is defined by // data[index[i]:index[i+1]]. So the number of elements in index is always one @@ -204,9 +204,11 @@ func supportedRegions() []language.Region { // for each length, which can be used in combination with binary search to get // the index associated with a tag. 
// For example, a tagIndex{ -// "arenesfrruzh", // 6 2-byte tags. -// "barwae", // 2 3-byte tags. -// "", +// +// "arenesfrruzh", // 6 2-byte tags. +// "barwae", // 2 3-byte tags. +// "", +// // } // would mean that the 2-byte tag "fr" had an index of 3, and the 3-byte tag // "wae" had an index of 7. diff --git a/language/doc.go b/language/doc.go index 8afecd50e..212b77c90 100644 --- a/language/doc.go +++ b/language/doc.go @@ -10,18 +10,17 @@ // and provides the user with the best experience // (see https://blog.golang.org/matchlang). // -// -// Matching preferred against supported languages +// # Matching preferred against supported languages // // A Matcher for an application that supports English, Australian English, // Danish, and standard Mandarin can be created as follows: // -// var matcher = language.NewMatcher([]language.Tag{ -// language.English, // The first language is used as fallback. -// language.MustParse("en-AU"), -// language.Danish, -// language.Chinese, -// }) +// var matcher = language.NewMatcher([]language.Tag{ +// language.English, // The first language is used as fallback. +// language.MustParse("en-AU"), +// language.Danish, +// language.Chinese, +// }) // // This list of supported languages is typically implied by the languages for // which there exists translations of the user interface. @@ -30,14 +29,14 @@ // language tags. // The MatchString finds best matches for such strings: // -// handler(w http.ResponseWriter, r *http.Request) { -// lang, _ := r.Cookie("lang") -// accept := r.Header.Get("Accept-Language") -// tag, _ := language.MatchStrings(matcher, lang.String(), accept) +// handler(w http.ResponseWriter, r *http.Request) { +// lang, _ := r.Cookie("lang") +// accept := r.Header.Get("Accept-Language") +// tag, _ := language.MatchStrings(matcher, lang.String(), accept) // -// // tag should now be used for the initialization of any -// // locale-specific service. 
-// } +// // tag should now be used for the initialization of any +// // locale-specific service. +// } // // The Matcher's Match method can be used to match Tags directly. // @@ -48,8 +47,7 @@ // For instance, it will know that a reader of Bokmål Danish can read Norwegian // and will know that Cantonese ("yue") is a good match for "zh-HK". // -// -// Using match results +// # Using match results // // To guarantee a consistent user experience to the user it is important to // use the same language tag for the selection of any locale-specific services. @@ -58,9 +56,9 @@ // More subtly confusing is using the wrong sorting order or casing // algorithm for a certain language. // -// All the packages in x/text that provide locale-specific services -// (e.g. collate, cases) should be initialized with the tag that was -// obtained at the start of an interaction with the user. +// All the packages in x/text that provide locale-specific services +// (e.g. collate, cases) should be initialized with the tag that was +// obtained at the start of an interaction with the user. // // Note that Tag that is returned by Match and MatchString may differ from any // of the supported languages, as it may contain carried over settings from @@ -70,8 +68,7 @@ // Match and MatchString both return the index of the matched supported tag // to simplify associating such data with the matched tag. // -// -// Canonicalization +// # Canonicalization // // If one uses the Matcher to compare languages one does not need to // worry about canonicalization. @@ -92,10 +89,9 @@ // equivalence relations. The CanonType type can be used to alter the // canonicalization form. 
// -// References +// # References // // BCP 47 - Tags for Identifying Languages http://tools.ietf.org/html/bcp47 -// package language // import "golang.org/x/text/language" // TODO: explanation on how to match languages for your own locale-specific diff --git a/message/catalog/catalog.go b/message/catalog/catalog.go index de595b510..96955d075 100644 --- a/message/catalog/catalog.go +++ b/message/catalog/catalog.go @@ -13,8 +13,7 @@ // language. The Loader interface defines a source of dictionaries. A // translation of a format string is represented by a Message. // -// -// Catalogs +// # Catalogs // // A Catalog defines a programmatic interface for setting message translations. // It maintains a set of per-language dictionaries with translations for a set @@ -24,8 +23,7 @@ // the key. For example, a Dictionary for "en-GB" could leave out entries that // are identical to those in a dictionary for "en". // -// -// Messages +// # Messages // // A Message is a format string which varies on the value of substitution // variables. For instance, to indicate the number of results one could want "no @@ -39,8 +37,7 @@ // to selected string. This separation of concerns allows Catalog to be used to // store any kind of formatting strings. // -// -// Selecting messages based on linguistic features of substitution arguments +// # Selecting messages based on linguistic features of substitution arguments // // Messages may vary based on any linguistic features of the argument values. // The most common one is plural form, but others exist. @@ -48,10 +45,10 @@ // Selection messages are provided in packages that provide support for a // specific linguistic feature. 
The following snippet uses plural.Selectf: // -// catalog.Set(language.English, "You are %d minute(s) late.", -// plural.Selectf(1, "", -// plural.One, "You are 1 minute late.", -// plural.Other, "You are %d minutes late.")) +// catalog.Set(language.English, "You are %d minute(s) late.", +// plural.Selectf(1, "", +// plural.One, "You are 1 minute late.", +// plural.Other, "You are %d minutes late.")) // // In this example, a message is stored in the Catalog where one of two messages // is selected based on the first argument, a number. The first message is @@ -64,47 +61,46 @@ // Selects can be nested. This allows selecting sentences based on features of // multiple arguments or multiple linguistic properties of a single argument. // -// -// String interpolation +// # String interpolation // // There is often a lot of commonality between the possible variants of a // message. For instance, in the example above the word "minute" varies based on // the plural catogory of the argument, but the rest of the sentence is // identical. Using interpolation the above message can be rewritten as: // -// catalog.Set(language.English, "You are %d minute(s) late.", -// catalog.Var("minutes", -// plural.Selectf(1, "", plural.One, "minute", plural.Other, "minutes")), -// catalog.String("You are %[1]d ${minutes} late.")) +// catalog.Set(language.English, "You are %d minute(s) late.", +// catalog.Var("minutes", +// plural.Selectf(1, "", plural.One, "minute", plural.Other, "minutes")), +// catalog.String("You are %[1]d ${minutes} late.")) // // Var is defined to return the variable name if the message does not yield a // match. 
This allows us to further simplify this snippet to // -// catalog.Set(language.English, "You are %d minute(s) late.", -// catalog.Var("minutes", plural.Selectf(1, "", plural.One, "minute")), -// catalog.String("You are %d ${minutes} late.")) +// catalog.Set(language.English, "You are %d minute(s) late.", +// catalog.Var("minutes", plural.Selectf(1, "", plural.One, "minute")), +// catalog.String("You are %d ${minutes} late.")) // // Overall this is still only a minor improvement, but things can get a lot more // unwieldy if more than one linguistic feature is used to determine a message // variant. Consider the following example: // -// // argument 1: list of hosts, argument 2: list of guests -// catalog.Set(language.English, "%[1]v invite(s) %[2]v to their party.", -// catalog.Var("their", -// plural.Selectf(1, "" -// plural.One, gender.Select(1, "female", "her", "other", "his"))), -// catalog.Var("invites", plural.Selectf(1, "", plural.One, "invite")) -// catalog.String("%[1]v ${invites} %[2]v to ${their} party.")), +// // argument 1: list of hosts, argument 2: list of guests +// catalog.Set(language.English, "%[1]v invite(s) %[2]v to their party.", +// catalog.Var("their", +// plural.Selectf(1, "" +// plural.One, gender.Select(1, "female", "her", "other", "his"))), +// catalog.Var("invites", plural.Selectf(1, "", plural.One, "invite")) +// catalog.String("%[1]v ${invites} %[2]v to ${their} party.")), // // Without variable substitution, this would have to be written as // -// // argument 1: list of hosts, argument 2: list of guests -// catalog.Set(language.English, "%[1]v invite(s) %[2]v to their party.", -// plural.Selectf(1, "", -// plural.One, gender.Select(1, -// "female", "%[1]v invites %[2]v to her party." 
-// "other", "%[1]v invites %[2]v to his party."), -// plural.Other, "%[1]v invites %[2]v to their party.") +// // argument 1: list of hosts, argument 2: list of guests +// catalog.Set(language.English, "%[1]v invite(s) %[2]v to their party.", +// plural.Selectf(1, "", +// plural.One, gender.Select(1, +// "female", "%[1]v invites %[2]v to her party." +// "other", "%[1]v invites %[2]v to his party."), +// plural.Other, "%[1]v invites %[2]v to their party.")) // // Not necessarily shorter, but using variables there is less duplication and // the messages are more maintenance friendly. Moreover, languages may have up @@ -113,33 +109,32 @@ // Different messages using the same inflections can reuse variables by moving // them to macros. Using macros we can rewrite the message as: // -// // argument 1: list of hosts, argument 2: list of guests -// catalog.SetString(language.English, "%[1]v invite(s) %[2]v to their party.", -// "%[1]v ${invites(1)} %[2]v to ${their(1)} party.") +// // argument 1: list of hosts, argument 2: list of guests +// catalog.SetString(language.English, "%[1]v invite(s) %[2]v to their party.", +// "%[1]v ${invites(1)} %[2]v to ${their(1)} party.") // // Where the following macros were defined separately. // -// catalog.SetMacro(language.English, "invites", plural.Selectf(1, "", -// plural.One, "invite")) -// catalog.SetMacro(language.English, "their", plural.Selectf(1, "", -// plural.One, gender.Select(1, "female", "her", "other", "his"))), +// catalog.SetMacro(language.English, "invites", plural.Selectf(1, "", +// plural.One, "invite")) +// catalog.SetMacro(language.English, "their", plural.Selectf(1, "", +// plural.One, gender.Select(1, "female", "her", "other", "his"))), // // Placeholders use parentheses and the arguments to invoke a macro. // -// -// Looking up messages +// # Looking up messages // // Message lookup using Catalogs is typically only done by specialized packages // and is not something the user should be concerned with. 
For instance, to // express the tardiness of a user using the related message we defined earlier, // the user may use the package message like so: // -// p := message.NewPrinter(language.English) -// p.Printf("You are %d minute(s) late.", 5) +// p := message.NewPrinter(language.English) +// p.Printf("You are %d minute(s) late.", 5) // // Which would print: -// You are 5 minutes late. // +// You are 5 minutes late. // // This package is UNDER CONSTRUCTION and its API may change. package catalog // import "golang.org/x/text/message/catalog" diff --git a/message/doc.go b/message/doc.go index 72e8fde71..4bf7bdcac 100644 --- a/message/doc.go +++ b/message/doc.go @@ -5,22 +5,21 @@ // Package message implements formatted I/O for localized strings with functions // analogous to the fmt's print functions. It is a drop-in replacement for fmt. // -// -// Localized Formatting +// # Localized Formatting // // A format string can be localized by replacing any of the print functions of // fmt with an equivalent call to a Printer. // -// p := message.NewPrinter(message.MatchLanguage("en")) -// p.Println(123456.78) // Prints 123,456.78 +// p := message.NewPrinter(message.MatchLanguage("en")) +// p.Println(123456.78) // Prints 123,456.78 // -// p.Printf("%d ducks in a row", 4331) // Prints 4,331 ducks in a row +// p.Printf("%d ducks in a row", 4331) // Prints 4,331 ducks in a row // -// p := message.NewPrinter(message.MatchLanguage("nl")) -// p.Printf("Hoogte: %.1f meter", 1244.9) // Prints Hoogte: 1,244.9 meter +// p := message.NewPrinter(message.MatchLanguage("nl")) +// p.Printf("Hoogte: %.1f meter", 1244.9) // Prints Hoogte: 1,244.9 meter // -// p := message.NewPrinter(message.MatchLanguage("bn")) -// p.Println(123456.78) // Prints ১,২৩,৪৫৬.৭৮ +// p := message.NewPrinter(message.MatchLanguage("bn")) +// p.Println(123456.78) // Prints ১,২৩,৪৫৬.৭৮ // // Printer currently supports numbers and specialized types for which packages // exist in x/text. 
Other builtin types such as time.Time and slices are @@ -35,8 +34,7 @@ // // See package fmt for more options. // -// -// Translation +// # Translation // // The format strings that are passed to Printf, Sprintf, Fprintf, or Errorf // are used as keys to look up translations for the specified languages. @@ -44,34 +42,36 @@ // // One can use arbitrary keys to distinguish between otherwise ambiguous // strings: -// p := message.NewPrinter(language.English) -// p.Printf("archive(noun)") // Prints "archive" -// p.Printf("archive(verb)") // Prints "archive" // -// p := message.NewPrinter(language.German) -// p.Printf("archive(noun)") // Prints "Archiv" -// p.Printf("archive(verb)") // Prints "archivieren" +// p := message.NewPrinter(language.English) +// p.Printf("archive(noun)") // Prints "archive" +// p.Printf("archive(verb)") // Prints "archive" +// +// p := message.NewPrinter(language.German) +// p.Printf("archive(noun)") // Prints "Archiv" +// p.Printf("archive(verb)") // Prints "archivieren" // // To retain the fallback functionality, use Key: -// p.Printf(message.Key("archive(noun)", "archive")) -// p.Printf(message.Key("archive(verb)", "archive")) // +// p.Printf(message.Key("archive(noun)", "archive")) +// p.Printf(message.Key("archive(verb)", "archive")) // -// Translation Pipeline +// # Translation Pipeline // // Format strings that contain text need to be translated to support different // locales. The first step is to extract strings that need to be translated. // // 1. Install gotext -// go get -u golang.org/x/text/cmd/gotext -// gotext -help +// +// go get -u golang.org/x/text/cmd/gotext +// gotext -help // // 2. Mark strings in your source to be translated by using message.Printer, // instead of the functions of the fmt package. // // 3. Extract the strings from your source // -// gotext extract +// gotext extract // // The output will be written to the textdata directory. 
// @@ -89,13 +89,11 @@ // see also package golang.org/x/text/message/catalog can be used to implement // either dynamic or static loading of messages. // -// -// Plural and Gender Forms +// # Plural and Gender Forms // // Translated messages can vary based on the plural and gender forms of // substitution values. In general, it is up to the translators to provide // alternative translations for such forms. See the packages in // golang.org/x/text/feature and golang.org/x/text/message/catalog for more // information. -// package message diff --git a/number/doc.go b/number/doc.go index 2ad8d431a..876623086 100644 --- a/number/doc.go +++ b/number/doc.go @@ -9,19 +9,18 @@ // builtin Go types and anything that implements the Convert interface // (currently internal). // -// p := message.NewPrinter(language.English) +// p := message.NewPrinter(language.English) // -// p.Printf("%v bottles of beer on the wall.", number.Decimal(1234)) -// // Prints: 1,234 bottles of beer on the wall. +// p.Printf("%v bottles of beer on the wall.", number.Decimal(1234)) +// // Prints: 1,234 bottles of beer on the wall. // -// p.Printf("%v of gophers lose too much fur", number.Percent(0.12)) -// // Prints: 12% of gophers lose too much fur. +// p.Printf("%v of gophers lose too much fur", number.Percent(0.12)) +// // Prints: 12% of gophers lose too much fur. // -// p := message.NewPrinter(language.Dutch) -// -// p.Printf("There are %v bikes per household.", number.Decimal(1.2)) -// // Prints: Er zijn 1,2 fietsen per huishouden. +// p := message.NewPrinter(language.Dutch) // +// p.Printf("There are %v bikes per household.", number.Decimal(1.2)) +// // Prints: Er zijn 1,2 fietsen per huishouden. // // The width and scale specified in the formatting directives override the // configuration of the formatter. 
diff --git a/unicode/bidi/core.go b/unicode/bidi/core.go index fde188a33..9d2ae547b 100644 --- a/unicode/bidi/core.go +++ b/unicode/bidi/core.go @@ -193,14 +193,14 @@ func (p *paragraph) run() { // // At the end of this function: // -// - The member variable matchingPDI is set to point to the index of the -// matching PDI character for each isolate initiator character. If there is -// no matching PDI, it is set to the length of the input text. For other -// characters, it is set to -1. -// - The member variable matchingIsolateInitiator is set to point to the -// index of the matching isolate initiator character for each PDI character. -// If there is no matching isolate initiator, or the character is not a PDI, -// it is set to -1. +// - The member variable matchingPDI is set to point to the index of the +// matching PDI character for each isolate initiator character. If there is +// no matching PDI, it is set to the length of the input text. For other +// characters, it is set to -1. +// - The member variable matchingIsolateInitiator is set to point to the +// index of the matching isolate initiator character for each PDI character. +// If there is no matching isolate initiator, or the character is not a PDI, +// it is set to -1. func (p *paragraph) determineMatchingIsolates() { p.matchingPDI = make([]int, p.Len()) p.matchingIsolateInitiator = make([]int, p.Len()) @@ -435,7 +435,7 @@ func maxLevel(a, b level) level { } // Rule X10, second bullet: Determine the start-of-sequence (sos) and end-of-sequence (eos) types, -// either L or R, for each isolating run sequence. +// either L or R, for each isolating run sequence. func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence { length := len(indexes) types := make([]Class, length) @@ -905,7 +905,7 @@ func (p *paragraph) getLevels(linebreaks []int) []level { // Lines are concatenated from left to right. 
So for example, the fifth // character from the left on the third line is // -// getReordering(linebreaks)[linebreaks[1] + 4] +// getReordering(linebreaks)[linebreaks[1] + 4] // // (linebreaks[1] is the position after the last character of the second // line, which is also the index of the first character on the third line, diff --git a/unicode/cldr/collate.go b/unicode/cldr/collate.go index 27c5bac9a..056fe7f7f 100644 --- a/unicode/cldr/collate.go +++ b/unicode/cldr/collate.go @@ -98,9 +98,13 @@ func processRules(p RuleProcessor, s string) (err error) { } // parseSpecialAnchor parses the anchor syntax which is either of the form -// ['before' ] +// +// ['before' ] +// // or -// [