diff --git a/sumdb/server.go b/sumdb/server.go index 1e1779d..216a256 100644 --- a/sumdb/server.go +++ b/sumdb/server.go @@ -6,6 +6,7 @@ package sumdb import ( + "bytes" "context" "net/http" "os" @@ -150,6 +151,8 @@ func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) { http.Error(w, err.Error(), http.StatusInternalServerError) return } + // Data tiles contain formatted records without the first line with record ID. + _, msg, _ = bytes.Cut(msg, []byte{'\n'}) data = append(data, msg...) } w.Header().Set("Content-Type", "text/plain; charset=UTF-8") diff --git a/sumdb/tlog/note.go b/sumdb/tlog/note.go index ce5353e..fc6d5fa 100644 --- a/sumdb/tlog/note.go +++ b/sumdb/tlog/note.go @@ -73,13 +73,16 @@ func ParseTree(text []byte) (tree Tree, err error) { var errMalformedRecord = errors.New("malformed record data") // FormatRecord formats a record for serving to a client -// in a lookup response or data tile. +// in a lookup response. // // The encoded form is the record ID as a single number, // then the text of the record, and then a terminating blank line. // Record text must be valid UTF-8 and must not contain any ASCII control // characters (those below U+0020) other than newline (U+000A). // It must end in a terminating newline and not contain any blank lines. +// +// Responses to data tiles consist of concatenated formatted records from each of +// which the first line, with the record ID, is removed. func FormatRecord(id int64, text []byte) (msg []byte, err error) { if !isValidRecordText(text) { return nil, errMalformedRecord diff --git a/zip/testdata/check_dir/various.txt b/zip/testdata/check_dir/various.txt index ee843be..0ded5ac 100644 --- a/zip/testdata/check_dir/various.txt +++ b/zip/testdata/check_dir/various.txt @@ -1,10 +1,12 @@ -- want -- valid: $work/valid.go +$work/vendor/modules.txt omitted: $work/.hg_archival.txt: file is inserted by 'hg archive' and is always omitted $work/.git: directory is a version control repository +$work/pkg/vendor/vendor.go: file is in vendor directory $work/sub: directory is in another module $work/vendor/x/y: file is in vendor directory @@ -14,7 +16,9 @@ $work/invalid.go': malformed file path "invalid.go'": invalid char '\'' -- valid.go -- -- GO.MOD -- -- invalid.go' -- +-- vendor/modules.txt -- -- vendor/x/y -- -- sub/go.mod -- -- .hg_archival.txt -- -- .git/x -- +-- pkg/vendor/vendor.go -- diff --git a/zip/testdata/check_dir/various_go123.txt b/zip/testdata/check_dir/various_go123.txt new file mode 100644 index 0000000..69102ac --- /dev/null +++ b/zip/testdata/check_dir/various_go123.txt @@ -0,0 +1,25 @@ +-- want -- +valid: +$work/go.mod +$work/valid.go +$work/vendor/modules.txt + +omitted: +$work/.hg_archival.txt: file is inserted by 'hg archive' and is always omitted +$work/.git: directory is a version control repository +$work/pkg/vendor/vendor.go: file is in vendor directory +$work/sub: directory is in another module +$work/vendor/x/y: file is in vendor directory + +invalid: +$work/invalid.go': malformed file path "invalid.go'": invalid char '\'' +-- valid.go -- +-- go.mod -- +go 1.23 +-- invalid.go' -- +-- vendor/modules.txt -- +-- vendor/x/y -- +-- sub/go.mod -- +-- .hg_archival.txt -- +-- .git/x -- +-- pkg/vendor/vendor.go -- diff --git a/zip/testdata/check_dir/various_go124.txt b/zip/testdata/check_dir/various_go124.txt new file mode 100644 index 0000000..43bed2e --- /dev/null +++ b/zip/testdata/check_dir/various_go124.txt @@ -0,0 +1,26 @@ +-- want -- +valid: +$work/go.mod +$work/pkg/vendor/vendor.go +$work/valid.go + +omitted: +$work/.hg_archival.txt: file is inserted by 'hg archive' and is always omitted +$work/.git: directory is a version control repository +$work/sub: directory is in another module +$work/vendor/modules.txt: file is in vendor directory +$work/vendor/x/y: file is in vendor directory + +invalid: +$work/invalid.go': malformed file path "invalid.go'": invalid char '\'' +-- go.mod -- +go 1.24 +-- valid.go -- +-- invalid.go' -- +-- vendor/modules.txt -- +-- vendor/x/y -- +-- sub/go.mod -- +go 1.23 +-- .hg_archival.txt -- +-- .git/x -- +-- pkg/vendor/vendor.go -- diff --git a/zip/testdata/check_files/various.txt b/zip/testdata/check_files/various.txt index a704a8a..06ec791 100644 --- a/zip/testdata/check_files/various.txt +++ b/zip/testdata/check_files/various.txt @@ -1,11 +1,13 @@ -- want -- valid: valid.go +vendor/modules.txt omitted: vendor/x/y: file is in vendor directory sub/go.mod: file is in another module .hg_archival.txt: file is inserted by 'hg archive' and is always omitted +pkg/vendor/vendor.go: file is in vendor directory invalid: not/../clean: file path is not clean @@ -17,9 +19,11 @@ valid.go: multiple entries for file "valid.go" -- GO.MOD -- -- invalid.go' -- -- vendor/x/y -- +-- vendor/modules.txt -- -- sub/go.mod -- -- .hg_archival.txt -- -- valid.go -- duplicate -- valid.go -- another duplicate +-- pkg/vendor/vendor.go -- diff --git a/zip/testdata/check_files/various_go123.txt b/zip/testdata/check_files/various_go123.txt new file mode 100644 index 0000000..a28be93 --- /dev/null +++ b/zip/testdata/check_files/various_go123.txt @@ -0,0 +1,30 @@ +-- want -- +valid: +valid.go +go.mod +vendor/modules.txt + +omitted: +vendor/x/y: file is in vendor directory +sub/go.mod: file is in another module +.hg_archival.txt: file is inserted by 'hg archive' and is always omitted +pkg/vendor/vendor.go: file is in vendor directory + +invalid: +not/../clean: file path is not clean +invalid.go': malformed file path "invalid.go'": invalid char '\'' +valid.go: multiple entries for file "valid.go" +-- valid.go -- +-- not/../clean -- +-- go.mod -- +go 1.23 +-- invalid.go' -- +-- vendor/x/y -- +-- vendor/modules.txt -- +-- sub/go.mod -- +-- .hg_archival.txt -- +-- valid.go -- +duplicate +-- valid.go -- +another duplicate +-- pkg/vendor/vendor.go -- diff --git a/zip/testdata/check_files/various_go124.txt b/zip/testdata/check_files/various_go124.txt new file mode 100644 index 0000000..94d4bfd --- /dev/null +++ b/zip/testdata/check_files/various_go124.txt @@ -0,0 +1,31 @@ +-- want -- +valid: +valid.go +go.mod +pkg/vendor/vendor.go + +omitted: +vendor/x/y: file is in vendor directory +vendor/modules.txt: file is in vendor directory +sub/go.mod: file is in another module +.hg_archival.txt: file is inserted by 'hg archive' and is always omitted + +invalid: +not/../clean: file path is not clean +invalid.go': malformed file path "invalid.go'": invalid char '\'' +valid.go: multiple entries for file "valid.go" +-- valid.go -- +-- not/../clean -- +-- go.mod -- +go 1.24 +-- invalid.go' -- +-- vendor/x/y -- +-- vendor/modules.txt -- +-- sub/go.mod -- +go 1.23 +-- .hg_archival.txt -- +-- valid.go -- +duplicate +-- valid.go -- +another duplicate +-- pkg/vendor/vendor.go -- diff --git a/zip/testdata/create/exclude_vendor.txt b/zip/testdata/create/exclude_vendor.txt index 79b2c08..40ac449 100644 --- a/zip/testdata/create/exclude_vendor.txt +++ b/zip/testdata/create/exclude_vendor.txt @@ -12,3 +12,6 @@ see comment in isVendoredPackage and golang.org/issue/31562. excluded -- sub/vendor/sub.txt -- excluded +-- pkg/vendor/vendor.go -- +excluded +see comment in isVendoredPackage and golang.org/issue/37397 diff --git a/zip/testdata/create/exclude_vendor_go124.txt b/zip/testdata/create/exclude_vendor_go124.txt new file mode 100644 index 0000000..940df51 --- /dev/null +++ b/zip/testdata/create/exclude_vendor_go124.txt @@ -0,0 +1,18 @@ +path=example.com/m +version=v1.0.0 +hash=h1:mJR1q75yiMK6+CDw6DuhMS4v4hNoLXqkyk2Cph4VS8Q= +-- go.mod -- +module example.com/m + +go 1.24 +modules.txt is excluded in 1.24+. See golang.org/issue/63395 +-- vendor/modules.txt -- +excluded +see comment in isVendoredPackage and golang.org/issue/31562. +-- vendor/example.com/x/x.go -- +excluded +-- sub/vendor/sub.txt -- +excluded +-- pkg/vendor/vendor.go -- +included +see comment in isVendoredPackage and golang.org/issue/37397 diff --git a/zip/testdata/create_from_dir/exclude_vendor.txt b/zip/testdata/create_from_dir/exclude_vendor.txt index 79b2c08..40ac449 100644 --- a/zip/testdata/create_from_dir/exclude_vendor.txt +++ b/zip/testdata/create_from_dir/exclude_vendor.txt @@ -12,3 +12,6 @@ see comment in isVendoredPackage and golang.org/issue/31562. excluded -- sub/vendor/sub.txt -- excluded +-- pkg/vendor/vendor.go -- +excluded +see comment in isVendoredPackage and golang.org/issue/37397 diff --git a/zip/testdata/create_from_dir/exclude_vendor_go124.txt b/zip/testdata/create_from_dir/exclude_vendor_go124.txt new file mode 100644 index 0000000..940df51 --- /dev/null +++ b/zip/testdata/create_from_dir/exclude_vendor_go124.txt @@ -0,0 +1,18 @@ +path=example.com/m +version=v1.0.0 +hash=h1:mJR1q75yiMK6+CDw6DuhMS4v4hNoLXqkyk2Cph4VS8Q= +-- go.mod -- +module example.com/m + +go 1.24 +modules.txt is excluded in 1.24+. See golang.org/issue/63395 +-- vendor/modules.txt -- +excluded +see comment in isVendoredPackage and golang.org/issue/31562. +-- vendor/example.com/x/x.go -- +excluded +-- sub/vendor/sub.txt -- +excluded +-- pkg/vendor/vendor.go -- +included +see comment in isVendoredPackage and golang.org/issue/37397 diff --git a/zip/vendor_test.go b/zip/vendor_test.go index 5eb9535..9deb192 100644 --- a/zip/vendor_test.go +++ b/zip/vendor_test.go @@ -6,33 +6,51 @@ package zip import "testing" +var pre124 []string = []string{ + "", + "go1.14", + "go1.21.0", + "go1.22.4", + "go1.23", + "go1.23.1", + "go1.2", + "go1.7", + "go1.9", +} + +var after124 []string = []string{"go1.24.0", "go1.24", "go1.99.0"} + +var allVers []string = append(pre124, after124...) + func TestIsVendoredPackage(t *testing.T) { for _, tc := range []struct { - path string - want bool - falsePositive bool // is this case affected by https://golang.org/issue/37397? + path string + want bool + versions []string }{ - {path: "vendor/foo/foo.go", want: true}, - {path: "pkg/vendor/foo/foo.go", want: true}, - {path: "longpackagename/vendor/foo/foo.go", want: true}, + {path: "vendor/foo/foo.go", want: true, versions: allVers}, + {path: "pkg/vendor/foo/foo.go", want: true, versions: allVers}, + {path: "longpackagename/vendor/foo/foo.go", want: true, versions: allVers}, + {path: "vendor/vendor.go", want: false, versions: allVers}, + {path: "vendor/foo/modules.txt", want: true, versions: allVers}, + {path: "modules.txt", want: false, versions: allVers}, + {path: "vendor/amodules.txt", want: false, versions: allVers}, - {path: "vendor/vendor.go", want: false}, + // These test cases were affected by https://golang.org/issue/63395 + {path: "vendor/modules.txt", want: false, versions: pre124}, + {path: "vendor/modules.txt", want: true, versions: after124}, - // We ideally want these cases to be false, but they are affected by - // https://golang.org/issue/37397, and if we fix them we will invalidate - // existing module checksums. We must leave them as-is-for now. - {path: "pkg/vendor/vendor.go", falsePositive: true}, - {path: "longpackagename/vendor/vendor.go", falsePositive: true}, + // These test cases were affected by https://golang.org/issue/37397 + {path: "pkg/vendor/vendor.go", want: true, versions: pre124}, + {path: "pkg/vendor/vendor.go", want: false, versions: after124}, + {path: "longpackagename/vendor/vendor.go", want: true, versions: pre124}, + {path: "longpackagename/vendor/vendor.go", want: false, versions: after124}, } { - got := isVendoredPackage(tc.path) - want := tc.want - if tc.falsePositive { - want = true - } - if got != want { - t.Errorf("isVendoredPackage(%q) = %t; want %t", tc.path, got, tc.want) - if tc.falsePositive { - t.Logf("(Expected a false-positive due to https://golang.org/issue/37397.)") + for _, v := range tc.versions { + got := isVendoredPackage(tc.path, v) + want := tc.want + if got != want { + t.Errorf("isVendoredPackage(%q, %s) = %t; want %t", tc.path, v, got, tc.want) } } } diff --git a/zip/zip.go b/zip/zip.go index 5aed6e2..3673db4 100644 --- a/zip/zip.go +++ b/zip/zip.go @@ -50,6 +50,7 @@ import ( "bytes" "errors" "fmt" + "go/version" "io" "os" "os/exec" @@ -60,6 +61,7 @@ import ( "unicode" "unicode/utf8" + "golang.org/x/mod/modfile" "golang.org/x/mod/module" ) @@ -193,6 +195,20 @@ func CheckFiles(files []File) (CheckedFiles, error) { return cf, cf.Err() } +// parseGoVers extracts the Go version specified in the given go.mod file. +// It returns an empty string if the version is not found or if an error +// occurs during file parsing. +// +// The version string is in Go toolchain name syntax, prefixed with "go". +// Examples: "go1.21", "go1.22rc2", "go1.23.0" +func parseGoVers(file string, data []byte) string { + mfile, err := modfile.ParseLax(file, data, nil) + if err != nil || mfile.Go == nil { + return "" + } + return "go" + mfile.Go.Version +} + // checkFiles implements CheckFiles and also returns lists of valid files and // their sizes, corresponding to cf.Valid. It omits files in submodules, files // in vendored packages, symlinked files, and various other unwanted files. @@ -217,6 +233,7 @@ func checkFiles(files []File) (cf CheckedFiles, validFiles []File, validSizes [] // Files in these directories will be omitted. // These directories will not be included in the output zip. haveGoMod := make(map[string]bool) + var vers string for _, f := range files { p := f.Path() dir, base := path.Split(p) @@ -226,8 +243,21 @@ func checkFiles(files []File) (cf CheckedFiles, validFiles []File, validSizes [] addError(p, false, err) continue } - if info.Mode().IsRegular() { - haveGoMod[dir] = true + if !info.Mode().IsRegular() { + continue + } + haveGoMod[dir] = true + // Extract the Go language version from the root "go.mod" file. + // This ensures we correctly interpret Go version-specific file omissions. + // We use f.Open() to handle potential custom Open() implementations + // that the underlying File type might have. + if base == "go.mod" && dir == "" { + if file, err := f.Open(); err == nil { + if data, err := io.ReadAll(file); err == nil { + vers = version.Lang(parseGoVers("go.mod", data)) + } + file.Close() + } } } } @@ -257,7 +287,7 @@ func checkFiles(files []File) (cf CheckedFiles, validFiles []File, validSizes [] addError(p, false, errPathNotRelative) continue } - if isVendoredPackage(p) { + if isVendoredPackage(p, vers) { // Skip files in vendored packages. addError(p, true, errVendored) continue @@ -756,20 +786,42 @@ func (fi dataFileInfo) Sys() interface{} { return nil } // in a package whose import path contains (but does not end with) the component // "vendor". // -// Unfortunately, isVendoredPackage reports false positives for files in any -// non-top-level package whose import path ends in "vendor". -func isVendoredPackage(name string) bool { +// The 'vers' parameter specifies the Go version declared in the module's +// go.mod file and must be a valid Go version according to the +// go/version.IsValid function. +// Vendoring behavior has evolved across Go versions, so this function adapts +// its logic accordingly. +func isVendoredPackage(name string, vers string) bool { + // vendor/modules.txt is a vendored package but was included in 1.23 and earlier. + // Remove vendor/modules.txt only for 1.24 and beyond to preserve older checksums. + if version.Compare(vers, "go1.24") >= 0 && name == "vendor/modules.txt" { + return true + } var i int if strings.HasPrefix(name, "vendor/") { i += len("vendor/") } else if j := strings.Index(name, "/vendor/"); j >= 0 { - // This offset looks incorrect; this should probably be + // Calculate the correct starting position within the import path + // to determine if a package is vendored. + // + // Due to a bug in Go versions before 1.24 + // (see https://golang.org/issue/37397), the "/vendor/" prefix within + // a package path was not always correctly interpreted. + // + // This bug affected how vendored packages were identified in cases like: // - // i = j + len("/vendor/") + // - "pkg/vendor/vendor.go" (incorrectly identified as vendored in pre-1.24) + // - "pkg/vendor/foo/foo.go" (correctly identified as vendored) // - // (See https://golang.org/issue/31562 and https://golang.org/issue/37397.) - // Unfortunately, we can't fix it without invalidating module checksums. - i += len("/vendor/") + // To correct this, in Go 1.24 and later, we skip the entire "/vendor/" prefix + // when it's part of a nested package path (as in the first example above). + // In earlier versions, we only skipped the length of "/vendor/", leading + // to the incorrect behavior. + if version.Compare(vers, "go1.24") >= 0 { + i = j + len("/vendor/") + } else { + i += len("/vendor/") + } } else { return false } @@ -894,6 +946,12 @@ func (cc collisionChecker) check(p string, isDir bool) error { // files, as well as a list of directories and files that were skipped (for // example, nested modules and symbolic links). func listFilesInDir(dir string) (files []File, omitted []FileError, err error) { + // Extract the Go language version from the root "go.mod" file. + // This ensures we correctly interpret Go version-specific file omissions. + var vers string + if data, err := os.ReadFile(filepath.Join(dir, "go.mod")); err == nil { + vers = version.Lang(parseGoVers("go.mod", data)) + } err = filepath.Walk(dir, func(filePath string, info os.FileInfo, err error) error { if err != nil { return err @@ -904,11 +962,10 @@ func listFilesInDir(dir string) (files []File, omitted []FileError, err error) { } slashPath := filepath.ToSlash(relPath) - // Skip some subdirectories inside vendor, but maintain bug - // golang.org/issue/31562, described in isVendoredPackage. + // Skip some subdirectories inside vendor. // We would like Create and CreateFromDir to produce the same result // for a set of files, whether expressed as a directory tree or zip. - if isVendoredPackage(slashPath) { + if isVendoredPackage(slashPath, vers) { omitted = append(omitted, FileError{Path: slashPath, Err: errVendored}) return nil }