From 359b2da79be229c0bab10174280a743372816c0e Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Fri, 9 May 2025 05:12:34 +0000 Subject: [PATCH 1/2] chore: expose original length when serving slim binaries This will be used in the extensions and desktop apps to enable compression AND progress reporting for the download by comparing the original content length to the amount of bytes written to disk. --- site/site.go | 102 +++++++++++++++++++++++++++------------------- site/site_test.go | 87 +++++++++++++++++++++++++++++++-------- 2 files changed, 129 insertions(+), 60 deletions(-) diff --git a/site/site.go b/site/site.go index e47e15848cda0..0771f801c3647 100644 --- a/site/site.go +++ b/site/site.go @@ -108,10 +108,34 @@ func New(opts *Options) *Handler { panic(fmt.Sprintf("Failed to parse html files: %v", err)) } - binHashCache := newBinHashCache(opts.BinFS, opts.BinHashes) - mux := http.NewServeMux() - mux.Handle("/bin/", http.StripPrefix("/bin", http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + mux.Handle("/bin/", binHandler(opts.BinFS, newBinHashCache(opts.BinHashes))) + mux.Handle("/", http.FileServer( + http.FS( + // OnlyFiles is a wrapper around the file system that prevents directory + // listings. Directory listings are not required for the site file system, so we + // exclude it as a security measure. In practice, this file system comes from our + // open source code base, but this is considered a best practice for serving + // static files. + OnlyFiles(opts.SiteFS))), + ) + buildInfoResponse, err := json.Marshal(opts.BuildInfo) + if err != nil { + panic("failed to marshal build info: " + err.Error()) + } + handler.buildInfoJSON = html.EscapeString(string(buildInfoResponse)) + handler.handler = mux.ServeHTTP + + handler.installScript, err = parseInstallScript(opts.SiteFS, opts.BuildInfo) + if err != nil { + opts.Logger.Warn(context.Background(), "could not parse install.sh, it will be unavailable", slog.Error(err)) + } + + return handler +} + +func binHandler(binFS http.FileSystem, binHashCache *binHashCache) http.Handler { + return http.StripPrefix("/bin", http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { // Convert underscores in the filename to hyphens. We eventually want to // change our hyphen-based filenames to underscores, but we need to // support both for now. @@ -122,7 +146,7 @@ func New(opts *Options) *Handler { if name == "" || name == "/" { // Serve the directory listing. This intentionally allows directory listings to // be served. This file system should not contain anything sensitive. - http.FileServer(opts.BinFS).ServeHTTP(rw, r) + http.FileServer(binFS).ServeHTTP(rw, r) return } if strings.Contains(name, "/") { @@ -131,7 +155,8 @@ func New(opts *Options) *Handler { http.NotFound(rw, r) return } - hash, err := binHashCache.getHash(name) + + f, err := binFS.Open(name) if xerrors.Is(err, os.ErrNotExist) { http.NotFound(rw, r) return @@ -140,36 +165,38 @@ func New(opts *Options) *Handler { http.Error(rw, err.Error(), http.StatusInternalServerError) return } + defer f.Close() + + // http.FileServer will not set Content-Length when performing chunked + // transport encoding, which is used for large files like our binaries + // so stream compression can be used. + // + // Clients like IDE extensions and the desktop apps can compare the + // value of this header with the amount of bytes written to disk after + // decompression to show progress. Without this, they cannot show + // progress without disabling compression. + stat, err := f.Stat() + if err != nil { + http.Error(rw, err.Error(), http.StatusInternalServerError) + return + } + // There isn't really a spec for a length header for the "inner" content + // size, but some nginx modules use this header. + rw.Header().Set("X-Original-Content-Length", fmt.Sprintf("%d", stat.Size())) + // Get and set ETag header. + hash, err := binHashCache.getHash(name, f) + if err != nil { + http.Error(rw, err.Error(), http.StatusInternalServerError) + return + } // ETag header needs to be quoted. rw.Header().Set("ETag", fmt.Sprintf(`%q`, hash)) // http.FileServer will see the ETag header and automatically handle // If-Match and If-None-Match headers on the request properly. - http.FileServer(opts.BinFS).ServeHTTP(rw, r) - }))) - mux.Handle("/", http.FileServer( - http.FS( - // OnlyFiles is a wrapper around the file system that prevents directory - // listings. Directory listings are not required for the site file system, so we - // exclude it as a security measure. In practice, this file system comes from our - // open source code base, but this is considered a best practice for serving - // static files. - OnlyFiles(opts.SiteFS))), - ) - buildInfoResponse, err := json.Marshal(opts.BuildInfo) - if err != nil { - panic("failed to marshal build info: " + err.Error()) - } - handler.buildInfoJSON = html.EscapeString(string(buildInfoResponse)) - handler.handler = mux.ServeHTTP - - handler.installScript, err = parseInstallScript(opts.SiteFS, opts.BuildInfo) - if err != nil { - opts.Logger.Warn(context.Background(), "could not parse install.sh, it will be unavailable", slog.Error(err)) - } - - return handler + http.FileServer(binFS).ServeHTTP(rw, r) + })) } type Handler struct { @@ -217,7 +244,7 @@ func (h *Handler) ServeHTTP(rw http.ResponseWriter, r *http.Request) { h.handler.ServeHTTP(rw, r) return // If requesting assets, serve straight up with caching. - case reqFile == "assets" || strings.HasPrefix(reqFile, "assets/"): + case reqFile == "assets" || strings.HasPrefix(reqFile, "assets/") || strings.HasPrefix(reqFile, "icon/"): // It could make sense to cache 404s, but the problem is that during an // upgrade a load balancer may route partially to the old server, and that // would make new asset paths get cached as 404s and not load even once the @@ -953,17 +980,14 @@ func RenderStaticErrorPage(rw http.ResponseWriter, r *http.Request, data ErrorPa } type binHashCache struct { - binFS http.FileSystem - hashes map[string]string mut sync.RWMutex sf singleflight.Group sem chan struct{} } -func newBinHashCache(binFS http.FileSystem, binHashes map[string]string) *binHashCache { +func newBinHashCache(binHashes map[string]string) *binHashCache { b := &binHashCache{ - binFS: binFS, hashes: make(map[string]string, len(binHashes)), mut: sync.RWMutex{}, sf: singleflight.Group{}, @@ -977,7 +1001,7 @@ func newBinHashCache(binFS http.FileSystem, binHashes map[string]string) *binHas return b } -func (b *binHashCache) getHash(name string) (string, error) { +func (b *binHashCache) getHash(name string, f http.File) (string, error) { b.mut.RLock() hash, ok := b.hashes[name] b.mut.RUnlock() @@ -990,14 +1014,8 @@ func (b *binHashCache) getHash(name string) (string, error) { b.sem <- struct{}{} defer func() { <-b.sem }() - f, err := b.binFS.Open(name) - if err != nil { - return "", err - } - defer f.Close() - h := sha1.New() //#nosec // Not used for cryptography. - _, err = io.Copy(h, f) + _, err := io.Copy(h, f) if err != nil { return "", err } diff --git a/site/site_test.go b/site/site_test.go index 63f3f9aa17226..d257bd9519b3d 100644 --- a/site/site_test.go +++ b/site/site_test.go @@ -19,6 +19,7 @@ import ( "time" "github.com/go-chi/chi/v5" + "github.com/go-chi/chi/v5/middleware" "github.com/google/uuid" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -373,11 +374,13 @@ func TestServingBin(t *testing.T) { delete(sampleBinFSMissingSha256, binCoderSha1) type req struct { - url string - ifNoneMatch string - wantStatus int - wantBody []byte - wantEtag string + url string + ifNoneMatch string + wantStatus int + wantBody []byte + wantOriginalSize int + wantEtag string + compression bool } tests := []struct { name string @@ -390,17 +393,27 @@ func TestServingBin(t *testing.T) { fs: sampleBinFS(), reqs: []req{ { - url: "/bin/coder-linux-amd64", - wantStatus: http.StatusOK, - wantBody: []byte("compressed"), - wantEtag: fmt.Sprintf("%q", sampleBinSHAs["coder-linux-amd64"]), + url: "/bin/coder-linux-amd64", + wantStatus: http.StatusOK, + wantBody: []byte("compressed"), + wantOriginalSize: 10, + wantEtag: fmt.Sprintf("%q", sampleBinSHAs["coder-linux-amd64"]), }, // Test ETag support. { - url: "/bin/coder-linux-amd64", - ifNoneMatch: fmt.Sprintf("%q", sampleBinSHAs["coder-linux-amd64"]), - wantStatus: http.StatusNotModified, - wantEtag: fmt.Sprintf("%q", sampleBinSHAs["coder-linux-amd64"]), + url: "/bin/coder-linux-amd64", + ifNoneMatch: fmt.Sprintf("%q", sampleBinSHAs["coder-linux-amd64"]), + wantStatus: http.StatusNotModified, + wantOriginalSize: 10, + wantEtag: fmt.Sprintf("%q", sampleBinSHAs["coder-linux-amd64"]), + }, + // Test compression support with X-Original-Content-Length + // header. + { + url: "/bin/coder-linux-amd64", + wantStatus: http.StatusOK, + wantOriginalSize: 10, + compression: true, }, {url: "/bin/GITKEEP", wantStatus: http.StatusNotFound}, }, @@ -462,9 +475,24 @@ func TestServingBin(t *testing.T) { }, reqs: []req{ // We support both hyphens and underscores for compatibility. - {url: "/bin/coder-linux-amd64", wantStatus: http.StatusOK, wantBody: []byte("embed")}, - {url: "/bin/coder_linux_amd64", wantStatus: http.StatusOK, wantBody: []byte("embed")}, - {url: "/bin/GITKEEP", wantStatus: http.StatusOK, wantBody: []byte("")}, + { + url: "/bin/coder-linux-amd64", + wantStatus: http.StatusOK, + wantBody: []byte("embed"), + wantOriginalSize: 5, + }, + { + url: "/bin/coder_linux_amd64", + wantStatus: http.StatusOK, + wantBody: []byte("embed"), + wantOriginalSize: 5, + }, + { + url: "/bin/GITKEEP", + wantStatus: http.StatusOK, + wantBody: []byte(""), + wantOriginalSize: 0, + }, }, }, } @@ -482,12 +510,14 @@ func TestServingBin(t *testing.T) { require.Error(t, err, "extraction or read did not fail") } - srv := httptest.NewServer(site.New(&site.Options{ + site := site.New(&site.Options{ Telemetry: telemetry.NewNoop(), BinFS: binFS, BinHashes: binHashes, SiteFS: rootFS, - })) + }) + compressor := middleware.NewCompressor(1, "text/*", "application/*") + srv := httptest.NewServer(compressor.Handler(site)) defer srv.Close() // Create a context @@ -502,6 +532,9 @@ func TestServingBin(t *testing.T) { if tr.ifNoneMatch != "" { req.Header.Set("If-None-Match", tr.ifNoneMatch) } + if tr.compression { + req.Header.Set("Accept-Encoding", "gzip") + } resp, err := http.DefaultClient.Do(req) require.NoError(t, err, "http do failed") @@ -520,10 +553,28 @@ func TestServingBin(t *testing.T) { assert.Empty(t, gotBody, "body is not empty") } + if tr.compression { + assert.Equal(t, "gzip", resp.Header.Get("Content-Encoding"), "content encoding is not gzip") + } else { + assert.Empty(t, resp.Header.Get("Content-Encoding"), "content encoding is not empty") + } + if tr.wantEtag != "" { assert.NotEmpty(t, resp.Header.Get("ETag"), "etag header is empty") assert.Equal(t, tr.wantEtag, resp.Header.Get("ETag"), "etag did not match") } + + if tr.wantOriginalSize > 0 { + // This is a custom header that we set to help the + // client know the size of the decompressed data. See + // the comment in site.go. + headerStr := resp.Header.Get("X-Original-Content-Length") + assert.NotEmpty(t, headerStr, "X-Original-Content-Length header is empty") + originalSize, err := strconv.Atoi(headerStr) + if assert.NoErrorf(t, err, "could not parse X-Original-Content-Length header %q", headerStr) { + assert.EqualValues(t, tr.wantOriginalSize, originalSize, "X-Original-Content-Length did not match") + } + } }) } }) From 125680f5f486cec307bce3c2043fbff129d065b8 Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Fri, 9 May 2025 06:51:59 +0000 Subject: [PATCH 2/2] Cache bin size too --- site/site.go | 113 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 69 insertions(+), 44 deletions(-) diff --git a/site/site.go b/site/site.go index 0771f801c3647..2b64d3cf98f81 100644 --- a/site/site.go +++ b/site/site.go @@ -109,7 +109,7 @@ func New(opts *Options) *Handler { } mux := http.NewServeMux() - mux.Handle("/bin/", binHandler(opts.BinFS, newBinHashCache(opts.BinHashes))) + mux.Handle("/bin/", binHandler(opts.BinFS, newBinMetadataCache(opts.BinFS, opts.BinHashes))) mux.Handle("/", http.FileServer( http.FS( // OnlyFiles is a wrapper around the file system that prevents directory @@ -134,7 +134,7 @@ func New(opts *Options) *Handler { return handler } -func binHandler(binFS http.FileSystem, binHashCache *binHashCache) http.Handler { +func binHandler(binFS http.FileSystem, binMetadataCache *binMetadataCache) http.Handler { return http.StripPrefix("/bin", http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { // Convert underscores in the filename to hyphens. We eventually want to // change our hyphen-based filenames to underscores, but we need to @@ -156,7 +156,7 @@ func binHandler(binFS http.FileSystem, binHashCache *binHashCache) http.Handler return } - f, err := binFS.Open(name) + metadata, err := binMetadataCache.getMetadata(name) if xerrors.Is(err, os.ErrNotExist) { http.NotFound(rw, r) return @@ -165,7 +165,6 @@ func binHandler(binFS http.FileSystem, binHashCache *binHashCache) http.Handler http.Error(rw, err.Error(), http.StatusInternalServerError) return } - defer f.Close() // http.FileServer will not set Content-Length when performing chunked // transport encoding, which is used for large files like our binaries @@ -175,23 +174,13 @@ func binHandler(binFS http.FileSystem, binHashCache *binHashCache) http.Handler // value of this header with the amount of bytes written to disk after // decompression to show progress. Without this, they cannot show // progress without disabling compression. - stat, err := f.Stat() - if err != nil { - http.Error(rw, err.Error(), http.StatusInternalServerError) - return - } + // // There isn't really a spec for a length header for the "inner" content // size, but some nginx modules use this header. - rw.Header().Set("X-Original-Content-Length", fmt.Sprintf("%d", stat.Size())) + rw.Header().Set("X-Original-Content-Length", fmt.Sprintf("%d", metadata.sizeBytes)) - // Get and set ETag header. - hash, err := binHashCache.getHash(name, f) - if err != nil { - http.Error(rw, err.Error(), http.StatusInternalServerError) - return - } - // ETag header needs to be quoted. - rw.Header().Set("ETag", fmt.Sprintf(`%q`, hash)) + // Get and set ETag header. Must be quoted. + rw.Header().Set("ETag", fmt.Sprintf(`%q`, metadata.sha1Hash)) // http.FileServer will see the ETag header and automatically handle // If-Match and If-None-Match headers on the request properly. @@ -979,59 +968,95 @@ func RenderStaticErrorPage(rw http.ResponseWriter, r *http.Request, data ErrorPa } } -type binHashCache struct { - hashes map[string]string - mut sync.RWMutex - sf singleflight.Group - sem chan struct{} +type binMetadata struct { + sizeBytes int64 // -1 if not known yet + // SHA1 was chosen because it's fast to compute and reasonable for + // determining if a file has changed. The ETag is not used a security + // measure. + sha1Hash string // always set if in the cache +} + +type binMetadataCache struct { + binFS http.FileSystem + originalHashes map[string]string + + metadata map[string]binMetadata + mut sync.RWMutex + sf singleflight.Group + sem chan struct{} } -func newBinHashCache(binHashes map[string]string) *binHashCache { - b := &binHashCache{ - hashes: make(map[string]string, len(binHashes)), - mut: sync.RWMutex{}, - sf: singleflight.Group{}, - sem: make(chan struct{}, 4), +func newBinMetadataCache(binFS http.FileSystem, binSha1Hashes map[string]string) *binMetadataCache { + b := &binMetadataCache{ + binFS: binFS, + originalHashes: make(map[string]string, len(binSha1Hashes)), + + metadata: make(map[string]binMetadata, len(binSha1Hashes)), + mut: sync.RWMutex{}, + sf: singleflight.Group{}, + sem: make(chan struct{}, 4), } - // Make a copy since we're gonna be mutating it. - for k, v := range binHashes { - b.hashes[k] = v + + // Previously we copied binSha1Hashes to the cache immediately. Since we now + // read other information like size from the file, we can't do that. Instead + // we copy the hashes to a different map that will be used to populate the + // cache on the first request. + for k, v := range binSha1Hashes { + b.originalHashes[k] = v } return b } -func (b *binHashCache) getHash(name string, f http.File) (string, error) { +func (b *binMetadataCache) getMetadata(name string) (binMetadata, error) { b.mut.RLock() - hash, ok := b.hashes[name] + metadata, ok := b.metadata[name] b.mut.RUnlock() if ok { - return hash, nil + return metadata, nil } // Avoid DOS by using a pool, and only doing work once per file. - v, err, _ := b.sf.Do(name, func() (interface{}, error) { + v, err, _ := b.sf.Do(name, func() (any, error) { b.sem <- struct{}{} defer func() { <-b.sem }() - h := sha1.New() //#nosec // Not used for cryptography. - _, err := io.Copy(h, f) + f, err := b.binFS.Open(name) if err != nil { - return "", err + return binMetadata{}, err + } + defer f.Close() + + var metadata binMetadata + + stat, err := f.Stat() + if err != nil { + return binMetadata{}, err + } + metadata.sizeBytes = stat.Size() + + if hash, ok := b.originalHashes[name]; ok { + metadata.sha1Hash = hash + } else { + h := sha1.New() //#nosec // Not used for cryptography. + _, err := io.Copy(h, f) + if err != nil { + return binMetadata{}, err + } + metadata.sha1Hash = hex.EncodeToString(h.Sum(nil)) } - hash := hex.EncodeToString(h.Sum(nil)) b.mut.Lock() - b.hashes[name] = hash + b.metadata[name] = metadata b.mut.Unlock() - return hash, nil + return metadata, nil }) if err != nil { - return "", err + return binMetadata{}, err } //nolint:forcetypeassert - return strings.ToLower(v.(string)), nil + return v.(binMetadata), nil } func applicationNameOrDefault(cfg codersdk.AppearanceConfig) string {