Skip to content

compiler: use hash calculation for determining archive staleness #805

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
Open
176 changes: 128 additions & 48 deletions build/build.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package build

import (
"bytes"
"crypto/sha256"
"fmt"
"go/ast"
"go/build"
Expand All @@ -15,9 +17,9 @@ import (
"path"
"path/filepath"
"runtime"
"sort"
"strconv"
"strings"
"time"

"github.com/fsnotify/fsnotify"
"github.com/gopherjs/gopherjs/compiler"
Expand All @@ -28,6 +30,25 @@ import (
"golang.org/x/tools/go/buildutil"
)

const (
hashDebug = false
)

var (
compilerBinaryHash string
)

func init() {
// We do this here because it will only fail in truly bad situations, e.g.
// the machine running out of resources. We panic if there is a problem
// because, in that case, it is unlikely anything else will work anyway.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than panicking here, would it be useful to fallback to a cache-disabled state?

Maybe that would allow running the gopherjs compiler in unusual environments (such as within a browser)? (That may already be impossible for other reasons — I don't know.)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a fair point. I think I'd like to understand that situation a bit better, because a whole load of logic in build depends on the OS being available (writing archives, etc.).

The compiler is run within the browser by the playground, but that does not import build, just compiler.

h, err := hashCompilerBinary()
if err != nil {
panic(err)
}
compilerBinaryHash = h
}

type ImportCError struct {
pkgPath string
}
Expand Down Expand Up @@ -457,11 +478,10 @@ func (o *Options) PrintSuccess(format string, a ...interface{}) {

type PackageData struct {
*build.Package
JSFiles []string
IsTest bool // IsTest is true if the package is being built for running tests.
SrcModTime time.Time
UpToDate bool
IsVirtual bool // If true, the package does not have a corresponding physical directory on disk.
JSFiles []string
IsTest bool // IsTest is true if the package is being built for running tests.
UpToDate bool
IsVirtual bool // If true, the package does not have a corresponding physical directory on disk.
}

type Session struct {
Expand Down Expand Up @@ -591,24 +611,64 @@ func (s *Session) buildImportPathWithSrcDir(path string, srcDir string) (*Packag
return pkg, archive, nil
}

// hashCompilerBinary computes a sha256 digest of the currently running
// GopherJS binary and memoizes it in the package-level compilerBinaryHash.
// The digest feeds into each package's staleness hash, so installing a new
// compiler binary invalidates every cached archive.
func hashCompilerBinary() (string, error) {
	// Fast path: reuse the digest cached by a previous call (or by init).
	if compilerBinaryHash != "" {
		return compilerBinaryHash, nil
	}

	binPath, err := os.Executable()
	if err != nil {
		return "", fmt.Errorf("could not locate GopherJS binary: %v", err)
	}
	binFile, err := os.Open(binPath)
	if err != nil {
		return "", fmt.Errorf("could not open %v: %v", binPath, err)
	}
	defer binFile.Close()

	digest := sha256.New()
	if _, err := io.Copy(digest, binFile); err != nil {
		return "", fmt.Errorf("failed to hash %v: %v", binPath, err)
	}

	compilerBinaryHash = fmt.Sprintf("%#x", digest.Sum(nil))
	return compilerBinaryHash, nil
}

func (s *Session) BuildPackage(pkg *PackageData) (*compiler.Archive, error) {
if archive, ok := s.Archives[pkg.ImportPath]; ok {
return archive, nil
}

// For non-main and test packages we build up a hash that will help
// determine staleness. Set hashDebug to see this in action. The format is:
//
// ## <package>
// compiler binary hash: 0x519d22c6ab65a950f5b6278e4d65cb75dbd3a7eb1cf16e976a40b9f1febc0446
// build tags: <list of build tags>
// import: <import path>
// hash: 0xb966d7680c1c8ca75026f993c153aff0102dc9551f314e5352043187b5f9c9a6
// ...
//
// file: <file path>
// <file contents>
// N bytes
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I feel like this format is a little bit arbitrary. Can we use JSON or YAML or something formatted? I don't have a strong opinion though...

Copy link
Member

@flimzy flimzy Apr 24, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A possible problem with JSON/YAML is that it's not inherently ordered. While we could find a way to ensure constant ordering, it wouldn't necessarily happen by default, which could lead to spurious cache misses (false negatives).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The goal here is to create a hash as quickly as possible in order to determine whether we have a cache miss or not. So yes, whilst the format is arbitrary, it is simple. And requires no additional processing in order to write to the hash. Using JSON/YAML adds more overhead in the middle, overhead that is unnecessary because ultimately the output is a 256 bit value. Humans will only ever read this whilst debugging (which itself will be a rare occurrence) with hashDebug = true.

// ...

pkgHash := sha256.New()
var hw io.Writer = pkgHash
var hashDebugOut *bytes.Buffer
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an optional suggestion: Instead of bytes.Buffer, how about using text/template?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm unclear, why/how would we use text/template here?

if hashDebug {
hashDebugOut = new(bytes.Buffer)
hw = io.MultiWriter(hashDebugOut, pkgHash)
}

if pkg.PkgObj != "" {
var fileInfo os.FileInfo
gopherjsBinary, err := os.Executable()
if err == nil {
fileInfo, err = os.Stat(gopherjsBinary)
if err == nil {
pkg.SrcModTime = fileInfo.ModTime()
}
}
if err != nil {
os.Stderr.WriteString("Could not get GopherJS binary's modification timestamp. Please report issue.\n")
pkg.SrcModTime = time.Now()
}
fmt.Fprintf(hw, "## %v\n", pkg.ImportPath)
fmt.Fprintf(hw, "compiler binary hash: %v\n", compilerBinaryHash)

orderedBuildTags := append([]string{}, s.options.BuildTags...)
sort.Strings(orderedBuildTags)

fmt.Fprintf(hw, "build tags: %v\n", strings.Join(orderedBuildTags, ","))

for _, importedPkgPath := range pkg.Imports {
// Ignore all imports that aren't mentioned in import specs of pkg.
Expand All @@ -630,50 +690,72 @@ func (s *Session) BuildPackage(pkg *PackageData) (*compiler.Archive, error) {
if importedPkgPath == "unsafe" || ignored {
continue
}
importedPkg, _, err := s.buildImportPathWithSrcDir(importedPkgPath, pkg.Dir)
_, importedArchive, err := s.buildImportPathWithSrcDir(importedPkgPath, pkg.Dir)
if err != nil {
return nil, err
}
impModTime := importedPkg.SrcModTime
if impModTime.After(pkg.SrcModTime) {
pkg.SrcModTime = impModTime
}

fmt.Fprintf(hw, "import: %v\n", importedPkgPath)
fmt.Fprintf(hw, " hash: %#x\n", importedArchive.Hash)
}

for _, name := range append(pkg.GoFiles, pkg.JSFiles...) {
fileInfo, err := statFile(filepath.Join(pkg.Dir, name))
if err != nil {
return nil, err
hashFile := func() error {
fp := filepath.Join(pkg.Dir, name)
file, err := s.bctx.OpenFile(fp)
if err != nil {
return fmt.Errorf("failed to open %v: %v", fp, err)
}
defer file.Close()
fmt.Fprintf(hw, "file: %v\n", fp)
n, err := io.Copy(hw, file)
if err != nil {
return fmt.Errorf("failed to hash file contents: %v", err)
}
fmt.Fprintf(hw, "%d bytes\n", n)
return nil
}
if fileInfo.ModTime().After(pkg.SrcModTime) {
pkg.SrcModTime = fileInfo.ModTime()

if err := hashFile(); err != nil {
return nil, fmt.Errorf("failed to hash file %v: %v", name, err)
}
}

pkgObjFileInfo, err := os.Stat(pkg.PkgObj)
if err == nil && !pkg.SrcModTime.After(pkgObjFileInfo.ModTime()) {
// package object is up to date, load from disk if library
pkg.UpToDate = true
if pkg.IsCommand() {
return nil, nil
}
if hashDebug {
fmt.Printf("%s", hashDebugOut.String())
}

objFile, err := os.Open(pkg.PkgObj)
if err != nil {
return nil, err
}
defer objFile.Close()
// no commands are archived
if pkg.IsCommand() {
goto CacheMiss
}

archive, err := compiler.ReadArchive(pkg.PkgObj, pkg.ImportPath, objFile, s.Types)
if err != nil {
return nil, err
objFile, err := os.Open(pkg.PkgObj)
if err != nil {
if os.IsNotExist(err) {
goto CacheMiss
}
return nil, err
}
defer objFile.Close()

archive, err := compiler.ReadArchive(pkg.PkgObj, pkg.ImportPath, objFile, s.Types)
if err != nil {
return nil, err
}

if bytes.Equal(archive.Hash, pkgHash.Sum(nil)) {
s.Archives[pkg.ImportPath] = archive
return archive, err
return archive, nil
}
}

CacheMiss:

if s.options.Verbose {
fmt.Printf("Cache miss for %v\n", pkg.ImportPath)
}

fileSet := token.NewFileSet()
files, err := parseAndAugment(s.bctx, pkg.Package, pkg.IsTest, fileSet)
if err != nil {
Expand All @@ -700,6 +782,8 @@ func (s *Session) BuildPackage(pkg *PackageData) (*compiler.Archive, error) {
return nil, err
}

archive.Hash = pkgHash.Sum(nil)

for _, jsFile := range pkg.JSFiles {
code, err := ioutil.ReadFile(filepath.Join(pkg.Dir, jsFile))
if err != nil {
Expand All @@ -710,10 +794,6 @@ func (s *Session) BuildPackage(pkg *PackageData) (*compiler.Archive, error) {
archive.IncJSCode = append(archive.IncJSCode, []byte("\n\t}).call($global);\n")...)
}

if s.options.Verbose {
fmt.Println(pkg.ImportPath)
}

s.Archives[pkg.ImportPath] = archive

if pkg.PkgObj == "" || pkg.IsCommand() {
Expand Down
1 change: 1 addition & 0 deletions compiler/compiler.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ func (err ErrorList) Error() string {
}

type Archive struct {
Hash []byte
ImportPath string
Name string
Imports []string
Expand Down
Loading