From 7b8618cdcfae81d0adf642c7725307a61f0ba551 Mon Sep 17 00:00:00 2001
From: Spike Curtis
Date: Fri, 4 Oct 2024 09:36:56 +0000
Subject: [PATCH 1/2] chore: add http/pprof server over unix socket for debug

Signed-off-by: Spike Curtis
---
 main.go          |  1 +
 pprof_unix.go    | 44 ++++++++++++++++++++++++++++++++++++++++++++
 pprof_windows.go |  6 ++++++
 3 files changed, 51 insertions(+)
 create mode 100644 pprof_unix.go
 create mode 100644 pprof_windows.go

diff --git a/main.go b/main.go
index 000436a3..a16c9951 100644
--- a/main.go
+++ b/main.go
@@ -11,6 +11,7 @@ import (
 //go:generate go run github.com/hashicorp/terraform-plugin-docs/cmd/tfplugindocs
 
 func main() {
+	servePprof()
 	plugin.Serve(&plugin.ServeOpts{
 		ProviderFunc: provider.New,
 	})
diff --git a/pprof_unix.go b/pprof_unix.go
new file mode 100644
index 00000000..3a8d907b
--- /dev/null
+++ b/pprof_unix.go
@@ -0,0 +1,44 @@
+//go:build !windows
+
+package main
+
+import (
+	"net"
+	"net/http"
+	"net/http/pprof"
+	"os"
+)
+
+// servePprof starts an HTTP server running the pprof goroutine handler on a local unix domain socket. As described in
+// https://github.com/coder/coder/issues/14726 it appears this process is sometimes hanging, unable to exit cleanly,
+// and this prevents additional Coder builds that try to reinstall this provider. A goroutine dump should allow us to
+// determine what is hanging.
+//
+// This function is best-effort, and just returns early if we fail to set up the directory/listener. We don't want to
+// block the normal functioning of the provider.
+func servePprof() {
+	// Coder runs terraform in a per-build subdirectory of the work directory. The per-build subdirectory uses a
+	// generated name and is deleted at the end of a build, so we want to place our unix socket up one directory level
+	// in the provisionerd work directory, so we can connect to it from provisionerd.
+	err := os.Mkdir("../.coder", 0o700)
+	if err != nil && !os.IsExist(err) {
+		return
+	}
+
+	// remove the old file, if it exists. It's probably from the last run of the provider
+	if _, err = os.Stat("../.coder/pprof"); err == nil {
+		if err = os.Remove("../.coder/pprof"); err != nil {
+			return
+		}
+	}
+	l, err := net.Listen("unix", "../.coder/pprof")
+	if err != nil {
+		return
+	}
+	mux := http.NewServeMux()
+	mux.Handle("/debug/pprof/goroutine", pprof.Handler("goroutine"))
+	srv := http.Server{Handler: mux}
+	go srv.Serve(l)
+	// We just leave the server and domain socket up forever. Go programs exit when the `main()` function returns, so
+	// this won't block exiting, and it ensures the pprof server stays up for the entire lifetime of the provider.
+}
diff --git a/pprof_windows.go b/pprof_windows.go
new file mode 100644
index 00000000..05cd3143
--- /dev/null
+++ b/pprof_windows.go
@@ -0,0 +1,6 @@
+//go:build windows
+
+package main
+
+// servePprof is not supported on Windows
+func servePprof() {}

From 1547f11c22cdc13e7011c4d2c31fae921415f213 Mon Sep 17 00:00:00 2001
From: Spike Curtis
Date: Mon, 14 Oct 2024 07:14:58 +0000
Subject: [PATCH 2/2] remove old pprof file without checking if it exists

Signed-off-by: Spike Curtis
---
 pprof_unix.go | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/pprof_unix.go b/pprof_unix.go
index 3a8d907b..717bc01b 100644
--- a/pprof_unix.go
+++ b/pprof_unix.go
@@ -26,10 +26,8 @@ func servePprof() {
 	}
 
 	// remove the old file, if it exists. It's probably from the last run of the provider
-	if _, err = os.Stat("../.coder/pprof"); err == nil {
-		if err = os.Remove("../.coder/pprof"); err != nil {
-			return
-		}
+	if err = os.Remove("../.coder/pprof"); err != nil && !os.IsNotExist(err) {
+		return
 	}
 	l, err := net.Listen("unix", "../.coder/pprof")
 	if err != nil {