@@ -8,8 +8,11 @@ import (
8
8
"net/http"
9
9
"net/url"
10
10
"os"
11
+ "os/signal"
12
+ "path/filepath"
11
13
"runtime"
12
14
"strings"
15
+ "syscall"
13
16
"text/template"
14
17
"time"
15
18
@@ -631,3 +634,93 @@ func (h *headerTransport) RoundTrip(req *http.Request) (*http.Response, error) {
631
634
}
632
635
return h .transport .RoundTrip (req )
633
636
}
637
+
638
// dumpHandler provides a custom SIGQUIT and SIGTRAP handler that dumps the
// stacktrace of all goroutines to stderr and a well-known file in the home
// directory. This is useful for debugging deadlock issues that may occur in
// production in workspaces, since the default Go runtime will only dump to
// stderr (which is often difficult/impossible to read in a workspace).
//
// The dump file is named "coder-agent-<timestamp>.dump", where the timestamp
// is in UTC. If the home directory cannot be determined, the OS temp
// directory is used instead.
//
// SIGQUITs will still cause the program to exit (similarly to the default Go
// runtime behavior).
//
// A SIGQUIT handler will not be registered if GOTRACEBACK=crash.
//
// On Windows this immediately returns. On other platforms it blocks until ctx
// is done (or the process exits because of a SIGQUIT), so callers should run
// it in its own goroutine.
func dumpHandler(ctx context.Context) {
	if runtime.GOOS == "windows" {
		// free up the goroutine since it'll be permanently blocked anyways
		return
	}

	listenSignals := []os.Signal{syscall.SIGTRAP}
	if os.Getenv("GOTRACEBACK") != "crash" {
		listenSignals = append(listenSignals, syscall.SIGQUIT)
	}

	// Buffer of one so a signal that arrives while a dump is being written is
	// not dropped by the signal package.
	sigs := make(chan os.Signal, 1)
	signal.Notify(sigs, listenSignals...)
	defer signal.Stop(sigs)

	for {
		sigStr := ""
		select {
		case <-ctx.Done():
			return
		case sig := <-sigs:
			switch sig {
			case syscall.SIGQUIT:
				sigStr = "SIGQUIT"
			case syscall.SIGTRAP:
				sigStr = "SIGTRAP"
			}
		}

		stack := captureGoroutineStacks()
		_, _ = fmt.Fprintf(os.Stderr, "%s:\n%s\n", sigStr, stack)
		writeGoroutineDump(stack, time.Now())

		// Mirror the runtime's behavior of terminating on SIGQUIT; SIGTRAP
		// only dumps and keeps the process running.
		if sigStr == "SIGQUIT" {
			//nolint:revive
			os.Exit(1)
		}
	}
}

// captureGoroutineStacks returns the stack traces of all goroutines. It starts
// with a 1MB buffer and keeps doubling it until the entire trace fits,
// stopping once the buffer has reached 64MB. In that case the tail of the
// buffer is overwritten with a message saying that the trace was truncated.
func captureGoroutineStacks() []byte {
	const (
		initialSize  = 1_000_000
		maxSize      = 64_000_000
		truncatedMsg = "\n\n\nstack trace truncated due to size\n"
	)

	buf := make([]byte, initialSize)
	for {
		n := runtime.Stack(buf, true)
		if n < len(buf) {
			return buf[:n]
		}
		// A completely filled buffer means the trace may have been cut off.
		if 2*len(buf) > maxSize {
			copy(buf[len(buf)-len(truncatedMsg):], truncatedMsg)
			return buf
		}
		buf = make([]byte, 2*len(buf))
	}
}

// writeGoroutineDump writes stack to a timestamped file in the user's home
// directory (or the OS temp directory if the home directory is unknown). It is
// best-effort: failures are reported on stderr and otherwise ignored.
func writeGoroutineDump(stack []byte, now time.Time) {
	dir, err := os.UserHomeDir()
	if err != nil {
		dir = os.TempDir()
	}

	// The trailing "Z" in the layout is emitted literally (it is not a zone
	// verb), so the time must be converted to UTC for the suffix to be true.
	name := fmt.Sprintf("coder-agent-%s.dump", now.UTC().Format("2006-01-02T15:04:05.000Z"))
	fpath := filepath.Join(dir, name)
	_, _ = fmt.Fprintf(os.Stderr, "writing dump to %q\n", fpath)

	f, err := os.Create(fpath)
	if err != nil {
		_, _ = fmt.Fprintf(os.Stderr, "failed to open dump file: %v\n", err)
		return
	}
	_, err = f.Write(stack)
	// Always close the file, and surface a close error if the write itself
	// succeeded, since buffered data may only fail to flush at close time.
	if closeErr := f.Close(); err == nil {
		err = closeErr
	}
	if err != nil {
		_, _ = fmt.Fprintf(os.Stderr, "failed to write dump file: %v\n", err)
	}
}
0 commit comments