Skip to content

Commit f0251df

Browse files
chore: retry postgres connection on reset by peer in tests (#18632)
Fixes coder/internal#695 Retries initial connection to postgres in testing up to 3 seconds if we see "reset by peer", which probably means that some other test proc just started the container. --------- Co-authored-by: Hugo Dutka <hugo@coder.com>
1 parent d26d0fc commit f0251df

File tree

1 file changed

+43
-16
lines changed

1 file changed

+43
-16
lines changed

coderd/database/dbtestutil/postgres.go

Lines changed: 43 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,13 @@ var (
4545
connectionParamsInitOnce sync.Once
4646
defaultConnectionParams ConnectionParams
4747
errDefaultConnectionParamsInit error
48+
retryableErrSubstrings = []string{
49+
"connection reset by peer",
50+
}
51+
noPostgresRunningErrSubstrings = []string{
52+
"connection refused", // nothing is listening on the port
53+
"No connection could be made", // Windows variant of the above
54+
}
4855
)
4956

5057
// initDefaultConnection initializes the default postgres connection parameters.
@@ -59,28 +66,38 @@ func initDefaultConnection(t TBSubset) error {
5966
DBName: "postgres",
6067
}
6168
dsn := params.DSN()
62-
db, dbErr := sql.Open("postgres", dsn)
63-
if dbErr == nil {
64-
dbErr = db.Ping()
65-
if closeErr := db.Close(); closeErr != nil {
66-
return xerrors.Errorf("close db: %w", closeErr)
69+
70+
// Helper closure to try opening and pinging the default Postgres instance.
71+
// Used within a single retry loop that handles both retryable and permanent errors.
72+
attemptConn := func() error {
73+
db, err := sql.Open("postgres", dsn)
74+
if err == nil {
75+
err = db.Ping()
76+
if closeErr := db.Close(); closeErr != nil {
77+
return xerrors.Errorf("close db: %w", closeErr)
78+
}
6779
}
80+
return err
6881
}
69-
shouldOpenContainer := false
70-
if dbErr != nil {
71-
errSubstrings := []string{
72-
"connection refused", // this happens on Linux when there's nothing listening on the port
73-
"No connection could be made", // like above but Windows
82+
83+
var dbErr error
84+
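// Retry temporary errors until ctx expires. NOTE(review): the commit message says "up to 3 seconds", but the timeout below is 10*time.Second; confirm which is intended.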
85+
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
86+
defer cancel()
87+
for r := retry.New(10*time.Millisecond, 500*time.Millisecond); r.Wait(ctx); {
88+
dbErr = attemptConn()
89+
if dbErr == nil {
90+
break
7491
}
7592
errString := dbErr.Error()
76-
for _, errSubstring := range errSubstrings {
77-
if strings.Contains(errString, errSubstring) {
78-
shouldOpenContainer = true
79-
break
80-
}
93+
if !containsAnySubstring(errString, retryableErrSubstrings) {
94+
break
8195
}
96+
t.Logf("failed to connect to postgres, retrying: %s", errString)
8297
}
83-
if dbErr != nil && shouldOpenContainer {
98+
99+
// After the loop dbErr is the last connection error (if any).
100+
if dbErr != nil && containsAnySubstring(dbErr.Error(), noPostgresRunningErrSubstrings) {
84101
// If there's no database running on the default port, we'll start a
85102
// postgres container. We won't be cleaning it up so it can be reused
86103
// by subsequent tests. It'll keep on running until the user terminates
@@ -110,6 +127,7 @@ func initDefaultConnection(t TBSubset) error {
110127
if connErr == nil {
111128
break
112129
}
130+
t.Logf("failed to connect to postgres after starting container, may retry: %s", connErr.Error())
113131
}
114132
} else if dbErr != nil {
115133
return xerrors.Errorf("open postgres connection: %w", dbErr)
@@ -523,3 +541,12 @@ func OpenContainerized(t TBSubset, opts DBContainerOptions) (string, func(), err
523541

524542
return dbURL, containerCleanup, nil
525543
}
544+
545+
// containsAnySubstring reports whether s contains at least one of the given
// substrings. It returns false when substrings is empty or nil.
func containsAnySubstring(s string, substrings []string) bool {
546+
for _, substr := range substrings {
547+
if strings.Contains(s, substr) {
548+
return true
549+
}
550+
}
551+
return false
552+
}

0 commit comments

Comments
 (0)