Skip to content

Commit bedd2c5

Browse files
fix: avoid race between replicas on start (#12344)
DERP mesh key setup would do a SELECT and then, if no key was found, an INSERT, without holding a lock. During some testing with multiple replicas, I managed to crash a replica because several of them were initializing simultaneously.

Fixes the following error:

Encountered an error running "coder server"
create coder API: insert mesh key: pq: duplicate key value violates unique constraint "site_configs_key_key"

Co-authored-by: Cian Johnston <cian@coder.com>
1 parent 76273bf commit bedd2c5

File tree

4 files changed

+32
-6
lines changed

4 files changed

+32
-6
lines changed

coderd/database/dbauthz/dbauthz_test.go

+1
Original file line numberDiff line numberDiff line change
@@ -1874,6 +1874,7 @@ func (s *MethodTestSuite) TestSystemFunctions() {
18741874
check.Args(u.ID).Asserts(rbac.ResourceSystem, rbac.ActionRead)
18751875
}))
18761876
s.Run("GetDERPMeshKey", s.Subtest(func(db database.Store, check *expects) {
1877+
db.InsertDERPMeshKey(context.Background(), "testing")
18771878
check.Args().Asserts(rbac.ResourceSystem, rbac.ActionRead)
18781879
}))
18791880
s.Run("InsertDERPMeshKey", s.Subtest(func(db database.Store, check *expects) {

coderd/database/dbmem/dbmem.go

+3
Original file line numberDiff line numberDiff line change
@@ -1761,6 +1761,9 @@ func (q *FakeQuerier) GetDERPMeshKey(_ context.Context) (string, error) {
17611761
q.mutex.RLock()
17621762
defer q.mutex.RUnlock()
17631763

1764+
if q.derpMeshKey == "" {
1765+
return "", sql.ErrNoRows
1766+
}
17641767
return q.derpMeshKey, nil
17651768
}
17661769

coderd/database/lock.go

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ const (
99
// Keep the unused iota here so we don't need + 1 every time
1010
lockIDUnused = iota
1111
LockIDDeploymentSetup
12+
LockIDEnterpriseDeploymentSetup
1213
)
1314

1415
// GenLockID generates a unique and consistent lock ID from a given string.

enterprise/cli/server.go

+27-6
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
"tailscale.com/types/key"
1616

1717
"github.com/coder/coder/v2/cli/clibase"
18+
"github.com/coder/coder/v2/coderd/database"
1819
"github.com/coder/coder/v2/cryptorand"
1920
"github.com/coder/coder/v2/enterprise/audit"
2021
"github.com/coder/coder/v2/enterprise/audit/backends"
@@ -37,21 +38,41 @@ func (r *RootCmd) Server(_ func()) *clibase.Cmd {
3738
}
3839

3940
options.DERPServer = derp.NewServer(key.NewNode(), tailnet.Logger(options.Logger.Named("derp")))
40-
meshKey, err := options.Database.GetDERPMeshKey(ctx)
41-
if err != nil {
41+
42+
var meshKey string
43+
err := options.Database.InTx(func(tx database.Store) error {
44+
// This will block until the lock is acquired, and will be
45+
// automatically released when the transaction ends.
46+
err := tx.AcquireLock(ctx, database.LockIDEnterpriseDeploymentSetup)
47+
if err != nil {
48+
return xerrors.Errorf("acquire lock: %w", err)
49+
}
50+
51+
meshKey, err = tx.GetDERPMeshKey(ctx)
52+
if err == nil {
53+
return nil
54+
}
4255
if !errors.Is(err, sql.ErrNoRows) {
43-
return nil, nil, xerrors.Errorf("get mesh key: %w", err)
56+
return xerrors.Errorf("get DERP mesh key: %w", err)
4457
}
4558
meshKey, err = cryptorand.String(32)
4659
if err != nil {
47-
return nil, nil, xerrors.Errorf("generate mesh key: %w", err)
60+
return xerrors.Errorf("generate DERP mesh key: %w", err)
4861
}
49-
err = options.Database.InsertDERPMeshKey(ctx, meshKey)
62+
err = tx.InsertDERPMeshKey(ctx, meshKey)
5063
if err != nil {
51-
return nil, nil, xerrors.Errorf("insert mesh key: %w", err)
64+
return xerrors.Errorf("insert DERP mesh key: %w", err)
5265
}
66+
return nil
67+
}, nil)
68+
if err != nil {
69+
return nil, nil, err
70+
}
71+
if meshKey == "" {
72+
return nil, nil, xerrors.New("mesh key is empty")
5373
}
5474
options.DERPServer.SetMeshKey(meshKey)
75+
5576
options.Auditor = audit.NewAuditor(
5677
options.Database,
5778
audit.DefaultFilter,

0 commit comments

Comments
 (0)