Skip to content

feat: Add high availability for multiple replicas #4555

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 86 commits into main from replica
Oct 17, 2022
Merged
Changes from 1 commit
Commits
Show all changes
86 commits
Select commit Hold shift + click to select a range
35b2fed
feat: HA tailnet coordinator
coadler Sep 22, 2022
68a812b
fixup! feat: HA tailnet coordinator
coadler Sep 23, 2022
774c5da
fixup! feat: HA tailnet coordinator
coadler Sep 23, 2022
bd82c5e
remove printlns
coadler Sep 23, 2022
02e079d
Merge branch 'main' into colin/pg-coordinate
coadler Oct 7, 2022
fbad8d0
close all connections on coordinator
coadler Oct 7, 2022
46803aa
impelement high availability feature
coadler Oct 7, 2022
d38391e
fixup! impelement high availability feature
coadler Oct 7, 2022
a0bcd64
fixup! impelement high availability feature
coadler Oct 7, 2022
1f33018
fixup! impelement high availability feature
coadler Oct 7, 2022
b6a5070
fixup! impelement high availability feature
coadler Oct 7, 2022
1883430
Add replicas
kylecarbs Oct 12, 2022
7dc968c
Add DERP meshing to arbitrary addresses
kylecarbs Oct 12, 2022
1dcf0d0
Move packages to highavailability folder
kylecarbs Oct 12, 2022
5c43d63
Merge branch 'main' into colin/pg-coordinate
kylecarbs Oct 12, 2022
4804269
Merge branch 'colin/pg-coordinate' into replica
kylecarbs Oct 12, 2022
289e139
Move coordinator to high availability package
kylecarbs Oct 12, 2022
585bc1d
Add flags for HA
kylecarbs Oct 12, 2022
fdb3557
Rename to replicasync
kylecarbs Oct 13, 2022
9124b00
Denest packages for replicas
kylecarbs Oct 13, 2022
d5555f6
Add test for multiple replicas
kylecarbs Oct 13, 2022
8dfc261
Fix coordination test
kylecarbs Oct 13, 2022
ff5968b
Add HA to the helm chart
kylecarbs Oct 13, 2022
557b390
Rename function pointer
kylecarbs Oct 13, 2022
186a5e2
Add warnings for HA
kylecarbs Oct 13, 2022
de5b13b
Add the ability to block endpoints
kylecarbs Oct 13, 2022
9a50ac4
Add flag to disable P2P connections
kylecarbs Oct 14, 2022
6fa941f
Wow, I made the tests pass
kylecarbs Oct 14, 2022
abff96b
Add replicas endpoint
kylecarbs Oct 14, 2022
d6ce216
Ensure close kills replica
kylecarbs Oct 14, 2022
c3786a5
Merge branch 'main' into replica
kylecarbs Oct 14, 2022
d7cc0ff
Update sql
kylecarbs Oct 14, 2022
9914840
Add database latency to high availability
kylecarbs Oct 15, 2022
c1aa3d2
Pipe TLS to DERP mesh
kylecarbs Oct 15, 2022
0cc4263
Fix DERP mesh with TLS
kylecarbs Oct 15, 2022
f9177e4
Add tests for TLS
kylecarbs Oct 15, 2022
ee59d88
Fix replica sync TLS
kylecarbs Oct 15, 2022
8641e58
Fix RootCA for replica meshing
kylecarbs Oct 15, 2022
3dfb796
Remove ID from replicasync
kylecarbs Oct 15, 2022
ec2c1f1
Fix getting certificates for meshing
kylecarbs Oct 15, 2022
590f0f8
Remove excessive locking
kylecarbs Oct 15, 2022
d8580d1
Fix linting
kylecarbs Oct 15, 2022
ae956fb
Store mesh key in the database
kylecarbs Oct 15, 2022
d703e2d
Fix replica key for tests
kylecarbs Oct 15, 2022
9bb021c
Fix types gen
kylecarbs Oct 15, 2022
76c9e2c
Fix unlocking unlocked
kylecarbs Oct 15, 2022
09e87b0
Fix race in tests
kylecarbs Oct 15, 2022
18c0464
Update enterprise/derpmesh/derpmesh.go
kylecarbs Oct 15, 2022
6f25b2d
Rename to syncReplicas
kylecarbs Oct 15, 2022
efb6ece
Merge branch 'replica' of github.com:coder/coder into replica
kylecarbs Oct 15, 2022
1e85039
Reuse http client
kylecarbs Oct 15, 2022
ae0aa5f
Delete old replicas on a CRON
kylecarbs Oct 15, 2022
332d435
Merge branch 'main' into replica
kylecarbs Oct 15, 2022
bd7fb13
Fix race condition in connection tests
kylecarbs Oct 15, 2022
bb5b347
Fix linting
kylecarbs Oct 15, 2022
76e0511
Fix nil type
kylecarbs Oct 15, 2022
1ff5f7d
Move pubsub to in-memory for twenty test
kylecarbs Oct 16, 2022
b732184
Add comment for configuration tweaking
kylecarbs Oct 16, 2022
38465ac
Fix leak with transport
kylecarbs Oct 16, 2022
72555e2
Fix close leak in derpmesh
kylecarbs Oct 16, 2022
e54072a
Fix race when creating server
kylecarbs Oct 16, 2022
27d5f40
Remove handler update
kylecarbs Oct 16, 2022
4d0b1d8
Skip test on Windows
kylecarbs Oct 16, 2022
129f5ba
Fix DERP mesh test
kylecarbs Oct 16, 2022
4e5d30e
Wrap HTTP handler replacement in mutex
kylecarbs Oct 16, 2022
0359a7e
Fix error message for relay
kylecarbs Oct 16, 2022
f364d1f
Fix API handler for normal tests
kylecarbs Oct 16, 2022
423a47e
Fix speedtest
kylecarbs Oct 16, 2022
c3a77fe
Fix replica resend
kylecarbs Oct 16, 2022
729f8a0
Fix derpmesh send
kylecarbs Oct 16, 2022
ae0bc5d
Ping async
kylecarbs Oct 16, 2022
d7d50db
Increase wait time of template version jobd
kylecarbs Oct 16, 2022
77d23dc
Fix race when closing replica sync
kylecarbs Oct 16, 2022
435bbbb
Add name to client
kylecarbs Oct 16, 2022
9b7c41a
Log the derpmap being used
kylecarbs Oct 17, 2022
9615402
Don't connect if DERP is empty
kylecarbs Oct 17, 2022
bcb97ac
Improve agent coordinator logging
kylecarbs Oct 17, 2022
e2f6a19
Fix lock in coordinator
kylecarbs Oct 17, 2022
c855c9b
Fix relay addr
kylecarbs Oct 17, 2022
a0e5cab
Fix race when updating durations
kylecarbs Oct 17, 2022
9878fc5
Fix client publish race
kylecarbs Oct 17, 2022
7a40bf8
Run pubsub loop in a queue
kylecarbs Oct 17, 2022
08b9681
Store agent nodes in order
kylecarbs Oct 17, 2022
79991a9
Fix coordinator locking
kylecarbs Oct 17, 2022
020171b
Check for closed pipe
kylecarbs Oct 17, 2022
6a57554
Merge branch 'main' into replica
kylecarbs Oct 17, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
Rename to syncReplicas
  • Loading branch information
kylecarbs committed Oct 15, 2022
commit 6f25b2d44b66b12c5c65c20c0095588c3b51347b
22 changes: 11 additions & 11 deletions enterprise/replicasync/replicasync.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func New(ctx context.Context, logger slog.Logger, db database.Store, pubsub data
return nil, xerrors.Errorf("publish new replica: %w", err)
}
ctx, cancelFunc := context.WithCancel(ctx)
server := &Manager{
manager := &Manager{
id: id,
options: options,
db: db,
Expand All @@ -83,25 +83,25 @@ func New(ctx context.Context, logger slog.Logger, db database.Store, pubsub data
closed: make(chan struct{}),
closeCancel: cancelFunc,
}
err = server.run(ctx)
err = manager.syncReplicas(ctx)
if err != nil {
return nil, xerrors.Errorf("run replica: %w", err)
}
peers := server.Regional()
peers := manager.Regional()
if len(peers) > 0 {
self := server.Self()
self := manager.Self()
if self.RelayAddress == "" {
return nil, xerrors.Errorf("a relay address must be specified when running multiple replicas in the same region")
}
}

err = server.subscribe(ctx)
err = manager.subscribe(ctx)
if err != nil {
return nil, xerrors.Errorf("subscribe: %w", err)
}
server.closeWait.Add(1)
go server.loop(ctx)
return server, nil
manager.closeWait.Add(1)
go manager.loop(ctx)
return manager, nil
}

// Manager keeps the replica up to date and in sync with other replicas.
Expand Down Expand Up @@ -134,7 +134,7 @@ func (m *Manager) loop(ctx context.Context) {
return
case <-ticker.C:
}
err := m.run(ctx)
err := m.syncReplicas(ctx)
if err != nil && !errors.Is(err, context.Canceled) {
m.logger.Warn(ctx, "run replica update loop", slog.Error(err))
}
Expand All @@ -155,7 +155,7 @@ func (m *Manager) subscribe(ctx context.Context) error {
// it will reprocess afterwards.
var update func()
update = func() {
err := m.run(ctx)
err := m.syncReplicas(ctx)
if err != nil && !errors.Is(err, context.Canceled) {
m.logger.Error(ctx, "run replica from subscribe", slog.Error(err))
}
Expand Down Expand Up @@ -197,7 +197,7 @@ func (m *Manager) subscribe(ctx context.Context) error {
return nil
}

func (m *Manager) run(ctx context.Context) error {
func (m *Manager) syncReplicas(ctx context.Context) error {
m.closeMutex.Lock()
m.closeWait.Add(1)
m.closeMutex.Unlock()
Expand Down