Skip to content

fix(coderd): prevent lost messages in watchWorkspaceAgentMetadata #7934

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions coderd/workspaceagents.go
Original file line number Diff line number Diff line change
Expand Up @@ -1434,17 +1434,15 @@ func (api *API) watchWorkspaceAgentMetadata(rw http.ResponseWriter, r *http.Requ
})
}

// Send initial metadata.
sendMetadata(true)

// We debounce metadata updates to avoid overloading the frontend when
// an agent is sending a lot of updates.
pubsubDebounce := debounce.New(time.Second)
if flag.Lookup("test.v") != nil {
pubsubDebounce = debounce.New(time.Millisecond * 100)
}

// Send metadata on updates.
// Send metadata on updates. We must subscribe before sending the initial
// metadata to guarantee that events in between are not missed.
cancelSub, err := api.Pubsub.Subscribe(watchWorkspaceAgentMetadataChannel(workspaceAgent.ID), func(_ context.Context, _ []byte) {
pubsubDebounce(func() {
sendMetadata(true)
Expand All @@ -1456,12 +1454,14 @@ func (api *API) watchWorkspaceAgentMetadata(rw http.ResponseWriter, r *http.Requ
}
defer cancelSub()

// Send initial metadata.
sendMetadata(true)

for {
select {
case <-senderClosed:
return
case <-refreshTicker.C:
break
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

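Review: This `break` has no effect (the `select` case would end here anyway, and it does not leave the enclosing `for` loop), which makes it confusing.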

}

// Avoid spamming the DB with reads we know there are no updates. We want
Expand Down
39 changes: 29 additions & 10 deletions coderd/workspaceagents_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1268,11 +1268,6 @@ func TestWorkspaceAgent_Metadata(t *testing.T) {

var update []codersdk.WorkspaceAgentMetadata

// check asserts that the Value and Error of got's result equal those of want.
check := func(want codersdk.WorkspaceAgentMetadataResult, got codersdk.WorkspaceAgentMetadata) {
require.Equal(t, want.Value, got.Result.Value)
require.Equal(t, want.Error, got.Result.Error)
}

wantMetadata1 := codersdk.WorkspaceAgentMetadataResult{
CollectedAt: time.Now(),
Value: "bar",
Expand All @@ -1285,32 +1280,53 @@ func TestWorkspaceAgent_Metadata(t *testing.T) {

// recvUpdate blocks until ctx is done, an error arrives on errors, or an
// update arrives on updates. Only the last case returns the received update;
// the other two fail the test via t.Fatalf.
recvUpdate := func() []codersdk.WorkspaceAgentMetadata {
select {
case <-ctx.Done():
t.Fatalf("context done: %v", ctx.Err())
case err := <-errors:
t.Fatalf("error watching metadata: %v", err)
return nil
case update := <-updates:
// This update is local to the case and shadows the outer update variable.
return update
}
// Reached only after a t.Fatalf case; provides the required return value.
return nil
}

// check asserts that the Value and Error of got's result equal those of want.
// When retry is true and they do not match yet, it receives up to two further
// updates via recvUpdate (overwriting the outer update variable) and re-reads
// the entry whose Description.Key matches got before asserting.
check := func(want codersdk.WorkspaceAgentMetadataResult, got codersdk.WorkspaceAgentMetadata, retry bool) {
// We can't trust the order of the updates due to timers and debounces,
// so let's check a few more times.
for i := 0; retry && i < 2 && (want.Value != got.Result.Value || want.Error != got.Result.Error); i++ {
update = recvUpdate()
for _, m := range update {
if m.Description.Key == got.Description.Key {
got = m
break
}
}
}
// Both comparisons are evaluated before the test is failed, presumably so
// that a mismatch in either field is reported.
ok1 := assert.Equal(t, want.Value, got.Result.Value)
ok2 := assert.Equal(t, want.Error, got.Result.Error)
if !ok1 || !ok2 {
require.FailNow(t, "check failed")
}
}

update = recvUpdate()
require.Len(t, update, 3)
check(wantMetadata1, update[0])
check(wantMetadata1, update[0], false)
// The second metadata result is not yet posted.
require.Zero(t, update[1].Result.CollectedAt)

wantMetadata2 := wantMetadata1
post("foo2", wantMetadata2)
update = recvUpdate()
require.Len(t, update, 3)
check(wantMetadata1, update[0])
check(wantMetadata2, update[1])
check(wantMetadata1, update[0], true)
check(wantMetadata2, update[1], true)

wantMetadata1.Error = "error"
post("foo1", wantMetadata1)
update = recvUpdate()
require.Len(t, update, 3)
check(wantMetadata1, update[0])
check(wantMetadata1, update[0], true)

const maxValueLen = 32 << 10
tooLongValueMetadata := wantMetadata1
Expand All @@ -1319,6 +1335,9 @@ func TestWorkspaceAgent_Metadata(t *testing.T) {
tooLongValueMetadata.CollectedAt = time.Now()
post("foo3", tooLongValueMetadata)
got := recvUpdate()[2]
for i := 0; i < 2 && len(got.Result.Value) != maxValueLen; i++ {
got = recvUpdate()[2]
}
require.Len(t, got.Result.Value, maxValueLen)
require.NotEmpty(t, got.Result.Error)

Expand Down
19 changes: 17 additions & 2 deletions codersdk/workspaceagents.go
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@ func (c *Client) WatchWorkspaceAgentMetadata(ctx context.Context, id uuid.UUID)

metadataChan := make(chan []WorkspaceAgentMetadata, 256)

ready := make(chan struct{})
watch := func() error {
res, err := c.Request(ctx, http.MethodGet, fmt.Sprintf("/api/v2/workspaceagents/%s/watch-metadata", id), nil)
if err != nil {
Expand All @@ -316,19 +317,24 @@ func (c *Client) WatchWorkspaceAgentMetadata(ctx context.Context, id uuid.UUID)
nextEvent := ServerSentEventReader(ctx, res.Body)
defer res.Body.Close()

firstEvent := true
for {
select {
case <-ctx.Done():
return ctx.Err()
default:
break
}

sse, err := nextEvent()
if err != nil {
return err
}

if firstEvent {
close(ready) // Only close ready after the first event is received.
firstEvent = false
}

b, ok := sse.Data.([]byte)
if !ok {
return xerrors.Errorf("unexpected data type: %T", sse.Data)
Expand Down Expand Up @@ -358,9 +364,18 @@ func (c *Client) WatchWorkspaceAgentMetadata(ctx context.Context, id uuid.UUID)
errorChan := make(chan error, 1)
go func() {
defer close(errorChan)
errorChan <- watch()
err := watch()
select {
case <-ready:
default:
close(ready) // Error before first event.
}
errorChan <- err
}()

// Wait until the first event is received (at which point the subscription is
// registered), or until watch fails before any event arrives.
<-ready

return metadataChan, errorChan
}

Expand Down