Skip to content

feat: add WaitUntilEmpty to LogSender #12159

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions codersdk/agentsdk/logs.go
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,7 @@ func (l *LogSender) SendLoop(ctx context.Context, dest logDest) error {
l.exceededLogLimit = true
// no point in keeping anything we have queued around, server will not accept them
l.queues = make(map[uuid.UUID]*logQueue)
l.Broadcast() // might unblock WaitUntilEmpty
return LogLimitExceededError
}

Expand All @@ -451,6 +452,7 @@ func (l *LogSender) SendLoop(ctx context.Context, dest logDest) error {
if len(q.logs) == 0 {
// no empty queues
delete(l.queues, src)
l.Broadcast() // might unblock WaitUntilEmpty
continue
}
q.lastFlush = time.Now()
Expand Down Expand Up @@ -487,6 +489,34 @@ func (l *LogSender) GetScriptLogger(logSourceID uuid.UUID) ScriptLogger {
return ScriptLogger{srcID: logSourceID, sender: l}
}

// WaitUntilEmpty waits until the LogSender's queues are empty or the given context expires.
func (l *LogSender) WaitUntilEmpty(ctx context.Context) error {
ctxDone := false
nevermind := make(chan struct{})
defer close(nevermind)
go func() {
select {
case <-ctx.Done():
l.L.Lock()
defer l.L.Unlock()
ctxDone = true
l.Broadcast()
return
case <-nevermind:
return
}
}()
Copy link
Member

@mafredri mafredri Feb 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we duplicating logic from SendLoop here? Since this method doesn't attempt to send, it's quite pointless unless the signaling is happening from a running SendLoop anyway.

Edit: Ah, nevermind, just realized this is only here to handle the user provided context.

I think this could be greatly simplified:

func (l *LogSender) WaitUntilEmpty(ctx context.Context) error {
	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-l.allSent:
		return nil
	}
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem with an allSent channel is arranging when a receive from it should succeed. Closing the channel won't work, because you can't "unclose" it if more data gets queued.

Writing to the channel won't work if there is more than one caller to WaitUntilEmpty.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

True, it would work better/simpler if the send loop was channel-based as well. In that case, one approach could be this:

func (l *LogSender) WaitUntilEmpty(ctx context.Context) error {
	wait := make(chan struct{})
	l.waitUntilEmpty <- wait

	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-wait:
		return nil
	}
}

// SendLoop
	var waiters []chan struct{}
	for {
		select {
		case <-tick:
		case <-l.waitUntilEmpty:
			waiters = append(waiters, wait)
		}

		// ...

		if len(l.queues) == 0 {
			for _, wait := range waiters {
				close(wait)
			}
			waiters = nil
		}

But it's not quite as nice when retrofitted into the mutex style loop.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem there is that it requires SendLoop to actually be running in order for WaitUntilEmpty() to return, which it might not be but we are still empty.

Channels are great for communicating between goroutines. Here, what we actually want is to know when a condition is satisfied, regardless of other running goroutines — and for that, sync.Cond is your friend.

l.L.Lock()
defer l.L.Unlock()
for len(l.queues) != 0 && !ctxDone {
l.Wait()
}
if len(l.queues) == 0 {
return nil
}
return ctx.Err()
Comment on lines +514 to +517
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if len(l.queues) == 0 {
return nil
}
return ctx.Err()
return ctx.Err()

We don't actually need this check, this way we give priority to the context cancellation, even if we happen to be done at the same time (this can be preferable in some cases).

}

type ScriptLogger struct {
sender *LogSender
srcID uuid.UUID
Expand Down
48 changes: 47 additions & 1 deletion codersdk/agentsdk/logs_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ func TestLogSender_Mainline(t *testing.T) {
loopErr <- err
}()

empty := make(chan error, 1)
go func() {
err := uut.WaitUntilEmpty(ctx)
empty <- err
}()

// since neither source has even been flushed, it should immediately Flush
// both, although the order is not controlled
var logReqs []*proto.BatchCreateLogsRequest
Expand Down Expand Up @@ -104,8 +110,11 @@ func TestLogSender_Mainline(t *testing.T) {
require.Equal(t, proto.Log_DEBUG, req.Logs[0].GetLevel())
require.Equal(t, t1, req.Logs[0].GetCreatedAt().AsTime())

err := testutil.RequireRecvCtx(ctx, t, empty)
require.NoError(t, err)

cancel()
err := testutil.RequireRecvCtx(testCtx, t, loopErr)
err = testutil.RequireRecvCtx(testCtx, t, loopErr)
require.ErrorIs(t, err, context.Canceled)

// we can still enqueue more logs after SendLoop returns
Expand All @@ -132,6 +141,12 @@ func TestLogSender_LogLimitExceeded(t *testing.T) {
Level: codersdk.LogLevelInfo,
})

empty := make(chan error, 1)
go func() {
err := uut.WaitUntilEmpty(ctx)
empty <- err
}()

loopErr := make(chan error, 1)
go func() {
err := uut.SendLoop(ctx, fDest)
Expand All @@ -146,6 +161,10 @@ func TestLogSender_LogLimitExceeded(t *testing.T) {
err := testutil.RequireRecvCtx(ctx, t, loopErr)
require.ErrorIs(t, err, LogLimitExceededError)

// Should also unblock WaitUntilEmpty
err = testutil.RequireRecvCtx(ctx, t, empty)
require.NoError(t, err)

// we can still enqueue more logs after SendLoop returns, but they don't
// actually get enqueued
uut.Enqueue(ls1, Log{
Expand Down Expand Up @@ -363,6 +382,33 @@ func TestLogSender_SendError(t *testing.T) {
uut.L.Unlock()
}

func TestLogSender_WaitUntilEmpty_ContextExpired(t *testing.T) {
t.Parallel()
testCtx := testutil.Context(t, testutil.WaitShort)
ctx, cancel := context.WithCancel(testCtx)
logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug)
uut := NewLogSender(logger)

t0 := dbtime.Now()

ls1 := uuid.UUID{0x11}
uut.Enqueue(ls1, Log{
CreatedAt: t0,
Output: "test log 0, src 1",
Level: codersdk.LogLevelInfo,
})

empty := make(chan error, 1)
go func() {
err := uut.WaitUntilEmpty(ctx)
empty <- err
}()

cancel()
err := testutil.RequireRecvCtx(testCtx, t, empty)
require.ErrorIs(t, err, context.Canceled)
}

type fakeLogDest struct {
reqs chan *proto.BatchCreateLogsRequest
resps chan *proto.BatchCreateLogsResponse
Expand Down