-
Notifications
You must be signed in to change notification settings - Fork 889
fix: fix graceful disconnect in DialWorkspaceAgent #11993
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
package codersdk | ||
|
||
import ( | ||
"context" | ||
"io" | ||
"net/http" | ||
"net/http/httptest" | ||
"sync/atomic" | ||
"testing" | ||
"time" | ||
|
||
"github.com/google/uuid" | ||
"github.com/stretchr/testify/assert" | ||
"github.com/stretchr/testify/require" | ||
"nhooyr.io/websocket" | ||
"tailscale.com/tailcfg" | ||
|
||
"cdr.dev/slog" | ||
"cdr.dev/slog/sloggers/slogtest" | ||
"github.com/coder/coder/v2/tailnet" | ||
"github.com/coder/coder/v2/tailnet/proto" | ||
"github.com/coder/coder/v2/tailnet/tailnettest" | ||
"github.com/coder/coder/v2/testutil" | ||
) | ||
|
||
func TestTailnetAPIConnector_Disconnects(t *testing.T) { | ||
t.Parallel() | ||
testCtx := testutil.Context(t, testutil.WaitShort) | ||
ctx, cancel := context.WithCancel(testCtx) | ||
logger := slogtest.Make(t, &slogtest.Options{ | ||
// we get EOF when we simulate a DERPMap error | ||
IgnoredErrorIs: append(slogtest.DefaultIgnoredErrorIs, io.EOF), | ||
}).Leveled(slog.LevelDebug) | ||
agentID := uuid.UUID{0x55} | ||
clientID := uuid.UUID{0x66} | ||
fCoord := tailnettest.NewFakeCoordinator() | ||
var coord tailnet.Coordinator = fCoord | ||
coordPtr := atomic.Pointer[tailnet.Coordinator]{} | ||
coordPtr.Store(&coord) | ||
derpMapCh := make(chan *tailcfg.DERPMap) | ||
defer close(derpMapCh) | ||
svc, err := tailnet.NewClientService( | ||
logger, &coordPtr, | ||
time.Millisecond, func() *tailcfg.DERPMap { return <-derpMapCh }, | ||
) | ||
require.NoError(t, err) | ||
|
||
svr := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { | ||
sws, err := websocket.Accept(w, r, nil) | ||
if !assert.NoError(t, err) { | ||
return | ||
} | ||
ctx, nc := websocketNetConn(r.Context(), sws, websocket.MessageBinary) | ||
err = svc.ServeConnV2(ctx, nc, tailnet.StreamID{ | ||
Name: "client", | ||
ID: clientID, | ||
Auth: tailnet.ClientTunnelAuth{AgentID: agentID}, | ||
}) | ||
assert.NoError(t, err) | ||
})) | ||
|
||
fConn := newFakeTailnetConn() | ||
|
||
uut := runTailnetAPIConnector(ctx, logger, agentID, svr.URL, &websocket.DialOptions{}, fConn) | ||
|
||
call := testutil.RequireRecvCtx(ctx, t, fCoord.CoordinateCalls) | ||
reqTun := testutil.RequireRecvCtx(ctx, t, call.Reqs) | ||
require.NotNil(t, reqTun.AddTunnel) | ||
|
||
_ = testutil.RequireRecvCtx(ctx, t, uut.connected) | ||
|
||
// simulate a problem with DERPMaps by sending nil | ||
testutil.RequireSendCtx(ctx, t, derpMapCh, nil) | ||
|
||
// this should cause the coordinate call to hang up WITHOUT disconnecting | ||
reqNil := testutil.RequireRecvCtx(ctx, t, call.Reqs) | ||
require.Nil(t, reqNil) | ||
|
||
// ...and then reconnect | ||
call = testutil.RequireRecvCtx(ctx, t, fCoord.CoordinateCalls) | ||
reqTun = testutil.RequireRecvCtx(ctx, t, call.Reqs) | ||
require.NotNil(t, reqTun.AddTunnel) | ||
|
||
// canceling the context should trigger the disconnect message | ||
cancel() | ||
reqDisc := testutil.RequireRecvCtx(testCtx, t, call.Reqs) | ||
require.NotNil(t, reqDisc) | ||
require.NotNil(t, reqDisc.Disconnect) | ||
} | ||
|
||
type fakeTailnetConn struct{} | ||
|
||
func (*fakeTailnetConn) UpdatePeers([]*proto.CoordinateResponse_PeerUpdate) error { | ||
// TODO implement me | ||
panic("implement me") | ||
} | ||
Comment on lines
+93
to
+96
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we call There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
changing to |
||
|
||
func (*fakeTailnetConn) SetAllPeersLost() {} | ||
|
||
func (*fakeTailnetConn) SetNodeCallback(func(*tailnet.Node)) {} | ||
|
||
func (*fakeTailnetConn) SetDERPMap(*tailcfg.DERPMap) {} | ||
|
||
func newFakeTailnetConn() *fakeTailnetConn { | ||
return &fakeTailnetConn{} | ||
} | ||
Comment on lines
+91
to
+106
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this live in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think so, because the |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(Non-blocking) I figure this is a best-effort situation, but will 1 second be enough? Does this need to be a configurable knob?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it should be plenty, even on a slow connection because we're not waiting for a reply. I definitely don't want to plumb configuration thru.
It is a best effort as you say --- consequence of not doing this is that the agent on the other side will see it as "lost" and possibly still try to handshake with it for up to 15 minutes.