Skip to content

Commit 4ac6d58

Browse files
committed
Retry direct connection
This will cover recent connections, which connect directly without going through the whole setup flow. It is pretty much the same logic as for listing editors, but we display the errors differently since this all happens in a progress dialog. I tried to combine what I could in the retry. Also, the SshException is misleading: it seems to wrap the real error, so unwrap it; otherwise it is impossible to tell what is really wrong. In particular, the wrapping was causing us to retry on cancellations.
1 parent dbee044 commit 4ac6d58

File tree

4 files changed

+122
-54
lines changed

4 files changed

+122
-54
lines changed

src/main/kotlin/com/coder/gateway/CoderGatewayConnectionProvider.kt

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
package com.coder.gateway
44

5+
import com.coder.gateway.sdk.suspendingRetryWithExponentialBackOff
56
import com.coder.gateway.services.CoderRecentWorkspaceConnectionsService
67
import com.intellij.openapi.components.service
78
import com.intellij.openapi.diagnostic.Logger
@@ -12,9 +13,14 @@ import com.jetbrains.gateway.api.GatewayConnectionProvider
1213
import com.jetbrains.gateway.api.GatewayUI
1314
import com.jetbrains.gateway.ssh.SshDeployFlowUtil
1415
import com.jetbrains.gateway.ssh.SshMultistagePanelContext
16+
import com.jetbrains.gateway.ssh.deploy.DeployException
1517
import com.jetbrains.rd.util.lifetime.LifetimeDefinition
1618
import kotlinx.coroutines.launch
19+
import net.schmizz.sshj.common.SSHException
20+
import net.schmizz.sshj.connection.ConnectionException
1721
import java.time.Duration
22+
import java.util.concurrent.TimeUnit
23+
import java.util.concurrent.TimeoutException
1824

1925
class CoderGatewayConnectionProvider : GatewayConnectionProvider {
2026
private val recentConnectionsService = service<CoderRecentWorkspaceConnectionsService>()
@@ -24,12 +30,44 @@ class CoderGatewayConnectionProvider : GatewayConnectionProvider {
2430
// TODO: If this fails determine if it is an auth error and if so prompt
2531
// for a new token, configure the CLI, then try again.
2632
clientLifetime.launchUnderBackgroundProgress(CoderGatewayBundle.message("gateway.connector.coder.connection.provider.title"), canBeCancelled = true, isIndeterminate = true, project = null) {
27-
val context = SshMultistagePanelContext(parameters.toHostDeployInputs())
28-
logger.info("Deploying and starting IDE with $context")
29-
launch {
30-
@Suppress("UnstableApiUsage") SshDeployFlowUtil.fullDeployCycle(
31-
clientLifetime, context, Duration.ofMinutes(10)
32-
)
33+
val context = suspendingRetryWithExponentialBackOff(
34+
label = "connect",
35+
logger = logger,
36+
action = { attempt ->
37+
logger.info("Deploying (attempt $attempt)...")
38+
indicator.text =
39+
if (attempt > 1) CoderGatewayBundle.message("gateway.connector.coder.connection.retry.text", attempt)
40+
else CoderGatewayBundle.message("gateway.connector.coder.connection.loading.text")
41+
SshMultistagePanelContext(parameters.toHostDeployInputs())
42+
},
43+
predicate = { e ->
44+
e is ConnectionException || e is TimeoutException
45+
|| e is SSHException || e is DeployException
46+
},
47+
update = { _, e, retryMs, ->
48+
indicator.text2 =
49+
if (e is DeployException && e.message.contains("Worker binary deploy failed"))
50+
// Note that text2 will not fit much text before
51+
// truncating; this barely fits.
52+
"Failed to upload worker binary...it may have timed out."
53+
else e?.message ?: CoderGatewayBundle.message("gateway.connector.no-details")
54+
if (retryMs != null) {
55+
val delayS = TimeUnit.MILLISECONDS.toSeconds(retryMs)
56+
val delay = if (delayS < 1) "now" else "in $delayS second${if (delayS > 1) "s" else ""}"
57+
indicator.text = CoderGatewayBundle.message("gateway.connector.coder.connection.retry-error.text", delay)
58+
} else {
59+
indicator.text = CoderGatewayBundle.message("gateway.connector.coder.connection.error.text")
60+
}
61+
},
62+
)
63+
if (context != null) {
64+
launch {
65+
logger.info("Deploying and starting IDE with $context")
66+
// At this point JetBrains takes over with their own UI.
67+
@Suppress("UnstableApiUsage") SshDeployFlowUtil.fullDeployCycle(
68+
clientLifetime, context, Duration.ofMinutes(10)
69+
)
70+
}
3371
}
3472
}
3573

src/main/kotlin/com/coder/gateway/sdk/Retry.kt

Lines changed: 48 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,82 @@
11
package com.coder.gateway.sdk
22

3+
import com.intellij.openapi.diagnostic.Logger
4+
import com.intellij.openapi.progress.ProcessCanceledException
5+
import com.intellij.ssh.SshException
36
import kotlinx.coroutines.delay
47
import kotlinx.datetime.Clock
58
import java.util.Random
69
import java.util.concurrent.TimeUnit
710
import kotlin.concurrent.timer
11+
import kotlin.coroutines.cancellation.CancellationException
812
import kotlin.math.max
913
import kotlin.math.min
1014

15+
/**
 * Return the root cause of [ex]: the last throwable reached by repeatedly
 * following `cause`, or [ex] itself when it has no cause.
 *
 * A cause chain can be cyclic (`initCause` only rejects direct
 * self-causation), so the walk stops as soon as it would revisit a throwable
 * instead of looping forever. In that case the last distinct throwable
 * visited is returned.
 *
 * The return type stays nullable for compatibility with existing callers, but
 * this function never actually returns null.
 */
fun unwrap(ex: Exception): Throwable? {
    // Track visited throwables by identity; exception classes are free to
    // override equals/hashCode.
    val seen = java.util.Collections.newSetFromMap(java.util.IdentityHashMap<Throwable, Boolean>())
    var root: Throwable = ex
    seen.add(root)
    while (true) {
        val next = root.cause ?: break
        // add() returns false when `next` was already visited, i.e. a cycle.
        if (!seen.add(next)) break
        root = next
    }
    return root
}
22+
1123
/**
12-
* Similar to Intellij's except it gives you the next delay, does not do its own
13-
* logging, updates periodically (for counting down), and runs forever.
24+
* Similar to Intellij's except it gives you the next delay, logs differently,
25+
* updates periodically (for counting down), runs forever, and takes a
26+
* predicate for determining whether we should retry.
27+
*
28+
* The update callback receives the attempt number, the (unwrapped) error, and
29+
* the milliseconds remaining until the next retry. If remainingMs is null then
30+
* no more retries will be attempted.
31+
*
32+
* If an exception related to canceling is received, or the predicate rejects the error, then return null.
1433
*/
1534
suspend fun <T> suspendingRetryWithExponentialBackOff(
1635
initialDelayMs: Long = TimeUnit.SECONDS.toMillis(5),
1736
backOffLimitMs: Long = TimeUnit.MINUTES.toMillis(3),
1837
backOffFactor: Int = 2,
1938
backOffJitter: Double = 0.1,
20-
update: (attempt: Int, remainingMs: Long, e: Exception) -> Unit,
21-
action: suspend (attempt: Int) -> T
22-
): T {
39+
label: String,
40+
logger: Logger,
41+
predicate: (e: Throwable?) -> Boolean,
42+
update: (attempt: Int, e: Throwable?, remainingMs: Long?) -> Unit,
43+
action: suspend (attempt: Int) -> T?
44+
): T? {
2345
val random = Random()
2446
var delayMs = initialDelayMs
2547
for (attempt in 1..Int.MAX_VALUE) {
2648
try {
2749
return action(attempt)
2850
}
29-
catch (e: Exception) {
51+
catch (originalEx: Exception) {
52+
// Gateway is wrapping exceptions in an SshException which makes it
53+
// impossible to tell what the real error (could be anything from a
54+
// timeout to being canceled) is so unwrap them.
55+
val unwrappedEx = if (originalEx is SshException) unwrap(originalEx) else originalEx
56+
when (unwrappedEx) {
57+
is InterruptedException,
58+
is CancellationException,
59+
is ProcessCanceledException -> {
60+
logger.info("Retry canceled due to ${unwrappedEx.javaClass}")
61+
return null
62+
}
63+
}
64+
if (!predicate(unwrappedEx)) {
65+
logger.error("Failed to $label (attempt $attempt; will not retry)", originalEx)
66+
update(attempt, unwrappedEx, null)
67+
return null
68+
}
3069
val end = Clock.System.now().toEpochMilliseconds() + delayMs
3170
val timer = timer(period = TimeUnit.SECONDS.toMillis(1)) {
3271
val now = Clock.System.now().toEpochMilliseconds()
3372
val next = max(end - now, 0)
3473
if (next > 0) {
35-
update(attempt, next, e)
74+
update(attempt, unwrappedEx, next)
3675
} else {
37-
this.cancel()
76+
cancel()
3877
}
3978
}
79+
logger.error("Failed to $label (attempt $attempt; will retry in $delayMs ms)", originalEx)
4080
delay(delayMs)
4181
timer.cancel()
4282
delayMs = min(delayMs * backOffFactor, backOffLimitMs) + (random.nextGaussian() * delayMs * backOffJitter).toLong()

src/main/kotlin/com/coder/gateway/views/steps/CoderLocateRemoteProjectStepView.kt

Lines changed: 25 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,6 @@ import javax.swing.JPanel
7979
import javax.swing.ListCellRenderer
8080
import javax.swing.SwingConstants
8181
import javax.swing.event.DocumentEvent
82-
import kotlin.coroutines.cancellation.CancellationException
8382

8483
class CoderLocateRemoteProjectStepView(private val setNextButtonEnabled: (Boolean) -> Unit) : CoderWorkspacesWizardStep, Disposable {
8584
private val cs = CoroutineScope(Dispatchers.Main)
@@ -179,6 +178,8 @@ class CoderLocateRemoteProjectStepView(private val setNextButtonEnabled: (Boolea
179178

180179
ideResolvingJob = cs.launch {
181180
val ides = suspendingRetryWithExponentialBackOff(
181+
label = "retrieve IDEs",
182+
logger = logger,
182183
action={ attempt ->
183184
logger.info("Deploying to ${selectedWorkspace.name} on $deploymentURL (attempt $attempt)")
184185
// Reset text in the select dropdown.
@@ -187,54 +188,38 @@ class CoderLocateRemoteProjectStepView(private val setNextButtonEnabled: (Boolea
187188
if (attempt > 1) CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.retry.text", attempt)
188189
else CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.loading.text"))
189190
}
190-
try {
191-
val executor = createRemoteExecutor(CoderCLIManager.getHostName(deploymentURL, selectedWorkspace))
192-
if (ComponentValidator.getInstance(tfProject).isEmpty) {
193-
installRemotePathValidator(executor)
194-
}
195-
retrieveIDEs(executor, selectedWorkspace)
196-
} catch (e: Exception) {
197-
when(e) {
198-
is InterruptedException -> Unit
199-
is CancellationException -> Unit
200-
// Throw to retry these. The main one is
201-
// DeployException which fires when dd times out.
202-
is ConnectionException, is TimeoutException,
203-
is SSHException, is DeployException -> throw e
204-
else -> {
205-
withContext(Dispatchers.Main) {
206-
logger.error("Failed to retrieve IDEs (attempt $attempt)", e)
207-
cbIDEComment.foreground = UIUtil.getErrorForeground()
208-
cbIDEComment.text = e.message ?: "The error did not provide any further details"
209-
cbIDE.renderer = IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.error.text"), UIUtil.getBalloonErrorIcon())
210-
}
211-
}
212-
}
213-
null
191+
val executor = createRemoteExecutor(CoderCLIManager.getHostName(deploymentURL, selectedWorkspace))
192+
if (ComponentValidator.getInstance(tfProject).isEmpty) {
193+
installRemotePathValidator(executor)
214194
}
195+
retrieveIDEs(executor, selectedWorkspace)
196+
},
197+
predicate = { e ->
198+
e is ConnectionException || e is TimeoutException
199+
|| e is SSHException || e is DeployException
215200
},
216-
update = { attempt, retryMs, e ->
217-
logger.error("Failed to retrieve IDEs (attempt $attempt; will retry in $retryMs ms)", e)
201+
update = { _, e, retryMs ->
218202
cbIDEComment.foreground = UIUtil.getErrorForeground()
219203
// When the dd command times out all we get is a
220-
// DeployException and some text that it failed but not why.
221-
// Plus it includes a long tmp path that is a bit nasty to
204+
// DeployException and some text that it failed. Plus
205+
// it includes a long tmp path that is a bit nasty to
222206
// display so replace it with something nicer.
223-
if (e is DeployException && e.message.contains("Worker binary deploy failed")) {
224-
cbIDEComment.text = "Failed to upload worker binary...it may have timed out. Check the command log for details."
207+
cbIDEComment.text =
208+
if (e is DeployException && e.message.contains("Worker binary deploy failed"))
209+
"Failed to upload worker binary...it may have timed out. Check the command log for details."
210+
else e?.message ?: CoderGatewayBundle.message("gateway.connector.no-details")
211+
if (retryMs != null) {
212+
val delayS = TimeUnit.MILLISECONDS.toSeconds(retryMs)
213+
val delay = if (delayS < 1) "now" else "in $delayS second${if (delayS > 1) "s" else ""}"
214+
cbIDE.renderer = IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.retry-error.text", delay))
225215
} else {
226-
cbIDEComment.text = e.message ?: "The error did not provide any further details."
216+
cbIDE.renderer = IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.error.text"), UIUtil.getBalloonErrorIcon())
227217
}
228-
val delayS = TimeUnit.MILLISECONDS.toSeconds(retryMs)
229-
val delay = if (delayS < 1) "now" else "in $delayS second${if (delayS > 1) "s" else ""}"
230-
cbIDE.renderer = IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.retry-error.text", delay))
231218
},
232219
)
233-
if (ides != null) {
234-
withContext(Dispatchers.Main) {
235-
ideComboBoxModel.addAll(ides)
236-
cbIDE.selectedIndex = 0
237-
}
220+
withContext(Dispatchers.Main) {
221+
ideComboBoxModel.addAll(ides)
222+
cbIDE.selectedIndex = 0
238223
}
239224
}
240225
}

src/main/resources/messages/CoderGatewayBundle.properties

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,10 @@ gateway.connector.recentconnections.new.wizard.button.tooltip=Open a new Coder W
4242
gateway.connector.recentconnections.remove.button.tooltip=Remove from Recent Connections
4343
gateway.connector.recentconnections.terminal.button.tooltip=Open SSH Web Terminal
4444
gateway.connector.coder.connection.provider.title=Connecting to Coder workspace...
45+
gateway.connector.coder.connection.loading.text=Connecting...
46+
gateway.connector.coder.connection.retry.text=Connecting (attempt {0})...
47+
gateway.connector.coder.connection.retry-error.text=Failed to connect...retrying {0}
48+
gateway.connector.coder.connection.error.text=Failed to connect
4549
gateway.connector.settings.binary-source.title=CLI source:
4650
gateway.connector.settings.binary-source.comment=Used to download the Coder \
4751
CLI which is necessary to make SSH connections. The If-None-Match header \
@@ -54,3 +58,4 @@ gateway.connector.settings.binary-destination.comment=Directories are created \
5458
here that store the CLI and credentials for each domain to which the plugin \
5559
connects. \
5660
Defaults to {0}.
61+
gateway.connector.no-details=The error did not provide any further details

0 commit comments

Comments
 (0)