Skip to content

Commit a2d42d2

Browse files
committed
Refactor retry
1 parent aa27ca1 commit a2d42d2

File tree

3 files changed

+101
-87
lines changed

3 files changed

+101
-87
lines changed

src/main/kotlin/com/coder/gateway/CoderGatewayConnectionProvider.kt

Lines changed: 39 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
package com.coder.gateway
44

55
import com.coder.gateway.sdk.humanizeDuration
6+
import com.coder.gateway.sdk.isCancellation
67
import com.coder.gateway.sdk.isWorkerTimeout
78
import com.coder.gateway.sdk.suspendingRetryWithExponentialBackOff
89
import com.coder.gateway.services.CoderRecentWorkspaceConnectionsService
@@ -23,6 +24,7 @@ import kotlinx.coroutines.launch
2324
import net.schmizz.sshj.common.SSHException
2425
import net.schmizz.sshj.connection.ConnectionException
2526
import java.time.Duration
27+
import java.util.concurrent.TimeUnit
2628
import java.util.concurrent.TimeoutException
2729

2830
class CoderGatewayConnectionProvider : GatewayConnectionProvider {
@@ -33,45 +35,50 @@ class CoderGatewayConnectionProvider : GatewayConnectionProvider {
3335
// TODO: If this fails determine if it is an auth error and if so prompt
3436
// for a new token, configure the CLI, then try again.
3537
clientLifetime.launchUnderBackgroundProgress(CoderGatewayBundle.message("gateway.connector.coder.connection.provider.title"), canBeCancelled = true, isIndeterminate = true, project = null) {
36-
val context = suspendingRetryWithExponentialBackOff(
37-
label = "connect",
38-
logger = logger,
39-
action = { attempt ->
40-
logger.info("Deploying (attempt $attempt)...")
41-
indicator.text =
42-
if (attempt > 1) CoderGatewayBundle.message("gateway.connector.coder.connection.retry.text", attempt)
43-
else CoderGatewayBundle.message("gateway.connector.coder.connection.loading.text")
44-
SshMultistagePanelContext(parameters.toHostDeployInputs())
45-
},
46-
predicate = { e ->
47-
e is ConnectionException || e is TimeoutException
48-
|| e is SSHException || e is DeployException
49-
},
50-
update = { _, e, remainingMs ->
51-
val message =
52-
if (isWorkerTimeout(e)) "Failed to upload worker binary...it may have timed out"
53-
else e.message ?: CoderGatewayBundle.message("gateway.connector.no-details")
54-
if (remainingMs != null) {
55-
indicator.text = message
56-
indicator.text = CoderGatewayBundle.message("gateway.connector.coder.connection.retry-error.text", humanizeDuration(remainingMs))
57-
} else {
58-
ApplicationManager.getApplication().invokeAndWait {
59-
Messages.showMessageDialog(
60-
message,
61-
CoderGatewayBundle.message("gateway.connector.coder.connection.error.text"),
62-
Messages.getErrorIcon())
63-
}
64-
}
65-
},
66-
)
67-
if (context != null) {
38+
try {
39+
val context = suspendingRetryWithExponentialBackOff(
40+
action = { attempt ->
41+
logger.info("Connecting... (attempt $attempt")
42+
SshMultistagePanelContext(parameters.toHostDeployInputs())
43+
},
44+
retryIf = {
45+
it is ConnectionException || it is TimeoutException
46+
|| it is SSHException || it is DeployException
47+
},
48+
onError = { attempt, nextMs, e ->
49+
logger.error("Failed to connect (attempt $attempt; will retry in $nextMs ms)")
50+
indicator.text =
51+
if (isWorkerTimeout(e)) "Failed to upload worker binary...it may have timed out"
52+
else e.message ?: CoderGatewayBundle.message("gateway.connector.no-details")
53+
},
54+
onCountdown = { attempt, remainingMs ->
55+
indicator.text =
56+
if (remainingMs == null) CoderGatewayBundle.message("gateway.connector.coder.connection.loading.text")
57+
else if (remainingMs < TimeUnit.SECONDS.toMillis(1)) CoderGatewayBundle.message("gateway.connector.coder.connection.retry.text", attempt)
58+
else CoderGatewayBundle.message("gateway.connector.coder.connection.retry-error.text", humanizeDuration(remainingMs))
59+
},
60+
)
6861
launch {
6962
logger.info("Deploying and starting IDE with $context")
7063
// At this point JetBrains takes over with their own UI.
7164
@Suppress("UnstableApiUsage") SshDeployFlowUtil.fullDeployCycle(
7265
clientLifetime, context, Duration.ofMinutes(10)
7366
)
7467
}
68+
} catch (e: Exception) {
69+
if (isCancellation(e)) {
70+
logger.info("Connection canceled due to ${e.javaClass}")
71+
} else {
72+
logger.info("Failed to connect (will not retry)", e)
73+
// The dialog will close once we return so write the error
74+
// out into a new dialog.
75+
ApplicationManager.getApplication().invokeAndWait {
76+
Messages.showMessageDialog(
77+
e.message ?: CoderGatewayBundle.message("gateway.connector.no-details"),
78+
CoderGatewayBundle.message("gateway.connector.coder.connection.error.text"),
79+
Messages.getErrorIcon())
80+
}
81+
}
7582
}
7683
}
7784

src/main/kotlin/com/coder/gateway/sdk/Retry.kt

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
package com.coder.gateway.sdk
22

3-
import com.intellij.openapi.diagnostic.Logger
43
import com.intellij.openapi.progress.ProcessCanceledException
54
import com.intellij.ssh.SshException
65
import com.jetbrains.gateway.ssh.deploy.DeployException
@@ -22,25 +21,27 @@ fun unwrap(ex: Exception): Throwable {
2221
* Similar to Intellij's except it gives you the next delay, logs differently,
2322
* updates periodically (for counting down), runs forever, takes a predicate for
2423
* determining whether we should retry, and has some special handling for
25-
* exceptions to provide the true cause or better messages.
24+
* exceptions to provide the true cause.
2625
*
2726
* The update will have a boolean to indicate whether it is the first update (so
2827
* things like duplicate logs can be avoided). If remaining is null then no
2928
* more retries will be attempted.
3029
*
31-
* If an exception related to canceling is received then return null.
30+
* If an exception that cannot be retried is received (including those related
31+
* to canceling) that exception (after being unwrapped) will be thrown.
32+
*
33+
* onError will only be called on retryable errors.
3234
*/
3335
suspend fun <T> suspendingRetryWithExponentialBackOff(
3436
initialDelayMs: Long = TimeUnit.SECONDS.toMillis(5),
3537
backOffLimitMs: Long = TimeUnit.MINUTES.toMillis(3),
3638
backOffFactor: Int = 2,
3739
backOffJitter: Double = 0.1,
38-
label: String,
39-
logger: Logger,
40-
predicate: (e: Throwable) -> Boolean,
41-
update: (attempt: Int, e: Throwable, remaining: Long?) -> Unit,
42-
action: suspend (attempt: Int) -> T?
43-
): T? {
40+
retryIf: (e: Throwable) -> Boolean,
41+
onError: (attempt: Int, nextMs: Long, e: Throwable) -> Unit,
42+
onCountdown: (attempt: Int, remaining: Long?) -> Unit,
43+
action: suspend (attempt: Int) -> T
44+
): T {
4445
val random = Random()
4546
var delayMs = initialDelayMs
4647
for (attempt in 1..Int.MAX_VALUE) {
@@ -51,23 +52,13 @@ suspend fun <T> suspendingRetryWithExponentialBackOff(
5152
// SshException can happen due to anything from a timeout to being
5253
// canceled so unwrap to find out.
5354
val unwrappedEx = if (originalEx is SshException) unwrap(originalEx) else originalEx
54-
when (unwrappedEx) {
55-
is InterruptedException,
56-
is CancellationException,
57-
is ProcessCanceledException -> {
58-
logger.info("Retrying $label canceled due to ${unwrappedEx.javaClass}")
59-
return null
60-
}
61-
}
62-
if (!predicate(unwrappedEx)) {
63-
logger.error("Failed to $label (attempt $attempt; will not retry)", originalEx)
64-
update(attempt, unwrappedEx, null)
65-
return null
55+
if (!retryIf(unwrappedEx)) {
56+
throw unwrappedEx
6657
}
67-
logger.error("Failed to $label (attempt $attempt; will retry in $delayMs ms)", originalEx)
58+
onError(attempt, delayMs, unwrappedEx)
6859
var remainingMs = delayMs
6960
while (remainingMs > 0) {
70-
update(attempt, unwrappedEx, remainingMs)
61+
onCountdown(attempt, remainingMs)
7162
val next = min(remainingMs, TimeUnit.SECONDS.toMillis(1))
7263
remainingMs -= next
7364
delay(next)
@@ -98,3 +89,12 @@ fun humanizeDuration(durationMs: Long): String {
9889
/**
 * Return true if the exception is a [DeployException] whose message reports
 * that deploying the worker binary failed.
 */
fun isWorkerTimeout(e: Throwable): Boolean {
    // Anything other than a deploy failure can never be a worker timeout.
    if (e !is DeployException) {
        return false
    }
    return e.message.contains("Worker binary deploy failed")
}
92+
93+
/**
94+
* Return true if the exception is some kind of cancellation.
95+
*/
96+
fun isCancellation(e: Throwable): Boolean {
97+
return e is InterruptedException
98+
|| e is CancellationException
99+
|| e is ProcessCanceledException
100+
}

src/main/kotlin/com/coder/gateway/views/steps/CoderLocateRemoteProjectStepView.kt

Lines changed: 39 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import com.coder.gateway.sdk.CoderCLIManager
99
import com.coder.gateway.sdk.CoderRestClientService
1010
import com.coder.gateway.sdk.OS
1111
import com.coder.gateway.sdk.humanizeDuration
12+
import com.coder.gateway.sdk.isCancellation
1213
import com.coder.gateway.sdk.isWorkerTimeout
1314
import com.coder.gateway.sdk.suspendingRetryWithExponentialBackOff
1415
import com.coder.gateway.sdk.toURL
@@ -70,6 +71,7 @@ import net.schmizz.sshj.connection.ConnectionException
7071
import java.awt.Component
7172
import java.awt.FlowLayout
7273
import java.util.Locale
74+
import java.util.concurrent.TimeUnit
7375
import java.util.concurrent.TimeoutException
7476
import javax.swing.ComboBoxModel
7577
import javax.swing.DefaultComboBoxModel
@@ -162,6 +164,7 @@ class CoderLocateRemoteProjectStepView(private val setNextButtonEnabled: (Boolea
162164
// Clear contents from the last attempt if any.
163165
cbIDEComment.foreground = UIUtil.getContextHelpForeground()
164166
cbIDEComment.text = CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.ide.none.comment")
167+
cbIDE.renderer = IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.loading.text"))
165168
ideComboBoxModel.removeAllElements()
166169
setNextButtonEnabled(false)
167170

@@ -178,42 +181,46 @@ class CoderLocateRemoteProjectStepView(private val setNextButtonEnabled: (Boolea
178181
terminalLink.url = coderClient.coderURL.withPath("/@${coderClient.me.username}/${selectedWorkspace.name}/terminal").toString()
179182

180183
ideResolvingJob = cs.launch {
181-
val ides = suspendingRetryWithExponentialBackOff(
182-
label = "retrieve IDEs",
183-
logger = logger,
184-
action={ attempt ->
185-
logger.info("Deploying to ${selectedWorkspace.name} on $deploymentURL (attempt $attempt)")
186-
// Reset text in the select dropdown.
187-
withContext(Dispatchers.Main) {
188-
cbIDE.renderer = IDECellRenderer(
189-
if (attempt > 1) CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.retry.text", attempt)
190-
else CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.loading.text"))
191-
}
192-
val executor = createRemoteExecutor(CoderCLIManager.getHostName(deploymentURL, selectedWorkspace))
193-
if (ComponentValidator.getInstance(tfProject).isEmpty) {
194-
installRemotePathValidator(executor)
195-
}
196-
retrieveIDEs(executor, selectedWorkspace)
197-
},
198-
predicate = { e ->
199-
e is ConnectionException || e is TimeoutException
200-
|| e is SSHException || e is DeployException
201-
},
202-
update = { _, e, remainingMs ->
203-
cbIDEComment.foreground = UIUtil.getErrorForeground()
204-
cbIDEComment.text =
205-
if (isWorkerTimeout(e)) "Failed to upload worker binary...it may have timed out. Check the command log for more details."
206-
else e.message ?: CoderGatewayBundle.message("gateway.connector.no-details")
207-
cbIDE.renderer =
208-
if (remainingMs != null) IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.retry-error.text", humanizeDuration(remainingMs)))
209-
else IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.error.text"), UIUtil.getBalloonErrorIcon())
210-
},
211-
)
212-
if (ides != null) {
184+
try {
185+
val ides = suspendingRetryWithExponentialBackOff(
186+
action = { attempt ->
187+
logger.info("Retrieving IDEs...(attempt $attempt)")
188+
val executor = createRemoteExecutor(CoderCLIManager.getHostName(deploymentURL, selectedWorkspace))
189+
if (ComponentValidator.getInstance(tfProject).isEmpty) {
190+
installRemotePathValidator(executor)
191+
}
192+
retrieveIDEs(executor, selectedWorkspace)
193+
},
194+
retryIf = {
195+
it is ConnectionException || it is TimeoutException
196+
|| it is SSHException || it is DeployException
197+
},
198+
onError = { attempt, nextMs, e ->
199+
logger.error("Failed to retrieve IDEs (attempt $attempt; will retry in $nextMs ms)")
200+
cbIDEComment.foreground = UIUtil.getErrorForeground()
201+
cbIDEComment.text =
202+
if (isWorkerTimeout(e)) "Failed to upload worker binary...it may have timed out. Check the command log for more details."
203+
else e.message ?: CoderGatewayBundle.message("gateway.connector.no-details")
204+
},
205+
onCountdown = { attempt, remainingMs ->
206+
cbIDE.renderer =
207+
if (remainingMs == null) IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.error.text"), UIUtil.getBalloonErrorIcon())
208+
else if (remainingMs < TimeUnit.SECONDS.toMillis(1)) IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.retry.text", attempt))
209+
else IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.retry-error.text", humanizeDuration(remainingMs)))
210+
},
211+
)
213212
withContext(Dispatchers.Main) {
214213
ideComboBoxModel.addAll(ides)
215214
cbIDE.selectedIndex = 0
216215
}
216+
} catch (e: Exception) {
217+
if (isCancellation(e)) {
218+
logger.info("Connection canceled due to ${e.javaClass}")
219+
} else {
220+
logger.error("Failed to retrieve IDEs (will not retry)", e)
221+
cbIDEComment.foreground = UIUtil.getErrorForeground()
222+
cbIDEComment.text = e.message ?: CoderGatewayBundle.message("gateway.connector.no-details")
223+
}
217224
}
218225
}
219226
}

0 commit comments

Comments
 (0)