diff --git a/src/main/kotlin/com/coder/gateway/CoderGatewayConnectionProvider.kt b/src/main/kotlin/com/coder/gateway/CoderGatewayConnectionProvider.kt index 07b7b961..cd625208 100644 --- a/src/main/kotlin/com/coder/gateway/CoderGatewayConnectionProvider.kt +++ b/src/main/kotlin/com/coder/gateway/CoderGatewayConnectionProvider.kt @@ -2,19 +2,29 @@ package com.coder.gateway +import com.coder.gateway.sdk.humanizeDuration +import com.coder.gateway.sdk.isCancellation +import com.coder.gateway.sdk.isWorkerTimeout +import com.coder.gateway.sdk.suspendingRetryWithExponentialBackOff import com.coder.gateway.services.CoderRecentWorkspaceConnectionsService +import com.intellij.openapi.application.ApplicationManager import com.intellij.openapi.components.service import com.intellij.openapi.diagnostic.Logger import com.intellij.openapi.rd.util.launchUnderBackgroundProgress +import com.intellij.openapi.ui.Messages import com.jetbrains.gateway.api.ConnectionRequestor import com.jetbrains.gateway.api.GatewayConnectionHandle import com.jetbrains.gateway.api.GatewayConnectionProvider import com.jetbrains.gateway.api.GatewayUI import com.jetbrains.gateway.ssh.SshDeployFlowUtil import com.jetbrains.gateway.ssh.SshMultistagePanelContext +import com.jetbrains.gateway.ssh.deploy.DeployException import com.jetbrains.rd.util.lifetime.LifetimeDefinition import kotlinx.coroutines.launch +import net.schmizz.sshj.common.SSHException +import net.schmizz.sshj.connection.ConnectionException import java.time.Duration +import java.util.concurrent.TimeoutException class CoderGatewayConnectionProvider : GatewayConnectionProvider { private val recentConnectionsService = service() @@ -24,12 +34,53 @@ class CoderGatewayConnectionProvider : GatewayConnectionProvider { // TODO: If this fails determine if it is an auth error and if so prompt // for a new token, configure the CLI, then try again. 
clientLifetime.launchUnderBackgroundProgress(CoderGatewayBundle.message("gateway.connector.coder.connection.provider.title"), canBeCancelled = true, isIndeterminate = true, project = null) { - val context = SshMultistagePanelContext(parameters.toHostDeployInputs()) - logger.info("Deploying and starting IDE with $context") - launch { - @Suppress("UnstableApiUsage") SshDeployFlowUtil.fullDeployCycle( - clientLifetime, context, Duration.ofMinutes(10) + try { + indicator.text = CoderGatewayBundle.message("gateway.connector.coder.connecting") + val context = suspendingRetryWithExponentialBackOff( + action = { attempt -> + logger.info("Connecting... (attempt $attempt") + if (attempt > 1) { + // indicator.text is the text above the progress bar. + indicator.text = CoderGatewayBundle.message("gateway.connector.coder.connecting.retry", attempt) + } + SshMultistagePanelContext(parameters.toHostDeployInputs()) + }, + retryIf = { + it is ConnectionException || it is TimeoutException + || it is SSHException || it is DeployException + }, + onException = { attempt, nextMs, e -> + logger.error("Failed to connect (attempt $attempt; will retry in $nextMs ms)") + // indicator.text2 is the text below the progress bar. + indicator.text2 = + if (isWorkerTimeout(e)) "Failed to upload worker binary...it may have timed out" + else e.message ?: CoderGatewayBundle.message("gateway.connector.no-details") + }, + onCountdown = { remainingMs -> + indicator.text = CoderGatewayBundle.message("gateway.connector.coder.connecting.failed.retry", humanizeDuration(remainingMs)) + }, ) + launch { + logger.info("Deploying and starting IDE with $context") + // At this point JetBrains takes over with their own UI. 
+ @Suppress("UnstableApiUsage") SshDeployFlowUtil.fullDeployCycle( + clientLifetime, context, Duration.ofMinutes(10) + ) + } + } catch (e: Exception) { + if (isCancellation(e)) { + logger.info("Connection canceled due to ${e.javaClass}") + } else { + logger.info("Failed to connect (will not retry)", e) + // The dialog will close once we return so write the error + // out into a new dialog. + ApplicationManager.getApplication().invokeAndWait { + Messages.showMessageDialog( + e.message ?: CoderGatewayBundle.message("gateway.connector.no-details"), + CoderGatewayBundle.message("gateway.connector.coder.connection.failed"), + Messages.getErrorIcon()) + } + } } } diff --git a/src/main/kotlin/com/coder/gateway/sdk/Retry.kt b/src/main/kotlin/com/coder/gateway/sdk/Retry.kt index 23e6e650..51d4c04c 100644 --- a/src/main/kotlin/com/coder/gateway/sdk/Retry.kt +++ b/src/main/kotlin/com/coder/gateway/sdk/Retry.kt @@ -1,23 +1,50 @@ package com.coder.gateway.sdk +import com.intellij.openapi.progress.ProcessCanceledException +import com.intellij.ssh.SshException +import com.jetbrains.gateway.ssh.deploy.DeployException import kotlinx.coroutines.delay -import kotlinx.datetime.Clock import java.util.Random import java.util.concurrent.TimeUnit -import kotlin.concurrent.timer -import kotlin.math.max +import kotlin.coroutines.cancellation.CancellationException import kotlin.math.min +fun unwrap(ex: Exception): Throwable { + var cause = ex.cause + while(cause?.cause != null) { + cause = cause.cause + } + return cause ?: ex +} + /** - * Similar to Intellij's except it gives you the next delay, does not do its own - * logging, updates periodically (for counting down), and runs forever. + * Similar to Intellij's except it adds two new arguments: onCountdown (for + * displaying the time until the next try) and retryIf (to limit which + * exceptions can be retried). + * + * Exceptions that cannot be retried will be thrown. 
+ * + * onException and onCountdown will be called immediately on retryable failures. + * onCountdown will also be called every second until the next try with the time + * left until that next try (the last interval might be less than one second if + * the total delay is not divisible by one second). + * + * Some other differences: + * - onException gives you the time until the next try (intended to be logged + * with the error). + * - Infinite tries. + * - SshException is unwrapped. + * + * It is otherwise identical. */ suspend fun suspendingRetryWithExponentialBackOff( initialDelayMs: Long = TimeUnit.SECONDS.toMillis(5), backOffLimitMs: Long = TimeUnit.MINUTES.toMillis(3), backOffFactor: Int = 2, backOffJitter: Double = 0.1, - update: (attempt: Int, remainingMs: Long, e: Exception) -> Unit, + retryIf: (e: Throwable) -> Boolean, + onException: (attempt: Int, nextMs: Long, e: Throwable) -> Unit, + onCountdown: (remaining: Long) -> Unit, action: suspend (attempt: Int) -> T ): T { val random = Random() @@ -26,21 +53,53 @@ suspend fun suspendingRetryWithExponentialBackOff( try { return action(attempt) } - catch (e: Exception) { - val end = Clock.System.now().toEpochMilliseconds() + delayMs - val timer = timer(period = TimeUnit.SECONDS.toMillis(1)) { - val now = Clock.System.now().toEpochMilliseconds() - val next = max(end - now, 0) - if (next > 0) { - update(attempt, next, e) - } else { - this.cancel() - } + catch (originalEx: Exception) { + // SshException can happen due to anything from a timeout to being + // canceled so unwrap to find out. 
+ val unwrappedEx = if (originalEx is SshException) unwrap(originalEx) else originalEx + if (!retryIf(unwrappedEx)) { + throw unwrappedEx + } + onException(attempt, delayMs, unwrappedEx) + var remainingMs = delayMs + while (remainingMs > 0) { + onCountdown(remainingMs) + val next = min(remainingMs, TimeUnit.SECONDS.toMillis(1)) + remainingMs -= next + delay(next) } - delay(delayMs) - timer.cancel() delayMs = min(delayMs * backOffFactor, backOffLimitMs) + (random.nextGaussian() * delayMs * backOffJitter).toLong() } } error("Should never be reached") } + +/** + * Convert a millisecond duration into a human-readable string. + * + * < 1 second: "now" + * 1 second: "in 1 second" + * > 1 second: "in N seconds" (N is the whole number of seconds) + */ +fun humanizeDuration(durationMs: Long): String { + val seconds = TimeUnit.MILLISECONDS.toSeconds(durationMs) + return if (seconds < 1) "now" else "in $seconds second${if (seconds > 1) "s" else ""}" +} + +/** + * When the worker upload times out Gateway just says it failed. Even the root + * cause (IllegalStateException) is useless. The error also includes a very + * long useless tmp path. Return true if the error looks like this timeout. + */ +fun isWorkerTimeout(e: Throwable): Boolean { + return e is DeployException && e.message.contains("Worker binary deploy failed") +} + +/** + * Return true if the exception is some kind of cancellation. 
+ */ +fun isCancellation(e: Throwable): Boolean { + return e is InterruptedException + || e is CancellationException + || e is ProcessCanceledException +} diff --git a/src/main/kotlin/com/coder/gateway/views/steps/CoderLocateRemoteProjectStepView.kt b/src/main/kotlin/com/coder/gateway/views/steps/CoderLocateRemoteProjectStepView.kt index 3154bbd4..3b209edd 100644 --- a/src/main/kotlin/com/coder/gateway/views/steps/CoderLocateRemoteProjectStepView.kt +++ b/src/main/kotlin/com/coder/gateway/views/steps/CoderLocateRemoteProjectStepView.kt @@ -8,6 +8,9 @@ import com.coder.gateway.sdk.Arch import com.coder.gateway.sdk.CoderCLIManager import com.coder.gateway.sdk.CoderRestClientService import com.coder.gateway.sdk.OS +import com.coder.gateway.sdk.humanizeDuration +import com.coder.gateway.sdk.isCancellation +import com.coder.gateway.sdk.isWorkerTimeout import com.coder.gateway.sdk.suspendingRetryWithExponentialBackOff import com.coder.gateway.sdk.toURL import com.coder.gateway.sdk.withPath @@ -68,7 +71,6 @@ import net.schmizz.sshj.connection.ConnectionException import java.awt.Component import java.awt.FlowLayout import java.util.Locale -import java.util.concurrent.TimeUnit import java.util.concurrent.TimeoutException import javax.swing.ComboBoxModel import javax.swing.DefaultComboBoxModel @@ -79,7 +81,6 @@ import javax.swing.JPanel import javax.swing.ListCellRenderer import javax.swing.SwingConstants import javax.swing.event.DocumentEvent -import kotlin.coroutines.cancellation.CancellationException class CoderLocateRemoteProjectStepView(private val setNextButtonEnabled: (Boolean) -> Unit) : CoderWorkspacesWizardStep, Disposable { private val cs = CoroutineScope(Dispatchers.Main) @@ -162,6 +163,7 @@ class CoderLocateRemoteProjectStepView(private val setNextButtonEnabled: (Boolea // Clear contents from the last attempt if any. 
cbIDEComment.foreground = UIUtil.getContextHelpForeground() cbIDEComment.text = CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.ide.none.comment") + cbIDE.renderer = IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.retrieve-ides")) ideComboBoxModel.removeAllElements() setNextButtonEnabled(false) @@ -178,54 +180,47 @@ class CoderLocateRemoteProjectStepView(private val setNextButtonEnabled: (Boolea terminalLink.url = coderClient.coderURL.withPath("/@${coderClient.me.username}/${selectedWorkspace.name}/terminal").toString() ideResolvingJob = cs.launch { - val ides = suspendingRetryWithExponentialBackOff( - action={ attempt -> - // Reset text in the select dropdown. - withContext(Dispatchers.Main) { - cbIDE.renderer = IDECellRenderer( - if (attempt > 1) CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.retry.text", attempt) - else CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.loading.text")) - } - try { + try { + val ides = suspendingRetryWithExponentialBackOff( + action = { attempt -> + logger.info("Retrieving IDEs...(attempt $attempt)") + if (attempt > 1) { + cbIDE.renderer = IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.retrieve.ides.retry", attempt)) + } val executor = createRemoteExecutor(CoderCLIManager.getHostName(deploymentURL, selectedWorkspace)) if (ComponentValidator.getInstance(tfProject).isEmpty) { installRemotePathValidator(executor) } retrieveIDEs(executor, selectedWorkspace) - } catch (e: Exception) { - when(e) { - is InterruptedException -> Unit - is CancellationException -> Unit - // Throw to retry these. The main one is - // DeployException which fires when dd times out. 
- is ConnectionException, is TimeoutException, - is SSHException, is DeployException -> throw e - else -> { - withContext(Dispatchers.Main) { - logger.error("Failed to retrieve IDEs (attempt $attempt)", e) - cbIDEComment.foreground = UIUtil.getErrorForeground() - cbIDEComment.text = e.message ?: "The error did not provide any further details" - cbIDE.renderer = IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.error.text"), UIUtil.getBalloonErrorIcon()) - } - } - } - null - } - }, - update = { attempt, retryMs, e -> - logger.error("Failed to retrieve IDEs (attempt $attempt; will retry in $retryMs ms)", e) - cbIDEComment.foreground = UIUtil.getErrorForeground() - cbIDEComment.text = e.message ?: "The error did not provide any further details" - val delayS = TimeUnit.MILLISECONDS.toSeconds(retryMs) - val delay = if (delayS < 1) "now" else "in $delayS second${if (delayS > 1) "s" else ""}" - cbIDE.renderer = IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.remoteproject.retry-error.text", delay)) - }, - ) - if (ides != null) { + }, + retryIf = { + it is ConnectionException || it is TimeoutException + || it is SSHException || it is DeployException + }, + onException = { attempt, nextMs, e -> + logger.error("Failed to retrieve IDEs (attempt $attempt; will retry in $nextMs ms)") + cbIDEComment.foreground = UIUtil.getErrorForeground() + cbIDEComment.text = + if (isWorkerTimeout(e)) "Failed to upload worker binary...it may have timed out. Check the command log for more details." 
+ else e.message ?: CoderGatewayBundle.message("gateway.connector.no-details") + }, + onCountdown = { remainingMs -> + cbIDE.renderer = IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.retrieve-ides.failed.retry", humanizeDuration(remainingMs))) + }, + ) withContext(Dispatchers.Main) { ideComboBoxModel.addAll(ides) cbIDE.selectedIndex = 0 } + } catch (e: Exception) { + if (isCancellation(e)) { + logger.info("Connection canceled due to ${e.javaClass}") + } else { + logger.error("Failed to retrieve IDEs (will not retry)", e) + cbIDEComment.foreground = UIUtil.getErrorForeground() + cbIDEComment.text = e.message ?: CoderGatewayBundle.message("gateway.connector.no-details") + cbIDE.renderer = IDECellRenderer(CoderGatewayBundle.message("gateway.connector.view.coder.retrieve-ides.failed"), UIUtil.getBalloonErrorIcon()) + } } } } diff --git a/src/main/resources/messages/CoderGatewayBundle.properties b/src/main/resources/messages/CoderGatewayBundle.properties index d8295e5c..c5e7e8b0 100644 --- a/src/main/resources/messages/CoderGatewayBundle.properties +++ b/src/main/resources/messages/CoderGatewayBundle.properties @@ -28,10 +28,10 @@ gateway.connector.view.workspaces.token.comment=The last used token is shown abo gateway.connector.view.workspaces.token.rejected=This token was rejected. gateway.connector.view.workspaces.token.injected=This token was pulled from your CLI config. gateway.connector.view.workspaces.token.none=No existing token found. -gateway.connector.view.coder.remoteproject.loading.text=Retrieving products... -gateway.connector.view.coder.remoteproject.retry.text=Retrieving products (attempt {0})... -gateway.connector.view.coder.remoteproject.error.text=Failed to retrieve IDEs -gateway.connector.view.coder.remoteproject.retry-error.text=Failed to retrieve IDEs...retrying {0} +gateway.connector.view.coder.retrieve-ides=Retrieving IDEs... +gateway.connector.view.coder.retrieve.ides.retry=Retrieving IDEs (attempt {0})... 
+gateway.connector.view.coder.retrieve-ides.failed=Failed to retrieve IDEs +gateway.connector.view.coder.retrieve-ides.failed.retry=Failed to retrieve IDEs...retrying {0} gateway.connector.view.coder.remoteproject.next.text=Start IDE and connect gateway.connector.view.coder.remoteproject.choose.text=Choose IDE and project for workspace {0} gateway.connector.view.coder.remoteproject.ide.download.comment=This IDE will be downloaded from jetbrains.com and installed to the default path on the remote host. @@ -42,6 +42,10 @@ gateway.connector.recentconnections.new.wizard.button.tooltip=Open a new Coder W gateway.connector.recentconnections.remove.button.tooltip=Remove from Recent Connections gateway.connector.recentconnections.terminal.button.tooltip=Open SSH Web Terminal gateway.connector.coder.connection.provider.title=Connecting to Coder workspace... +gateway.connector.coder.connecting=Connecting... +gateway.connector.coder.connecting.retry=Connecting (attempt {0})... +gateway.connector.coder.connection.failed=Failed to connect +gateway.connector.coder.connecting.failed.retry=Failed to connect...retrying {0} gateway.connector.settings.binary-source.title=CLI source: gateway.connector.settings.binary-source.comment=Used to download the Coder \ CLI which is necessary to make SSH connections. The If-None-Matched header \ @@ -54,3 +58,4 @@ gateway.connector.settings.binary-destination.comment=Directories are created \ here that store the CLI and credentials for each domain to which the plugin \ connects. \ Defaults to {0}. +gateway.connector.no-details=The error did not provide any further details