Skip to content

fix: return error if agent init script fails to download valid binary #13280

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions provisionersdk/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@ var (
}
)

// AgentScriptEnv returns a key-pair of scripts that are consumed
// by the Coder Terraform Provider. See:
// https://github.com/coder/terraform-provider-coder/blob/main/internal/provider/provider.go#L97
// AgentScriptEnv returns a key-pair of scripts that are consumed by the Coder Terraform Provider.
// https://github.com/coder/terraform-provider-coder/blob/main/provider/agent.go (updateInitScript)
// performs additional string substitutions.
func AgentScriptEnv() map[string]string {
env := map[string]string{}
for operatingSystem, scripts := range agentScripts {
Expand Down
137 changes: 116 additions & 21 deletions provisionersdk/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,57 +7,152 @@
package provisionersdk_test

import (
"bytes"
"context"
"errors"
"fmt"
"net/http"
"net/http/httptest"
"net/url"
"os/exec"
"runtime"
"strings"
"sync"
"testing"
"time"

"github.com/go-chi/render"
"github.com/stretchr/testify/require"

"github.com/coder/coder/v2/testutil"

"github.com/coder/coder/v2/provisionersdk"
)

// mimicking the --version output which we use to test the binary (see provisionersdk/scripts/bootstrap_*).
const versionOutput = `Coder v2.11.0+8979bfe Tue May 7 17:30:19 UTC 2024`

// bashEcho is a script that calls the local `echo` with the arguments. This is preferable to
// sending the real `echo` binary since macOS 14.4+ immediately sigkills `echo` if it is copied to
// another directory and run locally.
const bashEcho = `#!/usr/bin/env bash
echo $@`
echo "` + versionOutput + `"`

const unexpectedEcho = `#!/usr/bin/env bash
echo "this is not the agent you are looking for"`

func TestAgentScript(t *testing.T) {
t.Parallel()
t.Run("Run", func(t *testing.T) {

t.Run("Valid", func(t *testing.T) {
t.Parallel()
srv := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
render.Status(r, http.StatusOK)
render.Data(rw, r, []byte(bashEcho))
}))
defer srv.Close()
srvURL, err := url.Parse(srv.URL)
require.NoError(t, err)

script, exists := provisionersdk.AgentScriptEnv()[fmt.Sprintf("CODER_AGENT_SCRIPT_%s_%s", runtime.GOOS, runtime.GOARCH)]
if !exists {
t.Skip("Agent not supported...")
return
}
script = strings.ReplaceAll(script, "${ACCESS_URL}", srvURL.String()+"/")
script = strings.ReplaceAll(script, "${AUTH_TYPE}", "token")
script := serveScript(t, bashEcho)

ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
t.Cleanup(cancel)

var output bytes.Buffer
// This is intentionally ran in single quotes to mimic how a customer may
// embed our script. Our scripts should not include any single quotes.
// nolint:gosec
output, err := exec.Command("sh", "-c", "sh -c '"+script+"'").CombinedOutput()
t.Log(string(output))
cmd := exec.CommandContext(ctx, "sh", "-c", "sh -c '"+script+"'")
cmd.Stdout = &output
cmd.Stderr = &output
require.NoError(t, cmd.Start())

err := cmd.Wait()
if err != nil {
var exitErr *exec.ExitError
if errors.As(err, &exitErr) {
require.Equal(t, 0, exitErr.ExitCode())
} else {
t.Fatalf("unexpected err: %s", err)
}
}

t.Log(output.String())
require.NoError(t, err)
// Ignore debug output from `set -x`, we're only interested in the last line.
lines := strings.Split(strings.TrimSpace(string(output)), "\n")
lines := strings.Split(strings.TrimSpace(output.String()), "\n")
lastLine := lines[len(lines)-1]
// Because we use the "echo" binary, we should expect the arguments provided
// When we use the "bashEcho" binary, we should expect the arguments provided
// as the response to executing our script.
require.Equal(t, "agent", lastLine)
require.Equal(t, versionOutput, lastLine)
})

t.Run("Invalid", func(t *testing.T) {
t.Parallel()

script := serveScript(t, unexpectedEcho)

ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
t.Cleanup(cancel)

var output bytes.Buffer
// This is intentionally ran in single quotes to mimic how a customer may
// embed our script. Our scripts should not include any single quotes.
// nolint:gosec
cmd := exec.CommandContext(ctx, "sh", "-c", "sh -c '"+script+"'")
cmd.WaitDelay = time.Second
cmd.Stdout = &output
cmd.Stderr = &output
require.NoError(t, cmd.Start())

done := make(chan error, 1)
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()

// The bootstrap scripts trap exit codes to allow operators to view the script logs and debug the process
// while it is still running. We do not expect Wait() to complete.
err := cmd.Wait()
done <- err
}()

select {
case <-ctx.Done():
// Timeout.
break
case err := <-done:
// If done signals before context times out, script behaved in an unexpected way.
if err != nil {
t.Fatalf("unexpected err: %s", err)
}
}

// Kill the command, wait for the command to yield.
require.NoError(t, cmd.Cancel())
wg.Wait()

t.Log(output.String())

require.Eventually(t, func() bool {
return bytes.Contains(output.Bytes(), []byte("ERROR: Downloaded agent binary returned unexpected version output"))
}, testutil.WaitShort, testutil.IntervalSlow)
})
}

// serveScript creates a fake HTTP server which serves a requested "agent binary" (which is actually the given input string)
// which will be attempted to run to verify that it is correct.
func serveScript(t *testing.T, in string) string {
t.Helper()

srv := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
render.Status(r, http.StatusOK)
render.Data(rw, r, []byte(in))
}))
t.Cleanup(srv.Close)
srvURL, err := url.Parse(srv.URL)
require.NoError(t, err)

script, exists := provisionersdk.AgentScriptEnv()[fmt.Sprintf("CODER_AGENT_SCRIPT_%s_%s", runtime.GOOS, runtime.GOARCH)]
if !exists {
t.Skip("Agent not supported...")
return ""
}
script = strings.ReplaceAll(script, "${ACCESS_URL}", srvURL.String()+"/")
script = strings.ReplaceAll(script, "${AUTH_TYPE}", "token")
return script
}
12 changes: 10 additions & 2 deletions provisionersdk/scripts/bootstrap_darwin.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ set -eux
# This is to allow folks to exec into a failed workspace and poke around to
# troubleshoot.
waitonexit() {
echo "=== Agent script exited with non-zero code. Sleeping 24h to preserve logs..."
echo "=== Agent script exited with non-zero code ($?). Sleeping 24h to preserve logs..."
sleep 86400
}
trap waitonexit EXIT
Expand All @@ -31,4 +31,12 @@ fi

export CODER_AGENT_AUTH="${AUTH_TYPE}"
export CODER_AGENT_URL="${ACCESS_URL}"
exec ./$BINARY_NAME agent

output=$(./${BINARY_NAME} --version | head -n1)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't recall now, but I worry if ${} can be interpreted as a terraform variable here. This is good practice but I think we should avoid it in the bootstrap scripts.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a good call-out, but BINARY_NAME is not replaced by the provider, it seems:
https://github.com/coder/terraform-provider-coder/blob/7815596401d6e69aebb4ceefe1e84369cb63c4ac/provider/agent.go#L345-L376

IMHO I think we should use a different template replacement syntax than Bash's variable expansion, to make it very clear that these are replaced by a script and not accepted into the script as env vars.

if ! echo "${output}" | grep -q Coder; then
echo >&2 "ERROR: Downloaded agent binary returned unexpected version output"
echo >&2 "${BINARY_NAME} --version output: \"${output}\""
exit 2
fi

exec ./${BINARY_NAME} agent
12 changes: 10 additions & 2 deletions provisionersdk/scripts/bootstrap_linux.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ set -eux
# This is to allow folks to exec into a failed workspace and poke around to
# troubleshoot.
waitonexit() {
echo "=== Agent script exited with non-zero code. Sleeping 24h to preserve logs..."
echo "=== Agent script exited with non-zero code ($?). Sleeping 24h to preserve logs..."
sleep 86400
}
trap waitonexit EXIT
Expand Down Expand Up @@ -86,4 +86,12 @@ fi

export CODER_AGENT_AUTH="${AUTH_TYPE}"
export CODER_AGENT_URL="${ACCESS_URL}"
exec ./$BINARY_NAME agent

output=$(./${BINARY_NAME} --version | head -n1)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd still like to see stderr redirected here so we can give a sensible output. Thoughts?

❯ output=$(echo '<html>' >hi; chmod +x hi; ./hi --version); declare -p output
./hi: line 1: syntax error near unexpected token `newline'
./hi: line 1: `<html>'
typeset output=''

if ! echo "${output}" | grep -q Coder; then
echo >&2 "ERROR: Downloaded agent binary returned unexpected version output"
echo >&2 "${BINARY_NAME} --version output: \"${output}\""
exit 2
fi

exec ./${BINARY_NAME} agent
13 changes: 13 additions & 0 deletions provisionersdk/scripts/bootstrap_windows.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,19 @@ if (-not (Get-Command 'Set-MpPreference' -ErrorAction SilentlyContinue)) {
$env:CODER_AGENT_AUTH = "${AUTH_TYPE}"
$env:CODER_AGENT_URL = "${ACCESS_URL}"

$psi = [System.Diagnostics.ProcessStartInfo]::new("$env:TEMP\sshd.exe", '--version')
$psi.UseShellExecute = $false
$psi.RedirectStandardOutput = $true
$p = [System.Diagnostics.Process]::Start($psi)
$output = $p.StandardOutput.ReadToEnd()
$p.WaitForExit()

if ($output -notlike "*Coder*") {
Write-Output "$env:TEMP\sshd.exe --version output: `"$output"`"
Write-Error "ERROR: Downloaded agent binary returned unexpected version output"
Throw "unexpected binary"
}

# Check if we're running inside a Windows container!
$inContainer = $false
if ((Get-ItemProperty 'HKLM:\SYSTEM\CurrentControlSet\Control' -Name 'ContainerType' -ErrorAction SilentlyContinue) -ne $null) {
Expand Down
2 changes: 1 addition & 1 deletion scripts/check_site_icons.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash

set -euo pipefail
# shellcheck source=scripts/lib.sh
Expand Down
Loading