From ee6c3f08f581de16ff258c843d86d786b0f6af16 Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Fri, 2 Jun 2023 19:44:41 +0100 Subject: [PATCH 01/16] chore: add scaletest convenience script --- .gitignore | 4 +- scaletest/terraform/coder.tf | 7 ++ scaletest/terraform/coder_init.sh | 6 + scaletest/terraform/coder_workspacetraffic.sh | 5 +- scaletest/terraform/scenario-large.tfvars | 4 + scaletest/terraform/scenario-large2x.tfvars | 5 + scaletest/terraform/scenario-medium.tfvars | 4 + scaletest/terraform/scenario-medium2x.tfvars | 5 + scaletest/terraform/scenario-small.tfvars | 4 + scaletest/terraform/scenario-small2x.tfvars | 5 + scaletest/terraform/secrets.tfvars.tpl | 6 + scaletest/workspacetraffic/run.go | 11 -- scripts/scaletest.sh | 109 ++++++++++++++++++ 13 files changed, 161 insertions(+), 14 deletions(-) create mode 100644 scaletest/terraform/scenario-large.tfvars create mode 100644 scaletest/terraform/scenario-large2x.tfvars create mode 100644 scaletest/terraform/scenario-medium.tfvars create mode 100644 scaletest/terraform/scenario-medium2x.tfvars create mode 100644 scaletest/terraform/scenario-small.tfvars create mode 100644 scaletest/terraform/scenario-small2x.tfvars create mode 100644 scaletest/terraform/secrets.tfvars.tpl create mode 100755 scripts/scaletest.sh diff --git a/.gitignore b/.gitignore index 29b297a9e41ec..173b72ae41b38 100644 --- a/.gitignore +++ b/.gitignore @@ -58,5 +58,5 @@ site/stats/ # Loadtesting ./scaletest/terraform/.terraform ./scaletest/terraform/.terraform.lock.hcl -terraform.tfstate.* -**/*.tfvars +scaletest/terraform/secrets.tfvars +.terraform.tfstate.* diff --git a/scaletest/terraform/coder.tf b/scaletest/terraform/coder.tf index 2486f753f76c8..c65640cda3426 100644 --- a/scaletest/terraform/coder.tf +++ b/scaletest/terraform/coder.tf @@ -96,6 +96,8 @@ coder: secretKeyRef: name: "${kubernetes_secret.coder-db.metadata.0.name}" key: url + - name: "CODER_PPROF_ENABLE" + value: "true" - name: "CODER_PROMETHEUS_ENABLE" value: 
"true" - name: "CODER_VERBOSE" @@ -216,6 +218,11 @@ resource "local_file" "kubernetes_template" { EOF } +resource "local_file" "output_vars" { + filename = "${path.module}/.coderv2/url" + content = local.coder_url +} + output "coder_url" { description = "URL of the Coder deployment" value = local.coder_url diff --git a/scaletest/terraform/coder_init.sh b/scaletest/terraform/coder_init.sh index fe038a6d9aff7..203e5fe3da100 100755 --- a/scaletest/terraform/coder_init.sh +++ b/scaletest/terraform/coder_init.sh @@ -18,6 +18,12 @@ if [[ "$ARCH" == "x86_64" ]]; then fi PLATFORM="$(uname | tr '[:upper:]' '[:lower:]')" +if [[ -f "${CONFIG_DIR}/coder.env" ]]; then + echo "Found existing coder.env in ${CONFIG_DIR}!" + echo "Nothing to do, exiting." + exit 0 +fi + mkdir -p "${CONFIG_DIR}" echo "Fetching Coder CLI for first-time setup!" curl -fsSLk "${CODER_URL}/bin/coder-${PLATFORM}-${ARCH}" -o "${CONFIG_DIR}/coder" diff --git a/scaletest/terraform/coder_workspacetraffic.sh b/scaletest/terraform/coder_workspacetraffic.sh index b979ca04a8be5..d75f51f69eeb2 100755 --- a/scaletest/terraform/coder_workspacetraffic.sh +++ b/scaletest/terraform/coder_workspacetraffic.sh @@ -15,6 +15,9 @@ CODER_TOKEN=$(./coder_shim.sh tokens create) CODER_URL="http://coder.coder-${LOADTEST_NAME}.svc.cluster.local" export KUBECONFIG="${PWD}/.coderv2/${LOADTEST_NAME}-cluster.kubeconfig" +# Clean up any pre-existing pods +kubectl -n "coder-${LOADTEST_NAME}" delete pod coder-scaletest-workspace-traffic --force || true + cat < " +fi + +[[ -n ${VERBOSE:-} ]] && set -x +set -euo pipefail + +SCALETEST_NAME="$1" +SCALETEST_SCENARIO="$2" +SCALETEST_NUM_WORKSPACES="$3" +SCALETEST_PROJECT="${SCALETEST_PROJECT:-}" +PROJECT_ROOT="$(git rev-parse --show-toplevel)" +SCALETEST_SCENARIO_VARS="${PROJECT_ROOT}/scaletest/terraform/scenario-${SCALETEST_SCENARIO}.tfvars" +SCALETEST_SECRETS="${PROJECT_ROOT}/scaletest/terraform/secrets.tfvars" 
+SCALETEST_SECRETS_TEMPLATE="${PROJECT_ROOT}/scaletest/terraform/secrets.tfvars.tpl" +SCALETEST_SKIP_CLEANUP="${SCALETEST_SKIP_CLEANUP:-}" + +if [[ "${SCALETEST_SKIP_CLEANUP}" == "true" ]]; then + echo "WARNING: you told me to not clean up after myself, so this is now your job!" +fi + +if [[ -z "${SCALETEST_PROJECT}" ]]; then + echo "Environment variable SCALETEST_PROJECT not set. Please set it and try again." + exit 1 +fi + +if [[ ! -f "${SCALETEST_SCENARIO_VARS}" ]] ; then + echo "No definition for scenario ${SCALETEST_SCENARIO} exists. Please create it and try again" + exit 1 +fi + +echo "Writing scaletest secrets to file." +SCALETEST_NAME="${SCALETEST_NAME}" envsubst < "${SCALETEST_SECRETS_TEMPLATE}" > "${SCALETEST_SECRETS}" + +pushd "${PROJECT_ROOT}/scaletest/terraform" + +echo "Initializing terraform." +terraform init + +echo "Setting up infrastructure." +terraform plan --var-file="${SCALETEST_SCENARIO_VARS}" --var-file="${SCALETEST_SECRETS}" -out=scaletest.tfplan +terraform apply -auto-approve scaletest.tfplan + +SCALETEST_CODER_URL=$(<./.coderv2/url) +attempt_counter=0 +max_attempts=6 # 60 seconds +echo -n "Waiting for Coder deployment at ${SCALETEST_CODER_URL} to become ready" +until curl --output /dev/null --silent --fail "${SCALETEST_CODER_URL}/healthz"; do + if [[ $attempt_counter -eq $max_attempts ]]; then + echo + echo "Max attempts reached." + exit 1 + fi + + echo -n '.' + attempt_counter=$((attempt_counter+1)) + sleep 10 +done + +echo "Initializing Coder deployment." +./coder_init.sh "${SCALETEST_CODER_URL}" + +echo "Creating ${SCALETEST_NUM_WORKSPACES} workspaces." +./coder_shim.sh scaletest create-workspaces \ + --count "${SCALETEST_NUM_WORKSPACES}" \ + --template=kubernetes \ + --concurrency 10 \ + --no-cleanup + +echo "Sleeping 10 minutes to establish a baseline measurement." 
+sleep 600 + +echo "Sending traffic to workspaces" +./coder_workspacetraffic.sh "${SCALETEST_NAME}" +export KUBECONFIG="${PWD}/.coderv2/${SCALETEST_NAME}-cluster.kubeconfig" +kubectl -n "coder-${SCALETEST_NAME}" wait pods coder-scaletest-workspace-traffic --for condition=Ready +kubectl -n "coder-${SCALETEST_NAME}" logs -f pod/coder-scaletest-workspace-traffic + +echo "Starting pprof" +kubectl -n "coder-${SCALETEST_NAME}" port-forward deployment/coder 6061:6060 & +pfpid=$! +trap 'kill $pfpid' EXIT + +echo -n "Waiting for pprof endpoint to become available" +pprof_attempt_counter=0 +while ! timeout 1 bash -c "echo > /dev/tcp/localhost/6061"; do + if [[ $pprof_attempt_counter -eq 10 ]]; then + echo + echo "pprof failed to become ready in time!" + exit 1 + fi + sleep 3 + echo -n "." +done +echo "Taking pprof snapshots" +curl --silent --fail --output "${SCALETEST_NAME}-heap.pprof.gz" http://localhost:6061/debug/pprof/heap +curl --silent --fail --output "${SCALETEST_NAME}-goroutine.pprof.gz" http://localhost:6061/debug/pprof/goroutine +kill $pfpid + +if [[ "${SCALETEST_SKIP_CLEANUP}" == "true" ]]; then + echo "Leaving resources up for you to inspect." + echo "Please don't forget to clean up afterwards!" + exit 0 +fi + +echo "Cleaning up" +terraform apply --destroy --var-file="${SCALETEST_SCENARIO_VARS}" --var-file="${SCALETEST_SECRETS}" --auto-approve From 68c583ff3bbfd8de4a20a0a11403a3cd71b66b9a Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Fri, 2 Jun 2023 19:48:56 +0100 Subject: [PATCH 02/16] fixup! 
chore: add scaletest convenience script --- scaletest/terraform/secrets.tfvars.tpl | 2 -- 1 file changed, 2 deletions(-) diff --git a/scaletest/terraform/secrets.tfvars.tpl b/scaletest/terraform/secrets.tfvars.tpl index f7dade80ee787..7298db304d8b6 100644 --- a/scaletest/terraform/secrets.tfvars.tpl +++ b/scaletest/terraform/secrets.tfvars.tpl @@ -1,6 +1,4 @@ name = "${SCALETEST_NAME}" -coder_image_repo = "gcr.io/coder-dev-1/coder-cian/coderv2" -coder_image_tag = "dev" project_id = "${SCALETEST_PROJECT}" prometheus_remote_write_user = "${SCALETEST_PROMETHEUS_REMOTE_WRITE_USER}" prometheus_remote_write_password = "${SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD}" From a4be408fa6f68d6c9da5020823149901cad35c89 Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Fri, 2 Jun 2023 19:58:00 +0100 Subject: [PATCH 03/16] fumpt --- scaletest/terraform/coder.tf | 2 +- scaletest/terraform/scenario-large.tfvars | 6 +++--- scaletest/terraform/scenario-large2x.tfvars | 8 ++++---- scaletest/terraform/scenario-medium.tfvars | 6 +++--- scaletest/terraform/scenario-medium2x.tfvars | 8 ++++---- scaletest/terraform/scenario-small.tfvars | 6 +++--- scaletest/terraform/scenario-small2x.tfvars | 8 ++++---- scripts/scaletest.sh | 6 +++--- 8 files changed, 25 insertions(+), 25 deletions(-) diff --git a/scaletest/terraform/coder.tf b/scaletest/terraform/coder.tf index c65640cda3426..9948f2b79c43a 100644 --- a/scaletest/terraform/coder.tf +++ b/scaletest/terraform/coder.tf @@ -220,7 +220,7 @@ resource "local_file" "kubernetes_template" { resource "local_file" "output_vars" { filename = "${path.module}/.coderv2/url" - content = local.coder_url + content = local.coder_url } output "coder_url" { diff --git a/scaletest/terraform/scenario-large.tfvars b/scaletest/terraform/scenario-large.tfvars index 0b237cdd39bdf..009198ddc66d7 100644 --- a/scaletest/terraform/scenario-large.tfvars +++ b/scaletest/terraform/scenario-large.tfvars @@ -1,4 +1,4 @@ -nodepool_machine_type_coder = "t2d-standard-8" 
+nodepool_machine_type_coder = "t2d-standard-8" nodepool_machine_type_workspaces = "t2d-standard-8" -coder_cpu = "7" -coder_mem = "31Gi" +coder_cpu = "7" +coder_mem = "31Gi" diff --git a/scaletest/terraform/scenario-large2x.tfvars b/scaletest/terraform/scenario-large2x.tfvars index ae923740dff11..cd7a2b97c2506 100644 --- a/scaletest/terraform/scenario-large2x.tfvars +++ b/scaletest/terraform/scenario-large2x.tfvars @@ -1,5 +1,5 @@ -nodepool_machine_type_coder = "t2d-standard-8" +nodepool_machine_type_coder = "t2d-standard-8" nodepool_machine_type_workspaces = "t2d-standard-8" -coder_replicas = 2 -coder_cpu = "7" -coder_mem = "31Gi" +coder_replicas = 2 +coder_cpu = "7" +coder_mem = "31Gi" diff --git a/scaletest/terraform/scenario-medium.tfvars b/scaletest/terraform/scenario-medium.tfvars index 9d2a37135e316..6f4d1e1d4e1fb 100644 --- a/scaletest/terraform/scenario-medium.tfvars +++ b/scaletest/terraform/scenario-medium.tfvars @@ -1,4 +1,4 @@ -nodepool_machine_type_coder = "t2d-standard-4" +nodepool_machine_type_coder = "t2d-standard-4" nodepool_machine_type_workspaces = "t2d-standard-4" -coder_cpu = "3500m" -coder_mem = "15Gi" +coder_cpu = "3500m" +coder_mem = "15Gi" diff --git a/scaletest/terraform/scenario-medium2x.tfvars b/scaletest/terraform/scenario-medium2x.tfvars index 73aaf248d41aa..151d160e27908 100644 --- a/scaletest/terraform/scenario-medium2x.tfvars +++ b/scaletest/terraform/scenario-medium2x.tfvars @@ -1,5 +1,5 @@ -nodepool_machine_type_coder = "t2d-standard-8" +nodepool_machine_type_coder = "t2d-standard-8" nodepool_machine_type_workspaces = "t2d-standard-8" -nodepool_size_workspaces = 2 -coder_cpu = "3500m" -coder_mem = "15Gi" +nodepool_size_workspaces = 2 +coder_cpu = "3500m" +coder_mem = "15Gi" diff --git a/scaletest/terraform/scenario-small.tfvars b/scaletest/terraform/scenario-small.tfvars index 0809bce05f2c1..ee9580dea5ed4 100644 --- a/scaletest/terraform/scenario-small.tfvars +++ b/scaletest/terraform/scenario-small.tfvars @@ -1,4 +1,4 @@ 
-nodepool_machine_type_coder = "t2d-standard-2" +nodepool_machine_type_coder = "t2d-standard-2" nodepool_machine_type_workspaces = "t2d-standard-2" -coder_cpu = "1500m" -coder_mem = "7Gi" +coder_cpu = "1500m" +coder_mem = "7Gi" diff --git a/scaletest/terraform/scenario-small2x.tfvars b/scaletest/terraform/scenario-small2x.tfvars index d79f41e77c221..94af1cdf8b1b9 100644 --- a/scaletest/terraform/scenario-small2x.tfvars +++ b/scaletest/terraform/scenario-small2x.tfvars @@ -1,5 +1,5 @@ -nodepool_machine_type_coder = "t2d-standard-2" +nodepool_machine_type_coder = "t2d-standard-2" nodepool_machine_type_workspaces = "t2d-standard-2" -nodepool_size_workspaces = 2 -coder_cpu = "1500m" -coder_mem = "7Gi" +nodepool_size_workspaces = 2 +coder_cpu = "1500m" +coder_mem = "7Gi" diff --git a/scripts/scaletest.sh b/scripts/scaletest.sh index 6cb49f825176c..a3b652e8edd27 100755 --- a/scripts/scaletest.sh +++ b/scripts/scaletest.sh @@ -26,13 +26,13 @@ if [[ -z "${SCALETEST_PROJECT}" ]]; then exit 1 fi -if [[ ! -f "${SCALETEST_SCENARIO_VARS}" ]] ; then +if [[ ! -f "${SCALETEST_SCENARIO_VARS}" ]]; then echo "No definition for scenario ${SCALETEST_SCENARIO} exists. Please create it and try again" exit 1 fi echo "Writing scaletest secrets to file." -SCALETEST_NAME="${SCALETEST_NAME}" envsubst < "${SCALETEST_SECRETS_TEMPLATE}" > "${SCALETEST_SECRETS}" +SCALETEST_NAME="${SCALETEST_NAME}" envsubst <"${SCALETEST_SECRETS_TEMPLATE}" >"${SCALETEST_SECRETS}" pushd "${PROJECT_ROOT}/scaletest/terraform" @@ -55,7 +55,7 @@ until curl --output /dev/null --silent --fail "${SCALETEST_CODER_URL}/healthz"; fi echo -n '.' 
- attempt_counter=$((attempt_counter+1)) + attempt_counter=$((attempt_counter + 1)) sleep 10 done From 548a43d495d1ebc3db8a794b6b31ef9393419606 Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Fri, 2 Jun 2023 20:01:17 +0100 Subject: [PATCH 04/16] gen --- .prettierignore | 4 ++-- site/.eslintignore | 4 ++-- site/.prettierignore | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.prettierignore b/.prettierignore index d96e9df947ddd..866c9b3eb889a 100644 --- a/.prettierignore +++ b/.prettierignore @@ -61,8 +61,8 @@ site/stats/ # Loadtesting ./scaletest/terraform/.terraform ./scaletest/terraform/.terraform.lock.hcl -terraform.tfstate.* -**/*.tfvars +scaletest/terraform/secrets.tfvars +.terraform.tfstate.* # .prettierignore.include: # Helm templates contain variables that are invalid YAML and can't be formatted # by Prettier. diff --git a/site/.eslintignore b/site/.eslintignore index f768843a9e6c4..9202d0bf186b0 100644 --- a/site/.eslintignore +++ b/site/.eslintignore @@ -61,8 +61,8 @@ stats/ # Loadtesting .././scaletest/terraform/.terraform .././scaletest/terraform/.terraform.lock.hcl -terraform.tfstate.* -**/*.tfvars +../scaletest/terraform/secrets.tfvars +.terraform.tfstate.* # .prettierignore.include: # Helm templates contain variables that are invalid YAML and can't be formatted # by Prettier. diff --git a/site/.prettierignore b/site/.prettierignore index f768843a9e6c4..9202d0bf186b0 100644 --- a/site/.prettierignore +++ b/site/.prettierignore @@ -61,8 +61,8 @@ stats/ # Loadtesting .././scaletest/terraform/.terraform .././scaletest/terraform/.terraform.lock.hcl -terraform.tfstate.* -**/*.tfvars +../scaletest/terraform/secrets.tfvars +.terraform.tfstate.* # .prettierignore.include: # Helm templates contain variables that are invalid YAML and can't be formatted # by Prettier. 
From b79176915f6f39090363f0cf2f3ef4b90c42b71e Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Tue, 6 Jun 2023 22:51:28 +0100 Subject: [PATCH 05/16] adjust coder resources in scaletest scenarios --- scaletest/terraform/scenario-large.tfvars | 4 ++-- scaletest/terraform/scenario-large2x.tfvars | 6 +++--- scaletest/terraform/scenario-medium.tfvars | 4 ++-- scaletest/terraform/scenario-medium2x.tfvars | 4 ++-- scaletest/terraform/scenario-small.tfvars | 4 ++-- scaletest/terraform/scenario-small2x.tfvars | 4 ++-- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/scaletest/terraform/scenario-large.tfvars b/scaletest/terraform/scenario-large.tfvars index 009198ddc66d7..22af34497d96d 100644 --- a/scaletest/terraform/scenario-large.tfvars +++ b/scaletest/terraform/scenario-large.tfvars @@ -1,4 +1,4 @@ nodepool_machine_type_coder = "t2d-standard-8" nodepool_machine_type_workspaces = "t2d-standard-8" -coder_cpu = "7" -coder_mem = "31Gi" +coder_cpu = "7" # Leaving 1 CPU for system workloads +coder_mem = "28Gi" # Leaving 4GB for system workloads diff --git a/scaletest/terraform/scenario-large2x.tfvars b/scaletest/terraform/scenario-large2x.tfvars index cd7a2b97c2506..ca1908e9fa7a3 100644 --- a/scaletest/terraform/scenario-large2x.tfvars +++ b/scaletest/terraform/scenario-large2x.tfvars @@ -1,5 +1,5 @@ nodepool_machine_type_coder = "t2d-standard-8" nodepool_machine_type_workspaces = "t2d-standard-8" -coder_replicas = 2 -coder_cpu = "7" -coder_mem = "31Gi" +nodepool_size_workspaces = 2 +coder_cpu = "7" # Leaving 1 CPU for system workloads +coder_mem = "28Gi" # Leaving 4 GB for system workloads diff --git a/scaletest/terraform/scenario-medium.tfvars b/scaletest/terraform/scenario-medium.tfvars index 6f4d1e1d4e1fb..fcc2becd6ce61 100644 --- a/scaletest/terraform/scenario-medium.tfvars +++ b/scaletest/terraform/scenario-medium.tfvars @@ -1,4 +1,4 @@ nodepool_machine_type_coder = "t2d-standard-4" nodepool_machine_type_workspaces = "t2d-standard-4" -coder_cpu = "3500m" 
-coder_mem = "15Gi" +coder_cpu = "3000m" # Leaving 1 CPU for system workloads +coder_mem = "12Gi" # Leaving 4 GB for system workloads diff --git a/scaletest/terraform/scenario-medium2x.tfvars b/scaletest/terraform/scenario-medium2x.tfvars index 151d160e27908..389224d16a926 100644 --- a/scaletest/terraform/scenario-medium2x.tfvars +++ b/scaletest/terraform/scenario-medium2x.tfvars @@ -1,5 +1,5 @@ nodepool_machine_type_coder = "t2d-standard-8" nodepool_machine_type_workspaces = "t2d-standard-8" nodepool_size_workspaces = 2 -coder_cpu = "3500m" -coder_mem = "15Gi" +coder_cpu = "3000m" # Leaving 1 CPU for system workloads +coder_mem = "12Gi" # Leaving 4 GB for system workloads diff --git a/scaletest/terraform/scenario-small.tfvars b/scaletest/terraform/scenario-small.tfvars index ee9580dea5ed4..6e331fcb427f8 100644 --- a/scaletest/terraform/scenario-small.tfvars +++ b/scaletest/terraform/scenario-small.tfvars @@ -1,4 +1,4 @@ nodepool_machine_type_coder = "t2d-standard-2" nodepool_machine_type_workspaces = "t2d-standard-2" -coder_cpu = "1500m" -coder_mem = "7Gi" +coder_cpu = "1000m" # Leaving 1 CPU for system workloads +coder_mem = "4Gi" # Leaving 4GB for system workloads diff --git a/scaletest/terraform/scenario-small2x.tfvars b/scaletest/terraform/scenario-small2x.tfvars index 94af1cdf8b1b9..bc793706ef474 100644 --- a/scaletest/terraform/scenario-small2x.tfvars +++ b/scaletest/terraform/scenario-small2x.tfvars @@ -1,5 +1,5 @@ nodepool_machine_type_coder = "t2d-standard-2" nodepool_machine_type_workspaces = "t2d-standard-2" nodepool_size_workspaces = 2 -coder_cpu = "1500m" -coder_mem = "7Gi" +coder_cpu = "1000m" # Leaving 1 CPU for system workloads +coder_mem = "4Gi" # Leaving 4 GB for system workloads From ea65896249b02291a76abf3ce94befd8c2c17e7c Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Tue, 6 Jun 2023 22:52:47 +0100 Subject: [PATCH 06/16] adjust path of temporary files to parent scaletest folder --- scaletest/terraform/coder.tf | 4 ++-- 
scaletest/terraform/prometheus.tf | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scaletest/terraform/coder.tf b/scaletest/terraform/coder.tf index 9948f2b79c43a..448f13fc964f6 100644 --- a/scaletest/terraform/coder.tf +++ b/scaletest/terraform/coder.tf @@ -131,7 +131,7 @@ EOF } resource "local_file" "kubernetes_template" { - filename = "${path.module}/.coderv2/templates/kubernetes/main.tf" + filename = "${path.module}/../.coderv2/templates/kubernetes/main.tf" content = < Date: Tue, 6 Jun 2023 22:53:18 +0100 Subject: [PATCH 07/16] move utilities to lib folder --- scaletest/{terraform => lib}/coder_init.sh | 22 ++++++++++++------- scaletest/lib/coder_shim.sh | 11 ++++++++++ .../coder_workspacetraffic.sh | 5 +++-- scaletest/terraform/coder_shim.sh | 8 ------- 4 files changed, 28 insertions(+), 18 deletions(-) rename scaletest/{terraform => lib}/coder_init.sh (63%) create mode 100755 scaletest/lib/coder_shim.sh rename scaletest/{terraform => lib}/coder_workspacetraffic.sh (90%) delete mode 100755 scaletest/terraform/coder_shim.sh diff --git a/scaletest/terraform/coder_init.sh b/scaletest/lib/coder_init.sh similarity index 63% rename from scaletest/terraform/coder_init.sh rename to scaletest/lib/coder_init.sh index 203e5fe3da100..c3b322e6c47a0 100755 --- a/scaletest/terraform/coder_init.sh +++ b/scaletest/lib/coder_init.sh @@ -11,7 +11,11 @@ fi [[ -n ${VERBOSE:-} ]] && set -x CODER_URL=$1 -CONFIG_DIR="${PWD}/.coderv2" +DRY_RUN="${DRY_RUN:-0}" +PROJECT_ROOT="$(git rev-parse --show-toplevel)" +# shellcheck source=scripts/lib.sh +source "${PROJECT_ROOT}/scripts/lib.sh" +CONFIG_DIR="${PROJECT_ROOT}/scaletest/.coderv2" ARCH="$(arch)" if [[ "$ARCH" == "x86_64" ]]; then ARCH="amd64" @@ -24,10 +28,10 @@ if [[ -f "${CONFIG_DIR}/coder.env" ]]; then exit 0 fi -mkdir -p "${CONFIG_DIR}" +maybedryrun "$DRY_RUN" mkdir -p "${CONFIG_DIR}" echo "Fetching Coder CLI for first-time setup!" 
-curl -fsSLk "${CODER_URL}/bin/coder-${PLATFORM}-${ARCH}" -o "${CONFIG_DIR}/coder" -chmod +x "${CONFIG_DIR}/coder" +maybedryrun "$DRY_RUN" curl -fsSLk "${CODER_URL}/bin/coder-${PLATFORM}-${ARCH}" -o "${CONFIG_DIR}/coder" +maybedryrun "$DRY_RUN" chmod +x "${CONFIG_DIR}/coder" set +o pipefail RANDOM_ADMIN_PASSWORD=$(tr "${CONFIG_DIR}/coder.env" +maybedryrun "$DRY_RUN" cat <"${CONFIG_DIR}/coder.env" CODER_FIRST_USER_EMAIL=admin@coder.com CODER_FIRST_USER_USERNAME=coder CODER_FIRST_USER_PASSWORD="${RANDOM_ADMIN_PASSWORD}" @@ -53,5 +57,7 @@ CODER_FIRST_USER_TRIAL="${CODER_FIRST_USER_TRIAL}" EOF echo "Importing kubernetes template" -"${CONFIG_DIR}/coder" templates create --global-config="${CONFIG_DIR}" \ - --directory "${CONFIG_DIR}/templates/kubernetes" --yes kubernetes +DRY_RUN="$DRY_RUN" "$PROJECT_ROOT/scaletest/lib/coder_shim.sh" templates create \ + --global-config="${CONFIG_DIR}" \ + --directory "${CONFIG_DIR}/templates/kubernetes" \ + --yes kubernetes diff --git a/scaletest/lib/coder_shim.sh b/scaletest/lib/coder_shim.sh new file mode 100755 index 0000000000000..639b371040c16 --- /dev/null +++ b/scaletest/lib/coder_shim.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +# This is a shim for easily executing Coder commands against a loadtest cluster +# without having to overwrite your own session/URL +PROJECT_ROOT="$(git rev-parse --show-toplevel)" +# shellcheck source=scripts/lib.sh +source "${PROJECT_ROOT}/scripts/lib.sh" +CONFIG_DIR="${PROJECT_ROOT}/scaletest/.coderv2" +CODER_BIN="${CONFIG_DIR}/coder" +DRY_RUN="${DRY_RUN:-0}" +maybedryrun "$DRY_RUN" exec "${CODER_BIN}" --global-config "${CONFIG_DIR}" "$@" diff --git a/scaletest/terraform/coder_workspacetraffic.sh b/scaletest/lib/coder_workspacetraffic.sh similarity index 90% rename from scaletest/terraform/coder_workspacetraffic.sh rename to scaletest/lib/coder_workspacetraffic.sh index d75f51f69eeb2..4be16a39fa998 100755 --- a/scaletest/terraform/coder_workspacetraffic.sh +++ b/scaletest/lib/coder_workspacetraffic.sh 
@@ -11,9 +11,10 @@ fi [[ -n ${VERBOSE:-} ]] && set -x LOADTEST_NAME="$1" -CODER_TOKEN=$(./coder_shim.sh tokens create) +PROJECT_ROOT="$(git rev-parse --show-toplevel)" +CODER_TOKEN=$("${PROJECT_ROOT}/scaletest/lib/coder_shim.sh" tokens create) CODER_URL="http://coder.coder-${LOADTEST_NAME}.svc.cluster.local" -export KUBECONFIG="${PWD}/.coderv2/${LOADTEST_NAME}-cluster.kubeconfig" +export KUBECONFIG="${PROJECT_ROOT}/scaletest/.coderv2/${LOADTEST_NAME}-cluster.kubeconfig" # Clean up any pre-existing pods kubectl -n "coder-${LOADTEST_NAME}" delete pod coder-scaletest-workspace-traffic --force || true diff --git a/scaletest/terraform/coder_shim.sh b/scaletest/terraform/coder_shim.sh deleted file mode 100755 index d62c5a952ecb3..0000000000000 --- a/scaletest/terraform/coder_shim.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash - -# This is a shim for easily executing Coder commands against a loadtest cluster -# without having to overwrite your own session/URL -SCRIPT_DIR=$(dirname "${BASH_SOURCE[0]}") -CONFIG_DIR="${SCRIPT_DIR}/.coderv2" -CODER_BIN="${CONFIG_DIR}/coder" -exec "${CODER_BIN}" --global-config "${CONFIG_DIR}" "$@" From a0c05ff4b06438f23494bc2cb7ac22701f68a19e Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Tue, 6 Jun 2023 22:53:59 +0100 Subject: [PATCH 08/16] move scaletest.sh to top level of scaletest dir --- scaletest/scaletest.sh | 182 +++++++++++++++++++++++++++++++++++++++++ scripts/scaletest.sh | 109 ------------------------ 2 files changed, 182 insertions(+), 109 deletions(-) create mode 100755 scaletest/scaletest.sh delete mode 100755 scripts/scaletest.sh diff --git a/scaletest/scaletest.sh b/scaletest/scaletest.sh new file mode 100755 index 0000000000000..ff539474cb0a8 --- /dev/null +++ b/scaletest/scaletest.sh @@ -0,0 +1,182 @@ +#!/usr/bin/env bash + +[[ -n ${VERBOSE:-} ]] && set -x +set -euo pipefail + +PROJECT_ROOT="$(git rev-parse --show-toplevel)" +# shellcheck source=scripts/lib.sh +source "${PROJECT_ROOT}/scripts/lib.sh" + 
+DRY_RUN="${DRY_RUN:-0}" +SCALETEST_NAME="${SCALETEST_NAME:-}" +SCALETEST_NUM_WORKSPACES="${SCALETEST_NUM_WORKSPACES:-}" +SCALETEST_SCENARIO="${SCALETEST_SCENARIO:-}" +SCALETEST_PROJECT="${SCALETEST_PROJECT:-}" +SCALETEST_PROMETHEUS_REMOTE_WRITE_USER="${SCALETEST_PROMETHEUS_REMOTE_WRITE_USER:-}" +SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD="${SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD:-}" +SCALETEST_SKIP_CLEANUP="${SCALETEST_SKIP_CLEANUP:-}" + +script_name=$(basename "$0") +args="$(getopt -o "" -l dry-run,help,name:,num-workspaces:,project:,scenario:,skip-cleanup -- "$@")" +eval set -- "$args" +while true; do + case "$1" in + --dry-run) + DRY_RUN=1 + shift + ;; + --help) + echo "Usage: $script_name --name --project [--num-workspaces ] [--scenario ] [--dry-run]" + exit 1 + ;; + --name) + SCALETEST_NAME="$2" + shift 2 + ;; + --num-workspaces) + SCALETEST_NUM_WORKSPACES="$2" + shift 2 + ;; + --project) + SCALETEST_PROJECT="$2" + shift 2 + ;; + --scenario) + SCALETEST_SCENARIO="$2" + shift 2 + ;; + --skip-cleanup) + SCALETEST_SKIP_CLEANUP=1 + shift + ;; + --) + shift + break + ;; + *) + error "Unrecognized option: $1" + ;; + esac +done + +dependencies gcloud kubectl terraform + +if [[ -z "${SCALETEST_NAME}" ]]; then + echo "Must specify --name" + exit 1 +fi + +if [[ -z "${SCALETEST_PROJECT}" ]]; then + echo "Must specify --project" + exit 1 +fi + +if [[ -z "${SCALETEST_NUM_WORKSPACES}" ]]; then + echo "Must specify --num-workspaces" + exit 1 +fi + +if [[ -z "${SCALETEST_SCENARIO}" ]]; then + echo "Must specify --scenario" + exit 1 +fi + +if [[ -z "${SCALETEST_PROMETHEUS_REMOTE_WRITE_USER}" ]] || [[ -z "${SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD}" ]]; then + echo "SCALETEST_PROMETHEUS_REMOTE_WRITE_USER or SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD not specified." + echo "No prometheus metrics will be collected!" + read -p "Continue (y/N)? 
" choice + case "$choice" in + y|Y|yes|YES ) ;; + * ) exit 1;; + esac +fi + +SCALETEST_SCENARIO_VARS="${PROJECT_ROOT}/scaletest/terraform/scenario-${SCALETEST_SCENARIO}.tfvars" +if [[ ! -f "${SCALETEST_SCENARIO_VARS}" ]]; then + echo "Scenario ${SCALETEST_SCENARIO_VARS} not found." + echo "Please create it or choose another scenario:" + find "${PROJECT_ROOT}/scaletest/terraform" -type f -name 'scenario-*.tfvars' + exit 1 +fi + +if [[ "${SCALETEST_SKIP_CLEANUP}" == "true" ]]; then + log "WARNING: you told me to not clean up after myself, so this is now your job!" +fi + +CONFIG_DIR="${PROJECT_ROOT}/scaletest/.coderv2" +SCALETEST_SCENARIO_VARS="${PROJECT_ROOT}/scaletest/terraform/scenario-${SCALETEST_SCENARIO}.tfvars" +SCALETEST_SECRETS="${PROJECT_ROOT}/scaletest/terraform/secrets.tfvars" +SCALETEST_SECRETS_TEMPLATE="${PROJECT_ROOT}/scaletest/terraform/secrets.tfvars.tpl" + +log "Writing scaletest secrets to file." +SCALETEST_NAME="${SCALETEST_NAME}" \ + SCALETEST_PROJECT="${SCALETEST_PROJECT}" \ + SCALETEST_PROMETHEUS_REMOTE_WRITE_USER="${SCALETEST_PROMETHEUS_REMOTE_WRITE_USER}" \ + SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD="${SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD}" \ + envsubst <"${SCALETEST_SECRETS_TEMPLATE}" >"${SCALETEST_SECRETS}" + +pushd "${PROJECT_ROOT}/scaletest/terraform" + +echo "Initializing terraform." +maybedryrun "$DRY_RUN" terraform init + +echo "Setting up infrastructure." 
+maybedryrun "$DRY_RUN" terraform apply --var-file="${SCALETEST_SCENARIO_VARS}" --var-file="${SCALETEST_SECRETS}" --auto-approve + +if [[ "${DRY_RUN}" != 1 ]]; then + SCALETEST_CODER_URL=$(<"${CONFIG_DIR}/url") +else + SCALETEST_CODER_URL="http://coder.dryrun.local:3000" +fi +KUBECONFIG="${PWD}/.coderv2/${SCALETEST_NAME}-cluster.kubeconfig" +echo "Waiting for Coder deployment at ${SCALETEST_CODER_URL} to become ready" +maybedryrun "$DRY_RUN" kubectl --kubeconfig="${KUBECONFIG}" -n "coder-${SCALETEST_NAME}" rollout status deployment/coder + +echo "Initializing Coder deployment." +DRY_RUN="$DRY_RUN" "${PROJECT_ROOT}/scaletest/lib/coder_init.sh" "${SCALETEST_CODER_URL}" + +echo "Creating ${SCALETEST_NUM_WORKSPACES} workspaces." +DRY_RUN="$DRY_RUN" "${PROJECT_ROOT}/scaletest/lib/coder_shim.sh" scaletest create-workspaces \ + --count "${SCALETEST_NUM_WORKSPACES}" \ + --template=kubernetes \ + --concurrency 10 \ + --no-cleanup + +echo "Sleeping 10 minutes to establish a baseline measurement." +maybedryrun "$DRY_RUN" sleep 600 + +echo "Sending traffic to workspaces" +maybedryrun "$DRY_RUN" "${PROJECT_ROOT}/scaletest/lib/coder_workspacetraffic.sh" "${SCALETEST_NAME}" +maybedryrun "$DRY_RUN" kubectl --kubeconfig="${KUBECONFIG}" -n "coder-${SCALETEST_NAME}" wait pods coder-scaletest-workspace-traffic --for condition=Ready +maybedryrun "$DRY_RUN" kubectl --kubeconfig="${KUBECONFIG}" -n "coder-${SCALETEST_NAME}" logs -f pod/coder-scaletest-workspace-traffic + +echo "Starting pprof" +maybedryrun "$DRY_RUN" kubectl -n "coder-${SCALETEST_NAME}" port-forward deployment/coder 6061:6060 & +pfpid=$! +maybedryrun "$DRY_RUN" trap 'kill $pfpid' EXIT + +echo "Waiting for pprof endpoint to become available" +pprof_attempt_counter=0 +while ! maybedryrun "$DRY_RUN" timeout 1 bash -c "echo > /dev/tcp/localhost/6061"; do + if [[ $pprof_attempt_counter -eq 10 ]]; then + echo + echo "pprof failed to become ready in time!" 
+ exit 1 + fi + maybedryrun "$DRY_RUN" sleep 3 +done + +echo "Taking pprof snapshots" +maybedryrun "$DRY_RUN" curl --silent --fail --output "${SCALETEST_NAME}-heap.pprof.gz" http://localhost:6061/debug/pprof/heap +maybedryrun "$DRY_RUN" curl --silent --fail --output "${SCALETEST_NAME}-goroutine.pprof.gz" http://localhost:6061/debug/pprof/goroutine +maybedryrun "$DRY_RUN" kill $pfpid + +if [[ "${SCALETEST_SKIP_CLEANUP}" == "true" ]]; then + echo "Leaving resources up for you to inspect." + echo "Please don't forget to clean up afterwards:" + echo "cd terraform && terraform destroy --var-file=${SCALETEST_SCENARIO_VARS} --var-file=${SCALETEST_SECRETS} --auto-approve" + exit 0 +fi + +echo "Cleaning up" +maybedryrun "$DRY_RUN" terraform destroy --var-file="${SCALETEST_SCENARIO_VARS}" --var-file="${SCALETEST_SECRETS}" --auto-approve diff --git a/scripts/scaletest.sh b/scripts/scaletest.sh deleted file mode 100755 index a3b652e8edd27..0000000000000 --- a/scripts/scaletest.sh +++ /dev/null @@ -1,109 +0,0 @@ -#!/usr/bin/env bash - -if [[ $# -lt 2 ]]; then - echo "Usage: $0 " -fi - -[[ -n ${VERBOSE:-} ]] && set -x -set -euo pipefail - -SCALETEST_NAME="$1" -SCALETEST_SCENARIO="$2" -SCALETEST_NUM_WORKSPACES="$3" -SCALETEST_PROJECT="${SCALETEST_PROJECT:-}" -PROJECT_ROOT="$(git rev-parse --show-toplevel)" -SCALETEST_SCENARIO_VARS="${PROJECT_ROOT}/scaletest/terraform/scenario-${SCALETEST_SCENARIO}.tfvars" -SCALETEST_SECRETS="${PROJECT_ROOT}/scaletest/terraform/secrets.tfvars" -SCALETEST_SECRETS_TEMPLATE="${PROJECT_ROOT}/scaletest/terraform/secrets.tfvars.tpl" -SCALETEST_SKIP_CLEANUP="${SCALETEST_SKIP_CLEANUP:-}" - -if [[ "${SCALETEST_SKIP_CLEANUP}" == "true" ]]; then - echo "WARNING: you told me to not clean up after myself, so this is now your job!" -fi - -if [[ -z "${SCALETEST_PROJECT}" ]]; then - echo "Environment variable SCALETEST_PROJECT not set. Please set it and try again." - exit 1 -fi - -if [[ ! 
-f "${SCALETEST_SCENARIO_VARS}" ]]; then - echo "No definition for scenario ${SCALETEST_SCENARIO} exists. Please create it and try again" - exit 1 -fi - -echo "Writing scaletest secrets to file." -SCALETEST_NAME="${SCALETEST_NAME}" envsubst <"${SCALETEST_SECRETS_TEMPLATE}" >"${SCALETEST_SECRETS}" - -pushd "${PROJECT_ROOT}/scaletest/terraform" - -echo "Initializing terraform." -terraform init - -echo "Setting up infrastructure." -terraform plan --var-file="${SCALETEST_SCENARIO_VARS}" --var-file="${SCALETEST_SECRETS}" -out=scaletest.tfplan -terraform apply -auto-approve scaletest.tfplan - -SCALETEST_CODER_URL=$(<./.coderv2/url) -attempt_counter=0 -max_attempts=6 # 60 seconds -echo -n "Waiting for Coder deployment at ${SCALETEST_CODER_URL} to become ready" -until curl --output /dev/null --silent --fail "${SCALETEST_CODER_URL}/healthz"; do - if [[ $attempt_counter -eq $max_attempts ]]; then - echo - echo "Max attempts reached." - exit 1 - fi - - echo -n '.' - attempt_counter=$((attempt_counter + 1)) - sleep 10 -done - -echo "Initializing Coder deployment." -./coder_init.sh "${SCALETEST_CODER_URL}" - -echo "Creating ${SCALETEST_NUM_WORKSPACES} workspaces." -./coder_shim.sh scaletest create-workspaces \ - --count "${SCALETEST_NUM_WORKSPACES}" \ - --template=kubernetes \ - --concurrency 10 \ - --no-cleanup - -echo "Sleeping 10 minutes to establish a baseline measurement." -sleep 600 - -echo "Sending traffic to workspaces" -./coder_workspacetraffic.sh "${SCALETEST_NAME}" -export KUBECONFIG="${PWD}/.coderv2/${SCALETEST_NAME}-cluster.kubeconfig" -kubectl -n "coder-${SCALETEST_NAME}" wait pods coder-scaletest-workspace-traffic --for condition=Ready -kubectl -n "coder-${SCALETEST_NAME}" logs -f pod/coder-scaletest-workspace-traffic - -echo "Starting pprof" -kubectl -n "coder-${SCALETEST_NAME}" port-forward deployment/coder 6061:6060 & -pfpid=$! -trap 'kill $pfpid' EXIT - -echo -n "Waiting for pprof endpoint to become available" -pprof_attempt_counter=0 -while ! 
timeout 1 bash -c "echo > /dev/tcp/localhost/6061"; do - if [[ $pprof_attempt_counter -eq 10 ]]; then - echo - echo "pprof failed to become ready in time!" - exit 1 - fi - sleep 3 - echo -n "." -done -echo "Taking pprof snapshots" -curl --silent --fail --output "${SCALETEST_NAME}-heap.pprof.gz" http://localhost:6061/debug/pprof/heap -curl --silent --fail --output "${SCALETEST_NAME}-goroutine.pprof.gz" http://localhost:6061/debug/pprof/goroutine -kill $pfpid - -if [[ "${SCALETEST_SKIP_CLEANUP}" == "true" ]]; then - echo "Leaving resources up for you to inspect." - echo "Please don't forget to clean up afterwards!" - exit 0 -fi - -echo "Cleaning up" -terraform apply --destroy --var-file="${SCALETEST_SCENARIO_VARS}" --var-file="${SCALETEST_SECRETS}" --auto-approve From 105dbca02d0107c3b45d3bcd042d84d8a9101e28 Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Tue, 6 Jun 2023 22:54:18 +0100 Subject: [PATCH 09/16] move README --- scaletest/{terraform => }/README.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scaletest/{terraform => }/README.md (100%) diff --git a/scaletest/terraform/README.md b/scaletest/README.md similarity index 100% rename from scaletest/terraform/README.md rename to scaletest/README.md From 750fc642993af8779d105519addcf4920f006a2f Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Wed, 7 Jun 2023 10:35:36 +0100 Subject: [PATCH 10/16] collect agent stats --- scaletest/terraform/coder.tf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scaletest/terraform/coder.tf b/scaletest/terraform/coder.tf index 448f13fc964f6..44e7f4a374448 100644 --- a/scaletest/terraform/coder.tf +++ b/scaletest/terraform/coder.tf @@ -100,6 +100,8 @@ coder: value: "true" - name: "CODER_PROMETHEUS_ENABLE" value: "true" + - name: "CODER_PROMETHEUS_COLLECT_AGENT_STATS" + value: "true" - name: "CODER_VERBOSE" value: "true" image: From 4d165b51b31fa1aafbcc4deef16c0c66300ff0be Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Wed, 7 Jun 2023 13:17:00 +0100 Subject: 
[PATCH 11/16] fixes to script --- scaletest/scaletest.sh | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/scaletest/scaletest.sh b/scaletest/scaletest.sh index ff539474cb0a8..8197ee875e485 100755 --- a/scaletest/scaletest.sh +++ b/scaletest/scaletest.sh @@ -14,7 +14,7 @@ SCALETEST_SCENARIO="${SCALETEST_SCENARIO:-}" SCALETEST_PROJECT="${SCALETEST_PROJECT:-}" SCALETEST_PROMETHEUS_REMOTE_WRITE_USER="${SCALETEST_PROMETHEUS_REMOTE_WRITE_USER:-}" SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD="${SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD:-}" -SCALETEST_SKIP_CLEANUP="${SCALETEST_SKIP_CLEANUP:-}" +SCALETEST_SKIP_CLEANUP="${SCALETEST_SKIP_CLEANUP:-0}" script_name=$(basename "$0") args="$(getopt -o "" -l dry-run,help,name:,num-workspaces:,project:,scenario:,skip-cleanup -- "$@")" @@ -26,7 +26,7 @@ while true; do shift ;; --help) - echo "Usage: $script_name --name --project [--num-workspaces ] [--scenario ] [--dry-run]" + echo "Usage: $script_name --name --project --num-workspaces --scenario [--dry-run] [--skip-cleanup]" exit 1 ;; --name) @@ -99,11 +99,17 @@ if [[ ! -f "${SCALETEST_SCENARIO_VARS}" ]]; then exit 1 fi -if [[ "${SCALETEST_SKIP_CLEANUP}" == "true" ]]; then +if [[ "${SCALETEST_SKIP_CLEANUP}" == 1 ]]; then log "WARNING: you told me to not clean up after myself, so this is now your job!" 
fi CONFIG_DIR="${PROJECT_ROOT}/scaletest/.coderv2" +if [[ -d "${CONFIG_DIR}" ]] && files=$(ls -qAH -- "${CONFIG_DIR}") && [[ -n "$files" ]]; then + echo "Cleaning previous configuration" + maybedryrun "$DRY_RUN" rm -fv "${CONFIG_DIR:?}"/* +fi +maybedryrun "$DRY_RUN" mkdir -p "${CONFIG_DIR}" + SCALETEST_SCENARIO_VARS="${PROJECT_ROOT}/scaletest/terraform/scenario-${SCALETEST_SCENARIO}.tfvars" SCALETEST_SECRETS="${PROJECT_ROOT}/scaletest/terraform/secrets.tfvars" SCALETEST_SECRETS_TEMPLATE="${PROJECT_ROOT}/scaletest/terraform/secrets.tfvars.tpl" @@ -128,7 +134,7 @@ if [[ "${DRY_RUN}" != 1 ]]; then else SCALETEST_CODER_URL="http://coder.dryrun.local:3000" fi -KUBECONFIG="${PWD}/.coderv2/${SCALETEST_NAME}-cluster.kubeconfig" +KUBECONFIG="${PROJECT_ROOT}/scaletest/.coderv2/${SCALETEST_NAME}-cluster.kubeconfig" echo "Waiting for Coder deployment at ${SCALETEST_CODER_URL} to become ready" maybedryrun "$DRY_RUN" kubectl --kubeconfig="${KUBECONFIG}" -n "coder-${SCALETEST_NAME}" rollout status deployment/coder @@ -171,7 +177,7 @@ maybedryrun "$DRY_RUN" curl --silent --fail --output "${SCALETEST_NAME}-heap.ppr maybedryrun "$DRY_RUN" curl --silent --fail --output "${SCALETEST_NAME}-goroutine.pprof.gz" http://localhost:6061/debug/pprof/goroutine maybedryrun "$DRY_RUN" kill $pfpid -if [[ "${SCALETEST_SKIP_CLEANUP}" == "true" ]]; then +if [[ "${SCALETEST_SKIP_CLEANUP}" == 1 ]]; then echo "Leaving resources up for you to inspect."
echo "Please don't forget to clean up afterwards:" echo "cd terraform && terraform destroy --var-file=${SCALETEST_SCENARIO_VARS} --var-file=${SCALETEST_SECRETS} --auto-approve" From fd9886e1b50f95389a47c5e7601dbe1d882286cb Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Wed, 7 Jun 2023 13:17:30 +0100 Subject: [PATCH 12/16] update README --- scaletest/README.md | 95 ++++++++++++++++++++++++++++++++------------- 1 file changed, 68 insertions(+), 27 deletions(-) diff --git a/scaletest/README.md b/scaletest/README.md index 3933c6f8c4bcf..8e2c714c35cd7 100644 --- a/scaletest/README.md +++ b/scaletest/README.md @@ -1,43 +1,84 @@ -# Load Test Terraform +# Scale Testing -This folder contains Terraform code and scripts to aid in performing load tests of Coder. -It does the following: +This folder contains CLI commands, Terraform code, and scripts to aid in performing load tests of Coder. +At a high level, it performs the following steps: -- Creates a GCP VPC. -- Creates a CloudSQL instance with a global peering rule so it's accessible inside the VPC. -- Creates a GKE cluster inside the VPC with separate nodegroups for Coder and workspaces. -- Installs Coder in a new namespace, using the CloudSQL instance. +- Using the Terraform code in `./terraform`, stands up a preconfigured Google Cloud environment + consisting of a VPC, GKE Cluster, and CloudSQL instance. + > **Note: You must have an existing Google Cloud project available.** +- Creates a dedicated namespace for Coder and installs Coder using the Helm chart in this namespace. +- Configures the Coder deployment with random credentials and a predefined Kubernetes template. + > **Note:** These credentials are stored in `${PROJECT_ROOT}/scaletest/.coderv2/coder.env`. +- Creates a number of workspaces and waits for them to all start successfully. These workspaces + are ephemeral and do not contain any persistent resources. +- Waits for 10 minutes to allow things to settle and establish a baseline. 
+- Generates web terminal traffic to all workspaces for 30 minutes. +- Directly after traffic generation, captures goroutine and heap snapshots of the Coder deployment. +- Tears down all resources (unless `--skip-cleanup` is specified). -## Usage -> You must have an existing Google Cloud project available. +## Usage -1. Create a file named `override.tfvars` with the following content, modifying as appropriate: +The main entrypoint is the `scaletest.sh` script. -```terraform -name = "some_unique_identifier" -project_id = "some_google_project_id" +```console +$ scaletest.sh --help +Usage: scaletest.sh --name --project --num-workspaces --scenario [--dry-run] [--skip-cleanup] ``` -1. Inspect `vars.tf` and override any other variables you deem necessary. +### Required arguments: + +- `--name`: Name for the loadtest. This is added as a prefix to resources created by Terraform (e.g. `joe-big-loadtest`). +- `--project`: Google Cloud project in which to create the resources (example: `my-loadtest-project`). +- `--num-workspaces`: Number of workspaces to create (example: `10`). +- `--scenario`: Deployment scenario to use (example: `small`). See `terraform/scenario-*.tfvars`. + +> **Note:** In order to capture Prometheus metrics, you must define the environment variables + `SCALETEST_PROMETHEUS_REMOTE_WRITE_USER` and `SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD`. + +### Optional arguments: + +- `--dry-run`: Do not perform any action and instead print what would be executed. +- `--skip-cleanup`: Do not perform any cleanup. You will be responsible for deleting any resources this creates. -1. Run `terraform init`. +### Environment Variables -1. Run `terraform plan -var-file=override.tfvars` and inspect the output. - If you are not satisfied, modify `override.tfvars` until you are. +All of the above arguments may be specified as environment variables. Consult the script for details. -1. Run `terraform apply -var-file=override.tfvars`. 
This will spin up a pre-configured environment - and emit the Coder URL as an output. +### Prometheus Metrics -1. Run `coder_init.sh ` to setup an initial user and a pre-configured Kubernetes - template. It will also download the Coder CLI from the Coder instance locally. +To capture Prometheus metrics from the loadtest, two environment variables must be set: `SCALETEST_PROMETHEUS_REMOTE_WRITE_USER` and `SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD`. + +## Scenarios + +A scenario defines a number of variables that override the default Terraform variables. +A number of existing scenarios are provided in `scaletest/terraform/scenario-*.tfvars`. + +For example, `scenario-small.tfvars` includes the following variable definitions: + +``` +nodepool_machine_type_coder = "t2d-standard-2" +nodepool_machine_type_workspaces = "t2d-standard-2" +coder_cpu = "1000m" # Leaving 1 CPU for system workloads +coder_mem = "4Gi" # Leaving 4GB for system workloads +``` -1. Do whatever you need to do with the Coder instance: +To create your own scenario, simply add a new file `terraform/scenario-$SCENARIO_NAME.tfvars`. +In this file, override variables as required, consulting `vars.tf` as needed. +You can then use this scenario by specifying `--scenario $SCENARIO_NAME`. +For example, if your scenario file were named `scenario-big-whopper2x.tfvars`, you would specify +`--scenario=big-whopper2x`. - > Note: To run Coder commands against the instance, you can use `coder_shim.sh `. - > You don't need to run `coder login` yourself. +## Utility scripts - - To create workspaces, run `./coder_shim.sh scaletest create-workspaces --template="kubernetes" --count=N` - - To generate workspace traffic, run `./coder_trafficgen.sh `. This will keep running until you delete the pod `coder-scaletest-workspace-traffic`. +A number of utility scripts are provided in `lib`, and are used by `scaletest.sh`: -1. When you are finished, you can run `terraform destroy -var-file=override.tfvars`. +- `coder_shim.sh`: a convenience script to run the `coder` binary with a predefined config root.
+ This is intended to allow running Coder CLI commands against the loadtest cluster without + modifying a user's existing Coder CLI configuration. +- `coder_init.sh`: Performs first-time user setup of an existing Coder instance, generating + a random password for the admin user. The admin user is named `admin@coder.com` by default. + Credentials are written to `scaletest/.coderv2/coder.env`. +- `coder_workspacetraffic.sh`: Runs traffic generation against the loadtest cluster and creates + a monitoring manifest for the traffic generation pod. This pod will restart automatically + after the traffic generation has completed. From b5e81d9bbdce1297d4e107e8179da4b7daa90610 Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Wed, 7 Jun 2023 13:18:00 +0100 Subject: [PATCH 13/16] remove 2x scenarios --- scaletest/terraform/scenario-large2x.tfvars | 5 ----- scaletest/terraform/scenario-medium2x.tfvars | 5 ----- scaletest/terraform/scenario-small2x.tfvars | 5 ----- 3 files changed, 15 deletions(-) delete mode 100644 scaletest/terraform/scenario-large2x.tfvars delete mode 100644 scaletest/terraform/scenario-medium2x.tfvars delete mode 100644 scaletest/terraform/scenario-small2x.tfvars diff --git a/scaletest/terraform/scenario-large2x.tfvars b/scaletest/terraform/scenario-large2x.tfvars deleted file mode 100644 index ca1908e9fa7a3..0000000000000 --- a/scaletest/terraform/scenario-large2x.tfvars +++ /dev/null @@ -1,5 +0,0 @@ -nodepool_machine_type_coder = "t2d-standard-8" -nodepool_machine_type_workspaces = "t2d-standard-8" -nodepool_size_workspaces = 2 -coder_cpu = "7" # Leaving 1 CPU for system workloads -coder_mem = "28Gi" # Leaving 4 GB for system workloads diff --git a/scaletest/terraform/scenario-medium2x.tfvars b/scaletest/terraform/scenario-medium2x.tfvars deleted file mode 100644 index 389224d16a926..0000000000000 --- a/scaletest/terraform/scenario-medium2x.tfvars +++ /dev/null @@ -1,5 +0,0 @@ -nodepool_machine_type_coder = "t2d-standard-8" 
-nodepool_machine_type_workspaces = "t2d-standard-8" -nodepool_size_workspaces = 2 -coder_cpu = "3000m" # Leaving 1 CPU for system workloads -coder_mem = "12Gi" # Leaving 4 GB for system workloads diff --git a/scaletest/terraform/scenario-small2x.tfvars b/scaletest/terraform/scenario-small2x.tfvars deleted file mode 100644 index bc793706ef474..0000000000000 --- a/scaletest/terraform/scenario-small2x.tfvars +++ /dev/null @@ -1,5 +0,0 @@ -nodepool_machine_type_coder = "t2d-standard-2" -nodepool_machine_type_workspaces = "t2d-standard-2" -nodepool_size_workspaces = 2 -coder_cpu = "1000m" # Leaving 1 CPU for system workloads -coder_mem = "4Gi" # Leaving 4 GB for system workloads From 86b3315f2bb7624d30fe1d8a82536ad70db82b2f Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Wed, 7 Jun 2023 13:20:17 +0100 Subject: [PATCH 14/16] clear trap to avoid error on exit --- scaletest/scaletest.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scaletest/scaletest.sh b/scaletest/scaletest.sh index 8197ee875e485..df54c79ff7813 100755 --- a/scaletest/scaletest.sh +++ b/scaletest/scaletest.sh @@ -159,7 +159,7 @@ maybedryrun "$DRY_RUN" kubectl --kubeconfig="${KUBECONFIG}" -n "coder-${SCALETES echo "Starting pprof" maybedryrun "$DRY_RUN" kubectl -n "coder-${SCALETEST_NAME}" port-forward deployment/coder 6061:6060 & pfpid=$! 
-maybedryrun "$DRY_RUN" trap 'kill $pfpid' EXIT +maybedryrun "$DRY_RUN" trap "kill $pfpid" EXIT echo "Waiting for pprof endpoint to become available" pprof_attempt_counter=0 @@ -175,7 +175,7 @@ done echo "Taking pprof snapshots" maybedryrun "$DRY_RUN" curl --silent --fail --output "${SCALETEST_NAME}-heap.pprof.gz" http://localhost:6061/debug/pprof/heap maybedryrun "$DRY_RUN" curl --silent --fail --output "${SCALETEST_NAME}-goroutine.pprof.gz" http://localhost:6061/debug/pprof/goroutine -maybedryrun "$DRY_RUN" kill $pfpid +maybedryrun "$DRY_RUN" trap - EXIT if [[ "${SCALETEST_SKIP_CLEANUP}" == 1 ]]; then echo "Leaving resources up for you to inspect." From 95fc69ecf621e3f0e7be7e170b2fd927fa976fac Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Wed, 7 Jun 2023 13:47:40 +0100 Subject: [PATCH 15/16] make fmt; make lint --- scaletest/README.md | 3 +-- scaletest/scaletest.sh | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/scaletest/README.md b/scaletest/README.md index 8e2c714c35cd7..b9cf1b429fae8 100644 --- a/scaletest/README.md +++ b/scaletest/README.md @@ -16,7 +16,6 @@ At a high level, it performs the following steps: - Directly after traffic generation, captures goroutine and heap snapshots of the Coder deployment. - Tears down all resources (unless `--skip-cleanup` is specified). - ## Usage The main entrypoint is the `scaletest.sh` script. @@ -34,7 +33,7 @@ Usage: scaletest.sh --name --project --num-workspaces **Note:** In order to capture Prometheus metrics, you must define the environment variables - `SCALETEST_PROMETHEUS_REMOTE_WRITE_USER` and `SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD`. +> `SCALETEST_PROMETHEUS_REMOTE_WRITE_USER` and `SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD`. 
### Optional arguments: diff --git a/scaletest/scaletest.sh b/scaletest/scaletest.sh index df54c79ff7813..d7cd245e4e608 100755 --- a/scaletest/scaletest.sh +++ b/scaletest/scaletest.sh @@ -84,10 +84,10 @@ fi if [[ -z "${SCALETEST_PROMETHEUS_REMOTE_WRITE_USER}" ]] || [[ -z "${SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD}" ]]; then echo "SCALETEST_PROMETHEUS_REMOTE_WRITE_USER or SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD not specified." echo "No prometheus metrics will be collected!" - read -p "Continue (y/N)? " choice + read -r -p "Continue (y/N)? " choice case "$choice" in - y|Y|yes|YES ) ;; - * ) exit 1;; + y | Y | yes | YES) ;; + *) exit 1 ;; esac fi From e9ac3c8ba6ef0484d63046c9d9f6a45275542eee Mon Sep 17 00:00:00 2001 From: Cian Johnston Date: Wed, 7 Jun 2023 14:11:16 +0100 Subject: [PATCH 16/16] ensure we kill the port-forward --- scaletest/scaletest.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scaletest/scaletest.sh b/scaletest/scaletest.sh index d7cd245e4e608..f9f7fb7ca2ed4 100755 --- a/scaletest/scaletest.sh +++ b/scaletest/scaletest.sh @@ -175,6 +175,8 @@ done echo "Taking pprof snapshots" maybedryrun "$DRY_RUN" curl --silent --fail --output "${SCALETEST_NAME}-heap.pprof.gz" http://localhost:6061/debug/pprof/heap maybedryrun "$DRY_RUN" curl --silent --fail --output "${SCALETEST_NAME}-goroutine.pprof.gz" http://localhost:6061/debug/pprof/goroutine +# No longer need to port-forward +maybedryrun "$DRY_RUN" kill "$pfpid" maybedryrun "$DRY_RUN" trap - EXIT if [[ "${SCALETEST_SKIP_CLEANUP}" == 1 ]]; then