From b3f9cb33f736d9edb15706121f1e7765e17a8dd0 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Tue, 7 Nov 2023 15:02:29 +0200 Subject: [PATCH 1/5] feat(scaletest): add greedy agent test to runner --- scaletest/templates/scaletest-runner/main.tf | 46 +++++++++++++ .../templates/scaletest-runner/scripts/run.sh | 65 +++++++++++++++++++ .../templates/scaletest-runner/startup.sh | 5 ++ 3 files changed, 116 insertions(+) diff --git a/scaletest/templates/scaletest-runner/main.tf b/scaletest/templates/scaletest-runner/main.tf index 9e46817a7dd6b..0f8349a9a202f 100644 --- a/scaletest/templates/scaletest-runner/main.tf +++ b/scaletest/templates/scaletest-runner/main.tf @@ -335,6 +335,50 @@ data "coder_parameter" "load_scenario_baseline_duration" { } } +data "coder_parameter" "greedy_agent" { + order = 30 + type = "bool" + name = "Greedy Agent" + default = false + description = "If true, the agent will attempt to consume all available resources." + mutable = true + ephemeral = true +} + +data "coder_parameter" "greedy_agent_template" { + order = 31 + name = "Greedy Agent Template" + display_name = "Greedy Agent Template" + description = "The template used for the greedy agent workspace (must not be same as workspace template)." + default = "kubernetes-medium" + icon = "/emojis/1f4dc.png" # Scroll. + mutable = true + option { + name = "Minimal" + value = "kubernetes-minimal" # Feather. + icon = "/emojis/1fab6.png" + description = "Sized to fit approx. 32 per t2d-standard-8 instance." + } + option { + name = "Small" + value = "kubernetes-small" + icon = "/emojis/1f42d.png" # Mouse. + description = "Provisions a small-sized workspace with no persistent storage." + } + option { + name = "Medium" + value = "kubernetes-medium" + icon = "/emojis/1f436.png" # Dog. + description = "Provisions a medium-sized workspace with no persistent storage." + } + option { + name = "Large" + value = "kubernetes-large" + icon = "/emojis/1f434.png" # Horse. + description = "Provisions a large-sized workspace with no persistent storage." + } +} + data "coder_parameter" "namespace" { order = 999 type = "string" @@ -395,6 +439,8 @@ resource "coder_agent" "main" { SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_TICK_INTERVAL : "${data.coder_parameter.load_scenario_web_terminal_tick_interval.value}", SCALETEST_PARAM_LOAD_SCENARIO_DASHBOARD_TRAFFIC_DURATION : "${data.coder_parameter.load_scenario_dashboard_traffic_duration.value}", SCALETEST_PARAM_LOAD_SCENARIO_BASELINE_DURATION : "${data.coder_parameter.load_scenario_baseline_duration.value}", + SCALETEST_PARAM_GREEDY_AGENT : data.coder_parameter.greedy_agent.value ? "1" : "0", + SCALETEST_PARAM_GREEDY_AGENT_TEMPLATE : data.coder_parameter.greedy_agent_template.value, GRAFANA_URL : local.grafana_url, diff --git a/scaletest/templates/scaletest-runner/scripts/run.sh b/scaletest/templates/scaletest-runner/scripts/run.sh index 584c80f81f15d..e3ecf48c53594 100755 --- a/scaletest/templates/scaletest-runner/scripts/run.sh +++ b/scaletest/templates/scaletest-runner/scripts/run.sh @@ -26,6 +26,53 @@ end_phase wait_baseline "${SCALETEST_PARAM_LOAD_SCENARIO_BASELINE_DURATION}" +if [[ ${SCALETEST_PARAM_GREEDY_AGENT} != 1 ]]; then + greedy_agent() { :; } +else + echo "WARNING: Greedy agent enabled, this may cause the load tests to fail." >&2 + + coder exp scaletest create-workspaces \ + --count 1 \ + --template "${SCALETEST_PARAM_GREEDY_AGENT_TEMPLATE}" \ + --concurrency 1 \ + --timeout 5h \ + --job-timeout 5h \ + --no-cleanup \ + --output json:"${SCALETEST_RESULTS_DIR}/create-workspaces-greedy-agent.json" + + greedy_agent() { + local timeout=${1} scenario=${2} + # Run the greedy test for ~1/3 of the timeout. + delay=$((timeout * 60 / 3)) + + local type=web-terminal + args=() + if [[ ${scenario} == "SSH Traffic" ]]; then + type=ssh + args+=(--ssh) + fi + + sleep "${delay}" + annotate_grafana greedy_agent "${scenario}: Greedy agent" + + # Produce load at about 1000MB/s. + set +e + coder exp scaletest workspace-traffic \ + --template "${SCALETEST_PARAM_GREEDY_AGENT_TEMPLATE}" \ + --timeout "$((delay))s" \ + --job-timeout "$((delay))s" \ + --output json:"${SCALETEST_RESULTS_DIR}/traffic-${type}-greedy-agent.json" \ + --bytes-per-tick $((1024 * 1000)) \ + --tick-interval 1ms \ + "${args[@]}" + status=${?} + + annotate_grafana_end greedy_agent "${scenario}: Greedy agent" + + return ${status} + } +fi + declare -A failed=() for scenario in "${SCALETEST_PARAM_LOAD_SCENARIOS[@]}"; do start_phase "Load scenario: ${scenario}" @@ -34,7 +81,9 @@ for scenario in "${SCALETEST_PARAM_LOAD_SCENARIOS[@]}"; do status=0 case "${scenario}" in "SSH Traffic") + greedy_agent "${SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_DURATION}" "${scenario}" & coder exp scaletest workspace-traffic \ + --template "${SCALETEST_PARAM_TEMPLATE}" \ --ssh \ --bytes-per-tick "${SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_BYTES_PER_TICK}" \ --tick-interval "${SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_TICK_INTERVAL}ms" \ @@ -42,16 +91,28 @@ for scenario in "${SCALETEST_PARAM_LOAD_SCENARIOS[@]}"; do --job-timeout "${SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_DURATION}m30s" \ --output json:"${SCALETEST_RESULTS_DIR}/traffic-ssh.json" status=$? + wait + status2=$? + if [[ ${status} == 0 ]]; then + status=${status2} + fi show_json "${SCALETEST_RESULTS_DIR}/traffic-ssh.json" ;; "Web Terminal Traffic") + greedy_agent "${SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_DURATION}" "${scenario}" & coder exp scaletest workspace-traffic \ + --template "${SCALETEST_PARAM_TEMPLATE}" \ --bytes-per-tick "${SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_BYTES_PER_TICK}" \ --tick-interval "${SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_TICK_INTERVAL}ms" \ --timeout "${SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_DURATION}m" \ --job-timeout "${SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_DURATION}m30s" \ --output json:"${SCALETEST_RESULTS_DIR}/traffic-web-terminal.json" status=$? + wait + status2=$? + if [[ ${status} == 0 ]]; then + status=${status2} + fi show_json "${SCALETEST_RESULTS_DIR}/traffic-web-terminal.json" ;; "Dashboard Traffic") @@ -65,6 +126,10 @@ for scenario in "${SCALETEST_PARAM_LOAD_SCENARIOS[@]}"; do ;; # Debug scenarios, for testing the runner. + "debug:greedy_agent") + greedy_agent 10 "${scenario}" + status=$? + ;; "debug:success") maybedryrun "$DRY_RUN" sleep 10 status=0 diff --git a/scaletest/templates/scaletest-runner/startup.sh b/scaletest/templates/scaletest-runner/startup.sh index 5375a8550ef8f..e391196025773 100755 --- a/scaletest/templates/scaletest-runner/startup.sh +++ b/scaletest/templates/scaletest-runner/startup.sh @@ -3,6 +3,11 @@ set -euo pipefail [[ $VERBOSE == 1 ]] && set -x +if [[ ${SCALETEST_PARAM_GREEDY_AGENT_TEMPLATE} == "${SCALETEST_PARAM_TEMPLATE}" ]]; then + echo "ERROR: Greedy agent template must be different from the scaletest template." >&2 + exit 1 +fi + # Unzip scripts and add to path. # shellcheck disable=SC2153 echo "Extracting scaletest scripts into ${SCRIPTS_DIR}..." From bcb980d6013ae6c18304af9eb17a3b3a70cfcd08 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Mon, 13 Nov 2023 15:17:25 +0200 Subject: [PATCH 2/5] Increase payload, fix delete timeout --- scaletest/templates/scaletest-runner/main.tf | 2 +- scaletest/templates/scaletest-runner/scripts/run.sh | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scaletest/templates/scaletest-runner/main.tf b/scaletest/templates/scaletest-runner/main.tf index 0f8349a9a202f..da120dc0f9a66 100644 --- a/scaletest/templates/scaletest-runner/main.tf +++ b/scaletest/templates/scaletest-runner/main.tf @@ -630,7 +630,7 @@ resource "kubernetes_pod" "main" { } # Set the pod delete timeout to termination_grace_period_seconds + 1m. timeouts { - delete = "${(local.workspace_pod_termination_grace_period_seconds + 120) / 60}s" + delete = "${(local.workspace_pod_termination_grace_period_seconds + 120)}s" } spec { security_context { diff --git a/scaletest/templates/scaletest-runner/scripts/run.sh b/scaletest/templates/scaletest-runner/scripts/run.sh index e3ecf48c53594..527e5f57dabff 100755 --- a/scaletest/templates/scaletest-runner/scripts/run.sh +++ b/scaletest/templates/scaletest-runner/scripts/run.sh @@ -55,15 +55,15 @@ else sleep "${delay}" annotate_grafana greedy_agent "${scenario}: Greedy agent" - # Produce load at about 1000MB/s. + # Produce load at about 1000MB/s (25MB/40ms). set +e coder exp scaletest workspace-traffic \ --template "${SCALETEST_PARAM_GREEDY_AGENT_TEMPLATE}" \ --timeout "$((delay))s" \ --job-timeout "$((delay))s" \ --output json:"${SCALETEST_RESULTS_DIR}/traffic-${type}-greedy-agent.json" \ - --bytes-per-tick $((1024 * 1000)) \ - --tick-interval 1ms \ + --bytes-per-tick $((1024 * 1024 * 25)) \ + --tick-interval 40ms \ "${args[@]}" status=${?} From 1278cbe65640f49217c528ef9c66aa705f890544 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Fri, 1 Dec 2023 19:43:37 +0200 Subject: [PATCH 3/5] minor tweaks --- scaletest/templates/scaletest-runner/main.tf | 2 +- .../templates/scaletest-runner/scripts/run.sh | 23 +++++++++++++++---- .../templates/scaletest-runner/startup.sh | 2 ++ 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/scaletest/templates/scaletest-runner/main.tf b/scaletest/templates/scaletest-runner/main.tf index da120dc0f9a66..01a910d4cfec7 100644 --- a/scaletest/templates/scaletest-runner/main.tf +++ b/scaletest/templates/scaletest-runner/main.tf @@ -216,7 +216,7 @@ data "coder_parameter" "num_workspaces" { validation { min = 0 - max = 1000 + max = 2000 } } diff --git a/scaletest/templates/scaletest-runner/scripts/run.sh b/scaletest/templates/scaletest-runner/scripts/run.sh index 527e5f57dabff..9348e08302b17 100755 --- a/scaletest/templates/scaletest-runner/scripts/run.sh +++ b/scaletest/templates/scaletest-runner/scripts/run.sh @@ -26,10 +26,16 @@ end_phase wait_baseline "${SCALETEST_PARAM_LOAD_SCENARIO_BASELINE_DURATION}" +non_greedy_agent_traffic_args=() if [[ ${SCALETEST_PARAM_GREEDY_AGENT} != 1 ]]; then greedy_agent() { :; } else echo "WARNING: Greedy agent enabled, this may cause the load tests to fail." >&2 + non_greedy_agent_traffic_args=( + # Let the greedy agent traffic command be scraped. + # --scaletest-prometheus-address 0.0.0.0:21113 + # --trace=false + ) coder exp scaletest create-workspaces \ --count 1 \ @@ -59,14 +65,21 @@ else set +e coder exp scaletest workspace-traffic \ --template "${SCALETEST_PARAM_GREEDY_AGENT_TEMPLATE}" \ + --bytes-per-tick $((1024 * 1024 * 25)) \ + --tick-interval 40ms \ --timeout "$((delay))s" \ --job-timeout "$((delay))s" \ --output json:"${SCALETEST_RESULTS_DIR}/traffic-${type}-greedy-agent.json" \ - --bytes-per-tick $((1024 * 1024 * 25)) \ - --tick-interval 40ms \ + --scaletest-prometheus-address 0.0.0.0:21113 \ + --trace=false \ "${args[@]}" status=${?} + show_json "${SCALETEST_RESULTS_DIR}/traffic-${type}-greedy-agent.json" + export GRAFANA_ADD_TAGS= + if [[ ${status} != 0 ]]; then + GRAFANA_ADD_TAGS=error + fi annotate_grafana_end greedy_agent "${scenario}: Greedy agent" return ${status} @@ -89,7 +102,8 @@ for scenario in "${SCALETEST_PARAM_LOAD_SCENARIOS[@]}"; do --tick-interval "${SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_TICK_INTERVAL}ms" \ --timeout "${SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_DURATION}m" \ --job-timeout "${SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_DURATION}m30s" \ - --output json:"${SCALETEST_RESULTS_DIR}/traffic-ssh.json" + --output json:"${SCALETEST_RESULTS_DIR}/traffic-ssh.json" \ + "${non_greedy_agent_traffic_args[@]}" status=$? wait status2=$? @@ -106,7 +120,8 @@ for scenario in "${SCALETEST_PARAM_LOAD_SCENARIOS[@]}"; do --tick-interval "${SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_TICK_INTERVAL}ms" \ --timeout "${SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_DURATION}m" \ --job-timeout "${SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_DURATION}m30s" \ - --output json:"${SCALETEST_RESULTS_DIR}/traffic-web-terminal.json" + --output json:"${SCALETEST_RESULTS_DIR}/traffic-web-terminal.json" \ + "${non_greedy_agent_traffic_args[@]}" status=$? wait status2=$? diff --git a/scaletest/templates/scaletest-runner/startup.sh b/scaletest/templates/scaletest-runner/startup.sh index e391196025773..45bf4fb9ebd5c 100755 --- a/scaletest/templates/scaletest-runner/startup.sh +++ b/scaletest/templates/scaletest-runner/startup.sh @@ -15,6 +15,8 @@ base64 -d <<<"${SCRIPTS_ZIP}" >/tmp/scripts.zip rm -rf "${SCRIPTS_DIR}" || true mkdir -p "${SCRIPTS_DIR}" unzip -o /tmp/scripts.zip -d "${SCRIPTS_DIR}" +# Chmod to work around https://github.com/coder/coder/issues/10034 +chmod +x "${SCRIPTS_DIR}"/*.sh rm /tmp/scripts.zip echo "Cloning coder/coder repo..." From 3d7e98ed0d2e3f0de2a4293fb3dd16561a54db74 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Mon, 4 Dec 2023 13:26:33 +0200 Subject: [PATCH 4/5] add greedy medium template --- scaletest/templates/scaletest-runner/main.tf | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/scaletest/templates/scaletest-runner/main.tf b/scaletest/templates/scaletest-runner/main.tf index 01a910d4cfec7..b536fc51afbb3 100644 --- a/scaletest/templates/scaletest-runner/main.tf +++ b/scaletest/templates/scaletest-runner/main.tf @@ -198,6 +198,12 @@ data "coder_parameter" "workspace_template" { icon = "/emojis/1f436.png" # Dog. description = "Provisions a medium-sized workspace with no persistent storage." } + option { + name = "Medium (Greedy)" + value = "kubernetes-medium-greedy" + icon = "/emojis/1f436.png" # Dog. + description = "Provisions a medium-sized workspace with no persistent storage. Greedy agent variant." + } option { name = "Large" value = "kubernetes-large" @@ -350,7 +356,7 @@ data "coder_parameter" "greedy_agent_template" { name = "Greedy Agent Template" display_name = "Greedy Agent Template" description = "The template used for the greedy agent workspace (must not be same as workspace template)." - default = "kubernetes-medium" + default = "kubernetes-medium-greedy" icon = "/emojis/1f4dc.png" # Scroll. mutable = true option { @@ -371,6 +377,12 @@ data "coder_parameter" "greedy_agent_template" { icon = "/emojis/1f436.png" # Dog. description = "Provisions a medium-sized workspace with no persistent storage." } + option { + name = "Medium (Greedy)" + value = "kubernetes-medium-greedy" + icon = "/emojis/1f436.png" # Dog. + description = "Provisions a medium-sized workspace with no persistent storage. Greedy agent variant." + } option { name = "Large" value = "kubernetes-large" From 11926cebd2359e2ffc5a6bfb16fbb485fe3f990f Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Mon, 4 Dec 2023 13:31:48 +0200 Subject: [PATCH 5/5] add greedy agent baseline --- .../templates/scaletest-runner/scripts/run.sh | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/scaletest/templates/scaletest-runner/scripts/run.sh b/scaletest/templates/scaletest-runner/scripts/run.sh index 9348e08302b17..03bafc7cf6a84 100755 --- a/scaletest/templates/scaletest-runner/scripts/run.sh +++ b/scaletest/templates/scaletest-runner/scripts/run.sh @@ -28,7 +28,7 @@ wait_baseline "${SCALETEST_PARAM_LOAD_SCENARIO_BASELINE_DURATION}" non_greedy_agent_traffic_args=() if [[ ${SCALETEST_PARAM_GREEDY_AGENT} != 1 ]]; then - greedy_agent() { :; } + greedy_agent_traffic() { :; } else echo "WARNING: Greedy agent enabled, this may cause the load tests to fail." >&2 non_greedy_agent_traffic_args=( @@ -37,6 +37,8 @@ else # --trace=false ) + annotate_grafana greedy_agent "Create greedy agent" + coder exp scaletest create-workspaces \ --count 1 \ --template "${SCALETEST_PARAM_GREEDY_AGENT_TEMPLATE}" \ @@ -46,7 +48,9 @@ else --no-cleanup \ --output json:"${SCALETEST_RESULTS_DIR}/create-workspaces-greedy-agent.json" - greedy_agent() { + wait_baseline "${SCALETEST_PARAM_LOAD_SCENARIO_BASELINE_DURATION}" + + greedy_agent_traffic() { local timeout=${1} scenario=${2} # Run the greedy test for ~1/3 of the timeout. delay=$((timeout * 60 / 3)) @@ -59,7 +63,7 @@ else fi sleep "${delay}" - annotate_grafana greedy_agent "${scenario}: Greedy agent" + annotate_grafana greedy_agent "${scenario}: Greedy agent traffic" # Produce load at about 1000MB/s (25MB/40ms). set +e @@ -80,7 +84,7 @@ else if [[ ${status} != 0 ]]; then GRAFANA_ADD_TAGS=error fi - annotate_grafana_end greedy_agent "${scenario}: Greedy agent" + annotate_grafana_end greedy_agent "${scenario}: Greedy agent traffic" return ${status} } @@ -94,7 +98,7 @@ for scenario in "${SCALETEST_PARAM_LOAD_SCENARIOS[@]}"; do status=0 case "${scenario}" in "SSH Traffic") - greedy_agent "${SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_DURATION}" "${scenario}" & + greedy_agent_traffic "${SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_DURATION}" "${scenario}" & coder exp scaletest workspace-traffic \ --template "${SCALETEST_PARAM_TEMPLATE}" \ --ssh \ @@ -113,7 +117,7 @@ for scenario in "${SCALETEST_PARAM_LOAD_SCENARIOS[@]}"; do show_json "${SCALETEST_RESULTS_DIR}/traffic-ssh.json" ;; "Web Terminal Traffic") - greedy_agent "${SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_DURATION}" "${scenario}" & + greedy_agent_traffic "${SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_DURATION}" "${scenario}" & coder exp scaletest workspace-traffic \ --template "${SCALETEST_PARAM_TEMPLATE}" \ --bytes-per-tick "${SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_BYTES_PER_TICK}" \ @@ -141,8 +145,8 @@ for scenario in "${SCALETEST_PARAM_LOAD_SCENARIOS[@]}"; do ;; # Debug scenarios, for testing the runner. - "debug:greedy_agent") - greedy_agent 10 "${scenario}" + "debug:greedy_agent_traffic") + greedy_agent_traffic 10 "${scenario}" status=$? ;; "debug:success")