Skip to content

feat(scaletest): add greedy agent test to runner #10559

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 60 additions & 2 deletions scaletest/templates/scaletest-runner/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,12 @@ data "coder_parameter" "workspace_template" {
icon = "/emojis/1f436.png" # Dog.
description = "Provisions a medium-sized workspace with no persistent storage."
}
option {
name = "Medium (Greedy)"
value = "kubernetes-medium-greedy"
icon = "/emojis/1f436.png" # Dog.
description = "Provisions a medium-sized workspace with no persistent storage. Greedy agent variant."
}
option {
name = "Large"
value = "kubernetes-large"
Expand All @@ -216,7 +222,7 @@ data "coder_parameter" "num_workspaces" {

validation {
min = 0
max = 1000
max = 2000
}
}

Expand Down Expand Up @@ -335,6 +341,56 @@ data "coder_parameter" "load_scenario_baseline_duration" {
}
}

# Opt-in switch for the greedy agent test. The coder_agent environment exposes
# it to the runner scripts as SCALETEST_PARAM_GREEDY_AGENT ("1" when true,
# "0" when false).
data "coder_parameter" "greedy_agent" {
order = 30
type = "bool"
name = "Greedy Agent"
default = false
description = "If true, the agent will attempt to consume all available resources."
mutable = true
ephemeral = true
}

# Template used for the greedy agent workspace. The coder_agent environment
# exposes it to the runner scripts as SCALETEST_PARAM_GREEDY_AGENT_TEMPLATE.
# startup.sh exits with an error when this value equals
# SCALETEST_PARAM_TEMPLATE.
data "coder_parameter" "greedy_agent_template" {
order = 31
name = "Greedy Agent Template"
display_name = "Greedy Agent Template"
description = "The template used for the greedy agent workspace (must not be same as workspace template)."
default = "kubernetes-medium-greedy"
icon = "/emojis/1f4dc.png" # Scroll.
mutable = true
option {
name = "Minimal"
value = "kubernetes-minimal"
icon = "/emojis/1fab6.png" # Feather.
description = "Sized to fit approx. 32 per t2d-standard-8 instance."
}
option {
name = "Small"
value = "kubernetes-small"
icon = "/emojis/1f42d.png" # Mouse.
description = "Provisions a small-sized workspace with no persistent storage."
}
option {
name = "Medium"
value = "kubernetes-medium"
icon = "/emojis/1f436.png" # Dog.
description = "Provisions a medium-sized workspace with no persistent storage."
}
option {
name = "Medium (Greedy)"
value = "kubernetes-medium-greedy"
icon = "/emojis/1f436.png" # Dog.
description = "Provisions a medium-sized workspace with no persistent storage. Greedy agent variant."
}
option {
name = "Large"
value = "kubernetes-large"
icon = "/emojis/1f434.png" # Horse.
description = "Provisions a large-sized workspace with no persistent storage."
}
}

data "coder_parameter" "namespace" {
order = 999
type = "string"
Expand Down Expand Up @@ -395,6 +451,8 @@ resource "coder_agent" "main" {
SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_TICK_INTERVAL : "${data.coder_parameter.load_scenario_web_terminal_tick_interval.value}",
SCALETEST_PARAM_LOAD_SCENARIO_DASHBOARD_TRAFFIC_DURATION : "${data.coder_parameter.load_scenario_dashboard_traffic_duration.value}",
SCALETEST_PARAM_LOAD_SCENARIO_BASELINE_DURATION : "${data.coder_parameter.load_scenario_baseline_duration.value}",
SCALETEST_PARAM_GREEDY_AGENT : data.coder_parameter.greedy_agent.value ? "1" : "0",
SCALETEST_PARAM_GREEDY_AGENT_TEMPLATE : data.coder_parameter.greedy_agent_template.value,

GRAFANA_URL : local.grafana_url,

Expand Down Expand Up @@ -584,7 +642,7 @@ resource "kubernetes_pod" "main" {
}
# Set the pod delete timeout to termination_grace_period_seconds + 1m.
timeouts {
delete = "${(local.workspace_pod_termination_grace_period_seconds + 120) / 60}s"
delete = "${(local.workspace_pod_termination_grace_period_seconds + 120)}s"
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Review: bug fix, already in seconds.

}
spec {
security_context {
Expand Down
88 changes: 86 additions & 2 deletions scaletest/templates/scaletest-runner/scripts/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,70 @@ end_phase

wait_baseline "${SCALETEST_PARAM_LOAD_SCENARIO_BASELINE_DURATION}"

non_greedy_agent_traffic_args=()
if [[ ${SCALETEST_PARAM_GREEDY_AGENT} != 1 ]]; then
greedy_agent_traffic() { :; }
else
echo "WARNING: Greedy agent enabled, this may cause the load tests to fail." >&2
non_greedy_agent_traffic_args=(
# Let the greedy agent traffic command be scraped.
# --scaletest-prometheus-address 0.0.0.0:21113
# --trace=false
)

annotate_grafana greedy_agent "Create greedy agent"

coder exp scaletest create-workspaces \
--count 1 \
--template "${SCALETEST_PARAM_GREEDY_AGENT_TEMPLATE}" \
--concurrency 1 \
--timeout 5h \
--job-timeout 5h \
--no-cleanup \
--output json:"${SCALETEST_RESULTS_DIR}/create-workspaces-greedy-agent.json"

wait_baseline "${SCALETEST_PARAM_LOAD_SCENARIO_BASELINE_DURATION}"

# Generate heavy traffic against the greedy agent workspace while a load
# scenario is running.
#
# Arguments:
#   $1 - scenario duration in minutes.
#   $2 - scenario name; "SSH Traffic" selects SSH traffic, any other value
#        uses the web terminal.
# Globals (read): SCALETEST_PARAM_GREEDY_AGENT_TEMPLATE, SCALETEST_RESULTS_DIR.
# Returns: the exit status of the greedy `workspace-traffic` command.
#
# The function may run in the background (subshell) or in the foreground, so
# all state it touches (variables, GRAFANA_ADD_TAGS, errexit) is kept local
# or restored before returning.
greedy_agent_traffic() {
	local timeout=${1} scenario=${2}
	# Run the greedy test for ~1/3 of the timeout: wait one third of the
	# scenario (in seconds), then generate traffic for another third.
	local delay=$((timeout * 60 / 3))

	local type=web-terminal
	local -a args=()
	if [[ ${scenario} == "SSH Traffic" ]]; then
		type=ssh
		args+=(--ssh)
	fi

	sleep "${delay}"
	annotate_grafana greedy_agent "${scenario}: Greedy agent traffic"

	# A failing traffic run must not abort before the end annotation is
	# written, so errexit is disabled here. Remember whether it was on so the
	# caller's setting can be restored instead of being silently cleared.
	local restore_errexit=0
	if [[ $- == *e* ]]; then
		restore_errexit=1
	fi
	set +e

	# Produce load at about 625MB/s (25MB/40ms).
	local status=0
	coder exp scaletest workspace-traffic \
		--template "${SCALETEST_PARAM_GREEDY_AGENT_TEMPLATE}" \
		--bytes-per-tick $((1024 * 1024 * 25)) \
		--tick-interval 40ms \
		--timeout "${delay}s" \
		--job-timeout "${delay}s" \
		--output json:"${SCALETEST_RESULTS_DIR}/traffic-${type}-greedy-agent.json" \
		--scaletest-prometheus-address 0.0.0.0:21113 \
		--trace=false \
		"${args[@]}"
	status=${?}
	show_json "${SCALETEST_RESULTS_DIR}/traffic-${type}-greedy-agent.json"

	# Exported for annotate_grafana_end (and anything it spawns) only; being
	# local, the tag does not outlive this call.
	local -x GRAFANA_ADD_TAGS=
	if [[ ${status} != 0 ]]; then
		GRAFANA_ADD_TAGS=error
	fi
	annotate_grafana_end greedy_agent "${scenario}: Greedy agent traffic"

	if ((restore_errexit)); then
		set -e
	fi
	return "${status}"
}
fi

declare -A failed=()
for scenario in "${SCALETEST_PARAM_LOAD_SCENARIOS[@]}"; do
start_phase "Load scenario: ${scenario}"
Expand All @@ -34,24 +98,40 @@ for scenario in "${SCALETEST_PARAM_LOAD_SCENARIOS[@]}"; do
status=0
case "${scenario}" in
"SSH Traffic")
greedy_agent_traffic "${SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_DURATION}" "${scenario}" &
coder exp scaletest workspace-traffic \
--template "${SCALETEST_PARAM_TEMPLATE}" \
--ssh \
--bytes-per-tick "${SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_BYTES_PER_TICK}" \
--tick-interval "${SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_TICK_INTERVAL}ms" \
--timeout "${SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_DURATION}m" \
--job-timeout "${SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_DURATION}m30s" \
--output json:"${SCALETEST_RESULTS_DIR}/traffic-ssh.json"
--output json:"${SCALETEST_RESULTS_DIR}/traffic-ssh.json" \
"${non_greedy_agent_traffic_args[@]}"
status=$?
wait
status2=$?
if [[ ${status} == 0 ]]; then
status=${status2}
fi
show_json "${SCALETEST_RESULTS_DIR}/traffic-ssh.json"
;;
"Web Terminal Traffic")
greedy_agent_traffic "${SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_DURATION}" "${scenario}" &
coder exp scaletest workspace-traffic \
--template "${SCALETEST_PARAM_TEMPLATE}" \
--bytes-per-tick "${SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_BYTES_PER_TICK}" \
--tick-interval "${SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_TICK_INTERVAL}ms" \
--timeout "${SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_DURATION}m" \
--job-timeout "${SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_DURATION}m30s" \
--output json:"${SCALETEST_RESULTS_DIR}/traffic-web-terminal.json"
--output json:"${SCALETEST_RESULTS_DIR}/traffic-web-terminal.json" \
"${non_greedy_agent_traffic_args[@]}"
status=$?
wait
status2=$?
if [[ ${status} == 0 ]]; then
status=${status2}
fi
show_json "${SCALETEST_RESULTS_DIR}/traffic-web-terminal.json"
;;
"Dashboard Traffic")
Expand All @@ -65,6 +145,10 @@ for scenario in "${SCALETEST_PARAM_LOAD_SCENARIOS[@]}"; do
;;

# Debug scenarios, for testing the runner.
"debug:greedy_agent_traffic")
greedy_agent_traffic 10 "${scenario}"
status=$?
;;
"debug:success")
maybedryrun "$DRY_RUN" sleep 10
status=0
Expand Down
7 changes: 7 additions & 0 deletions scaletest/templates/scaletest-runner/startup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,20 @@ set -euo pipefail

[[ $VERBOSE == 1 ]] && set -x

# Refuse to start when the greedy agent template and the scaletest template
# are the same value.
case "${SCALETEST_PARAM_GREEDY_AGENT_TEMPLATE}" in
"${SCALETEST_PARAM_TEMPLATE}")
	printf '%s\n' "ERROR: Greedy agent template must be different from the scaletest template." >&2
	exit 1
	;;
esac

# Unzip scripts and add to path.
# shellcheck disable=SC2153
echo "Extracting scaletest scripts into ${SCRIPTS_DIR}..."
base64 -d <<<"${SCRIPTS_ZIP}" >/tmp/scripts.zip
rm -rf "${SCRIPTS_DIR}" || true
mkdir -p "${SCRIPTS_DIR}"
unzip -o /tmp/scripts.zip -d "${SCRIPTS_DIR}"
# Chmod to work around https://github.com/coder/coder/issues/10034
chmod +x "${SCRIPTS_DIR}"/*.sh
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice finding!

rm /tmp/scripts.zip

echo "Cloning coder/coder repo..."
Expand Down