From 0a4597f31c63819ff47c50ad20acc0f52bbec9b7 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Tue, 17 Oct 2023 14:16:25 +0000 Subject: [PATCH 1/3] fix(scaletest/templates): fix bugs and improve debugging --- .../templates/scaletest-runner/scripts/lib.sh | 22 ++++++++----------- .../templates/scaletest-runner/scripts/run.sh | 4 ++++ .../templates/scaletest-runner/startup.sh | 6 ++--- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/scaletest/templates/scaletest-runner/scripts/lib.sh b/scaletest/templates/scaletest-runner/scripts/lib.sh index 753dc1a5fefea..868dd5c078d2e 100644 --- a/scaletest/templates/scaletest-runner/scripts/lib.sh +++ b/scaletest/templates/scaletest-runner/scripts/lib.sh @@ -82,7 +82,7 @@ end_phase() { phase=$(tail -n 1 "${SCALETEST_PHASE_FILE}" | grep "START:${phase_num}:" | cut -d' ' -f3-) if [[ -z ${phase} ]]; then log "BUG: Could not find start phase ${phase_num} in ${SCALETEST_PHASE_FILE}" - exit 1 + return 1 fi log "End phase ${phase_num}: ${phase}" echo "$(date -Ins) END:${phase_num}: ${phase}" >>"${SCALETEST_PHASE_FILE}" @@ -132,6 +132,7 @@ annotate_grafana() { '{time: $time, tags: $tags | split(","), text: $text}' <<<'{}' )" if [[ ${DRY_RUN} == 1 ]]; then + echo "FAKEID:${tags}:${text}:${start}" >>"${SCALETEST_STATE_DIR}/grafana-annotations" log "Would have annotated Grafana, data=${json}" return 0 fi @@ -171,23 +172,18 @@ annotate_grafana_end() { tags="${tags},${GRAFANA_EXTRA_TAGS}" fi - if [[ ${DRY_RUN} == 1 ]]; then - log "Would have updated Grafana annotation (end=${end}): ${text} [${tags}]" - return 0 - fi - if ! id=$(grep ":${tags}:${text}:${start}" "${SCALETEST_STATE_DIR}/grafana-annotations" | sort -n | tail -n1 | cut -d: -f1); then log "NOTICE: Could not find Grafana annotation to end: '${tags}:${text}:${start}', skipping..." return 0 fi - log "Annotating Grafana (end=${end}): ${text} [${tags}]" + log "Updating Grafana annotation (end=${end}): ${text} [${tags}, add=${GRAFANA_ADD_TAGS:-}]" if [[ -n ${GRAFANA_ADD_TAGS:-} ]]; then json="$( jq -n \ --argjson timeEnd "${end}" \ - --argjson tags "${tags},${GRAFANA_ADD_TAGS}" \ + --arg tags "${tags},${GRAFANA_ADD_TAGS}" \ '{timeEnd: $timeEnd, tags: $tags | split(",")}' )" else @@ -275,7 +271,7 @@ coder_pods() { fetch_coder_full() { if [[ -x "${SCALETEST_CODER_BINARY}" ]]; then log "Full Coder binary already exists at ${SCALETEST_CODER_BINARY}" - return + return 0 fi ns=$(namespace) if [[ -z "${ns}" ]]; then @@ -286,12 +282,12 @@ fetch_coder_full() { pods=$(coder_pods) if [[ -z ${pods} ]]; then log "Could not find coder pods!" - return + return 1 fi pod=$(cut -d ' ' -f 1 <<<"${pods}") if [[ -z ${pod} ]]; then log "Could not find coder pod!" - return + return 1 fi log "Fetching full Coder binary from ${pod}" # We need --retries due to https://github.com/kubernetes/kubernetes/issues/60140 :( @@ -309,8 +305,8 @@ fetch_coder_full() { # com.coder.scaletest.status. It will overwrite the previous status. set_pod_status_annotation() { if [[ $# -ne 1 ]]; then - log "must specify an annotation value" - return + log "BUG: Must specify an annotation value" + return 1 else maybedryrun "${DRY_RUN}" kubectl --namespace "$(namespace)" annotate pod "$(hostname)" "com.coder.scaletest.status=$1" --overwrite fi diff --git a/scaletest/templates/scaletest-runner/scripts/run.sh b/scaletest/templates/scaletest-runner/scripts/run.sh index c96995febc3d5..584c80f81f15d 100755 --- a/scaletest/templates/scaletest-runner/scripts/run.sh +++ b/scaletest/templates/scaletest-runner/scripts/run.sh @@ -73,6 +73,10 @@ for scenario in "${SCALETEST_PARAM_LOAD_SCENARIOS[@]}"; do maybedryrun "$DRY_RUN" sleep 10 status=1 ;; + + *) + log "WARNING: Unknown load scenario: ${scenario}, skipping..." + ;; esac set -e if ((status > 0)); then diff --git a/scaletest/templates/scaletest-runner/startup.sh b/scaletest/templates/scaletest-runner/startup.sh index 2946432553e42..bd985e8459985 100755 --- a/scaletest/templates/scaletest-runner/startup.sh +++ b/scaletest/templates/scaletest-runner/startup.sh @@ -74,13 +74,13 @@ gather_logs() { annotate_grafana "logs" "Gather logs" podsraw="$( kubectl -n coder-big get pods -l app.kubernetes.io/name=coder -o name - kubectl -n coder-big get pods -l app.kubernetes.io/name=coder-provisioner -o name - kubectl -n coder-big get pods -l app.kubernetes.io/name=coder-workspace -o name | grep "^pod/scaletest-" + kubectl -n coder-big get pods -l app.kubernetes.io/name=coder-provisioner -o name || true + kubectl -n coder-big get pods -l app.kubernetes.io/name=coder-workspace -o name | grep "^pod/scaletest-" || true )" mapfile -t pods <<<"${podsraw}" for pod in "${pods[@]}"; do pod_name="${pod#pod/}" - kubectl -n coder-big logs "${pod}" --since="${SCALETEST_RUN_START_TIME}" >"${SCALETEST_LOGS_DIR}/${pod_name}.txt" + kubectl -n coder-big logs "${pod}" --since-time="${SCALETEST_RUN_START_TIME}" >"${SCALETEST_LOGS_DIR}/${pod_name}.txt" done annotate_grafana_end "logs" "Gather logs" } From a4f12a652ae16d3fc5bdd34960cbadc6eac4eb76 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Tue, 17 Oct 2023 14:31:45 +0000 Subject: [PATCH 2/3] make sure we echo the exit code --- scaletest/templates/scaletest-runner/startup.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scaletest/templates/scaletest-runner/startup.sh b/scaletest/templates/scaletest-runner/startup.sh index bd985e8459985..b5412edd55419 100755 --- a/scaletest/templates/scaletest-runner/startup.sh +++ b/scaletest/templates/scaletest-runner/startup.sh @@ -131,6 +131,8 @@ on_exit() { set_appearance "${appearance_json}" "${message_color}" "${service_banner_message} | Scaletest ${message_status}: [${CODER_USER}/${CODER_WORKSPACE}](${CODER_URL}/@${CODER_USER}/${CODER_WORKSPACE})!" annotate_grafana_end "" "Start scaletest: ${SCALETEST_COMMENT}" + + exit "${code}" } trap on_exit EXIT From f7ec1758a3d32a53c4efb3f6f6c286df2efb7ae4 Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Tue, 17 Oct 2023 14:48:36 +0000 Subject: [PATCH 3/3] fix script exit before subprocesses --- scaletest/templates/scaletest-runner/startup.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scaletest/templates/scaletest-runner/startup.sh b/scaletest/templates/scaletest-runner/startup.sh index b5412edd55419..5375a8550ef8f 100755 --- a/scaletest/templates/scaletest-runner/startup.sh +++ b/scaletest/templates/scaletest-runner/startup.sh @@ -47,7 +47,11 @@ annotate_grafana "workspace" "Agent running" # Ended in shutdown.sh. trap 'trap - EXIT; kill -INT "${pids[@]}"; exit 1' INT EXIT while :; do - sleep 285 # ~300 when accounting for profile and trace. + # Sleep for short periods of time so that we can exit quickly. + # This adds up to ~300 when accounting for profile and trace. + for ((i = 0; i < 285; i++)); do + sleep 1 + done log "Grabbing pprof dumps" start="$(date +%s)" annotate_grafana "pprof" "Grab pprof dumps (start=${start})" @@ -132,6 +136,7 @@ on_exit() { annotate_grafana_end "" "Start scaletest: ${SCALETEST_COMMENT}" + wait "${pprof_pid}" exit "${code}" } trap on_exit EXIT