From b51555b7dfc0d40b7b1afb07f953d939ae9847a9 Mon Sep 17 00:00:00 2001
From: Mathias Fredriksson
Date: Mon, 16 Oct 2023 13:13:34 +0000
Subject: [PATCH] feat(scaletest/templates): gather pod logs at the end of a scale test

---
 scaletest/templates/scaletest-runner/main.tf  |  4 ++-
 .../templates/scaletest-runner/scripts/lib.sh |  5 ++--
 .../templates/scaletest-runner/startup.sh     | 40 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/scaletest/templates/scaletest-runner/main.tf b/scaletest/templates/scaletest-runner/main.tf
index ca9d8b7b6bfb0..f07f723d649d4 100644
--- a/scaletest/templates/scaletest-runner/main.tf
+++ b/scaletest/templates/scaletest-runner/main.tf
@@ -42,8 +42,9 @@ locals {
   cpu                      = 16
   memory                   = 64
   home_disk_size           = 10
-  scaletest_run_id         = "scaletest-${time_static.start_time.rfc3339}"
+  scaletest_run_id         = "scaletest-${replace(time_static.start_time.rfc3339, ":", "-")}"
   scaletest_run_dir        = "/home/coder/${local.scaletest_run_id}"
+  scaletest_run_start_time = time_static.start_time.rfc3339
   grafana_url              = "https://stats.dev.c8s.io"
   grafana_dashboard_uid    = "qLVSTR-Vz"
   grafana_dashboard_name   = "coderv2-loadtest-dashboard"
@@ -364,6 +365,7 @@ resource "coder_agent" "main" {
     # Local envs passed as arguments to `coder exp scaletest` invocations.
     SCALETEST_RUN_ID : local.scaletest_run_id,
     SCALETEST_RUN_DIR : local.scaletest_run_dir,
+    SCALETEST_RUN_START_TIME : local.scaletest_run_start_time,
 
     SCALETEST_PARAM_TEMPLATE : data.coder_parameter.workspace_template.value,
     SCALETEST_PARAM_REPO_BRANCH : data.coder_parameter.repo_branch.value,
diff --git a/scaletest/templates/scaletest-runner/scripts/lib.sh b/scaletest/templates/scaletest-runner/scripts/lib.sh
index f70b92fcdd6b1..07398bc58e577 100644
--- a/scaletest/templates/scaletest-runner/scripts/lib.sh
+++ b/scaletest/templates/scaletest-runner/scripts/lib.sh
@@ -19,11 +19,12 @@ SCALETEST_STATE_DIR="${SCALETEST_RUN_DIR}/state"
 SCALETEST_PHASE_FILE="${SCALETEST_STATE_DIR}/phase"
 # shellcheck disable=SC2034
 SCALETEST_RESULTS_DIR="${SCALETEST_RUN_DIR}/results"
+SCALETEST_LOGS_DIR="${SCALETEST_RUN_DIR}/logs"
 SCALETEST_PPROF_DIR="${SCALETEST_RUN_DIR}/pprof"
 # https://github.com/kubernetes/kubernetes/issues/72501 :-(
-SCALETEST_CODER_BINARY="/tmp/coder-full-${SCALETEST_RUN_ID//:/-}"
+SCALETEST_CODER_BINARY="/tmp/coder-full-${SCALETEST_RUN_ID}"
 
-mkdir -p "${SCALETEST_STATE_DIR}" "${SCALETEST_RESULTS_DIR}" "${SCALETEST_PPROF_DIR}"
+mkdir -p "${SCALETEST_STATE_DIR}" "${SCALETEST_RESULTS_DIR}" "${SCALETEST_LOGS_DIR}" "${SCALETEST_PPROF_DIR}"
 
 coder() {
 	if [[ ! -x "${SCALETEST_CODER_BINARY}" ]]; then
diff --git a/scaletest/templates/scaletest-runner/startup.sh b/scaletest/templates/scaletest-runner/startup.sh
index 7d0ef2d593f18..57b8b1091efa8 100755
--- a/scaletest/templates/scaletest-runner/startup.sh
+++ b/scaletest/templates/scaletest-runner/startup.sh
@@ -60,6 +60,39 @@ annotate_grafana "workspace" "Agent running"
 	# Ended in shutdown.sh.
 } &
 pprof_pid=$!
+logs_gathered=0
+# gather_logs saves the logs of every coderd, provisioner and scaletest
+# workspace pod into SCALETEST_LOGS_DIR, one <pod name>.txt file per pod.
+# It is called on the success path and again from on_exit; the
+# logs_gathered flag makes every call after the first a no-op.
+gather_logs() {
+	if ((logs_gathered == 1)); then
+		return
+	fi
+	logs_gathered=1
+
+	# Gather logs from all coderd and provisioner instances, and all workspaces.
+	annotate_grafana "logs" "Gather logs"
+	podsraw="$(
+		kubectl -n coder-big get pods -l app.kubernetes.io/name=coder -o name
+		kubectl -n coder-big get pods -l app.kubernetes.io/name=coder-provisioner -o name
+		# grep exits non-zero when no workspace pod matches; that is not an error.
+		kubectl -n coder-big get pods -l app.kubernetes.io/name=coder-workspace -o name | grep "^pod/scaletest-" || true
+	)"
+	mapfile -t pods <<<"${podsraw}"
+	for pod in "${pods[@]}"; do
+		# An empty here-string still yields one empty element, skip it.
+		if [[ -z ${pod} ]]; then
+			continue
+		fi
+		pod_name="${pod#pod/}"
+		# --since only accepts a relative duration (e.g. 5m); the run start
+		# time is an RFC3339 timestamp, which requires --since-time.
+		kubectl -n coder-big logs "${pod}" --since-time="${SCALETEST_RUN_START_TIME}" >"${SCALETEST_LOGS_DIR}/${pod_name}.txt"
+	done
+	annotate_grafana_end "logs" "Gather logs"
+}
+
 set_appearance "${appearance_json}" "${service_banner_color}" "${service_banner_message} | Scaletest running: [${CODER_USER}/${CODER_WORKSPACE}](${CODER_URL}/@${CODER_USER}/${CODER_WORKSPACE})!"
 
 # Show failure in the UI if script exits with error.
@@ -77,6 +110,10 @@ on_exit() {
 		message_status=FAILED
 	fi
 
+	# In case the test failed before gathering logs, gather them before
+	# cleaning up, whilst the workspaces are still present.
+	gather_logs
+
 	case "${SCALETEST_PARAM_CLEANUP_STRATEGY}" in
 	on_stop)
 		# Handled by shutdown script.
@@ -127,4 +164,7 @@ annotate_grafana "" "Start scaletest"
 
 "${SCRIPTS_DIR}/run.sh"
 
+# Gather logs before ending the test.
+gather_logs
+
 "${SCRIPTS_DIR}/report.sh" completed