Skip to content

fix(scaletest/templates): fix bugs and improve debugging #10316

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits on
Oct 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 9 additions & 13 deletions scaletest/templates/scaletest-runner/scripts/lib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ end_phase() {
phase=$(tail -n 1 "${SCALETEST_PHASE_FILE}" | grep "START:${phase_num}:" | cut -d' ' -f3-)
if [[ -z ${phase} ]]; then
log "BUG: Could not find start phase ${phase_num} in ${SCALETEST_PHASE_FILE}"
exit 1
return 1
fi
log "End phase ${phase_num}: ${phase}"
echo "$(date -Ins) END:${phase_num}: ${phase}" >>"${SCALETEST_PHASE_FILE}"
Expand Down Expand Up @@ -132,6 +132,7 @@ annotate_grafana() {
'{time: $time, tags: $tags | split(","), text: $text}' <<<'{}'
)"
if [[ ${DRY_RUN} == 1 ]]; then
echo "FAKEID:${tags}:${text}:${start}" >>"${SCALETEST_STATE_DIR}/grafana-annotations"
log "Would have annotated Grafana, data=${json}"
return 0
fi
Expand Down Expand Up @@ -171,23 +172,18 @@ annotate_grafana_end() {
tags="${tags},${GRAFANA_EXTRA_TAGS}"
fi

if [[ ${DRY_RUN} == 1 ]]; then
log "Would have updated Grafana annotation (end=${end}): ${text} [${tags}]"
return 0
fi

if ! id=$(grep ":${tags}:${text}:${start}" "${SCALETEST_STATE_DIR}/grafana-annotations" | sort -n | tail -n1 | cut -d: -f1); then
log "NOTICE: Could not find Grafana annotation to end: '${tags}:${text}:${start}', skipping..."
return 0
fi

log "Annotating Grafana (end=${end}): ${text} [${tags}]"
log "Updating Grafana annotation (end=${end}): ${text} [${tags}, add=${GRAFANA_ADD_TAGS:-}]"

if [[ -n ${GRAFANA_ADD_TAGS:-} ]]; then
json="$(
jq -n \
--argjson timeEnd "${end}" \
--argjson tags "${tags},${GRAFANA_ADD_TAGS}" \
--arg tags "${tags},${GRAFANA_ADD_TAGS}" \
'{timeEnd: $timeEnd, tags: $tags | split(",")}'
)"
else
Expand Down Expand Up @@ -275,7 +271,7 @@ coder_pods() {
fetch_coder_full() {
if [[ -x "${SCALETEST_CODER_BINARY}" ]]; then
log "Full Coder binary already exists at ${SCALETEST_CODER_BINARY}"
return
return 0
fi
ns=$(namespace)
if [[ -z "${ns}" ]]; then
Expand All @@ -286,12 +282,12 @@ fetch_coder_full() {
pods=$(coder_pods)
if [[ -z ${pods} ]]; then
log "Could not find coder pods!"
return
return 1
fi
pod=$(cut -d ' ' -f 1 <<<"${pods}")
if [[ -z ${pod} ]]; then
log "Could not find coder pod!"
return
return 1
fi
log "Fetching full Coder binary from ${pod}"
# We need --retries due to https://github.com/kubernetes/kubernetes/issues/60140 :(
Expand All @@ -309,8 +305,8 @@ fetch_coder_full() {
# com.coder.scaletest.status. It will overwrite the previous status.
set_pod_status_annotation() {
if [[ $# -ne 1 ]]; then
log "must specify an annotation value"
return
log "BUG: Must specify an annotation value"
return 1
else
maybedryrun "${DRY_RUN}" kubectl --namespace "$(namespace)" annotate pod "$(hostname)" "com.coder.scaletest.status=$1" --overwrite
fi
Expand Down
4 changes: 4 additions & 0 deletions scaletest/templates/scaletest-runner/scripts/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ for scenario in "${SCALETEST_PARAM_LOAD_SCENARIOS[@]}"; do
maybedryrun "$DRY_RUN" sleep 10
status=1
;;

*)
log "WARNING: Unknown load scenario: ${scenario}, skipping..."
;;
esac
set -e
if ((status > 0)); then
Expand Down
15 changes: 11 additions & 4 deletions scaletest/templates/scaletest-runner/startup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,11 @@ annotate_grafana "workspace" "Agent running" # Ended in shutdown.sh.
trap 'trap - EXIT; kill -INT "${pids[@]}"; exit 1' INT EXIT

while :; do
sleep 285 # ~300 when accounting for profile and trace.
# Sleep for short periods of time so that we can exit quickly.
# This adds up to ~300 when accounting for profile and trace.
for ((i = 0; i < 285; i++)); do
sleep 1
done
log "Grabbing pprof dumps"
start="$(date +%s)"
annotate_grafana "pprof" "Grab pprof dumps (start=${start})"
Expand All @@ -74,13 +78,13 @@ gather_logs() {
annotate_grafana "logs" "Gather logs"
podsraw="$(
kubectl -n coder-big get pods -l app.kubernetes.io/name=coder -o name
kubectl -n coder-big get pods -l app.kubernetes.io/name=coder-provisioner -o name
kubectl -n coder-big get pods -l app.kubernetes.io/name=coder-workspace -o name | grep "^pod/scaletest-"
kubectl -n coder-big get pods -l app.kubernetes.io/name=coder-provisioner -o name || true
kubectl -n coder-big get pods -l app.kubernetes.io/name=coder-workspace -o name | grep "^pod/scaletest-" || true
)"
mapfile -t pods <<<"${podsraw}"
for pod in "${pods[@]}"; do
pod_name="${pod#pod/}"
kubectl -n coder-big logs "${pod}" --since="${SCALETEST_RUN_START_TIME}" >"${SCALETEST_LOGS_DIR}/${pod_name}.txt"
kubectl -n coder-big logs "${pod}" --since-time="${SCALETEST_RUN_START_TIME}" >"${SCALETEST_LOGS_DIR}/${pod_name}.txt"
done
annotate_grafana_end "logs" "Gather logs"
}
Expand Down Expand Up @@ -131,6 +135,9 @@ on_exit() {
set_appearance "${appearance_json}" "${message_color}" "${service_banner_message} | Scaletest ${message_status}: [${CODER_USER}/${CODER_WORKSPACE}](${CODER_URL}/@${CODER_USER}/${CODER_WORKSPACE})!"

annotate_grafana_end "" "Start scaletest: ${SCALETEST_COMMENT}"

wait "${pprof_pid}"
exit "${code}"
}
trap on_exit EXIT

Expand Down