Skip to content

chore: fix miscellaneous issues in scaletest scripts #8006

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions cli/scaletest.go
Original file line number Diff line number Diff line change
Expand Up @@ -902,10 +902,10 @@ func (r *RootCmd) scaletestWorkspaceTraffic() *clibase.Cmd {
_, _ = fmt.Fprintln(inv.Stderr, "\nUploading traces...")
if err := closeTracing(ctx); err != nil {
_, _ = fmt.Fprintf(inv.Stderr, "\nError uploading traces: %+v\n", err)
// Wait for prometheus metrics to be scraped
_, _ = fmt.Fprintf(inv.Stderr, "Waiting %s for prometheus metrics to be scraped\n", scaletestPrometheusWait)
<-time.After(scaletestPrometheusWait)
}
// Wait for prometheus metrics to be scraped
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I wonder whether an active loop could be implemented instead, to make sure that the metrics have actually been scraped. Nothing is wrong with this submission, though!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I considered that as well, but didn't think it was worth the additional complexity.

_, _ = fmt.Fprintf(inv.Stderr, "Waiting %s for prometheus metrics to be scraped\n", scaletestPrometheusWait)
<-time.After(scaletestPrometheusWait)
}()
tracer := tracerProvider.Tracer(scaletestTracerName)

Expand Down
70 changes: 53 additions & 17 deletions scaletest/lib/coder_workspacetraffic.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,68 @@

set -euo pipefail

if [[ $# -lt 1 ]]; then
echo "Usage: $0 <loadtest name>"
exit 1
fi
PROJECT_ROOT="$(git rev-parse --show-toplevel)"
# shellcheck source=scripts/lib.sh
source "${PROJECT_ROOT}/scripts/lib.sh"

# Allow toggling verbose output
[[ -n ${VERBOSE:-} ]] && set -x

LOADTEST_NAME="$1"
PROJECT_ROOT="$(git rev-parse --show-toplevel)"
SCALETEST_NAME="${SCALETEST_NAME:-}"
SCALETEST_TRAFFIC_BYTES_PER_TICK="${SCALETEST_TRAFFIC_BYTES_PER_TICK:-1024}"
SCALETEST_TRAFFIC_TICK_INTERVAL="${SCALETEST_TRAFFIC_TICK_INTERVAL:-100ms}"

# Parse the long-only command-line options with getopt(1). Each value option
# overwrites the corresponding SCALETEST_* variable; parsing stops at "--".
script_name=$(basename "$0")
args="$(getopt -o "" -l help,name:,traffic-bytes-per-tick:,traffic-tick-interval:, -- "$@")"
# Re-load the normalized, shell-quoted option list produced by getopt as the
# positional parameters.
eval set -- "$args"
while true; do
  case "$1" in
  --help)
    # The tick interval is a duration such as 100ms, not a rate, so the
    # placeholder is named after the option it belongs to.
    echo "Usage: $script_name --name <name> [--traffic-bytes-per-tick <bytes-per-tick>] [--traffic-tick-interval <tick-interval>]"
    exit 1
    ;;
  --name)
    SCALETEST_NAME="$2"
    shift 2
    ;;
  --traffic-bytes-per-tick)
    SCALETEST_TRAFFIC_BYTES_PER_TICK="$2"
    shift 2
    ;;
  --traffic-tick-interval)
    SCALETEST_TRAFFIC_TICK_INTERVAL="$2"
    shift 2
    ;;
  --)
    shift
    break
    ;;
  *)
    # NOTE(review): `error` is not defined in this script; presumably it comes
    # from the sourced scripts/lib.sh and exits -- confirm, since otherwise
    # this loop would never advance past the unrecognized argument.
    error "Unrecognized option: $1"
    ;;
  esac
done

dependencies kubectl

if [[ -z "${SCALETEST_NAME}" ]]; then
echo "Must specify --name"
exit 1
fi

CODER_TOKEN=$("${PROJECT_ROOT}/scaletest/lib/coder_shim.sh" tokens create)
CODER_URL="http://coder.coder-${LOADTEST_NAME}.svc.cluster.local"
export KUBECONFIG="${PROJECT_ROOT}/scaletest/.coderv2/${LOADTEST_NAME}-cluster.kubeconfig"
CODER_URL="http://coder.coder-${SCALETEST_NAME}.svc.cluster.local"
export KUBECONFIG="${PROJECT_ROOT}/scaletest/.coderv2/${SCALETEST_NAME}-cluster.kubeconfig"

# Clean up any pre-existing pods
kubectl -n "coder-${LOADTEST_NAME}" delete pod coder-scaletest-workspace-traffic --force || true
kubectl -n "coder-${SCALETEST_NAME}" delete pod coder-scaletest-workspace-traffic --force || true

cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
name: coder-scaletest-workspace-traffic
namespace: coder-${LOADTEST_NAME}
namespace: coder-${SCALETEST_NAME}
labels:
app.kubernetes.io/name: coder-scaletest-workspace-traffic
spec:
Expand All @@ -36,12 +75,12 @@ spec:
- key: cloud.google.com/gke-nodepool
operator: In
values:
- ${LOADTEST_NAME}-misc
- ${SCALETEST_NAME}-misc
containers:
- command:
- sh
- -c
- "curl -fsSL $CODER_URL/bin/coder-linux-amd64 -o /tmp/coder && chmod +x /tmp/coder && /tmp/coder --verbose --url=$CODER_URL --token=$CODER_TOKEN scaletest workspace-traffic --concurrency=0 --bytes-per-tick=4096 --tick-interval=100ms"
- "curl -fsSL $CODER_URL/bin/coder-linux-amd64 -o /tmp/coder && chmod +x /tmp/coder && /tmp/coder --verbose --url=$CODER_URL --token=$CODER_TOKEN scaletest workspace-traffic --concurrency=0 --bytes-per-tick=${SCALETEST_TRAFFIC_BYTES_PER_TICK} --tick-interval=${SCALETEST_TRAFFIC_TICK_INTERVAL} --scaletest-prometheus-wait=60s"
env:
- name: CODER_URL
value: $CODER_URL
Expand All @@ -51,21 +90,18 @@ spec:
value: "0.0.0.0:21112"
- name: CODER_SCALETEST_JOB_TIMEOUT
value: "30m"
- name: CODER_SCALETEST_CONCURRENCY
value: "0"
- name: CODER_SCALETEST_WORKSPACE_TRAFFIC_BYTES_PER_TICK
value: "2048"
ports:
- containerPort: 21112
name: prometheus-http
protocol: TCP
name: cli
image: docker.io/codercom/enterprise-minimal:ubuntu
restartPolicy: Never
---
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
namespace: coder-${LOADTEST_NAME}
namespace: coder-${SCALETEST_NAME}
name: coder-workspacetraffic-monitoring
spec:
selector:
Expand Down
31 changes: 26 additions & 5 deletions scaletest/scaletest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,25 @@ SCALETEST_PROJECT="${SCALETEST_PROJECT:-}"
SCALETEST_PROMETHEUS_REMOTE_WRITE_USER="${SCALETEST_PROMETHEUS_REMOTE_WRITE_USER:-}"
SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD="${SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD:-}"
SCALETEST_SKIP_CLEANUP="${SCALETEST_SKIP_CLEANUP:-0}"
# Tunables for workspace creation and traffic generation; each keeps a value
# already present in the environment and otherwise falls back to the default.
SCALETEST_CREATE_CONCURRENCY="${SCALETEST_CREATE_CONCURRENCY:-10}"
SCALETEST_TRAFFIC_BYTES_PER_TICK="${SCALETEST_TRAFFIC_BYTES_PER_TICK:-1024}"
# The interval is forwarded to coder_workspacetraffic.sh as a duration, so the
# default carries a unit and matches that script's own default of 100ms.
SCALETEST_TRAFFIC_TICK_INTERVAL="${SCALETEST_TRAFFIC_TICK_INTERVAL:-100ms}"

script_name=$(basename "$0")
args="$(getopt -o "" -l dry-run,help,name:,num-workspaces:,project:,scenario:,skip-cleanup -- "$@")"
args="$(getopt -o "" -l create-concurrency:,dry-run,help,name:,num-workspaces:,project:,scenario:,skip-cleanup,traffic-bytes-per-tick:,traffic-tick-interval:, -- "$@")"
eval set -- "$args"
while true; do
case "$1" in
--create-concurrency)
SCALETEST_CREATE_CONCURRENCY="$2"
shift 2
;;
--dry-run)
DRY_RUN=1
shift
;;
--help)
echo "Usage: $script_name --name <name> --project <project> --num-workspaces <num-workspaces> --scenario <scenario> [--dry-run] [--skip-cleanup]"
# List every option the getopt specification accepts, including the traffic tunables.
echo "Usage: $script_name --name <name> --project <project> --num-workspaces <num-workspaces> --scenario <scenario> [--dry-run] [--skip-cleanup] [--create-concurrency=<create-concurrency>] [--traffic-bytes-per-tick=<bytes-per-tick>] [--traffic-tick-interval=<tick-interval>]"
exit 1
;;
--name)
Expand All @@ -49,6 +56,14 @@ while true; do
SCALETEST_SKIP_CLEANUP=1
shift
;;
--traffic-bytes-per-tick)
SCALETEST_TRAFFIC_BYTES_PER_TICK="$2"
shift 2
;;
--traffic-tick-interval)
SCALETEST_TRAFFIC_TICK_INTERVAL="$2"
shift 2
;;
--)
shift
break
Expand Down Expand Up @@ -144,16 +159,21 @@ echo "Creating ${SCALETEST_NUM_WORKSPACES} workspaces."
DRY_RUN="$DRY_RUN" "${PROJECT_ROOT}/scaletest/lib/coder_shim.sh" scaletest create-workspaces \
--count "${SCALETEST_NUM_WORKSPACES}" \
--template=kubernetes \
--concurrency 10 \
--concurrency "${SCALETEST_CREATE_CONCURRENCY}" \
--no-cleanup

echo "Sleeping 10 minutes to establish a baseline measurement."
maybedryrun "$DRY_RUN" sleep 600

echo "Sending traffic to workspaces"
maybedryrun "$DRY_RUN" "${PROJECT_ROOT}/scaletest/lib/coder_workspacetraffic.sh" "${SCALETEST_NAME}"
maybedryrun "$DRY_RUN" "${PROJECT_ROOT}/scaletest/lib/coder_workspacetraffic.sh" \
--name "${SCALETEST_NAME}" \
--traffic-bytes-per-tick "${SCALETEST_TRAFFIC_BYTES_PER_TICK}" \
--traffic-tick-interval "${SCALETEST_TRAFFIC_TICK_INTERVAL}"
maybedryrun "$DRY_RUN" kubectl --kubeconfig="${KUBECONFIG}" -n "coder-${SCALETEST_NAME}" wait pods coder-scaletest-workspace-traffic --for condition=Ready
maybedryrun "$DRY_RUN" kubectl --kubeconfig="${KUBECONFIG}" -n "coder-${SCALETEST_NAME}" logs -f pod/coder-scaletest-workspace-traffic

echo "Sleeping 15 minutes for traffic generation"
maybedryrun "$DRY_RUN" sleep 900

echo "Starting pprof"
maybedryrun "$DRY_RUN" kubectl -n "coder-${SCALETEST_NAME}" port-forward deployment/coder 6061:6060 &
Expand All @@ -168,6 +188,7 @@ while ! maybedryrun "$DRY_RUN" timeout 1 bash -c "echo > /dev/tcp/localhost/6061
echo "pprof failed to become ready in time!"
exit 1
fi
# Increment numerically: `+=` appends text ("0" -> "01") unless the variable
# was declared with the integer attribute, which is not visible here.
pprof_attempt_counter=$((pprof_attempt_counter + 1))
maybedryrun "$DRY_RUN" sleep 3
done

Expand Down
2 changes: 1 addition & 1 deletion scaletest/terraform/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,5 @@ terraform {
}
}

required_version = "~> 1.4.0"
required_version = "~> 1.5.0"
}