chore: add prometheus monitoring of workspace traffic generation #7583
@@ -14,9 +14,14 @@ import (
 	"time"
 
 	"github.com/google/uuid"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promhttp"
 	"go.opentelemetry.io/otel/trace"
 	"golang.org/x/xerrors"
 
+	"cdr.dev/slog"
+	"cdr.dev/slog/sloggers/sloghuman"
+
 	"github.com/coder/coder/cli/clibase"
 	"github.com/coder/coder/cli/cliui"
 	"github.com/coder/coder/coderd/httpapi"
@@ -896,13 +901,14 @@ func (r *RootCmd) scaletestCreateWorkspaces() *clibase.Cmd {
 
 func (r *RootCmd) scaletestWorkspaceTraffic() *clibase.Cmd {
 	var (
-		tickInterval    time.Duration
-		bytesPerTick    int64
-		client          = &codersdk.Client{}
-		tracingFlags    = &scaletestTracingFlags{}
-		strategy        = &scaletestStrategyFlags{}
-		cleanupStrategy = &scaletestStrategyFlags{cleanup: true}
-		output          = &scaletestOutputFlags{}
+		tickInterval               time.Duration
+		bytesPerTick               int64
+		scaletestPrometheusAddress string
+		client                     = &codersdk.Client{}
+		tracingFlags               = &scaletestTracingFlags{}
+		strategy                   = &scaletestStrategyFlags{}
+		cleanupStrategy            = &scaletestStrategyFlags{cleanup: true}
+		output                     = &scaletestOutputFlags{}
 	)
 
 	cmd := &clibase.Cmd{
@@ -913,6 +919,12 @@ func (r *RootCmd) scaletestWorkspaceTraffic() *clibase.Cmd {
 		),
 		Handler: func(inv *clibase.Invocation) error {
 			ctx := inv.Context()
+			reg := prometheus.NewRegistry()
+			metrics := workspacetraffic.NewMetrics(reg, "username", "workspace_name", "agent_name")
+
+			logger := slog.Make(sloghuman.Sink(io.Discard))
+			prometheusSrvClose := ServeHandler(ctx, logger, promhttp.HandlerFor(reg, promhttp.HandlerOpts{}), scaletestPrometheusAddress, "prometheus")
+			defer prometheusSrvClose()

> **Review:** I'm wondering if we need to add some grace period before closing, to make sure that all relevant metrics are scraped before the tool goes down.
>
> **Reply:** Yeah, this would be heavily dependent on the Prometheus scrape interval. Simplest is probably to expose it as a parameter to be set by the test operator.
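As a rough illustration of the reviewer's suggestion, the close could be delayed by an operator-configurable duration. Everything in this sketch (the `scaletestPrometheusWait` variable and its wiring) is hypothetical, not part of this diff:

```go
// Hypothetical sketch: give Prometheus one last chance to scrape before the
// /metrics endpoint goes away. scaletestPrometheusWait is an assumed CLI
// option, not something this PR defines.
defer func() {
	time.Sleep(scaletestPrometheusWait) // ideally >= the scrape interval
	prometheusSrvClose()
}()
```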
 
 			// Bypass rate limiting
 			client.HTTPClient = &http.Client{
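For orientation, here is a minimal sketch of what a constructor like `workspacetraffic.NewMetrics` above could look like. The struct shape, metric names, and namespace are assumptions for illustration, not the PR's actual implementation:

```go
package workspacetraffic

import "github.com/prometheus/client_golang/prometheus"

// Metrics is a hypothetical container for the traffic generator's counters.
type Metrics struct {
	BytesWritten *prometheus.CounterVec
	WriteErrors  *prometheus.CounterVec
}

// NewMetrics registers per-workspace counters, partitioned by the label
// names the caller passes in (here: username, workspace_name, agent_name).
func NewMetrics(reg prometheus.Registerer, labelNames ...string) *Metrics {
	m := &Metrics{
		BytesWritten: prometheus.NewCounterVec(prometheus.CounterOpts{
			Namespace: "coderd",
			Subsystem: "scaletest",
			Name:      "bytes_written_total",
			Help:      "Total bytes written to workspace agents.",
		}, labelNames),
		WriteErrors: prometheus.NewCounterVec(prometheus.CounterOpts{
			Namespace: "coderd",
			Subsystem: "scaletest",
			Name:      "write_errors_total",
			Help:      "Total write errors encountered while sending traffic.",
		}, labelNames),
	}
	reg.MustRegister(m.BytesWritten, m.WriteErrors)
	return m
}
```

Registering everything against the registry passed in from the CLI is what makes the counters visible on the `/metrics` endpoint served by `ServeHandler` above.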
@@ -955,16 +967,18 @@ func (r *RootCmd) scaletestWorkspaceTraffic() *clibase.Cmd {
 			th := harness.NewTestHarness(strategy.toStrategy(), cleanupStrategy.toStrategy())
 			for idx, ws := range workspaces {
 				var (
-					agentID uuid.UUID
-					name    = "workspace-traffic"
-					id      = strconv.Itoa(idx)
+					agentID   uuid.UUID
+					agentName string
+					name      = "workspace-traffic"
+					id        = strconv.Itoa(idx)
 				)
 
 				for _, res := range ws.LatestBuild.Resources {
 					if len(res.Agents) == 0 {
 						continue
 					}
 					agentID = res.Agents[0].ID
+					agentName = res.Agents[0].Name
 				}
 
 				if agentID == uuid.Nil {

> **Review** (on `agentName string`): I didn't go through the whole PR, but why is it required to specify the agent name? Isn't it always "main"?
>
> **Reply:** My understanding is that the agent name maps directly to what's specified in the workspace Terraform, is this not the case?
@@ -974,16 +988,20 @@ func (r *RootCmd) scaletestWorkspaceTraffic() *clibase.Cmd {
 
 				// Setup our workspace agent connection.
 				config := workspacetraffic.Config{
-					AgentID:      agentID,
-					BytesPerTick: bytesPerTick,
-					Duration:     strategy.timeout,
-					TickInterval: tickInterval,
+					AgentID:        agentID,
+					AgentName:      agentName,
+					BytesPerTick:   bytesPerTick,
+					Duration:       strategy.timeout,
+					TickInterval:   tickInterval,
+					WorkspaceName:  ws.Name,
+					WorkspaceOwner: ws.OwnerName,
+					Registry:       reg,
 				}
 
 				if err := config.Validate(); err != nil {
 					return xerrors.Errorf("validate config: %w", err)
 				}
-				var runner harness.Runnable = workspacetraffic.NewRunner(client, config)
+				var runner harness.Runnable = workspacetraffic.NewRunner(client, config, metrics)
 				if tracingEnabled {
 					runner = &runnableTraceWrapper{
 						tracer: tracer,
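Continuing the illustrative sketch from earlier, the runner could bump the counters after each tick's write. The helper below is hypothetical, not the PR's code; the only fixed requirement is that label values are supplied in the same order as the label names registered above (username, workspace_name, agent_name):

```go
// recordWrite is a hypothetical helper showing how a runner might feed the
// counters after each tick. Label value order must mirror the label names
// the registry was created with.
func recordWrite(m *Metrics, cfg Config, n int, err error) {
	labels := []string{cfg.WorkspaceOwner, cfg.WorkspaceName, cfg.AgentName}
	m.BytesWritten.WithLabelValues(labels...).Add(float64(n))
	if err != nil {
		m.WriteErrors.WithLabelValues(labels...).Inc()
	}
}
```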
@@ -1034,6 +1052,13 @@ func (r *RootCmd) scaletestWorkspaceTraffic() *clibase.Cmd {
 			Description: "How often to send traffic.",
 			Value:       clibase.DurationOf(&tickInterval),
 		},
+		{
+			Flag:        "scaletest-prometheus-address",
+			Env:         "CODER_SCALETEST_PROMETHEUS_ADDRESS",
+			Default:     "0.0.0.0:21112",
+			Description: "Address on which to expose scaletest prometheus metrics.",
+			Value:       clibase.StringOf(&scaletestPrometheusAddress),
+		},
 	}
 
 	tracingFlags.attach(&cmd.Options)
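With the option block above in place, a run could look like the following. Only `--scaletest-prometheus-address` is confirmed by this diff; the other flag spellings are inferred from the environment variables used in the script later in this PR and may differ:

```bash
coder scaletest workspace-traffic \
	--scaletest-prometheus-address 0.0.0.0:21112 \
	--bytes-per-tick 2048 \
	--job-timeout 30m
```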
@@ -32,9 +32,12 @@ project_id = "some_google_project_id"
 1. Run `coder_init.sh <coder_url>` to setup an initial user and a pre-configured Kubernetes
    template. It will also download the Coder CLI from the Coder instance locally.
 
-1. Do whatever you need to do with the Coder instance.
+1. Do whatever you need to do with the Coder instance:
 
-   > To run Coder commands against the instance, you can use `coder_shim.sh <command>`.
+   > Note: To run Coder commands against the instance, you can use `coder_shim.sh <command>`.
+   > You don't need to run `coder login` yourself.
 
+   - To create workspaces, run `./coder_shim.sh scaletest create-workspaces --template="kubernetes" --count=N`
+   - To generate workspace traffic, run `./coder_trafficgen.sh <name of loadtest from your Terraform vars>`. This will keep running until you delete the pod `coder-scaletest-workspace-traffic`.
 
 1. When you are finished, you can run `terraform destroy -var-file=override.tfvars`.

> **Review** (on the `create-workspaces` bullet): I believe that at some point you may also want to include rich parameters, as they cause more DB calls in total (worst-case scenario).
>
> **Reply:** Yep, that would be a good test for load related to concurrent workspace builds.
@@ -0,0 +1,73 @@
#!/usr/bin/env bash

set -euo pipefail

if [[ $# -lt 1 ]]; then
	echo "Usage: $0 <loadtest name>"
	exit 1
fi

# Allow toggling verbose output
[[ -n ${VERBOSE:-} ]] && set -x

LOADTEST_NAME="$1"
CODER_TOKEN=$(./coder_shim.sh tokens create)
CODER_URL="http://coder.coder-${LOADTEST_NAME}.svc.cluster.local"
export KUBECONFIG="${PWD}/.coderv2/${LOADTEST_NAME}-cluster.kubeconfig"

cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: coder-scaletest-workspace-traffic
  namespace: coder-${LOADTEST_NAME}
  labels:
    app.kubernetes.io/name: coder-scaletest-workspace-traffic
spec:
  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
          - matchExpressions:
              - key: cloud.google.com/gke-nodepool
                operator: In
                values:
                  - ${LOADTEST_NAME}-misc
  containers:
    - command:
        - sh
        - -c
        - "curl -fsSL $CODER_URL/bin/coder-linux-amd64 -o /tmp/coder && chmod +x /tmp/coder && /tmp/coder --url=$CODER_URL --token=$CODER_TOKEN scaletest workspace-traffic"
      env:
        - name: CODER_URL
          value: $CODER_URL
        - name: CODER_TOKEN
          value: $CODER_TOKEN
        - name: CODER_SCALETEST_PROMETHEUS_ADDRESS
          value: "0.0.0.0:21112"
        - name: CODER_SCALETEST_JOB_TIMEOUT
          value: "30m"
        - name: CODER_SCALETEST_CONCURRENCY
          value: "0"
        - name: CODER_SCALETEST_WORKSPACE_TRAFFIC_BYTES_PER_TICK
          value: "2048"
      ports:
        - containerPort: 21112
          name: prometheus-http
          protocol: TCP
      name: cli
      image: docker.io/codercom/enterprise-minimal:ubuntu
---
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  namespace: coder-${LOADTEST_NAME}
  name: coder-workspacetraffic-monitoring
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: coder-scaletest-workspace-traffic
  podMetricsEndpoints:
    - port: prometheus-http
      interval: 15s
EOF
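To sanity-check the exporter before relying on the PodMonitor, a quick port-forward against the pod created above should show the counters; the grep pattern below is a guess at the metric naming, not something this PR guarantees:

```bash
# Forward the metrics port from the traffic-generation pod and inspect it.
kubectl -n "coder-${LOADTEST_NAME}" port-forward \
	pod/coder-scaletest-workspace-traffic 21112:21112 &
sleep 2
# The "scaletest" prefix is an assumption about the metric names.
curl -fsS http://localhost:21112/metrics | grep -i scaletest || true
```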