Skip to content

Commit 7072b8e

Browse files
authored
chore: update scaletest terraform with latest findings (#8249)

Updates scaletest terraform with learnings from #8213:
- Increase max pods per node to 256
- Decrease CPU requests for test workspace to allow maxing out workspaces per node
- Explicitly set CODER_ACCESS_URL for ssh to work
- Explicitly disable rate limits in coderd
- Increase DB size for medium and large scenarios
- Mount cache volume directly under /tmp/coder instead of /tmp
- Plumb through requests and limits for workspaces
- Plumb through requests for coderd
1 parent 83fee4b commit 7072b8e

File tree

6 files changed

+80
-32
lines changed

6 files changed

+80
-32
lines changed

scaletest/terraform/coder.tf

+13-9
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ coder:
8383
operator: "In"
8484
values: ["${local.coder_release_name}"]
8585
env:
86+
- name: "CODER_ACCESS_URL"
87+
value: "${local.coder_url}"
8688
- name: "CODER_CACHE_DIRECTORY"
8789
value: "/tmp/coder"
8890
- name: "CODER_ENABLE_TELEMETRY"
@@ -108,25 +110,27 @@ coder:
108110
value: "true"
109111
- name: "CODER_EXPERIMENTS"
110112
value: "${var.coder_experiments}"
113+
- name: "CODER_DANGEROUS_DISABLE_RATE_LIMITS"
114+
value: "true"
111115
image:
112116
repo: ${var.coder_image_repo}
113117
tag: ${var.coder_image_tag}
114118
replicaCount: "${var.coder_replicas}"
115119
resources:
116120
requests:
117-
cpu: "${var.coder_cpu}"
118-
memory: "${var.coder_mem}"
121+
cpu: "${var.coder_cpu_request}"
122+
memory: "${var.coder_mem_request}"
119123
limits:
120-
cpu: "${var.coder_cpu}"
121-
memory: "${var.coder_mem}"
124+
cpu: "${var.coder_cpu_limit}"
125+
memory: "${var.coder_mem_limit}"
122126
securityContext:
123127
readOnlyRootFilesystem: true
124128
service:
125129
enable: true
126130
sessionAffinity: None
127131
loadBalancerIP: "${local.coder_address}"
128132
volumeMounts:
129-
- mountPath: "/tmp"
133+
- mountPath: "/tmp/coder"
130134
name: cache
131135
readOnly: false
132136
volumes:
@@ -197,12 +201,12 @@ resource "local_file" "kubernetes_template" {
197201
}
198202
resources {
199203
requests = {
200-
"cpu" = "0.1"
201-
"memory" = "128Mi"
204+
"cpu" = "${var.workspace_cpu_request}"
205+
"memory" = "${var.workspace_mem_request}"
202206
}
203207
limits = {
204-
"cpu" = "1"
205-
"memory" = "1Gi"
208+
"cpu" = "${var.workspace_cpu_limit}"
209+
"memory" = "${var.workspace_mem_limit}"
206210
}
207211
}
208212
}

scaletest/terraform/gcp_cluster.tf

+7-6
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@ data "google_compute_default_service_account" "default" {
33
}
44

55
resource "google_container_cluster" "primary" {
6-
name = var.name
7-
location = var.zone
8-
project = var.project_id
9-
network = google_compute_network.vpc.name
10-
subnetwork = google_compute_subnetwork.subnet.name
11-
networking_mode = "VPC_NATIVE"
6+
name = var.name
7+
location = var.zone
8+
project = var.project_id
9+
network = google_compute_network.vpc.name
10+
subnetwork = google_compute_subnetwork.subnet.name
11+
networking_mode = "VPC_NATIVE"
12+
default_max_pods_per_node = 256
1213
ip_allocation_policy { # Required with networking_mode=VPC_NATIVE
1314

1415
}
+5-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
nodepool_machine_type_coder = "t2d-standard-8"
22
nodepool_size_coder = 3
33
nodepool_machine_type_workspaces = "t2d-standard-8"
4-
coder_cpu = "6000m" # Leaving 2 CPUs for system workloads
5-
coder_mem = "24Gi" # Leaving 8 GB for system workloads
4+
cloudsql_tier = "db-custom-2-7680"
5+
coder_cpu_request = "3000m"
6+
coder_mem_request = "12Gi"
7+
coder_cpu_limit = "6000m" # Leaving 2 CPUs for system workloads
8+
coder_mem_limit = "24Gi" # Leaving 8 GB for system workloads
69
coder_replicas = 3
+5-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
nodepool_machine_type_coder = "t2d-standard-8"
22
nodepool_machine_type_workspaces = "t2d-standard-8"
3-
coder_cpu = "6000m" # Leaving 2 CPUs for system workloads
4-
coder_mem = "24Gi" # Leaving 8 GB for system workloads
3+
cloudsql_tier = "db-custom-1-3840"
4+
coder_cpu_request = "3000m"
5+
coder_mem_request = "12Gi"
6+
coder_cpu_limit = "6000m" # Leaving 2 CPUs for system workloads
7+
coder_mem_limit = "24Gi" # Leaving 8 GB for system workloads
+4-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
nodepool_machine_type_coder = "t2d-standard-4"
22
nodepool_machine_type_workspaces = "t2d-standard-4"
3-
coder_cpu = "2000m" # Leaving 2 CPUs for system workloads
4-
coder_mem = "12Gi" # Leaving 4GB for system workloads
3+
coder_cpu_request = "1000m"
4+
coder_mem_request = "6Gi"
5+
coder_cpu_limit = "2000m" # Leaving 2 CPUs for system workloads
6+
coder_mem_limit = "12Gi" # Leaving 4GB for system workloads

scaletest/terraform/vars.tf

+46-11
Original file line numberDiff line numberDiff line change
@@ -94,17 +94,30 @@ variable "cloudsql_max_connections" {
9494

9595
// These variables control the Coder deployment.
9696
variable "coder_replicas" {
97-
description = "Number of Coder replicas to provision"
97+
description = "Number of Coder replicas to provision."
9898
default = 1
9999
}
100100

101-
variable "coder_cpu" {
102-
description = "CPU to allocate to Coder"
101+
// Ensure that requests allow for at least two replicas to be scheduled
102+
// on a single node temporarily, otherwise deployments may fail due to
103+
// lack of resources.
104+
variable "coder_cpu_request" {
105+
description = "CPU request to allocate to Coder."
106+
default = "500m"
107+
}
108+
109+
variable "coder_mem_request" {
110+
description = "Memory request to allocate to Coder."
111+
default = "512Mi"
112+
}
113+
114+
variable "coder_cpu_limit" {
115+
description = "CPU limit to allocate to Coder."
103116
default = "1000m"
104117
}
105118

106-
variable "coder_mem" {
107-
description = "Memory to allocate to Coder"
119+
variable "coder_mem_limit" {
120+
description = "Memory limit to allocate to Coder."
108121
default = "1024Mi"
109122
}
110123

@@ -123,11 +136,38 @@ variable "coder_image_tag" {
123136
default = "latest"
124137
}
125138

139+
variable "coder_experiments" {
140+
description = "Coder Experiments to enable."
141+
default = ""
142+
}
143+
144+
// These variables control the default workspace template.
126145
variable "workspace_image" {
127146
description = "Image and tag to use for workspaces."
128147
default = "docker.io/codercom/enterprise-minimal:ubuntu"
129148
}
130149

150+
variable "workspace_cpu_request" {
151+
description = "CPU request to allocate to workspaces."
152+
default = "100m"
153+
}
154+
155+
variable "workspace_cpu_limit" {
156+
description = "CPU limit to allocate to workspaces."
157+
default = "100m"
158+
}
159+
160+
variable "workspace_mem_request" {
161+
description = "Memory request to allocate to workspaces."
162+
default = "128Mi"
163+
}
164+
165+
variable "workspace_mem_limit" {
166+
description = "Memory limit to allocate to workspaces."
167+
default = "128Mi"
168+
}
169+
170+
// These variables control the Prometheus deployment.
131171
variable "prometheus_remote_write_user" {
132172
description = "Username for Prometheus remote write."
133173
default = ""
@@ -139,7 +179,7 @@ variable "prometheus_remote_write_password" {
139179
}
140180

141181
variable "prometheus_remote_write_url" {
142-
description = "URL for Prometheus remote write. Defaults to stats.dev.c8s.io"
182+
description = "URL for Prometheus remote write. Defaults to stats.dev.c8s.io."
143183
default = "https://stats.dev.c8s.io:9443/api/v1/write"
144184
}
145185

@@ -157,8 +197,3 @@ variable "prometheus_remote_write_send_interval" {
157197
description = "Prometheus remote write interval."
158198
default = "15s"
159199
}
160-
161-
variable "coder_experiments" {
162-
description = "Coder Experiments to enable"
163-
default = ""
164-
}

0 commit comments

Comments (0)