Skip to content

Commit 68a4619

Browse files
authored
fix(scaletest): deploy external provisionerd (#8618)
* scaletest: stop kubernetes_secret from being constantly recreated
* scaletest: ensure we do not get auto-upgraded
* scaletest: add external provisionerd deployment, the lazy way
1 parent 9689bca commit 68a4619

File tree

3 files changed

+199
-2
lines changed

3 files changed

+199
-2
lines changed

scaletest/terraform/coder.tf

+166-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ resource "random_password" "prometheus-postgres-password" {
5454
}
5555

5656
resource "kubernetes_secret" "coder-db" {
57-
type = "" # Opaque
57+
type = "Opaque"
5858
metadata {
5959
name = "coder-db-url"
6060
namespace = local.coder_namespace
@@ -125,6 +125,9 @@ coder:
125125
value: "${var.coder_experiments}"
126126
- name: "CODER_DANGEROUS_DISABLE_RATE_LIMITS"
127127
value: "true"
128+
# Disabling built-in provisioner daemons
129+
- name: "CODER_PROVISIONER_DAEMONS"
130+
value: "0"
128131
image:
129132
repo: ${var.coder_image_repo}
130133
tag: ${var.coder_image_tag}
@@ -242,6 +245,168 @@ resource "local_file" "kubernetes_template" {
242245
EOF
243246
}
244247

248+
# TODO(cian): Remove this when we have support in the Helm chart.
# Ref: https://github.com/coder/coder/issues/8243
#
# Renders a Kubernetes Deployment manifest for an external provisionerd into
# .coderv2/provisionerd-deployment.yaml. The container runs `/opt/coder server`
# with CODER_PROVISIONER_DAEMONS taken from var.provisionerd_concurrency, and
# its replica count and CPU/memory requests and limits come from the
# provisionerd_* variables. The file is applied to the cluster by
# null_resource.provisionerd_deployment_apply.
resource "local_file" "provisionerd_deployment" {
  filename = "${path.module}/../.coderv2/provisionerd-deployment.yaml"
  # `<<EOF` (not `<<-EOF`) keeps leading whitespace verbatim, so the YAML below
  # must stay at column 0 with its own two-space nesting.
  content = <<EOF
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app.kubernetes.io/instance: ${var.name}
    app.kubernetes.io/name: provisionerd
  name: provisionerd
  namespace: ${local.coder_namespace}
spec:
  replicas: ${var.provisionerd_replicas}
  selector:
    matchLabels:
      app.kubernetes.io/instance: ${var.name}
      app.kubernetes.io/name: provisionerd
  strategy:
    rollingUpdate:
      maxSurge: 25%
      maxUnavailable: 25%
    type: RollingUpdate
  template:
    metadata:
      creationTimestamp: null
      labels:
        app.kubernetes.io/instance: ${var.name}
        app.kubernetes.io/name: provisionerd
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: cloud.google.com/gke-nodepool
                operator: In
                values:
                - ${google_container_node_pool.coder.name}
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
          - podAffinityTerm:
              labelSelector:
                matchExpressions:
                - key: app.kubernetes.io/instance
                  operator: In
                  values:
                  - ${var.name}
              topologyKey: kubernetes.io/hostname
            weight: 1
      containers:
      - args:
        - server
        command:
        - /opt/coder
        env:
        - name: CODER_HTTP_ADDRESS
          value: 0.0.0.0:8080
        - name: CODER_PROMETHEUS_ADDRESS
          value: 0.0.0.0:2112
        - name: CODER_ACCESS_URL
          value: ${local.coder_url}
        - name: CODER_CACHE_DIRECTORY
          value: /tmp/coder
        - name: CODER_ENABLE_TELEMETRY
          value: "false"
        - name: CODER_LOGGING_HUMAN
          value: /dev/null
        - name: CODER_LOGGING_STACKDRIVER
          value: /dev/stderr
        - name: CODER_PG_CONNECTION_URL
          valueFrom:
            secretKeyRef:
              key: url
              name: coder-db-url
        - name: CODER_PPROF_ENABLE
          value: "true"
        - name: CODER_PROMETHEUS_ENABLE
          value: "true"
        - name: CODER_PROMETHEUS_COLLECT_AGENT_STATS
          value: "true"
        - name: CODER_PROMETHEUS_COLLECT_DB_METRICS
          value: "true"
        - name: CODER_VERBOSE
          value: "true"
        - name: CODER_PROVISIONER_DAEMONS
          value: "${var.provisionerd_concurrency}"
        image: "${var.coder_image_repo}:${var.coder_image_tag}"
        imagePullPolicy: IfNotPresent
        lifecycle: {}
        livenessProbe:
          failureThreshold: 3
          httpGet:
            path: /api/v2/buildinfo
            port: http
            scheme: HTTP
          periodSeconds: 10
          successThreshold: 1
          timeoutSeconds: 1
        name: provisionerd
        ports:
        - containerPort: 8080
          name: http
          protocol: TCP
        - containerPort: 2112
          name: prometheus-http
          protocol: TCP
        readinessProbe:
          failureThreshold: 3
          httpGet:
            path: /api/v2/buildinfo
            port: http
            scheme: HTTP
          periodSeconds: 10
          successThreshold: 1
          timeoutSeconds: 1
        resources:
          limits:
            cpu: "${var.provisionerd_cpu_limit}"
            memory: "${var.provisionerd_mem_limit}"
          requests:
            cpu: "${var.provisionerd_cpu_request}"
            memory: "${var.provisionerd_mem_request}"
        securityContext:
          allowPrivilegeEscalation: false
          readOnlyRootFilesystem: true
          runAsGroup: 1000
          runAsNonRoot: true
          runAsUser: 1000
          seccompProfile:
            type: RuntimeDefault
        terminationMessagePath: /dev/termination-log
        terminationMessagePolicy: File
        volumeMounts:
        - mountPath: /tmp
          name: cache
      dnsPolicy: ClusterFirst
      restartPolicy: Always
      serviceAccount: coder
      serviceAccountName: coder
      terminationGracePeriodSeconds: 60
      volumes:
      - emptyDir:
          sizeLimit: 10Gi
        name: cache
EOF
}
396+
397+
# Applies the rendered provisionerd Deployment manifest to the cluster with
# `kubectl apply`. depends_on orders this after the Coder Helm release, the
# rendered manifest file and the cluster kubeconfig resource.
resource "null_resource" "provisionerd_deployment_apply" {
  depends_on = [helm_release.coder-chart, local_file.provisionerd_deployment, null_resource.cluster_kubeconfig]
  triggers = {
    kubeconfig_path = local.cluster_kubeconfig_path
    manifest_path   = local_file.provisionerd_deployment.filename
    # Both paths above stay constant when only the manifest content changes
    # (replicas, resources, image tag, ...). Hash the content as well so an
    # updated manifest replaces this resource and gets re-applied.
    manifest_sha256 = sha256(local_file.provisionerd_deployment.content)
  }
  provisioner "local-exec" {
    # Paths are quoted so a module path containing spaces does not split the
    # command line.
    command = <<EOF
KUBECONFIG="${self.triggers.kubeconfig_path}" kubectl apply -f "${self.triggers.manifest_path}"
EOF
  }
}
409+
245410
resource "local_file" "output_vars" {
246411
filename = "${path.module}/../.coderv2/url"
247412
content = local.coder_url

scaletest/terraform/gcp_cluster.tf

+2-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ resource "google_container_cluster" "primary" {
1818

1919
}
2020
release_channel {
21-
channel = "STABLE"
21+
# Setting release channel as STABLE can cause unexpected cluster upgrades.
22+
channel = "UNSPECIFIED"
2223
}
2324
initial_node_count = 1
2425
remove_default_node_pool = true

scaletest/terraform/vars.tf

+31
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,37 @@ variable "coder_mem_limit" {
130130
default = "1024Mi"
131131
}
132132

133+
// Allow independently scaling provisionerd resources
134+
# CPU request for the provisionerd container; rendered into
# resources.requests.cpu of the provisionerd Deployment manifest.
variable "provisionerd_cpu_request" {
  description = "CPU request to allocate to provisionerd."
  default     = "500m"
}
138+
139+
# Memory request for the provisionerd container; rendered into
# resources.requests.memory of the provisionerd Deployment manifest.
variable "provisionerd_mem_request" {
  description = "Memory request to allocate to provisionerd."
  default     = "512Mi"
}
143+
144+
# CPU limit for the provisionerd container; rendered into
# resources.limits.cpu of the provisionerd Deployment manifest.
variable "provisionerd_cpu_limit" {
  description = "CPU limit to allocate to provisionerd."
  default     = "1000m"
}
148+
149+
# Memory limit for the provisionerd container; rendered into
# resources.limits.memory of the provisionerd Deployment manifest.
variable "provisionerd_mem_limit" {
  description = "Memory limit to allocate to provisionerd."
  default     = "1024Mi"
}
153+
154+
# Replica count rendered into spec.replicas of the provisionerd Deployment
# manifest. Typed as a number so a non-numeric value is rejected by Terraform
# before the manifest is rendered.
variable "provisionerd_replicas" {
  description = "Number of Provisionerd replicas."
  type        = number
  default     = 1
}
158+
159+
# Passed to each provisionerd pod as CODER_PROVISIONER_DAEMONS. Typed as a
# number so a non-numeric value is rejected by Terraform before the manifest
# is rendered.
variable "provisionerd_concurrency" {
  description = "Number of concurrent provisioner jobs per provisionerd instance."
  type        = number
  default     = 3
}
163+
133164
variable "coder_chart_version" {
134165
description = "Version of the Coder Helm chart to install. Defaults to latest."
135166
default = null

0 commit comments

Comments
 (0)