Skip to content

fix(scaletest): deploy external provisionerd #8618

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
167 changes: 166 additions & 1 deletion scaletest/terraform/coder.tf
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ resource "random_password" "prometheus-postgres-password" {
}

resource "kubernetes_secret" "coder-db" {
type = "" # Opaque
type = "Opaque"
metadata {
name = "coder-db-url"
namespace = local.coder_namespace
Expand Down Expand Up @@ -125,6 +125,9 @@ coder:
value: "${var.coder_experiments}"
- name: "CODER_DANGEROUS_DISABLE_RATE_LIMITS"
value: "true"
# Disabling built-in provisioner daemons
- name: "CODER_PROVISIONER_DAEMONS"
value: "0"
image:
repo: ${var.coder_image_repo}
tag: ${var.coder_image_tag}
Expand Down Expand Up @@ -242,6 +245,168 @@ resource "local_file" "kubernetes_template" {
EOF
}

# TODO(cian): Remove this when we have support in the Helm chart.
# Ref: https://github.com/coder/coder/issues/8243
#
# Renders a Kubernetes Deployment manifest that runs dedicated external
# provisionerd replicas. The built-in provisioners on the main coderd pods
# are disabled via CODER_PROVISIONER_DAEMONS=0 in the Helm values, so these
# pods perform all template builds for the scale test.
resource "local_file" "provisionerd_deployment" {
  filename = "${path.module}/../.coderv2/provisionerd-deployment.yaml"
  content  = <<EOF
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app.kubernetes.io/instance: ${var.name}
    app.kubernetes.io/name: provisionerd
  name: provisionerd
  namespace: ${local.coder_namespace}
spec:
  replicas: ${var.provisionerd_replicas}
  selector:
    matchLabels:
      app.kubernetes.io/instance: ${var.name}
      app.kubernetes.io/name: provisionerd
  strategy:
    rollingUpdate:
      maxSurge: 25%
      maxUnavailable: 25%
    type: RollingUpdate
  template:
    metadata:
      creationTimestamp: null
      labels:
        app.kubernetes.io/instance: ${var.name}
        app.kubernetes.io/name: provisionerd
    spec:
      affinity:
        nodeAffinity:
          # Pin provisionerd pods to the dedicated Coder node pool.
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: cloud.google.com/gke-nodepool
                operator: In
                values:
                - ${google_container_node_pool.coder.name}
        podAntiAffinity:
          # Prefer spreading replicas across nodes, but do not require it.
          preferredDuringSchedulingIgnoredDuringExecution:
          - podAffinityTerm:
              labelSelector:
                matchExpressions:
                - key: app.kubernetes.io/instance
                  operator: In
                  values:
                  - ${var.name}
              topologyKey: kubernetes.io/hostname
            weight: 1
      containers:
      - args:
        - server
        command:
        - /opt/coder
        env:
        - name: CODER_HTTP_ADDRESS
          value: 0.0.0.0:8080
        - name: CODER_PROMETHEUS_ADDRESS
          value: 0.0.0.0:2112
        - name: CODER_ACCESS_URL
          value: ${local.coder_url}
        - name: CODER_CACHE_DIRECTORY
          value: /tmp/coder
        - name: CODER_ENABLE_TELEMETRY
          value: "false"
        - name: CODER_LOGGING_HUMAN
          value: /dev/null
        - name: CODER_LOGGING_STACKDRIVER
          value: /dev/stderr
        - name: CODER_PG_CONNECTION_URL
          valueFrom:
            secretKeyRef:
              key: url
              name: coder-db-url
        - name: CODER_PPROF_ENABLE
          value: "true"
        - name: CODER_PROMETHEUS_ENABLE
          value: "true"
        - name: CODER_PROMETHEUS_COLLECT_AGENT_STATS
          value: "true"
        - name: CODER_PROMETHEUS_COLLECT_DB_METRICS
          value: "true"
        - name: CODER_VERBOSE
          value: "true"
        # Number of concurrent provisioner jobs per replica.
        - name: CODER_PROVISIONER_DAEMONS
          value: "${var.provisionerd_concurrency}"
        image: "${var.coder_image_repo}:${var.coder_image_tag}"
        imagePullPolicy: IfNotPresent
        lifecycle: {}
        livenessProbe:
          failureThreshold: 3
          httpGet:
            path: /api/v2/buildinfo
            port: http
            scheme: HTTP
          periodSeconds: 10
          successThreshold: 1
          timeoutSeconds: 1
        name: provisionerd
        ports:
        - containerPort: 8080
          name: http
          protocol: TCP
        - containerPort: 2112
          name: prometheus-http
          protocol: TCP
        readinessProbe:
          failureThreshold: 3
          httpGet:
            path: /api/v2/buildinfo
            port: http
            scheme: HTTP
          periodSeconds: 10
          successThreshold: 1
          timeoutSeconds: 1
        resources:
          limits:
            cpu: "${var.provisionerd_cpu_limit}"
            memory: "${var.provisionerd_mem_limit}"
          requests:
            cpu: "${var.provisionerd_cpu_request}"
            memory: "${var.provisionerd_mem_request}"
        securityContext:
          allowPrivilegeEscalation: false
          readOnlyRootFilesystem: true
          runAsGroup: 1000
          runAsNonRoot: true
          runAsUser: 1000
          seccompProfile:
            type: RuntimeDefault
        terminationMessagePath: /dev/termination-log
        terminationMessagePolicy: File
        volumeMounts:
        # CODER_CACHE_DIRECTORY lives under /tmp; back it with an emptyDir
        # since the root filesystem is read-only.
        - mountPath: /tmp
          name: cache
      dnsPolicy: ClusterFirst
      restartPolicy: Always
      serviceAccount: coder
      serviceAccountName: coder
      terminationGracePeriodSeconds: 60
      volumes:
      - emptyDir:
          sizeLimit: 10Gi
        name: cache
EOF
}

# Applies the rendered provisionerd manifest with kubectl, since the Helm
# chart cannot deploy external provisioners yet (see the issue referenced on
# the local_file resource above this is a workaround for).
resource "null_resource" "provisionerd_deployment_apply" {
# Wait for the Coder chart (the namespace/serviceaccount the Deployment
# references) and for both the manifest and kubeconfig files to exist.
depends_on = [helm_release.coder-chart, local_file.provisionerd_deployment, null_resource.cluster_kubeconfig]
# Re-run the apply whenever either path changes.
triggers = {
kubeconfig_path = local.cluster_kubeconfig_path
manifest_path = local_file.provisionerd_deployment.filename
}
provisioner "local-exec" {
command = <<EOF
KUBECONFIG=${self.triggers.kubeconfig_path} kubectl apply -f ${self.triggers.manifest_path}
EOF
}
}

resource "local_file" "output_vars" {
filename = "${path.module}/../.coderv2/url"
content = local.coder_url
Expand Down
3 changes: 2 additions & 1 deletion scaletest/terraform/gcp_cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ resource "google_container_cluster" "primary" {

}
release_channel {
channel = "STABLE"
# Setting release channel as STABLE can cause unexpected cluster upgrades.
channel = "UNSPECIFIED"
}
initial_node_count = 1
remove_default_node_pool = true
Expand Down
31 changes: 31 additions & 0 deletions scaletest/terraform/vars.tf
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,37 @@ variable "coder_mem_limit" {
default = "1024Mi"
}

// Allow independently scaling provisionerd resources
variable "provisionerd_cpu_request" {
  description = "CPU request to allocate to provisionerd."
  # Explicit type constraint: Kubernetes CPU quantities are strings (e.g. "500m").
  type    = string
  default = "500m"
}

variable "provisionerd_mem_request" {
  description = "Memory request to allocate to provisionerd."
  # Explicit type constraint: Kubernetes memory quantities are strings (e.g. "512Mi").
  type    = string
  default = "512Mi"
}

variable "provisionerd_cpu_limit" {
  description = "CPU limit to allocate to provisionerd."
  # Explicit type constraint: Kubernetes CPU quantities are strings (e.g. "1000m").
  type    = string
  default = "1000m"
}

variable "provisionerd_mem_limit" {
  description = "Memory limit to allocate to provisionerd."
  # Explicit type constraint: Kubernetes memory quantities are strings (e.g. "1024Mi").
  type    = string
  default = "1024Mi"
}

variable "provisionerd_replicas" {
  description = "Number of Provisionerd replicas."
  # Explicit numeric type; interpolated into the Deployment's spec.replicas.
  type    = number
  default = 1
}

variable "provisionerd_concurrency" {
  description = "Number of concurrent provisioner jobs per provisionerd instance."
  # Explicit numeric type; interpolated into CODER_PROVISIONER_DAEMONS.
  type    = number
  default = 3
}

variable "coder_chart_version" {
description = "Version of the Coder Helm chart to install. Defaults to latest."
default = null
Expand Down