-
Notifications
You must be signed in to change notification settings - Fork 899
chore: add terraform for spinning up load test cluster #7504
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 24 commits
29016dc
8759e23
bef9887
ea42b44
9875f84
8fb3511
13ca9e4
3a3509b
41ab251
0bbf206
d4b1fe6
2b5a15b
dbcfc64
69bdfd1
99c0f3c
ccda05d
6ace619
01c6d39
9f7c165
660959c
34f8b02
75d1746
5a3c801
caa04d4
9419701
435e74d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# Load Test Terraform

This folder contains Terraform code and scripts to aid in performing load tests of Coder.
It does the following:

- Creates a GCP VPC.
- Creates a CloudSQL instance with a global peering rule so it's accessible inside the VPC.
- Creates a GKE cluster inside the VPC with separate nodegroups for Coder and workspaces.
- Installs Coder in a new namespace, using the CloudSQL instance.

## Usage

> You must have an existing Google Cloud project available.

1. Create a file named `override.tfvars` with the following content, modifying as appropriate:

   ```terraform
   name       = "some_unique_identifier"
   project_id = "some_google_project_id"
   ```

1. Inspect `vars.tf` and override any other variables you deem necessary.

1. Run `terraform init`.

1. Run `terraform plan -var-file=override.tfvars` and inspect the output.
   If you are not satisfied, modify `override.tfvars` until you are.

1. Run `terraform apply -var-file=override.tfvars`. This will spin up a pre-configured environment
   and emit the Coder URL as an output.

1. Run `coder_init.sh <coder_url>` to set up an initial user and a pre-configured Kubernetes
   template. It will also download the Coder CLI from the Coder instance locally.

1. Do whatever you need to do with the Coder instance.

   > To run Coder commands against the instance, you can use `coder_shim.sh <command>`.
   > You don't need to run `coder login` yourself.

1. When you are finished, you can run `terraform destroy -var-file=override.tfvars`.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,265 @@ | ||
# Credentials of the currently authenticated gcloud identity; used to
# authenticate the kubernetes and helm providers against the GKE cluster.
data "google_client_config" "default" {}

locals {
  coder_helm_repo    = "https://helm.coder.com/v2"
  coder_helm_chart   = "coder"
  coder_release_name = "coder-${var.name}"
  coder_namespace    = "coder-${var.name}"
  coder_admin_email  = "admin@coder.com"
  coder_admin_user   = "coder"
  coder_address      = google_compute_address.coder.address
  # NOTE(review): plain HTTP — acceptable for a throwaway load-test cluster,
  # but not TLS; confirm this is intentional.
  coder_url = "http://${google_compute_address.coder.address}"
}

provider "kubernetes" {
  host = "https://${google_container_cluster.primary.endpoint}"
  # Use index syntax instead of the legacy ".0." attribute form.
  cluster_ca_certificate = base64decode(google_container_cluster.primary.master_auth[0].cluster_ca_certificate)
  token                  = data.google_client_config.default.access_token
}

provider "helm" {
  kubernetes {
    host                   = "https://${google_container_cluster.primary.endpoint}"
    cluster_ca_certificate = base64decode(google_container_cluster.primary.master_auth[0].cluster_ca_certificate)
    token                  = data.google_client_config.default.access_token
  }
}
# Dedicated namespace for this load-test Coder deployment; suffixed with
# var.name so multiple deployments can coexist in one project.
resource "kubernetes_namespace" "coder_namespace" {
  metadata {
    name = local.coder_namespace
  }
  # Ensure the cluster has schedulable nodes before creating objects in it.
  depends_on = [
    google_container_node_pool.coder
  ]
}
# Admin password for the CloudSQL instance.
resource "random_password" "postgres-admin-password" {
  length = 12
}

# Password for the application-level (Coder) database user.
resource "random_password" "coder-postgres-password" {
  length = 12
}
# PostgreSQL connection URL for Coder, consumed by the Helm release via the
# CODER_PG_CONNECTION_URL secretKeyRef.
resource "kubernetes_secret" "coder-db" {
  type = "Opaque" # explicit; previously "" which relied on the provider default
  metadata {
    name      = "coder-db-url"
    namespace = kubernetes_namespace.coder_namespace.metadata[0].name
  }
  data = {
    # NOTE(review): sslmode=disable is fine for now inside the private VPC,
    # but TLS would be more realistic for a load test — TODO consider enabling.
    url = "postgres://${google_sql_user.coder.name}:${urlencode(random_password.coder-postgres-password.result)}@${google_sql_database_instance.db.private_ip_address}/${google_sql_database.coder.name}?sslmode=disable"
  }
}
# Installs Coder via the official Helm chart. Node affinity pins Coder pods to
# the dedicated "coder" node pool; pod anti-affinity prefers spreading replicas
# across nodes. readOnlyRootFilesystem is enabled, so an emptyDir is mounted at
# /tmp and CODER_CACHE_DIRECTORY points inside it.
resource "helm_release" "coder-chart" {
  repository = local.coder_helm_repo
  chart      = local.coder_helm_chart
  name       = local.coder_release_name
  version    = var.coder_chart_version
  namespace  = kubernetes_namespace.coder_namespace.metadata[0].name
  depends_on = [
    google_container_node_pool.coder,
  ]
  values = [<<EOF
coder:
  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
        - matchExpressions:
          - key: "cloud.google.com/gke-nodepool"
            operator: "In"
            values: ["${google_container_node_pool.coder.name}"]
    podAntiAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
      - weight: 1
        podAffinityTerm:
          topologyKey: "kubernetes.io/hostname"
          labelSelector:
            matchExpressions:
            - key: "app.kubernetes.io/instance"
              operator: "In"
              values: ["${local.coder_release_name}"]
  env:
    - name: "CODER_CACHE_DIRECTORY"
      value: "/tmp/coder"
    - name: "CODER_ENABLE_TELEMETRY"
      value: "false"
    - name: "CODER_LOGGING_HUMAN"
      value: "/dev/null"
    - name: "CODER_LOGGING_STACKDRIVER"
      value: "/dev/stderr"
    - name: "CODER_PG_CONNECTION_URL"
      valueFrom:
        secretKeyRef:
          name: "${kubernetes_secret.coder-db.metadata[0].name}"
          key: url
    - name: "CODER_PROMETHEUS_ENABLE"
      value: "true"
    - name: "CODER_VERBOSE"
      value: "true"
  image:
    repo: ${var.coder_image_repo}
    tag: ${var.coder_image_tag}
  replicaCount: "${var.coder_replicas}"
  resources:
    requests:
      cpu: "${var.coder_cpu}"
      memory: "${var.coder_mem}"
    limits:
      cpu: "${var.coder_cpu}"
      memory: "${var.coder_mem}"
  securityContext:
    readOnlyRootFilesystem: true
  service:
    enable: true
    loadBalancerIP: "${local.coder_address}"
  volumeMounts:
  - mountPath: "/tmp"
    name: cache
    readOnly: false
  volumes:
  - emptyDir:
      sizeLimit: 1024Mi
    name: cache
EOF
  ]
}
# Renders a Google Managed Prometheus PodMonitoring manifest to disk; it
# scrapes Coder's "prometheus-http" port every 30s. Written as a file and
# applied with kubectl because the kubernetes provider here does not manage
# this CRD.
resource "local_file" "coder-monitoring-manifest" {
  filename = "${path.module}/.coderv2/coder-monitoring.yaml"
  content  = <<EOF
apiVersion: monitoring.googleapis.com/v1
kind: PodMonitoring
metadata:
  namespace: ${kubernetes_namespace.coder_namespace.metadata[0].name}
  name: coder-monitoring
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: coder
  endpoints:
  - port: prometheus-http
    interval: 30s
EOF
}
# Applies the PodMonitoring manifest with kubectl, using a throwaway
# kubeconfig scoped to this cluster so the user's default context is untouched.
resource "null_resource" "coder-monitoring-manifest_apply" {
  # Re-run the provisioner whenever the rendered manifest changes; without
  # triggers a null_resource only ever runs once.
  triggers = {
    manifest_sha1 = sha1(local_file.coder-monitoring-manifest.content)
  }
  provisioner "local-exec" {
    working_dir = "${abspath(path.module)}/.coderv2"
    command     = <<EOF
KUBECONFIG=${var.name}-cluster.kubeconfig gcloud container clusters get-credentials ${var.name}-cluster --project=${var.project_id} --zone=${var.zone} && \
KUBECONFIG=${var.name}-cluster.kubeconfig kubectl apply -f ${abspath(local_file.coder-monitoring-manifest.filename)}
EOF
  }
}
# Renders a minimal Kubernetes workspace template to disk; coder_init.sh
# imports it into the Coder deployment after provisioning. $${...} sequences
# are escaped so they are evaluated by the template's own Terraform run, not
# by this configuration.
resource "local_file" "kubernetes_template" {
  filename = "${path.module}/.coderv2/templates/kubernetes/main.tf"
  content  = <<EOF
terraform {
  required_providers {
    coder = {
      source  = "coder/coder"
      version = "~> 0.7.0"
    }
    kubernetes = {
      source  = "hashicorp/kubernetes"
      version = "~> 2.18"
    }
  }
}

provider "coder" {}

provider "kubernetes" {
  config_path = null # always use host
}

data "coder_workspace" "me" {}

resource "coder_agent" "main" {
  os                     = "linux"
  arch                   = "amd64"
  startup_script_timeout = 180
  startup_script         = ""
}

resource "kubernetes_pod" "main" {
  count = data.coder_workspace.me.start_count
  metadata {
    name      = "coder-$${lower(data.coder_workspace.me.owner)}-$${lower(data.coder_workspace.me.name)}"
    namespace = "${kubernetes_namespace.coder_namespace.metadata[0].name}"
    labels = {
      "app.kubernetes.io/name"     = "coder-workspace"
      "app.kubernetes.io/instance" = "coder-workspace-$${lower(data.coder_workspace.me.owner)}-$${lower(data.coder_workspace.me.name)}"
    }
  }
  spec {
    security_context {
      run_as_user = "1000"
      fs_group    = "1000"
    }
    container {
      name              = "dev"
      image             = "${var.workspace_image}"
      image_pull_policy = "Always"
      command           = ["sh", "-c", coder_agent.main.init_script]
      security_context {
        run_as_user = "1000"
      }
      env {
        name  = "CODER_AGENT_TOKEN"
        value = coder_agent.main.token
      }
      resources {
        requests = {
          # Approximate observed idle usage of a workspace pod.
          "cpu"    = "0.1"
          "memory" = "128Mi"
        }
        limits = {
          "cpu"    = "1"
          "memory" = "1Gi"
        }
      }
    }

    affinity {
      # Pin workspaces to the dedicated workspaces node pool.
      node_affinity {
        required_during_scheduling_ignored_during_execution {
          node_selector_term {
            match_expressions {
              key      = "cloud.google.com/gke-nodepool"
              operator = "In"
              values   = ["${google_container_node_pool.workspaces.name}"]
            }
          }
        }
      }
      # NOTE(review): with workspaces >> nodes this preference has little
      # effect — confirm it is worth keeping.
      pod_affinity {
        preferred_during_scheduling_ignored_during_execution {
          weight = 1
          pod_affinity_term {
            topology_key = "kubernetes.io/hostname"
            label_selector {
              match_expressions {
                key      = "app.kubernetes.io/name"
                operator = "In"
                values   = ["coder-workspace"]
              }
            }
          }
        }
      }
    }
  }
}
EOF
}
# Emitted after apply; pass this URL to coder_init.sh.
output "coder_url" {
  description = "URL of the Coder deployment"
  value       = local.coder_url
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
#!/usr/bin/env bash

# coder_init.sh: bootstrap a freshly provisioned Coder deployment.
#
# Usage: coder_init.sh <coder URL>
#
# Downloads the Coder CLI from the instance itself, creates the first admin
# user, writes the generated credentials to ./.coderv2/coder.env, and imports
# the pre-rendered Kubernetes workspace template.

set -euo pipefail

if [[ $# -lt 1 ]]; then
	echo "Usage: $0 <coder URL>"
	exit 1
fi

# Allow toggling verbose output
[[ -n ${VERBOSE:-} ]] && set -x

CODER_URL=$1
CONFIG_DIR="${PWD}/.coderv2"
# Map the machine architecture to Coder's release naming. uname -m is
# POSIX-portable, unlike the previous arch(1); also handle ARM hosts, whose
# binaries are published as "arm64".
ARCH="$(uname -m)"
case "$ARCH" in
x86_64) ARCH="amd64" ;;
aarch64 | arm64) ARCH="arm64" ;;
esac
PLATFORM="$(uname | tr '[:upper:]' '[:lower:]')"

mkdir -p "${CONFIG_DIR}"
echo "Fetching Coder CLI for first-time setup!"
# -k: the load-test deployment is served over plain HTTP / without a trusted cert.
curl -fsSLk "${CODER_URL}/bin/coder-${PLATFORM}-${ARCH}" -o "${CONFIG_DIR}/coder"
chmod +x "${CONFIG_DIR}/coder"

# head closing the pipe makes tr exit non-zero on the infinite /dev/urandom
# stream, so tolerate pipe failures for this one command.
set +o pipefail
RANDOM_ADMIN_PASSWORD=$(tr </dev/urandom -dc _A-Z-a-z-0-9 | head -c16)
set -o pipefail
CODER_FIRST_USER_EMAIL="admin@coder.com"
CODER_FIRST_USER_USERNAME="coder"
CODER_FIRST_USER_PASSWORD="${RANDOM_ADMIN_PASSWORD}"
CODER_FIRST_USER_TRIAL="false"
echo "Running login command!"
"${CONFIG_DIR}/coder" login "${CODER_URL}" \
	--global-config="${CONFIG_DIR}" \
	--first-user-username="${CODER_FIRST_USER_USERNAME}" \
	--first-user-email="${CODER_FIRST_USER_EMAIL}" \
	--first-user-password="${CODER_FIRST_USER_PASSWORD}" \
	--first-user-trial=false

echo "Writing credentials to ${CONFIG_DIR}/coder.env"
cat <<EOF >"${CONFIG_DIR}/coder.env"
CODER_FIRST_USER_EMAIL=admin@coder.com
CODER_FIRST_USER_USERNAME=coder
CODER_FIRST_USER_PASSWORD="${RANDOM_ADMIN_PASSWORD}"
CODER_FIRST_USER_TRIAL="${CODER_FIRST_USER_TRIAL}"
EOF

echo "Importing kubernetes template"
"${CONFIG_DIR}/coder" templates create --global-config="${CONFIG_DIR}" \
	--directory "${CONFIG_DIR}/templates/kubernetes" --yes kubernetes
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see the value of including
var.name
in the Google resources, since there is a search you can do, but here I think it just makes things more wordy --- the idea is to have only one Coder instance per load-balancing cluster.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fair, and the namespace also includes
var.name
.