Skip to content

feat: add new scaletest infrastructure #15573

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 24 commits into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions scaletest/terraform/action/cf_dns.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# One DNS A record per entry in local.deployments, mapping the deployment's
# subdomain to the external address reserved in google_compute_address.coder.
resource "cloudflare_record" "coder" {
  for_each = local.deployments

  zone_id = var.cloudflare_zone_id
  type    = "A"
  name    = each.value.subdomain
  content = google_compute_address.coder[each.key].address
  ttl     = 3600
}
63 changes: 63 additions & 0 deletions scaletest/terraform/action/coder_proxies.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Requests the primary deployment URL after the primary Helm release and its
# DNS record exist, and fails unless the response status is HTTP 200.
data "http" "coder_healthy" {
  depends_on = [
    helm_release.coder_primary,
    cloudflare_record.coder["primary"],
  ]

  url = local.deployments.primary.url

  # Wait up to 5 minutes for DNS to propagate (30 attempts, >= 10s apart).
  retry {
    min_delay_ms = 10000
    attempts     = 30
  }

  lifecycle {
    postcondition {
      error_message = "${self.url} returned an unhealthy status code"
      condition     = self.status_code == 200
    }
  }
}

# Bootstraps the primary deployment through its HTTP API once it is healthy:
#   1. creates the first (admin) user,
#   2. logs in to obtain a session token,
#   3. uploads the license,
#   4. registers the "europe" and "asia" workspace proxies.
# The proxy tokens returned by step 4 are written to files under
# <module>/.coderv2/, which the data.local_file blocks read back.
#
# The script runs with `set -euo pipefail` and extracts tokens with `jq -e`,
# so a failed login or proxy registration aborts the provisioner instead of
# silently writing the literal string "null" into a token file.
resource "null_resource" "proxy_tokens" {
  provisioner "local-exec" {
    interpreter = ["/bin/bash", "-c"]
    command     = <<-EOF
      set -euo pipefail

      # Best effort with respect to HTTP status: the response is discarded, so
      # an "already exists" reply from the API does not abort the script.
      curl '${local.deployments.primary.url}/api/v2/users/first' \
        --data-raw $'{"email":"${local.coder_admin_email}","password":"${local.coder_admin_password}","username":"${local.coder_admin_user}","name":"${local.coder_admin_full_name}","trial":false}' \
        --insecure --silent --output /dev/null

      # jq -e exits non-zero when the selected field is null or missing.
      token=$(curl '${local.deployments.primary.url}/api/v2/users/login' \
        --data-raw $'{"email":"${local.coder_admin_email}","password":"${local.coder_admin_password}"}' \
        --insecure --silent | jq -er .session_token)

      curl '${local.deployments.primary.url}/api/v2/licenses' \
        -H "Coder-Session-Token: $${token}" \
        --data-raw '{"license":"${var.coder_license}"}' \
        --insecure --silent --output /dev/null

      europe_token=$(curl '${local.deployments.primary.url}/api/v2/workspaceproxies' \
        -H "Coder-Session-Token: $${token}" \
        --data-raw '{"name":"europe","display_name":"Europe","icon":"/emojis/1f950.png"}' \
        --insecure --silent | jq -er .proxy_token)

      asia_token=$(curl '${local.deployments.primary.url}/api/v2/workspaceproxies' \
        -H "Coder-Session-Token: $${token}" \
        --data-raw '{"name":"asia","display_name":"Asia","icon":"/emojis/1f35b.png"}' \
        --insecure --silent | jq -er .proxy_token)

      # Quote paths and values so whitespace or glob characters cannot alter
      # what is written; printf avoids echo's option/escape handling.
      mkdir -p "${path.module}/.coderv2"
      printf '%s' "$${europe_token}" > "${path.module}/.coderv2/europe_proxy_token"
      printf '%s' "$${asia_token}" > "${path.module}/.coderv2/asia_proxy_token"
    EOF
  }

  depends_on = [data.http.coder_healthy]
}

# Reads the "europe" proxy token file; ordered after null_resource.proxy_tokens,
# whose provisioner writes that file.
data "local_file" "europe_proxy_token" {
  depends_on = [null_resource.proxy_tokens]
  filename   = "${path.module}/.coderv2/europe_proxy_token"
}

# Reads the "asia" proxy token file; ordered after null_resource.proxy_tokens,
# whose provisioner writes that file.
data "local_file" "asia_proxy_token" {
  depends_on = [null_resource.proxy_tokens]
  filename   = "${path.module}/.coderv2/asia_proxy_token"
}
25 changes: 25 additions & 0 deletions scaletest/terraform/action/deployments.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
locals {
  # Per-deployment settings, keyed by deployment name. The keys are used as
  # for_each keys by the DNS, cluster, subnet and address resources.
  #   subdomain - DNS record name for the deployment
  #   url       - URL built from the same subdomain plus var.cloudflare_domain
  #   region    - GCP region for regional resources
  #   zone      - GCP zone used as the cluster location
  #   cidr      - primary IP range of the deployment's subnet
  deployments = {
    primary = {
      region    = "us-east1"
      zone      = "us-east1-c"
      cidr      = "10.200.0.0/24"
      subdomain = "${var.name}-scaletest"
      url       = "http://${var.name}-scaletest.${var.cloudflare_domain}"
    }

    europe = {
      region    = "europe-west1"
      zone      = "europe-west1-b"
      cidr      = "10.201.0.0/24"
      subdomain = "${var.name}-europe-scaletest"
      url       = "http://${var.name}-europe-scaletest.${var.cloudflare_domain}"
    }

    asia = {
      region    = "asia-southeast1"
      zone      = "asia-southeast1-a"
      cidr      = "10.202.0.0/24"
      subdomain = "${var.name}-asia-scaletest"
      url       = "http://${var.name}-asia-scaletest.${var.cloudflare_domain}"
    }
  }
}
123 changes: 123 additions & 0 deletions scaletest/terraform/action/gcp_clusters.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# Looks up the project's default Compute Engine service account.
data "google_compute_default_service_account" "default" {
  project = var.project_id

  # google_project_service.api uses for_each = toset(local.project_apis), so
  # its instance keys are the short API names ("compute"), not the full
  # "compute.googleapis.com" service names.
  depends_on = [google_project_service.api["compute"]]
}

locals {
  # One entry per (deployment, pool name) pair, keyed "<deployment>_<pool>",
  # e.g. "primary_coder" or "asia_misc". Each value carries:
  #   name    - node pool name ("coder", "workspaces" or "misc")
  #   cluster - key into local.deployments / google_container_cluster.cluster
  # Generated from local.deployments so the set of node pools cannot drift
  # from the set of clusters.
  node_pools = {
    for pair in setproduct(keys(local.deployments), ["coder", "workspaces", "misc"]) :
    "${pair[0]}_${pair[1]}" => {
      name    = pair[1]
      cluster = pair[0]
    }
  }
}

# One zonal GKE cluster per entry in local.deployments, attached to the shared
# VPC and the deployment's own subnet. The default node pool is removed; node
# pools are declared separately in google_container_node_pool.node_pool.
resource "google_container_cluster" "cluster" {
  for_each = local.deployments

  name     = "${var.name}-${each.key}"
  project  = var.project_id
  location = each.value.zone

  network                   = google_compute_network.vpc.name
  subnetwork                = google_compute_subnetwork.subnet[each.key].name
  networking_mode           = "VPC_NATIVE"
  default_max_pods_per_node = 256

  ip_allocation_policy { # Required with networking_mode=VPC_NATIVE

  }

  release_channel {
    # Setting release channel as STABLE can cause unexpected cluster upgrades.
    channel = "UNSPECIFIED"
  }

  initial_node_count       = 1
  remove_default_node_pool = true

  network_policy {
    enabled = true
  }

  monitoring_config {
    enable_components = ["SYSTEM_COMPONENTS"]
    managed_prometheus {
      enabled = false
    }
  }

  workload_identity_config {
    workload_pool = "${data.google_project.project.project_id}.svc.id.goog"
  }

  # google_project_service.api is keyed by short API name ("container"), not
  # by the full "container.googleapis.com" service name.
  depends_on = [
    google_project_service.api["container"]
  ]

  lifecycle {
    ignore_changes = [
      maintenance_policy,
      release_channel,
      remove_default_node_pool
    ]
  }
}

# One node pool per entry in local.node_pools. Size and machine type are looked
# up in local.scenarios by the selected scenario and the pool name.
resource "google_container_node_pool" "node_pool" {
  for_each = local.node_pools

  project  = var.project_id
  name     = each.value.name
  cluster  = google_container_cluster.cluster[each.value.cluster].name
  location = local.deployments[each.value.cluster].zone

  node_count = local.scenarios[var.scenario][each.value.name].nodepool_size

  node_config {
    machine_type    = local.scenarios[var.scenario][each.value.name].machine_type
    image_type      = "cos_containerd"
    disk_size_gb    = 100
    service_account = data.google_compute_default_service_account.default.email

    oauth_scopes = [
      "https://www.googleapis.com/auth/logging.write",
      "https://www.googleapis.com/auth/monitoring",
      "https://www.googleapis.com/auth/trace.append",
      "https://www.googleapis.com/auth/devstorage.read_only",
      "https://www.googleapis.com/auth/service.management.readonly",
      "https://www.googleapis.com/auth/servicecontrol",
    ]

    tags = ["gke-node", "${var.project_id}-gke"]

    labels = {
      env = var.project_id
    }

    metadata = {
      disable-legacy-endpoints = "true"
    }
  }

  lifecycle {
    # Do not treat changes to these attributes as drift.
    ignore_changes = [
      management[0].auto_repair,
      management[0].auto_upgrade,
      timeouts,
    ]
  }
}
89 changes: 89 additions & 0 deletions scaletest/terraform/action/gcp_db.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Zonal Cloud SQL Postgres 14 instance in the primary deployment's region and
# zone, reachable only over the VPC's private network (no public IPv4).
# Tier and max_connections come from the selected scenario.
resource "google_sql_database_instance" "db" {
  project             = var.project_id
  name                = "${var.name}-coder"
  region              = local.deployments.primary.region
  database_version    = "POSTGRES_14"
  deletion_protection = false

  settings {
    tier              = local.scenarios[var.scenario].cloudsql.tier
    availability_type = "ZONAL"
    activation_policy = "ALWAYS"

    location_preference {
      zone = local.deployments.primary.zone
    }

    ip_configuration {
      ipv4_enabled    = false
      private_network = google_compute_network.vpc.id
    }

    database_flags {
      name  = "max_connections"
      value = local.scenarios[var.scenario].cloudsql.max_connections
    }

    insights_config {
      query_insights_enabled  = true
      query_string_length     = 1024
      record_application_tags = false
      record_client_address   = false
    }
  }

  # Created only after the private services connection for the VPC exists.
  depends_on = [google_service_networking_connection.private_vpc_connection]

  lifecycle {
    ignore_changes = [deletion_protection, timeouts]
  }
}

# Database for Coder on the Cloud SQL instance.
resource "google_sql_database" "coder" {
  project  = var.project_id
  name     = "${var.name}-coder"
  instance = google_sql_database_instance.db.id

  # required for postgres, otherwise db fails to delete
  deletion_policy = "ABANDON"

  lifecycle {
    ignore_changes = [deletion_policy]
  }
}

# Generated 12-character password for the google_sql_user.coder Postgres user.
resource "random_password" "coder_postgres_password" {
  length = 12
}

# Generated 12-character password for the google_sql_user.prometheus Postgres user.
resource "random_password" "prometheus_postgres_password" {
  length = 12
}

# Built-in Postgres user for Coder, using the generated password.
resource "google_sql_user" "coder" {
  project  = var.project_id
  instance = google_sql_database_instance.db.id
  type     = "BUILT_IN"
  name     = "${var.name}-coder"
  password = random_password.coder_postgres_password.result

  # required for postgres, otherwise user fails to delete
  deletion_policy = "ABANDON"

  lifecycle {
    ignore_changes = [deletion_policy, password]
  }
}

# Built-in Postgres user for Prometheus, using the generated password.
resource "google_sql_user" "prometheus" {
  project  = var.project_id
  instance = google_sql_database_instance.db.id
  type     = "BUILT_IN"
  name     = "${var.name}-prometheus"
  password = random_password.prometheus_postgres_password.result

  # required for postgres, otherwise user fails to delete
  deletion_policy = "ABANDON"

  lifecycle {
    ignore_changes = [deletion_policy, password]
  }
}

locals {
  # Postgres connection URL for Coder: user, URL-encoded password, the
  # instance's private IP address and the database name, with sslmode=disable.
  coder_db_url = format(
    "postgres://%s:%s@%s/%s?sslmode=disable",
    google_sql_user.coder.name,
    urlencode(random_password.coder_postgres_password.result),
    google_sql_database_instance.db.private_ip_address,
    google_sql_database.coder.name,
  )
}
27 changes: 27 additions & 0 deletions scaletest/terraform/action/gcp_project.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
locals {
  # Short names of the Google APIs to enable on the project.
  # google_project_service.api appends ".googleapis.com" to each entry and
  # uses these short names as its for_each keys.
  project_apis = [
    "cloudtrace",
    "compute",
    "container",
    "logging",
    "monitoring",
    "servicemanagement",
    "servicenetworking",
    "sqladmin",
    "stackdriver",
    "storage-api",
  ]
}

# Looks up the target GCP project by var.project_id.
data "google_project" "project" {
  project_id = var.project_id
}

# Enables each API listed in local.project_apis on the project. Instances are
# keyed by the short API name (e.g. "compute"). APIs are left enabled on destroy.
resource "google_project_service" "api" {
  for_each = toset(local.project_apis)

  project = data.google_project.project.project_id
  service = "${each.key}.googleapis.com"

  disable_on_destroy         = false
  disable_dependent_services = false
}
42 changes: 42 additions & 0 deletions scaletest/terraform/action/gcp_vpc.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@

# Single custom-mode VPC shared by all deployments; subnets are declared
# explicitly in google_compute_subnetwork.subnet.
resource "google_compute_network" "vpc" {
  project                 = var.project_id
  name                    = var.name
  auto_create_subnetworks = false

  # google_project_service.api is keyed by short API name ("compute"), not by
  # the full "compute.googleapis.com" service name.
  depends_on = [
    google_project_service.api["compute"]
  ]
}

# One subnet per deployment, in the deployment's region, using its CIDR range.
resource "google_compute_subnetwork" "subnet" {
  for_each = local.deployments

  project       = var.project_id
  network       = google_compute_network.vpc.name
  name          = "${var.name}-${each.key}"
  region        = each.value.region
  ip_cidr_range = each.value.cidr
}

# One regional external (premium tier) IP address per deployment; the
# cloudflare_record.coder A records point at these addresses.
resource "google_compute_address" "coder" {
  for_each = local.deployments

  project      = var.project_id
  name         = "${var.name}-${each.key}-coder"
  region       = each.value.region
  network_tier = "PREMIUM"
  address_type = "EXTERNAL"
}

# Internal /16 range reserved in the VPC for VPC peering; passed to the
# service networking connection as its reserved peering range.
resource "google_compute_global_address" "sql_peering" {
  project       = var.project_id
  network       = google_compute_network.vpc.id
  name          = "${var.name}-sql-peering"
  address_type  = "INTERNAL"
  purpose       = "VPC_PEERING"
  prefix_length = 16
}

# Private services connection between the VPC and servicenetworking.googleapis.com,
# using the reserved sql_peering range. The Cloud SQL instance depends on it.
resource "google_service_networking_connection" "private_vpc_connection" {
  network                 = google_compute_network.vpc.id
  service                 = "servicenetworking.googleapis.com"
  reserved_peering_ranges = [google_compute_global_address.sql_peering.name]

  # Nothing else orders this resource after the Service Networking API is
  # enabled on the project, so make that dependency explicit.
  depends_on = [google_project_service.api["servicenetworking"]]
}
Loading
Loading