From 7977fa87aa620bac05c28d2e16c4ac30231f89d7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Aug 2025 01:30:33 +0000 Subject: [PATCH 1/6] chore: bump coder/claude-code/coder from 2.0.7 to 2.1.0 in /dogfood/coder (#19512) [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=coder/claude-code/coder&package-manager=terraform&previous-version=2.0.7&new-version=2.1.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- dogfood/coder/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dogfood/coder/main.tf b/dogfood/coder/main.tf index a464972cb05b6..dd3001909f08b 100644 --- a/dogfood/coder/main.tf +++ b/dogfood/coder/main.tf @@ -473,7 +473,7 @@ module "devcontainers-cli" { module "claude-code" { count = local.has_ai_prompt ? data.coder_workspace.me.start_count : 0 source = "dev.registry.coder.com/coder/claude-code/coder" - version = "2.0.7" + version = "2.1.0" agent_id = coder_agent.dev.id folder = local.repo_dir install_claude_code = true From 3fadf1ae6e7a6ac4c670229abcdf3677dc64385d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Aug 2025 01:31:29 +0000 Subject: [PATCH 2/6] chore: bump coder/vscode-web/coder from 1.3.1 to 1.4.1 in /dogfood/coder (#19513) [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=coder/vscode-web/coder&package-manager=terraform&previous-version=1.3.1&new-version=1.4.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- dogfood/coder/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dogfood/coder/main.tf b/dogfood/coder/main.tf index dd3001909f08b..3c1a5ca4d0fdd 100644 --- a/dogfood/coder/main.tf +++ b/dogfood/coder/main.tf @@ -395,7 +395,7 @@ module "code-server" { module "vscode-web" { count = contains(jsondecode(data.coder_parameter.ide_choices.value), "vscode-web") ? data.coder_workspace.me.start_count : 0 source = "dev.registry.coder.com/coder/vscode-web/coder" - version = "1.3.1" + version = "1.4.1" agent_id = coder_agent.dev.id folder = local.repo_dir extensions = ["github.copilot"] From 236844e5cce533e2197d12f78e11a144643ce6ec Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Aug 2025 01:33:01 +0000 Subject: [PATCH 3/6] chore: bump coder/cursor/coder from 1.3.0 to 1.3.1 in /dogfood/coder (#19514) [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=coder/cursor/coder&package-manager=terraform&previous-version=1.3.0&new-version=1.3.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- dogfood/coder/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dogfood/coder/main.tf b/dogfood/coder/main.tf index 3c1a5ca4d0fdd..e6a294b09e28e 100644 --- a/dogfood/coder/main.tf +++ b/dogfood/coder/main.tf @@ -432,7 +432,7 @@ module "coder-login" { module "cursor" { count = contains(jsondecode(data.coder_parameter.ide_choices.value), "cursor") ? data.coder_workspace.me.start_count : 0 source = "dev.registry.coder.com/coder/cursor/coder" - version = "1.3.0" + version = "1.3.1" agent_id = coder_agent.dev.id folder = local.repo_dir } From 5145cd002dcdd10f8f7547839c98b730503e3558 Mon Sep 17 00:00:00 2001 From: Ethan <39577870+ethanndickson@users.noreply.github.com> Date: Mon, 25 Aug 2025 12:25:09 +1000 Subject: [PATCH 4/6] chore(scaletest): add tls to infrastructure (#19412) Closes https://github.com/coder/internal/issues/850 This PR has the scaletest infrastructure retrieve and use TLS certificates from the persistent observability cluster. To support creating multiple instances of the infrastructure simultaneously, `var.name` can be set to `alpha`, `bravo` or `charlie`, which retrieves the corresponding certificates. Also: - Adds support for wildcard apps. - Retrieves the Cloudflare token from GCP secrets. 
--- .editorconfig | 2 +- scaletest/terraform/action/cf_dns.tf | 11 ++- .../terraform/action/coder_helm_values.tftpl | 9 ++ scaletest/terraform/action/gcp_clusters.tf | 43 +++++--- scaletest/terraform/action/k8s_coder_asia.tf | 97 +++++++++++-------- .../terraform/action/k8s_coder_europe.tf | 97 +++++++++++-------- .../terraform/action/k8s_coder_primary.tf | 97 +++++++++++-------- scaletest/terraform/action/main.tf | 13 +++ scaletest/terraform/action/tls.tf | 13 +++ scaletest/terraform/action/vars.tf | 21 +++- 10 files changed, 270 insertions(+), 133 deletions(-) create mode 100644 scaletest/terraform/action/tls.tf diff --git a/.editorconfig b/.editorconfig index 419ae5b6d16d2..554e8a73ffeda 100644 --- a/.editorconfig +++ b/.editorconfig @@ -7,7 +7,7 @@ trim_trailing_whitespace = true insert_final_newline = true indent_style = tab -[*.{yaml,yml,tf,tfvars,nix}] +[*.{yaml,yml,tf,tftpl,tfvars,nix}] indent_style = space indent_size = 2 diff --git a/scaletest/terraform/action/cf_dns.tf b/scaletest/terraform/action/cf_dns.tf index 664b909ae90b2..126c35c12cc76 100644 --- a/scaletest/terraform/action/cf_dns.tf +++ b/scaletest/terraform/action/cf_dns.tf @@ -5,8 +5,17 @@ data "cloudflare_zone" "domain" { resource "cloudflare_record" "coder" { for_each = local.deployments zone_id = data.cloudflare_zone.domain.zone_id - name = each.value.subdomain + name = "${each.value.subdomain}.${var.cloudflare_domain}" content = google_compute_address.coder[each.key].address type = "A" ttl = 3600 } + +resource "cloudflare_record" "coder_wildcard" { + for_each = local.deployments + zone_id = data.cloudflare_zone.domain.id + name = each.value.wildcard_subdomain + content = cloudflare_record.coder[each.key].name + type = "CNAME" + ttl = 3600 +} diff --git a/scaletest/terraform/action/coder_helm_values.tftpl b/scaletest/terraform/action/coder_helm_values.tftpl index be24bf61cd5e3..3fc8d5dfd4226 100644 --- a/scaletest/terraform/action/coder_helm_values.tftpl +++ 
b/scaletest/terraform/action/coder_helm_values.tftpl @@ -22,6 +22,8 @@ coder: %{~ if workspace_proxy ~} - name: "CODER_ACCESS_URL" value: "${access_url}" + - name: "CODER_WILDCARD_ACCESS_URL" + value: "${wildcard_access_url}" - name: CODER_PRIMARY_ACCESS_URL value: "${primary_url}" - name: CODER_PROXY_SESSION_TOKEN @@ -45,6 +47,8 @@ coder: %{~ if !workspace_proxy && !provisionerd ~} - name: "CODER_ACCESS_URL" value: "${access_url}" + - name: "CODER_WILDCARD_ACCESS_URL" + value: "${wildcard_access_url}" - name: "CODER_PG_CONNECTION_URL" valueFrom: secretKeyRef: @@ -109,3 +113,8 @@ coder: - emptyDir: sizeLimit: 1024Mi name: cache + %{~ if !provisionerd ~} + tls: + secretNames: + - "${tls_secret_name}" + %{~ endif ~} diff --git a/scaletest/terraform/action/gcp_clusters.tf b/scaletest/terraform/action/gcp_clusters.tf index 5681ff8b44ce5..5987d07db03ad 100644 --- a/scaletest/terraform/action/gcp_clusters.tf +++ b/scaletest/terraform/action/gcp_clusters.tf @@ -6,25 +6,31 @@ data "google_compute_default_service_account" "default" { locals { deployments = { primary = { - subdomain = "${var.name}-scaletest" - url = "http://${var.name}-scaletest.${var.cloudflare_domain}" - region = "us-east1" - zone = "us-east1-c" - subnet = "scaletest" + subdomain = "primary.${var.name}" + wildcard_subdomain = "*.primary.${var.name}" + url = "https://primary.${var.name}.${var.cloudflare_domain}" + wildcard_access_url = "*.primary.${var.name}.${var.cloudflare_domain}" + region = "us-east1" + zone = "us-east1-c" + subnet = "scaletest" } europe = { - subdomain = "${var.name}-europe-scaletest" - url = "http://${var.name}-europe-scaletest.${var.cloudflare_domain}" - region = "europe-west1" - zone = "europe-west1-b" - subnet = "scaletest" + subdomain = "europe.${var.name}" + wildcard_subdomain = "*.europe.${var.name}" + url = "https://europe.${var.name}.${var.cloudflare_domain}" + wildcard_access_url = "*.europe.${var.name}.${var.cloudflare_domain}" + region = "europe-west1" + zone = 
"europe-west1-b" + subnet = "scaletest" } asia = { - subdomain = "${var.name}-asia-scaletest" - url = "http://${var.name}-asia-scaletest.${var.cloudflare_domain}" - region = "asia-southeast1" - zone = "asia-southeast1-a" - subnet = "scaletest" + subdomain = "asia.${var.name}" + wildcard_subdomain = "*.asia.${var.name}" + url = "https://asia.${var.name}.${var.cloudflare_domain}" + wildcard_access_url = "*.asia.${var.name}.${var.cloudflare_domain}" + region = "asia-southeast1" + zone = "asia-southeast1-a" + subnet = "scaletest" } } node_pools = { @@ -146,6 +152,11 @@ resource "google_container_node_pool" "node_pool" { } } lifecycle { - ignore_changes = [management[0].auto_repair, management[0].auto_upgrade, timeouts] + ignore_changes = [ + management[0].auto_repair, + management[0].auto_upgrade, + timeouts, + node_config[0].resource_labels + ] } } diff --git a/scaletest/terraform/action/k8s_coder_asia.tf b/scaletest/terraform/action/k8s_coder_asia.tf index 307a50136ec28..33df0e08dcfcf 100644 --- a/scaletest/terraform/action/k8s_coder_asia.tf +++ b/scaletest/terraform/action/k8s_coder_asia.tf @@ -43,6 +43,23 @@ resource "kubernetes_secret" "proxy_token_asia" { } } +resource "kubernetes_secret" "coder_tls_asia" { + provider = kubernetes.asia + + type = "kubernetes.io/tls" + metadata { + name = "coder-tls" + namespace = kubernetes_namespace.coder_asia.metadata.0.name + } + data = { + "tls.crt" = data.kubernetes_secret.coder_tls["asia"].data["tls.crt"] + "tls.key" = data.kubernetes_secret.coder_tls["asia"].data["tls.key"] + } + lifecycle { + ignore_changes = [timeouts, wait_for_service_account_token] + } +} + resource "helm_release" "coder_asia" { provider = helm.asia @@ -52,25 +69,27 @@ resource "helm_release" "coder_asia" { version = var.coder_chart_version namespace = kubernetes_namespace.coder_asia.metadata.0.name values = [templatefile("${path.module}/coder_helm_values.tftpl", { - workspace_proxy = true, - provisionerd = false, - primary_url = 
local.deployments.primary.url, - proxy_token = kubernetes_secret.proxy_token_asia.metadata.0.name, - db_secret = null, - ip_address = google_compute_address.coder["asia"].address, - provisionerd_psk = null, - access_url = local.deployments.asia.url, - node_pool = google_container_node_pool.node_pool["asia_coder"].name, - release_name = local.coder_release_name, - experiments = var.coder_experiments, - image_repo = var.coder_image_repo, - image_tag = var.coder_image_tag, - replicas = local.scenarios[var.scenario].coder.replicas, - cpu_request = local.scenarios[var.scenario].coder.cpu_request, - mem_request = local.scenarios[var.scenario].coder.mem_request, - cpu_limit = local.scenarios[var.scenario].coder.cpu_limit, - mem_limit = local.scenarios[var.scenario].coder.mem_limit, - deployment = "asia", + workspace_proxy = true, + provisionerd = false, + primary_url = local.deployments.primary.url, + proxy_token = kubernetes_secret.proxy_token_asia.metadata.0.name, + db_secret = null, + ip_address = google_compute_address.coder["asia"].address, + provisionerd_psk = null, + access_url = local.deployments.asia.url, + wildcard_access_url = local.deployments.asia.wildcard_access_url, + node_pool = google_container_node_pool.node_pool["asia_coder"].name, + release_name = local.coder_release_name, + experiments = var.coder_experiments, + image_repo = var.coder_image_repo, + image_tag = var.coder_image_tag, + replicas = local.scenarios[var.scenario].coder.replicas, + cpu_request = local.scenarios[var.scenario].coder.cpu_request, + mem_request = local.scenarios[var.scenario].coder.mem_request, + cpu_limit = local.scenarios[var.scenario].coder.cpu_limit, + mem_limit = local.scenarios[var.scenario].coder.mem_limit, + deployment = "asia", + tls_secret_name = kubernetes_secret.coder_tls_asia.metadata.0.name, })] depends_on = [null_resource.license] @@ -85,25 +104,27 @@ resource "helm_release" "provisionerd_asia" { version = var.provisionerd_chart_version namespace = 
kubernetes_namespace.coder_asia.metadata.0.name values = [templatefile("${path.module}/coder_helm_values.tftpl", { - workspace_proxy = false, - provisionerd = true, - primary_url = null, - proxy_token = null, - db_secret = null, - ip_address = null, - provisionerd_psk = kubernetes_secret.provisionerd_psk_asia.metadata.0.name, - access_url = local.deployments.primary.url, - node_pool = google_container_node_pool.node_pool["asia_coder"].name, - release_name = local.coder_release_name, - experiments = var.coder_experiments, - image_repo = var.coder_image_repo, - image_tag = var.coder_image_tag, - replicas = local.scenarios[var.scenario].provisionerd.replicas, - cpu_request = local.scenarios[var.scenario].provisionerd.cpu_request, - mem_request = local.scenarios[var.scenario].provisionerd.mem_request, - cpu_limit = local.scenarios[var.scenario].provisionerd.cpu_limit, - mem_limit = local.scenarios[var.scenario].provisionerd.mem_limit, - deployment = "asia", + workspace_proxy = false, + provisionerd = true, + primary_url = null, + proxy_token = null, + db_secret = null, + ip_address = null, + provisionerd_psk = kubernetes_secret.provisionerd_psk_asia.metadata.0.name, + access_url = local.deployments.primary.url, + wildcard_access_url = null, + node_pool = google_container_node_pool.node_pool["asia_coder"].name, + release_name = local.coder_release_name, + experiments = var.coder_experiments, + image_repo = var.coder_image_repo, + image_tag = var.coder_image_tag, + replicas = local.scenarios[var.scenario].provisionerd.replicas, + cpu_request = local.scenarios[var.scenario].provisionerd.cpu_request, + mem_request = local.scenarios[var.scenario].provisionerd.mem_request, + cpu_limit = local.scenarios[var.scenario].provisionerd.cpu_limit, + mem_limit = local.scenarios[var.scenario].provisionerd.mem_limit, + deployment = "asia", + tls_secret_name = null, })] depends_on = [null_resource.license] diff --git a/scaletest/terraform/action/k8s_coder_europe.tf 
b/scaletest/terraform/action/k8s_coder_europe.tf index b6169c84a5da2..efb80498c2ad4 100644 --- a/scaletest/terraform/action/k8s_coder_europe.tf +++ b/scaletest/terraform/action/k8s_coder_europe.tf @@ -43,6 +43,23 @@ resource "kubernetes_secret" "proxy_token_europe" { } } +resource "kubernetes_secret" "coder_tls_europe" { + provider = kubernetes.europe + + type = "kubernetes.io/tls" + metadata { + name = "coder-tls" + namespace = kubernetes_namespace.coder_europe.metadata.0.name + } + data = { + "tls.crt" = data.kubernetes_secret.coder_tls["europe"].data["tls.crt"] + "tls.key" = data.kubernetes_secret.coder_tls["europe"].data["tls.key"] + } + lifecycle { + ignore_changes = [timeouts, wait_for_service_account_token] + } +} + resource "helm_release" "coder_europe" { provider = helm.europe @@ -52,25 +69,27 @@ resource "helm_release" "coder_europe" { version = var.coder_chart_version namespace = kubernetes_namespace.coder_europe.metadata.0.name values = [templatefile("${path.module}/coder_helm_values.tftpl", { - workspace_proxy = true, - provisionerd = false, - primary_url = local.deployments.primary.url, - proxy_token = kubernetes_secret.proxy_token_europe.metadata.0.name, - db_secret = null, - ip_address = google_compute_address.coder["europe"].address, - provisionerd_psk = null, - access_url = local.deployments.europe.url, - node_pool = google_container_node_pool.node_pool["europe_coder"].name, - release_name = local.coder_release_name, - experiments = var.coder_experiments, - image_repo = var.coder_image_repo, - image_tag = var.coder_image_tag, - replicas = local.scenarios[var.scenario].coder.replicas, - cpu_request = local.scenarios[var.scenario].coder.cpu_request, - mem_request = local.scenarios[var.scenario].coder.mem_request, - cpu_limit = local.scenarios[var.scenario].coder.cpu_limit, - mem_limit = local.scenarios[var.scenario].coder.mem_limit, - deployment = "europe", + workspace_proxy = true, + provisionerd = false, + primary_url = 
local.deployments.primary.url, + proxy_token = kubernetes_secret.proxy_token_europe.metadata.0.name, + db_secret = null, + ip_address = google_compute_address.coder["europe"].address, + provisionerd_psk = null, + access_url = local.deployments.europe.url, + wildcard_access_url = local.deployments.europe.wildcard_access_url, + node_pool = google_container_node_pool.node_pool["europe_coder"].name, + release_name = local.coder_release_name, + experiments = var.coder_experiments, + image_repo = var.coder_image_repo, + image_tag = var.coder_image_tag, + replicas = local.scenarios[var.scenario].coder.replicas, + cpu_request = local.scenarios[var.scenario].coder.cpu_request, + mem_request = local.scenarios[var.scenario].coder.mem_request, + cpu_limit = local.scenarios[var.scenario].coder.cpu_limit, + mem_limit = local.scenarios[var.scenario].coder.mem_limit, + deployment = "europe", + tls_secret_name = kubernetes_secret.coder_tls_europe.metadata.0.name, })] depends_on = [null_resource.license] @@ -85,25 +104,27 @@ resource "helm_release" "provisionerd_europe" { version = var.provisionerd_chart_version namespace = kubernetes_namespace.coder_europe.metadata.0.name values = [templatefile("${path.module}/coder_helm_values.tftpl", { - workspace_proxy = false, - provisionerd = true, - primary_url = null, - proxy_token = null, - db_secret = null, - ip_address = null, - provisionerd_psk = kubernetes_secret.provisionerd_psk_europe.metadata.0.name, - access_url = local.deployments.primary.url, - node_pool = google_container_node_pool.node_pool["europe_coder"].name, - release_name = local.coder_release_name, - experiments = var.coder_experiments, - image_repo = var.coder_image_repo, - image_tag = var.coder_image_tag, - replicas = local.scenarios[var.scenario].provisionerd.replicas, - cpu_request = local.scenarios[var.scenario].provisionerd.cpu_request, - mem_request = local.scenarios[var.scenario].provisionerd.mem_request, - cpu_limit = 
local.scenarios[var.scenario].provisionerd.cpu_limit, - mem_limit = local.scenarios[var.scenario].provisionerd.mem_limit, - deployment = "europe", + workspace_proxy = false, + provisionerd = true, + primary_url = null, + proxy_token = null, + db_secret = null, + ip_address = null, + provisionerd_psk = kubernetes_secret.provisionerd_psk_europe.metadata.0.name, + access_url = local.deployments.primary.url, + wildcard_access_url = null, + node_pool = google_container_node_pool.node_pool["europe_coder"].name, + release_name = local.coder_release_name, + experiments = var.coder_experiments, + image_repo = var.coder_image_repo, + image_tag = var.coder_image_tag, + replicas = local.scenarios[var.scenario].provisionerd.replicas, + cpu_request = local.scenarios[var.scenario].provisionerd.cpu_request, + mem_request = local.scenarios[var.scenario].provisionerd.mem_request, + cpu_limit = local.scenarios[var.scenario].provisionerd.cpu_limit, + mem_limit = local.scenarios[var.scenario].provisionerd.mem_limit, + deployment = "europe", + tls_secret_name = null, })] depends_on = [null_resource.license] diff --git a/scaletest/terraform/action/k8s_coder_primary.tf b/scaletest/terraform/action/k8s_coder_primary.tf index 0c4a64815a156..bc00e903a386e 100644 --- a/scaletest/terraform/action/k8s_coder_primary.tf +++ b/scaletest/terraform/action/k8s_coder_primary.tf @@ -63,6 +63,23 @@ resource "kubernetes_secret" "provisionerd_psk_primary" { } } +resource "kubernetes_secret" "coder_tls_primary" { + provider = kubernetes.primary + + type = "kubernetes.io/tls" + metadata { + name = "coder-tls" + namespace = kubernetes_namespace.coder_primary.metadata.0.name + } + data = { + "tls.crt" = data.kubernetes_secret.coder_tls["primary"].data["tls.crt"] + "tls.key" = data.kubernetes_secret.coder_tls["primary"].data["tls.key"] + } + lifecycle { + ignore_changes = [timeouts, wait_for_service_account_token] + } +} + resource "helm_release" "coder_primary" { provider = helm.primary @@ -72,25 +89,27 @@ 
resource "helm_release" "coder_primary" { version = var.coder_chart_version namespace = kubernetes_namespace.coder_primary.metadata.0.name values = [templatefile("${path.module}/coder_helm_values.tftpl", { - workspace_proxy = false, - provisionerd = false, - primary_url = null, - proxy_token = null, - db_secret = kubernetes_secret.coder_db.metadata.0.name, - ip_address = google_compute_address.coder["primary"].address, - provisionerd_psk = kubernetes_secret.provisionerd_psk_primary.metadata.0.name, - access_url = local.deployments.primary.url, - node_pool = google_container_node_pool.node_pool["primary_coder"].name, - release_name = local.coder_release_name, - experiments = var.coder_experiments, - image_repo = var.coder_image_repo, - image_tag = var.coder_image_tag, - replicas = local.scenarios[var.scenario].coder.replicas, - cpu_request = local.scenarios[var.scenario].coder.cpu_request, - mem_request = local.scenarios[var.scenario].coder.mem_request, - cpu_limit = local.scenarios[var.scenario].coder.cpu_limit, - mem_limit = local.scenarios[var.scenario].coder.mem_limit, - deployment = "primary", + workspace_proxy = false, + provisionerd = false, + primary_url = null, + proxy_token = null, + db_secret = kubernetes_secret.coder_db.metadata.0.name, + ip_address = google_compute_address.coder["primary"].address, + provisionerd_psk = kubernetes_secret.provisionerd_psk_primary.metadata.0.name, + access_url = local.deployments.primary.url, + wildcard_access_url = local.deployments.primary.wildcard_access_url, + node_pool = google_container_node_pool.node_pool["primary_coder"].name, + release_name = local.coder_release_name, + experiments = var.coder_experiments, + image_repo = var.coder_image_repo, + image_tag = var.coder_image_tag, + replicas = local.scenarios[var.scenario].coder.replicas, + cpu_request = local.scenarios[var.scenario].coder.cpu_request, + mem_request = local.scenarios[var.scenario].coder.mem_request, + cpu_limit = 
local.scenarios[var.scenario].coder.cpu_limit, + mem_limit = local.scenarios[var.scenario].coder.mem_limit, + deployment = "primary", + tls_secret_name = kubernetes_secret.coder_tls_primary.metadata.0.name, })] } @@ -103,25 +122,27 @@ resource "helm_release" "provisionerd_primary" { version = var.provisionerd_chart_version namespace = kubernetes_namespace.coder_primary.metadata.0.name values = [templatefile("${path.module}/coder_helm_values.tftpl", { - workspace_proxy = false, - provisionerd = true, - primary_url = null, - proxy_token = null, - db_secret = null, - ip_address = null, - provisionerd_psk = kubernetes_secret.provisionerd_psk_primary.metadata.0.name, - access_url = local.deployments.primary.url, - node_pool = google_container_node_pool.node_pool["primary_coder"].name, - release_name = local.coder_release_name, - experiments = var.coder_experiments, - image_repo = var.coder_image_repo, - image_tag = var.coder_image_tag, - replicas = local.scenarios[var.scenario].provisionerd.replicas, - cpu_request = local.scenarios[var.scenario].provisionerd.cpu_request, - mem_request = local.scenarios[var.scenario].provisionerd.mem_request, - cpu_limit = local.scenarios[var.scenario].provisionerd.cpu_limit, - mem_limit = local.scenarios[var.scenario].provisionerd.mem_limit, - deployment = "primary", + workspace_proxy = false, + provisionerd = true, + primary_url = null, + proxy_token = null, + db_secret = null, + ip_address = null, + provisionerd_psk = kubernetes_secret.provisionerd_psk_primary.metadata.0.name, + access_url = local.deployments.primary.url, + wildcard_access_url = null, + node_pool = google_container_node_pool.node_pool["primary_coder"].name, + release_name = local.coder_release_name, + experiments = var.coder_experiments, + image_repo = var.coder_image_repo, + image_tag = var.coder_image_tag, + replicas = local.scenarios[var.scenario].provisionerd.replicas, + cpu_request = local.scenarios[var.scenario].provisionerd.cpu_request, + mem_request = 
local.scenarios[var.scenario].provisionerd.mem_request, + cpu_limit = local.scenarios[var.scenario].provisionerd.cpu_limit, + mem_limit = local.scenarios[var.scenario].provisionerd.mem_limit, + deployment = "primary", + tls_secret_name = null, })] depends_on = [null_resource.license] diff --git a/scaletest/terraform/action/main.tf b/scaletest/terraform/action/main.tf index cd26c7ec1ccd2..41c97b1aeab4b 100644 --- a/scaletest/terraform/action/main.tf +++ b/scaletest/terraform/action/main.tf @@ -55,6 +55,12 @@ provider "cloudflare" { api_token = coalesce(var.cloudflare_api_token, data.google_secret_manager_secret_version_access.cloudflare_api_token_dns.secret_data) } +data "google_container_cluster" "observability" { + name = var.observability_cluster_name + location = var.observability_cluster_location + project = var.project_id +} + provider "kubernetes" { alias = "primary" host = "https://${google_container_cluster.cluster["primary"].endpoint}" @@ -76,6 +82,13 @@ provider "kubernetes" { token = data.google_client_config.default.access_token } +provider "kubernetes" { + alias = "observability" + host = "https://${data.google_container_cluster.observability.endpoint}" + cluster_ca_certificate = base64decode(data.google_container_cluster.observability.master_auth.0.cluster_ca_certificate) + token = data.google_client_config.default.access_token +} + provider "kubectl" { alias = "primary" host = "https://${google_container_cluster.cluster["primary"].endpoint}" diff --git a/scaletest/terraform/action/tls.tf b/scaletest/terraform/action/tls.tf new file mode 100644 index 0000000000000..224ff7618d327 --- /dev/null +++ b/scaletest/terraform/action/tls.tf @@ -0,0 +1,13 @@ +locals { + coder_certs_namespace = "coder-certs" +} + +# These certificates are managed by flux and cert-manager. 
+data "kubernetes_secret" "coder_tls" { + for_each = local.deployments + provider = kubernetes.observability + metadata { + name = "coder-${var.name}-${each.key}-tls" + namespace = local.coder_certs_namespace + } +} diff --git a/scaletest/terraform/action/vars.tf b/scaletest/terraform/action/vars.tf index 3952baab82b80..fe625ed5665ba 100644 --- a/scaletest/terraform/action/vars.tf +++ b/scaletest/terraform/action/vars.tf @@ -1,5 +1,9 @@ variable "name" { - description = "The name all resources will be prefixed with" + description = "The name all resources will be prefixed with. Must be one of alpha, bravo, or charlie." + validation { + condition = contains(["alpha", "bravo", "charlie"], var.name) + error_message = "Name must be one of alpha, bravo, or charlie." + } } variable "scenario" { @@ -82,6 +86,21 @@ variable "provisionerd_image_tag" { default = "latest" } +variable "observability_cluster_name" { + description = "Name of the observability GKE cluster." + default = "observability" +} + +variable "observability_cluster_location" { + description = "Location of the observability GKE cluster." + default = "us-east1-b" +} + +variable "cloudflare_api_token_secret" { + description = "Name of the Google Secret Manager secret containing the Cloudflare API token." + default = "cloudflare-api-token-dns" +} + // Prometheus variable "prometheus_remote_write_url" { description = "URL to push prometheus metrics to." From 6132cd5ebae353e8b69131aa9c0e85cbc4b7ef52 Mon Sep 17 00:00:00 2001 From: Ethan <39577870+ethanndickson@users.noreply.github.com> Date: Mon, 25 Aug 2025 12:35:32 +1000 Subject: [PATCH 5/6] refactor(scaletest): use vpc for networking infrastructure (#19464) This PR refactors the scaletest infrastructure to use a dedicated VPC for each deployment (i.e. alpha, bravo, charlie). It then peers that VPC with the observability VPC, and the Cloud SQL database. It also sets up subnetting for and within each deployment. 
With this deployed, I was able to get the scaletest running with metrics flowing into `scaletest.cdr.dev`. Co-authored-by: Dean Sheather --- scaletest/terraform/action/gcp_clusters.tf | 8 +- scaletest/terraform/action/gcp_db.tf | 2 +- scaletest/terraform/action/gcp_vpc.tf | 141 +++++++++++++++++++-- scaletest/terraform/action/vars.tf | 5 + 4 files changed, 143 insertions(+), 13 deletions(-) diff --git a/scaletest/terraform/action/gcp_clusters.tf b/scaletest/terraform/action/gcp_clusters.tf index 5987d07db03ad..0a3acfd06ccae 100644 --- a/scaletest/terraform/action/gcp_clusters.tf +++ b/scaletest/terraform/action/gcp_clusters.tf @@ -78,12 +78,13 @@ resource "google_container_cluster" "cluster" { name = "${var.name}-${each.key}" location = each.value.zone project = var.project_id - network = local.vpc_name - subnetwork = local.subnet_name + network = google_compute_network.network.name + subnetwork = google_compute_subnetwork.subnetwork[each.key].name networking_mode = "VPC_NATIVE" default_max_pods_per_node = 256 ip_allocation_policy { # Required with networking_mode=VPC_NATIVE - + cluster_secondary_range_name = local.secondary_ip_range_k8s_pods + services_secondary_range_name = local.secondary_ip_range_k8s_services } release_channel { # Setting release channel as STABLE can cause unexpected cluster upgrades. 
@@ -108,7 +109,6 @@ resource "google_container_cluster" "cluster" { workload_pool = "${data.google_project.project.project_id}.svc.id.goog" } - lifecycle { ignore_changes = [ maintenance_policy, diff --git a/scaletest/terraform/action/gcp_db.tf b/scaletest/terraform/action/gcp_db.tf index 9eb17464e1ce9..e7e64005f4b8f 100644 --- a/scaletest/terraform/action/gcp_db.tf +++ b/scaletest/terraform/action/gcp_db.tf @@ -23,7 +23,7 @@ resource "google_sql_database_instance" "db" { ip_configuration { ipv4_enabled = false - private_network = local.vpc_id + private_network = google_compute_network.network.id } insights_config { diff --git a/scaletest/terraform/action/gcp_vpc.tf b/scaletest/terraform/action/gcp_vpc.tf index 10624edaddf91..4bca3b3f510ba 100644 --- a/scaletest/terraform/action/gcp_vpc.tf +++ b/scaletest/terraform/action/gcp_vpc.tf @@ -1,9 +1,91 @@ locals { - vpc_name = "scaletest" - vpc_id = "projects/${var.project_id}/global/networks/${local.vpc_name}" - subnet_name = "scaletest" + # Generate a /14 for each deployment. + cidr_networks = cidrsubnets( + "172.16.0.0/12", + 2, + 2, + 2, + ) + + networks = { + alpha = local.cidr_networks[0] + bravo = local.cidr_networks[1] + charlie = local.cidr_networks[2] + } + + # Generate a bunch of /18s within the subnet we're using from the above map. 
+ cidr_subnetworks = cidrsubnets( + local.networks[var.name], + 4, # PSA + 4, # primary subnetwork + 4, # primary k8s pod network + 4, # primary k8s services network + 4, # europe subnetwork + 4, # europe k8s pod network + 4, # europe k8s services network + 4, # asia subnetwork + 4, # asia k8s pod network + 4, # asia k8s services network + ) + + psa_range_address = split("/", local.cidr_subnetworks[0])[0] + psa_range_prefix_length = tonumber(split("/", local.cidr_subnetworks[0])[1]) + + subnetworks = { + primary = local.cidr_subnetworks[1] + europe = local.cidr_subnetworks[4] + asia = local.cidr_subnetworks[7] + } + cluster_ranges = { + primary = { + pods = local.cidr_subnetworks[2] + services = local.cidr_subnetworks[3] + } + europe = { + pods = local.cidr_subnetworks[5] + services = local.cidr_subnetworks[6] + } + asia = { + pods = local.cidr_subnetworks[8] + services = local.cidr_subnetworks[9] + } + } + + secondary_ip_range_k8s_pods = "k8s-pods" + secondary_ip_range_k8s_services = "k8s-services" +} + +# Create a VPC for the deployment +resource "google_compute_network" "network" { + project = var.project_id + name = "${var.name}-scaletest" + description = "scaletest network for ${var.name}" + auto_create_subnetworks = false +} + +# Create a subnetwork with a unique range for each region +resource "google_compute_subnetwork" "subnetwork" { + for_each = local.subnetworks + name = "${var.name}-${each.key}" + # Use the deployment region + region = local.deployments[each.key].region + network = google_compute_network.network.id + project = var.project_id + ip_cidr_range = each.value + private_ip_google_access = true + + secondary_ip_range { + range_name = local.secondary_ip_range_k8s_pods + ip_cidr_range = local.cluster_ranges[each.key].pods + } + + secondary_ip_range { + range_name = local.secondary_ip_range_k8s_services + ip_cidr_range = local.cluster_ranges[each.key].services + } } +# Create a public IP for each region resource "google_compute_address" "coder" { 
for_each = local.deployments project = var.project_id @@ -13,17 +95,60 @@ resource "google_compute_address" "coder" { network_tier = "PREMIUM" } -resource "google_compute_global_address" "sql_peering" { +# Reserve an internal range for Google-managed services (PSA), used for Cloud +# SQL +resource "google_compute_global_address" "psa_peering" { project = var.project_id name = "${var.name}-sql-peering" purpose = "VPC_PEERING" address_type = "INTERNAL" - prefix_length = 16 - network = local.vpc_name + address = local.psa_range_address + prefix_length = local.psa_range_prefix_length + network = google_compute_network.network.self_link } resource "google_service_networking_connection" "private_vpc_connection" { - network = local.vpc_id + network = google_compute_network.network.id service = "servicenetworking.googleapis.com" - reserved_peering_ranges = [google_compute_global_address.sql_peering.name] + reserved_peering_ranges = [google_compute_global_address.psa_peering.name] +} + +# Join the new network to the observability network so we can talk to the +# Prometheus instance +data "google_compute_network" "observability" { + project = var.project_id + name = var.observability_cluster_vpc +} + +resource "google_compute_network_peering" "scaletest_to_observability" { + name = "peer-${google_compute_network.network.name}-to-${data.google_compute_network.observability.name}" + network = google_compute_network.network.self_link + peer_network = data.google_compute_network.observability.self_link + import_custom_routes = true + export_custom_routes = true +} + +resource "google_compute_network_peering" "observability_to_scaletest" { + name = "peer-${data.google_compute_network.observability.name}-to-${google_compute_network.network.name}" + network = data.google_compute_network.observability.self_link + peer_network = google_compute_network.network.self_link + import_custom_routes = true + export_custom_routes = true +} + +# Allow traffic from the scaletest network into 
the observability network so we +# can connect to Prometheus +resource "google_compute_firewall" "observability_allow_from_scaletest" { + project = var.project_id + name = "allow-from-scaletest-${var.name}" + network = data.google_compute_network.observability.self_link + direction = "INGRESS" + source_ranges = [local.networks[var.name]] + allow { + protocol = "icmp" + } + allow { + protocol = "tcp" + ports = ["0-65535"] + } } diff --git a/scaletest/terraform/action/vars.tf b/scaletest/terraform/action/vars.tf index fe625ed5665ba..0df162f92527b 100644 --- a/scaletest/terraform/action/vars.tf +++ b/scaletest/terraform/action/vars.tf @@ -96,6 +96,11 @@ variable "observability_cluster_location" { default = "us-east1-b" } +variable "observability_cluster_vpc" { + description = "Name of the observability cluster VPC network to peer with." + default = "default" +} + variable "cloudflare_api_token_secret" { description = "Name of the Google Secret Manager secret containing the Cloudflare API token." default = "cloudflare-api-token-dns" From fe8ca2a440aa5cf7f680cd5c384f248b6c11551a Mon Sep 17 00:00:00 2001 From: Ethan <39577870+ethanndickson@users.noreply.github.com> Date: Mon, 25 Aug 2025 12:45:31 +1000 Subject: [PATCH 6/6] chore(scaletest): add deployment name to all metrics (#19479) If more than one of `alpha`, `bravo`, or `charlie` is running at the same time, their metrics are hard to tell apart. To fix this, we add the deployment name to all metrics as the `deployment_name` external label. 
image --- scaletest/terraform/action/prometheus.tf | 3 +++ scaletest/terraform/action/prometheus_helm_values.tftpl | 1 + 2 files changed, 4 insertions(+) diff --git a/scaletest/terraform/action/prometheus.tf b/scaletest/terraform/action/prometheus.tf index 63b22df091542..6898e0cfbd128 100644 --- a/scaletest/terraform/action/prometheus.tf +++ b/scaletest/terraform/action/prometheus.tf @@ -17,6 +17,7 @@ resource "helm_release" "prometheus_chart_primary" { name = local.prometheus_release_name namespace = kubernetes_namespace.coder_primary.metadata.0.name values = [templatefile("${path.module}/prometheus_helm_values.tftpl", { + deployment_name = var.name, nodepool = google_container_node_pool.node_pool["primary_misc"].name, cluster = "primary", prometheus_remote_write_url = var.prometheus_remote_write_url, @@ -104,6 +105,7 @@ resource "helm_release" "prometheus_chart_europe" { name = local.prometheus_release_name namespace = kubernetes_namespace.coder_europe.metadata.0.name values = [templatefile("${path.module}/prometheus_helm_values.tftpl", { + deployment_name = var.name, nodepool = google_container_node_pool.node_pool["europe_misc"].name, cluster = "europe", prometheus_remote_write_url = var.prometheus_remote_write_url, @@ -141,6 +143,7 @@ resource "helm_release" "prometheus_chart_asia" { name = local.prometheus_release_name namespace = kubernetes_namespace.coder_asia.metadata.0.name values = [templatefile("${path.module}/prometheus_helm_values.tftpl", { + deployment_name = var.name, nodepool = google_container_node_pool.node_pool["asia_misc"].name, cluster = "asia", prometheus_remote_write_url = var.prometheus_remote_write_url, diff --git a/scaletest/terraform/action/prometheus_helm_values.tftpl b/scaletest/terraform/action/prometheus_helm_values.tftpl index e5e32b3feaa43..eefe5a88babfd 100644 --- a/scaletest/terraform/action/prometheus_helm_values.tftpl +++ b/scaletest/terraform/action/prometheus_helm_values.tftpl @@ -22,6 +22,7 @@ prometheus: values: 
["${nodepool}"] prometheusSpec: externalLabels: + deployment_name: "${deployment_name}" cluster: "${cluster}" podMonitorSelectorNilUsesHelmValues: false serviceMonitorSelectorNilUsesHelmValues: false