Skip to content

feat: add scaletest load generation infrastructure #15816

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 36 commits into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fmt
  • Loading branch information
f0ssel committed Jan 13, 2025
commit 97769e06d0f5103ddc7e7b8a757ba220b6547c2d
61 changes: 34 additions & 27 deletions scaletest/terraform/action/coder_traffic.tf
Original file line number Diff line number Diff line change
@@ -1,23 +1,27 @@
locals {
wait_baseline_duration = "5m"
workspace_traffic_job_timeout = "15m"
workspace_traffic_duration = "10m"
bytes_per_tick = 1024
tick_interval = "100ms"
wait_baseline_duration = "5m"
bytes_per_tick = 1024
tick_interval = "100ms"

traffic_types = {
ssh = {
wait_duration_minutes = "0"
wait_duration = "0m"
duration = "30m"
job_timeout = "35m"
flags = [
"--ssh",
]
}
webterminal = {
wait_duration_minutes = "5"
flags = []
wait_duration = "5m"
duration = "25m"
job_timeout = "30m"
flags = []
}
app = {
wait_duration_minutes = "10"
wait_duration = "10m"
duration = "20m"
job_timeout = "25m"
flags = [
"--app=wsec",
]
Expand All @@ -31,15 +35,18 @@ resource "time_sleep" "wait_baseline" {
kubernetes_job.create_workspaces_europe,
kubernetes_job.create_workspaces_asia,
]
# depends_on = [
# kubernetes_job.push_template_primary,
# kubernetes_job.push_template_europe,
# kubernetes_job.push_template_asia,
# ]

create_duration = local.wait_baseline_duration
}

# Staggers the start of each traffic type: one sleep instance is created per
# entry in local.traffic_types, and each begins only after the baseline wait
# (time_sleep.wait_baseline) has completed.
resource "time_sleep" "wait_traffic" {
  for_each = local.traffic_types

  depends_on = [time_sleep.wait_baseline]

  # wait_duration already includes its unit (e.g. "0m", "5m", "10m"), so it is
  # used as-is. The earlier form appended "m" to wait_duration_minutes, an
  # attribute the traffic_types entries no longer define.
  create_duration = local.traffic_types[each.key].wait_duration
}

resource "kubernetes_job" "workspace_traffic_primary" {
provider = kubernetes.primary

Expand All @@ -52,7 +59,7 @@ resource "kubernetes_job" "workspace_traffic_primary" {
}
}
spec {
completions = 1
completions = 1
backoff_limit = 0
template {
metadata {}
Expand Down Expand Up @@ -86,7 +93,7 @@ resource "kubernetes_job" "workspace_traffic_primary" {
"--bytes-per-tick=${local.bytes_per_tick}",
"--tick-interval=${local.tick_interval}",
"--scaletest-prometheus-wait=30s",
"--job-timeout=${local.workspace_traffic_duration}",
"--job-timeout=${local.traffic_types[each.key].duration}",
], local.traffic_types[each.key].flags)
}
restart_policy = "Never"
Expand All @@ -96,16 +103,16 @@ resource "kubernetes_job" "workspace_traffic_primary" {
wait_for_completion = true

timeouts {
create = local.workspace_traffic_job_timeout
create = local.traffic_types[each.key].job_timeout
}

depends_on = [time_sleep.wait_baseline]
depends_on = [time_sleep.wait_baseline, time_sleep.wait_traffic[each.key]]
}

resource "kubernetes_job" "workspace_traffic_europe" {
provider = kubernetes.europe

for_each = local.traffic_types
for_each = local.traffic_types
metadata {
name = "${var.name}-workspace-traffic-${each.key}"
namespace = kubernetes_namespace.coder_europe.metadata.0.name
Expand All @@ -114,7 +121,7 @@ for_each = local.traffic_types
}
}
spec {
completions = 1
completions = 1
backoff_limit = 0
template {
metadata {}
Expand Down Expand Up @@ -148,7 +155,7 @@ for_each = local.traffic_types
"--bytes-per-tick=${local.bytes_per_tick}",
"--tick-interval=${local.tick_interval}",
"--scaletest-prometheus-wait=30s",
"--job-timeout=${local.workspace_traffic_duration}",
"--job-timeout=${local.traffic_types[each.key].duration}",
"--workspace-proxy-url=${local.deployments.europe.url}",
], local.traffic_types[each.key].flags)
}
Expand All @@ -159,10 +166,10 @@ for_each = local.traffic_types
wait_for_completion = true

timeouts {
create = local.workspace_traffic_job_timeout
create = local.traffic_types[each.key].job_timeout
}

depends_on = [time_sleep.wait_baseline]
depends_on = [time_sleep.wait_baseline, time_sleep.wait_traffic[each.key]]
}

resource "kubernetes_job" "workspace_traffic_asia" {
Expand All @@ -177,7 +184,7 @@ resource "kubernetes_job" "workspace_traffic_asia" {
}
}
spec {
completions = 1
completions = 1
backoff_limit = 0
template {
metadata {}
Expand Down Expand Up @@ -211,7 +218,7 @@ resource "kubernetes_job" "workspace_traffic_asia" {
"--bytes-per-tick=${local.bytes_per_tick}",
"--tick-interval=${local.tick_interval}",
"--scaletest-prometheus-wait=30s",
"--job-timeout=${local.workspace_traffic_duration}",
"--job-timeout=${local.traffic_types[each.key].duration}",
"--workspace-proxy-url=${local.deployments.asia.url}",
], local.traffic_types[each.key].flags)
}
Expand All @@ -222,8 +229,8 @@ resource "kubernetes_job" "workspace_traffic_asia" {
wait_for_completion = true

timeouts {
create = local.workspace_traffic_job_timeout
create = local.traffic_types[each.key].job_timeout
}

depends_on = [time_sleep.wait_baseline]
depends_on = [time_sleep.wait_baseline, time_sleep.wait_traffic[each.key]]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

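Note for reviewers: these `depends_on` entries introduce a transitive dependency on the create-workspaces jobs, because `time_sleep.wait_traffic` depends on `time_sleep.wait_baseline`, which in turn depends on those jobs.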

}
6 changes: 3 additions & 3 deletions scaletest/terraform/action/coder_workspaces.tf
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ resource "kubernetes_job" "create_workspaces_primary" {
}
}
spec {
completions = 1
completions = 1
backoff_limit = 0
template {
metadata {}
Expand Down Expand Up @@ -72,7 +72,7 @@ resource "kubernetes_job" "create_workspaces_europe" {
}
}
spec {
completions = 1
completions = 1
backoff_limit = 0
template {
metadata {}
Expand Down Expand Up @@ -131,7 +131,7 @@ resource "kubernetes_job" "create_workspaces_asia" {
}
}
spec {
completions = 1
completions = 1
backoff_limit = 0
template {
metadata {}
Expand Down
4 changes: 2 additions & 2 deletions scaletest/terraform/action/gcp_clusters.tf
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,8 @@ resource "google_container_node_pool" "node_pool" {
}
kubelet_config {
cpu_manager_policy = ""
cpu_cfs_quota = false
pod_pids_limit = 0
cpu_cfs_quota = false
pod_pids_limit = 0
}
}
lifecycle {
Expand Down
12 changes: 6 additions & 6 deletions scaletest/terraform/action/k8s_coder_asia.tf
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,12 @@ resource "helm_release" "provisionerd_asia" {
version = var.provisionerd_chart_version
namespace = kubernetes_namespace.coder_asia.metadata.0.name
values = [templatefile("${path.module}/coder_helm_values.tftpl", {
workspace_proxy = false,
provisionerd = true,
primary_url = null,
proxy_token = null,
db_secret = null,
ip_address = null,
workspace_proxy = false,
provisionerd = true,
primary_url = null,
proxy_token = null,
db_secret = null,
ip_address = null,
provisionerd_psk = kubernetes_secret.provisionerd_psk_asia.metadata.0.name,
access_url = local.deployments.primary.url,
node_pool = google_container_node_pool.node_pool["asia_coder"].name,
Expand Down
70 changes: 0 additions & 70 deletions scaletest/terraform/action/scenarios.tf
Original file line number Diff line number Diff line change
@@ -1,75 +1,5 @@
locals {
scenarios = {
small = {
coder = {
nodepool_size = 1
machine_type = "t2d-standard-4"
replicas = 1
cpu_request = "1000m"
mem_request = "6Gi"
cpu_limit = "2000m"
mem_limit = "12Gi"
}
provisionerd = {
replicas = 1
cpu_request = "100m"
mem_request = "1Gi"
cpu_limit = "1000m"
mem_limit = "1Gi"
}
workspaces = {
count_per_deployment = 10
nodepool_size = 1
machine_type = "t2d-standard-8"
cpu_request = "100m"
mem_request = "128Mi"
cpu_limit = "100m"
mem_limit = "128Mi"
}
misc = {
nodepool_size = 1
machine_type = "t2d-standard-4"
}
cloudsql = {
tier = "db-f1-micro"
max_connections = 500
}
}
medium = {
coder = {
nodepool_size = 1
machine_type = "t2d-standard-8"
replicas = 1
cpu_request = "3000m"
mem_request = "12Gi"
cpu_limit = "6000m"
mem_limit = "24Gi"
}
provisionerd = {
replicas = 1
cpu_request = "100m"
mem_request = "1Gi"
cpu_limit = "1000m"
mem_limit = "1Gi"
}
workspaces = {
count_per_deployment = 10
nodepool_size = 1
machine_type = "t2d-standard-8"
cpu_request = "100m"
mem_request = "128Mi"
cpu_limit = "100m"
mem_limit = "128Mi"
}
misc = {
nodepool_size = 1
machine_type = "t2d-standard-4"
}
cloudsql = {
tier = "db-custom-1-3840"
max_connections = 500
}
}
large = {
coder = {
nodepool_size = 3
Expand Down
Loading