From fc8839d63caf491abba1a7826c98d1b882078a6f Mon Sep 17 00:00:00 2001
From: Ben <me@bpmct.net>
Date: Wed, 30 Nov 2022 21:37:53 +0000
Subject: [PATCH 01/10] docs: scaling Coder

---
 docs/admin/scale/docker.md   |  0
 docs/admin/scale/index.md    | 37 ++++++++++++++++++++++++++++++++++++
 docs/images/icons/growth.svg |  1 +
 docs/manifest.json           |  6 ++++++
 4 files changed, 44 insertions(+)
 create mode 100644 docs/admin/scale/docker.md
 create mode 100644 docs/admin/scale/index.md
 create mode 100644 docs/images/icons/growth.svg
diff --git a/docs/admin/scale/docker.md b/docs/admin/scale/docker.md
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/docs/admin/scale/index.md b/docs/admin/scale/index.md
new file mode 100644
index 0000000000000..a9ebc63637bf8
--- /dev/null
+++ b/docs/admin/scale/index.md
@@ -0,0 +1,37 @@
+We regularly scale-test Coder against various reference architectures. Additionally, we provide a [scale testing utility](#scale-testing-utility) which can be used in your own environment to give insight into how Coder scales with your deployment's specific templates, images, etc.
+
+## Reference Architectures
+
+| Environment                               | Users | Workspaces | Last tested  | Status   |
+| ----------------------------------------- | ----- | ---------- | ------------ | -------- |
+| [Google Kubernetes Engine (GKE)](#)       | 100   | 200        | Nov 29, 2022 | Complete |
+| [AWS Elastic Kubernetes Service (EKS)](#) | 100   | 200        | Nov 29, 2022 | Complete |
+| [Google Compute Engine + Docker](#)       | 1000  | 200        | Nov 29, 2022 | Complete |
+| [Google Compute Engine + VMs](#)          | 1000  | 200        | Nov 29, 2022 | Complete |
+
+## Scale testing utility
+
+Since Coder's performance is highly dependent on the templates and workflows you support, we recommend using our scale testing utility against your own environments.
+
+For example, this command will do the following:
+
+- create 100 workspaces
+- establish a SSH connection to each workspace
+- run `sleep 3 && echo hello` on each workspace via the web terminal
+- close connections, attempt to delete all workspaces
+- return results (e.g. `99 succeeded, 1 failed to connect` )
+
+```sh
+coder loadtest create-workspaces \
+    --count 100 \
+    --template "my-custom-template" \
+    --parameter image="my-custom-image" \
+    --run-command "sleep 3 && echo hello" \
+    --connect-timeout "10s"
+
+# Run `coder scaletest --help` for all usage
+```
+
+> To avoid user outages and orphaned resources, we recommend running scale tests on a secondary "staging" environment.
+
+If a test fails, you can leverage Coder's [performance tracing](#) and [prometheus metrics](#) to identify bottlenecks during scale tests. Additionally, you can use your existing cloud monitoring stack to measure load, view server logs, etc.
diff --git a/docs/images/icons/growth.svg b/docs/images/icons/growth.svg
new file mode 100644
index 0000000000000..2092514651547
--- /dev/null
+++ b/docs/images/icons/growth.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" height="24" viewBox="0 0 24 24"><path d="M14.06 9.94 12 9l2.06-.94L15 6l.94 2.06L18 9l-2.06.94L15 12l-.94-2.06zM4 14l.94-2.06L7 11l-2.06-.94L4 8l-.94 2.06L1 11l2.06.94L4 14zm4.5-5 1.09-2.41L12 5.5 9.59 4.41 8.5 2 7.41 4.41 5 5.5l2.41 1.09L8.5 9zm-4 11.5 6-6.01 4 4L23 8.93l-1.41-1.41-7.09 7.97-4-4L3 19l1.5 1.5z"></path></svg>
diff --git a/docs/manifest.json b/docs/manifest.json
index bac69202bbf1a..edbc66edc970e 100644
--- a/docs/manifest.json
+++ b/docs/manifest.json
@@ -253,6 +253,12 @@
           "icon_path": "./images/icons/plug.svg",
           "path": "./admin/automation.md"
         },
+        {
+          "title": "Scaling Coder",
+          "description": "Reference architecture and load testing tools",
+          "icon_path": "./images/icons/growth.svg",
+          "path": "./admin/scale/index.md"
+        },
         {
           "title": "Audit Logs",
           "description": "Learn how to use Audit Logs in your Coder deployment",

From a587e453a3cd10ccd87947df3d09ba273d2e6bec Mon Sep 17 00:00:00 2001
From: Ben <me@bpmct.net>
Date: Wed, 30 Nov 2022 21:45:27 +0000
Subject: [PATCH 02/10] change icon

---
 docs/images/icons/growth.svg | 1 -
 docs/images/icons/scale.svg  | 1 +
 docs/manifest.json           | 2 +-
 3 files changed, 2 insertions(+), 2 deletions(-)
 delete mode 100644 docs/images/icons/growth.svg
 create mode 100644 docs/images/icons/scale.svg

diff --git a/docs/images/icons/growth.svg b/docs/images/icons/growth.svg
deleted file mode 100644
index 2092514651547..0000000000000
--- a/docs/images/icons/growth.svg
+++ /dev/null
@@ -1 +0,0 @@
-<svg xmlns="http://www.w3.org/2000/svg" height="24" viewBox="0 0 24 24"><path d="M14.06 9.94 12 9l2.06-.94L15 6l.94 2.06L18 9l-2.06.94L15 12l-.94-2.06zM4 14l.94-2.06L7 11l-2.06-.94L4 8l-.94 2.06L1 11l2.06.94L4 14zm4.5-5 1.09-2.41L12 5.5 9.59 4.41 8.5 2 7.41 4.41 5 5.5l2.41 1.09L8.5 9zm-4 11.5 6-6.01 4 4L23 8.93l-1.41-1.41-7.09 7.97-4-4L3 19l1.5 1.5z"></path></svg>
diff --git a/docs/images/icons/scale.svg b/docs/images/icons/scale.svg
new file mode 100644
index 0000000000000..3807fa5707081
--- /dev/null
+++ b/docs/images/icons/scale.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" enable-background="new 0 0 24 24" height="24px" viewBox="0 0 24 24" width="24px" fill="#000000"><g><rect fill="none" height="24" width="24"/><path d="M20,15H4v-2h16V15z M20,17H4v2h16V17z M15,11l5-3.55L20,5l-5,3.55L10,5L4,8.66L4,11l5.92-3.61L15,11z"/></g></svg>
diff --git a/docs/manifest.json b/docs/manifest.json
index edbc66edc970e..f80dc6273625b 100644
--- a/docs/manifest.json
+++ b/docs/manifest.json
@@ -256,7 +256,7 @@
         {
           "title": "Scaling Coder",
           "description": "Reference architecture and load testing tools",
-          "icon_path": "./images/icons/growth.svg",
+          "icon_path": "./images/icons/scale.svg",
           "path": "./admin/scale/index.md"
         },
         {

From fdacfad7a0b4a0a8a47447f34f757bb16e90309c Mon Sep 17 00:00:00 2001
From: Ben Potter <ben@coder.com>
Date: Wed, 30 Nov 2022 13:46:28 -0800
Subject: [PATCH 03/10] Update docs/admin/scale/index.md

Co-authored-by: Dean Sheather <dean@deansheather.com>
---
 docs/admin/scale/index.md | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/docs/admin/scale/index.md b/docs/admin/scale/index.md
index a9ebc63637bf8..95da3b82f6aa2 100644
--- a/docs/admin/scale/index.md
+++ b/docs/admin/scale/index.md
@@ -22,12 +22,11 @@ For example, this command will do the following:
 - return results (e.g. `99 succeeded, 1 failed to connect` )
 
 ```sh
-coder loadtest create-workspaces \
+coder scaletest create-workspaces \
     --count 100 \
     --template "my-custom-template" \
     --parameter image="my-custom-image" \
-    --run-command "sleep 3 && echo hello" \
-    --connect-timeout "10s"
+    --run-command "sleep 3 && echo hello"
 
 # Run `coder scaletest --help` for all usage
 ```

From 8cd6abb65a94b12485d9df2dbd325f29e937a2e2 Mon Sep 17 00:00:00 2001
From: Ben Potter <ben@coder.com>
Date: Wed, 30 Nov 2022 13:46:34 -0800
Subject: [PATCH 04/10] Update docs/admin/scale/index.md

Co-authored-by: Dean Sheather <dean@deansheather.com>
---
 docs/admin/scale/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/admin/scale/index.md b/docs/admin/scale/index.md
index 95da3b82f6aa2..a0ff835f29d59 100644
--- a/docs/admin/scale/index.md
+++ b/docs/admin/scale/index.md
@@ -28,7 +28,7 @@ coder scaletest create-workspaces \
     --parameter image="my-custom-image" \
     --run-command "sleep 3 && echo hello"
 
-# Run `coder scaletest --help` for all usage
+# Run `coder scaletest create-workspaces --help` for all usage
 ```
 
 > To avoid user outages and orphaned resources, we recommend running scale tests on a secondary "staging" environment.

From 7637f86150899c2bd8b69676359dbe92d5ec37ae Mon Sep 17 00:00:00 2001
From: Ben Potter <ben@coder.com>
Date: Wed, 30 Nov 2022 13:46:39 -0800
Subject: [PATCH 05/10] Update docs/admin/scale/index.md

Co-authored-by: Dean Sheather <dean@deansheather.com>
---
 docs/admin/scale/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/admin/scale/index.md b/docs/admin/scale/index.md
index a0ff835f29d59..ac0d7b7f81d0c 100644
--- a/docs/admin/scale/index.md
+++ b/docs/admin/scale/index.md
@@ -19,7 +19,7 @@ For example, this command will do the following:
 - establish a SSH connection to each workspace
 - run `sleep 3 && echo hello` on each workspace via the web terminal
 - close connections, attempt to delete all workspaces
-- return results (e.g. `99 succeeded, 1 failed to connect` )
+- return results (e.g. `99 succeeded, 1 failed to connect`)
 
 ```sh
 coder scaletest create-workspaces \

From c1de2b499131d33b7647b957dbeacde81d48ec41 Mon Sep 17 00:00:00 2001
From: Ben <me@bpmct.net>
Date: Wed, 30 Nov 2022 21:47:38 +0000
Subject: [PATCH 06/10] add prom link

---
 docs/admin/scale/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/admin/scale/index.md b/docs/admin/scale/index.md
index a9ebc63637bf8..389a24dc981d6 100644
--- a/docs/admin/scale/index.md
+++ b/docs/admin/scale/index.md
@@ -34,4 +34,4 @@ coder loadtest create-workspaces \
 
 > To avoid user outages and orphaned resources, we recommend running scale tests on a secondary "staging" environment.
 
-If a test fails, you can leverage Coder's [performance tracing](#) and [prometheus metrics](#) to identify bottlenecks during scale tests. Additionally, you can use your existing cloud monitoring stack to measure load, view server logs, etc.
+If a test fails, you can leverage Coder's [performance tracing](#) and [prometheus metrics](../prometheus.md) to identify bottlenecks during scale tests. Additionally, you can use your existing cloud monitoring stack to measure load, view server logs, etc.

From 1cf65aa28432eb1d8f8cb94f59c78d45f3189a5b Mon Sep 17 00:00:00 2001
From: Ben <me@bpmct.net>
Date: Wed, 30 Nov 2022 23:05:13 +0000
Subject: [PATCH 07/10] add plumbing for gke doc

---
 docs/admin/scale/docker.md |  0
 docs/admin/scale/gke.md    | 50 ++++++++++++++++++++++++++++++++++++++
 docs/admin/scale/index.md  | 12 ++++-----
 docs/manifest.json         |  9 ++++++-
 4 files changed, 64 insertions(+), 7 deletions(-)
 delete mode 100644 docs/admin/scale/docker.md
 create mode 100644 docs/admin/scale/gke.md

diff --git a/docs/admin/scale/docker.md b/docs/admin/scale/docker.md
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/docs/admin/scale/gke.md b/docs/admin/scale/gke.md
new file mode 100644
index 0000000000000..3eb701a801cd4
--- /dev/null
+++ b/docs/admin/scale/gke.md
@@ -0,0 +1,50 @@
+# Scaling Coder on Google Kubernetes Engine (GKE)
+
+This is a reference architecture for Coder on [Google Kubernetes Engine](https://cloud.google.com/kubernetes-engine). We regularly load test these environments with a standard [kubernetes example](https://github.com/coder/coder/tree/main/examples/templates/kubernetes) template.
+
+> Performance and ideal node sizing depend on many factors, including workspace image and the [workspace sizes](https://github.com/coder/coder/issues/3519) you wish to give developers. Use Coder's [scale testing utility](./index.md#scale-testing-utility) to test your own deployment.
+
+## 50 users
+
+### Cluster configuration
+
+- **Autoscaling profile**: `optimize-utilization`
+
+- **Node pools**
+  - Default
+    - **Operating system**: `Ubuntu with containerd`
+    - **Instance type**: `e2-highcpu-8`
+    - **Min nodes**: `1`
+    - **Max nodes**: `4`
+
+### Coder settings
+
+- **Replica count**: `1`
+- **Provisioner daemons**: `30`
+- **Template**: [kubernetes example](https://github.com/coder/coder/tree/main/examples/templates/kubernetes)
+
+## 100 users
+
+For deployments with 100+ users, we recommend running the Coder server in a separate node pool via taints, tolerations, and node selectors.
+
+### Cluster configuration
+
+- **Node pools**
+  - Coder server
+    - **Instance type**: `e2-highcpu-4`
+    - **Operating system**: `Ubuntu with containerd`
+    - **Autoscaling profile**: `optimize-utilization`
+    - **Min nodes**: `2`
+    - **Max nodes**: `4`
+  - Workspaces
+    - **Instance type**: `e2-highcpu-16`
+    - **Operating system**: `Ubuntu with containerd`
+    - **Autoscaling profile**: `optimize-utilization`
+    - **Min nodes**: `3`
+    - **Max nodes**: `10`
+
+### Coder settings
+
+- **Replica count**: `4`
+- **Provisioner daemons**: `25`
+- **Template**: [kubernetes example](https://github.com/coder/coder/tree/main/examples/templates/kubernetes)
diff --git a/docs/admin/scale/index.md b/docs/admin/scale/index.md
index f1dad945fbb8f..81cc1f68928b3 100644
--- a/docs/admin/scale/index.md
+++ b/docs/admin/scale/index.md
@@ -2,12 +2,12 @@ We regularly scale-test Coder against various reference architectures. Additiona
 
 ## Reference Architectures
 
-| Environment                               | Users | Workspaces | Last tested  | Status   |
-| ----------------------------------------- | ----- | ---------- | ------------ | -------- |
-| [Google Kubernetes Engine (GKE)](#)       | 100   | 200        | Nov 29, 2022 | Complete |
-| [AWS Elastic Kubernetes Service (EKS)](#) | 100   | 200        | Nov 29, 2022 | Complete |
-| [Google Compute Engine + Docker](#)       | 1000  | 200        | Nov 29, 2022 | Complete |
-| [Google Compute Engine + VMs](#)          | 1000  | 200        | Nov 29, 2022 | Complete |
+| Environment                                       | Users         | Last tested  | Status   |
+| ------------------------------------------------- | ------------- | ------------ | -------- |
+| [Google Kubernetes Engine (GKE)](./gke.md)        | 50, 100, 1000 | Nov 29, 2022 | Complete |
+| [AWS Elastic Kubernetes Service (EKS)](./eks.md)  | 50, 100, 1000 | Nov 29, 2022 | Complete |
+| [Google Compute Engine + Docker](./gce-docker.md) | 15, 50        | Nov 29, 2022 | Complete |
+| [Google Compute Engine + VMs](./gce-vms.md)       | 1000          | Nov 29, 2022 | Complete |
 
 ## Scale testing utility
 
diff --git a/docs/manifest.json b/docs/manifest.json
index f80dc6273625b..6937070c744b4 100644
--- a/docs/manifest.json
+++ b/docs/manifest.json
@@ -257,7 +257,14 @@
           "title": "Scaling Coder",
           "description": "Reference architecture and load testing tools",
           "icon_path": "./images/icons/scale.svg",
-          "path": "./admin/scale/index.md"
+          "path": "./admin/scale/index.md",
+          "children": [
+            {
+              "title": "GKE",
+              "description": "Learn how to scale Coder on GKE",
+              "path": "./admin/scale/gke.md"
+            }
+          ]
         },
         {
           "title": "Audit Logs",

From 933beac63b7b9ff85dd2ac2d5f49186f482ee3f1 Mon Sep 17 00:00:00 2001
From: Ben <me@bpmct.net>
Date: Wed, 30 Nov 2022 23:07:40 +0000
Subject: [PATCH 08/10] add limits/requests

---
 docs/admin/scale/gke.md | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/docs/admin/scale/gke.md b/docs/admin/scale/gke.md
index 3eb701a801cd4..fe291e10a18bf 100644
--- a/docs/admin/scale/gke.md
+++ b/docs/admin/scale/gke.md
@@ -22,6 +22,12 @@ This is a reference architecture for Coder on [Google Kubernetes Engine](#). We
 - **Replica count**: `1`
 - **Provisioner daemons**: `30`
 - **Template**: [kubernetes example](https://github.com/coder/coder/tree/main/examples/templates/kubernetes)
+- **Coder server limits**:
+  - CPU: `2 cores`
+  - RAM: `4 GB`
+- **Coder server requests**:
+  - CPU: `2 cores`
+  - RAM: `4 GB`
 
 ## 100 users
 
@@ -48,3 +54,9 @@ For deployments with 100+ users, we recommend running the Coder server in a sepa
 - **Replica count**: `4`
 - **Provisioner daemons**: `25`
 - **Template**: [kubernetes example](https://github.com/coder/coder/tree/main/examples/templates/kubernetes)
+- **Coder server limits**:
+  - CPU: `4 cores`
+  - RAM: `8 GB`
+- **Coder server requests**:
+  - CPU: `4 cores`
+  - RAM: `8 GB`

From b31e8132f38e352360688b7ca9892223aa72e142 Mon Sep 17 00:00:00 2001
From: Ben <me@bpmct.net>
Date: Thu, 1 Dec 2022 23:18:15 +0000
Subject: [PATCH 09/10] changes from feedback

---
 docs/admin/scale/index.md | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/docs/admin/scale/index.md b/docs/admin/scale/index.md
index 81cc1f68928b3..1374f5315269c 100644
--- a/docs/admin/scale/index.md
+++ b/docs/admin/scale/index.md
@@ -13,24 +13,30 @@ We regularly scale-test Coder against various reference architectures. Additiona
 
 Since Coder's performance is highly dependent on the templates and workflows you support, we recommend using our scale testing utility against your own environments.
 
-For example, this command will do the following:
-
-- create 100 workspaces
-- establish a SSH connection to each workspace
-- run `sleep 3 && echo hello` on each workspace via the web terminal
-- close connections, attempt to delete all workspaces
-- return results (e.g. `99 succeeded, 1 failed to connect`)
+The following command runs our scale test scenario (described below) against your own Coder deployment. You can also specify a template name and any parameter values.
 
 ```sh
 coder scaletest create-workspaces \
     --count 100 \
     --template "my-custom-template" \
     --parameter image="my-custom-image" \
-    --run-command "sleep 3 && echo hello"
+    --run-command "sleep 2 && echo hello"
 
 # Run `coder scaletest create-workspaces --help` for all usage
 ```
 
-> To avoid user outages and orphaned resources, we recommend running scale tests on a secondary "staging" environment.
+> To avoid outages and orphaned resources, we recommend running scale tests on a secondary "staging" environment.
+
+The test does the following:
+
+- create `n` workspaces
+- establish an SSH connection to each workspace
+- run `sleep 2 && echo hello` on each workspace via the web terminal
+- close connections, attempt to delete all workspaces
+- return results (e.g. `99 succeeded, 1 failed to connect`)
+
+Workspace jobs run concurrently, meaning that the test will attempt to connect to each workspace as soon as it is provisioned instead of first waiting for all 100 workspaces to create.
+
+## Troubleshooting
 
-If a test fails, you can leverage Coder's [performance tracing](#) and [prometheus metrics](../prometheus.md) to identify bottlenecks during scale tests. Additionally, you can use your existing cloud monitoring stack to measure load, view server logs, etc.
+If a scale test fails or if you are experiencing performance issues during day-to-day use, you can leverage Coder's [performance tracing](#) and [Prometheus metrics](../prometheus.md) to identify bottlenecks. Additionally, you can use your existing cloud monitoring stack to measure load, view server logs, etc.

From b493aa97f9f7080bf8b04ed85be37c12a4a307ab Mon Sep 17 00:00:00 2001
From: Ben <me@bpmct.net>
Date: Thu, 1 Dec 2022 23:21:14 +0000
Subject: [PATCH 10/10] change

---
 docs/admin/scale/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/admin/scale/index.md b/docs/admin/scale/index.md
index 1374f5315269c..d0dbf758bfd2e 100644
--- a/docs/admin/scale/index.md
+++ b/docs/admin/scale/index.md
@@ -35,7 +35,7 @@ The test does the following:
 - close connections, attempt to delete all workspaces
 - return results (e.g. `99 succeeded, 1 failed to connect`)
 
-Workspace jobs run concurrently, meaning that the test will attempt to connect to each workspace as soon as it is provisioned instead of first waiting for all 100 workspaces to create.
+Workspace jobs run concurrently, meaning that the test will attempt to connect to each workspace as soon as it is provisioned instead of waiting for all 100 workspaces to be created.
 
 ## Troubleshooting