From 53479bd6c0d59157bd35b355a95cb9c6676a6884 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Fri, 17 Jan 2025 08:19:54 +0000 Subject: [PATCH 1/8] Reducing the number of iterations of go-race to make it more likely to pass Signed-off-by: Danny Kopping --- .github/workflows/nightly-gauntlet.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/nightly-gauntlet.yaml b/.github/workflows/nightly-gauntlet.yaml index 5814ddf72b60f..b305c5e5f583c 100644 --- a/.github/workflows/nightly-gauntlet.yaml +++ b/.github/workflows/nightly-gauntlet.yaml @@ -39,7 +39,7 @@ jobs: # -race is likeliest to catch flaky tests # due to correctness detection and its performance # impact. - gotestsum --junitfile="gotests.xml" -- -timeout=240m -count=10 -race ./... + gotestsum --junitfile="gotests.xml" -- -timeout=240m -count=3 -race ./... - name: Upload test results to DataDog uses: ./.github/actions/upload-datadog From 9e4352a872c8d1e621052fc459c4067edec63327 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Fri, 17 Jan 2025 08:21:56 +0000 Subject: [PATCH 2/8] Run at 4AM to avoid possible collision with other midnight operations Signed-off-by: Danny Kopping --- .github/workflows/nightly-gauntlet.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/nightly-gauntlet.yaml b/.github/workflows/nightly-gauntlet.yaml index b305c5e5f583c..ac2684558f084 100644 --- a/.github/workflows/nightly-gauntlet.yaml +++ b/.github/workflows/nightly-gauntlet.yaml @@ -3,8 +3,8 @@ name: nightly-gauntlet on: schedule: - # Every day at midnight - - cron: "0 0 * * *" + # Every day at 4AM + - cron: "0 4 * * *" workflow_dispatch: permissions: @@ -78,7 +78,7 @@ jobs: - go-race - go-timing runs-on: ubuntu-latest - if: failure() + if: failure() && github.ref == 'refs/heads/main' steps: - name: Send Slack notification From eb4419719e1f41fadfc07eb0ef99bbde8c495d0e Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Fri, 17 Jan 2025 08:36:22 +0000 Subject: [PATCH 3/8] Remove low-signal tests, add pg tests for windows/mac Signed-off-by: Danny Kopping --- .github/workflows/nightly-gauntlet.yaml | 105 +++++++++++++++--------- 1 file changed, 66 insertions(+), 39 deletions(-) diff --git a/.github/workflows/nightly-gauntlet.yaml b/.github/workflows/nightly-gauntlet.yaml index ac2684558f084..7f8591a01a80a 100644 --- a/.github/workflows/nightly-gauntlet.yaml +++ b/.github/workflows/nightly-gauntlet.yaml @@ -11,14 +11,27 @@ permissions: contents: read jobs: - go-race: - # While GitHub's toaster runners are likelier to flake, we want consistency - # between this environment and the regular test environment for DataDog - # statistics and to only show real workflow threats. - runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-8' || 'ubuntu-latest' }} - # This runner costs 0.016 USD per minute, - # so 0.016 * 240 = 3.84 USD per run. - timeout-minutes: 240 + test-go-pg: + runs-on: ${{ matrix.os == 'macos-latest' && github.repository_owner == 'coder' && 'depot-macos-latest' || matrix.os == 'windows-2022' && github.repository_owner == 'coder' && 'windows-latest-16-cores' || matrix.os }} + + + # TODO: re-enable main check! + + + + + + # if: github.ref == 'refs/heads/main' + # This timeout must be greater than the timeout set by `go test` in + # `make test-postgres` to ensure we receive a trace of running + # goroutines. Setting this to the timeout +5m should work quite well + # even if some of the preceding steps are slow. + timeout-minutes: 25 + strategy: + matrix: + os: + - macos-latest + - windows-2022 steps: - name: Harden Runner uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2 @@ -27,6 +40,8 @@ jobs: - name: Checkout uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 + with: + fetch-depth: 1 - name: Setup Go uses: ./.github/actions/setup-go @@ -34,49 +49,61 @@ jobs: - name: Setup Terraform uses: ./.github/actions/setup-tf - - name: Run Tests - run: | - # -race is likeliest to catch flaky tests - # due to correctness detection and its performance - # impact. - gotestsum --junitfile="gotests.xml" -- -timeout=240m -count=3 -race ./... + # Sets up the ImDisk toolkit for Windows and creates a RAM disk on drive R:. + - name: Setup ImDisk + if: runner.os == 'Windows' + uses: ./.github/actions/setup-imdisk - - name: Upload test results to DataDog - uses: ./.github/actions/upload-datadog - if: always() - with: - api-key: ${{ secrets.DATADOG_API_KEY }} + - name: Test with PostgreSQL Database + env: + POSTGRES_VERSION: "13" + TS_DEBUG_DISCO: "true" + LC_CTYPE: "en_US.UTF-8" + LC_ALL: "en_US.UTF-8" + shell: bash + run: | + # if macOS, install google-chrome for scaletests + # As another concern, should we really have this kind of external dependency + # requirement on standard CI? + if [ "${{ matrix.os }}" == "macos-latest" ]; then + brew install google-chrome + fi - go-timing: - # We run these tests with p=1 so we don't need a lot of compute. - runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04' || 'ubuntu-latest' }} - timeout-minutes: 10 - steps: - - name: Harden Runner - uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2 - with: - egress-policy: audit + # By default Go will use the number of logical CPUs, which + # is a fine default. + PARALLEL_FLAG="" - - name: Checkout - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 + # macOS will output "The default interactive shell is now zsh" + # intermittently in CI... + if [ "${{ matrix.os }}" == "macos-latest" ]; then + touch ~/.bash_profile && echo "export BASH_SILENCE_DEPRECATION_WARNING=1" >> ~/.bash_profile + fi - - name: Setup Go - uses: ./.github/actions/setup-go + if [ "${{ runner.os }}" == "Windows" ]; then + # Create a temp dir on the R: ramdisk drive for Windows. The default + # C: drive is extremely slow: https://github.com/actions/runner-images/issues/8755 + mkdir -p "R:/temp/embedded-pg" + go run scripts/embedded-pg/main.go -path "R:/temp/embedded-pg" + # Reduce test parallelism, mirroring what we do for race tests. + # We'd been encountering issues with timing related flakes, and + # this seems to help. + else + go run scripts/embedded-pg/main.go + fi - - name: Run Tests - run: | - gotestsum --junitfile="gotests.xml" -- --tags="timing" -p=1 -run='_Timing/' ./... + DB=ci gotestsum --format standard-quiet -- -v -short -count=1 -parallel 4 -p 4 ./... - - name: Upload test results to DataDog + - name: Upload test stats to Datadog + timeout-minutes: 1 + continue-on-error: true uses: ./.github/actions/upload-datadog - if: always() + if: success() || failure() with: api-key: ${{ secrets.DATADOG_API_KEY }} notify-slack-on-failure: needs: - - go-race - - go-timing + - test-go-pg runs-on: ubuntu-latest if: failure() && github.ref == 'refs/heads/main' From c4de10e3c7c9304c215201ff1504cdf589045987 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Fri, 17 Jan 2025 09:00:26 +0000 Subject: [PATCH 4/8] Run CLI tests for Windows/MacOS on each PR Signed-off-by: Danny Kopping --- .github/workflows/ci.yaml | 59 +++++++++++++++++++++++++++++++++++++++ Makefile | 4 +++ 2 files changed, 63 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 82f2a7f9489b8..cc64156eca381 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -378,6 +378,65 @@ jobs: with: api-key: ${{ secrets.DATADOG_API_KEY }} + # We don't run the full test-suite for Windows & MacOS, so we just run the CLI tests on every PR. + # We run the test suite in test-go-pg, including CLI. + test-cli: + runs-on: ${{ matrix.os == 'macos-latest' && github.repository_owner == 'coder' && 'depot-macos-latest' || matrix.os == 'windows-2022' && github.repository_owner == 'coder' && 'windows-latest-16-cores' || matrix.os }} + needs: changes + + + #TODO: reenable condition + + + #if: needs.changes.outputs.go == 'true' || needs.changes.outputs.ci == 'true' || github.ref == 'refs/heads/main' + strategy: + matrix: + os: + - macos-latest + - windows-2022 + steps: + - name: Harden Runner + uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2 + with: + egress-policy: audit + + - name: Checkout + uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 + with: + fetch-depth: 1 + + - name: Setup Go + uses: ./.github/actions/setup-go + + - name: Setup Terraform + uses: ./.github/actions/setup-tf + + # Sets up the ImDisk toolkit for Windows and creates a RAM disk on drive R:. + - name: Setup ImDisk + if: runner.os == 'Windows' + uses: ./.github/actions/setup-imdisk + + - name: Test CLI + env: + TS_DEBUG_DISCO: "true" + LC_CTYPE: "en_US.UTF-8" + LC_ALL: "en_US.UTF-8" + shell: bash + run: | + # By default Go will use the number of logical CPUs, which + # is a fine default. + PARALLEL_FLAG="" + + make test-cli + + - name: Upload test stats to Datadog + timeout-minutes: 1 + continue-on-error: true + uses: ./.github/actions/upload-datadog + if: success() || failure() + with: + api-key: ${{ secrets.DATADOG_API_KEY }} + test-go-pg: runs-on: ${{ matrix.os == 'ubuntu-latest' && github.repository_owner == 'coder' && 'depot-ubuntu-22.04-4' || matrix.os == 'macos-latest' && github.repository_owner == 'coder' && 'depot-macos-latest' || matrix.os == 'windows-2022' && github.repository_owner == 'coder' && 'windows-latest-16-cores' || matrix.os }} needs: changes diff --git a/Makefile b/Makefile index 71bcef76aee70..45adfd12c7087 100644 --- a/Makefile +++ b/Makefile @@ -807,6 +807,10 @@ test: $(GIT_FLAGS) gotestsum --format standard-quiet -- -v -short -count=1 ./... .PHONY: test +test-cli: + $(GIT_FLAGS) gotestsum --format standard-quiet -- -v -short -count=1 ./cli/... +.PHONY: test-cli + # sqlc-cloud-is-setup will fail if no SQLc auth token is set. Use this as a # dependency for any sqlc-cloud related targets. sqlc-cloud-is-setup: From e6761e6d64cc2813705f09c00a1f9b5ce3004774 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Fri, 17 Jan 2025 09:00:47 +0000 Subject: [PATCH 5/8] Do not run full test-suite with Postgres on Windows/MacOS on each PR Signed-off-by: Danny Kopping --- .github/workflows/ci.yaml | 34 ++-------------------------------- 1 file changed, 2 insertions(+), 32 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index cc64156eca381..c1e99415a65db 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -438,7 +438,7 @@ jobs: api-key: ${{ secrets.DATADOG_API_KEY }} test-go-pg: - runs-on: ${{ matrix.os == 'ubuntu-latest' && github.repository_owner == 'coder' && 'depot-ubuntu-22.04-4' || matrix.os == 'macos-latest' && github.repository_owner == 'coder' && 'depot-macos-latest' || matrix.os == 'windows-2022' && github.repository_owner == 'coder' && 'windows-latest-16-cores' || matrix.os }} + runs-on: ${{ matrix.os == 'ubuntu-latest' && github.repository_owner == 'coder' && 'depot-ubuntu-22.04-4' || matrix.os }} needs: changes if: needs.changes.outputs.go == 'true' || needs.changes.outputs.ci == 'true' || github.ref == 'refs/heads/main' # This timeout must be greater than the timeout set by `go test` in @@ -450,8 +450,6 @@ jobs: matrix: os: - ubuntu-latest - - macos-latest - - windows-2022 steps: - name: Harden Runner uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2 @@ -482,39 +480,11 @@ jobs: LC_ALL: "en_US.UTF-8" shell: bash run: | - # if macOS, install google-chrome for scaletests - # As another concern, should we really have this kind of external dependency - # requirement on standard CI? - if [ "${{ matrix.os }}" == "macos-latest" ]; then - brew install google-chrome - fi - # By default Go will use the number of logical CPUs, which # is a fine default. PARALLEL_FLAG="" - # macOS will output "The default interactive shell is now zsh" - # intermittently in CI... - if [ "${{ matrix.os }}" == "macos-latest" ]; then - touch ~/.bash_profile && echo "export BASH_SILENCE_DEPRECATION_WARNING=1" >> ~/.bash_profile - fi - - if [ "${{ runner.os }}" == "Linux" ]; then - make test-postgres - elif [ "${{ runner.os }}" == "Windows" ]; then - # Create a temp dir on the R: ramdisk drive for Windows. The default - # C: drive is extremely slow: https://github.com/actions/runner-images/issues/8755 - mkdir -p "R:/temp/embedded-pg" - go run scripts/embedded-pg/main.go -path "R:/temp/embedded-pg" - # Reduce test parallelism, mirroring what we do for race tests. - # We'd been encountering issues with timing related flakes, and - # this seems to help. - DB=ci gotestsum --format standard-quiet -- -v -short -count=1 -parallel 4 -p 4 ./... - else - go run scripts/embedded-pg/main.go - # Reduce test parallelism, like for Windows above. - DB=ci gotestsum --format standard-quiet -- -v -short -count=1 -parallel 4 -p 4 ./... - fi + make test-postgres - name: Upload test stats to Datadog timeout-minutes: 1 From f1b54b92270dc63d181f764f7f85cd87a32c70fa Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Fri, 17 Jan 2025 09:06:15 +0000 Subject: [PATCH 6/8] Testing complete Signed-off-by: Danny Kopping --- .github/workflows/ci.yaml | 7 +------ .github/workflows/nightly-gauntlet.yaml | 10 +--------- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index c1e99415a65db..ca244c3237058 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -383,12 +383,7 @@ jobs: test-cli: runs-on: ${{ matrix.os == 'macos-latest' && github.repository_owner == 'coder' && 'depot-macos-latest' || matrix.os == 'windows-2022' && github.repository_owner == 'coder' && 'windows-latest-16-cores' || matrix.os }} needs: changes - - - #TODO: reenable condition - - - #if: needs.changes.outputs.go == 'true' || needs.changes.outputs.ci == 'true' || github.ref == 'refs/heads/main' + if: needs.changes.outputs.go == 'true' || needs.changes.outputs.ci == 'true' || github.ref == 'refs/heads/main' strategy: matrix: os: diff --git a/.github/workflows/nightly-gauntlet.yaml b/.github/workflows/nightly-gauntlet.yaml index 7f8591a01a80a..86d89f674c801 100644 --- a/.github/workflows/nightly-gauntlet.yaml +++ b/.github/workflows/nightly-gauntlet.yaml @@ -13,15 +13,7 @@ permissions: jobs: test-go-pg: runs-on: ${{ matrix.os == 'macos-latest' && github.repository_owner == 'coder' && 'depot-macos-latest' || matrix.os == 'windows-2022' && github.repository_owner == 'coder' && 'windows-latest-16-cores' || matrix.os }} - - - # TODO: re-enable main check! - - - - - - # if: github.ref == 'refs/heads/main' + if: github.ref == 'refs/heads/main' # This timeout must be greater than the timeout set by `go test` in # `make test-postgres` to ensure we receive a trace of running # goroutines. Setting this to the timeout +5m should work quite well From 43eae7ea4ce050e628fcaf8a7d83ae9445f2e1f5 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Fri, 17 Jan 2025 12:59:14 +0200 Subject: [PATCH 7/8] Only run on weekdays Co-authored-by: Muhammad Atif Ali --- .github/workflows/nightly-gauntlet.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/nightly-gauntlet.yaml b/.github/workflows/nightly-gauntlet.yaml index 86d89f674c801..f6521c9520fa7 100644 --- a/.github/workflows/nightly-gauntlet.yaml +++ b/.github/workflows/nightly-gauntlet.yaml @@ -4,7 +4,7 @@ name: nightly-gauntlet on: schedule: # Every day at 4AM - - cron: "0 4 * * *" + - cron: "0 4 * * 1-5" workflow_dispatch: permissions: From 1442d50da7642ee973650b8b1691a74cacfd95f1 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Fri, 17 Jan 2025 16:21:48 +0000 Subject: [PATCH 8/8] Fix comment location Signed-off-by: Danny Kopping --- .github/workflows/nightly-gauntlet.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/nightly-gauntlet.yaml b/.github/workflows/nightly-gauntlet.yaml index 86d89f674c801..326c2395cf084 100644 --- a/.github/workflows/nightly-gauntlet.yaml +++ b/.github/workflows/nightly-gauntlet.yaml @@ -76,13 +76,13 @@ jobs: # C: drive is extremely slow: https://github.com/actions/runner-images/issues/8755 mkdir -p "R:/temp/embedded-pg" go run scripts/embedded-pg/main.go -path "R:/temp/embedded-pg" - # Reduce test parallelism, mirroring what we do for race tests. - # We'd been encountering issues with timing related flakes, and - # this seems to help. else go run scripts/embedded-pg/main.go fi + # Reduce test parallelism, mirroring what we do for race tests. + # We'd been encountering issues with timing related flakes, and + # this seems to help. DB=ci gotestsum --format standard-quiet -- -v -short -count=1 -parallel 4 -p 4 ./... - name: Upload test stats to Datadog