Skip to content

chore: migrate all Coder modules to Registry repo #4

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
Apr 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
1965655
chore: add module icons
Parkreiner Apr 16, 2025
ad58a76
chore: add sample module files
Parkreiner Apr 16, 2025
592e7a5
chore: add VSCode settings
Parkreiner Apr 16, 2025
2317f92
chore: add general module images for Coder
Parkreiner Apr 16, 2025
e905d7a
chore: copy over main modules
Parkreiner Apr 16, 2025
24fef19
chore: update .gitignore
Parkreiner Apr 16, 2025
4bbeb50
fix: add missing contributor README files
Parkreiner Apr 16, 2025
25b7225
fix: update path for TS test helpers
Parkreiner Apr 16, 2025
39b524c
chore: add current lint file
Parkreiner Apr 16, 2025
bad778e
chore: update more setup files
Parkreiner Apr 16, 2025
f7f7b3e
chore: update setup for windows-rdp type-checking
Parkreiner Apr 16, 2025
bdc4679
chore: move exoscale images to whizus namespace
Parkreiner Apr 16, 2025
5dec066
chore: update frontmatter image paths
Parkreiner Apr 16, 2025
f373279
fix: update frontmatter icons paths
Parkreiner Apr 16, 2025
9b11825
fix: update remaining fm paths
Parkreiner Apr 16, 2025
e87dc93
fix: update windows-rpd tsconfig
Parkreiner Apr 16, 2025
8480748
docs: make comment for clear
Parkreiner Apr 16, 2025
c3822ea
fix: make sure TF tests run properly
Parkreiner Apr 16, 2025
12e0eab
chore: add registry health check back
Parkreiner Apr 16, 2025
3886b0e
chore: add back other workflow files
Parkreiner Apr 16, 2025
8b26886
fix: standardize naming conventions for Bash files
Parkreiner Apr 16, 2025
a778878
fix: update display name for node.js module
Parkreiner Apr 16, 2025
82e4009
fix: remove typo that prevented GH Actions from picking up job
Parkreiner Apr 17, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/dependabot.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "weekly"
204 changes: 204 additions & 0 deletions .github/scripts/check_registry_site_health.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
#!/usr/bin/env bash
set -o pipefail
set -u

VERBOSE="${VERBOSE:-0}"
if [[ "${VERBOSE}" -ne "0" ]]; then
set -x
fi

# List of required environment variables
required_vars=(
"INSTATUS_API_KEY"
"INSTATUS_PAGE_ID"
"INSTATUS_COMPONENT_ID"
"VERCEL_API_KEY"
)

# Check if each required variable is set
for var in "${required_vars[@]}"; do
if [[ -z "${!var:-}" ]]; then
echo "Error: Environment variable '$var' is not set."
exit 1
fi
done

REGISTRY_BASE_URL="${REGISTRY_BASE_URL:-https://registry.coder.com}"

status=0
declare -a modules=()
declare -a failures=()

# Collect all module directories containing a main.tf file
for path in $(find . -maxdepth 2 -not -path '*/.*' -type f -name main.tf | cut -d '/' -f 2 | sort -u); do
modules+=("${path}")
done

echo "Checking modules: ${modules[*]}"

# Function to update the component status on Instatus
update_component_status() {
local component_status=$1
# see https://instatus.com/help/api/components
(curl -X PUT "https://api.instatus.com/v1/$INSTATUS_PAGE_ID/components/$INSTATUS_COMPONENT_ID" \
-H "Authorization: Bearer $INSTATUS_API_KEY" \
-H "Content-Type: application/json" \
-d "{\"status\": \"$component_status\"}")
}

# Function to create an incident
create_incident() {
local incident_name="Degraded Service"
local message="The following modules are experiencing issues:\n"
for i in "${!failures[@]}"; do
message+="$((i + 1)). ${failures[$i]}\n"
done

component_status="PARTIALOUTAGE"
if ((${#failures[@]} == ${#modules[@]})); then
component_status="MAJOROUTAGE"
fi
# see https://instatus.com/help/api/incidents
incident_id=$(curl -s -X POST "https://api.instatus.com/v1/$INSTATUS_PAGE_ID/incidents" \
-H "Authorization: Bearer $INSTATUS_API_KEY" \
-H "Content-Type: application/json" \
-d "{
\"name\": \"$incident_name\",
\"message\": \"$message\",
\"components\": [\"$INSTATUS_COMPONENT_ID\"],
\"status\": \"INVESTIGATING\",
\"notify\": true,
\"statuses\": [
{
\"id\": \"$INSTATUS_COMPONENT_ID\",
\"status\": \"PARTIALOUTAGE\"
}
]
}" | jq -r '.id')

echo "Created incident with ID: $incident_id"
}

# Function to check for existing unresolved incidents
check_existing_incident() {
# Fetch the latest incidents with status not equal to "RESOLVED"
local unresolved_incidents=$(curl -s -X GET "https://api.instatus.com/v1/$INSTATUS_PAGE_ID/incidents" \
-H "Authorization: Bearer $INSTATUS_API_KEY" \
-H "Content-Type: application/json" | jq -r '.incidents[] | select(.status != "RESOLVED") | .id')

if [[ -n "$unresolved_incidents" ]]; then
echo "Unresolved incidents found: $unresolved_incidents"
return 0 # Indicate that there are unresolved incidents
else
echo "No unresolved incidents found."
return 1 # Indicate that no unresolved incidents exist
fi
}

force_redeploy_registry() {
# These are not secret values; safe to just expose directly in script
local VERCEL_TEAM_SLUG="codercom"
local VERCEL_TEAM_ID="team_tGkWfhEGGelkkqUUm9nXq17r"
local VERCEL_APP="registry"

local latest_res
latest_res=$(
curl "https://api.vercel.com/v6/deployments?app=$VERCEL_APP&limit=1&slug=$VERCEL_TEAM_SLUG&teamId=$VERCEL_TEAM_ID&target=production&state=BUILDING,INITIALIZING,QUEUED,READY" \
--fail \
--silent \
--header "Authorization: Bearer $VERCEL_API_KEY" \
--header "Content-Type: application/json"
)

# If we have zero deployments, something is VERY wrong. Make the whole
# script exit with a non-zero status code
local latest_id
latest_id=$(echo "${latest_res}" | jq -r '.deployments[0].uid')
if [[ "${latest_id}" = "null" ]]; then
echo "Unable to pull any previous deployments for redeployment"
echo "Please redeploy the latest deployment manually in Vercel."
echo "https://vercel.com/codercom/registry/deployments"
exit 1
fi

local latest_date_ts_seconds
latest_date_ts_seconds=$(echo "${latest_res}" | jq -r '.deployments[0].createdAt/1000|floor')
local current_date_ts_seconds
current_date_ts_seconds="$(date +%s)"
local max_redeploy_interval_seconds=7200 # 2 hours
if ((current_date_ts_seconds - latest_date_ts_seconds < max_redeploy_interval_seconds)); then
echo "The registry was deployed less than 2 hours ago."
echo "Not automatically re-deploying the regitstry."
echo "A human reading this message should decide if a redeployment is necessary."
echo "Please check the Vercel dashboard for more information."
echo "https://vercel.com/codercom/registry/deployments"
exit 1
fi

local latest_deployment_state
latest_deployment_state="$(echo "${latest_res}" | jq -r '.deployments[0].state')"
if [[ "${latest_deployment_state}" != "READY" ]]; then
echo "Last deployment was not in READY state. Skipping redeployment."
echo "A human reading this message should decide if a redeployment is necessary."
echo "Please check the Vercel dashboard for more information."
echo "https://vercel.com/codercom/registry/deployments"
exit 1
fi

echo "============================================================="
echo "!!! Redeploying registry with deployment ID: ${latest_id} !!!"
echo "============================================================="

if ! curl -X POST "https://api.vercel.com/v13/deployments?forceNew=1&skipAutoDetectionConfirmation=1&slug=$VERCEL_TEAM_SLUG&teamId=$VERCEL_TEAM_ID" \
--fail \
--header "Authorization: Bearer $VERCEL_API_KEY" \
--header "Content-Type: application/json" \
--data-raw "{ \"deploymentId\": \"${latest_id}\", \"name\": \"${VERCEL_APP}\", \"target\": \"production\" }"; then
echo "DEPLOYMENT FAILED! Please check the Vercel dashboard for more information."
echo "https://vercel.com/codercom/registry/deployments"
exit 1
fi
}

# Check each module's accessibility
for module in "${modules[@]}"; do
# Trim leading/trailing whitespace from module name
module=$(echo "${module}" | xargs)
url="${REGISTRY_BASE_URL}/modules/${module}"
printf "=== Checking module %s at %s\n" "${module}" "${url}"
status_code=$(curl --output /dev/null --head --silent --fail --location "${url}" --retry 3 --write-out "%{http_code}")
if ((status_code != 200)); then
printf "==> FAIL(%s)\n" "${status_code}"
status=1
failures+=("${module}")
else
printf "==> OK(%s)\n" "${status_code}"
fi
done

# Determine overall status and update Instatus component
if ((status == 0)); then
echo "All modules are operational."
# set to
update_component_status "OPERATIONAL"
else
echo "The following modules have issues: ${failures[*]}"
# check if all modules are down
if ((${#failures[@]} == ${#modules[@]})); then
update_component_status "MAJOROUTAGE"
else
update_component_status "PARTIALOUTAGE"
fi

# Check if there is an existing incident before creating a new one
if ! check_existing_incident; then
create_incident
fi

# If a module is down, force a reployment to try getting things back online
# ASAP
# EDIT: registry.coder.com is no longer hosted on vercel
#force_redeploy_registry
fi

exit "${status}"
23 changes: 23 additions & 0 deletions .github/workflows/check_registry_site_health.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Check modules health on registry.coder.com
name: check-registry-site-health
on:
schedule:
- cron: "0,15,30,45 * * * *" # Runs every 15 minutes
workflow_dispatch: # Allows manual triggering of the workflow if needed

jobs:
run-script:
runs-on: ubuntu-latest

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Run check.sh
run: |
./.github/scripts/check.sh
env:
INSTATUS_API_KEY: ${{ secrets.INSTATUS_API_KEY }}
INSTATUS_PAGE_ID: ${{ secrets.INSTATUS_PAGE_ID }}
INSTATUS_COMPONENT_ID: ${{ secrets.INSTATUS_COMPONENT_ID }}
VERCEL_API_KEY: ${{ secrets.VERCEL_API_KEY }}
22 changes: 21 additions & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
jobs:
validate-contributors:
validate-readme-files:
runs-on: ubuntu-latest
steps:
- name: Check out code
Expand All @@ -20,3 +20,23 @@ jobs:
run: go build ./scripts/contributors && ./contributors
- name: Remove build file artifact
run: rm ./contributors
test-terraform:
runs-on: ubuntu-latest
steps:
- name: Check out code
uses: actions/checkout@v4
- name: Set up Terraform
uses: coder/coder/.github/actions/setup-tf@main
- name: Set up Bun
uses: oven-sh/setup-bun@v2
with:
# We're using the latest version of Bun for now, but it might be worth
# reconsidering. They've pushed breaking changes in patch releases
# that have broken our CI.
# Our PR where issues started to pop up: https://github.com/coder/modules/pull/383
# The Bun PR that broke things: https://github.com/oven-sh/bun/pull/16067
bun-version: latest
- name: Install dependencies
run: bun install
- name: Run tests
run: bun test
36 changes: 36 additions & 0 deletions .github/workflows/deploy_registry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
name: deploy-registry
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bcpeinhardt I copied this file over directly, but do we need to change anything to make sure that it runs correctly, now that this is in a different repo?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we deploy this new architecture in parallel to the current one just to make sure we do not break existing stuff?


on:
push:
branches:
- main

jobs:
deploy:
runs-on: ubuntu-latest

# Set id-token permission for gcloud
# Adding a comment because retriggering the build manually hung? I am the lord of devops and you will bend?
permissions:
contents: read
id-token: write

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Authenticate to Google Cloud
uses: google-github-actions/auth@71f986410dfbc7added4569d411d040a91dc6935
with:
workload_identity_provider: projects/309789351055/locations/global/workloadIdentityPools/github-actions/providers/github
service_account: registry-v2-github@coder-registry-1.iam.gserviceaccount.com

- name: Set up Google Cloud SDK
uses: google-github-actions/setup-gcloud@77e7a554d41e2ee56fc945c52dfd3f33d12def9a

# For the time being, let's have the first couple merges to main in modules deploy a new version
# to *dev*. Once we review and make sure everything's working, we can deploy a new version to *main*.
# Maybe in the future we could automate this based on the result of E2E tests.
- name: Deploy to dev.registry.coder.com
run: |
gcloud builds triggers run 29818181-126d-4f8a-a937-f228b27d3d34 --branch dev
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -137,3 +137,11 @@ dist

# Script output
/contributors

# Terraform files generated during testing
.terraform*
*.tfstate
*.tfstate.lock.info

# Generated credentials from google-github-actions/auth
gha-creds-*.json
Loading