Skip to content

Commit 4b68a0b

Browse files
committed
Merge branch 'main' into dean/proxy-derp-map
2 parents ac99525 + 465fe86 commit 4b68a0b

File tree

86 files changed

+1837
-645
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

86 files changed

+1837
-645
lines changed

.github/workflows/ci.yaml

Lines changed: 30 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ jobs:
4141

4242
# Check for any typos!
4343
- name: Check for typos
44-
uses: crate-ci/typos@v1.14.3
44+
uses: crate-ci/typos@v1.14.8
4545
with:
4646
config: .github/workflows/typos.toml
4747
- name: Fix the typos
@@ -487,14 +487,36 @@ jobs:
487487
488488
- name: Install Release
489489
run: |
490-
gcloud config set project coder-dogfood
491-
gcloud config set compute/zone us-central1-a
492-
gcloud compute scp ./build/coder_*_linux_amd64.deb coder:/tmp/coder.deb
493-
gcloud compute ssh coder -- sudo dpkg -i --force-confdef /tmp/coder.deb
494-
gcloud compute ssh coder -- sudo systemctl daemon-reload
490+
set -euo pipefail
495491
496-
- name: Start
497-
run: gcloud compute ssh coder -- sudo service coder restart
492+
regions=(
493+
# gcp-region-id instance-name systemd-service-name
494+
"us-central1-a coder coder"
495+
"australia-southeast1-b coder-sydney coder-workspace-proxy"
496+
"europe-west3-c coder-europe coder-workspace-proxy"
497+
"southamerica-east1-b coder-brazil coder-workspace-proxy"
498+
)
499+
500+
deb_pkg="./build/coder_$(./scripts/version.sh)_linux_amd64.deb"
501+
if [ ! -f "$deb_pkg" ]; then
502+
echo "deb package not found: $deb_pkg"
503+
ls -l ./build
504+
exit 1
505+
fi
506+
507+
gcloud config set project coder-dogfood
508+
for region in "${regions[@]}"; do
509+
echo "::group::$region"
510+
set -- $region
511+
512+
set -x
513+
gcloud config set compute/zone "$1"
514+
gcloud compute scp "$deb_pkg" "${2}:/tmp/coder.deb"
515+
gcloud compute ssh "$2" -- /bin/sh -c "set -eux; sudo dpkg -i --force-confdef /tmp/coder.deb; sudo systemctl daemon-reload; sudo service '$3' restart"
516+
set +x
517+
518+
echo "::endgroup::"
519+
done
498520
499521
- uses: actions/upload-artifact@v3
500522
with:

.github/workflows/dogfood.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ jobs:
1717
steps:
1818
- name: Get branch name
1919
id: branch-name
20-
uses: tj-actions/branch-names@v6.4
20+
uses: tj-actions/branch-names@v6.5
2121

2222
- name: "Branch name to Docker tag name"
2323
id: docker-tag-name

.github/workflows/security.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -140,7 +140,7 @@ jobs:
140140
echo "image=$(cat "$image_job")" >> $GITHUB_OUTPUT
141141
142142
- name: Run Trivy vulnerability scanner
143-
uses: aquasecurity/trivy-action@1f0aa582c8c8f5f7639610d6d38baddfea4fdcee
143+
uses: aquasecurity/trivy-action@e5f43133f6e8736992c9f3c1b3296e24b37e17f2
144144
with:
145145
image-ref: ${{ steps.build.outputs.image }}
146146
format: sarif

agent/agent.go

Lines changed: 17 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -210,25 +210,31 @@ func (a *agent) collectMetadata(ctx context.Context, md codersdk.WorkspaceAgentM
210210
var out bytes.Buffer
211211
result := &codersdk.WorkspaceAgentMetadataResult{
212212
// CollectedAt is set here for testing purposes and overridden by
213-
// the server to the time the server received the result to protect
214-
// against clock skew.
213+
// coderd to the time of server receipt to solve clock skew.
215214
//
216215
// In the future, the server may accept the timestamp from the agent
217-
// if it is certain the clocks are in sync.
216+
// if it can guarantee the clocks are synchronized.
218217
CollectedAt: time.Now(),
219218
}
220219
cmd, err := a.sshServer.CreateCommand(ctx, md.Script, nil)
221220
if err != nil {
222-
result.Error = err.Error()
221+
result.Error = fmt.Sprintf("create cmd: %+v", err)
223222
return result
224223
}
225224

226225
cmd.Stdout = &out
227226
cmd.Stderr = &out
227+
cmd.Stdin = io.LimitReader(nil, 0)
228228

229-
// The error isn't mutually exclusive with useful output.
230-
err = cmd.Run()
229+
// We split up Start and Wait instead of calling Run so that we can return a more precise error.
230+
err = cmd.Start()
231+
if err != nil {
232+
result.Error = fmt.Sprintf("start cmd: %+v", err)
233+
return result
234+
}
231235

236+
// This error isn't mutually exclusive with useful output.
237+
err = cmd.Wait()
232238
const bufLimit = 10 << 10
233239
if out.Len() > bufLimit {
234240
err = errors.Join(
@@ -238,8 +244,12 @@ func (a *agent) collectMetadata(ctx context.Context, md codersdk.WorkspaceAgentM
238244
out.Truncate(bufLimit)
239245
}
240246

247+
// Important: if the command times out, we may see a misleading error like
248+
// "exit status 1", so it's important to include the context error.
249+
err = errors.Join(err, ctx.Err())
250+
241251
if err != nil {
242-
result.Error = err.Error()
252+
result.Error = fmt.Sprintf("run cmd: %+v", err)
243253
}
244254
result.Value = out.String()
245255
return result

agent/agent_test.go

Lines changed: 91 additions & 60 deletions
Original file line number | Diff line number | Diff line change
@@ -951,19 +951,17 @@ func TestAgent_StartupScript(t *testing.T) {
951951
func TestAgent_Metadata(t *testing.T) {
952952
t.Parallel()
953953

954+
echoHello := "echo 'hello'"
955+
954956
t.Run("Once", func(t *testing.T) {
955957
t.Parallel()
956-
script := "echo -n hello"
957-
if runtime.GOOS == "windows" {
958-
script = "powershell " + script
959-
}
960958
//nolint:dogsled
961959
_, client, _, _, _ := setupAgent(t, agentsdk.Manifest{
962960
Metadata: []codersdk.WorkspaceAgentMetadataDescription{
963961
{
964962
Key: "greeting",
965963
Interval: 0,
966-
Script: script,
964+
Script: echoHello,
967965
},
968966
},
969967
}, 0)
@@ -986,78 +984,111 @@ func TestAgent_Metadata(t *testing.T) {
986984
})
987985

988986
t.Run("Many", func(t *testing.T) {
989-
if runtime.GOOS == "windows" {
990-
// Shell scripting in Windows is a pain, and we have already tested
991-
// that the OS logic works in the simpler "Once" test above.
992-
t.Skip()
993-
}
994987
t.Parallel()
995-
996-
dir := t.TempDir()
997-
998-
const reportInterval = 2
999-
const intervalUnit = 100 * time.Millisecond
1000-
var (
1001-
greetingPath = filepath.Join(dir, "greeting")
1002-
script = "echo hello | tee -a " + greetingPath
1003-
)
988+
//nolint:dogsled
1004989
_, client, _, _, _ := setupAgent(t, agentsdk.Manifest{
1005990
Metadata: []codersdk.WorkspaceAgentMetadataDescription{
1006991
{
1007992
Key: "greeting",
1008-
Interval: reportInterval,
1009-
Script: script,
1010-
},
1011-
{
1012-
Key: "bad",
1013-
Interval: reportInterval,
1014-
Script: "exit 1",
993+
Interval: 1,
994+
Timeout: 100,
995+
Script: echoHello,
1015996
},
1016997
},
1017998
}, 0)
1018999

1000+
var gotMd map[string]agentsdk.PostMetadataRequest
10191001
require.Eventually(t, func() bool {
1020-
return len(client.getMetadata()) == 2
1002+
gotMd = client.getMetadata()
1003+
return len(gotMd) == 1
10211004
}, testutil.WaitShort, testutil.IntervalMedium)
10221005

1023-
for start := time.Now(); time.Since(start) < testutil.WaitMedium; time.Sleep(testutil.IntervalMedium) {
1024-
md := client.getMetadata()
1025-
if len(md) != 2 {
1026-
panic("unexpected number of metadata entries")
1027-
}
1006+
collectedAt1 := gotMd["greeting"].CollectedAt
1007+
if !assert.Equal(t, "hello", strings.TrimSpace(gotMd["greeting"].Value)) {
1008+
t.Errorf("got: %+v", gotMd)
1009+
}
10281010

1029-
require.Equal(t, "hello\n", md["greeting"].Value)
1030-
require.Equal(t, "exit status 1", md["bad"].Error)
1011+
if !assert.Eventually(t, func() bool {
1012+
gotMd = client.getMetadata()
1013+
return gotMd["greeting"].CollectedAt.After(collectedAt1)
1014+
}, testutil.WaitShort, testutil.IntervalMedium) {
1015+
t.Fatalf("expected metadata to be collected again")
1016+
}
1017+
})
1018+
}
10311019

1032-
greetingByt, err := os.ReadFile(greetingPath)
1033-
require.NoError(t, err)
1020+
func TestAgentMetadata_Timing(t *testing.T) {
1021+
if runtime.GOOS == "windows" {
1022+
// Shell scripting in Windows is a pain, and we have already tested
1023+
// that the OS logic works in the simpler tests.
1024+
t.Skip()
1025+
}
1026+
testutil.SkipIfNotTiming(t)
1027+
t.Parallel()
10341028

1035-
var (
1036-
numGreetings = bytes.Count(greetingByt, []byte("hello"))
1037-
idealNumGreetings = time.Since(start) / (reportInterval * intervalUnit)
1038-
// We allow a 50% error margin because the report loop may backlog
1039-
// in CI and other toasters. In production, there is no hard
1040-
// guarantee on timing either, and the frontend gives similar
1041-
// wiggle room to the staleness of the value.
1042-
upperBound = int(idealNumGreetings) + 1
1043-
lowerBound = (int(idealNumGreetings) / 2)
1044-
)
1045-
1046-
if idealNumGreetings < 50 {
1047-
// There is an insufficient sample size.
1048-
continue
1049-
}
1029+
dir := t.TempDir()
10501030

1051-
t.Logf("numGreetings: %d, idealNumGreetings: %d", numGreetings, idealNumGreetings)
1052-
// The report loop may slow down on load, but it should never, ever
1053-
// speed up.
1054-
if numGreetings > upperBound {
1055-
t.Fatalf("too many greetings: %d > %d in %v", numGreetings, upperBound, time.Since(start))
1056-
} else if numGreetings < lowerBound {
1057-
t.Fatalf("too few greetings: %d < %d", numGreetings, lowerBound)
1058-
}
1031+
const reportInterval = 2
1032+
const intervalUnit = 100 * time.Millisecond
1033+
var (
1034+
greetingPath = filepath.Join(dir, "greeting")
1035+
script = "echo hello | tee -a " + greetingPath
1036+
)
1037+
//nolint:dogsled
1038+
_, client, _, _, _ := setupAgent(t, agentsdk.Manifest{
1039+
Metadata: []codersdk.WorkspaceAgentMetadataDescription{
1040+
{
1041+
Key: "greeting",
1042+
Interval: reportInterval,
1043+
Script: script,
1044+
},
1045+
{
1046+
Key: "bad",
1047+
Interval: reportInterval,
1048+
Script: "exit 1",
1049+
},
1050+
},
1051+
}, 0)
1052+
1053+
require.Eventually(t, func() bool {
1054+
return len(client.getMetadata()) == 2
1055+
}, testutil.WaitShort, testutil.IntervalMedium)
1056+
1057+
for start := time.Now(); time.Since(start) < testutil.WaitMedium; time.Sleep(testutil.IntervalMedium) {
1058+
md := client.getMetadata()
1059+
require.Len(t, md, 2, "got: %+v", md)
1060+
1061+
require.Equal(t, "hello\n", md["greeting"].Value)
1062+
require.Equal(t, "run cmd: exit status 1", md["bad"].Error)
1063+
1064+
greetingByt, err := os.ReadFile(greetingPath)
1065+
require.NoError(t, err)
1066+
1067+
var (
1068+
numGreetings = bytes.Count(greetingByt, []byte("hello"))
1069+
idealNumGreetings = time.Since(start) / (reportInterval * intervalUnit)
1070+
// We allow a 50% error margin because the report loop may backlog
1071+
// in CI and other toasters. In production, there is no hard
1072+
// guarantee on timing either, and the frontend gives similar
1073+
// wiggle room to the staleness of the value.
1074+
upperBound = int(idealNumGreetings) + 1
1075+
lowerBound = (int(idealNumGreetings) / 2)
1076+
)
1077+
1078+
if idealNumGreetings < 50 {
1079+
// There is an insufficient sample size.
1080+
continue
10591081
}
1060-
})
1082+
1083+
t.Logf("numGreetings: %d, idealNumGreetings: %d", numGreetings, idealNumGreetings)
1084+
// The report loop may slow down on load, but it should never, ever
1085+
// speed up.
1086+
if numGreetings > upperBound {
1087+
t.Fatalf("too many greetings: %d > %d in %v", numGreetings, upperBound, time.Since(start))
1088+
} else if numGreetings < lowerBound {
1089+
t.Fatalf("too few greetings: %d < %d", numGreetings, lowerBound)
1090+
}
1091+
}
10611092
}
10621093

10631094
func TestAgent_Lifecycle(t *testing.T) {

cli/server.go

Lines changed: 38 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,7 @@ import (
8888
"github.com/coder/coder/provisionersdk"
8989
sdkproto "github.com/coder/coder/provisionersdk/proto"
9090
"github.com/coder/coder/tailnet"
91+
"github.com/coder/retry"
9192
"github.com/coder/wgtunnel/tunnelsdk"
9293
)
9394

@@ -1733,24 +1734,43 @@ func BuildLogger(inv *clibase.Invocation, cfg *codersdk.DeploymentValues) (slog.
17331734

17341735
func connectToPostgres(ctx context.Context, logger slog.Logger, driver string, dbURL string) (*sql.DB, error) {
17351736
logger.Debug(ctx, "connecting to postgresql")
1736-
sqlDB, err := sql.Open(driver, dbURL)
1737-
if err != nil {
1738-
return nil, xerrors.Errorf("dial postgres: %w", err)
1739-
}
17401737

1741-
ok := false
1738+
// Try to connect for 30 seconds.
1739+
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
1740+
defer cancel()
1741+
1742+
var (
1743+
sqlDB *sql.DB
1744+
err error
1745+
ok = false
1746+
tries int
1747+
)
1748+
for r := retry.New(time.Second, 3*time.Second); r.Wait(ctx); {
1749+
tries++
1750+
1751+
sqlDB, err = sql.Open(driver, dbURL)
1752+
if err != nil {
1753+
logger.Warn(ctx, "connect to postgres; retrying", slog.Error(err), slog.F("try", tries))
1754+
continue
1755+
}
1756+
1757+
err = pingPostgres(ctx, sqlDB)
1758+
if err != nil {
1759+
logger.Warn(ctx, "ping postgres; retrying", slog.Error(err), slog.F("try", tries))
1760+
continue
1761+
}
1762+
1763+
break
1764+
}
1765+
// Make sure we close the DB in case it opened but the ping failed for some
1766+
// reason.
17421767
defer func() {
1743-
if !ok {
1768+
if !ok && sqlDB != nil {
17441769
_ = sqlDB.Close()
17451770
}
17461771
}()
1747-
1748-
pingCtx, pingCancel := context.WithTimeout(ctx, 15*time.Second)
1749-
defer pingCancel()
1750-
1751-
err = sqlDB.PingContext(pingCtx)
17521772
if err != nil {
1753-
return nil, xerrors.Errorf("ping postgres: %w", err)
1773+
return nil, xerrors.Errorf("connect to postgres; tries %d; last error: %w", tries, err)
17541774
}
17551775

17561776
// Ensure the PostgreSQL version is >=13.0.0!
@@ -1799,6 +1819,12 @@ func connectToPostgres(ctx context.Context, logger slog.Logger, driver string, d
17991819
return sqlDB, nil
18001820
}
18011821

1822+
func pingPostgres(ctx context.Context, db *sql.DB) error {
1823+
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
1824+
defer cancel()
1825+
return db.PingContext(ctx)
1826+
}
1827+
18021828
type HTTPServers struct {
18031829
HTTPUrl *url.URL
18041830
HTTPListener net.Listener

coderd/httpmw/httpmw.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ func parseUUID(rw http.ResponseWriter, r *http.Request, param string) (uuid.UUID
2626
parsed, err := uuid.Parse(rawID)
2727
if err != nil {
2828
httpapi.Write(r.Context(), rw, http.StatusBadRequest, codersdk.Response{
29-
Message: fmt.Sprintf("Invalid UUID %q.", param),
29+
Message: fmt.Sprintf("Invalid UUID %q.", rawID),
3030
Detail: err.Error(),
3131
})
3232
return uuid.UUID{}, false

0 commit comments

Comments
 (0)