Skip to content

Commit d0c2094

Browse files
Merge remote-tracking branch 'origin/16930' into yevhenii/510-reconciliation-loop-v2
2 parents 9ac7a2c + 61a88e4 commit d0c2094

File tree

223 files changed

+7709
-3405
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

223 files changed

+7709
-3405
lines changed

.github/actions/setup-tf/action.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,5 @@ runs:
77
- name: Install Terraform
88
uses: hashicorp/setup-terraform@b9cd54a3c349d3f38e8881555d616ced269862dd # v3.1.2
99
with:
10-
terraform_version: 1.11.2
10+
terraform_version: 1.11.4
1111
terraform_wrapper: false

.github/workflows/ci.yaml

+28
Original file line numberDiff line numberDiff line change
@@ -1180,6 +1180,34 @@ jobs:
11801180
done
11811181
fi
11821182
1183+
- name: SBOM Generation and Attestation
1184+
if: github.ref == 'refs/heads/main'
1185+
continue-on-error: true
1186+
env:
1187+
COSIGN_EXPERIMENTAL: 1
1188+
run: |
1189+
set -euxo pipefail
1190+
1191+
# Define image base and tags
1192+
IMAGE_BASE="ghcr.io/coder/coder-preview"
1193+
TAGS=("${{ steps.build-docker.outputs.tag }}" "main" "latest")
1194+
1195+
# Generate and attest SBOM for each tag
1196+
for tag in "${TAGS[@]}"; do
1197+
IMAGE="${IMAGE_BASE}:${tag}"
1198+
SBOM_FILE="coder_sbom_${tag//[:\/]/_}.spdx.json"
1199+
1200+
echo "Generating SBOM for image: ${IMAGE}"
1201+
syft "${IMAGE}" -o spdx-json > "${SBOM_FILE}"
1202+
1203+
echo "Attesting SBOM to image: ${IMAGE}"
1204+
cosign clean --force=true "${IMAGE}"
1205+
cosign attest --type spdxjson \
1206+
--predicate "${SBOM_FILE}" \
1207+
--yes \
1208+
"${IMAGE}"
1209+
done
1210+
11831211
# GitHub attestation provides SLSA provenance for the Docker images, establishing a verifiable
11841212
# record that these images were built in GitHub Actions with specific inputs and environment.
11851213
# This complements our existing cosign attestations which focus on SBOMs.

.github/workflows/release.yaml

+60-7
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,39 @@ jobs:
496496
env:
497497
CODER_BASE_IMAGE_TAG: ${{ steps.image-base-tag.outputs.tag }}
498498

499+
- name: SBOM Generation and Attestation
500+
if: ${{ !inputs.dry_run }}
501+
env:
502+
COSIGN_EXPERIMENTAL: "1"
503+
run: |
504+
set -euxo pipefail
505+
506+
# Generate SBOM for multi-arch image with version in filename
507+
echo "Generating SBOM for multi-arch image: ${{ steps.build_docker.outputs.multiarch_image }}"
508+
syft "${{ steps.build_docker.outputs.multiarch_image }}" -o spdx-json > coder_${{ steps.version.outputs.version }}_sbom.spdx.json
509+
510+
# Attest SBOM to multi-arch image
511+
echo "Attesting SBOM to multi-arch image: ${{ steps.build_docker.outputs.multiarch_image }}"
512+
cosign clean --force=true "${{ steps.build_docker.outputs.multiarch_image }}"
513+
cosign attest --type spdxjson \
514+
--predicate coder_${{ steps.version.outputs.version }}_sbom.spdx.json \
515+
--yes \
516+
"${{ steps.build_docker.outputs.multiarch_image }}"
517+
518+
# If latest tag was created, also attest it
519+
if [[ "${{ steps.build_docker.outputs.created_latest_tag }}" == "true" ]]; then
520+
latest_tag="$(./scripts/image_tag.sh --version latest)"
521+
echo "Generating SBOM for latest image: ${latest_tag}"
522+
syft "${latest_tag}" -o spdx-json > coder_latest_sbom.spdx.json
523+
524+
echo "Attesting SBOM to latest image: ${latest_tag}"
525+
cosign clean --force=true "${latest_tag}"
526+
cosign attest --type spdxjson \
527+
--predicate coder_latest_sbom.spdx.json \
528+
--yes \
529+
"${latest_tag}"
530+
fi
531+
499532
- name: GitHub Attestation for Docker image
500533
id: attest_main
501534
if: ${{ !inputs.dry_run }}
@@ -612,16 +645,27 @@ jobs:
612645
fi
613646
declare -p publish_args
614647
648+
# Build the list of files to publish
649+
files=(
650+
./build/*_installer.exe
651+
./build/*.zip
652+
./build/*.tar.gz
653+
./build/*.tgz
654+
./build/*.apk
655+
./build/*.deb
656+
./build/*.rpm
657+
./coder_${{ steps.version.outputs.version }}_sbom.spdx.json
658+
)
659+
660+
# Only include the latest SBOM file if it was created
661+
if [[ "${{ steps.build_docker.outputs.created_latest_tag }}" == "true" ]]; then
662+
files+=(./coder_latest_sbom.spdx.json)
663+
fi
664+
615665
./scripts/release/publish.sh \
616666
"${publish_args[@]}" \
617667
--release-notes-file "$CODER_RELEASE_NOTES_FILE" \
618-
./build/*_installer.exe \
619-
./build/*.zip \
620-
./build/*.tar.gz \
621-
./build/*.tgz \
622-
./build/*.apk \
623-
./build/*.deb \
624-
./build/*.rpm
668+
"${files[@]}"
625669
env:
626670
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
627671
CODER_GPG_RELEASE_KEY_BASE64: ${{ secrets.GPG_RELEASE_KEY_BASE64 }}
@@ -663,6 +707,15 @@ jobs:
663707
./build/*.apk
664708
./build/*.deb
665709
./build/*.rpm
710+
./coder_${{ steps.version.outputs.version }}_sbom.spdx.json
711+
retention-days: 7
712+
713+
- name: Upload latest sbom artifact to actions (if dry-run)
714+
if: inputs.dry_run && steps.build_docker.outputs.created_latest_tag == 'true'
715+
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
716+
with:
717+
name: latest-sbom-artifact
718+
path: ./coder_latest_sbom.spdx.json
666719
retention-days: 7
667720

668721
- name: Send repository-dispatch event

Makefile

+7-1
Original file line numberDiff line numberDiff line change
@@ -581,7 +581,8 @@ GEN_FILES := \
581581
$(TAILNETTEST_MOCKS) \
582582
coderd/database/pubsub/psmock/psmock.go \
583583
agent/agentcontainers/acmock/acmock.go \
584-
agent/agentcontainers/dcspec/dcspec_gen.go
584+
agent/agentcontainers/dcspec/dcspec_gen.go \
585+
coderd/httpmw/loggermock/loggermock.go
585586

586587
# all gen targets should be added here and to gen/mark-fresh
587588
gen: gen/db gen/golden-files $(GEN_FILES)
@@ -630,6 +631,7 @@ gen/mark-fresh:
630631
coderd/database/pubsub/psmock/psmock.go \
631632
agent/agentcontainers/acmock/acmock.go \
632633
agent/agentcontainers/dcspec/dcspec_gen.go \
634+
coderd/httpmw/loggermock/loggermock.go \
633635
"
634636

635637
for file in $$files; do
@@ -669,6 +671,10 @@ agent/agentcontainers/acmock/acmock.go: agent/agentcontainers/containers.go
669671
go generate ./agent/agentcontainers/acmock/
670672
touch "$@"
671673

674+
coderd/httpmw/loggermock/loggermock.go: coderd/httpmw/logger.go
675+
go generate ./coderd/httpmw/loggermock/
676+
touch "$@"
677+
672678
agent/agentcontainers/dcspec/dcspec_gen.go: \
673679
node_modules/.installed \
674680
agent/agentcontainers/dcspec/devContainer.base.schema.json \

agent/agent.go

+40-13
Original file line numberDiff line numberDiff line change
@@ -907,7 +907,7 @@ func (a *agent) run() (retErr error) {
907907
defer func() {
908908
cErr := aAPI.DRPCConn().Close()
909909
if cErr != nil {
910-
a.logger.Debug(a.hardCtx, "error closing drpc connection", slog.Error(err))
910+
a.logger.Debug(a.hardCtx, "error closing drpc connection", slog.Error(cErr))
911911
}
912912
}()
913913

@@ -1186,9 +1186,9 @@ func (a *agent) createOrUpdateNetwork(manifestOK, networkOK *checkpoint) func(co
11861186
network := a.network
11871187
a.closeMutex.Unlock()
11881188
if network == nil {
1189-
keySeed, err := WorkspaceKeySeed(manifest.WorkspaceID, manifest.AgentName)
1189+
keySeed, err := SSHKeySeed(manifest.OwnerName, manifest.WorkspaceName, manifest.AgentName)
11901190
if err != nil {
1191-
return xerrors.Errorf("generate seed from workspace id: %w", err)
1191+
return xerrors.Errorf("generate SSH key seed: %w", err)
11921192
}
11931193
// use the graceful context here, because creating the tailnet is not itself tied to the
11941194
// agent API.
@@ -1408,7 +1408,7 @@ func (a *agent) createTailnet(
14081408
if rPTYServeErr != nil &&
14091409
a.gracefulCtx.Err() == nil &&
14101410
!strings.Contains(rPTYServeErr.Error(), "use of closed network connection") {
1411-
a.logger.Error(ctx, "error serving reconnecting PTY", slog.Error(err))
1411+
a.logger.Error(ctx, "error serving reconnecting PTY", slog.Error(rPTYServeErr))
14121412
}
14131413
}); err != nil {
14141414
return nil, err
@@ -1518,14 +1518,11 @@ func (a *agent) runCoordinator(ctx context.Context, tClient tailnetproto.DRPCTai
15181518
a.logger.Info(ctx, "connected to coordination RPC")
15191519

15201520
// This allows the Close() routine to wait for the coordinator to gracefully disconnect.
1521-
a.closeMutex.Lock()
1522-
if a.isClosed() {
1523-
return nil
1521+
disconnected := a.setCoordDisconnected()
1522+
if disconnected == nil {
1523+
return nil // already closed by something else
15241524
}
1525-
disconnected := make(chan struct{})
1526-
a.coordDisconnected = disconnected
15271525
defer close(disconnected)
1528-
a.closeMutex.Unlock()
15291526

15301527
ctrl := tailnet.NewAgentCoordinationController(a.logger, network)
15311528
coordination := ctrl.New(coordinate)
@@ -1547,6 +1544,17 @@ func (a *agent) runCoordinator(ctx context.Context, tClient tailnetproto.DRPCTai
15471544
return <-errCh
15481545
}
15491546

1547+
func (a *agent) setCoordDisconnected() chan struct{} {
1548+
a.closeMutex.Lock()
1549+
defer a.closeMutex.Unlock()
1550+
if a.isClosed() {
1551+
return nil
1552+
}
1553+
disconnected := make(chan struct{})
1554+
a.coordDisconnected = disconnected
1555+
return disconnected
1556+
}
1557+
15501558
// runDERPMapSubscriber runs a coordinator and returns if a reconnect should occur.
15511559
func (a *agent) runDERPMapSubscriber(ctx context.Context, tClient tailnetproto.DRPCTailnetClient24, network *tailnet.Conn) error {
15521560
defer a.logger.Debug(ctx, "disconnected from derp map RPC")
@@ -2068,12 +2076,31 @@ func PrometheusMetricsHandler(prometheusRegistry *prometheus.Registry, logger sl
20682076
})
20692077
}
20702078

2071-
// WorkspaceKeySeed converts a WorkspaceID UUID and agent name to an int64 hash.
2079+
// SSHKeySeed converts an owner userName, workspaceName and agentName to an int64 hash.
20722080
// This uses the FNV-1a hash algorithm which provides decent distribution and collision
20732081
// resistance for string inputs.
2074-
func WorkspaceKeySeed(workspaceID uuid.UUID, agentName string) (int64, error) {
2082+
//
2083+
// Why owner username, workspace name, and agent name? These are the components that are used in hostnames for the
2084+
// workspace over SSH, and so we want the workspace to have a stable key with respect to these. We don't use the
2085+
// respective UUIDs. The workspace UUID would be different if you delete and recreate a workspace with the same name.
2086+
// The agent UUID is regenerated on each build. Since Coder's Tailnet networking is handling the authentication, we
2087+
// should not be showing users warnings about host SSH keys.
2088+
func SSHKeySeed(userName, workspaceName, agentName string) (int64, error) {
20752089
h := fnv.New64a()
2076-
_, err := h.Write(workspaceID[:])
2090+
_, err := h.Write([]byte(userName))
2091+
if err != nil {
2092+
return 42, err
2093+
}
2094+
// null separators between strings so that (dog, foodstuff) is distinct from (dogfood, stuff)
2095+
_, err = h.Write([]byte{0})
2096+
if err != nil {
2097+
return 42, err
2098+
}
2099+
_, err = h.Write([]byte(workspaceName))
2100+
if err != nil {
2101+
return 42, err
2102+
}
2103+
_, err = h.Write([]byte{0})
20772104
if err != nil {
20782105
return 42, err
20792106
}

agent/agent_test.go

+2-3
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ func TestAgent_Stats_Magic(t *testing.T) {
190190
s, ok := <-stats
191191
t.Logf("got stats: ok=%t, ConnectionCount=%d, RxBytes=%d, TxBytes=%d, SessionCountVSCode=%d, ConnectionMedianLatencyMS=%f",
192192
ok, s.ConnectionCount, s.RxBytes, s.TxBytes, s.SessionCountVscode, s.ConnectionMedianLatencyMs)
193-
return ok && s.ConnectionCount > 0 && s.RxBytes > 0 && s.TxBytes > 0 &&
193+
return ok &&
194194
// Ensure that the connection didn't count as a "normal" SSH session.
195195
// This was a special one, so it should be labeled specially in the stats!
196196
s.SessionCountVscode == 1 &&
@@ -258,8 +258,7 @@ func TestAgent_Stats_Magic(t *testing.T) {
258258
s, ok := <-stats
259259
t.Logf("got stats with conn open: ok=%t, ConnectionCount=%d, SessionCountJetBrains=%d",
260260
ok, s.ConnectionCount, s.SessionCountJetbrains)
261-
return ok && s.ConnectionCount > 0 &&
262-
s.SessionCountJetbrains == 1
261+
return ok && s.SessionCountJetbrains == 1
263262
}, testutil.WaitLong, testutil.IntervalFast,
264263
"never saw stats with conn open",
265264
)

agent/agentscripts/agentscripts_test.go

+4-1
Original file line numberDiff line numberDiff line change
@@ -102,13 +102,16 @@ func TestEnv(t *testing.T) {
102102

103103
func TestTimeout(t *testing.T) {
104104
t.Parallel()
105+
if runtime.GOOS == "darwin" {
106+
t.Skip("this test is flaky on macOS, see https://github.com/coder/internal/issues/329")
107+
}
105108
runner := setup(t, nil)
106109
defer runner.Close()
107110
aAPI := agenttest.NewFakeAgentAPI(t, testutil.Logger(t), nil, nil)
108111
err := runner.Init([]codersdk.WorkspaceAgentScript{{
109112
LogSourceID: uuid.New(),
110113
Script: "sleep infinity",
111-
Timeout: time.Millisecond,
114+
Timeout: 100 * time.Millisecond,
112115
}}, aAPI.ScriptCompleted)
113116
require.NoError(t, err)
114117
require.ErrorIs(t, runner.Execute(context.Background(), agentscripts.ExecuteAllScripts), agentscripts.ErrTimeout)

agent/agentssh/agentssh.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -1060,8 +1060,10 @@ func (s *Server) Close() error {
10601060
// Guard against multiple calls to Close and
10611061
// accepting new connections during close.
10621062
if s.closing != nil {
1063+
closing := s.closing
10631064
s.mu.Unlock()
1064-
return xerrors.New("server is closing")
1065+
<-closing
1066+
return xerrors.New("server is closed")
10651067
}
10661068
s.closing = make(chan struct{})
10671069

agent/agentssh/agentssh_test.go

+16-2
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"strings"
1414
"sync"
1515
"testing"
16+
"time"
1617

1718
"github.com/prometheus/client_golang/prometheus"
1819
"github.com/spf13/afero"
@@ -153,7 +154,9 @@ func TestNewServer_CloseActiveConnections(t *testing.T) {
153154
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
154155
s, err := agentssh.NewServer(ctx, logger, prometheus.NewRegistry(), afero.NewMemMapFs(), agentexec.DefaultExecer, nil)
155156
require.NoError(t, err)
156-
defer s.Close()
157+
t.Cleanup(func() {
158+
_ = s.Close()
159+
})
157160
err = s.UpdateHostSigner(42)
158161
assert.NoError(t, err)
159162

@@ -190,10 +193,21 @@ func TestNewServer_CloseActiveConnections(t *testing.T) {
190193
}
191194
// The 60 seconds here is intended to be longer than the
192195
// test. The shutdown should propagate.
193-
err = sess.Start("/bin/bash -c 'trap \"sleep 60\" SIGTERM; sleep 60'")
196+
if runtime.GOOS == "windows" {
197+
// Best effort to at least partially test this in Windows.
198+
err = sess.Start("echo start\"ed\" && sleep 60")
199+
} else {
200+
err = sess.Start("/bin/bash -c 'trap \"sleep 60\" SIGTERM; echo start\"ed\"; sleep 60'")
201+
}
194202
assert.NoError(t, err)
195203

204+
// Allow the session to settle (i.e. reach echo).
205+
pty.ExpectMatchContext(ctx, "started")
206+
// Sleep a bit to ensure the sleep has started.
207+
time.Sleep(testutil.IntervalMedium)
208+
196209
close(ch)
210+
197211
err = sess.Wait()
198212
assert.Error(t, err)
199213
}(waitConns[i])

agent/agentssh/exec_windows.go

+7-3
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package agentssh
22

33
import (
44
"context"
5-
"os"
65
"os/exec"
76
"syscall"
87

@@ -15,7 +14,12 @@ func cmdSysProcAttr() *syscall.SysProcAttr {
1514

1615
func cmdCancel(ctx context.Context, logger slog.Logger, cmd *exec.Cmd) func() error {
1716
return func() error {
18-
logger.Debug(ctx, "cmdCancel: sending interrupt to process", slog.F("pid", cmd.Process.Pid))
19-
return cmd.Process.Signal(os.Interrupt)
17+
logger.Debug(ctx, "cmdCancel: killing process", slog.F("pid", cmd.Process.Pid))
18+
// Windows doesn't support sending signals to process groups, so we
19+
// have to kill the process directly. In the future, we may want to
20+
// implement a more sophisticated solution for process groups on
21+
// Windows, but for now, this is a simple way to ensure that the
22+
// process is terminated when the context is cancelled.
23+
return cmd.Process.Kill()
2024
}
2125
}

0 commit comments

Comments
 (0)