Skip to content

Commit 50ccdb2

Browse files
committed
Merge remote-tracking branch 'origin/main' into jjs/presets
2 parents 2838fb1 + 0149222 commit 50ccdb2

22 files changed

+1835
-218
lines changed

.github/workflows/docs-ci.yaml

+3
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ on:
1515
- "**.md"
1616
- ".github/workflows/docs-ci.yaml"
1717

18+
permissions:
19+
contents: read
20+
1821
jobs:
1922
docs:
2023
runs-on: ubuntu-latest

agent/agent.go

+45-23
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,18 @@ import (
3838
"github.com/coder/coder/v2/agent/agentscripts"
3939
"github.com/coder/coder/v2/agent/agentssh"
4040
"github.com/coder/coder/v2/agent/proto"
41+
"github.com/coder/coder/v2/agent/proto/resourcesmonitor"
4142
"github.com/coder/coder/v2/agent/reconnectingpty"
4243
"github.com/coder/coder/v2/buildinfo"
44+
"github.com/coder/coder/v2/cli/clistat"
4345
"github.com/coder/coder/v2/cli/gitauth"
4446
"github.com/coder/coder/v2/coderd/database/dbtime"
4547
"github.com/coder/coder/v2/codersdk"
4648
"github.com/coder/coder/v2/codersdk/agentsdk"
4749
"github.com/coder/coder/v2/codersdk/workspacesdk"
4850
"github.com/coder/coder/v2/tailnet"
4951
tailnetproto "github.com/coder/coder/v2/tailnet/proto"
52+
"github.com/coder/quartz"
5053
"github.com/coder/retry"
5154
)
5255

@@ -87,8 +90,8 @@ type Options struct {
8790
}
8891

8992
type Client interface {
90-
ConnectRPC23(ctx context.Context) (
91-
proto.DRPCAgentClient23, tailnetproto.DRPCTailnetClient23, error,
93+
ConnectRPC24(ctx context.Context) (
94+
proto.DRPCAgentClient24, tailnetproto.DRPCTailnetClient24, error,
9295
)
9396
RewriteDERPMap(derpMap *tailcfg.DERPMap)
9497
}
@@ -406,7 +409,7 @@ func (t *trySingleflight) Do(key string, fn func()) {
406409
fn()
407410
}
408411

409-
func (a *agent) reportMetadata(ctx context.Context, aAPI proto.DRPCAgentClient23) error {
412+
func (a *agent) reportMetadata(ctx context.Context, aAPI proto.DRPCAgentClient24) error {
410413
tickerDone := make(chan struct{})
411414
collectDone := make(chan struct{})
412415
ctx, cancel := context.WithCancel(ctx)
@@ -622,7 +625,7 @@ func (a *agent) reportMetadata(ctx context.Context, aAPI proto.DRPCAgentClient23
622625

623626
// reportLifecycle reports the current lifecycle state once. All state
624627
// changes are reported in order.
625-
func (a *agent) reportLifecycle(ctx context.Context, aAPI proto.DRPCAgentClient23) error {
628+
func (a *agent) reportLifecycle(ctx context.Context, aAPI proto.DRPCAgentClient24) error {
626629
for {
627630
select {
628631
case <-a.lifecycleUpdate:
@@ -704,7 +707,7 @@ func (a *agent) setLifecycle(state codersdk.WorkspaceAgentLifecycle) {
704707
// fetchServiceBannerLoop fetches the service banner on an interval. It will
705708
// not be fetched immediately; the expectation is that it is primed elsewhere
706709
// (and must be done before the session actually starts).
707-
func (a *agent) fetchServiceBannerLoop(ctx context.Context, aAPI proto.DRPCAgentClient23) error {
710+
func (a *agent) fetchServiceBannerLoop(ctx context.Context, aAPI proto.DRPCAgentClient24) error {
708711
ticker := time.NewTicker(a.announcementBannersRefreshInterval)
709712
defer ticker.Stop()
710713
for {
@@ -740,7 +743,7 @@ func (a *agent) run() (retErr error) {
740743
a.sessionToken.Store(&sessionToken)
741744

742745
// ConnectRPC returns the dRPC connection we use for the Agent and Tailnet v2+ APIs
743-
aAPI, tAPI, err := a.client.ConnectRPC23(a.hardCtx)
746+
aAPI, tAPI, err := a.client.ConnectRPC24(a.hardCtx)
744747
if err != nil {
745748
return err
746749
}
@@ -757,7 +760,7 @@ func (a *agent) run() (retErr error) {
757760
connMan := newAPIConnRoutineManager(a.gracefulCtx, a.hardCtx, a.logger, aAPI, tAPI)
758761

759762
connMan.startAgentAPI("init notification banners", gracefulShutdownBehaviorStop,
760-
func(ctx context.Context, aAPI proto.DRPCAgentClient23) error {
763+
func(ctx context.Context, aAPI proto.DRPCAgentClient24) error {
761764
bannersProto, err := aAPI.GetAnnouncementBanners(ctx, &proto.GetAnnouncementBannersRequest{})
762765
if err != nil {
763766
return xerrors.Errorf("fetch service banner: %w", err)
@@ -774,7 +777,7 @@ func (a *agent) run() (retErr error) {
774777
// sending logs gets gracefulShutdownBehaviorRemain because we want to send logs generated by
775778
// shutdown scripts.
776779
connMan.startAgentAPI("send logs", gracefulShutdownBehaviorRemain,
777-
func(ctx context.Context, aAPI proto.DRPCAgentClient23) error {
780+
func(ctx context.Context, aAPI proto.DRPCAgentClient24) error {
778781
err := a.logSender.SendLoop(ctx, aAPI)
779782
if xerrors.Is(err, agentsdk.LogLimitExceededError) {
780783
// we don't want this error to tear down the API connection and propagate to the
@@ -792,6 +795,25 @@ func (a *agent) run() (retErr error) {
792795
// metadata reporting can cease as soon as we start gracefully shutting down
793796
connMan.startAgentAPI("report metadata", gracefulShutdownBehaviorStop, a.reportMetadata)
794797

798+
// resources monitor can cease as soon as we start gracefully shutting down.
799+
connMan.startAgentAPI("resources monitor", gracefulShutdownBehaviorStop, func(ctx context.Context, aAPI proto.DRPCAgentClient24) error {
800+
logger := a.logger.Named("resources_monitor")
801+
clk := quartz.NewReal()
802+
config, err := aAPI.GetResourcesMonitoringConfiguration(ctx, &proto.GetResourcesMonitoringConfigurationRequest{})
803+
if err != nil {
804+
return xerrors.Errorf("failed to get resources monitoring configuration: %w", err)
805+
}
806+
807+
statfetcher, err := clistat.New()
808+
if err != nil {
809+
return xerrors.Errorf("failed to create resources fetcher: %w", err)
810+
}
811+
resourcesFetcher := resourcesmonitor.NewFetcher(statfetcher)
812+
813+
resourcesmonitor := resourcesmonitor.NewResourcesMonitor(logger, clk, config, resourcesFetcher, aAPI)
814+
return resourcesmonitor.Start(ctx)
815+
})
816+
795817
// channels to sync goroutines below
796818
// handle manifest
797819
// |
@@ -814,7 +836,7 @@ func (a *agent) run() (retErr error) {
814836
connMan.startAgentAPI("handle manifest", gracefulShutdownBehaviorStop, a.handleManifest(manifestOK))
815837

816838
connMan.startAgentAPI("app health reporter", gracefulShutdownBehaviorStop,
817-
func(ctx context.Context, aAPI proto.DRPCAgentClient23) error {
839+
func(ctx context.Context, aAPI proto.DRPCAgentClient24) error {
818840
if err := manifestOK.wait(ctx); err != nil {
819841
return xerrors.Errorf("no manifest: %w", err)
820842
}
@@ -829,7 +851,7 @@ func (a *agent) run() (retErr error) {
829851
a.createOrUpdateNetwork(manifestOK, networkOK))
830852

831853
connMan.startTailnetAPI("coordination", gracefulShutdownBehaviorStop,
832-
func(ctx context.Context, tAPI tailnetproto.DRPCTailnetClient23) error {
854+
func(ctx context.Context, tAPI tailnetproto.DRPCTailnetClient24) error {
833855
if err := networkOK.wait(ctx); err != nil {
834856
return xerrors.Errorf("no network: %w", err)
835857
}
@@ -838,7 +860,7 @@ func (a *agent) run() (retErr error) {
838860
)
839861

840862
connMan.startTailnetAPI("derp map subscriber", gracefulShutdownBehaviorStop,
841-
func(ctx context.Context, tAPI tailnetproto.DRPCTailnetClient23) error {
863+
func(ctx context.Context, tAPI tailnetproto.DRPCTailnetClient24) error {
842864
if err := networkOK.wait(ctx); err != nil {
843865
return xerrors.Errorf("no network: %w", err)
844866
}
@@ -847,7 +869,7 @@ func (a *agent) run() (retErr error) {
847869

848870
connMan.startAgentAPI("fetch service banner loop", gracefulShutdownBehaviorStop, a.fetchServiceBannerLoop)
849871

850-
connMan.startAgentAPI("stats report loop", gracefulShutdownBehaviorStop, func(ctx context.Context, aAPI proto.DRPCAgentClient23) error {
872+
connMan.startAgentAPI("stats report loop", gracefulShutdownBehaviorStop, func(ctx context.Context, aAPI proto.DRPCAgentClient24) error {
851873
if err := networkOK.wait(ctx); err != nil {
852874
return xerrors.Errorf("no network: %w", err)
853875
}
@@ -858,8 +880,8 @@ func (a *agent) run() (retErr error) {
858880
}
859881

860882
// handleManifest returns a function that fetches and processes the manifest
861-
func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context, aAPI proto.DRPCAgentClient23) error {
862-
return func(ctx context.Context, aAPI proto.DRPCAgentClient23) error {
883+
func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context, aAPI proto.DRPCAgentClient24) error {
884+
return func(ctx context.Context, aAPI proto.DRPCAgentClient24) error {
863885
var (
864886
sentResult = false
865887
err error
@@ -968,8 +990,8 @@ func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context,
968990

969991
// createOrUpdateNetwork waits for the manifest to be set using manifestOK, then creates or updates
970992
// the tailnet using the information in the manifest
971-
func (a *agent) createOrUpdateNetwork(manifestOK, networkOK *checkpoint) func(context.Context, proto.DRPCAgentClient23) error {
972-
return func(ctx context.Context, _ proto.DRPCAgentClient23) (retErr error) {
993+
func (a *agent) createOrUpdateNetwork(manifestOK, networkOK *checkpoint) func(context.Context, proto.DRPCAgentClient24) error {
994+
return func(ctx context.Context, _ proto.DRPCAgentClient24) (retErr error) {
973995
if err := manifestOK.wait(ctx); err != nil {
974996
return xerrors.Errorf("no manifest: %w", err)
975997
}
@@ -1273,7 +1295,7 @@ func (a *agent) createTailnet(ctx context.Context, agentID uuid.UUID, derpMap *t
12731295

12741296
// runCoordinator runs a coordinator and returns whether a reconnect
12751297
// should occur.
1276-
func (a *agent) runCoordinator(ctx context.Context, tClient tailnetproto.DRPCTailnetClient23, network *tailnet.Conn) error {
1298+
func (a *agent) runCoordinator(ctx context.Context, tClient tailnetproto.DRPCTailnetClient24, network *tailnet.Conn) error {
12771299
defer a.logger.Debug(ctx, "disconnected from coordination RPC")
12781300
// we run the RPC on the hardCtx so that we have a chance to send the disconnect message if we
12791301
// gracefully shut down.
@@ -1320,7 +1342,7 @@ func (a *agent) runCoordinator(ctx context.Context, tClient tailnetproto.DRPCTai
13201342
}
13211343

13221344
// runDERPMapSubscriber runs a coordinator and returns if a reconnect should occur.
1323-
func (a *agent) runDERPMapSubscriber(ctx context.Context, tClient tailnetproto.DRPCTailnetClient23, network *tailnet.Conn) error {
1345+
func (a *agent) runDERPMapSubscriber(ctx context.Context, tClient tailnetproto.DRPCTailnetClient24, network *tailnet.Conn) error {
13241346
defer a.logger.Debug(ctx, "disconnected from derp map RPC")
13251347
ctx, cancel := context.WithCancel(ctx)
13261348
defer cancel()
@@ -1690,16 +1712,16 @@ const (
16901712

16911713
type apiConnRoutineManager struct {
16921714
logger slog.Logger
1693-
aAPI proto.DRPCAgentClient23
1694-
tAPI tailnetproto.DRPCTailnetClient23
1715+
aAPI proto.DRPCAgentClient24
1716+
tAPI tailnetproto.DRPCTailnetClient24
16951717
eg *errgroup.Group
16961718
stopCtx context.Context
16971719
remainCtx context.Context
16981720
}
16991721

17001722
func newAPIConnRoutineManager(
17011723
gracefulCtx, hardCtx context.Context, logger slog.Logger,
1702-
aAPI proto.DRPCAgentClient23, tAPI tailnetproto.DRPCTailnetClient23,
1724+
aAPI proto.DRPCAgentClient24, tAPI tailnetproto.DRPCTailnetClient24,
17031725
) *apiConnRoutineManager {
17041726
// routines that remain in operation during graceful shutdown use the remainCtx. They'll still
17051727
// exit if the errgroup hits an error, which usually means a problem with the conn.
@@ -1732,7 +1754,7 @@ func newAPIConnRoutineManager(
17321754
// but for Tailnet.
17331755
func (a *apiConnRoutineManager) startAgentAPI(
17341756
name string, behavior gracefulShutdownBehavior,
1735-
f func(context.Context, proto.DRPCAgentClient23) error,
1757+
f func(context.Context, proto.DRPCAgentClient24) error,
17361758
) {
17371759
logger := a.logger.With(slog.F("name", name))
17381760
var ctx context.Context
@@ -1769,7 +1791,7 @@ func (a *apiConnRoutineManager) startAgentAPI(
17691791
// but for the Agent API.
17701792
func (a *apiConnRoutineManager) startTailnetAPI(
17711793
name string, behavior gracefulShutdownBehavior,
1772-
f func(context.Context, tailnetproto.DRPCTailnetClient23) error,
1794+
f func(context.Context, tailnetproto.DRPCTailnetClient24) error,
17731795
) {
17741796
logger := a.logger.With(slog.F("name", name))
17751797
var ctx context.Context

agent/agenttest/client.go

+32-3
Original file line numberDiff line numberDiff line change
@@ -96,8 +96,8 @@ func (c *Client) Close() {
9696
c.derpMapOnce.Do(func() { close(c.derpMapUpdates) })
9797
}
9898

99-
func (c *Client) ConnectRPC23(ctx context.Context) (
100-
agentproto.DRPCAgentClient23, proto.DRPCTailnetClient23, error,
99+
func (c *Client) ConnectRPC24(ctx context.Context) (
100+
agentproto.DRPCAgentClient24, proto.DRPCTailnetClient24, error,
101101
) {
102102
conn, lis := drpcsdk.MemTransportPipe()
103103
c.LastWorkspaceAgent = func() {
@@ -171,7 +171,9 @@ type FakeAgentAPI struct {
171171
metadata map[string]agentsdk.Metadata
172172
timings []*agentproto.Timing
173173

174-
getAnnouncementBannersFunc func() ([]codersdk.BannerConfig, error)
174+
getAnnouncementBannersFunc func() ([]codersdk.BannerConfig, error)
175+
getResourcesMonitoringConfigurationFunc func() (*agentproto.GetResourcesMonitoringConfigurationResponse, error)
176+
pushResourcesMonitoringUsageFunc func(*agentproto.PushResourcesMonitoringUsageRequest) (*agentproto.PushResourcesMonitoringUsageResponse, error)
175177
}
176178

177179
func (f *FakeAgentAPI) GetManifest(context.Context, *agentproto.GetManifestRequest) (*agentproto.Manifest, error) {
@@ -212,6 +214,33 @@ func (f *FakeAgentAPI) GetAnnouncementBanners(context.Context, *agentproto.GetAn
212214
return &agentproto.GetAnnouncementBannersResponse{AnnouncementBanners: bannersProto}, nil
213215
}
214216

217+
func (f *FakeAgentAPI) GetResourcesMonitoringConfiguration(_ context.Context, _ *agentproto.GetResourcesMonitoringConfigurationRequest) (*agentproto.GetResourcesMonitoringConfigurationResponse, error) {
218+
f.Lock()
219+
defer f.Unlock()
220+
221+
if f.getResourcesMonitoringConfigurationFunc == nil {
222+
return &agentproto.GetResourcesMonitoringConfigurationResponse{
223+
Config: &agentproto.GetResourcesMonitoringConfigurationResponse_Config{
224+
CollectionIntervalSeconds: 10,
225+
NumDatapoints: 20,
226+
},
227+
}, nil
228+
}
229+
230+
return f.getResourcesMonitoringConfigurationFunc()
231+
}
232+
233+
func (f *FakeAgentAPI) PushResourcesMonitoringUsage(_ context.Context, req *agentproto.PushResourcesMonitoringUsageRequest) (*agentproto.PushResourcesMonitoringUsageResponse, error) {
234+
f.Lock()
235+
defer f.Unlock()
236+
237+
if f.pushResourcesMonitoringUsageFunc == nil {
238+
return &agentproto.PushResourcesMonitoringUsageResponse{}, nil
239+
}
240+
241+
return f.pushResourcesMonitoringUsageFunc(req)
242+
}
243+
215244
func (f *FakeAgentAPI) UpdateStats(ctx context.Context, req *agentproto.UpdateStatsRequest) (*agentproto.UpdateStatsResponse, error) {
216245
f.logger.Debug(ctx, "update stats called", slog.F("req", req))
217246
// empty request is sent to get the interval; but our tests don't want empty stats requests

0 commit comments

Comments
 (0)