Commit d6b9806

chore: implement oom/ood processing component (#16436)
Implements the processing logic as set out in the OOM/OOD RFC.
1 parent: b5329ae

26 files changed (+1823 -113 lines)

coderd/agentapi/api.go
+25 -3

@@ -17,10 +17,12 @@ import (
 
 	"cdr.dev/slog"
 	agentproto "github.com/coder/coder/v2/agent/proto"
+	"github.com/coder/coder/v2/coderd/agentapi/resourcesmonitor"
 	"github.com/coder/coder/v2/coderd/appearance"
 	"github.com/coder/coder/v2/coderd/database"
 	"github.com/coder/coder/v2/coderd/database/pubsub"
 	"github.com/coder/coder/v2/coderd/externalauth"
+	"github.com/coder/coder/v2/coderd/notifications"
 	"github.com/coder/coder/v2/coderd/prometheusmetrics"
 	"github.com/coder/coder/v2/coderd/tracing"
 	"github.com/coder/coder/v2/coderd/workspacestats"
@@ -29,6 +31,7 @@ import (
 	"github.com/coder/coder/v2/codersdk/agentsdk"
 	"github.com/coder/coder/v2/tailnet"
 	tailnetproto "github.com/coder/coder/v2/tailnet/proto"
+	"github.com/coder/quartz"
 )
 
 // API implements the DRPC agent API interface from agent/proto. This struct is
@@ -59,7 +62,9 @@ type Options struct {
 
 	Ctx                   context.Context
 	Log                   slog.Logger
+	Clock                 quartz.Clock
 	Database              database.Store
+	NotificationsEnqueuer notifications.Enqueuer
 	Pubsub                pubsub.Pubsub
 	DerpMapFn             func() *tailcfg.DERPMap
 	TailnetCoordinator    *atomic.Pointer[tailnet.Coordinator]
@@ -82,6 +87,10 @@ type Options struct {
 }
 
 func New(opts Options) *API {
+	if opts.Clock == nil {
+		opts.Clock = quartz.NewReal()
+	}
+
 	api := &API{
 		opts: opts,
 		mu:   sync.Mutex{},
@@ -104,9 +113,22 @@ func New(opts Options) *API {
 	}
 
 	api.ResourcesMonitoringAPI = &ResourcesMonitoringAPI{
-		Log:      opts.Log,
-		AgentID:  opts.AgentID,
-		Database: opts.Database,
+		AgentID:               opts.AgentID,
+		WorkspaceID:           opts.WorkspaceID,
+		Clock:                 opts.Clock,
+		Database:              opts.Database,
+		NotificationsEnqueuer: opts.NotificationsEnqueuer,
+		Debounce:              5 * time.Minute,
+
+		Config: resourcesmonitor.Config{
+			NumDatapoints:      20,
+			CollectionInterval: 10 * time.Second,
+
+			Alert: resourcesmonitor.AlertConfig{
+				MinimumNOKsPercent:     20,
+				ConsecutiveNOKsPercent: 50,
+			},
+		},
 	}
 
 	api.StatsAPI = &StatsAPI{
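
The wiring above is the only place these knobs are set: 20 datapoints collected at a 10-second interval, a 5-minute notification debounce, and the 20%/50% alert thresholds. The resourcesmonitor package that defines Config and AlertConfig is added elsewhere in this commit and is not reproduced in this excerpt, so the sketch below only mirrors the field names used at this call site; the types and the comments describing their meaning are inferences, not the package's own documentation.

// Sketch of the configuration consumed by New() above. Field names
// match the call site; types and semantics are inferred, not copied
// from the resourcesmonitor package added by this commit.
package resourcesmonitor

import "time"

type Config struct {
	// NumDatapoints is how many usage samples the agent is asked to
	// buffer per push (passed through to the agent verbatim in
	// GetResourcesMonitoringConfiguration).
	NumDatapoints int32

	// CollectionInterval is how often the agent samples usage, so a
	// single push covers roughly NumDatapoints * CollectionInterval
	// of history (20 * 10s above).
	CollectionInterval time.Duration

	Alert AlertConfig
}

type AlertConfig struct {
	// MinimumNOKsPercent and ConsecutiveNOKsPercent are the
	// thresholds consumed by NextState: how much of the pushed
	// window must be "not OK" overall, and how much in a row, for
	// the monitor to be considered in an alert state.
	MinimumNOKsPercent     int
	ConsecutiveNOKsPercent int
}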

coderd/agentapi/resources_monitoring.go
+198 -9

@@ -4,20 +4,35 @@ import (
 	"context"
 	"database/sql"
 	"errors"
+	"fmt"
+	"time"
 
 	"golang.org/x/xerrors"
 
+	"cdr.dev/slog"
+
 	"github.com/google/uuid"
 
-	"cdr.dev/slog"
 	"github.com/coder/coder/v2/agent/proto"
+	"github.com/coder/coder/v2/coderd/agentapi/resourcesmonitor"
 	"github.com/coder/coder/v2/coderd/database"
+	"github.com/coder/coder/v2/coderd/database/dbauthz"
+	"github.com/coder/coder/v2/coderd/database/dbtime"
+	"github.com/coder/coder/v2/coderd/notifications"
+	"github.com/coder/quartz"
 )
 
 type ResourcesMonitoringAPI struct {
-	AgentID  uuid.UUID
-	Database database.Store
-	Log      slog.Logger
+	AgentID     uuid.UUID
+	WorkspaceID uuid.UUID
+
+	Log                   slog.Logger
+	Clock                 quartz.Clock
+	Database              database.Store
+	NotificationsEnqueuer notifications.Enqueuer
+
+	Debounce time.Duration
+	Config   resourcesmonitor.Config
 }
 
 func (a *ResourcesMonitoringAPI) GetResourcesMonitoringConfiguration(ctx context.Context, _ *proto.GetResourcesMonitoringConfigurationRequest) (*proto.GetResourcesMonitoringConfigurationResponse, error) {
@@ -33,8 +48,8 @@ func (a *ResourcesMonitoringAPI) GetResourcesMonitoringConfiguration(ctx context
 
 	return &proto.GetResourcesMonitoringConfigurationResponse{
 		Config: &proto.GetResourcesMonitoringConfigurationResponse_Config{
-			CollectionIntervalSeconds: 10,
-			NumDatapoints:             20,
+			CollectionIntervalSeconds: int32(a.Config.CollectionInterval.Seconds()),
+			NumDatapoints:             a.Config.NumDatapoints,
 		},
 		Memory: func() *proto.GetResourcesMonitoringConfigurationResponse_Memory {
 			if memoryErr != nil {
@@ -60,8 +75,182 @@ func (a *ResourcesMonitoringAPI) GetResourcesMonitoringConfiguration(ctx context
 }
 
 func (a *ResourcesMonitoringAPI) PushResourcesMonitoringUsage(ctx context.Context, req *proto.PushResourcesMonitoringUsageRequest) (*proto.PushResourcesMonitoringUsageResponse, error) {
-	a.Log.Info(ctx, "resources monitoring usage received",
-		slog.F("request", req))
+	var err error
+
+	if memoryErr := a.monitorMemory(ctx, req.Datapoints); memoryErr != nil {
+		err = errors.Join(err, xerrors.Errorf("monitor memory: %w", memoryErr))
+	}
+
+	if volumeErr := a.monitorVolumes(ctx, req.Datapoints); volumeErr != nil {
+		err = errors.Join(err, xerrors.Errorf("monitor volume: %w", volumeErr))
+	}
+
+	return &proto.PushResourcesMonitoringUsageResponse{}, err
+}
+
+func (a *ResourcesMonitoringAPI) monitorMemory(ctx context.Context, datapoints []*proto.PushResourcesMonitoringUsageRequest_Datapoint) error {
+	monitor, err := a.Database.FetchMemoryResourceMonitorsByAgentID(ctx, a.AgentID)
+	if err != nil {
+		// It is valid for an agent to not have a memory monitor, so we
+		// do not want to treat it as an error.
+		if errors.Is(err, sql.ErrNoRows) {
+			return nil
+		}
+
+		return xerrors.Errorf("fetch memory resource monitor: %w", err)
+	}
+
+	if !monitor.Enabled {
+		return nil
+	}
+
+	usageDatapoints := make([]*proto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage, 0, len(datapoints))
+	for _, datapoint := range datapoints {
+		usageDatapoints = append(usageDatapoints, datapoint.Memory)
+	}
+
+	usageStates := resourcesmonitor.CalculateMemoryUsageStates(monitor, usageDatapoints)
+
+	oldState := monitor.State
+	newState := resourcesmonitor.NextState(a.Config, oldState, usageStates)
+
+	debouncedUntil, shouldNotify := monitor.Debounce(a.Debounce, a.Clock.Now(), oldState, newState)
+
+	//nolint:gocritic // We need to be able to update the resource monitor here.
+	err = a.Database.UpdateMemoryResourceMonitor(dbauthz.AsResourceMonitor(ctx), database.UpdateMemoryResourceMonitorParams{
+		AgentID:        a.AgentID,
+		State:          newState,
+		UpdatedAt:      dbtime.Time(a.Clock.Now()),
+		DebouncedUntil: dbtime.Time(debouncedUntil),
+	})
+	if err != nil {
+		return xerrors.Errorf("update workspace monitor: %w", err)
+	}
+
+	if !shouldNotify {
+		return nil
+	}
+
+	workspace, err := a.Database.GetWorkspaceByID(ctx, a.WorkspaceID)
+	if err != nil {
+		return xerrors.Errorf("get workspace by id: %w", err)
+	}
+
+	_, err = a.NotificationsEnqueuer.EnqueueWithData(
+		// nolint:gocritic // We need to be able to send the notification.
+		dbauthz.AsNotifier(ctx),
+		workspace.OwnerID,
+		notifications.TemplateWorkspaceOutOfMemory,
+		map[string]string{
+			"workspace": workspace.Name,
+			"threshold": fmt.Sprintf("%d%%", monitor.Threshold),
+		},
+		map[string]any{
+			// NOTE(DanielleMaywood):
+			// When notifications are enqueued, they are checked to be
+			// unique within a single day. This means that if we attempt
+			// to send two OOM notifications for the same workspace on
+			// the same day, the enqueuer will prevent us from sending
+			// a second one. We inject a timestamp to make the
+			// notifications appear different enough to circumvent this
+			// deduplication logic.
+			"timestamp": a.Clock.Now(),
+		},
+		"workspace-monitor-memory",
+	)
+	if err != nil {
+		return xerrors.Errorf("notify workspace OOM: %w", err)
+	}
+
+	return nil
+}
+
+func (a *ResourcesMonitoringAPI) monitorVolumes(ctx context.Context, datapoints []*proto.PushResourcesMonitoringUsageRequest_Datapoint) error {
+	volumeMonitors, err := a.Database.FetchVolumesResourceMonitorsByAgentID(ctx, a.AgentID)
+	if err != nil {
+		return xerrors.Errorf("get or insert volume monitor: %w", err)
+	}
+
+	outOfDiskVolumes := make([]map[string]any, 0)
+
+	for _, monitor := range volumeMonitors {
+		if !monitor.Enabled {
+			continue
+		}
+
+		usageDatapoints := make([]*proto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage, 0, len(datapoints))
+		for _, datapoint := range datapoints {
+			var usage *proto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage
+
+			for _, volume := range datapoint.Volumes {
+				if volume.Volume == monitor.Path {
+					usage = volume
+					break
+				}
+			}
+
+			usageDatapoints = append(usageDatapoints, usage)
+		}
+
+		usageStates := resourcesmonitor.CalculateVolumeUsageStates(monitor, usageDatapoints)
+
+		oldState := monitor.State
+		newState := resourcesmonitor.NextState(a.Config, oldState, usageStates)
+
+		debouncedUntil, shouldNotify := monitor.Debounce(a.Debounce, a.Clock.Now(), oldState, newState)
+
+		if shouldNotify {
+			outOfDiskVolumes = append(outOfDiskVolumes, map[string]any{
+				"path":      monitor.Path,
+				"threshold": fmt.Sprintf("%d%%", monitor.Threshold),
+			})
+		}
+
+		//nolint:gocritic // We need to be able to update the resource monitor here.
+		if err := a.Database.UpdateVolumeResourceMonitor(dbauthz.AsResourceMonitor(ctx), database.UpdateVolumeResourceMonitorParams{
+			AgentID:        a.AgentID,
+			Path:           monitor.Path,
+			State:          newState,
+			UpdatedAt:      dbtime.Time(a.Clock.Now()),
+			DebouncedUntil: dbtime.Time(debouncedUntil),
+		}); err != nil {
+			return xerrors.Errorf("update workspace monitor: %w", err)
+		}
+	}
+
+	if len(outOfDiskVolumes) == 0 {
+		return nil
+	}
+
+	workspace, err := a.Database.GetWorkspaceByID(ctx, a.WorkspaceID)
+	if err != nil {
+		return xerrors.Errorf("get workspace by id: %w", err)
+	}
+
+	if _, err := a.NotificationsEnqueuer.EnqueueWithData(
+		// nolint:gocritic // We need to be able to send the notification.
+		dbauthz.AsNotifier(ctx),
+		workspace.OwnerID,
+		notifications.TemplateWorkspaceOutOfDisk,
+		map[string]string{
+			"workspace": workspace.Name,
+		},
+		map[string]any{
+			"volumes": outOfDiskVolumes,
+			// NOTE(DanielleMaywood):
+			// When notifications are enqueued, they are checked to be
+			// unique within a single day. This means that if we attempt
+			// to send two OOM notifications for the same workspace on
+			// the same day, the enqueuer will prevent us from sending
+			// a second one. We inject a timestamp to make the
+			// notifications appear different enough to circumvent this
+			// deduplication logic.
+			"timestamp": a.Clock.Now(),
+		},
+		"workspace-monitor-volumes",
+	); err != nil {
+		return xerrors.Errorf("notify workspace OOD: %w", err)
+	}
 
-	return &proto.PushResourcesMonitoringUsageResponse{}, nil
+	return nil
 }
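
Memory and volume monitoring above share one shape: fold the pushed datapoints into per-sample OK/NOK usage states, let resourcesmonitor.NextState derive the monitor's next state from those states and the alert thresholds, persist the new state and debounce deadline, and only enqueue a notification when the monitor's Debounce method says the transition warrants one. Neither NextState nor Debounce is shown in this excerpt (both live in the resourcesmonitor package added by this commit), so the sketch below is an interpretation of the Debounce contract as it is called here, not the committed implementation; the state names, the Monitor struct, and the transition rule are assumptions.

// Sketch of the contract implied by the call
//   debouncedUntil, shouldNotify := monitor.Debounce(a.Debounce, a.Clock.Now(), oldState, newState)
// in monitorMemory and monitorVolumes above. An interpretation only.
package resourcesmonitor

import "time"

// State stands in for whatever OK/NOK state type the commit's schema
// uses for monitor rows; the values here are placeholders.
type State string

const (
	StateOK  State = "OK"
	StateNOK State = "NOK"
)

// Monitor stands in for the database monitor row. DebouncedUntil is
// persisted via the Update*ResourceMonitorParams calls above, so the
// notification window survives across pushes.
type Monitor struct {
	DebouncedUntil time.Time
}

// Debounce returns the next notification deadline and whether this
// transition should notify. Assumed rule: only an OK -> NOK
// transition notifies, and only once the previously persisted
// debounce window has expired; every other case keeps the existing
// deadline and stays silent.
func (m Monitor) Debounce(dur time.Duration, now time.Time, oldState, newState State) (time.Time, bool) {
	if oldState == StateOK && newState == StateNOK && !now.Before(m.DebouncedUntil) {
		return now.Add(dur), true
	}
	return m.DebouncedUntil, false
}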
