Skip to content

Commit 9ad09b2

Browse files
committed
WIP
1 parent da729e6 commit 9ad09b2

File tree

2 files changed

+66
-14
lines changed

2 files changed

+66
-14
lines changed

cli/server.go

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -849,6 +849,16 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
849849
defer options.Telemetry.Close()
850850
}
851851

852+
databaseStoreWithoutAuth := options.Database
853+
854+
// We use a separate coderAPICloser so the Enterprise API
855+
// can have its own close functions. This is cleaner
856+
// than abstracting the Coder API itself.
857+
coderAPI, coderAPICloser, err := newAPI(ctx, options)
858+
if err != nil {
859+
return xerrors.Errorf("create coder API: %w", err)
860+
}
861+
852862
// This prevents the pprof import from being accidentally deleted.
853863
_ = pprof.Handler
854864
if cfg.Pprof.Enable {
@@ -871,7 +881,7 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
871881
}
872882
defer closeWorkspacesFunc()
873883

874-
closeAgentsFunc, err := prometheusmetrics.Agents(ctx, options.PrometheusRegistry, options.Database, 0)
884+
closeAgentsFunc, err := prometheusmetrics.Agents(ctx, options.PrometheusRegistry, databaseStoreWithoutAuth, &coderAPI.TailnetCoordinator, options.DERPMap, 0)
875885
if err != nil {
876886
return xerrors.Errorf("register agents prometheus metric: %w", err)
877887
}
@@ -887,14 +897,6 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
887897
options.SwaggerEndpoint = cfg.Swagger.Enable.Value()
888898
}
889899

890-
// We use a separate coderAPICloser so the Enterprise API
891-
// can have its own close functions. This is cleaner
892-
// than abstracting the Coder API itself.
893-
coderAPI, coderAPICloser, err := newAPI(ctx, options)
894-
if err != nil {
895-
return xerrors.Errorf("create coder API: %w", err)
896-
}
897-
898900
client := codersdk.New(localURL)
899901
if localURL.Scheme == "https" && isLocalhost(localURL.Hostname()) {
900902
// The certificate will likely be self-signed or for a different

coderd/prometheusmetrics/prometheusmetrics.go

Lines changed: 55 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,20 @@ package prometheusmetrics
22

33
import (
44
"context"
5+
"fmt"
6+
"log"
7+
"strconv"
8+
"strings"
9+
"sync/atomic"
510
"time"
611

712
"github.com/google/uuid"
813
"github.com/prometheus/client_golang/prometheus"
14+
"tailscale.com/tailcfg"
915

1016
"github.com/coder/coder/coderd"
1117
"github.com/coder/coder/coderd/database"
18+
"github.com/coder/coder/tailnet"
1219
)
1320

1421
// ActiveUsers tracks the number of users that have authenticated within the past hour.
@@ -108,7 +115,7 @@ func Workspaces(ctx context.Context, registerer prometheus.Registerer, db databa
108115
}
109116

110117
// Agents tracks workspace agents with labels on connection status, plus each agent's latency per DERP region.
111-
func Agents(ctx context.Context, registerer prometheus.Registerer, db database.Store, duration time.Duration) (context.CancelFunc, error) {
118+
func Agents(ctx context.Context, registerer prometheus.Registerer, db database.Store, coordinator *atomic.Pointer[tailnet.Coordinator], derpMap *tailcfg.DERPMap, duration time.Duration) (context.CancelFunc, error) {
112119
if duration == 0 {
113120
duration = 15 * time.Second // TODO 5 * time.Minute
114121
}
@@ -124,23 +131,26 @@ func Agents(ctx context.Context, registerer prometheus.Registerer, db database.S
124131
return nil, err
125132
}
126133

127-
agentsUserLatenciesHistogram := prometheus.NewHistogramVec(prometheus.HistogramOpts{
134+
agentsUserLatenciesGauge := prometheus.NewGaugeVec(prometheus.GaugeOpts{
128135
Namespace: "coderd",
129136
Subsystem: "agents",
130137
Name: "user_latencies_seconds",
131138
Help: "The user's agent latency in seconds.",
132-
Buckets: []float64{0.001, 0.005, 0.010, 0.025, 0.050, 0.100, 0.500, 1, 5, 10, 30},
133-
}, []string{"agent_id", "workspace", "connection_type", "ide"})
134-
err = registerer.Register(agentsUserLatenciesHistogram)
139+
}, []string{"agent_id", "workspace_name", "derp_region", "preferred"})
140+
err = registerer.Register(agentsUserLatenciesGauge)
135141
if err != nil {
136142
return nil, err
137143
}
138144

145+
// FIXME connection_type ide
146+
139147
ctx, cancelFunc := context.WithCancel(ctx)
140148
ticker := time.NewTicker(duration)
141149
go func() {
142150
defer ticker.Stop()
143151
for {
152+
log.Println("Agents!!!")
153+
144154
select {
145155
case <-ctx.Done():
146156
return
@@ -151,30 +161,70 @@ func Agents(ctx context.Context, registerer prometheus.Registerer, db database.S
151161

152162
builds, err := db.GetLatestWorkspaceBuilds(ctx)
153163
if err != nil {
164+
log.Println("1", err)
154165
continue
155166
}
156167

157168
agentsConnectionGauge.Reset()
169+
agentsUserLatenciesGauge.Reset()
158170
for _, build := range builds {
159171
workspace, err := db.GetWorkspaceByID(ctx, build.WorkspaceID)
160172
if err != nil {
173+
log.Println("2", err)
161174
continue
162175
}
163176

164177
agents, err := db.GetWorkspaceAgentsInLatestBuildByWorkspaceID(ctx, build.WorkspaceID)
165178
if err != nil {
179+
log.Println("3", err)
166180
continue
167181
}
168182

169183
if len(agents) == 0 {
170184
continue
171185
}
172186

187+
// FIXME publish workspace even if no agents
188+
173189
for _, agent := range agents {
174190
connectionStatus := agent.Status(6 * time.Second)
175191

176192
// FIXME AgentInactiveDisconnectTimeout
193+
log.Println("with value " + agent.Name)
177194
agentsConnectionGauge.WithLabelValues(agent.Name, workspace.Name, string(connectionStatus.Status)).Set(1)
195+
196+
node := (*coordinator.Load()).Node(agent.ID)
197+
if node != nil {
198+
log.Println("coordinator")
199+
200+
for rawRegion, latency := range node.DERPLatency {
201+
log.Println(rawRegion, latency)
202+
203+
regionParts := strings.SplitN(rawRegion, "-", 2)
204+
regionID, err := strconv.Atoi(regionParts[0])
205+
if err != nil {
206+
continue // xerrors.Errorf("convert derp region id %q: %w", rawRegion, err)
207+
}
208+
region, found := derpMap.Regions[regionID]
209+
if !found {
210+
// It's possible that a workspace agent is using an old DERPMap
211+
// and reports regions that do not exist. If that's the case,
212+
// report the region as unknown!
213+
region = &tailcfg.DERPRegion{
214+
RegionID: regionID,
215+
RegionName: fmt.Sprintf("Unnamed %d", regionID),
216+
}
217+
}
218+
219+
log.Println(region, latency)
220+
agentsUserLatenciesGauge.WithLabelValues(agent.Name, workspace.Name, region.RegionName, fmt.Sprintf("%v", node.PreferredDERP == regionID)).Set(latency)
221+
}
222+
} else {
223+
log.Println("node is null")
224+
}
225+
226+
// FIXME publish agent even if DERP is missing
227+
// FIXME IDE?
178228
}
179229
}
180230
}

0 commit comments

Comments
 (0)