-
Notifications
You must be signed in to change notification settings - Fork 928
feat: add cli command to report task status #18262
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
package cli | ||
|
||
import ( | ||
"context" | ||
"errors" | ||
"net/url" | ||
"time" | ||
|
||
agentapi "github.com/coder/agentapi-sdk-go" | ||
"github.com/coder/coder/v2/cli/cliui" | ||
"github.com/coder/coder/v2/codersdk" | ||
"github.com/coder/coder/v2/codersdk/agentsdk" | ||
"github.com/coder/serpent" | ||
) | ||
|
||
func (r *RootCmd) taskCommand() *serpent.Command { | ||
cmd := &serpent.Command{ | ||
Use: "task", | ||
Short: "Interact with AI tasks.", | ||
Handler: func(i *serpent.Invocation) error { | ||
return i.Command.HelpHandler(i) | ||
}, | ||
Children: []*serpent.Command{ | ||
r.taskReportStatus(), | ||
}, | ||
} | ||
return cmd | ||
} | ||
|
||
func (r *RootCmd) taskReportStatus() *serpent.Command { | ||
var ( | ||
slug string | ||
interval time.Duration | ||
llmURL url.URL | ||
) | ||
cmd := &serpent.Command{ | ||
Use: "report-status", | ||
Short: "Report status of the currently running task to Coder.", | ||
Handler: func(inv *serpent.Invocation) error { | ||
ctx := inv.Context() | ||
|
||
// This is meant to run in a workspace, so instead of a regular client we | ||
// need a workspace agent client to update the status in coderd. | ||
agentClient, err := r.createAgentClient() | ||
if err != nil { | ||
return err | ||
} | ||
|
||
// We also need an agentapi client to get the LLM agent's current status. | ||
llmClient, err := agentapi.NewClient(llmURL.String()) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
notifyCtx, notifyCancel := inv.SignalNotifyContext(ctx, StopSignals...) | ||
defer notifyCancel() | ||
|
||
outerLoop: | ||
for { | ||
res, err := llmClient.GetStatus(notifyCtx) | ||
if err != nil && !errors.Is(err, context.Canceled) { | ||
cliui.Warnf(inv.Stderr, "failed to fetch status: %s", err) | ||
} else { | ||
// Currently we only update the status, which leaves the last summary | ||
// (if any) untouched. If we do want to update the summary here, we | ||
// will need to fetch the messages and generate one. | ||
status := codersdk.WorkspaceAppStatusStateWorking | ||
switch res.Status { | ||
case agentapi.StatusStable: // Stable == idle == done | ||
status = codersdk.WorkspaceAppStatusStateComplete | ||
case agentapi.StatusRunning: // Running == working | ||
} | ||
err = agentClient.PatchAppStatus(notifyCtx, agentsdk.PatchAppStatus{ | ||
AppSlug: slug, | ||
State: status, | ||
}) | ||
if err != nil && !errors.Is(err, context.Canceled) { | ||
cliui.Warnf(inv.Stderr, "failed to update status: %s", err) | ||
} | ||
} | ||
|
||
timer := time.NewTimer(interval) | ||
select { | ||
case <-notifyCtx.Done(): | ||
timer.Stop() | ||
break outerLoop | ||
case <-timer.C: | ||
} | ||
} | ||
|
||
return nil | ||
}, | ||
Options: []serpent.Option{ | ||
{ | ||
Flag: "app-slug", | ||
Description: "The app slug to use when reporting the status.", | ||
Env: "CODER_MCP_APP_STATUS_SLUG", | ||
Required: true, | ||
Value: serpent.StringOf(&slug), | ||
}, | ||
{ | ||
Flag: "agentapi-url", | ||
Description: "The URL of the LLM agent API.", | ||
Env: "CODER_AGENTAPI_URL", | ||
Required: true, | ||
Value: serpent.URLOf(&llmURL), | ||
}, | ||
{ | ||
Flag: "interval", | ||
Description: "The interval on which to poll for the status.", | ||
Env: "CODER_APP_STATUS_INTERVAL", | ||
Default: "30s", | ||
Value: serpent.DurationOf(&interval), | ||
}, | ||
}, | ||
} | ||
return cmd | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
package cli_test | ||
|
||
import ( | ||
"context" | ||
"encoding/json" | ||
"io" | ||
"net/http" | ||
"net/http/httptest" | ||
"testing" | ||
|
||
"github.com/stretchr/testify/assert" | ||
"github.com/stretchr/testify/require" | ||
|
||
agentapi "github.com/coder/agentapi-sdk-go" | ||
"github.com/coder/coder/v2/cli/clitest" | ||
"github.com/coder/coder/v2/coderd/httpapi" | ||
"github.com/coder/coder/v2/codersdk" | ||
"github.com/coder/coder/v2/codersdk/agentsdk" | ||
"github.com/coder/coder/v2/pty/ptytest" | ||
"github.com/coder/coder/v2/testutil" | ||
) | ||
|
||
func TestExpTask(t *testing.T) { | ||
t.Parallel() | ||
|
||
tests := []struct { | ||
name string | ||
resp *codersdk.Response | ||
status *agentapi.GetStatusResponse | ||
expected codersdk.WorkspaceAppStatusState | ||
}{ | ||
{ | ||
name: "ReportWorking", | ||
resp: nil, | ||
status: &agentapi.GetStatusResponse{ | ||
Status: agentapi.StatusRunning, | ||
}, | ||
expected: codersdk.WorkspaceAppStatusStateWorking, | ||
}, | ||
{ | ||
name: "ReportComplete", | ||
resp: nil, | ||
status: &agentapi.GetStatusResponse{ | ||
Status: agentapi.StatusStable, | ||
}, | ||
expected: codersdk.WorkspaceAppStatusStateComplete, | ||
}, | ||
{ | ||
name: "ReportUpdateError", | ||
resp: &codersdk.Response{ | ||
Message: "Failed to get workspace app.", | ||
Detail: "This is a test failure.", | ||
}, | ||
status: &agentapi.GetStatusResponse{ | ||
Status: agentapi.StatusStable, | ||
}, | ||
expected: codersdk.WorkspaceAppStatusStateComplete, | ||
}, | ||
{ | ||
name: "ReportStatusError", | ||
resp: nil, | ||
status: nil, | ||
expected: codersdk.WorkspaceAppStatusStateComplete, | ||
}, | ||
} | ||
|
||
for _, test := range tests { | ||
test := test | ||
t.Run(test.name, func(t *testing.T) { | ||
t.Parallel() | ||
done := make(chan codersdk.WorkspaceAppStatusState) | ||
|
||
// A mock server for coderd. | ||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { | ||
body, err := io.ReadAll(r.Body) | ||
require.NoError(t, err) | ||
_ = r.Body.Close() | ||
|
||
var req agentsdk.PatchAppStatus | ||
err = json.Unmarshal(body, &req) | ||
require.NoError(t, err) | ||
|
||
if test.resp != nil { | ||
httpapi.Write(context.Background(), w, http.StatusBadRequest, test.resp) | ||
} else { | ||
httpapi.Write(context.Background(), w, http.StatusOK, nil) | ||
} | ||
done <- req.State | ||
})) | ||
t.Cleanup(srv.Close) | ||
agentURL := srv.URL | ||
|
||
// Another mock server for the LLM agent API. | ||
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { | ||
if test.status != nil { | ||
httpapi.Write(context.Background(), w, http.StatusOK, test.status) | ||
} else { | ||
httpapi.Write(context.Background(), w, http.StatusBadRequest, nil) | ||
} | ||
})) | ||
t.Cleanup(srv.Close) | ||
agentapiURL := srv.URL | ||
|
||
inv, _ := clitest.New(t, "--agent-url", agentURL, "exp", "task", "report-status", | ||
"--app-slug", "claude-code", | ||
"--agentapi-url", agentapiURL) | ||
stdout := ptytest.New(t) | ||
inv.Stdout = stdout.Output() | ||
stderr := ptytest.New(t) | ||
inv.Stderr = stderr.Output() | ||
|
||
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort) | ||
t.Cleanup(cancel) | ||
|
||
go func() { | ||
err := inv.WithContext(ctx).Run() | ||
assert.NoError(t, err) | ||
}() | ||
|
||
// Should only try to update the status if we got one. | ||
if test.status == nil { | ||
stderr.ExpectMatch("failed to fetch status") | ||
} else { | ||
got := <-done | ||
require.Equal(t, got, test.expected) | ||
} | ||
|
||
// Non-nil for the update means there was an error. | ||
if test.resp != nil { | ||
stderr.ExpectMatch("failed to update status") | ||
} | ||
}) | ||
} | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
AgentAPI reports that an agent is running whenever the underlying agent's terminal output changes. This also happens when a user uses the "control" mode to send raw keystrokes to the terminal. Once we implement web push notifications, they might start going off annoyingly in control mode.
A more robust solution might also take into account:
If we reported that the status is stable for a given length and summary, we could stop sending updates until either length or summary changes. If these values don't change, it likely means the user is just fiddling in the terminal rather than the agent doing actual work.
To avoid data races with the agent, the status update endpoint would:
In cases where the conversation length increases but the app status doesn't change, we could send a "Working..." summary update, which the agent would later replace with a richer description via MCP.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That makes sense.
Should we make these checks in agentapi itself? It feels surprising to me (as a casual user of agentapi) that I could hit
/status
and it might reportrunning
because of my own typing, especially since the description says it is the status of the agent.I have the endpoint skipping the update if the summary is unchanged by checking the last summary in the DB. We could have the client send the last summary, but we already have it in the DB, so it seems more robust to me if we just fetch it as part of the API call. Thoughts?
Oh this is perfect, I was not entirely sure what mechanisms I could or should use for this, the for update lock looks perfect ty ty Edit: maybe I should do the lock in code, to not block anything unrelated to statuses trying to update the workspace? Another thought I had was that maybe it could be done as some kind of procedure or trigger.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's a good feature request, but it's a bit hard to do because of how agentapi is currently implemented. The "control mode" is an escape hatch that bypasses standard agentapi message processing. I'm worried that if I started refactoring the logic now it'd take too long, and we only have until the June 25th code freeze to ship tasks early access. I believe adding the check here would be more straightforward. Would you be ok with that?
I'm worried about the following data race:
If the task reporter submitted the summary it wanted to update as part of the message, coderd could detect a mismatch by comparing it with what's in the db and ignore the update in point 4.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Aw I was hoping I could make it work by just checking if the message count or summary had changed in agentapi, but I guess I forgot we have no summary there so I see what you mean, we would have to fix it in some other way that requires a more fundamental change. 😭
Ahhhhhhhhh I see. If it was just the task reporter, there would be no out-of-order requests because it blocks waiting for the response, but with both the task reporter and the agent there could be out-of-order requests. 🤔
I think, though, because the task reporter would need to receive the last summary out of band, this introduces its own race. Something like:
{status: working, summary: doing thing 1, last_summary: <blank>}
, coderd receives{status: working, summary: doing thing 2, last_summary: doing thing 1}
, coderd receives{current_summary: doing thing 2}
{status: complete, summary: <blank>, last_summary: doing thing 2}
last_summary == current_summary
Also I wonder if the agent could race against itself, I am not sure if there are concurrency limits on the report tool in the MCP server.
Discarding might be unfortunate anyway though because then you lose the status history which we display on the workspace page. Personally I am not sure we even need the history though, maybe we could just store the latest status?
Hmmm...maybe we could add a
Date
header so we can insert the requests in the right order? Or, if we move this code into the MCP server itself and ensure correct ordering for all requests in one place? The date seems cool though, lets any number of clients update the status.Maybe we could try detecting if someone is typing in the prompt area, or if it looks like the screen is only changing because it is being scrolled or something, seems tricky though.
The only other thing I can think of, if we need to continue supporting direct terminal access, is if we abandon running the TUIs and create our own wrapper that lets us accurately (maybe??) report when an agent is executing.