Skip to content

feat: log resource drift warnings in all workspace builds #18355

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 4 additions & 13 deletions provisioner/terraform/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,20 +322,11 @@ func (e *executor) plan(ctx, killCtx context.Context, env, vars []string, logr l

// When a prebuild claim attempt is made, log a warning if a resource is due to be replaced, since this will obviate
// the point of prebuilding if the expensive resource is replaced once claimed!
var (
isPrebuildClaimAttempt = !destroy && metadata.GetPrebuiltWorkspaceBuildStage().IsPrebuiltWorkspaceClaim()
resReps []*proto.ResourceReplacement
)
// We now also log drift for all builds to help with debugging resource replacements.
var resReps []*proto.ResourceReplacement
if repsFromPlan := findResourceReplacements(plan); len(repsFromPlan) > 0 {
if isPrebuildClaimAttempt {
// TODO(dannyk): we should log drift always (not just during prebuild claim attempts); we're validating that this output
// will not be overwhelming for end-users, but it'll certainly be super valuable for template admins
// to diagnose this resource replacement issue, at least.
// Once prebuilds moves out of beta, consider deleting this condition.

// Lock held before calling (see top of method).
e.logDrift(ctx, killCtx, planfilePath, logr)
}
// Lock held before calling (see top of method).
e.logDrift(ctx, killCtx, planfilePath, logr)

resReps = make([]*proto.ResourceReplacement, 0, len(repsFromPlan))
for n, p := range repsFromPlan {
Expand Down
190 changes: 190 additions & 0 deletions provisioner/terraform/executor_internal_test.go
Original file line number Diff line number Diff line change
@@ -1,22 +1,35 @@
package terraform

import (
"context"
"encoding/json"
"os"
"path/filepath"
"strings"
"sync"
"testing"

tfjson "github.com/hashicorp/terraform-json"
"github.com/stretchr/testify/require"
"go.opentelemetry.io/otel/trace/noop"

"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/provisionersdk/proto"
"github.com/coder/coder/v2/testutil"
)

// mockLogger is an in-memory logSink implementation used by tests to capture
// provisioner log output for later inspection.
type mockLogger struct {
	mu sync.Mutex // guards logs; ProvisionLog may be called concurrently

	logs []*proto.Log // captured entries, in the order they were emitted
}

// Compile-time assertion that mockLogger satisfies logSink.
var _ logSink = &mockLogger{}

// ProvisionLog records a single provisioner log entry. It is safe for
// concurrent use; entries are appended in arrival order.
func (m *mockLogger) ProvisionLog(l proto.LogLevel, o string) {
	entry := &proto.Log{Level: l, Output: o}

	m.mu.Lock()
	m.logs = append(m.logs, entry)
	m.mu.Unlock()
}

Expand Down Expand Up @@ -173,3 +186,180 @@ func TestOnlyDataResources(t *testing.T) {
})
}
}

// TestLogDrift_WithRealTerraformPlan exercises drift logging end-to-end: it
// installs a real terraform binary, applies an initial configuration, then
// rewrites the configuration so the next plan forces a resource replacement,
// and asserts that logDrift emitted the replacement lines at WARN level and
// the surrounding drift details at INFO level.
func TestLogDrift_WithRealTerraformPlan(t *testing.T) {
	t.Parallel()

	logger := testutil.Logger(t)
	tmpDir := t.TempDir()

	binPath, err := Install(t.Context(), logger, true, tmpDir, TerraformVersion)
	require.NoError(t, err)

	tfConfig := `
terraform {
	required_providers {
		local = {
			source  = "hashicorp/local"
			version = "~> 2.0"
		}
	}
}

resource "local_file" "test_file" {
	content  = "initial content"
	filename = "test.txt"
}
`

	tfFile := filepath.Join(tmpDir, "main.tf")
	require.NoError(t, os.WriteFile(tfFile, []byte(tfConfig), 0o600))

	// Create a minimal server for the executor.
	mockSrv := &server{
		logger:  logger,
		execMut: &sync.Mutex{},
		tracer:  noop.NewTracerProvider().Tracer("test"),
	}

	e := &executor{
		logger:     logger,
		binaryPath: binPath,
		workdir:    tmpDir,
		mut:        mockSrv.execMut,
		server:     mockSrv,
		timings:    newTimingAggregator(database.ProvisionerJobTimingStagePlan),
	}

	// These contexts must be explicitly separate from the test context.
	// We have a log message which prints when these contexts are canceled
	// (or when the test completes if using t.Context()), and this log output
	// would be confusing to the casual reader, while innocuous.
	// See interruptCommandOnCancel in executor.go.
	ctx := context.Background()
	killCtx := context.Background()

	var mockSink mockLogger
	err = e.init(ctx, killCtx, &mockSink)
	require.NoError(t, err)

	// Create initial plan to establish state.
	_, err = e.plan(ctx, killCtx, e.basicEnv(), []string{}, &mockSink, &proto.Metadata{
		WorkspaceTransition: proto.WorkspaceTransition_START,
	})
	require.NoError(t, err)

	// Apply the plan to create initial state.
	_, err = e.apply(ctx, killCtx, e.basicEnv(), &mockSink)
	require.NoError(t, err)

	// Now modify the terraform configuration to cause drift: changing the
	// file content forces local_file to be replaced.
	driftConfig := `
terraform {
	required_providers {
		local = {
			source  = "hashicorp/local"
			version = "~> 2.0"
		}
	}
}

resource "local_file" "test_file" {
	content  = "changed content that forces replacement"
	filename = "test.txt"
}
`

	// Write the modified configuration.
	require.NoError(t, os.WriteFile(tfFile, []byte(driftConfig), 0o600))

	// Create a new plan that will show the drift/replacement.
	driftLogger := &mockLogger{}
	planResult, err := e.plan(ctx, killCtx, e.basicEnv(), []string{}, driftLogger, &proto.Metadata{
		WorkspaceTransition: proto.WorkspaceTransition_START,
	})
	require.NoError(t, err)

	// Verify we detected resource replacements (this triggers logDrift).
	require.NotEmpty(t, planResult.ResourceReplacements, "Should detect resource replacements that trigger drift logging")

	// Verify that drift logs were captured.
	require.NotEmpty(t, driftLogger.logs, "logDrift should produce log output")

	// Inspect the captured logs in a single pass: replacement lines must be
	// WARN, the drift details must include INFO lines. Collect the raw
	// output while iterating so we can assert on the combined text below
	// (the original built this with a second loop inside an immediately
	// invoked closure, and tracked a foundWarnLogs flag that was set iff
	// foundReplacementLog was set — both redundant).
	var (
		foundReplacementLog, foundInfoLogs bool
		outputs                            []string
	)

	for _, log := range driftLogger.logs {
		t.Logf("[%s] %s", log.Level.String(), log.Output)
		outputs = append(outputs, log.Output)

		if strings.Contains(log.Output, "# forces replacement") {
			foundReplacementLog = true
			require.Equal(t, proto.LogLevel_WARN, log.Level, "Lines containing '# forces replacement' should be logged at WARN level")
		}

		if log.Level == proto.LogLevel_INFO {
			foundInfoLogs = true
		}
	}

	// Verify we found the expected log types. foundReplacementLog implies a
	// WARN-level log, so no separate WARN flag is needed.
	require.True(t, foundReplacementLog, "Should find log lines containing '# forces replacement'")
	require.True(t, foundInfoLogs, "Should find INFO level logs showing the drift details")

	// Verify that the drift shows the resource change.
	logOutput := strings.Join(outputs, "\n")
	require.Contains(t, logOutput, "local_file.test_file", "Drift logs should mention the specific resource")
}

// TestResourceReplaceLogWriter verifies that resourceReplaceLogWriter routes
// terraform plan lines to the sink with the right severity: lines containing
// "# forces replacement" become WARN, everything else becomes INFO.
func TestResourceReplaceLogWriter(t *testing.T) {
	t.Parallel()

	var sink mockLogger
	logger := testutil.Logger(t)
	writer, doneLogging := resourceReplaceLogWriter(&sink, logger)

	// Test input with both normal lines and replacement lines.
	testInput := ` # local_file.test_file will be replaced
-/+ resource "local_file" "test_file" {
~ content = "initial content" -> "changed content" # forces replacement
~ filename = "test.txt"
id = "1234567890"
}

Plan: 1 to add, 0 to change, 1 to destroy.`

	// Feed the plan output through the writer, close it, and wait for the
	// logging goroutine to drain before inspecting the sink.
	_, err := writer.Write([]byte(testInput))
	require.NoError(t, err)
	require.NoError(t, writer.Close())
	<-doneLogging

	// The writer must have produced at least one entry.
	require.NotEmpty(t, sink.logs, "Should produce log output")

	var sawReplacementWarn, sawInfo bool

	for _, entry := range sink.logs {
		t.Logf("[%s] %s", entry.Level.String(), entry.Output)

		switch {
		case strings.Contains(entry.Output, "# forces replacement"):
			require.Equal(t, proto.LogLevel_WARN, entry.Level, "Lines containing '# forces replacement' should be WARN level")
			sawReplacementWarn = true
		case entry.Level == proto.LogLevel_INFO:
			sawInfo = true
		}
	}

	require.True(t, sawReplacementWarn, "Should find WARN level log for '# forces replacement' line")
	require.True(t, sawInfo, "Should find INFO level logs for other lines")
}
Loading