From 9cda37205de8adfa33e9233dd6956fbbe2eeb0f3 Mon Sep 17 00:00:00 2001 From: Martin Atkins Date: Wed, 17 May 2017 15:26:21 -0700 Subject: [PATCH] backend/local: create local state file if backend write fails In the old remote state system we had the idea of a local backup, which is actually still present for the legacy backends but no longer applies for the new-style backends like the s3 backend. It's problematic when an apply runs for long enough that someone's time-limited AWS STS credentials expire and then Terraform fails and can't persist state to S3. To reduce the risk of lost state, here we add some extra fallback code for the local apply operation in particular. If either state writing or state persisting fails then we attempt to write the state to a special backup file errored.tfstate, and produce an error message that guides the user on how to retry uploading this state. In the unlikely event that we can't write to local disk either (e.g. permissions problems) we make a last-ditch attempt to dump the JSON onto stdout and advise the user to manually copy it into a file for import. If even that doesn't work for some reason, we assume a critical Terraform bug (JSON-serialization problem with states?) and bail out with an apologetic error message. This is implemented for the apply command in particular because this is the one command where new objects are created in real APIs that we don't want to lose track of. For other operations it's less bad to just generate a simple error message and have the user retry. This fixes #14298. 
--- backend/local/backend_apply.go | 80 ++++++++++++++++++++++++++++- backend/local/backend_apply_test.go | 76 +++++++++++++++++++++++++++ 2 files changed, 154 insertions(+), 2 deletions(-) diff --git a/backend/local/backend_apply.go b/backend/local/backend_apply.go index 9bc41f487..27883d5b2 100644 --- a/backend/local/backend_apply.go +++ b/backend/local/backend_apply.go @@ -1,7 +1,9 @@ package local import ( + "bytes" "context" + "errors" "fmt" "log" "strings" @@ -137,11 +139,11 @@ func (b *Local) opApply( // Persist the state if err := opState.WriteState(applyState); err != nil { - runningOp.Err = fmt.Errorf("Failed to save state: %s", err) + runningOp.Err = b.backupStateForError(applyState, err) return } if err := opState.PersistState(); err != nil { - runningOp.Err = fmt.Errorf("Failed to save state: %s", err) + runningOp.Err = b.backupStateForError(applyState, err) return } @@ -186,6 +188,42 @@ func (b *Local) opApply( } } +// backupStateForError is called in a scenario where we're unable to persist the +// state for some reason, and will attempt to save a backup copy of the state +// to local disk to help the user recover. This is a "last ditch effort" sort +// of thing, so we really don't want to end up in this codepath; we should do +// everything we possibly can to get the state saved _somewhere_. +func (b *Local) backupStateForError(applyState *terraform.State, err error) error { + b.CLI.Error(fmt.Sprintf("Failed to save state: %s\n", err)) + + local := &state.LocalState{Path: "errored.tfstate"} + writeErr := local.WriteState(applyState) + if writeErr != nil { + b.CLI.Error(fmt.Sprintf( + "Also failed to create local state file for recovery: %s\n\n", writeErr, + )) + // To avoid leaving the user with no state at all, our last resort + // is to print the JSON state out onto the terminal. 
This is an awful + // UX, so we should definitely avoid doing this if at all possible, + // but at least the user has _some_ path to recover if we end up + // here for some reason. + stateBuf := new(bytes.Buffer) + jsonErr := terraform.WriteState(applyState, stateBuf) + if jsonErr != nil { + b.CLI.Error(fmt.Sprintf( + "Also failed to JSON-serialize the state to print it: %s\n\n", jsonErr, + )) + return errors.New(stateWriteFatalError) + } + + b.CLI.Output(stateBuf.String()) + + return errors.New(stateWriteConsoleFallbackError) + } + + return errors.New(stateWriteBackedUpError) +} + const applyErrNoConfig = ` No configuration files found! @@ -194,3 +232,41 @@ would mark everything for destruction, which is normally not what is desired. If you would like to destroy everything, please run 'terraform destroy' instead which does not require any configuration files. ` + +const stateWriteBackedUpError = `Failed to persist state to backend. + +The error shown above has prevented Terraform from writing the updated state +to the configured backend. To allow for recovery, the state has been written +to the file "errored.tfstate" in the current working directory. + +Running "terraform apply" again at this point will create a forked state, +making it harder to recover. + +To retry writing this state, use the following command: + terraform state push errored.tfstate +` + +const stateWriteConsoleFallbackError = `Failed to persist state to backend. + +The errors shown above prevented Terraform from writing the updated state to +the configured backend and from creating a local backup file. As a fallback, +the raw state data is printed above as a JSON object. + +To retry writing this state, copy the state data (from the first { to the +last } inclusive) and save it into a local file called errored.tfstate, then +run the following command: + terraform state push errored.tfstate +` + +const stateWriteFatalError = `Failed to save state after apply. 
+ +A catastrophic error has prevented Terraform from persisting the state file +or creating a backup. Unfortunately this means that the record of any resources +created during this apply has been lost, and such resources may exist outside +of Terraform's management. + +For resources that support import, it is possible to recover by manually +importing each resource using its id from the target system. + +This is a serious bug in Terraform and should be reported. +` diff --git a/backend/local/backend_apply_test.go b/backend/local/backend_apply_test.go index c761d1538..303f92969 100644 --- a/backend/local/backend_apply_test.go +++ b/backend/local/backend_apply_test.go @@ -2,14 +2,19 @@ package local import ( "context" + "errors" "fmt" "os" + "path/filepath" + "strings" "sync" "testing" "github.com/hashicorp/terraform/backend" "github.com/hashicorp/terraform/config/module" + "github.com/hashicorp/terraform/state" "github.com/hashicorp/terraform/terraform" + "github.com/mitchellh/cli" ) func TestLocal_applyBasic(t *testing.T) { @@ -158,6 +163,77 @@ test_instance.foo: `) } +func TestLocal_applyBackendFail(t *testing.T) { + mod, modCleanup := module.TestTree(t, "./test-fixtures/apply") + defer modCleanup() + + b := TestLocal(t) + wd, err := os.Getwd() + if err != nil { + t.Fatalf("failed to get current working directory") + } + err = os.Chdir(filepath.Dir(b.StatePath)) + if err != nil { + t.Fatalf("failed to set temporary working directory") + } + defer os.Chdir(wd) + + b.Backend = &backendWithFailingState{} + b.CLI = new(cli.MockUi) + p := TestLocalProvider(t, b, "test") + + p.ApplyReturn = &terraform.InstanceState{ID: "yes"} + + op := testOperationApply() + op.Module = mod + + run, err := b.Operation(context.Background(), op) + if err != nil { + t.Fatalf("bad: %s", err) + } + <-run.Done() + if run.Err == nil { + t.Fatalf("apply succeeded; want error") + } + + errStr := run.Err.Error() + if !strings.Contains(errStr, "terraform state push errored.tfstate") { + 
t.Fatalf("wrong error message:\n%s", errStr) + } + + msgStr := b.CLI.(*cli.MockUi).ErrorWriter.String() + if !strings.Contains(msgStr, "Failed to save state: fake failure") { + t.Fatalf("missing original error message in output:\n%s", msgStr) + } + + // The fallback behavior should've created a file errored.tfstate in the + // current working directory. + checkState(t, "errored.tfstate", ` +test_instance.foo: + ID = yes + `) +} + +type backendWithFailingState struct { + Local +} + +func (b *backendWithFailingState) State(name string) (state.State, error) { + return &failingState{ + &state.LocalState{ + Path: "failing-state.tfstate", + }, + }, nil +} + +type failingState struct { + *state.LocalState +} + +func (s failingState) WriteState(state *terraform.State) error { + return errors.New("fake failure") +} + func testOperationApply() *backend.Operation { return &backend.Operation{ Type: backend.OperationTypeApply,