backend/local: create local state file if backend write fails

In the old remote state system we had the idea of a local backup, which
is actually still present for the legacy backends but no longer applies
for the new-style backends like the s3 backend.

It's problematic when an apply runs for long enough that someone's
time-limited AWS STS credentials expire and then Terraform fails and can't
persist state to S3.

To reduce the risk of lost state, here we add some extra fallback code
for the local apply operation in particular. If either state writing
or state persisting fails then we attempt to write the state to a special
backup file errored.tfstate, and produce an error message that guides the
user on how to retry uploading this state.

In the unlikely event that we can't write to local disk either (e.g.
permissions problems) we make a last-ditch attempt to dump the JSON onto
stdout and advise the user to manually copy it into a file for import.
If even that doesn't work for some reason, we assume a critical Terraform
bug (JSON-serialization problem with states?) and bail out with an
apologetic error message.

This is implemented for the apply command in particular because this is
the one command where new objects are created in real APIs that we don't
want to lose track of. For other operations it's less bad to just generate
a simple error message and have the user retry.

This fixes #14298.
This commit is contained in:
Martin Atkins 2017-05-17 15:26:21 -07:00
parent 2037c1eebf
commit 9cda37205d
2 changed files with 154 additions and 2 deletions

View File

@ -1,7 +1,9 @@
package local
import (
"bytes"
"context"
"errors"
"fmt"
"log"
"strings"
@ -137,11 +139,11 @@ func (b *Local) opApply(
// Persist the state
if err := opState.WriteState(applyState); err != nil {
runningOp.Err = fmt.Errorf("Failed to save state: %s", err)
runningOp.Err = b.backupStateForError(applyState, err)
return
}
if err := opState.PersistState(); err != nil {
runningOp.Err = fmt.Errorf("Failed to save state: %s", err)
runningOp.Err = b.backupStateForError(applyState, err)
return
}
@ -186,6 +188,42 @@ func (b *Local) opApply(
}
}
// backupStateForError is called in a scenario where we're unable to persist the
// state for some reason, and will attempt to save a backup copy of the state
// to local disk to help the user recover. This is a "last ditch effort" sort
// of thing, so we really don't want to end up in this codepath; we should do
// everything we possibly can to get the state saved _somewhere_.
//
// The fallbacks are attempted in order:
//
//  1. Write the state to the file "errored.tfstate" (a relative path).
//  2. If that fails, serialize the state and print it through the UI.
//  3. If serialization fails too, give up.
//
// err is the original failure; it is reported to the user but is not included
// in the returned error. The returned error is always non-nil and carries the
// recovery guidance matching whichever fallback was reached.
func (b *Local) backupStateForError(applyState *terraform.State, err error) error {
	// NOTE(review): b.CLI appears to be optional (tests install a UI
	// explicitly), so guard against nil. Panicking here would throw away the
	// only remaining copy of the state, so fall back to the standard logger.
	reportError := func(msg string) {
		if b.CLI != nil {
			b.CLI.Error(msg)
			return
		}
		log.Printf("[ERROR] backend/local: %s", msg)
	}

	reportError(fmt.Sprintf("Failed to save state: %s\n", err))

	local := &state.LocalState{Path: "errored.tfstate"}
	if writeErr := local.WriteState(applyState); writeErr != nil {
		reportError(fmt.Sprintf(
			"Also failed to create local state file for recovery: %s\n\n", writeErr,
		))

		// To avoid leaving the user with no state at all, our last resort
		// is to print the JSON state out onto the terminal. This is an awful
		// UX, so we should definitely avoid doing this if at all possible,
		// but at least the user has _some_ path to recover if we end up
		// here for some reason.
		var stateBuf bytes.Buffer
		if jsonErr := terraform.WriteState(applyState, &stateBuf); jsonErr != nil {
			reportError(fmt.Sprintf(
				"Also failed to JSON-serialize the state to print it: %s\n\n", jsonErr,
			))
			return errors.New(stateWriteFatalError)
		}

		if b.CLI != nil {
			b.CLI.Output(stateBuf.String())
		} else {
			// No UI to print to; the log is the only place left to put it.
			log.Printf("[ERROR] backend/local: unsaved state follows:\n%s", stateBuf.String())
		}

		return errors.New(stateWriteConsoleFallbackError)
	}

	return errors.New(stateWriteBackedUpError)
}
const applyErrNoConfig = `
No configuration files found!
@ -194,3 +232,41 @@ would mark everything for destruction, which is normally not what is desired.
If you would like to destroy everything, please run 'terraform destroy' instead
which does not require any configuration files.
`
// stateWriteBackedUpError is the error text returned by backupStateForError
// when the state was successfully written to the local recovery file. It tells
// the user how to push that file to the backend.
const stateWriteBackedUpError = `Failed to persist state to backend.
The error shown above has prevented Terraform from writing the updated state
to the configured backend. To allow for recovery, the state has been written
to the file "errored.tfstate" in the current working directory.
Running "terraform apply" again at this point will create a forked state,
making it harder to recover.
To retry writing this state, use the following command:
terraform state push errored.tfstate
`
// stateWriteConsoleFallbackError is the error text returned by
// backupStateForError when the local recovery file could not be written and
// the serialized state was printed to the UI instead.
const stateWriteConsoleFallbackError = `Failed to persist state to backend.
The errors shown above prevented Terraform from writing the updated state to
the configured backend and from creating a local backup file. As a fallback,
the raw state data is printed above as a JSON object.
To retry writing this state, copy the state data (from the first { to the
last } inclusive) and save it into a local file called errored.tfstate, then
run the following command:
terraform state push errored.tfstate
`
// stateWriteFatalError is the error text returned by backupStateForError when
// every fallback failed: the local recovery file could not be written and the
// state could not be serialized for printing either.
const stateWriteFatalError = `Failed to save state after apply.
A catastrophic error has prevented Terraform from persisting the state file
or creating a backup. Unfortunately this means that the record of any resources
created during this apply has been lost, and such resources may exist outside
of Terraform's management.
For resources that support import, it is possible to recover by manually
importing each resource using its id from the target system.
This is a serious bug in Terraform and should be reported.
`

View File

@ -2,14 +2,19 @@ package local
import (
"context"
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"sync"
"testing"
"github.com/hashicorp/terraform/backend"
"github.com/hashicorp/terraform/config/module"
"github.com/hashicorp/terraform/state"
"github.com/hashicorp/terraform/terraform"
"github.com/mitchellh/cli"
)
func TestLocal_applyBasic(t *testing.T) {
@ -158,6 +163,77 @@ test_instance.foo:
`)
}
// TestLocal_applyBackendFail runs an apply against a backend whose state
// manager rejects every write, and checks that the operation fails with the
// "terraform state push errored.tfstate" guidance, that the original write
// error is reported on the UI's error stream, and that the state ends up in
// errored.tfstate.
func TestLocal_applyBackendFail(t *testing.T) {
	mod, modCleanup := module.TestTree(t, "./test-fixtures/apply")
	defer modCleanup()

	b := TestLocal(t)

	// errored.tfstate is created in the current working directory, so run the
	// test from the directory holding the test state file and restore the
	// original directory afterwards.
	wd, err := os.Getwd()
	if err != nil {
		t.Fatalf("failed to get current working directory: %s", err)
	}
	if err := os.Chdir(filepath.Dir(b.StatePath)); err != nil {
		t.Fatalf("failed to set temporary working directory: %s", err)
	}
	defer func() {
		if err := os.Chdir(wd); err != nil {
			t.Errorf("failed to restore working directory: %s", err)
		}
	}()

	// Keep a typed handle on the mock UI so its captured output can be read
	// back without a type assertion.
	ui := new(cli.MockUi)
	b.Backend = &backendWithFailingState{}
	b.CLI = ui

	p := TestLocalProvider(t, b, "test")
	p.ApplyReturn = &terraform.InstanceState{ID: "yes"}

	op := testOperationApply()
	op.Module = mod

	run, err := b.Operation(context.Background(), op)
	if err != nil {
		t.Fatalf("bad: %s", err)
	}
	<-run.Done()
	if run.Err == nil {
		t.Fatalf("apply succeeded; want error")
	}

	errStr := run.Err.Error()
	if !strings.Contains(errStr, "terraform state push errored.tfstate") {
		t.Fatalf("wrong error message:\n%s", errStr)
	}

	msgStr := ui.ErrorWriter.String()
	if !strings.Contains(msgStr, "Failed to save state: fake failure") {
		t.Fatalf("missing original error message in output:\n%s", msgStr)
	}

	// The fallback behavior should've created a file errored.tfstate in the
	// current working directory.
	checkState(t, "errored.tfstate", `
test_instance.foo:
ID = yes
`)
}
// backendWithFailingState embeds Local and overrides State so that every
// caller receives a failingState wrapper around a local state file.
type backendWithFailingState struct {
	Local
}

// State ignores the requested name and always returns a failingState backed
// by "failing-state.tfstate"; it never returns an error itself.
func (b *backendWithFailingState) State(name string) (state.State, error) {
	inner := &state.LocalState{Path: "failing-state.tfstate"}
	wrapped := &failingState{inner}
	return wrapped, nil
}
// failingState embeds a LocalState but overrides WriteState so that every
// write is rejected, letting tests exercise the state-write failure path.
type failingState struct {
	*state.LocalState
}

// WriteState discards its argument and always returns the error
// "fake failure". The receiver and parameter are left unnamed because they
// are unused; naming the parameter "state" would also shadow the imported
// state package.
func (failingState) WriteState(*terraform.State) error {
	return errors.New("fake failure")
}
func testOperationApply() *backend.Operation {
return &backend.Operation{
Type: backend.OperationTypeApply,