always wait for a RunningOperation to return
If the user wishes to interrupt the running operation, only the first interrupt was communicated to the operation by canceling the provided context. A second interrupt would start the shutdown process, but not communicate this to the running operation. This order of event could cause partial writes of state. What would happen is that once the command returns, the plugin system would stop the provider processes. Once the provider processes dies, all pending Eval operations would return return with an error, and quickly cause the operation to complete. Since the backend code didn't know that the process was shutting down imminently, it would continue by attempting to write out the last known state. Under the right conditions, the process would exit part way through the writing of the state file. Add Stop and Cancel CancelFuncs to the RunningOperation, to allow it to easily differentiate between the two signals. The backend will then be able to detect a shutdown and abort more gracefully. In order to ensure that the backend is not in the process of writing the state out, the command will always attempt to wait for the process to complete after cancellation.
This commit is contained in:
parent
f0a009c573
commit
7cba68326a
|
@ -145,8 +145,6 @@ type Operation struct {
|
|||
|
||||
// RunningOperation is the result of starting an operation.
|
||||
type RunningOperation struct {
|
||||
// Context should be used to track Done and Err for errors.
|
||||
//
|
||||
// For implementers of a backend, this context should not wrap the
|
||||
// passed in context. Otherwise, canceling the parent context will
|
||||
// immediately mark this context as "done" but those aren't the semantics
|
||||
|
@ -154,6 +152,16 @@ type RunningOperation struct {
|
|||
// is fully done.
|
||||
context.Context
|
||||
|
||||
// Stop requests the operation to complete early, by calling Stop on all
|
||||
// the plugins. If the process needs to terminate immediately, call Cancel.
|
||||
Stop context.CancelFunc
|
||||
|
||||
// Cancel is the context.CancelFunc associated with the embedded context,
|
||||
// and can be called to terminate the operation early.
|
||||
// Once Cancel is called, the operation should return as soon as possible
|
||||
// to avoid running operations during process exit.
|
||||
Cancel context.CancelFunc
|
||||
|
||||
// Err is the error of the operation. This is populated after
|
||||
// the operation has completed.
|
||||
Err error
|
||||
|
|
|
@ -224,7 +224,7 @@ func (b *Local) State(name string) (state.State, error) {
|
|||
// name conflicts, assume that the field is overwritten if set.
|
||||
func (b *Local) Operation(ctx context.Context, op *backend.Operation) (*backend.RunningOperation, error) {
|
||||
// Determine the function to call for our operation
|
||||
var f func(context.Context, *backend.Operation, *backend.RunningOperation)
|
||||
var f func(context.Context, context.Context, *backend.Operation, *backend.RunningOperation)
|
||||
switch op.Type {
|
||||
case backend.OperationTypeRefresh:
|
||||
f = b.opRefresh
|
||||
|
@ -244,14 +244,29 @@ func (b *Local) Operation(ctx context.Context, op *backend.Operation) (*backend.
|
|||
b.opLock.Lock()
|
||||
|
||||
// Build our running operation
|
||||
runningCtx, runningCtxCancel := context.WithCancel(context.Background())
|
||||
runningOp := &backend.RunningOperation{Context: runningCtx}
|
||||
// the runninCtx is only used to block until the operation returns.
|
||||
runningCtx, done := context.WithCancel(context.Background())
|
||||
runningOp := &backend.RunningOperation{
|
||||
Context: runningCtx,
|
||||
}
|
||||
|
||||
// stopCtx wraps the context passed in, and is used to signal a graceful Stop.
|
||||
stopCtx, stop := context.WithCancel(ctx)
|
||||
runningOp.Stop = stop
|
||||
|
||||
// cancelCtx is used to cancel the operation immediately, usually
|
||||
// indicating that the process is exiting.
|
||||
cancelCtx, cancel := context.WithCancel(context.Background())
|
||||
runningOp.Cancel = cancel
|
||||
|
||||
// Do it
|
||||
go func() {
|
||||
defer done()
|
||||
defer stop()
|
||||
defer cancel()
|
||||
|
||||
defer b.opLock.Unlock()
|
||||
defer runningCtxCancel()
|
||||
f(ctx, op, runningOp)
|
||||
f(stopCtx, cancelCtx, op, runningOp)
|
||||
}()
|
||||
|
||||
// Return
|
||||
|
|
|
@ -19,7 +19,8 @@ import (
|
|||
)
|
||||
|
||||
func (b *Local) opApply(
|
||||
ctx context.Context,
|
||||
stopCtx context.Context,
|
||||
cancelCtx context.Context,
|
||||
op *backend.Operation,
|
||||
runningOp *backend.RunningOperation) {
|
||||
log.Printf("[INFO] backend/local: starting Apply operation")
|
||||
|
@ -55,7 +56,7 @@ func (b *Local) opApply(
|
|||
}
|
||||
|
||||
if op.LockState {
|
||||
lockCtx, cancel := context.WithTimeout(ctx, op.StateLockTimeout)
|
||||
lockCtx, cancel := context.WithTimeout(stopCtx, op.StateLockTimeout)
|
||||
defer cancel()
|
||||
|
||||
lockInfo := state.NewLockInfo()
|
||||
|
@ -157,7 +158,7 @@ func (b *Local) opApply(
|
|||
// we can handle it properly.
|
||||
err = nil
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
case <-stopCtx.Done():
|
||||
if b.CLI != nil {
|
||||
b.CLI.Output("stopping apply operation...")
|
||||
}
|
||||
|
@ -176,8 +177,18 @@ func (b *Local) opApply(
|
|||
// Stop execution
|
||||
go tfCtx.Stop()
|
||||
|
||||
// Wait for completion still
|
||||
<-doneCh
|
||||
select {
|
||||
case <-cancelCtx.Done():
|
||||
log.Println("[WARN] running operation canceled")
|
||||
// if the operation was canceled, we need to return immediately
|
||||
return
|
||||
case <-doneCh:
|
||||
}
|
||||
case <-cancelCtx.Done():
|
||||
// this should not be called without first attempting to stop the
|
||||
// operation
|
||||
log.Println("[ERROR] running operation canceled without Stop")
|
||||
return
|
||||
case <-doneCh:
|
||||
}
|
||||
|
||||
|
|
|
@ -19,7 +19,8 @@ import (
|
|||
)
|
||||
|
||||
func (b *Local) opPlan(
|
||||
ctx context.Context,
|
||||
stopCtx context.Context,
|
||||
cancelCtx context.Context,
|
||||
op *backend.Operation,
|
||||
runningOp *backend.RunningOperation) {
|
||||
log.Printf("[INFO] backend/local: starting Plan operation")
|
||||
|
@ -62,7 +63,7 @@ func (b *Local) opPlan(
|
|||
}
|
||||
|
||||
if op.LockState {
|
||||
lockCtx, cancel := context.WithTimeout(ctx, op.StateLockTimeout)
|
||||
lockCtx, cancel := context.WithTimeout(stopCtx, op.StateLockTimeout)
|
||||
defer cancel()
|
||||
|
||||
lockInfo := state.NewLockInfo()
|
||||
|
@ -112,7 +113,7 @@ func (b *Local) opPlan(
|
|||
}()
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
case <-stopCtx.Done():
|
||||
if b.CLI != nil {
|
||||
b.CLI.Output("stopping plan operation...")
|
||||
}
|
||||
|
@ -120,8 +121,18 @@ func (b *Local) opPlan(
|
|||
// Stop execution
|
||||
go tfCtx.Stop()
|
||||
|
||||
// Wait for completion still
|
||||
<-doneCh
|
||||
select {
|
||||
case <-cancelCtx.Done():
|
||||
log.Println("[WARN] running operation canceled")
|
||||
// if the operation was canceled, we need to return immediately
|
||||
return
|
||||
case <-doneCh:
|
||||
}
|
||||
case <-cancelCtx.Done():
|
||||
// this should not be called without first attempting to stop the
|
||||
// operation
|
||||
log.Println("[ERROR] running operation canceled without Stop")
|
||||
return
|
||||
case <-doneCh:
|
||||
}
|
||||
|
||||
|
|
|
@ -17,7 +17,8 @@ import (
|
|||
)
|
||||
|
||||
func (b *Local) opRefresh(
|
||||
ctx context.Context,
|
||||
stopCtx context.Context,
|
||||
cancelCtx context.Context,
|
||||
op *backend.Operation,
|
||||
runningOp *backend.RunningOperation) {
|
||||
// Check if our state exists if we're performing a refresh operation. We
|
||||
|
@ -53,7 +54,7 @@ func (b *Local) opRefresh(
|
|||
}
|
||||
|
||||
if op.LockState {
|
||||
lockCtx, cancel := context.WithTimeout(ctx, op.StateLockTimeout)
|
||||
lockCtx, cancel := context.WithTimeout(stopCtx, op.StateLockTimeout)
|
||||
defer cancel()
|
||||
|
||||
lockInfo := state.NewLockInfo()
|
||||
|
@ -91,7 +92,7 @@ func (b *Local) opRefresh(
|
|||
}()
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
case <-stopCtx.Done():
|
||||
if b.CLI != nil {
|
||||
b.CLI.Output("stopping refresh operation...")
|
||||
}
|
||||
|
@ -99,8 +100,18 @@ func (b *Local) opRefresh(
|
|||
// Stop execution
|
||||
go tfCtx.Stop()
|
||||
|
||||
// Wait for completion still
|
||||
<-doneCh
|
||||
select {
|
||||
case <-cancelCtx.Done():
|
||||
log.Println("[WARN] running operation canceled")
|
||||
// if the operation was canceled, we need to return immediately
|
||||
return
|
||||
case <-doneCh:
|
||||
}
|
||||
case <-cancelCtx.Done():
|
||||
// this should not be called without first attempting to stop the
|
||||
// operation
|
||||
log.Println("[ERROR] running operation canceled without Stop")
|
||||
return
|
||||
case <-doneCh:
|
||||
}
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@ import (
|
|||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/terraform/tfdiags"
|
||||
|
||||
|
@ -159,10 +160,7 @@ func (c *ApplyCommand) Run(args []string) int {
|
|||
opReq.DestroyForce = destroyForce
|
||||
|
||||
// Perform the operation
|
||||
ctx, ctxCancel := context.WithCancel(context.Background())
|
||||
defer ctxCancel()
|
||||
|
||||
op, err := b.Operation(ctx, opReq)
|
||||
op, err := b.Operation(context.Background(), opReq)
|
||||
if err != nil {
|
||||
c.Ui.Error(fmt.Sprintf("Error starting operation: %s", err))
|
||||
return 1
|
||||
|
@ -171,8 +169,8 @@ func (c *ApplyCommand) Run(args []string) int {
|
|||
// Wait for the operation to complete or an interrupt to occur
|
||||
select {
|
||||
case <-c.ShutdownCh:
|
||||
// Cancel our context so we can start gracefully exiting
|
||||
ctxCancel()
|
||||
// gracefully stop the operation
|
||||
op.Stop()
|
||||
|
||||
// Notify the user
|
||||
c.Ui.Output(outputInterrupt)
|
||||
|
@ -183,7 +181,19 @@ func (c *ApplyCommand) Run(args []string) int {
|
|||
c.Ui.Error(
|
||||
"Two interrupts received. Exiting immediately. Note that data\n" +
|
||||
"loss may have occurred.")
|
||||
|
||||
// cancel the operation completely
|
||||
op.Cancel()
|
||||
|
||||
// the operation should return asap
|
||||
// but timeout just in case
|
||||
select {
|
||||
case <-op.Done():
|
||||
case <-time.After(5 * time.Second):
|
||||
}
|
||||
|
||||
return 1
|
||||
|
||||
case <-op.Done():
|
||||
}
|
||||
case <-op.Done():
|
||||
|
|
|
@ -4,6 +4,7 @@ import (
|
|||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/terraform/backend"
|
||||
"github.com/hashicorp/terraform/config"
|
||||
|
@ -107,10 +108,7 @@ func (c *PlanCommand) Run(args []string) int {
|
|||
opReq.Type = backend.OperationTypePlan
|
||||
|
||||
// Perform the operation
|
||||
ctx, ctxCancel := context.WithCancel(context.Background())
|
||||
defer ctxCancel()
|
||||
|
||||
op, err := b.Operation(ctx, opReq)
|
||||
op, err := b.Operation(context.Background(), opReq)
|
||||
if err != nil {
|
||||
c.Ui.Error(fmt.Sprintf("Error starting operation: %s", err))
|
||||
return 1
|
||||
|
@ -119,7 +117,7 @@ func (c *PlanCommand) Run(args []string) int {
|
|||
select {
|
||||
case <-c.ShutdownCh:
|
||||
// Cancel our context so we can start gracefully exiting
|
||||
ctxCancel()
|
||||
op.Stop()
|
||||
|
||||
// Notify the user
|
||||
c.Ui.Output(outputInterrupt)
|
||||
|
@ -129,6 +127,17 @@ func (c *PlanCommand) Run(args []string) int {
|
|||
case <-c.ShutdownCh:
|
||||
c.Ui.Error(
|
||||
"Two interrupts received. Exiting immediately")
|
||||
|
||||
// cancel the operation completely
|
||||
op.Cancel()
|
||||
|
||||
// the operation should return asap
|
||||
// but timeout just in case
|
||||
select {
|
||||
case <-op.Done():
|
||||
case <-time.After(5 * time.Second):
|
||||
}
|
||||
|
||||
return 1
|
||||
case <-op.Done():
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@ import (
|
|||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/terraform/backend"
|
||||
"github.com/hashicorp/terraform/config"
|
||||
|
@ -82,10 +83,40 @@ func (c *RefreshCommand) Run(args []string) int {
|
|||
return 1
|
||||
}
|
||||
|
||||
// Wait for the operation to complete
|
||||
<-op.Done()
|
||||
if err := op.Err; err != nil {
|
||||
diags = diags.Append(err)
|
||||
// Wait for the operation to complete or an interrupt to occur
|
||||
select {
|
||||
case <-c.ShutdownCh:
|
||||
// gracefully stop the operation
|
||||
op.Stop()
|
||||
|
||||
// Notify the user
|
||||
c.Ui.Output(outputInterrupt)
|
||||
|
||||
// Still get the result, since there is still one
|
||||
select {
|
||||
case <-c.ShutdownCh:
|
||||
c.Ui.Error(
|
||||
"Two interrupts received. Exiting immediately. Note that data\n" +
|
||||
"loss may have occurred.")
|
||||
|
||||
// cancel the operation completely
|
||||
op.Cancel()
|
||||
|
||||
// the operation should return asap
|
||||
// but timeout just in case
|
||||
select {
|
||||
case <-op.Done():
|
||||
case <-time.After(5 * time.Second):
|
||||
}
|
||||
|
||||
return 1
|
||||
|
||||
case <-op.Done():
|
||||
}
|
||||
case <-op.Done():
|
||||
if err := op.Err; err != nil {
|
||||
diags = diags.Append(err)
|
||||
}
|
||||
}
|
||||
|
||||
c.showDiagnostics(diags)
|
||||
|
|
Loading…
Reference in New Issue