diff --git a/builtin/provisioners/chef/resource_provisioner.go b/builtin/provisioners/chef/resource_provisioner.go index c702c0f59..6f2f3ae52 100644 --- a/builtin/provisioners/chef/resource_provisioner.go +++ b/builtin/provisioners/chef/resource_provisioner.go @@ -15,6 +15,7 @@ import ( "strings" "sync" "text/template" + "time" "github.com/hashicorp/terraform/communicator" "github.com/hashicorp/terraform/communicator/remote" @@ -97,6 +98,7 @@ type provisioner struct { PolicyName string HTTPProxy string HTTPSProxy string + MaxRetries int NamedRunList string NOProxy []string NodeName string @@ -104,6 +106,7 @@ type provisioner struct { OSType string RecreateClient bool PreventSudo bool + RetryOnExitCode map[int]bool RunList []string SecretKey string ServerURL string @@ -114,6 +117,7 @@ type provisioner struct { UserKey string Vaults map[string][]string Version string + WaitForRetry time.Duration cleanupUserKeyCmd string createConfigFiles provisionFn @@ -197,6 +201,11 @@ func Provisioner() terraform.ResourceProvisioner { Type: schema.TypeString, Optional: true, }, + "max_retries": &schema.Schema{ + Type: schema.TypeInt, + Optional: true, + Default: 0, + }, "no_proxy": &schema.Schema{ Type: schema.TypeList, Elem: &schema.Schema{Type: schema.TypeString}, @@ -215,12 +224,17 @@ func Provisioner() terraform.ResourceProvisioner { Type: schema.TypeString, Optional: true, }, + "prevent_sudo": &schema.Schema{ + Type: schema.TypeBool, + Optional: true, + }, "recreate_client": &schema.Schema{ Type: schema.TypeBool, Optional: true, }, - "prevent_sudo": &schema.Schema{ - Type: schema.TypeBool, + "retry_on_exit_code": &schema.Schema{ + Type: schema.TypeList, + Elem: &schema.Schema{Type: schema.TypeInt}, Optional: true, }, "run_list": &schema.Schema{ @@ -252,6 +266,11 @@ func Provisioner() terraform.ResourceProvisioner { Type: schema.TypeString, Optional: true, }, + "wait_for_retry": &schema.Schema{ + Type: schema.TypeInt, + Optional: true, + Default: 30, + }, }, ApplyFunc: applyFn, 
@@ -371,11 +390,55 @@ func applyFn(ctx context.Context) error { once.Do(cleanupUserKey) o.Output("Starting initial Chef-Client run...") - if err := p.runChefClient(o, comm); err != nil { - return err + + for attempt := 0; attempt <= p.MaxRetries; attempt++ { + // We need a new retry context for each attempt, to make sure + // they all get the correct timeout. + retryCtx, cancel := context.WithTimeout(ctx, comm.Timeout()) + defer cancel() + + // Make sure to (re)connect before trying to run Chef-Client. + if err := communicator.Retry(retryCtx, func() error { + return comm.Connect(o) + }); err != nil { + return err + } + + err = p.runChefClient(o, comm) + if err == nil { + return nil + } + + // Allow RFC062 Exit Codes: + // https://github.com/chef/chef-rfc/blob/master/rfc062-exit-status.md + exitError, ok := err.(*remote.ExitError) + if !ok { + return err + } + + switch exitError.ExitStatus { + case 35: + o.Output("Reboot has been scheduled in the run state") + err = nil + case 37: + o.Output("Reboot needs to be completed") + err = nil + case 213: + o.Output("Chef has exited during a client upgrade") + err = nil + } + + if !p.RetryOnExitCode[exitError.ExitStatus] { + return err + } + + if attempt < p.MaxRetries { + o.Output(fmt.Sprintf("Waiting %s before retrying Chef-Client run...", p.WaitForRetry)) + time.Sleep(p.WaitForRetry) + } } - return nil + return err } func validateFn(c *terraform.ResourceConfig) (ws []string, es []error) { @@ -730,12 +793,14 @@ func decodeConfig(d *schema.ResourceData) (*provisioner, error) { HTTPProxy: d.Get("http_proxy").(string), HTTPSProxy: d.Get("https_proxy").(string), NOProxy: getStringList(d.Get("no_proxy")), + MaxRetries: d.Get("max_retries").(int), NamedRunList: d.Get("named_run_list").(string), NodeName: d.Get("node_name").(string), OhaiHints: getStringList(d.Get("ohai_hints")), OSType: d.Get("os_type").(string), RecreateClient: d.Get("recreate_client").(bool), PreventSudo: d.Get("prevent_sudo").(bool), + RetryOnExitCode: 
getRetryOnExitCodes(d), RunList: getStringList(d.Get("run_list")), SecretKey: d.Get("secret_key").(string), ServerURL: d.Get("server_url").(string), @@ -745,6 +810,7 @@ func decodeConfig(d *schema.ResourceData) (*provisioner, error) { UserName: d.Get("user_name").(string), UserKey: d.Get("user_key").(string), Version: d.Get("version").(string), + WaitForRetry: time.Duration(d.Get("wait_for_retry").(int)) * time.Second, } // Make sure the supplied URL has a trailing slash @@ -794,6 +860,31 @@ func decodeConfig(d *schema.ResourceData) (*provisioner, error) { return p, nil } +func getRetryOnExitCodes(d *schema.ResourceData) map[int]bool { + result := make(map[int]bool) + + v, ok := d.GetOk("retry_on_exit_code") + if !ok || v == nil { + // Use default exit codes + result[35] = true + result[37] = true + result[213] = true + return result + } + + switch v := v.(type) { + case []interface{}: + for _, vv := range v { + if vv, ok := vv.(int); ok { + result[vv] = true + } + } + return result + default: + panic(fmt.Sprintf("Unsupported type: %T", v)) + } +} + func getStringList(v interface{}) []string { var result []string diff --git a/communicator/winrm/communicator.go b/communicator/winrm/communicator.go index 1669e5957..6f48085e1 100644 --- a/communicator/winrm/communicator.go +++ b/communicator/winrm/communicator.go @@ -52,13 +52,12 @@ func New(s *terraform.InstanceState) (*Communicator, error) { // Connect implementation of communicator.Communicator interface func (c *Communicator) Connect(o terraform.UIOutput) error { - if c.client != nil { - return nil - } + // Set the client to nil since we'll (re)create it + c.client = nil params := winrm.DefaultParameters params.Timeout = formatDuration(c.Timeout()) - if c.connInfo.NTLM == true { + if c.connInfo.NTLM { params.TransportDecorator = func() winrm.Transporter { return &winrm.ClientNTLM{} } } @@ -189,7 +188,7 @@ func (c *Communicator) newCopyClient() (*winrmcp.Winrmcp, error) { MaxOperationsPerShell: 15, // lowest common 
denominator } - if c.connInfo.NTLM == true { + if c.connInfo.NTLM { config.TransportDecorator = func() winrm.Transporter { return &winrm.ClientNTLM{} } } diff --git a/website/docs/provisioners/chef.html.markdown b/website/docs/provisioners/chef.html.markdown index 69e882d39..8056f297f 100644 --- a/website/docs/provisioners/chef.html.markdown +++ b/website/docs/provisioners/chef.html.markdown @@ -57,7 +57,7 @@ resource "aws_instance" "web" { recreate_client = true user_name = "bork" user_key = "${file("../bork.pem")}" - version = "12.4.1" + version = "15.10.13" # If you have a self signed cert on your chef server change this to :verify_none ssl_verify_mode = ":verify_peer" } @@ -109,6 +109,9 @@ The following arguments are supported: * `https_proxy (string)` - (Optional) The proxy server for Chef Client HTTPS connections. +* `max_retries (integer)` - (Optional) The number of times to retry the provisioning process + after receiving an exit code in the `retry_on_exit_code` list. Defaults to `0`. + * `named_run_list (string)` - (Optional) The name of an alternate run-list to invoke during the initial Chef Client run. The run-list must already exist in the Policyfile that defines `policy_name`. Only applies when `use_policyfile` is `true`. @@ -131,6 +134,11 @@ The following arguments are supported: * `recreate_client (boolean)` - (Optional) If `true`, first delete any existing Chef Node and Client before registering the new Chef Client. +* `retry_on_exit_code (array)` - (Optional) The exit codes upon which Terraform should + gracefully retry the provisioning process. Intended for use with + [Chef RFC062 codes](https://github.com/chef-boneyard/chef-rfc/blob/master/rfc062-exit-status.md). + Defaults to `[35, 37, 213]`. + * `run_list (array)` - (Optional) A list with recipes that will be invoked during the initial Chef Client run. The run-list will also be saved to the Chef Server after a successful initial run. 
Required if `use_policyfile` is `false`; ignored when `use_policyfile` is `true` @@ -169,3 +177,7 @@ The following arguments are supported: * `version (string)` - (Optional) The Chef Client version to install on the remote machine. If not set, the latest available version will be installed. + +* `wait_for_retry (integer)` - (Optional) Amount of time in seconds to wait before + retrying the provisioning process after receiving an exit code in the `retry_on_exit_code` + list. Defaults to `30`.