From 4701ed2e02e384e487d78506e668fde52746d868 Mon Sep 17 00:00:00 2001 From: Brian Dwyer Date: Tue, 28 Apr 2020 15:20:20 -0400 Subject: [PATCH 1/6] Gracefully handle Chef RFC062 Exit Codes Signed-off-by: Brian Dwyer --- .../provisioners/chef/resource_provisioner.go | 135 ++++++++++++++---- 1 file changed, 108 insertions(+), 27 deletions(-) diff --git a/builtin/provisioners/chef/resource_provisioner.go b/builtin/provisioners/chef/resource_provisioner.go index c702c0f59..7f5993bf1 100644 --- a/builtin/provisioners/chef/resource_provisioner.go +++ b/builtin/provisioners/chef/resource_provisioner.go @@ -15,6 +15,7 @@ import ( "strings" "sync" "text/template" + "time" "github.com/hashicorp/terraform/communicator" "github.com/hashicorp/terraform/communicator/remote" @@ -114,6 +115,9 @@ type provisioner struct { UserKey string Vaults map[string][]string Version string + RetryOnExitCode []int + WaitForRetry int + MaxRetries int cleanupUserKeyCmd string createConfigFiles provisionFn @@ -252,6 +256,23 @@ func Provisioner() terraform.ResourceProvisioner { Type: schema.TypeString, Optional: true, }, + // Same defaults as Test-Kitchen + // https://github.com/test-kitchen/test-kitchen/blob/e5998e0dd1aa42601c55659da78f9b112ff9f8ee/lib/kitchen/provisioner/base.rb#L36-38 + "retry_on_exit_code": &schema.Schema{ + Type: schema.TypeList, + Elem: &schema.Schema{Type: schema.TypeInt}, + Optional: true, + }, + "max_retries": &schema.Schema{ + Type: schema.TypeInt, + Optional: true, + Default: 1, + }, + "wait_for_retry": &schema.Schema{ + Type: schema.TypeInt, + Optional: true, + Default: 30, + }, }, ApplyFunc: applyFn, @@ -315,6 +336,10 @@ func applyFn(ctx context.Context) error { retryCtx, cancel := context.WithTimeout(ctx, comm.Timeout()) defer cancel() + // Graceful retry of RFC062 exit codes + var attempt int +retry: + // Wait and retry until we establish the connection err = communicator.Retry(retryCtx, func() error { return comm.Connect(o) @@ -334,44 +359,79 @@ func applyFn(ctx context.Context) error { } defer once.Do(cleanupUserKey) - if !p.SkipInstall { - if err := p.installChefClient(o, comm); err != nil { - return err - } - } - - o.Output("Creating configuration files...") - if err := p.createConfigFiles(o, comm); err != nil { - return err - } - - if !p.SkipRegister { - if p.FetchChefCertificates { - o.Output("Fetch Chef certificates...") - if err := p.fetchChefCertificates(o, comm); err != nil { + if attempt == 0 { + if !p.SkipInstall { + if err := p.installChefClient(o, comm); err != nil { return err } } - o.Output("Generate the private key...") - if err := p.generateClientKey(o, comm); err != nil { + o.Output("Creating configuration files...") + if err := p.createConfigFiles(o, comm); err != nil { return err } - } - if p.Vaults != nil { - o.Output("Configure Chef vaults...") - if err := p.configureVaults(o, comm); err != nil { - return err + if !p.SkipRegister { + if p.FetchChefCertificates { + o.Output("Fetch Chef certificates...") + if err := p.fetchChefCertificates(o, comm); err != nil { + return err + } + } + + o.Output("Generate the private key...") + if err := p.generateClientKey(o, comm); err != nil { + return err + } } + + if p.Vaults != nil { + o.Output("Configure Chef vaults...") + if err := p.configureVaults(o, comm); err != nil { + return err + } + } + + // Cleanup the user key before we run Chef-Client to prevent issues + // with rights caused by changing settings during the run. + once.Do(cleanupUserKey) + + o.Output("Starting initial Chef-Client run...") } - // Cleanup the user key before we run Chef-Client to prevent issues - // with rights caused by changing settings during the run. - once.Do(cleanupUserKey) - - o.Output("Starting initial Chef-Client run...") if err := p.runChefClient(o, comm); err != nil { + // Allow RFC062 Exit Codes + // https://github.com/chef/chef-rfc/blob/master/rfc062-exit-status.md + exitError, ok := err.(*remote.ExitError) + if !ok { + return fmt.Errorf("Expected remote.ExitError but got: %w", err) + } + exitStatus := exitError.ExitStatus + switch exitStatus { + case 35: + o.Output("Reboot has been scheduled in the run state") + err = nil + case 37: + o.Output("Reboot needs to be completed") + err = nil + case 213: + o.Output("Chef has exited during a client upgrade") + err = nil + } + + if len(p.RetryOnExitCode) == 0 || attempt == p.MaxRetries { + return err + } + + for _, code := range p.RetryOnExitCode { + if code == exitStatus { + o.Output(fmt.Sprintf("Waiting %v seconds before reconnecting & re-running Chef...", p.WaitForRetry)) + time.Sleep(time.Duration(p.WaitForRetry) * time.Second) + attempt++ + goto retry + } + } + return err } @@ -745,6 +805,9 @@ func decodeConfig(d *schema.ResourceData) (*provisioner, error) { UserName: d.Get("user_name").(string), UserKey: d.Get("user_key").(string), Version: d.Get("version").(string), + RetryOnExitCode: getIntList(d.Get("retry_on_exit_code")), + WaitForRetry: d.Get("wait_for_retry").(int), + MaxRetries: d.Get("max_retries").(int), } // Make sure the supplied URL has a trailing slash @@ -794,6 +857,24 @@ func decodeConfig(d *schema.ResourceData) (*provisioner, error) { return p, nil } +func getIntList(v interface{}) []int { + var result []int + + switch v := v.(type) { + case nil: + return result + case []interface{}: + for _, vv := range v { + if vv, ok := vv.(int); ok { + result = append(result, vv) + } + } + return result + default: + panic(fmt.Sprintf("Unsupported type: %T", v)) + } +} + func getStringList(v interface{}) []string { var result []string From 2f4067cf7064cb159c653bee73d376f6725ecf0b Mon Sep 17 00:00:00 2001 From: Brian Dwyer Date: Thu, 30 Apr 2020 08:04:54 -0400 Subject: [PATCH 2/6] Clean up and add docs Signed-off-by: Brian Dwyer --- .../provisioners/chef/resource_provisioner.go | 34 +++++++++---------- website/docs/provisioners/chef.html.markdown | 10 +++++- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/builtin/provisioners/chef/resource_provisioner.go b/builtin/provisioners/chef/resource_provisioner.go index 7f5993bf1..aa5b40097 100644 --- a/builtin/provisioners/chef/resource_provisioner.go +++ b/builtin/provisioners/chef/resource_provisioner.go @@ -98,6 +98,7 @@ type provisioner struct { PolicyName string HTTPProxy string HTTPSProxy string + MaxRetries int NamedRunList string NOProxy []string NodeName string @@ -105,6 +106,7 @@ type provisioner struct { OSType string RecreateClient bool PreventSudo bool + RetryOnExitCode []int RunList []string SecretKey string ServerURL string @@ -115,9 +117,7 @@ type provisioner struct { UserKey string Vaults map[string][]string Version string - RetryOnExitCode []int WaitForRetry int - MaxRetries int cleanupUserKeyCmd string createConfigFiles provisionFn @@ -201,6 +201,11 @@ func Provisioner() terraform.ResourceProvisioner { Type: schema.TypeString, Optional: true, }, + "max_retries": &schema.Schema{ + Type: schema.TypeInt, + Optional: true, + Default: 1, + }, "no_proxy": &schema.Schema{ Type: schema.TypeList, Elem: &schema.Schema{Type: schema.TypeString}, @@ -219,12 +224,17 @@ func Provisioner() terraform.ResourceProvisioner { Type: schema.TypeString, Optional: true, }, + "prevent_sudo": &schema.Schema{ + Type: schema.TypeBool, + Optional: true, + }, "recreate_client": &schema.Schema{ Type: schema.TypeBool, Optional: true, }, - "prevent_sudo": &schema.Schema{ - Type: schema.TypeBool, + "retry_on_exit_code": &schema.Schema{ + Type: schema.TypeList, + Elem: &schema.Schema{Type: schema.TypeInt}, Optional: true, }, "run_list": &schema.Schema{ @@ -256,18 +266,6 @@ func Provisioner() terraform.ResourceProvisioner { Type: schema.TypeString, Optional: true, }, - // Same defaults as Test-Kitchen - // https://github.com/test-kitchen/test-kitchen/blob/e5998e0dd1aa42601c55659da78f9b112ff9f8ee/lib/kitchen/provisioner/base.rb#L36-38 - "retry_on_exit_code": &schema.Schema{ - Type: schema.TypeList, - Elem: &schema.Schema{Type: schema.TypeInt}, - Optional: true, - }, - "max_retries": &schema.Schema{ - Type: schema.TypeInt, - Optional: true, - Default: 1, - }, "wait_for_retry": &schema.Schema{ Type: schema.TypeInt, Optional: true, @@ -790,12 +788,14 @@ func decodeConfig(d *schema.ResourceData) (*provisioner, error) { HTTPProxy: d.Get("http_proxy").(string), HTTPSProxy: d.Get("https_proxy").(string), NOProxy: getStringList(d.Get("no_proxy")), + MaxRetries: d.Get("max_retries").(int), NamedRunList: d.Get("named_run_list").(string), NodeName: d.Get("node_name").(string), OhaiHints: getStringList(d.Get("ohai_hints")), OSType: d.Get("os_type").(string), RecreateClient: d.Get("recreate_client").(bool), PreventSudo: d.Get("prevent_sudo").(bool), + RetryOnExitCode: getIntList(d.Get("retry_on_exit_code")), RunList: getStringList(d.Get("run_list")), SecretKey: d.Get("secret_key").(string), ServerURL: d.Get("server_url").(string), @@ -805,9 +805,7 @@ func decodeConfig(d *schema.ResourceData) (*provisioner, error) { UserName: d.Get("user_name").(string), UserKey: d.Get("user_key").(string), Version: d.Get("version").(string), - RetryOnExitCode: getIntList(d.Get("retry_on_exit_code")), WaitForRetry: d.Get("wait_for_retry").(int), - MaxRetries: d.Get("max_retries").(int), } // Make sure the supplied URL has a trailing slash diff --git a/website/docs/provisioners/chef.html.markdown b/website/docs/provisioners/chef.html.markdown index 69e882d39..65577c70c 100644 --- a/website/docs/provisioners/chef.html.markdown +++ b/website/docs/provisioners/chef.html.markdown @@ -57,9 +57,11 @@ resource "aws_instance" "web" { recreate_client = true user_name = "bork" user_key = "${file("../bork.pem")}" - version = "12.4.1" + version = "15.10.13" # If you have a self signed cert on your chef server change this to :verify_none ssl_verify_mode = ":verify_peer" + # Gracefully handle Chef upgrades, reboots, etc. + retry_on_exit_code = [35, 213] } } ``` @@ -109,6 +111,8 @@ The following arguments are supported: * `https_proxy (string)` - (Optional) The proxy server for Chef Client HTTPS connections. +* `max_retries (integer)` - (Optional) The number of times to retry the provisioning process after receiving an exit code in the `retry_on_error` list. Defaults to `1` + * `named_run_list (string)` - (Optional) The name of an alternate run-list to invoke during the initial Chef Client run. The run-list must already exist in the Policyfile that defines `policy_name`. Only applies when `use_policyfile` is `true`. @@ -131,6 +135,8 @@ The following arguments are supported: * `recreate_client (boolean)` - (Optional) If `true`, first delete any existing Chef Node and Client before registering the new Chef Client. +* `retry_on_error (list of integers)` - (Optional) The error codes upon which Terraform should gracefully retry the provisioning process. Intended for use with [Chef RFC062 codes.](https://github.com/chef-boneyard/chef-rfc/blob/69a19f632cceffe965bafaad6765e3376068fd5b/rfc062-exit-status.md) + * `run_list (array)` - (Optional) A list with recipes that will be invoked during the initial Chef Client run. The run-list will also be saved to the Chef Server after a successful initial run. Required if `use_policyfile` is `false`; ignored when `use_policyfile` is `true` @@ -169,3 +175,5 @@ The following arguments are supported: * `version (string)` - (Optional) The Chef Client version to install on the remote machine. If not set, the latest available version will be installed. + +* `wait_for_retry (integer)` - (Optional) - Amount of time in seconds to wait before retrying the provisionining process after receiving an exit code in the `retry_on_error` list. Defaults to `30`. From a61405692537d54168ebcfca253e783fe8f8537c Mon Sep 17 00:00:00 2001 From: Sander van Harmelen Date: Fri, 1 May 2020 11:05:08 +0200 Subject: [PATCH 3/6] Refactor the code a bit to make it more idiomatic --- .../provisioners/chef/resource_provisioner.go | 118 +++++++++--------- communicator/winrm/communicator.go | 4 +- website/docs/provisioners/chef.html.markdown | 11 +- 3 files changed, 67 insertions(+), 66 deletions(-) diff --git a/builtin/provisioners/chef/resource_provisioner.go b/builtin/provisioners/chef/resource_provisioner.go index aa5b40097..7c08d8417 100644 --- a/builtin/provisioners/chef/resource_provisioner.go +++ b/builtin/provisioners/chef/resource_provisioner.go @@ -106,7 +106,7 @@ type provisioner struct { OSType string RecreateClient bool PreventSudo bool - RetryOnExitCode []int + RetryOnExitCode map[int]bool RunList []string SecretKey string ServerURL string @@ -117,7 +117,7 @@ type provisioner struct { UserKey string Vaults map[string][]string Version string - WaitForRetry int + WaitForRetry time.Duration cleanupUserKeyCmd string createConfigFiles provisionFn @@ -334,10 +334,6 @@ func applyFn(ctx context.Context) error { retryCtx, cancel := context.WithTimeout(ctx, comm.Timeout()) defer cancel() - // Graceful retry of RFC062 exit codes - var attempt int -retry: - // Wait and retry until we establish the connection err = communicator.Retry(retryCtx, func() error { return comm.Connect(o) @@ -357,55 +353,65 @@ retry: } defer once.Do(cleanupUserKey) - if attempt == 0 { - if !p.SkipInstall { - if err := p.installChefClient(o, comm); err != nil { + if !p.SkipInstall { + if err := p.installChefClient(o, comm); err != nil { + return err + } + } + + o.Output("Creating configuration files...") + if err := p.createConfigFiles(o, comm); err != nil { + return err + } + + if !p.SkipRegister { + if p.FetchChefCertificates { + o.Output("Fetch Chef certificates...") + if err := p.fetchChefCertificates(o, comm); err != nil { return err } } - o.Output("Creating configuration files...") - if err := p.createConfigFiles(o, comm); err != nil { + o.Output("Generate the private key...") + if err := p.generateClientKey(o, comm); err != nil { + return err + } + } + + if p.Vaults != nil { + o.Output("Configure Chef vaults...") + if err := p.configureVaults(o, comm); err != nil { + return err + } + } + + // Cleanup the user key before we run Chef-Client to prevent issues + // with rights caused by changing settings during the run. + once.Do(cleanupUserKey) + + o.Output("Starting initial Chef-Client run...") + + for attempt := 0; attempt < p.MaxRetries; attempt++ { + // Make sure to (re)connect before trying to run Chef-Client. + if err := communicator.Retry(retryCtx, func() error { + return comm.Connect(o) + }); err != nil { return err } - if !p.SkipRegister { - if p.FetchChefCertificates { - o.Output("Fetch Chef certificates...") - if err := p.fetchChefCertificates(o, comm); err != nil { - return err - } - } - - o.Output("Generate the private key...") - if err := p.generateClientKey(o, comm); err != nil { - return err - } + err = p.runChefClient(o, comm) + if err == nil { + return nil } - if p.Vaults != nil { - o.Output("Configure Chef vaults...") - if err := p.configureVaults(o, comm); err != nil { - return err - } - } - - // Cleanup the user key before we run Chef-Client to prevent issues - // with rights caused by changing settings during the run. - once.Do(cleanupUserKey) - - o.Output("Starting initial Chef-Client run...") - } - - if err := p.runChefClient(o, comm); err != nil { - // Allow RFC062 Exit Codes + // Allow RFC062 Exit Codes: // https://github.com/chef/chef-rfc/blob/master/rfc062-exit-status.md exitError, ok := err.(*remote.ExitError) if !ok { - return fmt.Errorf("Expected remote.ExitError but got: %w", err) + return err } - exitStatus := exitError.ExitStatus - switch exitStatus { + + switch exitError.ExitStatus { case 35: o.Output("Reboot has been scheduled in the run state") err = nil @@ -417,23 +423,13 @@ retry: err = nil } - if len(p.RetryOnExitCode) == 0 || attempt == p.MaxRetries { - return err + if p.RetryOnExitCode[exitError.ExitStatus] { + o.Output(fmt.Sprintf("Waiting %s before retrying Chef-Client run...", p.WaitForRetry)) + time.Sleep(p.WaitForRetry) } - - for _, code := range p.RetryOnExitCode { - if code == exitStatus { - o.Output(fmt.Sprintf("Waiting %v seconds before reconnecting & re-running Chef...", p.WaitForRetry)) - time.Sleep(time.Duration(p.WaitForRetry) * time.Second) - attempt++ - goto retry - } - } - - return err } - return nil + return err } func validateFn(c *terraform.ResourceConfig) (ws []string, es []error) { @@ -795,7 +791,7 @@ func decodeConfig(d *schema.ResourceData) (*provisioner, error) { OSType: d.Get("os_type").(string), RecreateClient: d.Get("recreate_client").(bool), PreventSudo: d.Get("prevent_sudo").(bool), - RetryOnExitCode: getIntList(d.Get("retry_on_exit_code")), + RetryOnExitCode: getIntListAsMap(d.Get("retry_on_exit_code")), RunList: getStringList(d.Get("run_list")), SecretKey: d.Get("secret_key").(string), ServerURL: d.Get("server_url").(string), @@ -805,7 +801,7 @@ func decodeConfig(d *schema.ResourceData) (*provisioner, error) { UserName: d.Get("user_name").(string), UserKey: d.Get("user_key").(string), Version: d.Get("version").(string), - WaitForRetry: d.Get("wait_for_retry").(int), + WaitForRetry: time.Duration(d.Get("wait_for_retry").(int)) * time.Second, } // Make sure the supplied URL has a trailing slash @@ -855,8 +851,8 @@ func decodeConfig(d *schema.ResourceData) (*provisioner, error) { return p, nil } -func getIntList(v interface{}) []int { - var result []int +func getIntListAsMap(v interface{}) map[int]bool { + result := make(map[int]bool) switch v := v.(type) { case nil: @@ -864,7 +860,7 @@ func getIntList(v interface{}) []int { case []interface{}: for _, vv := range v { if vv, ok := vv.(int); ok { - result = append(result, vv) + result[vv] = true } } return result diff --git a/communicator/winrm/communicator.go b/communicator/winrm/communicator.go index 1669e5957..cdeab9383 100644 --- a/communicator/winrm/communicator.go +++ b/communicator/winrm/communicator.go @@ -58,7 +58,7 @@ func (c *Communicator) Connect(o terraform.UIOutput) error { params := winrm.DefaultParameters params.Timeout = formatDuration(c.Timeout()) - if c.connInfo.NTLM == true { + if c.connInfo.NTLM { params.TransportDecorator = func() winrm.Transporter { return &winrm.ClientNTLM{} } } @@ -189,7 +189,7 @@ func (c *Communicator) newCopyClient() (*winrmcp.Winrmcp, error) { MaxOperationsPerShell: 15, // lowest common denominator } - if c.connInfo.NTLM == true { + if c.connInfo.NTLM { config.TransportDecorator = func() winrm.Transporter { return &winrm.ClientNTLM{} } } diff --git a/website/docs/provisioners/chef.html.markdown b/website/docs/provisioners/chef.html.markdown index 65577c70c..59293aa97 100644 --- a/website/docs/provisioners/chef.html.markdown +++ b/website/docs/provisioners/chef.html.markdown @@ -111,7 +111,8 @@ The following arguments are supported: * `https_proxy (string)` - (Optional) The proxy server for Chef Client HTTPS connections. -* `max_retries (integer)` - (Optional) The number of times to retry the provisioning process after receiving an exit code in the `retry_on_error` list. Defaults to `1` +* `max_retries (integer)` - (Optional) The number of times to retry the provisioning process + after receiving an exit code in the `retry_on_error` list. Defaults to `1` * `named_run_list (string)` - (Optional) The name of an alternate run-list to invoke during the initial Chef Client run. The run-list must already exist in the Policyfile that defines @@ -135,7 +136,9 @@ The following arguments are supported: * `recreate_client (boolean)` - (Optional) If `true`, first delete any existing Chef Node and Client before registering the new Chef Client. -* `retry_on_error (list of integers)` - (Optional) The error codes upon which Terraform should gracefully retry the provisioning process. Intended for use with [Chef RFC062 codes.](https://github.com/chef-boneyard/chef-rfc/blob/69a19f632cceffe965bafaad6765e3376068fd5b/rfc062-exit-status.md) +* `retry_on_error (list of integers)` - (Optional) The error codes upon which Terraform should + gracefully retry the provisioning process. Intended for use with + [Chef RFC062 codes](https://github.com/chef-boneyard/chef-rfc/blob/master/rfc062-exit-status.md). * `run_list (array)` - (Optional) A list with recipes that will be invoked during the initial Chef Client run. The run-list will also be saved to the Chef Server after a successful @@ -176,4 +179,6 @@ The following arguments are supported: * `version (string)` - (Optional) The Chef Client version to install on the remote machine. If not set, the latest available version will be installed. -* `wait_for_retry (integer)` - (Optional) - Amount of time in seconds to wait before retrying the provisionining process after receiving an exit code in the `retry_on_error` list. Defaults to `30`. +* `wait_for_retry (integer)` - (Optional) - Amount of time in seconds to wait before + retrying the provisionining process after receiving an exit code in the `retry_on_error` + list. Defaults to `30`. From 10aab8605166ffdcb154c819245e901d134d8496 Mon Sep 17 00:00:00 2001 From: Sander van Harmelen Date: Tue, 5 May 2020 15:29:52 +0200 Subject: [PATCH 4/6] Make sure we use MaxRetries correctly Even if MaxRetries is 0, we should still execute the loop one time in order to run the Chef-Client at least once. Also waiting only makes sense when we have `attempts` left. And last but not least we want to exit immediately when the exit code is not in the retry list. So this PR fixes three small issues to make everything work as expected. --- builtin/provisioners/chef/resource_provisioner.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/builtin/provisioners/chef/resource_provisioner.go b/builtin/provisioners/chef/resource_provisioner.go index 7c08d8417..d729587d6 100644 --- a/builtin/provisioners/chef/resource_provisioner.go +++ b/builtin/provisioners/chef/resource_provisioner.go @@ -391,7 +391,7 @@ func applyFn(ctx context.Context) error { o.Output("Starting initial Chef-Client run...") - for attempt := 0; attempt < p.MaxRetries; attempt++ { + for attempt := 0; attempt <= p.MaxRetries; attempt++ { // Make sure to (re)connect before trying to run Chef-Client. if err := communicator.Retry(retryCtx, func() error { return comm.Connect(o) @@ -423,7 +423,11 @@ func applyFn(ctx context.Context) error { err = nil } - if p.RetryOnExitCode[exitError.ExitStatus] { + if !p.RetryOnExitCode[exitError.ExitStatus] { + return err + } + + if attempt < p.MaxRetries { o.Output(fmt.Sprintf("Waiting %s before retrying Chef-Client run...", p.WaitForRetry)) time.Sleep(p.WaitForRetry) } From 9453308c7848438f4b4a4a1c07749eb23304367d Mon Sep 17 00:00:00 2001 From: Sander van Harmelen Date: Tue, 5 May 2020 18:13:01 +0200 Subject: [PATCH 5/6] Make sure the WinRM communicator can reconnect --- .../provisioners/chef/resource_provisioner.go | 22 ++++++++++++++----- communicator/winrm/communicator.go | 5 ++--- website/docs/provisioners/chef.html.markdown | 5 +++-- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/builtin/provisioners/chef/resource_provisioner.go b/builtin/provisioners/chef/resource_provisioner.go index d729587d6..6f2f3ae52 100644 --- a/builtin/provisioners/chef/resource_provisioner.go +++ b/builtin/provisioners/chef/resource_provisioner.go @@ -204,7 +204,7 @@ func Provisioner() terraform.ResourceProvisioner { "max_retries": &schema.Schema{ Type: schema.TypeInt, Optional: true, - Default: 1, + Default: 0, }, "no_proxy": &schema.Schema{ Type: schema.TypeList, @@ -392,6 +392,11 @@ func applyFn(ctx context.Context) error { o.Output("Starting initial Chef-Client run...") for attempt := 0; attempt <= p.MaxRetries; attempt++ { + // We need a new retry context for each attempt, to make sure + // they all get the correct timeout. + retryCtx, cancel := context.WithTimeout(ctx, comm.Timeout()) + defer cancel() + // Make sure to (re)connect before trying to run Chef-Client. if err := communicator.Retry(retryCtx, func() error { return comm.Connect(o) @@ -795,7 +800,7 @@ func decodeConfig(d *schema.ResourceData) (*provisioner, error) { OSType: d.Get("os_type").(string), RecreateClient: d.Get("recreate_client").(bool), PreventSudo: d.Get("prevent_sudo").(bool), - RetryOnExitCode: getIntListAsMap(d.Get("retry_on_exit_code")), + RetryOnExitCode: getRetryOnExitCodes(d), RunList: getStringList(d.Get("run_list")), SecretKey: d.Get("secret_key").(string), ServerURL: d.Get("server_url").(string), @@ -855,12 +860,19 @@ func decodeConfig(d *schema.ResourceData) (*provisioner, error) { return p, nil } -func getIntListAsMap(v interface{}) map[int]bool { +func getRetryOnExitCodes(d *schema.ResourceData) map[int]bool { result := make(map[int]bool) - switch v := v.(type) { - case nil: + v, ok := d.GetOk("retry_on_exit_code") + if !ok || v == nil { + // Use default exit codes + result[35] = true + result[37] = true + result[213] = true return result + } + + switch v := v.(type) { case []interface{}: for _, vv := range v { if vv, ok := vv.(int); ok { diff --git a/communicator/winrm/communicator.go b/communicator/winrm/communicator.go index cdeab9383..6f48085e1 100644 --- a/communicator/winrm/communicator.go +++ b/communicator/winrm/communicator.go @@ -52,9 +52,8 @@ func New(s *terraform.InstanceState) (*Communicator, error) { // Connect implementation of communicator.Communicator interface func (c *Communicator) Connect(o terraform.UIOutput) error { - if c.client != nil { - return nil - } + // Set the client to nil since we'll (re)create it + c.client = nil params := winrm.DefaultParameters params.Timeout = formatDuration(c.Timeout()) diff --git a/website/docs/provisioners/chef.html.markdown b/website/docs/provisioners/chef.html.markdown index 59293aa97..fcb330472 100644 --- a/website/docs/provisioners/chef.html.markdown +++ b/website/docs/provisioners/chef.html.markdown @@ -112,7 +112,7 @@ The following arguments are supported: * `https_proxy (string)` - (Optional) The proxy server for Chef Client HTTPS connections. * `max_retries (integer)` - (Optional) The number of times to retry the provisioning process - after receiving an exit code in the `retry_on_error` list. Defaults to `1` + after receiving an exit code in the `retry_on_error` list. Defaults to `0` * `named_run_list (string)` - (Optional) The name of an alternate run-list to invoke during the initial Chef Client run. The run-list must already exist in the Policyfile that defines @@ -136,9 +136,10 @@ The following arguments are supported: * `recreate_client (boolean)` - (Optional) If `true`, first delete any existing Chef Node and Client before registering the new Chef Client. -* `retry_on_error (list of integers)` - (Optional) The error codes upon which Terraform should +* `retry_on_error (array)` - (Optional) The error codes upon which Terraform should gracefully retry the provisioning process. Intended for use with [Chef RFC062 codes](https://github.com/chef-boneyard/chef-rfc/blob/master/rfc062-exit-status.md). + (Defaults to `[35, 37, 213]`) * `run_list (array)` - (Optional) A list with recipes that will be invoked during the initial Chef Client run. The run-list will also be saved to the Chef Server after a successful From 2e5fbdf6844116fb43e115e3d3c30b867f22bea2 Mon Sep 17 00:00:00 2001 From: Sander van Harmelen Date: Tue, 5 May 2020 22:23:51 +0200 Subject: [PATCH 6/6] Remove (now) incorrect example This example doesn't really show how these values should be used. The default of retry_on_exit_code is now already when most people want, so this line is not needed in most cases. I think the docs describe the new options just fine, so lets leave this out... --- website/docs/provisioners/chef.html.markdown | 2 -- 1 file changed, 2 deletions(-) diff --git a/website/docs/provisioners/chef.html.markdown b/website/docs/provisioners/chef.html.markdown index fcb330472..8056f297f 100644 --- a/website/docs/provisioners/chef.html.markdown +++ b/website/docs/provisioners/chef.html.markdown @@ -60,8 +60,6 @@ resource "aws_instance" "web" { version = "15.10.13" # If you have a self signed cert on your chef server change this to :verify_none ssl_verify_mode = ":verify_peer" - # Gracefully handle Chef upgrades, reboots, etc. - retry_on_exit_code = [35, 213] } } ```