Merge pull request #19155 from bdwyertech/chef-exit-codes

Chef: Gracefully Handle RFC062 Exit Codes
This commit is contained in:
James Bardin 2020-05-12 17:09:05 -04:00 committed by GitHub
commit e912dc8551
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 113 additions and 11 deletions

View File

@ -15,6 +15,7 @@ import (
"strings"
"sync"
"text/template"
"time"
"github.com/hashicorp/terraform/communicator"
"github.com/hashicorp/terraform/communicator/remote"
@ -97,6 +98,7 @@ type provisioner struct {
PolicyName string
HTTPProxy string
HTTPSProxy string
MaxRetries int
NamedRunList string
NOProxy []string
NodeName string
@ -104,6 +106,7 @@ type provisioner struct {
OSType string
RecreateClient bool
PreventSudo bool
RetryOnExitCode map[int]bool
RunList []string
SecretKey string
ServerURL string
@ -114,6 +117,7 @@ type provisioner struct {
UserKey string
Vaults map[string][]string
Version string
WaitForRetry time.Duration
cleanupUserKeyCmd string
createConfigFiles provisionFn
@ -197,6 +201,11 @@ func Provisioner() terraform.ResourceProvisioner {
Type: schema.TypeString,
Optional: true,
},
"max_retries": &schema.Schema{
Type: schema.TypeInt,
Optional: true,
Default: 0,
},
"no_proxy": &schema.Schema{
Type: schema.TypeList,
Elem: &schema.Schema{Type: schema.TypeString},
@ -215,12 +224,17 @@ func Provisioner() terraform.ResourceProvisioner {
Type: schema.TypeString,
Optional: true,
},
"prevent_sudo": &schema.Schema{
Type: schema.TypeBool,
Optional: true,
},
"recreate_client": &schema.Schema{
Type: schema.TypeBool,
Optional: true,
},
"prevent_sudo": &schema.Schema{
Type: schema.TypeBool,
"retry_on_exit_code": &schema.Schema{
Type: schema.TypeList,
Elem: &schema.Schema{Type: schema.TypeInt},
Optional: true,
},
"run_list": &schema.Schema{
@ -252,6 +266,11 @@ func Provisioner() terraform.ResourceProvisioner {
Type: schema.TypeString,
Optional: true,
},
"wait_for_retry": &schema.Schema{
Type: schema.TypeInt,
Optional: true,
Default: 30,
},
},
ApplyFunc: applyFn,
@ -371,11 +390,55 @@ func applyFn(ctx context.Context) error {
once.Do(cleanupUserKey)
o.Output("Starting initial Chef-Client run...")
if err := p.runChefClient(o, comm); err != nil {
return err
for attempt := 0; attempt <= p.MaxRetries; attempt++ {
// We need a new retry context for each attempt, to make sure
// they all get the correct timeout.
retryCtx, cancel := context.WithTimeout(ctx, comm.Timeout())
defer cancel()
// Make sure to (re)connect before trying to run Chef-Client.
if err := communicator.Retry(retryCtx, func() error {
return comm.Connect(o)
}); err != nil {
return err
}
err = p.runChefClient(o, comm)
if err == nil {
return nil
}
// Allow RFC062 Exit Codes:
// https://github.com/chef/chef-rfc/blob/master/rfc062-exit-status.md
exitError, ok := err.(*remote.ExitError)
if !ok {
return err
}
switch exitError.ExitStatus {
case 35:
o.Output("Reboot has been scheduled in the run state")
err = nil
case 37:
o.Output("Reboot needs to be completed")
err = nil
case 213:
o.Output("Chef has exited during a client upgrade")
err = nil
}
if !p.RetryOnExitCode[exitError.ExitStatus] {
return err
}
if attempt < p.MaxRetries {
o.Output(fmt.Sprintf("Waiting %s before retrying Chef-Client run...", p.WaitForRetry))
time.Sleep(p.WaitForRetry)
}
}
return nil
return err
}
func validateFn(c *terraform.ResourceConfig) (ws []string, es []error) {
@ -730,12 +793,14 @@ func decodeConfig(d *schema.ResourceData) (*provisioner, error) {
HTTPProxy: d.Get("http_proxy").(string),
HTTPSProxy: d.Get("https_proxy").(string),
NOProxy: getStringList(d.Get("no_proxy")),
MaxRetries: d.Get("max_retries").(int),
NamedRunList: d.Get("named_run_list").(string),
NodeName: d.Get("node_name").(string),
OhaiHints: getStringList(d.Get("ohai_hints")),
OSType: d.Get("os_type").(string),
RecreateClient: d.Get("recreate_client").(bool),
PreventSudo: d.Get("prevent_sudo").(bool),
RetryOnExitCode: getRetryOnExitCodes(d),
RunList: getStringList(d.Get("run_list")),
SecretKey: d.Get("secret_key").(string),
ServerURL: d.Get("server_url").(string),
@ -745,6 +810,7 @@ func decodeConfig(d *schema.ResourceData) (*provisioner, error) {
UserName: d.Get("user_name").(string),
UserKey: d.Get("user_key").(string),
Version: d.Get("version").(string),
WaitForRetry: time.Duration(d.Get("wait_for_retry").(int)) * time.Second,
}
// Make sure the supplied URL has a trailing slash
@ -794,6 +860,31 @@ func decodeConfig(d *schema.ResourceData) (*provisioner, error) {
return p, nil
}
// getRetryOnExitCodes builds the set of Chef-Client exit codes on which the
// provisioner should retry, keyed by exit code.
//
// When GetOk reports "retry_on_exit_code" as not set (or yields a nil value),
// the set contains the codes 35, 37 and 213. Otherwise every int element of
// the configured list is added; elements that are not ints are skipped
// silently. A configured value that is not a []interface{} causes a panic.
func getRetryOnExitCodes(d *schema.ResourceData) map[int]bool {
	codes := make(map[int]bool)

	raw, isSet := d.GetOk("retry_on_exit_code")
	if !isSet || raw == nil {
		// Nothing configured: fall back to the default exit codes.
		for _, code := range []int{35, 37, 213} {
			codes[code] = true
		}
		return codes
	}

	list, isList := raw.([]interface{})
	if !isList {
		panic(fmt.Sprintf("Unsupported type: %T", raw))
	}

	for _, item := range list {
		code, isInt := item.(int)
		if !isInt {
			continue
		}
		codes[code] = true
	}
	return codes
}
func getStringList(v interface{}) []string {
var result []string

View File

@ -52,13 +52,12 @@ func New(s *terraform.InstanceState) (*Communicator, error) {
// Connect implementation of communicator.Communicator interface
func (c *Communicator) Connect(o terraform.UIOutput) error {
if c.client != nil {
return nil
}
// Set the client to nil since we'll (re)create it
c.client = nil
params := winrm.DefaultParameters
params.Timeout = formatDuration(c.Timeout())
if c.connInfo.NTLM == true {
if c.connInfo.NTLM {
params.TransportDecorator = func() winrm.Transporter { return &winrm.ClientNTLM{} }
}
@ -189,7 +188,7 @@ func (c *Communicator) newCopyClient() (*winrmcp.Winrmcp, error) {
MaxOperationsPerShell: 15, // lowest common denominator
}
if c.connInfo.NTLM == true {
if c.connInfo.NTLM {
config.TransportDecorator = func() winrm.Transporter { return &winrm.ClientNTLM{} }
}

View File

@ -57,7 +57,7 @@ resource "aws_instance" "web" {
recreate_client = true
user_name = "bork"
user_key = "${file("../bork.pem")}"
version = "12.4.1"
version = "15.10.13"
# If you have a self signed cert on your chef server change this to :verify_none
ssl_verify_mode = ":verify_peer"
}
@ -109,6 +109,9 @@ The following arguments are supported:
* `https_proxy (string)` - (Optional) The proxy server for Chef Client HTTPS connections.
* `max_retries (integer)` - (Optional) The number of times to retry the provisioning process
after receiving an exit code in the `retry_on_exit_code` list. Defaults to `0`.
* `named_run_list (string)` - (Optional) The name of an alternate run-list to invoke during the
initial Chef Client run. The run-list must already exist in the Policyfile that defines
`policy_name`. Only applies when `use_policyfile` is `true`.
@ -131,6 +134,11 @@ The following arguments are supported:
* `recreate_client (boolean)` - (Optional) If `true`, first delete any existing Chef Node and
Client before registering the new Chef Client.
* `retry_on_exit_code (array)` - (Optional) The exit codes upon which Terraform should
gracefully retry the provisioning process. Intended for use with
[Chef RFC062 codes](https://github.com/chef-boneyard/chef-rfc/blob/master/rfc062-exit-status.md).
Defaults to `[35, 37, 213]`.
* `run_list (array)` - (Optional) A list with recipes that will be invoked during the initial
Chef Client run. The run-list will also be saved to the Chef Server after a successful
initial run. Required if `use_policyfile` is `false`; ignored when `use_policyfile` is `true`
@ -169,3 +177,7 @@ The following arguments are supported:
* `version (string)` - (Optional) The Chef Client version to install on the remote machine.
If not set, the latest available version will be installed.
* `wait_for_retry (integer)` - (Optional) Amount of time in seconds to wait before
retrying the provisioning process after receiving an exit code in the `retry_on_exit_code`
list. Defaults to `30`.