provider/aws: Wait for Spot Fleet to drain before removing from state (#8938)

* provider/aws: Wait for Spot Fleet to drain before removing from state

Ensures the spot fleet is drained before reporting successful destroy
and moving on

* remove unreachable code

* hack to sleep and test regression/leak

* fix broken english in warning
Clint authored 2016-09-22 15:22:27 -05:00, committed by GitHub
parent b0e751129a
commit becdfef87b
2 changed files with 48 additions and 2 deletions
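
The change the commit message describes comes down to two steps: confirm that EC2 actually accepted the cancellation, then wait for the fleet's instances to drain before treating the destroy as complete. As a point of reference, the first step looks roughly like this as a standalone aws-sdk-go function (a sketch only; the package name, the cancelSpotFleet helper, and its signature are illustrative and not part of the commit):

package spotfleetexample

import (
	"fmt"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/service/ec2"
)

// cancelSpotFleet cancels a Spot Fleet request and confirms that EC2 reported
// the cancellation as successful, mirroring the check added in this commit.
func cancelSpotFleet(conn *ec2.EC2, id string, terminateInstances bool) error {
	resp, err := conn.CancelSpotFleetRequests(&ec2.CancelSpotFleetRequestsInput{
		SpotFleetRequestIds: []*string{aws.String(id)},
		TerminateInstances:  aws.Bool(terminateInstances),
	})
	if err != nil {
		return fmt.Errorf("error cancelling spot fleet request (%s): %s", id, err)
	}

	// CancelSpotFleetRequests reports per-request results, so check that our
	// ID is in the successful set rather than assuming the call worked.
	for _, s := range resp.SuccessfulFleetRequests {
		if aws.StringValue(s.SpotFleetRequestId) == id {
			return nil
		}
	}
	return fmt.Errorf("spot fleet request (%s) was not reported as successfully canceled", id)
}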

resource_aws_spot_fleet_request.go

@@ -936,7 +936,7 @@ func resourceAwsSpotFleetRequestDelete(d *schema.ResourceData, meta interface{})
conn := meta.(*AWSClient).ec2conn
log.Printf("[INFO] Cancelling spot fleet request: %s", d.Id())
_, err := conn.CancelSpotFleetRequests(&ec2.CancelSpotFleetRequestsInput{
resp, err := conn.CancelSpotFleetRequests(&ec2.CancelSpotFleetRequestsInput{
SpotFleetRequestIds: []*string{aws.String(d.Id())},
TerminateInstances: aws.Bool(d.Get("terminate_instances_with_expiration").(bool)),
})
@@ -945,7 +945,36 @@ func resourceAwsSpotFleetRequestDelete(d *schema.ResourceData, meta interface{})
return fmt.Errorf("Error cancelling spot request (%s): %s", d.Id(), err)
}
return nil
// check response successfulFleetRequestSet to make sure our request was canceled
var found bool
for _, s := range resp.SuccessfulFleetRequests {
if *s.SpotFleetRequestId == d.Id() {
found = true
}
}
if !found {
return fmt.Errorf("[ERR] Spot Fleet request (%s) was not found to be successfully canceled, dangling resources may exit", d.Id())
}
return resource.Retry(5*time.Minute, func() *resource.RetryError {
resp, err := conn.DescribeSpotFleetInstances(&ec2.DescribeSpotFleetInstancesInput{
SpotFleetRequestId: aws.String(d.Id()),
})
if err != nil {
return resource.NonRetryableError(err)
}
if len(resp.ActiveInstances) == 0 {
log.Printf("[DEBUG] Active instance count is 0 for Spot Fleet Request (%s), removing", d.Id())
return nil
}
log.Printf("[DEBUG] Active instance count in Spot Fleet Request (%s): %d", d.Id(), len(resp.ActiveInstances))
return resource.RetryableError(
fmt.Errorf("fleet still has (%d) running instances", len(resp.ActiveInstances)))
})
}
func hashEphemeralBlockDevice(v interface{}) int {

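The resource.Retry call above is Terraform's helper for polling with a timeout: the closure is re-invoked while it returns a RetryableError, stops immediately on a NonRetryableError, and succeeds when it returns nil. For readers unfamiliar with the helper/resource package, the drain wait is roughly equivalent to this plain polling loop (a sketch only; the package name, the waitForSpotFleetDrain helper, and the 15-second poll interval are illustrative, since resource.Retry manages its own retry cadence):

package spotfleetexample

import (
	"fmt"
	"time"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/service/ec2"
)

// waitForSpotFleetDrain polls DescribeSpotFleetInstances until the fleet
// reports zero active instances or the timeout elapses, mirroring the
// resource.Retry block in the diff above.
func waitForSpotFleetDrain(conn *ec2.EC2, id string, timeout time.Duration) error {
	deadline := time.Now().Add(timeout)
	for {
		resp, err := conn.DescribeSpotFleetInstances(&ec2.DescribeSpotFleetInstancesInput{
			SpotFleetRequestId: aws.String(id),
		})
		if err != nil {
			// Treat API errors as permanent, like NonRetryableError above.
			return err
		}
		if len(resp.ActiveInstances) == 0 {
			return nil
		}
		if time.Now().After(deadline) {
			return fmt.Errorf("spot fleet request (%s) still has %d active instances after %s",
				id, len(resp.ActiveInstances), timeout)
		}
		time.Sleep(15 * time.Second) // illustrative poll interval
	}
}

In the resource itself the same decisions are expressed through the helper: API errors become NonRetryableError and surface immediately, while a non-empty ActiveInstances set becomes a RetryableError so polling continues until the 5-minute window elapses.
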
resource_aws_spot_fleet_request_test.go

@@ -3,7 +3,9 @@ package aws
import (
"encoding/base64"
"fmt"
"log"
"testing"
"time"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/ec2"
@@ -248,6 +250,20 @@ func TestAccAWSSpotFleetRequest_withWeightedCapacity(t *testing.T) {
var sfr ec2.SpotFleetRequestConfig
rName := acctest.RandString(10)
fulfillSleep := func() resource.TestCheckFunc {
// sleep so that EC2 can fulfill the request. We do this to guard against a
// regression and possible leak where we'd destroy the request and the
// associated IAM role before anything is actually provisioned and running,
// thus leaking the newly started instances when their destruction is attempted
// See https://github.com/hashicorp/terraform/pull/8938
return func(s *terraform.State) error {
log.Printf("[DEBUG] Test: Sleep to allow EC2 to actually begin fulfilling TestAccAWSSpotFleetRequest_withWeightedCapacity request")
time.Sleep(1 * time.Minute)
return nil
}
}
resource.Test(t, resource.TestCase{
PreCheck: func() { testAccPreCheck(t) },
Providers: testAccProviders,
@@ -256,6 +272,7 @@ func TestAccAWSSpotFleetRequest_withWeightedCapacity(t *testing.T) {
resource.TestStep{
Config: testAccAWSSpotFleetRequestConfigWithWeightedCapacity(rName),
Check: resource.ComposeAggregateTestCheckFunc(
fulfillSleep(),
testAccCheckAWSSpotFleetRequestExists(
"aws_spot_fleet_request.foo", &sfr),
resource.TestCheckResourceAttr(