retry join instead of failing

this makes it easier to bootstrap a cluster and also makes the cluster
more resilient to full-cluster failures or restarts

fixes #6
This commit is contained in:
Leo Antunes 2019-08-06 21:53:38 +02:00
parent 3a401a7942
commit 5427aa5e84
4 changed files with 29 additions and 3 deletions

3
go.mod
View File

@ -1,12 +1,13 @@
module github.com/costela/wesher
require (
github.com/cenkalti/backoff v2.2.1+incompatible
github.com/hashicorp/errwrap v1.0.0
github.com/hashicorp/go-multierror v1.0.0
github.com/hashicorp/go-sockaddr v1.0.0
github.com/hashicorp/memberlist v0.1.3
github.com/mattn/go-isatty v0.0.7
github.com/mdlayher/genetlink v0.0.0-20190617154021-985b2115c31a
github.com/mdlayher/genetlink v0.0.0-20190617154021-985b2115c31a // indirect
github.com/pkg/errors v0.8.1
github.com/sirupsen/logrus v1.3.0
github.com/stevenroose/gonfig v0.1.4

3
go.sum
View File

@ -2,6 +2,8 @@ github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da h1:8GUt8eRujhVEGZFFEjBj46YV4rDjvGrNxb0KMWYkL2I=
github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY=
github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4=
github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@ -41,7 +43,6 @@ github.com/mdlayher/netlink v0.0.0-20190409211403-11939a169225/go.mod h1:eQB3mZE
github.com/mdlayher/netlink v0.0.0-20190513144208-ba284d510044/go.mod h1:gOrA34zDL0K3RsACQe54bDYLF/CeFspQ9m5DOycycQ8=
github.com/mdlayher/netlink v0.0.0-20190614145538-d8264f87dbe3 h1:3IPcWjiboJFnnvHeXxT4pYw33BiPJn/DC5BKhcGEbGk=
github.com/mdlayher/netlink v0.0.0-20190614145538-d8264f87dbe3/go.mod h1:ISujvOTprADlNr00kvJIu0d23q57wk2NSV/PT/TEk4E=
github.com/mdlayher/netlink v0.0.0-20190617153422-f82a9b10b2bc h1:deLjDmcgzsCAO+7m2aeuyhQCqvn1LuCCSMLWUARnad8=
github.com/microsoft/go-winio v0.4.12 h1:3vDRRsUnj2dKE7QKoedntu9hbuD8gzaVd2E2UZioqx4=
github.com/microsoft/go-winio v0.4.12/go.mod h1:kcIxxtKZE55DEncT/EOvFiygPobhUWpSDqDb47poQOU=
github.com/miekg/dns v1.0.14 h1:9jZdLNd/P4+SfEJ0TNyxYpsK8N4GtfylBLqtbYN1sbA=

11
main.go
View File

@ -5,6 +5,9 @@ import (
"os"
"os/signal"
"syscall"
"time"
"github.com/cenkalti/backoff"
"github.com/sirupsen/logrus"
@ -39,7 +42,13 @@ func main() {
}
nodec, errc := cluster.members() // avoid deadlocks by starting before join
if err := cluster.join(config.Join); err != nil {
if err := backoff.RetryNotify(
func() error { return cluster.join(config.Join) },
backoff.NewExponentialBackOff(),
func(err error, dur time.Duration) {
logrus.Errorf("could not join cluster, retrying in %s: %s", dur, err)
},
); err != nil {
logrus.Fatalf("could not join cluster: %s", err)
}

View File

@ -64,6 +64,21 @@ test_node_restart() {
stop_test_container test1-orig
}
# Starts three nodes back to back, each told to join the other two, then
# checks from test1-orig that both peers answer a single ping. On a failed
# ping the logs of the two containers involved are dumped and the check
# evaluates to a failure. Containers are stopped in reverse start order.
test_cluster_simultaneous_start() {
    local peer node

    run_test_container test1-orig test1 --join test2-orig,test3-orig
    run_test_container test2-orig test2 --join test1-orig,test3-orig
    run_test_container test3-orig test3 --join test1-orig,test2-orig

    # fixed pause before probing; presumably gives the nodes time to finish joining
    sleep 3

    for peer in test2 test3; do
        docker exec test1-orig ping -c1 -W1 "$peer" || (docker logs test1-orig; docker logs "${peer}-orig"; false)
    done

    for node in test3 test2 test1; do
        stop_test_container "${node}-orig"
    done
}
for test_func in $(declare -F | grep -Eo '\<test_.*$'); do
echo "--- Running $test_func:"
$test_func