update consul vendor

This commit is contained in:
James Bardin 2017-07-17 11:28:30 -04:00
parent 50f412bff4
commit 7584ac1247
68 changed files with 2224 additions and 10891 deletions

View File

@ -1,672 +0,0 @@
package acl
import (
"github.com/armon/go-radix"
)
var (
// allowAll is a singleton policy which allows all
// non-management actions
allowAll ACL
// denyAll is a singleton policy which denies all actions
denyAll ACL
// manageAll is a singleton policy which allows all
// actions, including management
manageAll ACL
)
func init() {
// Setup the singletons
allowAll = &StaticACL{
allowManage: false,
defaultAllow: true,
}
denyAll = &StaticACL{
allowManage: false,
defaultAllow: false,
}
manageAll = &StaticACL{
allowManage: true,
defaultAllow: true,
}
}
// ACL is the interface for policy enforcement.
type ACL interface {
// ACLList checks for permission to list all the ACLs
ACLList() bool
// ACLModify checks for permission to manipulate ACLs
ACLModify() bool
// AgentRead checks for permission to read from agent endpoints for a
// given node.
AgentRead(string) bool
// AgentWrite checks for permission to make changes via agent endpoints
// for a given node.
AgentWrite(string) bool
// EventRead determines if a specific event can be queried.
EventRead(string) bool
// EventWrite determines if a specific event may be fired.
EventWrite(string) bool
// KeyRead checks for permission to read a given key
KeyRead(string) bool
// KeyWrite checks for permission to write a given key
KeyWrite(string) bool
// KeyWritePrefix checks for permission to write to an
// entire key prefix. This means there must be no sub-policies
// that deny a write.
KeyWritePrefix(string) bool
// KeyringRead determines if the encryption keyring used in
// the gossip layer can be read.
KeyringRead() bool
// KeyringWrite determines if the keyring can be manipulated
KeyringWrite() bool
// NodeRead checks for permission to read (discover) a given node.
NodeRead(string) bool
// NodeWrite checks for permission to create or update (register) a
// given node.
NodeWrite(string) bool
// OperatorRead determines if the read-only Consul operator functions
// can be used.
OperatorRead() bool
// OperatorWrite determines if the state-changing Consul operator
// functions can be used.
OperatorWrite() bool
// PrepardQueryRead determines if a specific prepared query can be read
// to show its contents (this is not used for execution).
PreparedQueryRead(string) bool
// PreparedQueryWrite determines if a specific prepared query can be
// created, modified, or deleted.
PreparedQueryWrite(string) bool
// ServiceRead checks for permission to read a given service
ServiceRead(string) bool
// ServiceWrite checks for permission to create or update a given
// service
ServiceWrite(string) bool
// SessionRead checks for permission to read sessions for a given node.
SessionRead(string) bool
// SessionWrite checks for permission to create sessions for a given
// node.
SessionWrite(string) bool
// Snapshot checks for permission to take and restore snapshots.
Snapshot() bool
}
// StaticACL is used to implement a base ACL policy. It either
// allows or denies all requests. This can be used as a parent
// ACL to act in a blacklist or whitelist mode.
type StaticACL struct {
allowManage bool
defaultAllow bool
}
func (s *StaticACL) ACLList() bool {
return s.allowManage
}
func (s *StaticACL) ACLModify() bool {
return s.allowManage
}
func (s *StaticACL) AgentRead(string) bool {
return s.defaultAllow
}
func (s *StaticACL) AgentWrite(string) bool {
return s.defaultAllow
}
func (s *StaticACL) EventRead(string) bool {
return s.defaultAllow
}
func (s *StaticACL) EventWrite(string) bool {
return s.defaultAllow
}
func (s *StaticACL) KeyRead(string) bool {
return s.defaultAllow
}
func (s *StaticACL) KeyWrite(string) bool {
return s.defaultAllow
}
func (s *StaticACL) KeyWritePrefix(string) bool {
return s.defaultAllow
}
func (s *StaticACL) KeyringRead() bool {
return s.defaultAllow
}
func (s *StaticACL) KeyringWrite() bool {
return s.defaultAllow
}
func (s *StaticACL) NodeRead(string) bool {
return s.defaultAllow
}
func (s *StaticACL) NodeWrite(string) bool {
return s.defaultAllow
}
func (s *StaticACL) OperatorRead() bool {
return s.defaultAllow
}
func (s *StaticACL) OperatorWrite() bool {
return s.defaultAllow
}
func (s *StaticACL) PreparedQueryRead(string) bool {
return s.defaultAllow
}
func (s *StaticACL) PreparedQueryWrite(string) bool {
return s.defaultAllow
}
func (s *StaticACL) ServiceRead(string) bool {
return s.defaultAllow
}
func (s *StaticACL) ServiceWrite(string) bool {
return s.defaultAllow
}
func (s *StaticACL) SessionRead(string) bool {
return s.defaultAllow
}
func (s *StaticACL) SessionWrite(string) bool {
return s.defaultAllow
}
func (s *StaticACL) Snapshot() bool {
return s.allowManage
}
// AllowAll returns an ACL rule that allows all operations
func AllowAll() ACL {
return allowAll
}
// DenyAll returns an ACL rule that denies all operations
func DenyAll() ACL {
return denyAll
}
// ManageAll returns an ACL rule that can manage all resources
func ManageAll() ACL {
return manageAll
}
// RootACL returns a possible ACL if the ID matches a root policy
func RootACL(id string) ACL {
switch id {
case "allow":
return allowAll
case "deny":
return denyAll
case "manage":
return manageAll
default:
return nil
}
}
// PolicyACL is used to wrap a set of ACL policies to provide
// the ACL interface.
type PolicyACL struct {
// parent is used to resolve policy if we have
// no matching rule.
parent ACL
// agentRules contains the agent policies
agentRules *radix.Tree
// keyRules contains the key policies
keyRules *radix.Tree
// nodeRules contains the node policies
nodeRules *radix.Tree
// serviceRules contains the service policies
serviceRules *radix.Tree
// sessionRules contains the session policies
sessionRules *radix.Tree
// eventRules contains the user event policies
eventRules *radix.Tree
// preparedQueryRules contains the prepared query policies
preparedQueryRules *radix.Tree
// keyringRule contains the keyring policies. The keyring has
// a very simple yes/no without prefix matching, so here we
// don't need to use a radix tree.
keyringRule string
// operatorRule contains the operator policies.
operatorRule string
}
// New is used to construct a policy based ACL from a set of policies
// and a parent policy to resolve missing cases.
func New(parent ACL, policy *Policy) (*PolicyACL, error) {
p := &PolicyACL{
parent: parent,
agentRules: radix.New(),
keyRules: radix.New(),
nodeRules: radix.New(),
serviceRules: radix.New(),
sessionRules: radix.New(),
eventRules: radix.New(),
preparedQueryRules: radix.New(),
}
// Load the agent policy
for _, ap := range policy.Agents {
p.agentRules.Insert(ap.Node, ap.Policy)
}
// Load the key policy
for _, kp := range policy.Keys {
p.keyRules.Insert(kp.Prefix, kp.Policy)
}
// Load the node policy
for _, np := range policy.Nodes {
p.nodeRules.Insert(np.Name, np.Policy)
}
// Load the service policy
for _, sp := range policy.Services {
p.serviceRules.Insert(sp.Name, sp.Policy)
}
// Load the session policy
for _, sp := range policy.Sessions {
p.sessionRules.Insert(sp.Node, sp.Policy)
}
// Load the event policy
for _, ep := range policy.Events {
p.eventRules.Insert(ep.Event, ep.Policy)
}
// Load the prepared query policy
for _, pq := range policy.PreparedQueries {
p.preparedQueryRules.Insert(pq.Prefix, pq.Policy)
}
// Load the keyring policy
p.keyringRule = policy.Keyring
// Load the operator policy
p.operatorRule = policy.Operator
return p, nil
}
// ACLList checks if listing of ACLs is allowed
func (p *PolicyACL) ACLList() bool {
return p.parent.ACLList()
}
// ACLModify checks if modification of ACLs is allowed
func (p *PolicyACL) ACLModify() bool {
return p.parent.ACLModify()
}
// AgentRead checks for permission to read from agent endpoints for a given
// node.
func (p *PolicyACL) AgentRead(node string) bool {
// Check for an exact rule or catch-all
_, rule, ok := p.agentRules.LongestPrefix(node)
if ok {
switch rule {
case PolicyRead, PolicyWrite:
return true
default:
return false
}
}
// No matching rule, use the parent.
return p.parent.AgentRead(node)
}
// AgentWrite checks for permission to make changes via agent endpoints for a
// given node.
func (p *PolicyACL) AgentWrite(node string) bool {
// Check for an exact rule or catch-all
_, rule, ok := p.agentRules.LongestPrefix(node)
if ok {
switch rule {
case PolicyWrite:
return true
default:
return false
}
}
// No matching rule, use the parent.
return p.parent.AgentWrite(node)
}
// Snapshot checks if taking and restoring snapshots is allowed.
func (p *PolicyACL) Snapshot() bool {
return p.parent.Snapshot()
}
// EventRead is used to determine if the policy allows for a
// specific user event to be read.
func (p *PolicyACL) EventRead(name string) bool {
// Longest-prefix match on event names
if _, rule, ok := p.eventRules.LongestPrefix(name); ok {
switch rule {
case PolicyRead, PolicyWrite:
return true
default:
return false
}
}
// Nothing matched, use parent
return p.parent.EventRead(name)
}
// EventWrite is used to determine if new events can be created
// (fired) by the policy.
func (p *PolicyACL) EventWrite(name string) bool {
// Longest-prefix match event names
if _, rule, ok := p.eventRules.LongestPrefix(name); ok {
return rule == PolicyWrite
}
// No match, use parent
return p.parent.EventWrite(name)
}
// KeyRead returns if a key is allowed to be read
func (p *PolicyACL) KeyRead(key string) bool {
// Look for a matching rule
_, rule, ok := p.keyRules.LongestPrefix(key)
if ok {
switch rule.(string) {
case PolicyRead, PolicyWrite:
return true
default:
return false
}
}
// No matching rule, use the parent.
return p.parent.KeyRead(key)
}
// KeyWrite returns if a key is allowed to be written
func (p *PolicyACL) KeyWrite(key string) bool {
// Look for a matching rule
_, rule, ok := p.keyRules.LongestPrefix(key)
if ok {
switch rule.(string) {
case PolicyWrite:
return true
default:
return false
}
}
// No matching rule, use the parent.
return p.parent.KeyWrite(key)
}
// KeyWritePrefix returns if a prefix is allowed to be written
func (p *PolicyACL) KeyWritePrefix(prefix string) bool {
// Look for a matching rule that denies
_, rule, ok := p.keyRules.LongestPrefix(prefix)
if ok && rule.(string) != PolicyWrite {
return false
}
// Look if any of our children have a deny policy
deny := false
p.keyRules.WalkPrefix(prefix, func(path string, rule interface{}) bool {
// We have a rule to prevent a write in a sub-directory!
if rule.(string) != PolicyWrite {
deny = true
return true
}
return false
})
// Deny the write if any sub-rules may be violated
if deny {
return false
}
// If we had a matching rule, done
if ok {
return true
}
// No matching rule, use the parent.
return p.parent.KeyWritePrefix(prefix)
}
// KeyringRead is used to determine if the keyring can be
// read by the current ACL token.
func (p *PolicyACL) KeyringRead() bool {
switch p.keyringRule {
case PolicyRead, PolicyWrite:
return true
case PolicyDeny:
return false
default:
return p.parent.KeyringRead()
}
}
// KeyringWrite determines if the keyring can be manipulated.
func (p *PolicyACL) KeyringWrite() bool {
if p.keyringRule == PolicyWrite {
return true
}
return p.parent.KeyringWrite()
}
// OperatorRead determines if the read-only operator functions are allowed.
func (p *PolicyACL) OperatorRead() bool {
switch p.operatorRule {
case PolicyRead, PolicyWrite:
return true
case PolicyDeny:
return false
default:
return p.parent.OperatorRead()
}
}
// NodeRead checks if reading (discovery) of a node is allowed
func (p *PolicyACL) NodeRead(name string) bool {
// Check for an exact rule or catch-all
_, rule, ok := p.nodeRules.LongestPrefix(name)
if ok {
switch rule {
case PolicyRead, PolicyWrite:
return true
default:
return false
}
}
// No matching rule, use the parent.
return p.parent.NodeRead(name)
}
// NodeWrite checks if writing (registering) a node is allowed
func (p *PolicyACL) NodeWrite(name string) bool {
// Check for an exact rule or catch-all
_, rule, ok := p.nodeRules.LongestPrefix(name)
if ok {
switch rule {
case PolicyWrite:
return true
default:
return false
}
}
// No matching rule, use the parent.
return p.parent.NodeWrite(name)
}
// OperatorWrite determines if the state-changing operator functions are
// allowed.
func (p *PolicyACL) OperatorWrite() bool {
if p.operatorRule == PolicyWrite {
return true
}
return p.parent.OperatorWrite()
}
// PreparedQueryRead checks if reading (listing) of a prepared query is
// allowed - this isn't execution, just listing its contents.
func (p *PolicyACL) PreparedQueryRead(prefix string) bool {
// Check for an exact rule or catch-all
_, rule, ok := p.preparedQueryRules.LongestPrefix(prefix)
if ok {
switch rule {
case PolicyRead, PolicyWrite:
return true
default:
return false
}
}
// No matching rule, use the parent.
return p.parent.PreparedQueryRead(prefix)
}
// PreparedQueryWrite checks if writing (creating, updating, or deleting) of a
// prepared query is allowed.
func (p *PolicyACL) PreparedQueryWrite(prefix string) bool {
// Check for an exact rule or catch-all
_, rule, ok := p.preparedQueryRules.LongestPrefix(prefix)
if ok {
switch rule {
case PolicyWrite:
return true
default:
return false
}
}
// No matching rule, use the parent.
return p.parent.PreparedQueryWrite(prefix)
}
// ServiceRead checks if reading (discovery) of a service is allowed
func (p *PolicyACL) ServiceRead(name string) bool {
// Check for an exact rule or catch-all
_, rule, ok := p.serviceRules.LongestPrefix(name)
if ok {
switch rule {
case PolicyRead, PolicyWrite:
return true
default:
return false
}
}
// No matching rule, use the parent.
return p.parent.ServiceRead(name)
}
// ServiceWrite checks if writing (registering) a service is allowed
func (p *PolicyACL) ServiceWrite(name string) bool {
// Check for an exact rule or catch-all
_, rule, ok := p.serviceRules.LongestPrefix(name)
if ok {
switch rule {
case PolicyWrite:
return true
default:
return false
}
}
// No matching rule, use the parent.
return p.parent.ServiceWrite(name)
}
// SessionRead checks for permission to read sessions for a given node.
func (p *PolicyACL) SessionRead(node string) bool {
// Check for an exact rule or catch-all
_, rule, ok := p.sessionRules.LongestPrefix(node)
if ok {
switch rule {
case PolicyRead, PolicyWrite:
return true
default:
return false
}
}
// No matching rule, use the parent.
return p.parent.SessionRead(node)
}
// SessionWrite checks for permission to create sessions for a given node.
func (p *PolicyACL) SessionWrite(node string) bool {
// Check for an exact rule or catch-all
_, rule, ok := p.sessionRules.LongestPrefix(node)
if ok {
switch rule {
case PolicyWrite:
return true
default:
return false
}
}
// No matching rule, use the parent.
return p.parent.SessionWrite(node)
}

View File

@ -1,177 +0,0 @@
package acl
import (
"crypto/md5"
"fmt"
"github.com/hashicorp/golang-lru"
)
// FaultFunc is a function used to fault in the parent,
// rules for an ACL given its ID
type FaultFunc func(id string) (string, string, error)
// aclEntry allows us to store the ACL with it's policy ID
type aclEntry struct {
ACL ACL
Parent string
RuleID string
}
// Cache is used to implement policy and ACL caching
type Cache struct {
faultfn FaultFunc
aclCache *lru.TwoQueueCache // Cache id -> acl
policyCache *lru.TwoQueueCache // Cache policy -> acl
ruleCache *lru.TwoQueueCache // Cache rules -> policy
}
// NewCache constructs a new policy and ACL cache of a given size
func NewCache(size int, faultfn FaultFunc) (*Cache, error) {
if size <= 0 {
return nil, fmt.Errorf("Must provide positive cache size")
}
rc, err := lru.New2Q(size)
if err != nil {
return nil, err
}
pc, err := lru.New2Q(size)
if err != nil {
return nil, err
}
ac, err := lru.New2Q(size)
if err != nil {
return nil, err
}
c := &Cache{
faultfn: faultfn,
aclCache: ac,
policyCache: pc,
ruleCache: rc,
}
return c, nil
}
// GetPolicy is used to get a potentially cached policy set.
// If not cached, it will be parsed, and then cached.
func (c *Cache) GetPolicy(rules string) (*Policy, error) {
return c.getPolicy(RuleID(rules), rules)
}
// getPolicy is an internal method to get a cached policy,
// but it assumes a pre-computed ID
func (c *Cache) getPolicy(id, rules string) (*Policy, error) {
raw, ok := c.ruleCache.Get(id)
if ok {
return raw.(*Policy), nil
}
policy, err := Parse(rules)
if err != nil {
return nil, err
}
policy.ID = id
c.ruleCache.Add(id, policy)
return policy, nil
}
// RuleID is used to generate an ID for a rule
func RuleID(rules string) string {
return fmt.Sprintf("%x", md5.Sum([]byte(rules)))
}
// policyID returns the cache ID for a policy
func (c *Cache) policyID(parent, ruleID string) string {
return parent + ":" + ruleID
}
// GetACLPolicy is used to get the potentially cached ACL
// policy. If not cached, it will be generated and then cached.
func (c *Cache) GetACLPolicy(id string) (string, *Policy, error) {
// Check for a cached acl
if raw, ok := c.aclCache.Get(id); ok {
cached := raw.(aclEntry)
if raw, ok := c.ruleCache.Get(cached.RuleID); ok {
return cached.Parent, raw.(*Policy), nil
}
}
// Fault in the rules
parent, rules, err := c.faultfn(id)
if err != nil {
return "", nil, err
}
// Get cached
policy, err := c.GetPolicy(rules)
return parent, policy, err
}
// GetACL is used to get a potentially cached ACL policy.
// If not cached, it will be generated and then cached.
func (c *Cache) GetACL(id string) (ACL, error) {
// Look for the ACL directly
raw, ok := c.aclCache.Get(id)
if ok {
return raw.(aclEntry).ACL, nil
}
// Get the rules
parentID, rules, err := c.faultfn(id)
if err != nil {
return nil, err
}
ruleID := RuleID(rules)
// Check for a compiled ACL
policyID := c.policyID(parentID, ruleID)
var compiled ACL
if raw, ok := c.policyCache.Get(policyID); ok {
compiled = raw.(ACL)
} else {
// Get the policy
policy, err := c.getPolicy(ruleID, rules)
if err != nil {
return nil, err
}
// Get the parent ACL
parent := RootACL(parentID)
if parent == nil {
parent, err = c.GetACL(parentID)
if err != nil {
return nil, err
}
}
// Compile the ACL
acl, err := New(parent, policy)
if err != nil {
return nil, err
}
// Cache the compiled ACL
c.policyCache.Add(policyID, acl)
compiled = acl
}
// Cache and return the ACL
c.aclCache.Add(id, aclEntry{compiled, parentID, ruleID})
return compiled, nil
}
// ClearACL is used to clear the ACL cache if any
func (c *Cache) ClearACL(id string) {
c.aclCache.Remove(id)
}
// Purge is used to clear all the ACL caches. The
// rule and policy caches are not purged, since they
// are content-hashed anyways.
func (c *Cache) Purge() {
c.aclCache.Purge()
}

View File

@ -1,191 +0,0 @@
package acl
import (
"fmt"
"github.com/hashicorp/hcl"
)
const (
PolicyDeny = "deny"
PolicyRead = "read"
PolicyWrite = "write"
)
// Policy is used to represent the policy specified by
// an ACL configuration.
type Policy struct {
ID string `hcl:"-"`
Agents []*AgentPolicy `hcl:"agent,expand"`
Keys []*KeyPolicy `hcl:"key,expand"`
Nodes []*NodePolicy `hcl:"node,expand"`
Services []*ServicePolicy `hcl:"service,expand"`
Sessions []*SessionPolicy `hcl:"session,expand"`
Events []*EventPolicy `hcl:"event,expand"`
PreparedQueries []*PreparedQueryPolicy `hcl:"query,expand"`
Keyring string `hcl:"keyring"`
Operator string `hcl:"operator"`
}
// AgentPolicy represents a policy for working with agent endpoints on nodes
// with specific name prefixes.
type AgentPolicy struct {
Node string `hcl:",key"`
Policy string
}
func (a *AgentPolicy) GoString() string {
return fmt.Sprintf("%#v", *a)
}
// KeyPolicy represents a policy for a key
type KeyPolicy struct {
Prefix string `hcl:",key"`
Policy string
}
func (k *KeyPolicy) GoString() string {
return fmt.Sprintf("%#v", *k)
}
// NodePolicy represents a policy for a node
type NodePolicy struct {
Name string `hcl:",key"`
Policy string
}
func (n *NodePolicy) GoString() string {
return fmt.Sprintf("%#v", *n)
}
// ServicePolicy represents a policy for a service
type ServicePolicy struct {
Name string `hcl:",key"`
Policy string
}
func (s *ServicePolicy) GoString() string {
return fmt.Sprintf("%#v", *s)
}
// SessionPolicy represents a policy for making sessions tied to specific node
// name prefixes.
type SessionPolicy struct {
Node string `hcl:",key"`
Policy string
}
func (s *SessionPolicy) GoString() string {
return fmt.Sprintf("%#v", *s)
}
// EventPolicy represents a user event policy.
type EventPolicy struct {
Event string `hcl:",key"`
Policy string
}
func (e *EventPolicy) GoString() string {
return fmt.Sprintf("%#v", *e)
}
// PreparedQueryPolicy represents a prepared query policy.
type PreparedQueryPolicy struct {
Prefix string `hcl:",key"`
Policy string
}
func (p *PreparedQueryPolicy) GoString() string {
return fmt.Sprintf("%#v", *p)
}
// isPolicyValid makes sure the given string matches one of the valid policies.
func isPolicyValid(policy string) bool {
switch policy {
case PolicyDeny:
return true
case PolicyRead:
return true
case PolicyWrite:
return true
default:
return false
}
}
// Parse is used to parse the specified ACL rules into an
// intermediary set of policies, before being compiled into
// the ACL
func Parse(rules string) (*Policy, error) {
// Decode the rules
p := &Policy{}
if rules == "" {
// Hot path for empty rules
return p, nil
}
if err := hcl.Decode(p, rules); err != nil {
return nil, fmt.Errorf("Failed to parse ACL rules: %v", err)
}
// Validate the agent policy
for _, ap := range p.Agents {
if !isPolicyValid(ap.Policy) {
return nil, fmt.Errorf("Invalid agent policy: %#v", ap)
}
}
// Validate the key policy
for _, kp := range p.Keys {
if !isPolicyValid(kp.Policy) {
return nil, fmt.Errorf("Invalid key policy: %#v", kp)
}
}
// Validate the node policies
for _, np := range p.Nodes {
if !isPolicyValid(np.Policy) {
return nil, fmt.Errorf("Invalid node policy: %#v", np)
}
}
// Validate the service policies
for _, sp := range p.Services {
if !isPolicyValid(sp.Policy) {
return nil, fmt.Errorf("Invalid service policy: %#v", sp)
}
}
// Validate the session policies
for _, sp := range p.Sessions {
if !isPolicyValid(sp.Policy) {
return nil, fmt.Errorf("Invalid session policy: %#v", sp)
}
}
// Validate the user event policies
for _, ep := range p.Events {
if !isPolicyValid(ep.Policy) {
return nil, fmt.Errorf("Invalid event policy: %#v", ep)
}
}
// Validate the prepared query policies
for _, pq := range p.PreparedQueries {
if !isPolicyValid(pq.Policy) {
return nil, fmt.Errorf("Invalid query policy: %#v", pq)
}
}
// Validate the keyring policy - this one is allowed to be empty
if p.Keyring != "" && !isPolicyValid(p.Keyring) {
return nil, fmt.Errorf("Invalid keyring policy: %#v", p.Keyring)
}
// Validate the operator policy - this one is allowed to be empty
if p.Operator != "" && !isPolicyValid(p.Operator) {
return nil, fmt.Errorf("Invalid operator policy: %#v", p.Operator)
}
return p, nil
}

View File

@ -1,5 +1,9 @@
package api
import (
"time"
)
const (
// ACLCLientType is the client type token
ACLClientType = "client"
@ -18,6 +22,16 @@ type ACLEntry struct {
Rules string
}
// ACLReplicationStatus is used to represent the status of ACL replication.
type ACLReplicationStatus struct {
Enabled bool
Running bool
SourceDatacenter string
ReplicatedIndex uint64
LastSuccess time.Time
LastError time.Time
}
// ACL can be used to query the ACL endpoints
type ACL struct {
c *Client
@ -138,3 +152,24 @@ func (a *ACL) List(q *QueryOptions) ([]*ACLEntry, *QueryMeta, error) {
}
return entries, qm, nil
}
// Replication returns the status of the ACL replication process in the datacenter
func (a *ACL) Replication(q *QueryOptions) (*ACLReplicationStatus, *QueryMeta, error) {
r := a.c.newRequest("GET", "/v1/acl/replication")
r.setQueryOptions(q)
rtt, resp, err := requireOK(a.c.doRequest(r))
if err != nil {
return nil, nil, err
}
defer resp.Body.Close()
qm := &QueryMeta{}
parseQueryMeta(resp, qm)
qm.RequestTime = rtt
var entries *ACLReplicationStatus
if err := decodeBody(resp, &entries); err != nil {
return nil, nil, err
}
return entries, qm, nil
}

View File

@ -25,6 +25,8 @@ type AgentService struct {
Port int
Address string
EnableTagOverride bool
CreateIndex uint64
ModifyIndex uint64
}
// AgentMember represents a cluster member known to the agent
@ -65,17 +67,19 @@ type AgentCheckRegistration struct {
// AgentServiceCheck is used to define a node or service level check
type AgentServiceCheck struct {
Script string `json:",omitempty"`
DockerContainerID string `json:",omitempty"`
Shell string `json:",omitempty"` // Only supported for Docker.
Interval string `json:",omitempty"`
Timeout string `json:",omitempty"`
TTL string `json:",omitempty"`
HTTP string `json:",omitempty"`
TCP string `json:",omitempty"`
Status string `json:",omitempty"`
Notes string `json:",omitempty"`
TLSSkipVerify bool `json:",omitempty"`
Script string `json:",omitempty"`
DockerContainerID string `json:",omitempty"`
Shell string `json:",omitempty"` // Only supported for Docker.
Interval string `json:",omitempty"`
Timeout string `json:",omitempty"`
TTL string `json:",omitempty"`
HTTP string `json:",omitempty"`
Header map[string][]string `json:",omitempty"`
Method string `json:",omitempty"`
TCP string `json:",omitempty"`
Status string `json:",omitempty"`
Notes string `json:",omitempty"`
TLSSkipVerify bool `json:",omitempty"`
// In Consul 0.7 and later, checks that are associated with a service
// may also contain this optional DeregisterCriticalServiceAfter field,
@ -438,7 +442,7 @@ func (a *Agent) DisableNodeMaintenance() error {
// Monitor returns a channel which will receive streaming logs from the agent
// Providing a non-nil stopCh can be used to close the connection and stop the
// log stream
func (a *Agent) Monitor(loglevel string, stopCh chan struct{}, q *QueryOptions) (chan string, error) {
func (a *Agent) Monitor(loglevel string, stopCh <-chan struct{}, q *QueryOptions) (chan string, error) {
r := a.c.newRequest("GET", "/v1/agent/monitor")
r.setQueryOptions(q)
if loglevel != "" {

View File

@ -2,8 +2,8 @@ package api
import (
"bytes"
"context"
"crypto/tls"
"crypto/x509"
"encoding/json"
"fmt"
"io"
@ -18,6 +18,7 @@ import (
"time"
"github.com/hashicorp/go-cleanhttp"
"github.com/hashicorp/go-rootcerts"
)
const (
@ -37,6 +38,26 @@ const (
// whether or not to use HTTPS.
HTTPSSLEnvName = "CONSUL_HTTP_SSL"
// HTTPCAFile defines an environment variable name which sets the
// CA file to use for talking to Consul over TLS.
HTTPCAFile = "CONSUL_CACERT"
// HTTPCAPath defines an environment variable name which sets the
// path to a directory of CA certs to use for talking to Consul over TLS.
HTTPCAPath = "CONSUL_CAPATH"
// HTTPClientCert defines an environment variable name which sets the
// client cert file to use for talking to Consul over TLS.
HTTPClientCert = "CONSUL_CLIENT_CERT"
// HTTPClientKey defines an environment variable name which sets the
// client key file to use for talking to Consul over TLS.
HTTPClientKey = "CONSUL_CLIENT_KEY"
// HTTPTLSServerName defines an environment variable name which sets the
// server name to use as the SNI host when connecting via TLS
HTTPTLSServerName = "CONSUL_TLS_SERVER_NAME"
// HTTPSSLVerifyEnvName defines an environment variable name which sets
// whether or not to disable certificate checking.
HTTPSSLVerifyEnvName = "CONSUL_HTTP_SSL_VERIFY"
@ -79,6 +100,31 @@ type QueryOptions struct {
// metadata key/value pairs. Currently, only one key/value pair can
// be provided for filtering.
NodeMeta map[string]string
// RelayFactor is used in keyring operations to cause reponses to be
// relayed back to the sender through N other random nodes. Must be
// a value from 0 to 5 (inclusive).
RelayFactor uint8
// ctx is an optional context pass through to the underlying HTTP
// request layer. Use Context() and WithContext() to manage this.
ctx context.Context
}
func (o *QueryOptions) Context() context.Context {
if o != nil && o.ctx != nil {
return o.ctx
}
return context.Background()
}
func (o *QueryOptions) WithContext(ctx context.Context) *QueryOptions {
o2 := new(QueryOptions)
if o != nil {
*o2 = *o
}
o2.ctx = ctx
return o2
}
// WriteOptions are used to parameterize a write
@ -90,6 +136,31 @@ type WriteOptions struct {
// Token is used to provide a per-request ACL token
// which overrides the agent's default token.
Token string
// RelayFactor is used in keyring operations to cause reponses to be
// relayed back to the sender through N other random nodes. Must be
// a value from 0 to 5 (inclusive).
RelayFactor uint8
// ctx is an optional context pass through to the underlying HTTP
// request layer. Use Context() and WithContext() to manage this.
ctx context.Context
}
func (o *WriteOptions) Context() context.Context {
if o != nil && o.ctx != nil {
return o.ctx
}
return context.Background()
}
func (o *WriteOptions) WithContext(ctx context.Context) *WriteOptions {
o2 := new(WriteOptions)
if o != nil {
*o2 = *o
}
o2.ctx = ctx
return o2
}
// QueryMeta is used to return meta data about a query
@ -138,6 +209,9 @@ type Config struct {
// Datacenter to use. If not provided, the default agent datacenter is used.
Datacenter string
// Transport is the Transport to use for the http client.
Transport *http.Transport
// HttpClient is the client to use. Default will be
// used if not provided.
HttpClient *http.Client
@ -152,6 +226,8 @@ type Config struct {
// Token is used to provide a per-request ACL token
// which overrides the agent's default token.
Token string
TLSConfig TLSConfig
}
// TLSConfig is used to generate a TLSClientConfig that's useful for talking to
@ -166,6 +242,10 @@ type TLSConfig struct {
// communication, defaults to the system bundle if not specified.
CAFile string
// CAPath is the optional path to a directory of CA certificates to use for
// Consul communication, defaults to the system bundle if not specified.
CAPath string
// CertFile is the optional path to the certificate for Consul
// communication. If this is set then you need to also set KeyFile.
CertFile string
@ -201,11 +281,9 @@ func DefaultNonPooledConfig() *Config {
// given function to make the transport.
func defaultConfig(transportFn func() *http.Transport) *Config {
config := &Config{
Address: "127.0.0.1:8500",
Scheme: "http",
HttpClient: &http.Client{
Transport: transportFn(),
},
Address: "127.0.0.1:8500",
Scheme: "http",
Transport: transportFn(),
}
if addr := os.Getenv(HTTPAddrEnvName); addr != "" {
@ -243,27 +321,28 @@ func defaultConfig(transportFn func() *http.Transport) *Config {
}
}
if verify := os.Getenv(HTTPSSLVerifyEnvName); verify != "" {
doVerify, err := strconv.ParseBool(verify)
if v := os.Getenv(HTTPTLSServerName); v != "" {
config.TLSConfig.Address = v
}
if v := os.Getenv(HTTPCAFile); v != "" {
config.TLSConfig.CAFile = v
}
if v := os.Getenv(HTTPCAPath); v != "" {
config.TLSConfig.CAPath = v
}
if v := os.Getenv(HTTPClientCert); v != "" {
config.TLSConfig.CertFile = v
}
if v := os.Getenv(HTTPClientKey); v != "" {
config.TLSConfig.KeyFile = v
}
if v := os.Getenv(HTTPSSLVerifyEnvName); v != "" {
doVerify, err := strconv.ParseBool(v)
if err != nil {
log.Printf("[WARN] client: could not parse %s: %s", HTTPSSLVerifyEnvName, err)
}
if !doVerify {
tlsClientConfig, err := SetupTLSConfig(&TLSConfig{
InsecureSkipVerify: true,
})
// We don't expect this to fail given that we aren't
// parsing any of the input, but we panic just in case
// since this doesn't have an error return.
if err != nil {
panic(err)
}
transport := transportFn()
transport.TLSClientConfig = tlsClientConfig
config.HttpClient.Transport = transport
config.TLSConfig.InsecureSkipVerify = true
}
}
@ -298,17 +377,12 @@ func SetupTLSConfig(tlsConfig *TLSConfig) (*tls.Config, error) {
tlsClientConfig.Certificates = []tls.Certificate{tlsCert}
}
if tlsConfig.CAFile != "" {
data, err := ioutil.ReadFile(tlsConfig.CAFile)
if err != nil {
return nil, fmt.Errorf("failed to read CA file: %v", err)
}
caPool := x509.NewCertPool()
if !caPool.AppendCertsFromPEM(data) {
return nil, fmt.Errorf("failed to parse CA certificate")
}
tlsClientConfig.RootCAs = caPool
rootConfig := &rootcerts.Config{
CAFile: tlsConfig.CAFile,
CAPath: tlsConfig.CAPath,
}
if err := rootcerts.ConfigureTLS(tlsClientConfig, rootConfig); err != nil {
return nil, err
}
return tlsClientConfig, nil
@ -332,17 +406,58 @@ func NewClient(config *Config) (*Client, error) {
config.Scheme = defConfig.Scheme
}
if config.HttpClient == nil {
config.HttpClient = defConfig.HttpClient
if config.Transport == nil {
config.Transport = defConfig.Transport
}
if parts := strings.SplitN(config.Address, "unix://", 2); len(parts) == 2 {
trans := cleanhttp.DefaultTransport()
trans.Dial = func(_, _ string) (net.Conn, error) {
return net.Dial("unix", parts[1])
if config.TLSConfig.Address == "" {
config.TLSConfig.Address = defConfig.TLSConfig.Address
}
if config.TLSConfig.CAFile == "" {
config.TLSConfig.CAFile = defConfig.TLSConfig.CAFile
}
if config.TLSConfig.CAPath == "" {
config.TLSConfig.CAPath = defConfig.TLSConfig.CAPath
}
if config.TLSConfig.CertFile == "" {
config.TLSConfig.CertFile = defConfig.TLSConfig.CertFile
}
if config.TLSConfig.KeyFile == "" {
config.TLSConfig.KeyFile = defConfig.TLSConfig.KeyFile
}
if !config.TLSConfig.InsecureSkipVerify {
config.TLSConfig.InsecureSkipVerify = defConfig.TLSConfig.InsecureSkipVerify
}
if config.HttpClient == nil {
var err error
config.HttpClient, err = NewHttpClient(config.Transport, config.TLSConfig)
if err != nil {
return nil, err
}
config.HttpClient = &http.Client{
Transport: trans,
}
parts := strings.SplitN(config.Address, "://", 2)
if len(parts) == 2 {
switch parts[0] {
case "http":
case "https":
config.Scheme = "https"
case "unix":
trans := cleanhttp.DefaultTransport()
trans.DialContext = func(_ context.Context, _, _ string) (net.Conn, error) {
return net.Dial("unix", parts[1])
}
config.HttpClient = &http.Client{
Transport: trans,
}
default:
return nil, fmt.Errorf("Unknown protocol scheme: %s", parts[0])
}
config.Address = parts[1]
}
@ -353,6 +468,26 @@ func NewClient(config *Config) (*Client, error) {
return client, nil
}
// NewHttpClient returns an http client configured with the given Transport and TLS
// config.
func NewHttpClient(transport *http.Transport, tlsConf TLSConfig) (*http.Client, error) {
client := &http.Client{
Transport: transport,
}
if transport.TLSClientConfig == nil {
tlsClientConfig, err := SetupTLSConfig(&tlsConf)
if err != nil {
return nil, err
}
transport.TLSClientConfig = tlsClientConfig
}
return client, nil
}
// request is used to help build up a request
type request struct {
config *Config
@ -362,6 +497,7 @@ type request struct {
body io.Reader
header http.Header
obj interface{}
ctx context.Context
}
// setQueryOptions is used to annotate the request with
@ -396,6 +532,10 @@ func (r *request) setQueryOptions(q *QueryOptions) {
r.params.Add("node-meta", key+":"+value)
}
}
if q.RelayFactor != 0 {
r.params.Set("relay-factor", strconv.Itoa(int(q.RelayFactor)))
}
r.ctx = q.ctx
}
// durToMsec converts a duration to a millisecond specified string. If the
@ -437,6 +577,10 @@ func (r *request) setWriteOptions(q *WriteOptions) {
if q.Token != "" {
r.header.Set("X-Consul-Token", q.Token)
}
if q.RelayFactor != 0 {
r.params.Set("relay-factor", strconv.Itoa(int(q.RelayFactor)))
}
r.ctx = q.ctx
}
// toHTTP converts the request to an HTTP request
@ -446,11 +590,11 @@ func (r *request) toHTTP() (*http.Request, error) {
// Check if we should encode the body
if r.body == nil && r.obj != nil {
if b, err := encodeBody(r.obj); err != nil {
b, err := encodeBody(r.obj)
if err != nil {
return nil, err
} else {
r.body = b
}
r.body = b
}
// Create the HTTP request
@ -468,8 +612,11 @@ func (r *request) toHTTP() (*http.Request, error) {
if r.config.HttpAuth != nil {
req.SetBasicAuth(r.config.HttpAuth.Username, r.config.HttpAuth.Password)
}
return req, nil
if r.ctx != nil {
return req.WithContext(r.ctx), nil
} else {
return req, nil
}
}
// newRequest is used to create a new request
@ -548,6 +695,8 @@ func (c *Client) write(endpoint string, in, out interface{}, q *WriteOptions) (*
if err := decodeBody(resp, &out); err != nil {
return nil, err
}
} else if _, err := ioutil.ReadAll(resp.Body); err != nil {
return nil, err
}
return wm, nil
}

View File

@ -4,14 +4,18 @@ type Node struct {
ID string
Node string
Address string
Datacenter string
TaggedAddresses map[string]string
Meta map[string]string
CreateIndex uint64
ModifyIndex uint64
}
type CatalogService struct {
ID string
Node string
Address string
Datacenter string
TaggedAddresses map[string]string
NodeMeta map[string]string
ServiceID string

View File

@ -10,10 +10,11 @@ type CoordinateEntry struct {
Coord *coordinate.Coordinate
}
// CoordinateDatacenterMap represents a datacenter and its associated WAN
// nodes and their associates coordinates.
// CoordinateDatacenterMap has the coordinates for servers in a given datacenter
// and area. Network coordinates are only compatible within the same area.
type CoordinateDatacenterMap struct {
Datacenter string
AreaID string
Coordinates []CoordinateEntry
}

View File

@ -33,6 +33,7 @@ type HealthCheck struct {
Output string
ServiceID string
ServiceName string
ServiceTags []string
}
// HealthChecks is a collection of HealthCheck structs.

View File

@ -49,17 +49,18 @@ type KVPairs []*KVPair
type KVOp string
const (
KVSet KVOp = "set"
KVDelete KVOp = "delete"
KVDeleteCAS KVOp = "delete-cas"
KVDeleteTree KVOp = "delete-tree"
KVCAS KVOp = "cas"
KVLock KVOp = "lock"
KVUnlock KVOp = "unlock"
KVGet KVOp = "get"
KVGetTree KVOp = "get-tree"
KVCheckSession KVOp = "check-session"
KVCheckIndex KVOp = "check-index"
KVSet KVOp = "set"
KVDelete KVOp = "delete"
KVDeleteCAS KVOp = "delete-cas"
KVDeleteTree KVOp = "delete-tree"
KVCAS KVOp = "cas"
KVLock KVOp = "lock"
KVUnlock KVOp = "unlock"
KVGet KVOp = "get"
KVGetTree KVOp = "get-tree"
KVCheckSession KVOp = "check-session"
KVCheckIndex KVOp = "check-index"
KVCheckNotExists KVOp = "check-not-exists"
)
// KVTxnOp defines a single operation inside a transaction.

View File

@ -143,22 +143,23 @@ func (l *Lock) Lock(stopCh <-chan struct{}) (<-chan struct{}, error) {
// Check if we need to create a session first
l.lockSession = l.opts.Session
if l.lockSession == "" {
if s, err := l.createSession(); err != nil {
s, err := l.createSession()
if err != nil {
return nil, fmt.Errorf("failed to create session: %v", err)
} else {
l.sessionRenew = make(chan struct{})
l.lockSession = s
session := l.c.Session()
go session.RenewPeriodic(l.opts.SessionTTL, s, nil, l.sessionRenew)
// If we fail to acquire the lock, cleanup the session
defer func() {
if !l.isHeld {
close(l.sessionRenew)
l.sessionRenew = nil
}
}()
}
l.sessionRenew = make(chan struct{})
l.lockSession = s
session := l.c.Session()
go session.RenewPeriodic(l.opts.SessionTTL, s, nil, l.sessionRenew)
// If we fail to acquire the lock, cleanup the session
defer func() {
if !l.isHeld {
close(l.sessionRenew)
l.sessionRenew = nil
}
}()
}
// Setup the query options

View File

@ -9,155 +9,3 @@ type Operator struct {
func (c *Client) Operator() *Operator {
return &Operator{c}
}
// RaftServer has information about a server in the Raft configuration.
type RaftServer struct {
// ID is the unique ID for the server. These are currently the same
// as the address, but they will be changed to a real GUID in a future
// release of Consul.
ID string
// Node is the node name of the server, as known by Consul, or this
// will be set to "(unknown)" otherwise.
Node string
// Address is the IP:port of the server, used for Raft communications.
Address string
// Leader is true if this server is the current cluster leader.
Leader bool
// Voter is true if this server has a vote in the cluster. This might
// be false if the server is staging and still coming online, or if
// it's a non-voting server, which will be added in a future release of
// Consul.
Voter bool
}
// RaftConfigration is returned when querying for the current Raft configuration.
type RaftConfiguration struct {
// Servers has the list of servers in the Raft configuration.
Servers []*RaftServer
// Index has the Raft index of this configuration.
Index uint64
}
// keyringRequest is used for performing Keyring operations
type keyringRequest struct {
Key string
}
// KeyringResponse is returned when listing the gossip encryption keys
type KeyringResponse struct {
// Whether this response is for a WAN ring
WAN bool
// The datacenter name this request corresponds to
Datacenter string
// A map of the encryption keys to the number of nodes they're installed on
Keys map[string]int
// The total number of nodes in this ring
NumNodes int
}
// RaftGetConfiguration is used to query the current Raft peer set.
func (op *Operator) RaftGetConfiguration(q *QueryOptions) (*RaftConfiguration, error) {
r := op.c.newRequest("GET", "/v1/operator/raft/configuration")
r.setQueryOptions(q)
_, resp, err := requireOK(op.c.doRequest(r))
if err != nil {
return nil, err
}
defer resp.Body.Close()
var out RaftConfiguration
if err := decodeBody(resp, &out); err != nil {
return nil, err
}
return &out, nil
}
// RaftRemovePeerByAddress is used to kick a stale peer (one that it in the Raft
// quorum but no longer known to Serf or the catalog) by address in the form of
// "IP:port".
func (op *Operator) RaftRemovePeerByAddress(address string, q *WriteOptions) error {
r := op.c.newRequest("DELETE", "/v1/operator/raft/peer")
r.setWriteOptions(q)
// TODO (slackpad) Currently we made address a query parameter. Once
// IDs are in place this will be DELETE /v1/operator/raft/peer/<id>.
r.params.Set("address", string(address))
_, resp, err := requireOK(op.c.doRequest(r))
if err != nil {
return err
}
resp.Body.Close()
return nil
}
// KeyringInstall is used to install a new gossip encryption key into the cluster
func (op *Operator) KeyringInstall(key string, q *WriteOptions) error {
r := op.c.newRequest("POST", "/v1/operator/keyring")
r.setWriteOptions(q)
r.obj = keyringRequest{
Key: key,
}
_, resp, err := requireOK(op.c.doRequest(r))
if err != nil {
return err
}
resp.Body.Close()
return nil
}
// KeyringList is used to list the gossip keys installed in the cluster
func (op *Operator) KeyringList(q *QueryOptions) ([]*KeyringResponse, error) {
r := op.c.newRequest("GET", "/v1/operator/keyring")
r.setQueryOptions(q)
_, resp, err := requireOK(op.c.doRequest(r))
if err != nil {
return nil, err
}
defer resp.Body.Close()
var out []*KeyringResponse
if err := decodeBody(resp, &out); err != nil {
return nil, err
}
return out, nil
}
// KeyringRemove is used to remove a gossip encryption key from the cluster
func (op *Operator) KeyringRemove(key string, q *WriteOptions) error {
r := op.c.newRequest("DELETE", "/v1/operator/keyring")
r.setWriteOptions(q)
r.obj = keyringRequest{
Key: key,
}
_, resp, err := requireOK(op.c.doRequest(r))
if err != nil {
return err
}
resp.Body.Close()
return nil
}
// KeyringUse is used to change the active gossip encryption key
func (op *Operator) KeyringUse(key string, q *WriteOptions) error {
r := op.c.newRequest("PUT", "/v1/operator/keyring")
r.setWriteOptions(q)
r.obj = keyringRequest{
Key: key,
}
_, resp, err := requireOK(op.c.doRequest(r))
if err != nil {
return err
}
resp.Body.Close()
return nil
}

168
vendor/github.com/hashicorp/consul/api/operator_area.go generated vendored Normal file
View File

@ -0,0 +1,168 @@
// The /v1/operator/area endpoints are available only in Consul Enterprise and
// interact with its network area subsystem. Network areas are used to link
// together Consul servers in different Consul datacenters. With network areas,
// Consul datacenters can be linked together in ways other than a fully-connected
// mesh, as is required for Consul's WAN.
package api
import (
"net"
"time"
)
// Area defines a network area.
type Area struct {
// ID is this identifier for an area (a UUID). This must be left empty
// when creating a new area.
ID string
// PeerDatacenter is the peer Consul datacenter that will make up the
// other side of this network area. Network areas always involve a pair
// of datacenters: the datacenter where the area was created, and the
// peer datacenter. This is required.
PeerDatacenter string
// RetryJoin specifies the address of Consul servers to join to, such as
// an IPs or hostnames with an optional port number. This is optional.
RetryJoin []string
}
// AreaJoinResponse is returned when a join occurs and gives the result for each
// address.
type AreaJoinResponse struct {
// The address that was joined.
Address string
// Whether or not the join was a success.
Joined bool
// If we couldn't join, this is the message with information.
Error string
}
// SerfMember is a generic structure for reporting information about members in
// a Serf cluster. This is only used by the area endpoints right now, but this
// could be expanded to other endpoints in the future.
type SerfMember struct {
// ID is the node identifier (a UUID).
ID string
// Name is the node name.
Name string
// Addr has the IP address.
Addr net.IP
// Port is the RPC port.
Port uint16
// Datacenter is the DC name.
Datacenter string
// Role is "client", "server", or "unknown".
Role string
// Build has the version of the Consul agent.
Build string
// Protocol is the protocol of the Consul agent.
Protocol int
// Status is the Serf health status "none", "alive", "leaving", "left",
// or "failed".
Status string
// RTT is the estimated round trip time from the server handling the
// request to the this member. This will be negative if no RTT estimate
// is available.
RTT time.Duration
}
// AreaCreate will create a new network area. The ID in the given structure must
// be empty and a generated ID will be returned on success.
func (op *Operator) AreaCreate(area *Area, q *WriteOptions) (string, *WriteMeta, error) {
r := op.c.newRequest("POST", "/v1/operator/area")
r.setWriteOptions(q)
r.obj = area
rtt, resp, err := requireOK(op.c.doRequest(r))
if err != nil {
return "", nil, err
}
defer resp.Body.Close()
wm := &WriteMeta{}
wm.RequestTime = rtt
var out struct{ ID string }
if err := decodeBody(resp, &out); err != nil {
return "", nil, err
}
return out.ID, wm, nil
}
// AreaGet returns a single network area.
func (op *Operator) AreaGet(areaID string, q *QueryOptions) ([]*Area, *QueryMeta, error) {
var out []*Area
qm, err := op.c.query("/v1/operator/area/"+areaID, &out, q)
if err != nil {
return nil, nil, err
}
return out, qm, nil
}
// AreaList returns all the available network areas.
func (op *Operator) AreaList(q *QueryOptions) ([]*Area, *QueryMeta, error) {
var out []*Area
qm, err := op.c.query("/v1/operator/area", &out, q)
if err != nil {
return nil, nil, err
}
return out, qm, nil
}
// AreaDelete deletes the given network area.
func (op *Operator) AreaDelete(areaID string, q *WriteOptions) (*WriteMeta, error) {
r := op.c.newRequest("DELETE", "/v1/operator/area/"+areaID)
r.setWriteOptions(q)
rtt, resp, err := requireOK(op.c.doRequest(r))
if err != nil {
return nil, err
}
defer resp.Body.Close()
wm := &WriteMeta{}
wm.RequestTime = rtt
return wm, nil
}
// AreaJoin attempts to join the given set of join addresses to the given
// network area. See the Area structure for details about join addresses.
func (op *Operator) AreaJoin(areaID string, addresses []string, q *WriteOptions) ([]*AreaJoinResponse, *WriteMeta, error) {
r := op.c.newRequest("PUT", "/v1/operator/area/"+areaID+"/join")
r.setWriteOptions(q)
r.obj = addresses
rtt, resp, err := requireOK(op.c.doRequest(r))
if err != nil {
return nil, nil, err
}
defer resp.Body.Close()
wm := &WriteMeta{}
wm.RequestTime = rtt
var out []*AreaJoinResponse
if err := decodeBody(resp, &out); err != nil {
return nil, nil, err
}
return out, wm, nil
}
// AreaMembers lists the Serf information about the members in the given area.
func (op *Operator) AreaMembers(areaID string, q *QueryOptions) ([]*SerfMember, *QueryMeta, error) {
var out []*SerfMember
qm, err := op.c.query("/v1/operator/area/"+areaID+"/members", &out, q)
if err != nil {
return nil, nil, err
}
return out, qm, nil
}

View File

@ -0,0 +1,219 @@
package api
import (
"bytes"
"fmt"
"io"
"strconv"
"strings"
"time"
)
// AutopilotConfiguration is used for querying/setting the Autopilot configuration.
// Autopilot helps manage operator tasks related to Consul servers like removing
// failed servers from the Raft quorum.
type AutopilotConfiguration struct {
// CleanupDeadServers controls whether to remove dead servers from the Raft
// peer list when a new server joins
CleanupDeadServers bool
// LastContactThreshold is the limit on the amount of time a server can go
// without leader contact before being considered unhealthy.
LastContactThreshold *ReadableDuration
// MaxTrailingLogs is the amount of entries in the Raft Log that a server can
// be behind before being considered unhealthy.
MaxTrailingLogs uint64
// ServerStabilizationTime is the minimum amount of time a server must be
// in a stable, healthy state before it can be added to the cluster. Only
// applicable with Raft protocol version 3 or higher.
ServerStabilizationTime *ReadableDuration
// (Enterprise-only) RedundancyZoneTag is the node tag to use for separating
// servers into zones for redundancy. If left blank, this feature will be disabled.
RedundancyZoneTag string
// (Enterprise-only) DisableUpgradeMigration will disable Autopilot's upgrade migration
// strategy of waiting until enough newer-versioned servers have been added to the
// cluster before promoting them to voters.
DisableUpgradeMigration bool
// (Enterprise-only) UpgradeVersionTag is the node tag to use for version info when
// performing upgrade migrations. If left blank, the Consul version will be used.
UpgradeVersionTag string
// CreateIndex holds the index corresponding the creation of this configuration.
// This is a read-only field.
CreateIndex uint64
// ModifyIndex will be set to the index of the last update when retrieving the
// Autopilot configuration. Resubmitting a configuration with
// AutopilotCASConfiguration will perform a check-and-set operation which ensures
// there hasn't been a subsequent update since the configuration was retrieved.
ModifyIndex uint64
}
// ServerHealth is the health (from the leader's point of view) of a server.
type ServerHealth struct {
// ID is the raft ID of the server.
ID string
// Name is the node name of the server.
Name string
// Address is the address of the server.
Address string
// The status of the SerfHealth check for the server.
SerfStatus string
// Version is the Consul version of the server.
Version string
// Leader is whether this server is currently the leader.
Leader bool
// LastContact is the time since this node's last contact with the leader.
LastContact *ReadableDuration
// LastTerm is the highest leader term this server has a record of in its Raft log.
LastTerm uint64
// LastIndex is the last log index this server has a record of in its Raft log.
LastIndex uint64
// Healthy is whether or not the server is healthy according to the current
// Autopilot config.
Healthy bool
// Voter is whether this is a voting server.
Voter bool
// StableSince is the last time this server's Healthy value changed.
StableSince time.Time
}
// OperatorHealthReply is a representation of the overall health of the cluster
type OperatorHealthReply struct {
// Healthy is true if all the servers in the cluster are healthy.
Healthy bool
// FailureTolerance is the number of healthy servers that could be lost without
// an outage occurring.
FailureTolerance int
// Servers holds the health of each server.
Servers []ServerHealth
}
// ReadableDuration is a duration type that is serialized to JSON in human readable format.
type ReadableDuration time.Duration
func NewReadableDuration(dur time.Duration) *ReadableDuration {
d := ReadableDuration(dur)
return &d
}
func (d *ReadableDuration) String() string {
return d.Duration().String()
}
func (d *ReadableDuration) Duration() time.Duration {
if d == nil {
return time.Duration(0)
}
return time.Duration(*d)
}
func (d *ReadableDuration) MarshalJSON() ([]byte, error) {
return []byte(fmt.Sprintf(`"%s"`, d.Duration().String())), nil
}
func (d *ReadableDuration) UnmarshalJSON(raw []byte) error {
if d == nil {
return fmt.Errorf("cannot unmarshal to nil pointer")
}
str := string(raw)
if len(str) < 2 || str[0] != '"' || str[len(str)-1] != '"' {
return fmt.Errorf("must be enclosed with quotes: %s", str)
}
dur, err := time.ParseDuration(str[1 : len(str)-1])
if err != nil {
return err
}
*d = ReadableDuration(dur)
return nil
}
// AutopilotGetConfiguration is used to query the current Autopilot configuration.
func (op *Operator) AutopilotGetConfiguration(q *QueryOptions) (*AutopilotConfiguration, error) {
r := op.c.newRequest("GET", "/v1/operator/autopilot/configuration")
r.setQueryOptions(q)
_, resp, err := requireOK(op.c.doRequest(r))
if err != nil {
return nil, err
}
defer resp.Body.Close()
var out AutopilotConfiguration
if err := decodeBody(resp, &out); err != nil {
return nil, err
}
return &out, nil
}
// AutopilotSetConfiguration is used to set the current Autopilot configuration.
func (op *Operator) AutopilotSetConfiguration(conf *AutopilotConfiguration, q *WriteOptions) error {
r := op.c.newRequest("PUT", "/v1/operator/autopilot/configuration")
r.setWriteOptions(q)
r.obj = conf
_, resp, err := requireOK(op.c.doRequest(r))
if err != nil {
return err
}
resp.Body.Close()
return nil
}
// AutopilotCASConfiguration is used to perform a Check-And-Set update on the
// Autopilot configuration. The ModifyIndex value will be respected. Returns
// true on success or false on failures.
func (op *Operator) AutopilotCASConfiguration(conf *AutopilotConfiguration, q *WriteOptions) (bool, error) {
r := op.c.newRequest("PUT", "/v1/operator/autopilot/configuration")
r.setWriteOptions(q)
r.params.Set("cas", strconv.FormatUint(conf.ModifyIndex, 10))
r.obj = conf
_, resp, err := requireOK(op.c.doRequest(r))
if err != nil {
return false, err
}
defer resp.Body.Close()
var buf bytes.Buffer
if _, err := io.Copy(&buf, resp.Body); err != nil {
return false, fmt.Errorf("Failed to read response: %v", err)
}
res := strings.Contains(string(buf.Bytes()), "true")
return res, nil
}
// AutopilotServerHealth
func (op *Operator) AutopilotServerHealth(q *QueryOptions) (*OperatorHealthReply, error) {
r := op.c.newRequest("GET", "/v1/operator/autopilot/health")
r.setQueryOptions(q)
_, resp, err := requireOK(op.c.doRequest(r))
if err != nil {
return nil, err
}
defer resp.Body.Close()
var out OperatorHealthReply
if err := decodeBody(resp, &out); err != nil {
return nil, err
}
return &out, nil
}

View File

@ -0,0 +1,83 @@
package api
// keyringRequest is used for performing Keyring operations
type keyringRequest struct {
Key string
}
// KeyringResponse is returned when listing the gossip encryption keys
type KeyringResponse struct {
// Whether this response is for a WAN ring
WAN bool
// The datacenter name this request corresponds to
Datacenter string
// A map of the encryption keys to the number of nodes they're installed on
Keys map[string]int
// The total number of nodes in this ring
NumNodes int
}
// KeyringInstall is used to install a new gossip encryption key into the cluster
func (op *Operator) KeyringInstall(key string, q *WriteOptions) error {
r := op.c.newRequest("POST", "/v1/operator/keyring")
r.setWriteOptions(q)
r.obj = keyringRequest{
Key: key,
}
_, resp, err := requireOK(op.c.doRequest(r))
if err != nil {
return err
}
resp.Body.Close()
return nil
}
// KeyringList is used to list the gossip keys installed in the cluster
func (op *Operator) KeyringList(q *QueryOptions) ([]*KeyringResponse, error) {
r := op.c.newRequest("GET", "/v1/operator/keyring")
r.setQueryOptions(q)
_, resp, err := requireOK(op.c.doRequest(r))
if err != nil {
return nil, err
}
defer resp.Body.Close()
var out []*KeyringResponse
if err := decodeBody(resp, &out); err != nil {
return nil, err
}
return out, nil
}
// KeyringRemove is used to remove a gossip encryption key from the cluster
func (op *Operator) KeyringRemove(key string, q *WriteOptions) error {
r := op.c.newRequest("DELETE", "/v1/operator/keyring")
r.setWriteOptions(q)
r.obj = keyringRequest{
Key: key,
}
_, resp, err := requireOK(op.c.doRequest(r))
if err != nil {
return err
}
resp.Body.Close()
return nil
}
// KeyringUse is used to change the active gossip encryption key
func (op *Operator) KeyringUse(key string, q *WriteOptions) error {
r := op.c.newRequest("PUT", "/v1/operator/keyring")
r.setWriteOptions(q)
r.obj = keyringRequest{
Key: key,
}
_, resp, err := requireOK(op.c.doRequest(r))
if err != nil {
return err
}
resp.Body.Close()
return nil
}

View File

@ -0,0 +1,86 @@
package api
// RaftServer has information about a server in the Raft configuration.
type RaftServer struct {
// ID is the unique ID for the server. These are currently the same
// as the address, but they will be changed to a real GUID in a future
// release of Consul.
ID string
// Node is the node name of the server, as known by Consul, or this
// will be set to "(unknown)" otherwise.
Node string
// Address is the IP:port of the server, used for Raft communications.
Address string
// Leader is true if this server is the current cluster leader.
Leader bool
// Voter is true if this server has a vote in the cluster. This might
// be false if the server is staging and still coming online, or if
// it's a non-voting server, which will be added in a future release of
// Consul.
Voter bool
}
// RaftConfigration is returned when querying for the current Raft configuration.
type RaftConfiguration struct {
// Servers has the list of servers in the Raft configuration.
Servers []*RaftServer
// Index has the Raft index of this configuration.
Index uint64
}
// RaftGetConfiguration is used to query the current Raft peer set.
func (op *Operator) RaftGetConfiguration(q *QueryOptions) (*RaftConfiguration, error) {
r := op.c.newRequest("GET", "/v1/operator/raft/configuration")
r.setQueryOptions(q)
_, resp, err := requireOK(op.c.doRequest(r))
if err != nil {
return nil, err
}
defer resp.Body.Close()
var out RaftConfiguration
if err := decodeBody(resp, &out); err != nil {
return nil, err
}
return &out, nil
}
// RaftRemovePeerByAddress is used to kick a stale peer (one that it in the Raft
// quorum but no longer known to Serf or the catalog) by address in the form of
// "IP:port".
func (op *Operator) RaftRemovePeerByAddress(address string, q *WriteOptions) error {
r := op.c.newRequest("DELETE", "/v1/operator/raft/peer")
r.setWriteOptions(q)
r.params.Set("address", string(address))
_, resp, err := requireOK(op.c.doRequest(r))
if err != nil {
return err
}
resp.Body.Close()
return nil
}
// RaftRemovePeerByID is used to kick a stale peer (one that it in the Raft
// quorum but no longer known to Serf or the catalog) by ID.
func (op *Operator) RaftRemovePeerByID(id string, q *WriteOptions) error {
r := op.c.newRequest("DELETE", "/v1/operator/raft/peer")
r.setWriteOptions(q)
r.params.Set("id", string(id))
_, resp, err := requireOK(op.c.doRequest(r))
if err != nil {
return err
}
resp.Body.Close()
return nil
}

View File

@ -155,22 +155,23 @@ func (s *Semaphore) Acquire(stopCh <-chan struct{}) (<-chan struct{}, error) {
// Check if we need to create a session first
s.lockSession = s.opts.Session
if s.lockSession == "" {
if sess, err := s.createSession(); err != nil {
sess, err := s.createSession()
if err != nil {
return nil, fmt.Errorf("failed to create session: %v", err)
} else {
s.sessionRenew = make(chan struct{})
s.lockSession = sess
session := s.c.Session()
go session.RenewPeriodic(s.opts.SessionTTL, sess, nil, s.sessionRenew)
// If we fail to acquire the lock, cleanup the session
defer func() {
if !s.isHeld {
close(s.sessionRenew)
s.sessionRenew = nil
}
}()
}
s.sessionRenew = make(chan struct{})
s.lockSession = sess
session := s.c.Session()
go session.RenewPeriodic(s.opts.SessionTTL, sess, nil, s.sessionRenew)
// If we fail to acquire the lock, cleanup the session
defer func() {
if !s.isHeld {
close(s.sessionRenew)
s.sessionRenew = nil
}
}()
}
// Create the contender entry

View File

@ -145,7 +145,9 @@ func (s *Session) Renew(id string, q *WriteOptions) (*SessionEntry, *WriteMeta,
// RenewPeriodic is used to periodically invoke Session.Renew on a
// session until a doneCh is closed. This is meant to be used in a long running
// goroutine to ensure a session stays valid.
func (s *Session) RenewPeriodic(initialTTL string, id string, q *WriteOptions, doneCh chan struct{}) error {
func (s *Session) RenewPeriodic(initialTTL string, id string, q *WriteOptions, doneCh <-chan struct{}) error {
ctx := q.Context()
ttl, err := time.ParseDuration(initialTTL)
if err != nil {
return err
@ -179,6 +181,11 @@ func (s *Session) RenewPeriodic(initialTTL string, id string, q *WriteOptions, d
// Attempt a session destroy
s.Destroy(id, q)
return nil
case <-ctx.Done():
// Bail immediately since attempting the destroy would
// use the canceled context in q, which would just bail.
return ctx.Err()
}
}
}

View File

@ -1,57 +0,0 @@
package structs
import (
"github.com/hashicorp/raft"
)
// RaftServer has information about a server in the Raft configuration.
type RaftServer struct {
// ID is the unique ID for the server. These are currently the same
// as the address, but they will be changed to a real GUID in a future
// release of Consul.
ID raft.ServerID
// Node is the node name of the server, as known by Consul, or this
// will be set to "(unknown)" otherwise.
Node string
// Address is the IP:port of the server, used for Raft communications.
Address raft.ServerAddress
// Leader is true if this server is the current cluster leader.
Leader bool
// Voter is true if this server has a vote in the cluster. This might
// be false if the server is staging and still coming online, or if
// it's a non-voting server, which will be added in a future release of
// Consul.
Voter bool
}
// RaftConfigrationResponse is returned when querying for the current Raft
// configuration.
type RaftConfigurationResponse struct {
// Servers has the list of servers in the Raft configuration.
Servers []*RaftServer
// Index has the Raft index of this configuration.
Index uint64
}
// RaftPeerByAddressRequest is used by the Operator endpoint to apply a Raft
// operation on a specific Raft peer by address in the form of "IP:port".
type RaftPeerByAddressRequest struct {
// Datacenter is the target this request is intended for.
Datacenter string
// Address is the peer to remove, in the form "IP:port".
Address raft.ServerAddress
// WriteRequest holds the ACL token to go along with this request.
WriteRequest
}
// RequestDatacenter returns the datacenter for a given request.
func (op *RaftPeerByAddressRequest) RequestDatacenter() string {
return op.Datacenter
}

View File

@ -1,257 +0,0 @@
package structs
// QueryDatacenterOptions sets options about how we fail over if there are no
// healthy nodes in the local datacenter.
type QueryDatacenterOptions struct {
// NearestN is set to the number of remote datacenters to try, based on
// network coordinates.
NearestN int
// Datacenters is a fixed list of datacenters to try after NearestN. We
// never try a datacenter multiple times, so those are subtracted from
// this list before proceeding.
Datacenters []string
}
// QueryDNSOptions controls settings when query results are served over DNS.
type QueryDNSOptions struct {
// TTL is the time to live for the served DNS results.
TTL string
}
// ServiceQuery is used to query for a set of healthy nodes offering a specific
// service.
type ServiceQuery struct {
// Service is the service to query.
Service string
// Failover controls what we do if there are no healthy nodes in the
// local datacenter.
Failover QueryDatacenterOptions
// If OnlyPassing is true then we will only include nodes with passing
// health checks (critical AND warning checks will cause a node to be
// discarded)
OnlyPassing bool
// Near allows the query to always prefer the node nearest the given
// node. If the node does not exist, results are returned in their
// normal randomly-shuffled order. Supplying the magic "_agent" value
// is supported to sort near the agent which initiated the request.
Near string
// Tags are a set of required and/or disallowed tags. If a tag is in
// this list it must be present. If the tag is preceded with "!" then
// it is disallowed.
Tags []string
// NodeMeta is a map of required node metadata fields. If a key/value
// pair is in this map it must be present on the node in order for the
// service entry to be returned.
NodeMeta map[string]string
}
const (
// QueryTemplateTypeNamePrefixMatch uses the Name field of the query as
// a prefix to select the template.
QueryTemplateTypeNamePrefixMatch = "name_prefix_match"
)
// QueryTemplateOptions controls settings if this query is a template.
type QueryTemplateOptions struct {
// Type, if non-empty, means that this query is a template. This is
// set to one of the QueryTemplateType* constants above.
Type string
// Regexp is an optional regular expression to use to parse the full
// name, once the prefix match has selected a template. This can be
// used to extract parts of the name and choose a service name, set
// tags, etc.
Regexp string
}
// PreparedQuery defines a complete prepared query, and is the structure we
// maintain in the state store.
type PreparedQuery struct {
// ID is this UUID-based ID for the query, always generated by Consul.
ID string
// Name is an optional friendly name for the query supplied by the
// user. NOTE - if this feature is used then it will reduce the security
// of any read ACL associated with this query/service since this name
// can be used to locate nodes with supplying any ACL.
Name string
// Session is an optional session to tie this query's lifetime to. If
// this is omitted then the query will not expire.
Session string
// Token is the ACL token used when the query was created, and it is
// used when a query is subsequently executed. This token, or a token
// with management privileges, must be used to change the query later.
Token string
// Template is used to configure this query as a template, which will
// respond to queries based on the Name, and then will be rendered
// before it is executed.
Template QueryTemplateOptions
// Service defines a service query (leaving things open for other types
// later).
Service ServiceQuery
// DNS has options that control how the results of this query are
// served over DNS.
DNS QueryDNSOptions
RaftIndex
}
// GetACLPrefix returns the prefix to look up the prepared_query ACL policy for
// this query, and whether the prefix applies to this query. You always need to
// check the ok value before using the prefix.
func (pq *PreparedQuery) GetACLPrefix() (string, bool) {
if pq.Name != "" || pq.Template.Type != "" {
return pq.Name, true
}
return "", false
}
type PreparedQueries []*PreparedQuery
type IndexedPreparedQueries struct {
Queries PreparedQueries
QueryMeta
}
type PreparedQueryOp string
const (
PreparedQueryCreate PreparedQueryOp = "create"
PreparedQueryUpdate PreparedQueryOp = "update"
PreparedQueryDelete PreparedQueryOp = "delete"
)
// QueryRequest is used to create or change prepared queries.
type PreparedQueryRequest struct {
// Datacenter is the target this request is intended for.
Datacenter string
// Op is the operation to apply.
Op PreparedQueryOp
// Query is the query itself.
Query *PreparedQuery
// WriteRequest holds the ACL token to go along with this request.
WriteRequest
}
// RequestDatacenter returns the datacenter for a given request.
func (q *PreparedQueryRequest) RequestDatacenter() string {
return q.Datacenter
}
// PreparedQuerySpecificRequest is used to get information about a prepared
// query.
type PreparedQuerySpecificRequest struct {
// Datacenter is the target this request is intended for.
Datacenter string
// QueryID is the ID of a query.
QueryID string
// QueryOptions (unfortunately named here) controls the consistency
// settings for the query lookup itself, as well as the service lookups.
QueryOptions
}
// RequestDatacenter returns the datacenter for a given request.
func (q *PreparedQuerySpecificRequest) RequestDatacenter() string {
return q.Datacenter
}
// PreparedQueryExecuteRequest is used to execute a prepared query.
type PreparedQueryExecuteRequest struct {
// Datacenter is the target this request is intended for.
Datacenter string
// QueryIDOrName is the ID of a query _or_ the name of one, either can
// be provided.
QueryIDOrName string
// Limit will trim the resulting list down to the given limit.
Limit int
// Source is used to sort the results relative to a given node using
// network coordinates.
Source QuerySource
// Agent is used to carry around a reference to the agent which initiated
// the execute request. Used to distance-sort relative to the local node.
Agent QuerySource
// QueryOptions (unfortunately named here) controls the consistency
// settings for the query lookup itself, as well as the service lookups.
QueryOptions
}
// RequestDatacenter returns the datacenter for a given request.
func (q *PreparedQueryExecuteRequest) RequestDatacenter() string {
return q.Datacenter
}
// PreparedQueryExecuteRemoteRequest is used when running a local query in a
// remote datacenter.
type PreparedQueryExecuteRemoteRequest struct {
// Datacenter is the target this request is intended for.
Datacenter string
// Query is a copy of the query to execute. We have to ship the entire
// query over since it won't be present in the remote state store.
Query PreparedQuery
// Limit will trim the resulting list down to the given limit.
Limit int
// QueryOptions (unfortunately named here) controls the consistency
// settings for the the service lookups.
QueryOptions
}
// RequestDatacenter returns the datacenter for a given request.
func (q *PreparedQueryExecuteRemoteRequest) RequestDatacenter() string {
return q.Datacenter
}
// PreparedQueryExecuteResponse has the results of executing a query.
type PreparedQueryExecuteResponse struct {
// Service is the service that was queried.
Service string
// Nodes has the nodes that were output by the query.
Nodes CheckServiceNodes
// DNS has the options for serving these results over DNS.
DNS QueryDNSOptions
// Datacenter is the datacenter that these results came from.
Datacenter string
// Failovers is a count of how many times we had to query a remote
// datacenter.
Failovers int
// QueryMeta has freshness information about the query.
QueryMeta
}
// PreparedQueryExplainResponse has the results when explaining a query/
type PreparedQueryExplainResponse struct {
// Query has the fully-rendered query.
Query PreparedQuery
// QueryMeta has freshness information about the query.
QueryMeta
}

View File

@ -1,40 +0,0 @@
package structs
type SnapshotOp int
const (
SnapshotSave SnapshotOp = iota
SnapshotRestore
)
// SnapshotRequest is used as a header for a snapshot RPC request. This will
// precede any streaming data that's part of the request and is JSON-encoded on
// the wire.
type SnapshotRequest struct {
// Datacenter is the target datacenter for this request. The request
// will be forwarded if necessary.
Datacenter string
// Token is the ACL token to use for the operation. If ACLs are enabled
// then all operations require a management token.
Token string
// If set, any follower can service the request. Results may be
// arbitrarily stale. Only applies to SnapshotSave.
AllowStale bool
// Op is the operation code for the RPC.
Op SnapshotOp
}
// SnapshotResponse is used header for a snapshot RPC response. This will
// precede any streaming data that's part of the request and is JSON-encoded on
// the wire.
type SnapshotResponse struct {
// Error is the overall error status of the RPC request.
Error string
// QueryMeta has freshness information about the server that handled the
// request. It is only filled in for a SnapshotSave.
QueryMeta
}

File diff suppressed because it is too large Load Diff

View File

@ -1,85 +0,0 @@
package structs
import (
"fmt"
)
// TxnKVOp is used to define a single operation on the KVS inside a
// transaction
type TxnKVOp struct {
Verb KVSOp
DirEnt DirEntry
}
// TxnKVResult is used to define the result of a single operation on the KVS
// inside a transaction.
type TxnKVResult *DirEntry
// TxnOp is used to define a single operation inside a transaction. Only one
// of the types should be filled out per entry.
type TxnOp struct {
KV *TxnKVOp
}
// TxnOps is a list of operations within a transaction.
type TxnOps []*TxnOp
// TxnRequest is used to apply multiple operations to the state store in a
// single transaction
type TxnRequest struct {
Datacenter string
Ops TxnOps
WriteRequest
}
func (r *TxnRequest) RequestDatacenter() string {
return r.Datacenter
}
// TxnReadRequest is used as a fast path for read-only transactions that don't
// modify the state store.
type TxnReadRequest struct {
Datacenter string
Ops TxnOps
QueryOptions
}
func (r *TxnReadRequest) RequestDatacenter() string {
return r.Datacenter
}
// TxnError is used to return information about an error for a specific
// operation.
type TxnError struct {
OpIndex int
What string
}
// Error returns the string representation of an atomic error.
func (e TxnError) Error() string {
return fmt.Sprintf("op %d: %s", e.OpIndex, e.What)
}
// TxnErrors is a list of TxnError entries.
type TxnErrors []*TxnError
// TxnResult is used to define the result of a given operation inside a
// transaction. Only one of the types should be filled out per entry.
type TxnResult struct {
KV TxnKVResult
}
// TxnResults is a list of TxnResult entries.
type TxnResults []*TxnResult
// TxnResponse is the structure returned by a TxnRequest.
type TxnResponse struct {
Results TxnResults
Errors TxnErrors
}
// TxnReadResponse is the structure returned by a TxnReadRequest.
type TxnReadResponse struct {
TxnResponse
QueryMeta
}

View File

@ -25,41 +25,54 @@ import (
"github.com/hashicorp/consul/testutil"
)
func TestMain(t *testing.T) {
func TestFoo_bar(t *testing.T) {
// Create a test Consul server
srv1 := testutil.NewTestServer(t)
srv1, err := testutil.NewTestServer()
if err != nil {
t.Fatal(err)
}
defer srv1.Stop()
// Create a secondary server, passing in configuration
// to avoid bootstrapping as we are forming a cluster.
srv2 := testutil.NewTestServerConfig(t, func(c *testutil.TestServerConfig) {
srv2, err := testutil.NewTestServerConfig(t, func(c *testutil.TestServerConfig) {
c.Bootstrap = false
})
if err != nil {
t.Fatal(err)
}
defer srv2.Stop()
// Join the servers together
srv1.JoinLAN(srv2.LANAddr)
srv1.JoinLAN(t, srv2.LANAddr)
// Create a test key/value pair
srv1.SetKV("foo", []byte("bar"))
srv1.SetKV(t, "foo", []byte("bar"))
// Create lots of test key/value pairs
srv1.PopulateKV(map[string][]byte{
srv1.PopulateKV(t, map[string][]byte{
"bar": []byte("123"),
"baz": []byte("456"),
})
// Create a service
srv1.AddService("redis", structs.HealthPassing, []string{"master"})
srv1.AddService(t, "redis", structs.HealthPassing, []string{"master"})
// Create a service that will be accessed in target source code
srv1.AddAccessibleService("redis", structs.HealthPassing, "127.0.0.1", 6379, []string{"master"})
// Create a service check
srv1.AddCheck("service:redis", "redis", structs.HealthPassing)
srv1.AddCheck(t, "service:redis", "redis", structs.HealthPassing)
// Create a node check
srv1.AddCheck("mem", "", structs.HealthCritical)
srv1.AddCheck(t, "mem", "", structs.HealthCritical)
// The HTTPAddr field contains the address of the Consul
// API on the new test server instance.
println(srv1.HTTPAddr)
// All functions also have a wrapper method to limit the passing of "t"
wrap := srv1.Wrap(t)
wrap.SetKV("foo", []byte("bar"))
}
```

61
vendor/github.com/hashicorp/consul/testutil/io.go generated vendored Normal file
View File

@ -0,0 +1,61 @@
package testutil
import (
"fmt"
"io/ioutil"
"os"
"strings"
"testing"
)
// tmpdir is the base directory for all temporary directories
// and files created with TempDir and TempFile. This could be
// achieved by setting a system environment variable but then
// the test execution would depend on whether or not the
// environment variable is set.
//
// On macOS the temp base directory is quite long and that
// triggers a problem with some tests that bind to UNIX sockets
// where the filename seems to be too long. Using a shorter name
// fixes this and makes the paths more readable.
//
// It also provides a single base directory for cleanup.
var tmpdir = "/tmp/consul-test"
func init() {
if err := os.MkdirAll(tmpdir, 0755); err != nil {
fmt.Printf("Cannot create %s. Reverting to /tmp\n", tmpdir)
tmpdir = "/tmp"
}
}
// TempDir creates a temporary directory within tmpdir
// with the name 'testname-name'. If the directory cannot
// be created t.Fatal is called.
func TempDir(t *testing.T, name string) string {
if t != nil && t.Name() != "" {
name = t.Name() + "-" + name
}
name = strings.Replace(name, "/", "_", -1)
d, err := ioutil.TempDir(tmpdir, name)
if err != nil {
t.Fatalf("err: %s", err)
}
return d
}
// TempFile creates a temporary file within tmpdir
// with the name 'testname-name'. If the file cannot
// be created t.Fatal is called. If a temporary directory
// has been created before consider storing the file
// inside this directory to avoid double cleanup.
func TempFile(t *testing.T, name string) *os.File {
if t != nil && t.Name() != "" {
name = t.Name() + "-" + name
}
f, err := ioutil.TempFile(tmpdir, name)
if err != nil {
t.Fatalf("err: %s", err)
}
return f
}

View File

@ -0,0 +1,197 @@
// Package retry provides support for repeating operations in tests.
//
// A sample retry operation looks like this:
//
// func TestX(t *testing.T) {
// retry.Run(t, func(r *retry.R) {
// if err := foo(); err != nil {
// r.Fatal("f: ", err)
// }
// })
// }
//
package retry
import (
"bytes"
"fmt"
"runtime"
"strings"
"sync"
"time"
)
// Failer is an interface compatible with testing.T.
type Failer interface {
// Log is called for the final test output
Log(args ...interface{})
// FailNow is called when the retrying is abandoned.
FailNow()
}
// R provides context for the retryer.
type R struct {
fail bool
output []string
}
func (r *R) FailNow() {
r.fail = true
runtime.Goexit()
}
func (r *R) Fatal(args ...interface{}) {
r.log(fmt.Sprint(args...))
r.FailNow()
}
func (r *R) Fatalf(format string, args ...interface{}) {
r.log(fmt.Sprintf(format, args...))
r.FailNow()
}
func (r *R) Error(args ...interface{}) {
r.log(fmt.Sprint(args...))
r.fail = true
}
func (r *R) Check(err error) {
if err != nil {
r.log(err.Error())
r.FailNow()
}
}
func (r *R) log(s string) {
r.output = append(r.output, decorate(s))
}
func decorate(s string) string {
_, file, line, ok := runtime.Caller(3)
if ok {
n := strings.LastIndex(file, "/")
if n >= 0 {
file = file[n+1:]
}
} else {
file = "???"
line = 1
}
return fmt.Sprintf("%s:%d: %s", file, line, s)
}
func Run(t Failer, f func(r *R)) {
run(TwoSeconds(), t, f)
}
func RunWith(r Retryer, t Failer, f func(r *R)) {
run(r, t, f)
}
func dedup(a []string) string {
if len(a) == 0 {
return ""
}
m := map[string]int{}
for _, s := range a {
m[s] = m[s] + 1
}
var b bytes.Buffer
for _, s := range a {
if _, ok := m[s]; ok {
b.WriteString(s)
b.WriteRune('\n')
delete(m, s)
}
}
return string(b.Bytes())
}
func run(r Retryer, t Failer, f func(r *R)) {
rr := &R{}
fail := func() {
out := dedup(rr.output)
if out != "" {
t.Log(out)
}
t.FailNow()
}
for r.NextOr(fail) {
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
f(rr)
}()
wg.Wait()
if rr.fail {
rr.fail = false
continue
}
break
}
}
// TwoSeconds repeats an operation for two seconds and waits 25ms in between.
func TwoSeconds() *Timer {
return &Timer{Timeout: 2 * time.Second, Wait: 25 * time.Millisecond}
}
// ThreeTimes repeats an operation three times and waits 25ms in between.
func ThreeTimes() *Counter {
return &Counter{Count: 3, Wait: 25 * time.Millisecond}
}
// Retryer provides an interface for repeating operations
// until they succeed or an exit condition is met.
type Retryer interface {
// NextOr returns true if the operation should be repeated.
// Otherwise, it calls fail and returns false.
NextOr(fail func()) bool
}
// Counter repeats an operation a given number of
// times and waits between subsequent operations.
type Counter struct {
Count int
Wait time.Duration
count int
}
func (r *Counter) NextOr(fail func()) bool {
if r.count == r.Count {
fail()
return false
}
if r.count > 0 {
time.Sleep(r.Wait)
}
r.count++
return true
}
// Timer repeats an operation for a given amount
// of time and waits between subsequent operations.
type Timer struct {
Timeout time.Duration
Wait time.Duration
// stop is the timeout deadline.
// Set on the first invocation of Next().
stop time.Time
}
func (r *Timer) NextOr(fail func()) bool {
if r.stop.IsZero() {
r.stop = time.Now().Add(r.Timeout)
return true
}
if time.Now().After(r.stop) {
fail()
return false
}
time.Sleep(r.Wait)
return true
}

View File

@ -12,8 +12,7 @@ package testutil
// otherwise cause an import cycle.
import (
"bytes"
"encoding/base64"
"context"
"encoding/json"
"fmt"
"io"
@ -22,11 +21,16 @@ import (
"net/http"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"testing"
"time"
"github.com/hashicorp/consul/consul/structs"
"github.com/hashicorp/consul/testutil/retry"
"github.com/hashicorp/go-cleanhttp"
"github.com/hashicorp/go-uuid"
"github.com/pkg/errors"
)
// TestPerformanceConfig configures the performance parameters.
@ -39,10 +43,13 @@ type TestPerformanceConfig struct {
type TestPortConfig struct {
DNS int `json:"dns,omitempty"`
HTTP int `json:"http,omitempty"`
RPC int `json:"rpc,omitempty"`
HTTPS int `json:"https,omitempty"`
SerfLan int `json:"serf_lan,omitempty"`
SerfWan int `json:"serf_wan,omitempty"`
Server int `json:"server,omitempty"`
// Deprecated
RPC int `json:"rpc,omitempty"`
}
// TestAddressConfig contains the bind addresses for various
@ -53,24 +60,36 @@ type TestAddressConfig struct {
// TestServerConfig is the main server configuration struct.
type TestServerConfig struct {
NodeName string `json:"node_name"`
NodeMeta map[string]string `json:"node_meta,omitempty"`
Performance *TestPerformanceConfig `json:"performance,omitempty"`
Bootstrap bool `json:"bootstrap,omitempty"`
Server bool `json:"server,omitempty"`
DataDir string `json:"data_dir,omitempty"`
Datacenter string `json:"datacenter,omitempty"`
DisableCheckpoint bool `json:"disable_update_check"`
LogLevel string `json:"log_level,omitempty"`
Bind string `json:"bind_addr,omitempty"`
Addresses *TestAddressConfig `json:"addresses,omitempty"`
Ports *TestPortConfig `json:"ports,omitempty"`
ACLMasterToken string `json:"acl_master_token,omitempty"`
ACLDatacenter string `json:"acl_datacenter,omitempty"`
ACLDefaultPolicy string `json:"acl_default_policy,omitempty"`
Encrypt string `json:"encrypt,omitempty"`
Stdout, Stderr io.Writer `json:"-"`
Args []string `json:"-"`
NodeName string `json:"node_name"`
NodeID string `json:"node_id"`
NodeMeta map[string]string `json:"node_meta,omitempty"`
Performance *TestPerformanceConfig `json:"performance,omitempty"`
Bootstrap bool `json:"bootstrap,omitempty"`
Server bool `json:"server,omitempty"`
DataDir string `json:"data_dir,omitempty"`
Datacenter string `json:"datacenter,omitempty"`
DisableCheckpoint bool `json:"disable_update_check"`
LogLevel string `json:"log_level,omitempty"`
Bind string `json:"bind_addr,omitempty"`
Addresses *TestAddressConfig `json:"addresses,omitempty"`
Ports *TestPortConfig `json:"ports,omitempty"`
RaftProtocol int `json:"raft_protocol,omitempty"`
ACLMasterToken string `json:"acl_master_token,omitempty"`
ACLDatacenter string `json:"acl_datacenter,omitempty"`
ACLDefaultPolicy string `json:"acl_default_policy,omitempty"`
ACLEnforceVersion8 bool `json:"acl_enforce_version_8"`
Encrypt string `json:"encrypt,omitempty"`
CAFile string `json:"ca_file,omitempty"`
CertFile string `json:"cert_file,omitempty"`
KeyFile string `json:"key_file,omitempty"`
VerifyIncoming bool `json:"verify_incoming,omitempty"`
VerifyIncomingRPC bool `json:"verify_incoming_rpc,omitempty"`
VerifyIncomingHTTPS bool `json:"verify_incoming_https,omitempty"`
VerifyOutgoing bool `json:"verify_outgoing,omitempty"`
EnableScriptChecks bool `json:"enable_script_checks,omitempty"`
ReadyTimeout time.Duration `json:"-"`
Stdout, Stderr io.Writer `json:"-"`
Args []string `json:"-"`
}
// ServerConfigCallback is a function interface which can be
@ -80,8 +99,14 @@ type ServerConfigCallback func(c *TestServerConfig)
// defaultServerConfig returns a new TestServerConfig struct
// with all of the listen ports incremented by one.
func defaultServerConfig() *TestServerConfig {
nodeID, err := uuid.GenerateUUID()
if err != nil {
panic(err)
}
return &TestServerConfig{
NodeName: fmt.Sprintf("node%d", randomPort()),
NodeID: nodeID,
DisableCheckpoint: true,
Performance: &TestPerformanceConfig{
RaftMultiplier: 1,
@ -94,11 +119,13 @@ func defaultServerConfig() *TestServerConfig {
Ports: &TestPortConfig{
DNS: randomPort(),
HTTP: randomPort(),
RPC: randomPort(),
HTTPS: randomPort(),
SerfLan: randomPort(),
SerfWan: randomPort(),
Server: randomPort(),
RPC: randomPort(),
},
ReadyTimeout: 10 * time.Second,
}
}
@ -129,15 +156,6 @@ type TestCheck struct {
TTL string `json:",omitempty"`
}
// TestingT is an interface wrapper around TestingT
type TestingT interface {
Logf(format string, args ...interface{})
Errorf(format string, args ...interface{})
Fatalf(format string, args ...interface{})
Fatal(args ...interface{})
Skip(args ...interface{})
}
// TestKVResponse is what we use to decode KV data.
type TestKVResponse struct {
Value string
@ -147,382 +165,229 @@ type TestKVResponse struct {
type TestServer struct {
cmd *exec.Cmd
Config *TestServerConfig
t TestingT
HTTPAddr string
LANAddr string
WANAddr string
HTTPAddr string
HTTPSAddr string
LANAddr string
WANAddr string
HttpClient *http.Client
HTTPClient *http.Client
tmpdir string
}
// NewTestServer is an easy helper method to create a new Consul
// test server with the most basic configuration.
func NewTestServer(t TestingT) *TestServer {
return NewTestServerConfig(t, nil)
func NewTestServer() (*TestServer, error) {
return NewTestServerConfigT(nil, nil)
}
// NewTestServerConfig creates a new TestServer, and makes a call to
// an optional callback function to modify the configuration.
func NewTestServerConfig(t TestingT, cb ServerConfigCallback) *TestServer {
if path, err := exec.LookPath("consul"); err != nil || path == "" {
t.Fatal("consul not found on $PATH - download and install " +
func NewTestServerConfig(cb ServerConfigCallback) (*TestServer, error) {
return NewTestServerConfigT(nil, cb)
}
// NewTestServerConfig creates a new TestServer, and makes a call to an optional
// callback function to modify the configuration. If there is an error
// configuring or starting the server, the server will NOT be running when the
// function returns (thus you do not need to stop it).
func NewTestServerConfigT(t *testing.T, cb ServerConfigCallback) (*TestServer, error) {
var server *TestServer
retry.Run(t, func(r *retry.R) {
var err error
server, err = newTestServerConfigT(t, cb)
if err != nil {
r.Fatalf("failed starting test server: %v", err)
}
})
return server, nil
}
// newTestServerConfigT is the internal helper for NewTestServerConfigT.
func newTestServerConfigT(t *testing.T, cb ServerConfigCallback) (*TestServer, error) {
path, err := exec.LookPath("consul")
if err != nil || path == "" {
return nil, fmt.Errorf("consul not found on $PATH - download and install " +
"consul or skip this test")
}
dataDir, err := ioutil.TempDir("", "consul")
if err != nil {
t.Fatalf("err: %s", err)
}
configFile, err := ioutil.TempFile(dataDir, "config")
if err != nil {
defer os.RemoveAll(dataDir)
t.Fatalf("err: %s", err)
}
consulConfig := defaultServerConfig()
consulConfig.DataDir = dataDir
tmpdir := TempDir(t, "consul")
cfg := defaultServerConfig()
cfg.DataDir = filepath.Join(tmpdir, "data")
if cb != nil {
cb(consulConfig)
cb(cfg)
}
configContent, err := json.Marshal(consulConfig)
b, err := json.Marshal(cfg)
if err != nil {
t.Fatalf("err: %s", err)
return nil, errors.Wrap(err, "failed marshaling json")
}
if _, err := configFile.Write(configContent); err != nil {
t.Fatalf("err: %s", err)
configFile := filepath.Join(tmpdir, "config.json")
if err := ioutil.WriteFile(configFile, b, 0644); err != nil {
defer os.RemoveAll(tmpdir)
return nil, errors.Wrap(err, "failed writing config content")
}
configFile.Close()
stdout := io.Writer(os.Stdout)
if consulConfig.Stdout != nil {
stdout = consulConfig.Stdout
if cfg.Stdout != nil {
stdout = cfg.Stdout
}
stderr := io.Writer(os.Stderr)
if consulConfig.Stderr != nil {
stderr = consulConfig.Stderr
if cfg.Stderr != nil {
stderr = cfg.Stderr
}
// Start the server
args := []string{"agent", "-config-file", configFile.Name()}
args = append(args, consulConfig.Args...)
args := []string{"agent", "-config-file", configFile}
args = append(args, cfg.Args...)
cmd := exec.Command("consul", args...)
cmd.Stdout = stdout
cmd.Stderr = stderr
if err := cmd.Start(); err != nil {
t.Fatalf("err: %s", err)
return nil, errors.Wrap(err, "failed starting command")
}
var httpAddr string
var client *http.Client
if strings.HasPrefix(consulConfig.Addresses.HTTP, "unix://") {
httpAddr = consulConfig.Addresses.HTTP
trans := cleanhttp.DefaultTransport()
trans.Dial = func(_, _ string) (net.Conn, error) {
return net.Dial("unix", httpAddr[7:])
httpAddr := fmt.Sprintf("127.0.0.1:%d", cfg.Ports.HTTP)
client := cleanhttp.DefaultClient()
if strings.HasPrefix(cfg.Addresses.HTTP, "unix://") {
httpAddr = cfg.Addresses.HTTP
tr := cleanhttp.DefaultTransport()
tr.DialContext = func(_ context.Context, _, _ string) (net.Conn, error) {
return net.Dial("unix", httpAddr[len("unix://"):])
}
client = &http.Client{
Transport: trans,
}
} else {
httpAddr = fmt.Sprintf("127.0.0.1:%d", consulConfig.Ports.HTTP)
client = cleanhttp.DefaultClient()
client = &http.Client{Transport: tr}
}
server := &TestServer{
Config: consulConfig,
Config: cfg,
cmd: cmd,
t: t,
HTTPAddr: httpAddr,
LANAddr: fmt.Sprintf("127.0.0.1:%d", consulConfig.Ports.SerfLan),
WANAddr: fmt.Sprintf("127.0.0.1:%d", consulConfig.Ports.SerfWan),
HTTPAddr: httpAddr,
HTTPSAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.HTTPS),
LANAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.SerfLan),
WANAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.SerfWan),
HttpClient: client,
HTTPClient: client,
tmpdir: tmpdir,
}
// Wait for the server to be ready
if consulConfig.Bootstrap {
server.waitForLeader()
if cfg.Bootstrap {
err = server.waitForLeader()
} else {
server.waitForAPI()
err = server.waitForAPI()
}
return server
if err != nil {
defer server.Stop()
return nil, errors.Wrap(err, "failed waiting for server to start")
}
return server, nil
}
// Stop stops the test Consul server, and removes the Consul data
// directory once we are done.
func (s *TestServer) Stop() {
defer os.RemoveAll(s.Config.DataDir)
func (s *TestServer) Stop() error {
defer os.RemoveAll(s.tmpdir)
if err := s.cmd.Process.Kill(); err != nil {
s.t.Errorf("err: %s", err)
// There was no process
if s.cmd == nil {
return nil
}
if s.cmd.Process != nil {
if err := s.cmd.Process.Signal(os.Interrupt); err != nil {
return errors.Wrap(err, "failed to kill consul server")
}
}
// wait for the process to exit to be sure that the data dir can be
// deleted on all platforms.
s.cmd.Wait()
return s.cmd.Wait()
}
type failer struct {
failed bool
}
func (f *failer) Log(args ...interface{}) { fmt.Println(args) }
func (f *failer) FailNow() { f.failed = true }
// waitForAPI waits for only the agent HTTP endpoint to start
// responding. This is an indication that the agent has started,
// but will likely return before a leader is elected.
func (s *TestServer) waitForAPI() {
WaitForResult(func() (bool, error) {
resp, err := s.HttpClient.Get(s.url("/v1/agent/self"))
func (s *TestServer) waitForAPI() error {
f := &failer{}
retry.Run(f, func(r *retry.R) {
resp, err := s.HTTPClient.Get(s.url("/v1/agent/self"))
if err != nil {
return false, err
r.Fatal(err)
}
defer resp.Body.Close()
if err := s.requireOK(resp); err != nil {
return false, err
r.Fatal("failed OK respose", err)
}
return true, nil
}, func(err error) {
defer s.Stop()
s.t.Fatalf("err: %s", err)
})
if f.failed {
return errors.New("failed waiting for API")
}
return nil
}
// waitForLeader waits for the Consul server's HTTP API to become
// available, and then waits for a known leader and an index of
// 1 or more to be observed to confirm leader election is done.
// It then waits to ensure the anti-entropy sync has completed.
func (s *TestServer) waitForLeader() {
func (s *TestServer) waitForLeader() error {
f := &failer{}
timer := &retry.Timer{
Timeout: s.Config.ReadyTimeout,
Wait: 250 * time.Millisecond,
}
var index int64
WaitForResult(func() (bool, error) {
retry.RunWith(timer, f, func(r *retry.R) {
// Query the API and check the status code.
url := s.url(fmt.Sprintf("/v1/catalog/nodes?index=%d&wait=2s", index))
resp, err := s.HttpClient.Get(url)
url := s.url(fmt.Sprintf("/v1/catalog/nodes?index=%d", index))
resp, err := s.HTTPClient.Get(url)
if err != nil {
return false, err
r.Fatal("failed http get", err)
}
defer resp.Body.Close()
if err := s.requireOK(resp); err != nil {
return false, err
r.Fatal("failed OK response", err)
}
// Ensure we have a leader and a node registration.
if leader := resp.Header.Get("X-Consul-KnownLeader"); leader != "true" {
return false, fmt.Errorf("Consul leader status: %#v", leader)
r.Fatalf("Consul leader status: %#v", leader)
}
index, err = strconv.ParseInt(resp.Header.Get("X-Consul-Index"), 10, 64)
if err != nil {
return false, fmt.Errorf("Consul index was bad: %v", err)
r.Fatal("bad consul index", err)
}
if index == 0 {
return false, fmt.Errorf("Consul index is 0")
r.Fatal("consul index is 0")
}
// Watch for the anti-entropy sync to finish.
var parsed []map[string]interface{}
var v []map[string]interface{}
dec := json.NewDecoder(resp.Body)
if err := dec.Decode(&parsed); err != nil {
return false, err
if err := dec.Decode(&v); err != nil {
r.Fatal(err)
}
if len(parsed) < 1 {
return false, fmt.Errorf("No nodes")
if len(v) < 1 {
r.Fatal("No nodes")
}
taggedAddresses, ok := parsed[0]["TaggedAddresses"].(map[string]interface{})
taggedAddresses, ok := v[0]["TaggedAddresses"].(map[string]interface{})
if !ok {
return false, fmt.Errorf("Missing tagged addresses")
r.Fatal("Missing tagged addresses")
}
if _, ok := taggedAddresses["lan"]; !ok {
return false, fmt.Errorf("No lan tagged addresses")
r.Fatal("No lan tagged addresses")
}
return true, nil
}, func(err error) {
defer s.Stop()
s.t.Fatalf("err: %s", err)
})
}
// url is a helper function which takes a relative URL and
// makes it into a proper URL against the local Consul server.
func (s *TestServer) url(path string) string {
return fmt.Sprintf("http://127.0.0.1:%d%s", s.Config.Ports.HTTP, path)
}
// requireOK checks the HTTP response code and ensures it is acceptable.
func (s *TestServer) requireOK(resp *http.Response) error {
if resp.StatusCode != 200 {
return fmt.Errorf("Bad status code: %d", resp.StatusCode)
if f.failed {
return errors.New("failed waiting for leader")
}
return nil
}
// put performs a new HTTP PUT request.
func (s *TestServer) put(path string, body io.Reader) *http.Response {
req, err := http.NewRequest("PUT", s.url(path), body)
if err != nil {
s.t.Fatalf("err: %s", err)
}
resp, err := s.HttpClient.Do(req)
if err != nil {
s.t.Fatalf("err: %s", err)
}
if err := s.requireOK(resp); err != nil {
defer resp.Body.Close()
s.t.Fatal(err)
}
return resp
}
// get performs a new HTTP GET request.
func (s *TestServer) get(path string) *http.Response {
resp, err := s.HttpClient.Get(s.url(path))
if err != nil {
s.t.Fatalf("err: %s", err)
}
if err := s.requireOK(resp); err != nil {
defer resp.Body.Close()
s.t.Fatal(err)
}
return resp
}
// encodePayload returns a new io.Reader wrapping the encoded contents
// of the payload, suitable for passing directly to a new request.
func (s *TestServer) encodePayload(payload interface{}) io.Reader {
var encoded bytes.Buffer
enc := json.NewEncoder(&encoded)
if err := enc.Encode(payload); err != nil {
s.t.Fatalf("err: %s", err)
}
return &encoded
}
// JoinLAN is used to join nodes within the same datacenter.
func (s *TestServer) JoinLAN(addr string) {
resp := s.get("/v1/agent/join/" + addr)
resp.Body.Close()
}
// JoinWAN is used to join remote datacenters together.
func (s *TestServer) JoinWAN(addr string) {
resp := s.get("/v1/agent/join/" + addr + "?wan=1")
resp.Body.Close()
}
// SetKV sets an individual key in the K/V store.
func (s *TestServer) SetKV(key string, val []byte) {
resp := s.put("/v1/kv/"+key, bytes.NewBuffer(val))
resp.Body.Close()
}
// GetKV retrieves a single key and returns its value
func (s *TestServer) GetKV(key string) []byte {
resp := s.get("/v1/kv/" + key)
defer resp.Body.Close()
raw, err := ioutil.ReadAll(resp.Body)
if err != nil {
s.t.Fatalf("err: %s", err)
}
var result []*TestKVResponse
if err := json.Unmarshal(raw, &result); err != nil {
s.t.Fatalf("err: %s", err)
}
if len(result) < 1 {
s.t.Fatalf("key does not exist: %s", key)
}
v, err := base64.StdEncoding.DecodeString(result[0].Value)
if err != nil {
s.t.Fatalf("err: %s", err)
}
return v
}
// PopulateKV fills the Consul KV with data from a generic map.
func (s *TestServer) PopulateKV(data map[string][]byte) {
for k, v := range data {
s.SetKV(k, v)
}
}
// ListKV returns a list of keys present in the KV store. This will list all
// keys under the given prefix recursively and return them as a slice.
func (s *TestServer) ListKV(prefix string) []string {
resp := s.get("/v1/kv/" + prefix + "?keys")
defer resp.Body.Close()
raw, err := ioutil.ReadAll(resp.Body)
if err != nil {
s.t.Fatalf("err: %s", err)
}
var result []string
if err := json.Unmarshal(raw, &result); err != nil {
s.t.Fatalf("err: %s", err)
}
return result
}
// AddService adds a new service to the Consul instance. It also
// automatically adds a health check with the given status, which
// can be one of "passing", "warning", or "critical".
func (s *TestServer) AddService(name, status string, tags []string) {
svc := &TestService{
Name: name,
Tags: tags,
}
payload := s.encodePayload(svc)
s.put("/v1/agent/service/register", payload)
chkName := "service:" + name
chk := &TestCheck{
Name: chkName,
ServiceID: name,
TTL: "10m",
}
payload = s.encodePayload(chk)
s.put("/v1/agent/check/register", payload)
switch status {
case structs.HealthPassing:
s.put("/v1/agent/check/pass/"+chkName, nil)
case structs.HealthWarning:
s.put("/v1/agent/check/warn/"+chkName, nil)
case structs.HealthCritical:
s.put("/v1/agent/check/fail/"+chkName, nil)
default:
s.t.Fatalf("Unrecognized status: %s", status)
}
}
// AddCheck adds a check to the Consul instance. If the serviceID is
// left empty (""), then the check will be associated with the node.
// The check status may be "passing", "warning", or "critical".
func (s *TestServer) AddCheck(name, serviceID, status string) {
chk := &TestCheck{
ID: name,
Name: name,
TTL: "10m",
}
if serviceID != "" {
chk.ServiceID = serviceID
}
payload := s.encodePayload(chk)
s.put("/v1/agent/check/register", payload)
switch status {
case structs.HealthPassing:
s.put("/v1/agent/check/pass/"+name, nil)
case structs.HealthWarning:
s.put("/v1/agent/check/warn/"+name, nil)
case structs.HealthCritical:
s.put("/v1/agent/check/fail/"+name, nil)
default:
s.t.Fatalf("Unrecognized status: %s", status)
}
}

View File

@ -0,0 +1,256 @@
package testutil
import (
"bytes"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"log"
"net/http"
"testing"
"github.com/pkg/errors"
)
// copied from testutil to break circular dependency
const (
HealthAny = "any"
HealthPassing = "passing"
HealthWarning = "warning"
HealthCritical = "critical"
HealthMaint = "maintenance"
)
// JoinLAN is used to join local datacenters together.
func (s *TestServer) JoinLAN(t *testing.T, addr string) {
resp := s.get(t, "/v1/agent/join/"+addr)
defer resp.Body.Close()
}
// JoinWAN is used to join remote datacenters together.
func (s *TestServer) JoinWAN(t *testing.T, addr string) {
resp := s.get(t, "/v1/agent/join/"+addr+"?wan=1")
resp.Body.Close()
}
// SetKV sets an individual key in the K/V store.
func (s *TestServer) SetKV(t *testing.T, key string, val []byte) {
resp := s.put(t, "/v1/kv/"+key, bytes.NewBuffer(val))
resp.Body.Close()
}
// SetKVString sets an individual key in the K/V store, but accepts a string
// instead of []byte.
func (s *TestServer) SetKVString(t *testing.T, key string, val string) {
resp := s.put(t, "/v1/kv/"+key, bytes.NewBufferString(val))
resp.Body.Close()
}
// GetKV retrieves a single key and returns its value
func (s *TestServer) GetKV(t *testing.T, key string) []byte {
resp := s.get(t, "/v1/kv/"+key)
defer resp.Body.Close()
raw, err := ioutil.ReadAll(resp.Body)
if err != nil {
t.Fatalf("failed to read body: %s", err)
}
var result []*TestKVResponse
if err := json.Unmarshal(raw, &result); err != nil {
t.Fatalf("failed to unmarshal: %s", err)
}
if len(result) < 1 {
t.Fatalf("key does not exist: %s", key)
}
v, err := base64.StdEncoding.DecodeString(result[0].Value)
if err != nil {
t.Fatalf("failed to base64 decode: %s", err)
}
return v
}
// GetKVString retrieves a value from the store, but returns as a string instead
// of []byte.
func (s *TestServer) GetKVString(t *testing.T, key string) string {
return string(s.GetKV(t, key))
}
// PopulateKV fills the Consul KV with data from a generic map.
func (s *TestServer) PopulateKV(t *testing.T, data map[string][]byte) {
for k, v := range data {
s.SetKV(t, k, v)
}
}
// ListKV returns a list of keys present in the KV store. This will list all
// keys under the given prefix recursively and return them as a slice.
func (s *TestServer) ListKV(t *testing.T, prefix string) []string {
resp := s.get(t, "/v1/kv/"+prefix+"?keys")
defer resp.Body.Close()
raw, err := ioutil.ReadAll(resp.Body)
if err != nil {
t.Fatalf("failed to read body: %s", err)
}
var result []string
if err := json.Unmarshal(raw, &result); err != nil {
t.Fatalf("failed to unmarshal: %s", err)
}
return result
}
// AddService adds a new service to the Consul instance. It also
// automatically adds a health check with the given status, which
// can be one of "passing", "warning", or "critical".
func (s *TestServer) AddService(t *testing.T, name, status string, tags []string) {
s.AddAddressableService(t, name, status, "", 0, tags) // set empty address and 0 as port for non-accessible service
}
// AddAddressableService adds a new service to the Consul instance by
// passing "address" and "port". It is helpful when you need to prepare a fakeService
// that maybe accessed with in target source code.
// It also automatically adds a health check with the given status, which
// can be one of "passing", "warning", or "critical", just like `AddService` does.
func (s *TestServer) AddAddressableService(t *testing.T, name, status, address string, port int, tags []string) {
svc := &TestService{
Name: name,
Tags: tags,
Address: address,
Port: port,
}
payload, err := s.encodePayload(svc)
if err != nil {
t.Fatal(err)
}
s.put(t, "/v1/agent/service/register", payload)
chkName := "service:" + name
chk := &TestCheck{
Name: chkName,
ServiceID: name,
TTL: "10m",
}
payload, err = s.encodePayload(chk)
if err != nil {
t.Fatal(err)
}
s.put(t, "/v1/agent/check/register", payload)
switch status {
case HealthPassing:
s.put(t, "/v1/agent/check/pass/"+chkName, nil)
case HealthWarning:
s.put(t, "/v1/agent/check/warn/"+chkName, nil)
case HealthCritical:
s.put(t, "/v1/agent/check/fail/"+chkName, nil)
default:
t.Fatalf("Unrecognized status: %s", status)
}
}
// AddCheck adds a check to the Consul instance. If the serviceID is
// left empty (""), then the check will be associated with the node.
// The check status may be "passing", "warning", or "critical".
func (s *TestServer) AddCheck(t *testing.T, name, serviceID, status string) {
chk := &TestCheck{
ID: name,
Name: name,
TTL: "10m",
}
if serviceID != "" {
chk.ServiceID = serviceID
}
payload, err := s.encodePayload(chk)
if err != nil {
t.Fatal(err)
}
s.put(t, "/v1/agent/check/register", payload)
switch status {
case HealthPassing:
s.put(t, "/v1/agent/check/pass/"+name, nil)
case HealthWarning:
s.put(t, "/v1/agent/check/warn/"+name, nil)
case HealthCritical:
s.put(t, "/v1/agent/check/fail/"+name, nil)
default:
t.Fatalf("Unrecognized status: %s", status)
}
}
// put performs a new HTTP PUT request.
func (s *TestServer) put(t *testing.T, path string, body io.Reader) *http.Response {
req, err := http.NewRequest("PUT", s.url(path), body)
if err != nil {
t.Fatalf("failed to create PUT request: %s", err)
}
resp, err := s.HTTPClient.Do(req)
if err != nil {
t.Fatalf("failed to make PUT request: %s", err)
}
if err := s.requireOK(resp); err != nil {
defer resp.Body.Close()
t.Fatalf("not OK PUT: %s", err)
}
return resp
}
// get performs a new HTTP GET request.
func (s *TestServer) get(t *testing.T, path string) *http.Response {
resp, err := s.HTTPClient.Get(s.url(path))
if err != nil {
t.Fatalf("failed to create GET request: %s", err)
}
if err := s.requireOK(resp); err != nil {
defer resp.Body.Close()
t.Fatalf("not OK GET: %s", err)
}
return resp
}
// encodePayload returns a new io.Reader wrapping the encoded contents
// of the payload, suitable for passing directly to a new request.
func (s *TestServer) encodePayload(payload interface{}) (io.Reader, error) {
var encoded bytes.Buffer
enc := json.NewEncoder(&encoded)
if err := enc.Encode(payload); err != nil {
return nil, errors.Wrap(err, "failed to encode payload")
}
return &encoded, nil
}
// url is a helper function which takes a relative URL and
// makes it into a proper URL against the local Consul server.
func (s *TestServer) url(path string) string {
if s == nil {
log.Fatal("s is nil")
}
if s.Config == nil {
log.Fatal("s.Config is nil")
}
if s.Config.Ports == nil {
log.Fatal("s.Config.Ports is nil")
}
if s.Config.Ports.HTTP == 0 {
log.Fatal("s.Config.Ports.HTTP is 0")
}
if path == "" {
log.Fatal("path is empty")
}
return fmt.Sprintf("http://127.0.0.1:%d%s", s.Config.Ports.HTTP, path)
}
// requireOK checks the HTTP response code and ensures it is acceptable.
func (s *TestServer) requireOK(resp *http.Response) error {
if resp.StatusCode != 200 {
return fmt.Errorf("Bad status code: %d", resp.StatusCode)
}
return nil
}

View File

@ -0,0 +1,65 @@
package testutil
import "testing"
type WrappedServer struct {
s *TestServer
t *testing.T
}
// Wrap wraps the test server in a `testing.t` for convenience.
//
// For example, the following code snippets are equivalent.
//
// server.JoinLAN(t, "1.2.3.4")
// server.Wrap(t).JoinLAN("1.2.3.4")
//
// This is useful when you are calling multiple functions and save the wrapped
// value as another variable to reduce the inclusion of "t".
func (s *TestServer) Wrap(t *testing.T) *WrappedServer {
return &WrappedServer{s, t}
}
func (w *WrappedServer) JoinLAN(addr string) {
w.s.JoinLAN(w.t, addr)
}
func (w *WrappedServer) JoinWAN(addr string) {
w.s.JoinWAN(w.t, addr)
}
func (w *WrappedServer) SetKV(key string, val []byte) {
w.s.SetKV(w.t, key, val)
}
func (w *WrappedServer) SetKVString(key string, val string) {
w.s.SetKVString(w.t, key, val)
}
func (w *WrappedServer) GetKV(key string) []byte {
return w.s.GetKV(w.t, key)
}
func (w *WrappedServer) GetKVString(key string) string {
return w.s.GetKVString(w.t, key)
}
func (w *WrappedServer) PopulateKV(data map[string][]byte) {
w.s.PopulateKV(w.t, data)
}
func (w *WrappedServer) ListKV(prefix string) []string {
return w.s.ListKV(w.t, prefix)
}
func (w *WrappedServer) AddService(name, status string, tags []string) {
w.s.AddService(w.t, name, status, tags)
}
func (w *WrappedServer) AddAddressableService(name, status, address string, port int, tags []string) {
w.s.AddAddressableService(w.t, name, status, address, port, tags)
}
func (w *WrappedServer) AddCheck(name, serviceID, status string) {
w.s.AddCheck(w.t, name, serviceID, status)
}

View File

@ -1,62 +0,0 @@
package testutil
import (
"fmt"
"testing"
"time"
"github.com/hashicorp/consul/consul/structs"
)
type testFn func() (bool, error)
type errorFn func(error)
const (
baseWait = 1 * time.Millisecond
maxWait = 100 * time.Millisecond
)
func WaitForResult(try testFn, fail errorFn) {
var err error
wait := baseWait
for retries := 100; retries > 0; retries-- {
var success bool
success, err = try()
if success {
time.Sleep(25 * time.Millisecond)
return
}
time.Sleep(wait)
wait *= 2
if wait > maxWait {
wait = maxWait
}
}
fail(err)
}
type rpcFn func(string, interface{}, interface{}) error
func WaitForLeader(t *testing.T, rpc rpcFn, dc string) structs.IndexedNodes {
var out structs.IndexedNodes
WaitForResult(func() (bool, error) {
// Ensure we have a leader and a node registration.
args := &structs.DCSpecificRequest{
Datacenter: dc,
}
if err := rpc("Catalog.ListNodes", args, &out); err != nil {
return false, fmt.Errorf("Catalog.ListNodes failed: %v", err)
}
if !out.QueryMeta.KnownLeader {
return false, fmt.Errorf("No leader")
}
if out.Index == 0 {
return false, fmt.Errorf("Consul index is 0")
}
return true, nil
}, func(err error) {
t.Fatalf("failed to find leader: %v", err)
})
return out
}

View File

@ -1,39 +0,0 @@
# Consul `types` Package
The Go language has a strong type system built into the language. The
`types` package corrals named types into a single package that is terminal in
`go`'s import graph. The `types` package should not have any downstream
dependencies. Each subsystem that defines its own set of types exists in its
own file, but all types are defined in the same package.
# Why
> Everything should be made as simple as possible, but not simpler.
`string` is a useful container and underlying type for identifiers, however
the `string` type is effectively opaque to the compiler in terms of how a
given string is intended to be used. For instance, there is nothing
preventing the following from happening:
```go
// `map` of Widgets, looked up by ID
var widgetLookup map[string]*Widget
// ...
var widgetID string = "widgetID"
w, found := widgetLookup[widgetID]
// Bad!
var widgetName string = "name of widget"
w, found := widgetLookup[widgetName]
```
but this class of problem is entirely preventable:
```go
type WidgetID string
var widgetLookup map[WidgetID]*Widget
var widgetName
```
TL;DR: intentions and idioms aren't statically checked by compilers. The
`types` package uses Go's strong type system to prevent this class of bug.

View File

@ -1,5 +0,0 @@
package types
// CheckID is a strongly typed string used to uniquely represent a Consul
// Check on an Agent (a CheckID is not globally unique).
type CheckID string

View File

@ -1,4 +0,0 @@
package types
// NodeID is a unique identifier for a node across space and time.
type NodeID string

View File

@ -1,354 +0,0 @@
Mozilla Public License, version 2.0
1. Definitions
1.1. “Contributor”
means each individual or legal entity that creates, contributes to the
creation of, or owns Covered Software.
1.2. “Contributor Version”
means the combination of the Contributions of others (if any) used by a
Contributor and that particular Contributors Contribution.
1.3. “Contribution”
means Covered Software of a particular Contributor.
1.4. “Covered Software”
means Source Code Form to which the initial Contributor has attached the
notice in Exhibit A, the Executable Form of such Source Code Form, and
Modifications of such Source Code Form, in each case including portions
thereof.
1.5. “Incompatible With Secondary Licenses”
means
a. that the initial Contributor has attached the notice described in
Exhibit B to the Covered Software; or
b. that the Covered Software was made available under the terms of version
1.1 or earlier of the License, but not also under the terms of a
Secondary License.
1.6. “Executable Form”
means any form of the work other than Source Code Form.
1.7. “Larger Work”
means a work that combines Covered Software with other material, in a separate
file or files, that is not Covered Software.
1.8. “License”
means this document.
1.9. “Licensable”
means having the right to grant, to the maximum extent possible, whether at the
time of the initial grant or subsequently, any and all of the rights conveyed by
this License.
1.10. “Modifications”
means any of the following:
a. any file in Source Code Form that results from an addition to, deletion
from, or modification of the contents of Covered Software; or
b. any new file in Source Code Form that contains any Covered Software.
1.11. “Patent Claims” of a Contributor
means any patent claim(s), including without limitation, method, process,
and apparatus claims, in any patent Licensable by such Contributor that
would be infringed, but for the grant of the License, by the making,
using, selling, offering for sale, having made, import, or transfer of
either its Contributions or its Contributor Version.
1.12. “Secondary License”
means either the GNU General Public License, Version 2.0, the GNU Lesser
General Public License, Version 2.1, the GNU Affero General Public
License, Version 3.0, or any later versions of those licenses.
1.13. “Source Code Form”
means the form of the work preferred for making modifications.
1.14. “You” (or “Your”)
means an individual or a legal entity exercising rights under this
License. For legal entities, “You” includes any entity that controls, is
controlled by, or is under common control with You. For purposes of this
definition, “control” means (a) the power, direct or indirect, to cause
the direction or management of such entity, whether by contract or
otherwise, or (b) ownership of more than fifty percent (50%) of the
outstanding shares or beneficial ownership of such entity.
2. License Grants and Conditions
2.1. Grants
Each Contributor hereby grants You a world-wide, royalty-free,
non-exclusive license:
a. under intellectual property rights (other than patent or trademark)
Licensable by such Contributor to use, reproduce, make available,
modify, display, perform, distribute, and otherwise exploit its
Contributions, either on an unmodified basis, with Modifications, or as
part of a Larger Work; and
b. under Patent Claims of such Contributor to make, use, sell, offer for
sale, have made, import, and otherwise transfer either its Contributions
or its Contributor Version.
2.2. Effective Date
The licenses granted in Section 2.1 with respect to any Contribution become
effective for each Contribution on the date the Contributor first distributes
such Contribution.
2.3. Limitations on Grant Scope
The licenses granted in this Section 2 are the only rights granted under this
License. No additional rights or licenses will be implied from the distribution
or licensing of Covered Software under this License. Notwithstanding Section
2.1(b) above, no patent license is granted by a Contributor:
a. for any code that a Contributor has removed from Covered Software; or
b. for infringements caused by: (i) Your and any other third partys
modifications of Covered Software, or (ii) the combination of its
Contributions with other software (except as part of its Contributor
Version); or
c. under Patent Claims infringed by Covered Software in the absence of its
Contributions.
This License does not grant any rights in the trademarks, service marks, or
logos of any Contributor (except as may be necessary to comply with the
notice requirements in Section 3.4).
2.4. Subsequent Licenses
No Contributor makes additional grants as a result of Your choice to
distribute the Covered Software under a subsequent version of this License
(see Section 10.2) or under the terms of a Secondary License (if permitted
under the terms of Section 3.3).
2.5. Representation
Each Contributor represents that the Contributor believes its Contributions
are its original creation(s) or it has sufficient rights to grant the
rights to its Contributions conveyed by this License.
2.6. Fair Use
This License is not intended to limit any rights You have under applicable
copyright doctrines of fair use, fair dealing, or other equivalents.
2.7. Conditions
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in
Section 2.1.
3. Responsibilities
3.1. Distribution of Source Form
All distribution of Covered Software in Source Code Form, including any
Modifications that You create or to which You contribute, must be under the
terms of this License. You must inform recipients that the Source Code Form
of the Covered Software is governed by the terms of this License, and how
they can obtain a copy of this License. You may not attempt to alter or
restrict the recipients rights in the Source Code Form.
3.2. Distribution of Executable Form
If You distribute Covered Software in Executable Form then:
a. such Covered Software must also be made available in Source Code Form,
as described in Section 3.1, and You must inform recipients of the
Executable Form how they can obtain a copy of such Source Code Form by
reasonable means in a timely manner, at a charge no more than the cost
of distribution to the recipient; and
b. You may distribute such Executable Form under the terms of this License,
or sublicense it under different terms, provided that the license for
the Executable Form does not attempt to limit or alter the recipients
rights in the Source Code Form under this License.
3.3. Distribution of a Larger Work
You may create and distribute a Larger Work under terms of Your choice,
provided that You also comply with the requirements of this License for the
Covered Software. If the Larger Work is a combination of Covered Software
with a work governed by one or more Secondary Licenses, and the Covered
Software is not Incompatible With Secondary Licenses, this License permits
You to additionally distribute such Covered Software under the terms of
such Secondary License(s), so that the recipient of the Larger Work may, at
their option, further distribute the Covered Software under the terms of
either this License or such Secondary License(s).
3.4. Notices
You may not remove or alter the substance of any license notices (including
copyright notices, patent notices, disclaimers of warranty, or limitations
of liability) contained within the Source Code Form of the Covered
Software, except that You may alter any license notices to the extent
required to remedy known factual inaccuracies.
3.5. Application of Additional Terms
You may choose to offer, and to charge a fee for, warranty, support,
indemnity or liability obligations to one or more recipients of Covered
Software. However, You may do so only on Your own behalf, and not on behalf
of any Contributor. You must make it absolutely clear that any such
warranty, support, indemnity, or liability obligation is offered by You
alone, and You hereby agree to indemnify every Contributor for any
liability incurred by such Contributor as a result of warranty, support,
indemnity or liability terms You offer. You may include additional
disclaimers of warranty and limitations of liability specific to any
jurisdiction.
4. Inability to Comply Due to Statute or Regulation
If it is impossible for You to comply with any of the terms of this License
with respect to some or all of the Covered Software due to statute, judicial
order, or regulation then You must: (a) comply with the terms of this License
to the maximum extent possible; and (b) describe the limitations and the code
they affect. Such description must be placed in a text file included with all
distributions of the Covered Software under this License. Except to the
extent prohibited by statute or regulation, such description must be
sufficiently detailed for a recipient of ordinary skill to be able to
understand it.
5. Termination
5.1. The rights granted under this License will terminate automatically if You
fail to comply with any of its terms. However, if You become compliant,
then the rights granted under this License from a particular Contributor
are reinstated (a) provisionally, unless and until such Contributor
explicitly and finally terminates Your grants, and (b) on an ongoing basis,
if such Contributor fails to notify You of the non-compliance by some
reasonable means prior to 60 days after You have come back into compliance.
Moreover, Your grants from a particular Contributor are reinstated on an
ongoing basis if such Contributor notifies You of the non-compliance by
some reasonable means, this is the first time You have received notice of
non-compliance with this License from such Contributor, and You become
compliant prior to 30 days after Your receipt of the notice.
5.2. If You initiate litigation against any entity by asserting a patent
infringement claim (excluding declaratory judgment actions, counter-claims,
and cross-claims) alleging that a Contributor Version directly or
indirectly infringes any patent, then the rights granted to You by any and
all Contributors for the Covered Software under Section 2.1 of this License
shall terminate.
5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user
license agreements (excluding distributors and resellers) which have been
validly granted by You or Your distributors under this License prior to
termination shall survive termination.
6. Disclaimer of Warranty
Covered Software is provided under this License on an “as is” basis, without
warranty of any kind, either expressed, implied, or statutory, including,
without limitation, warranties that the Covered Software is free of defects,
merchantable, fit for a particular purpose or non-infringing. The entire
risk as to the quality and performance of the Covered Software is with You.
Should any Covered Software prove defective in any respect, You (not any
Contributor) assume the cost of any necessary servicing, repair, or
correction. This disclaimer of warranty constitutes an essential part of this
License. No use of any Covered Software is authorized under this License
except under this disclaimer.
7. Limitation of Liability
Under no circumstances and under no legal theory, whether tort (including
negligence), contract, or otherwise, shall any Contributor, or anyone who
distributes Covered Software as permitted above, be liable to You for any
direct, indirect, special, incidental, or consequential damages of any
character including, without limitation, damages for lost profits, loss of
goodwill, work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses, even if such party shall have been
informed of the possibility of such damages. This limitation of liability
shall not apply to liability for death or personal injury resulting from such
partys negligence to the extent applicable law prohibits such limitation.
Some jurisdictions do not allow the exclusion or limitation of incidental or
consequential damages, so this exclusion and limitation may not apply to You.
8. Litigation
Any litigation relating to this License may be brought only in the courts of
a jurisdiction where the defendant maintains its principal place of business
and such litigation shall be governed by laws of that jurisdiction, without
reference to its conflict-of-law provisions. Nothing in this Section shall
prevent a partys ability to bring cross-claims or counter-claims.
9. Miscellaneous
This License represents the complete agreement concerning the subject matter
hereof. If any provision of this License is held to be unenforceable, such
provision shall be reformed only to the extent necessary to make it
enforceable. Any law or regulation which provides that the language of a
contract shall be construed against the drafter shall not be used to construe
this License against a Contributor.
10. Versions of the License
10.1. New Versions
Mozilla Foundation is the license steward. Except as provided in Section
10.3, no one other than the license steward has the right to modify or
publish new versions of this License. Each version will be given a
distinguishing version number.
10.2. Effect of New Versions
You may distribute the Covered Software under the terms of the version of
the License under which You originally received the Covered Software, or
under the terms of any subsequent version published by the license
steward.
10.3. Modified Versions
If you create software not governed by this License, and you want to
create a new license for such software, you may create and use a modified
version of this License if you rename the license and remove any
references to the name of the license steward (except to note that such
modified license differs from this License).
10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses
If You choose to distribute Source Code Form that is Incompatible With
Secondary Licenses under the terms of this version of the License, the
notice described in Exhibit B of this License must be attached.
Exhibit A - Source Code Form License Notice
This Source Code Form is subject to the
terms of the Mozilla Public License, v.
2.0. If a copy of the MPL was not
distributed with this file, You can
obtain one at
http://mozilla.org/MPL/2.0/.
If it is not possible or desirable to put the notice in a particular file, then
You may include the notice in a location (such as a LICENSE file in a relevant
directory) where a recipient would be likely to look for such a notice.
You may add additional accurate notices of copyright ownership.
Exhibit B - “Incompatible With Secondary Licenses” Notice
This Source Code Form is “Incompatible
With Secondary Licenses”, as defined by
the Mozilla Public License, v. 2.0.

View File

@ -1,17 +0,0 @@
DEPS = $(go list -f '{{range .TestImports}}{{.}} {{end}}' ./...)
test:
go test -timeout=60s ./...
integ: test
INTEG_TESTS=yes go test -timeout=5s -run=Integ ./...
deps:
go get -d -v ./...
echo $(DEPS) | xargs -n1 go get -d
cov:
INTEG_TESTS=yes gocov test github.com/hashicorp/raft | gocov-html > /tmp/coverage.html
open /tmp/coverage.html
.PHONY: test cov integ deps

View File

@ -1,89 +0,0 @@
raft [![Build Status](https://travis-ci.org/hashicorp/raft.png)](https://travis-ci.org/hashicorp/raft)
====
raft is a [Go](http://www.golang.org) library that manages a replicated
log and can be used with an FSM to manage replicated state machines. It
is library for providing [consensus](http://en.wikipedia.org/wiki/Consensus_(computer_science)).
The use cases for such a library are far-reaching as replicated state
machines are a key component of many distributed systems. They enable
building Consistent, Partition Tolerant (CP) systems, with limited
fault tolerance as well.
## Building
If you wish to build raft you'll need Go version 1.2+ installed.
Please check your installation with:
```
go version
```
## Documentation
For complete documentation, see the associated [Godoc](http://godoc.org/github.com/hashicorp/raft).
To prevent complications with cgo, the primary backend `MDBStore` is in a separate repository,
called [raft-mdb](http://github.com/hashicorp/raft-mdb). That is the recommended implementation
for the `LogStore` and `StableStore`.
A pure Go backend using [BoltDB](https://github.com/boltdb/bolt) is also available called
[raft-boltdb](https://github.com/hashicorp/raft-boltdb). It can also be used as a `LogStore`
and `StableStore`.
## Protocol
raft is based on ["Raft: In Search of an Understandable Consensus Algorithm"](https://ramcloud.stanford.edu/wiki/download/attachments/11370504/raft.pdf)
A high level overview of the Raft protocol is described below, but for details please read the full
[Raft paper](https://ramcloud.stanford.edu/wiki/download/attachments/11370504/raft.pdf)
followed by the raft source. Any questions about the raft protocol should be sent to the
[raft-dev mailing list](https://groups.google.com/forum/#!forum/raft-dev).
### Protocol Description
Raft nodes are always in one of three states: follower, candidate or leader. All
nodes initially start out as a follower. In this state, nodes can accept log entries
from a leader and cast votes. If no entries are received for some time, nodes
self-promote to the candidate state. In the candidate state nodes request votes from
their peers. If a candidate receives a quorum of votes, then it is promoted to a leader.
The leader must accept new log entries and replicate to all the other followers.
In addition, if stale reads are not acceptable, all queries must also be performed on
the leader.
Once a cluster has a leader, it is able to accept new log entries. A client can
request that a leader append a new log entry, which is an opaque binary blob to
Raft. The leader then writes the entry to durable storage and attempts to replicate
to a quorum of followers. Once the log entry is considered *committed*, it can be
*applied* to a finite state machine. The finite state machine is application specific,
and is implemented using an interface.
An obvious question relates to the unbounded nature of a replicated log. Raft provides
a mechanism by which the current state is snapshotted, and the log is compacted. Because
of the FSM abstraction, restoring the state of the FSM must result in the same state
as a replay of old logs. This allows Raft to capture the FSM state at a point in time,
and then remove all the logs that were used to reach that state. This is performed automatically
without user intervention, and prevents unbounded disk usage as well as minimizing
time spent replaying logs.
Lastly, there is the issue of updating the peer set when new servers are joining
or existing servers are leaving. As long as a quorum of nodes is available, this
is not an issue as Raft provides mechanisms to dynamically update the peer set.
If a quorum of nodes is unavailable, then this becomes a very challenging issue.
For example, suppose there are only 2 peers, A and B. The quorum size is also
2, meaning both nodes must agree to commit a log entry. If either A or B fails,
it is now impossible to reach quorum. This means the cluster is unable to add,
or remove a node, or commit any additional log entries. This results in *unavailability*.
At this point, manual intervention would be required to remove either A or B,
and to restart the remaining node in bootstrap mode.
A Raft cluster of 3 nodes can tolerate a single node failure, while a cluster
of 5 can tolerate 2 node failures. The recommended configuration is to either
run 3 or 5 raft servers. This maximizes availability without
greatly sacrificing performance.
In terms of performance, Raft is comparable to Paxos. Assuming stable leadership,
committing a log entry requires a single round trip to half of the cluster.
Thus performance is bound by disk I/O and network latency.

1007
vendor/github.com/hashicorp/raft/api.go generated vendored

File diff suppressed because it is too large Load Diff

View File

@ -1,151 +0,0 @@
package raft
// RPCHeader is a common sub-structure used to pass along protocol version and
// other information about the cluster. For older Raft implementations before
// versioning was added this will default to a zero-valued structure when read
// by newer Raft versions.
type RPCHeader struct {
// ProtocolVersion is the version of the protocol the sender is
// speaking.
ProtocolVersion ProtocolVersion
}
// WithRPCHeader is an interface that exposes the RPC header.
type WithRPCHeader interface {
GetRPCHeader() RPCHeader
}
// AppendEntriesRequest is the command used to append entries to the
// replicated log.
type AppendEntriesRequest struct {
RPCHeader
// Provide the current term and leader
Term uint64
Leader []byte
// Provide the previous entries for integrity checking
PrevLogEntry uint64
PrevLogTerm uint64
// New entries to commit
Entries []*Log
// Commit index on the leader
LeaderCommitIndex uint64
}
// See WithRPCHeader.
func (r *AppendEntriesRequest) GetRPCHeader() RPCHeader {
return r.RPCHeader
}
// AppendEntriesResponse is the response returned from an
// AppendEntriesRequest.
type AppendEntriesResponse struct {
RPCHeader
// Newer term if leader is out of date
Term uint64
// Last Log is a hint to help accelerate rebuilding slow nodes
LastLog uint64
// We may not succeed if we have a conflicting entry
Success bool
// There are scenarios where this request didn't succeed
// but there's no need to wait/back-off the next attempt.
NoRetryBackoff bool
}
// See WithRPCHeader.
func (r *AppendEntriesResponse) GetRPCHeader() RPCHeader {
return r.RPCHeader
}
// RequestVoteRequest is the command used by a candidate to ask a Raft peer
// for a vote in an election.
type RequestVoteRequest struct {
RPCHeader
// Provide the term and our id
Term uint64
Candidate []byte
// Used to ensure safety
LastLogIndex uint64
LastLogTerm uint64
}
// See WithRPCHeader.
func (r *RequestVoteRequest) GetRPCHeader() RPCHeader {
return r.RPCHeader
}
// RequestVoteResponse is the response returned from a RequestVoteRequest.
type RequestVoteResponse struct {
RPCHeader
// Newer term if leader is out of date.
Term uint64
// Peers is deprecated, but required by servers that only understand
// protocol version 0. This is not populated in protocol version 2
// and later.
Peers []byte
// Is the vote granted.
Granted bool
}
// See WithRPCHeader.
func (r *RequestVoteResponse) GetRPCHeader() RPCHeader {
return r.RPCHeader
}
// InstallSnapshotRequest is the command sent to a Raft peer to bootstrap its
// log (and state machine) from a snapshot on another peer.
type InstallSnapshotRequest struct {
RPCHeader
SnapshotVersion SnapshotVersion
Term uint64
Leader []byte
// These are the last index/term included in the snapshot
LastLogIndex uint64
LastLogTerm uint64
// Peer Set in the snapshot. This is deprecated in favor of Configuration
// but remains here in case we receive an InstallSnapshot from a leader
// that's running old code.
Peers []byte
// Cluster membership.
Configuration []byte
// Log index where 'Configuration' entry was originally written.
ConfigurationIndex uint64
// Size of the snapshot
Size int64
}
// See WithRPCHeader.
func (r *InstallSnapshotRequest) GetRPCHeader() RPCHeader {
return r.RPCHeader
}
// InstallSnapshotResponse is the response returned from an
// InstallSnapshotRequest.
type InstallSnapshotResponse struct {
RPCHeader
Term uint64
Success bool
}
// See WithRPCHeader.
func (r *InstallSnapshotResponse) GetRPCHeader() RPCHeader {
return r.RPCHeader
}

View File

@ -1,101 +0,0 @@
package raft
import (
"sort"
"sync"
)
// Commitment is used to advance the leader's commit index. The leader and
// replication goroutines report in newly written entries with Match(), and
// this notifies on commitCh when the commit index has advanced.
type commitment struct {
// protectes matchIndexes and commitIndex
sync.Mutex
// notified when commitIndex increases
commitCh chan struct{}
// voter ID to log index: the server stores up through this log entry
matchIndexes map[ServerID]uint64
// a quorum stores up through this log entry. monotonically increases.
commitIndex uint64
// the first index of this leader's term: this needs to be replicated to a
// majority of the cluster before this leader may mark anything committed
// (per Raft's commitment rule)
startIndex uint64
}
// newCommitment returns an commitment struct that notifies the provided
// channel when log entries have been committed. A new commitment struct is
// created each time this server becomes leader for a particular term.
// 'configuration' is the servers in the cluster.
// 'startIndex' is the first index created in this term (see
// its description above).
func newCommitment(commitCh chan struct{}, configuration Configuration, startIndex uint64) *commitment {
matchIndexes := make(map[ServerID]uint64)
for _, server := range configuration.Servers {
if server.Suffrage == Voter {
matchIndexes[server.ID] = 0
}
}
return &commitment{
commitCh: commitCh,
matchIndexes: matchIndexes,
commitIndex: 0,
startIndex: startIndex,
}
}
// Called when a new cluster membership configuration is created: it will be
// used to determine commitment from now on. 'configuration' is the servers in
// the cluster.
func (c *commitment) setConfiguration(configuration Configuration) {
c.Lock()
defer c.Unlock()
oldMatchIndexes := c.matchIndexes
c.matchIndexes = make(map[ServerID]uint64)
for _, server := range configuration.Servers {
if server.Suffrage == Voter {
c.matchIndexes[server.ID] = oldMatchIndexes[server.ID] // defaults to 0
}
}
c.recalculate()
}
// Called by leader after commitCh is notified
func (c *commitment) getCommitIndex() uint64 {
c.Lock()
defer c.Unlock()
return c.commitIndex
}
// Match is called once a server completes writing entries to disk: either the
// leader has written the new entry or a follower has replied to an
// AppendEntries RPC. The given server's disk agrees with this server's log up
// through the given index.
func (c *commitment) match(server ServerID, matchIndex uint64) {
c.Lock()
defer c.Unlock()
if prev, hasVote := c.matchIndexes[server]; hasVote && matchIndex > prev {
c.matchIndexes[server] = matchIndex
c.recalculate()
}
}
// Internal helper to calculate new commitIndex from matchIndexes.
// Must be called with lock held.
func (c *commitment) recalculate() {
if len(c.matchIndexes) == 0 {
return
}
matched := make([]uint64, 0, len(c.matchIndexes))
for _, idx := range c.matchIndexes {
matched = append(matched, idx)
}
sort.Sort(uint64Slice(matched))
quorumMatchIndex := matched[(len(matched)-1)/2]
if quorumMatchIndex > c.commitIndex && quorumMatchIndex >= c.startIndex {
c.commitIndex = quorumMatchIndex
asyncNotifyCh(c.commitCh)
}
}

View File

@ -1,258 +0,0 @@
package raft
import (
"fmt"
"io"
"log"
"time"
)
// These are the versions of the protocol (which includes RPC messages as
// well as Raft-specific log entries) that this server can _understand_. Use
// the ProtocolVersion member of the Config object to control the version of
// the protocol to use when _speaking_ to other servers. Note that depending on
// the protocol version being spoken, some otherwise understood RPC messages
// may be refused. See dispositionRPC for details of this logic.
//
// There are notes about the upgrade path in the description of the versions
// below. If you are starting a fresh cluster then there's no reason not to
// jump right to the latest protocol version. If you need to interoperate with
// older, version 0 Raft servers you'll need to drive the cluster through the
// different versions in order.
//
// The version details are complicated, but here's a summary of what's required
// to get from a version 0 cluster to version 3:
//
// 1. In version N of your app that starts using the new Raft library with
// versioning, set ProtocolVersion to 1.
// 2. Make version N+1 of your app require version N as a prerequisite (all
// servers must be upgraded). For version N+1 of your app set ProtocolVersion
// to 2.
// 3. Similarly, make version N+2 of your app require version N+1 as a
// prerequisite. For version N+2 of your app, set ProtocolVersion to 3.
//
// During this upgrade, older cluster members will still have Server IDs equal
// to their network addresses. To upgrade an older member and give it an ID, it
// needs to leave the cluster and re-enter:
//
// 1. Remove the server from the cluster with RemoveServer, using its network
// address as its ServerID.
// 2. Update the server's config to a better ID (restarting the server).
// 3. Add the server back to the cluster with AddVoter, using its new ID.
//
// You can do this during the rolling upgrade from N+1 to N+2 of your app, or
// as a rolling change at any time after the upgrade.
//
// Version History
//
// 0: Original Raft library before versioning was added. Servers running this
// version of the Raft library use AddPeerDeprecated/RemovePeerDeprecated
// for all configuration changes, and have no support for LogConfiguration.
// 1: First versioned protocol, used to interoperate with old servers, and begin
// the migration path to newer versions of the protocol. Under this version
// all configuration changes are propagated using the now-deprecated
// RemovePeerDeprecated Raft log entry. This means that server IDs are always
// set to be the same as the server addresses (since the old log entry type
// cannot transmit an ID), and only AddPeer/RemovePeer APIs are supported.
// Servers running this version of the protocol can understand the new
// LogConfiguration Raft log entry but will never generate one so they can
// remain compatible with version 0 Raft servers in the cluster.
// 2: Transitional protocol used when migrating an existing cluster to the new
// server ID system. Server IDs are still set to be the same as server
// addresses, but all configuration changes are propagated using the new
// LogConfiguration Raft log entry type, which can carry full ID information.
// This version supports the old AddPeer/RemovePeer APIs as well as the new
// ID-based AddVoter/RemoveServer APIs which should be used when adding
// version 3 servers to the cluster later. This version sheds all
// interoperability with version 0 servers, but can interoperate with newer
// Raft servers running with protocol version 1 since they can understand the
// new LogConfiguration Raft log entry, and this version can still understand
// their RemovePeerDeprecated Raft log entries. We need this protocol version
// as an intermediate step between 1 and 3 so that servers will propagate the
// ID information that will come from newly-added (or -rolled) servers using
// protocol version 3, but since they are still using their address-based IDs
// from the previous step they will still be able to track commitments and
// their own voting status properly. If we skipped this step, servers would
// be started with their new IDs, but they wouldn't see themselves in the old
// address-based configuration, so none of the servers would think they had a
// vote.
// 3: Protocol adding full support for server IDs and new ID-based server APIs
// (AddVoter, AddNonvoter, etc.), old AddPeer/RemovePeer APIs are no longer
// supported. Version 2 servers should be swapped out by removing them from
// the cluster one-by-one and re-adding them with updated configuration for
// this protocol version, along with their server ID. The remove/add cycle
// is required to populate their server ID. Note that removing must be done
// by ID, which will be the old server's address.
type ProtocolVersion int
const (
ProtocolVersionMin ProtocolVersion = 0
ProtocolVersionMax = 3
)
// These are versions of snapshots that this server can _understand_. Currently,
// it is always assumed that this server generates the latest version, though
// this may be changed in the future to include a configurable version.
//
// Version History
//
// 0: Original Raft library before versioning was added. The peers portion of
// these snapshots is encoded in the legacy format which requires decodePeers
// to parse. This version of snapshots should only be produced by the
// unversioned Raft library.
// 1: New format which adds support for a full configuration structure and its
// associated log index, with support for server IDs and non-voting server
// modes. To ease upgrades, this also includes the legacy peers structure but
// that will never be used by servers that understand version 1 snapshots.
// Since the original Raft library didn't enforce any versioning, we must
// include the legacy peers structure for this version, but we can deprecate
// it in the next snapshot version.
type SnapshotVersion int
const (
SnapshotVersionMin SnapshotVersion = 0
SnapshotVersionMax = 1
)
// Config provides any necessary configuration for the Raft server.
type Config struct {
// ProtocolVersion allows a Raft server to inter-operate with older
// Raft servers running an older version of the code. This is used to
// version the wire protocol as well as Raft-specific log entries that
// the server uses when _speaking_ to other servers. There is currently
// no auto-negotiation of versions so all servers must be manually
// configured with compatible versions. See ProtocolVersionMin and
// ProtocolVersionMax for the versions of the protocol that this server
// can _understand_.
ProtocolVersion ProtocolVersion
// HeartbeatTimeout specifies the time in follower state without
// a leader before we attempt an election.
HeartbeatTimeout time.Duration
// ElectionTimeout specifies the time in candidate state without
// a leader before we attempt an election.
ElectionTimeout time.Duration
// CommitTimeout controls the time without an Apply() operation
// before we heartbeat to ensure a timely commit. Due to random
// staggering, may be delayed as much as 2x this value.
CommitTimeout time.Duration
// MaxAppendEntries controls the maximum number of append entries
// to send at once. We want to strike a balance between efficiency
// and avoiding waste if the follower is going to reject because of
// an inconsistent log.
MaxAppendEntries int
// If we are a member of a cluster, and RemovePeer is invoked for the
// local node, then we forget all peers and transition into the follower state.
// If ShutdownOnRemove is is set, we additional shutdown Raft. Otherwise,
// we can become a leader of a cluster containing only this node.
ShutdownOnRemove bool
// TrailingLogs controls how many logs we leave after a snapshot. This is
// used so that we can quickly replay logs on a follower instead of being
// forced to send an entire snapshot.
TrailingLogs uint64
// SnapshotInterval controls how often we check if we should perform a snapshot.
// We randomly stagger between this value and 2x this value to avoid the entire
// cluster from performing a snapshot at once.
SnapshotInterval time.Duration
// SnapshotThreshold controls how many outstanding logs there must be before
// we perform a snapshot. This is to prevent excessive snapshots when we can
// just replay a small set of logs.
SnapshotThreshold uint64
// LeaderLeaseTimeout is used to control how long the "lease" lasts
// for being the leader without being able to contact a quorum
// of nodes. If we reach this interval without contact, we will
// step down as leader.
LeaderLeaseTimeout time.Duration
// StartAsLeader forces Raft to start in the leader state. This should
// never be used except for testing purposes, as it can cause a split-brain.
StartAsLeader bool
// The unique ID for this server across all time. When running with
// ProtocolVersion < 3, you must set this to be the same as the network
// address of your transport.
LocalID ServerID
// NotifyCh is used to provide a channel that will be notified of leadership
// changes. Raft will block writing to this channel, so it should either be
// buffered or aggressively consumed.
NotifyCh chan<- bool
// LogOutput is used as a sink for logs, unless Logger is specified.
// Defaults to os.Stderr.
LogOutput io.Writer
// Logger is a user-provided logger. If nil, a logger writing to LogOutput
// is used.
Logger *log.Logger
}
// DefaultConfig returns a Config with usable defaults.
func DefaultConfig() *Config {
return &Config{
ProtocolVersion: ProtocolVersionMax,
HeartbeatTimeout: 1000 * time.Millisecond,
ElectionTimeout: 1000 * time.Millisecond,
CommitTimeout: 50 * time.Millisecond,
MaxAppendEntries: 64,
ShutdownOnRemove: true,
TrailingLogs: 10240,
SnapshotInterval: 120 * time.Second,
SnapshotThreshold: 8192,
LeaderLeaseTimeout: 500 * time.Millisecond,
}
}
// ValidateConfig is used to validate a sane configuration
func ValidateConfig(config *Config) error {
// We don't actually support running as 0 in the library any more, but
// we do understand it.
protocolMin := ProtocolVersionMin
if protocolMin == 0 {
protocolMin = 1
}
if config.ProtocolVersion < protocolMin ||
config.ProtocolVersion > ProtocolVersionMax {
return fmt.Errorf("Protocol version %d must be >= %d and <= %d",
config.ProtocolVersion, protocolMin, ProtocolVersionMax)
}
if len(config.LocalID) == 0 {
return fmt.Errorf("LocalID cannot be empty")
}
if config.HeartbeatTimeout < 5*time.Millisecond {
return fmt.Errorf("Heartbeat timeout is too low")
}
if config.ElectionTimeout < 5*time.Millisecond {
return fmt.Errorf("Election timeout is too low")
}
if config.CommitTimeout < time.Millisecond {
return fmt.Errorf("Commit timeout is too low")
}
if config.MaxAppendEntries <= 0 {
return fmt.Errorf("MaxAppendEntries must be positive")
}
if config.MaxAppendEntries > 1024 {
return fmt.Errorf("MaxAppendEntries is too large")
}
if config.SnapshotInterval < 5*time.Millisecond {
return fmt.Errorf("Snapshot interval is too low")
}
if config.LeaderLeaseTimeout < 5*time.Millisecond {
return fmt.Errorf("Leader lease timeout is too low")
}
if config.LeaderLeaseTimeout > config.HeartbeatTimeout {
return fmt.Errorf("Leader lease timeout cannot be larger than heartbeat timeout")
}
if config.ElectionTimeout < config.HeartbeatTimeout {
return fmt.Errorf("Election timeout must be equal or greater than Heartbeat Timeout")
}
return nil
}

View File

@ -1,343 +0,0 @@
package raft
import "fmt"
// ServerSuffrage determines whether a Server in a Configuration gets a vote.
type ServerSuffrage int
// Note: Don't renumber these, since the numbers are written into the log.
const (
// Voter is a server whose vote is counted in elections and whose match index
// is used in advancing the leader's commit index.
Voter ServerSuffrage = iota
// Nonvoter is a server that receives log entries but is not considered for
// elections or commitment purposes.
Nonvoter
// Staging is a server that acts like a nonvoter with one exception: once a
// staging server receives enough log entries to be sufficiently caught up to
// the leader's log, the leader will invoke a membership change to change
// the Staging server to a Voter.
Staging
)
func (s ServerSuffrage) String() string {
switch s {
case Voter:
return "Voter"
case Nonvoter:
return "Nonvoter"
case Staging:
return "Staging"
}
return "ServerSuffrage"
}
// ServerID is a unique string identifying a server for all time.
type ServerID string
// ServerAddress is a network address for a server that a transport can contact.
type ServerAddress string
// Server tracks the information about a single server in a configuration.
type Server struct {
// Suffrage determines whether the server gets a vote.
Suffrage ServerSuffrage
// ID is a unique string identifying this server for all time.
ID ServerID
// Address is its network address that a transport can contact.
Address ServerAddress
}
// Configuration tracks which servers are in the cluster, and whether they have
// votes. This should include the local server, if it's a member of the cluster.
// The servers are listed no particular order, but each should only appear once.
// These entries are appended to the log during membership changes.
type Configuration struct {
Servers []Server
}
// Clone makes a deep copy of a Configuration.
func (c *Configuration) Clone() (copy Configuration) {
copy.Servers = append(copy.Servers, c.Servers...)
return
}
// ConfigurationChangeCommand is the different ways to change the cluster
// configuration.
type ConfigurationChangeCommand uint8
const (
// AddStaging makes a server Staging unless its Voter.
AddStaging ConfigurationChangeCommand = iota
// AddNonvoter makes a server Nonvoter unless its Staging or Voter.
AddNonvoter
// DemoteVoter makes a server Nonvoter unless its absent.
DemoteVoter
// RemoveServer removes a server entirely from the cluster membership.
RemoveServer
// Promote is created automatically by a leader; it turns a Staging server
// into a Voter.
Promote
)
func (c ConfigurationChangeCommand) String() string {
switch c {
case AddStaging:
return "AddStaging"
case AddNonvoter:
return "AddNonvoter"
case DemoteVoter:
return "DemoteVoter"
case RemoveServer:
return "RemoveServer"
case Promote:
return "Promote"
}
return "ConfigurationChangeCommand"
}
// configurationChangeRequest describes a change that a leader would like to
// make to its current configuration. It's used only within a single server
// (never serialized into the log), as part of `configurationChangeFuture`.
type configurationChangeRequest struct {
command ConfigurationChangeCommand
serverID ServerID
serverAddress ServerAddress // only present for AddStaging, AddNonvoter
// prevIndex, if nonzero, is the index of the only configuration upon which
// this change may be applied; if another configuration entry has been
// added in the meantime, this request will fail.
prevIndex uint64
}
// configurations is state tracked on every server about its Configurations.
// Note that, per Diego's dissertation, there can be at most one uncommitted
// configuration at a time (the next configuration may not be created until the
// prior one has been committed).
//
// One downside to storing just two configurations is that if you try to take a
// snahpsot when your state machine hasn't yet applied the committedIndex, we
// have no record of the configuration that would logically fit into that
// snapshot. We disallow snapshots in that case now. An alternative approach,
// which LogCabin uses, is to track every configuration change in the
// log.
type configurations struct {
// committed is the latest configuration in the log/snapshot that has been
// committed (the one with the largest index).
committed Configuration
// committedIndex is the log index where 'committed' was written.
committedIndex uint64
// latest is the latest configuration in the log/snapshot (may be committed
// or uncommitted)
latest Configuration
// latestIndex is the log index where 'latest' was written.
latestIndex uint64
}
// Clone makes a deep copy of a configurations object.
func (c *configurations) Clone() (copy configurations) {
copy.committed = c.committed.Clone()
copy.committedIndex = c.committedIndex
copy.latest = c.latest.Clone()
copy.latestIndex = c.latestIndex
return
}
// hasVote returns true if the server identified by 'id' is a Voter in the
// provided Configuration.
func hasVote(configuration Configuration, id ServerID) bool {
for _, server := range configuration.Servers {
if server.ID == id {
return server.Suffrage == Voter
}
}
return false
}
// checkConfiguration tests a cluster membership configuration for common
// errors.
func checkConfiguration(configuration Configuration) error {
idSet := make(map[ServerID]bool)
addressSet := make(map[ServerAddress]bool)
var voters int
for _, server := range configuration.Servers {
if server.ID == "" {
return fmt.Errorf("Empty ID in configuration: %v", configuration)
}
if server.Address == "" {
return fmt.Errorf("Empty address in configuration: %v", server)
}
if idSet[server.ID] {
return fmt.Errorf("Found duplicate ID in configuration: %v", server.ID)
}
idSet[server.ID] = true
if addressSet[server.Address] {
return fmt.Errorf("Found duplicate address in configuration: %v", server.Address)
}
addressSet[server.Address] = true
if server.Suffrage == Voter {
voters++
}
}
if voters == 0 {
return fmt.Errorf("Need at least one voter in configuration: %v", configuration)
}
return nil
}
// nextConfiguration generates a new Configuration from the current one and a
// configuration change request. It's split from appendConfigurationEntry so
// that it can be unit tested easily.
func nextConfiguration(current Configuration, currentIndex uint64, change configurationChangeRequest) (Configuration, error) {
if change.prevIndex > 0 && change.prevIndex != currentIndex {
return Configuration{}, fmt.Errorf("Configuration changed since %v (latest is %v)", change.prevIndex, currentIndex)
}
configuration := current.Clone()
switch change.command {
case AddStaging:
// TODO: barf on new address?
newServer := Server{
// TODO: This should add the server as Staging, to be automatically
// promoted to Voter later. However, the promoton to Voter is not yet
// implemented, and doing so is not trivial with the way the leader loop
// coordinates with the replication goroutines today. So, for now, the
// server will have a vote right away, and the Promote case below is
// unused.
Suffrage: Voter,
ID: change.serverID,
Address: change.serverAddress,
}
found := false
for i, server := range configuration.Servers {
if server.ID == change.serverID {
if server.Suffrage == Voter {
configuration.Servers[i].Address = change.serverAddress
} else {
configuration.Servers[i] = newServer
}
found = true
break
}
}
if !found {
configuration.Servers = append(configuration.Servers, newServer)
}
case AddNonvoter:
newServer := Server{
Suffrage: Nonvoter,
ID: change.serverID,
Address: change.serverAddress,
}
found := false
for i, server := range configuration.Servers {
if server.ID == change.serverID {
if server.Suffrage != Nonvoter {
configuration.Servers[i].Address = change.serverAddress
} else {
configuration.Servers[i] = newServer
}
found = true
break
}
}
if !found {
configuration.Servers = append(configuration.Servers, newServer)
}
case DemoteVoter:
for i, server := range configuration.Servers {
if server.ID == change.serverID {
configuration.Servers[i].Suffrage = Nonvoter
break
}
}
case RemoveServer:
for i, server := range configuration.Servers {
if server.ID == change.serverID {
configuration.Servers = append(configuration.Servers[:i], configuration.Servers[i+1:]...)
break
}
}
case Promote:
for i, server := range configuration.Servers {
if server.ID == change.serverID && server.Suffrage == Staging {
configuration.Servers[i].Suffrage = Voter
break
}
}
}
// Make sure we didn't do something bad like remove the last voter
if err := checkConfiguration(configuration); err != nil {
return Configuration{}, err
}
return configuration, nil
}
// encodePeers is used to serialize a Configuration into the old peers format.
// This is here for backwards compatibility when operating with a mix of old
// servers and should be removed once we deprecate support for protocol version 1.
func encodePeers(configuration Configuration, trans Transport) []byte {
// Gather up all the voters, other suffrage types are not supported by
// this data format.
var encPeers [][]byte
for _, server := range configuration.Servers {
if server.Suffrage == Voter {
encPeers = append(encPeers, trans.EncodePeer(server.Address))
}
}
// Encode the entire array.
buf, err := encodeMsgPack(encPeers)
if err != nil {
panic(fmt.Errorf("failed to encode peers: %v", err))
}
return buf.Bytes()
}
// decodePeers is used to deserialize an old list of peers into a Configuration.
// This is here for backwards compatibility with old log entries and snapshots;
// it should be removed eventually.
func decodePeers(buf []byte, trans Transport) Configuration {
// Decode the buffer first.
var encPeers [][]byte
if err := decodeMsgPack(buf, &encPeers); err != nil {
panic(fmt.Errorf("failed to decode peers: %v", err))
}
// Deserialize each peer.
var servers []Server
for _, enc := range encPeers {
p := trans.DecodePeer(enc)
servers = append(servers, Server{
Suffrage: Voter,
ID: ServerID(p),
Address: ServerAddress(p),
})
}
return Configuration{
Servers: servers,
}
}
// encodeConfiguration serializes a Configuration using MsgPack, or panics on
// errors.
func encodeConfiguration(configuration Configuration) []byte {
buf, err := encodeMsgPack(configuration)
if err != nil {
panic(fmt.Errorf("failed to encode configuration: %v", err))
}
return buf.Bytes()
}
// decodeConfiguration deserializes a Configuration using MsgPack, or panics on
// errors.
func decodeConfiguration(buf []byte) Configuration {
var configuration Configuration
if err := decodeMsgPack(buf, &configuration); err != nil {
panic(fmt.Errorf("failed to decode configuration: %v", err))
}
return configuration
}

View File

@ -1,49 +0,0 @@
package raft
import (
"fmt"
"io"
)
// DiscardSnapshotStore is used to successfully snapshot while
// always discarding the snapshot. This is useful for when the
// log should be truncated but no snapshot should be retained.
// This should never be used for production use, and is only
// suitable for testing.
type DiscardSnapshotStore struct{}
type DiscardSnapshotSink struct{}
// NewDiscardSnapshotStore is used to create a new DiscardSnapshotStore.
func NewDiscardSnapshotStore() *DiscardSnapshotStore {
return &DiscardSnapshotStore{}
}
func (d *DiscardSnapshotStore) Create(version SnapshotVersion, index, term uint64,
configuration Configuration, configurationIndex uint64, trans Transport) (SnapshotSink, error) {
return &DiscardSnapshotSink{}, nil
}
func (d *DiscardSnapshotStore) List() ([]*SnapshotMeta, error) {
return nil, nil
}
func (d *DiscardSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, error) {
return nil, nil, fmt.Errorf("open is not supported")
}
func (d *DiscardSnapshotSink) Write(b []byte) (int, error) {
return len(b), nil
}
func (d *DiscardSnapshotSink) Close() error {
return nil
}
func (d *DiscardSnapshotSink) ID() string {
return "discard"
}
func (d *DiscardSnapshotSink) Cancel() error {
return nil
}

View File

@ -1,494 +0,0 @@
package raft
import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"hash"
"hash/crc64"
"io"
"io/ioutil"
"log"
"os"
"path/filepath"
"sort"
"strings"
"time"
)
const (
testPath = "permTest"
snapPath = "snapshots"
metaFilePath = "meta.json"
stateFilePath = "state.bin"
tmpSuffix = ".tmp"
)
// FileSnapshotStore implements the SnapshotStore interface and allows
// snapshots to be made on the local disk.
type FileSnapshotStore struct {
path string
retain int
logger *log.Logger
}
type snapMetaSlice []*fileSnapshotMeta
// FileSnapshotSink implements SnapshotSink with a file.
type FileSnapshotSink struct {
store *FileSnapshotStore
logger *log.Logger
dir string
meta fileSnapshotMeta
stateFile *os.File
stateHash hash.Hash64
buffered *bufio.Writer
closed bool
}
// fileSnapshotMeta is stored on disk. We also put a CRC
// on disk so that we can verify the snapshot.
type fileSnapshotMeta struct {
SnapshotMeta
CRC []byte
}
// bufferedFile is returned when we open a snapshot. This way
// reads are buffered and the file still gets closed.
type bufferedFile struct {
bh *bufio.Reader
fh *os.File
}
func (b *bufferedFile) Read(p []byte) (n int, err error) {
return b.bh.Read(p)
}
func (b *bufferedFile) Close() error {
return b.fh.Close()
}
// NewFileSnapshotStoreWithLogger creates a new FileSnapshotStore based
// on a base directory. The `retain` parameter controls how many
// snapshots are retained. Must be at least 1.
func NewFileSnapshotStoreWithLogger(base string, retain int, logger *log.Logger) (*FileSnapshotStore, error) {
if retain < 1 {
return nil, fmt.Errorf("must retain at least one snapshot")
}
if logger == nil {
logger = log.New(os.Stderr, "", log.LstdFlags)
}
// Ensure our path exists
path := filepath.Join(base, snapPath)
if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
return nil, fmt.Errorf("snapshot path not accessible: %v", err)
}
// Setup the store
store := &FileSnapshotStore{
path: path,
retain: retain,
logger: logger,
}
// Do a permissions test
if err := store.testPermissions(); err != nil {
return nil, fmt.Errorf("permissions test failed: %v", err)
}
return store, nil
}
// NewFileSnapshotStore creates a new FileSnapshotStore based
// on a base directory. The `retain` parameter controls how many
// snapshots are retained. Must be at least 1.
func NewFileSnapshotStore(base string, retain int, logOutput io.Writer) (*FileSnapshotStore, error) {
if logOutput == nil {
logOutput = os.Stderr
}
return NewFileSnapshotStoreWithLogger(base, retain, log.New(logOutput, "", log.LstdFlags))
}
// testPermissions tries to touch a file in our path to see if it works.
func (f *FileSnapshotStore) testPermissions() error {
path := filepath.Join(f.path, testPath)
fh, err := os.Create(path)
if err != nil {
return err
}
if err = fh.Close(); err != nil {
return err
}
if err = os.Remove(path); err != nil {
return err
}
return nil
}
// snapshotName generates a name for the snapshot.
func snapshotName(term, index uint64) string {
now := time.Now()
msec := now.UnixNano() / int64(time.Millisecond)
return fmt.Sprintf("%d-%d-%d", term, index, msec)
}
// Create is used to start a new snapshot
func (f *FileSnapshotStore) Create(version SnapshotVersion, index, term uint64,
configuration Configuration, configurationIndex uint64, trans Transport) (SnapshotSink, error) {
// We only support version 1 snapshots at this time.
if version != 1 {
return nil, fmt.Errorf("unsupported snapshot version %d", version)
}
// Create a new path
name := snapshotName(term, index)
path := filepath.Join(f.path, name+tmpSuffix)
f.logger.Printf("[INFO] snapshot: Creating new snapshot at %s", path)
// Make the directory
if err := os.MkdirAll(path, 0755); err != nil {
f.logger.Printf("[ERR] snapshot: Failed to make snapshot directory: %v", err)
return nil, err
}
// Create the sink
sink := &FileSnapshotSink{
store: f,
logger: f.logger,
dir: path,
meta: fileSnapshotMeta{
SnapshotMeta: SnapshotMeta{
Version: version,
ID: name,
Index: index,
Term: term,
Peers: encodePeers(configuration, trans),
Configuration: configuration,
ConfigurationIndex: configurationIndex,
},
CRC: nil,
},
}
// Write out the meta data
if err := sink.writeMeta(); err != nil {
f.logger.Printf("[ERR] snapshot: Failed to write metadata: %v", err)
return nil, err
}
// Open the state file
statePath := filepath.Join(path, stateFilePath)
fh, err := os.Create(statePath)
if err != nil {
f.logger.Printf("[ERR] snapshot: Failed to create state file: %v", err)
return nil, err
}
sink.stateFile = fh
// Create a CRC64 hash
sink.stateHash = crc64.New(crc64.MakeTable(crc64.ECMA))
// Wrap both the hash and file in a MultiWriter with buffering
multi := io.MultiWriter(sink.stateFile, sink.stateHash)
sink.buffered = bufio.NewWriter(multi)
// Done
return sink, nil
}
// List returns available snapshots in the store.
func (f *FileSnapshotStore) List() ([]*SnapshotMeta, error) {
// Get the eligible snapshots
snapshots, err := f.getSnapshots()
if err != nil {
f.logger.Printf("[ERR] snapshot: Failed to get snapshots: %v", err)
return nil, err
}
var snapMeta []*SnapshotMeta
for _, meta := range snapshots {
snapMeta = append(snapMeta, &meta.SnapshotMeta)
if len(snapMeta) == f.retain {
break
}
}
return snapMeta, nil
}
// getSnapshots returns all the known snapshots.
func (f *FileSnapshotStore) getSnapshots() ([]*fileSnapshotMeta, error) {
// Get the eligible snapshots
snapshots, err := ioutil.ReadDir(f.path)
if err != nil {
f.logger.Printf("[ERR] snapshot: Failed to scan snapshot dir: %v", err)
return nil, err
}
// Populate the metadata
var snapMeta []*fileSnapshotMeta
for _, snap := range snapshots {
// Ignore any files
if !snap.IsDir() {
continue
}
// Ignore any temporary snapshots
dirName := snap.Name()
if strings.HasSuffix(dirName, tmpSuffix) {
f.logger.Printf("[WARN] snapshot: Found temporary snapshot: %v", dirName)
continue
}
// Try to read the meta data
meta, err := f.readMeta(dirName)
if err != nil {
f.logger.Printf("[WARN] snapshot: Failed to read metadata for %v: %v", dirName, err)
continue
}
// Make sure we can understand this version.
if meta.Version < SnapshotVersionMin || meta.Version > SnapshotVersionMax {
f.logger.Printf("[WARN] snapshot: Snapshot version for %v not supported: %d", dirName, meta.Version)
continue
}
// Append, but only return up to the retain count
snapMeta = append(snapMeta, meta)
}
// Sort the snapshot, reverse so we get new -> old
sort.Sort(sort.Reverse(snapMetaSlice(snapMeta)))
return snapMeta, nil
}
// readMeta is used to read the meta data for a given named backup
func (f *FileSnapshotStore) readMeta(name string) (*fileSnapshotMeta, error) {
// Open the meta file
metaPath := filepath.Join(f.path, name, metaFilePath)
fh, err := os.Open(metaPath)
if err != nil {
return nil, err
}
defer fh.Close()
// Buffer the file IO
buffered := bufio.NewReader(fh)
// Read in the JSON
meta := &fileSnapshotMeta{}
dec := json.NewDecoder(buffered)
if err := dec.Decode(meta); err != nil {
return nil, err
}
return meta, nil
}
// Open takes a snapshot ID and returns a ReadCloser for that snapshot.
func (f *FileSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, error) {
// Get the metadata
meta, err := f.readMeta(id)
if err != nil {
f.logger.Printf("[ERR] snapshot: Failed to get meta data to open snapshot: %v", err)
return nil, nil, err
}
// Open the state file
statePath := filepath.Join(f.path, id, stateFilePath)
fh, err := os.Open(statePath)
if err != nil {
f.logger.Printf("[ERR] snapshot: Failed to open state file: %v", err)
return nil, nil, err
}
// Create a CRC64 hash
stateHash := crc64.New(crc64.MakeTable(crc64.ECMA))
// Compute the hash
_, err = io.Copy(stateHash, fh)
if err != nil {
f.logger.Printf("[ERR] snapshot: Failed to read state file: %v", err)
fh.Close()
return nil, nil, err
}
// Verify the hash
computed := stateHash.Sum(nil)
if bytes.Compare(meta.CRC, computed) != 0 {
f.logger.Printf("[ERR] snapshot: CRC checksum failed (stored: %v computed: %v)",
meta.CRC, computed)
fh.Close()
return nil, nil, fmt.Errorf("CRC mismatch")
}
// Seek to the start
if _, err := fh.Seek(0, 0); err != nil {
f.logger.Printf("[ERR] snapshot: State file seek failed: %v", err)
fh.Close()
return nil, nil, err
}
// Return a buffered file
buffered := &bufferedFile{
bh: bufio.NewReader(fh),
fh: fh,
}
return &meta.SnapshotMeta, buffered, nil
}
// ReapSnapshots reaps any snapshots beyond the retain count.
func (f *FileSnapshotStore) ReapSnapshots() error {
snapshots, err := f.getSnapshots()
if err != nil {
f.logger.Printf("[ERR] snapshot: Failed to get snapshots: %v", err)
return err
}
for i := f.retain; i < len(snapshots); i++ {
path := filepath.Join(f.path, snapshots[i].ID)
f.logger.Printf("[INFO] snapshot: reaping snapshot %v", path)
if err := os.RemoveAll(path); err != nil {
f.logger.Printf("[ERR] snapshot: Failed to reap snapshot %v: %v", path, err)
return err
}
}
return nil
}
// ID returns the ID of the snapshot, can be used with Open()
// after the snapshot is finalized.
func (s *FileSnapshotSink) ID() string {
return s.meta.ID
}
// Write is used to append to the state file. We write to the
// buffered IO object to reduce the amount of context switches.
func (s *FileSnapshotSink) Write(b []byte) (int, error) {
return s.buffered.Write(b)
}
// Close is used to indicate a successful end.
func (s *FileSnapshotSink) Close() error {
// Make sure close is idempotent
if s.closed {
return nil
}
s.closed = true
// Close the open handles
if err := s.finalize(); err != nil {
s.logger.Printf("[ERR] snapshot: Failed to finalize snapshot: %v", err)
return err
}
// Write out the meta data
if err := s.writeMeta(); err != nil {
s.logger.Printf("[ERR] snapshot: Failed to write metadata: %v", err)
return err
}
// Move the directory into place
newPath := strings.TrimSuffix(s.dir, tmpSuffix)
if err := os.Rename(s.dir, newPath); err != nil {
s.logger.Printf("[ERR] snapshot: Failed to move snapshot into place: %v", err)
return err
}
// Reap any old snapshots
if err := s.store.ReapSnapshots(); err != nil {
return err
}
return nil
}
// Cancel is used to indicate an unsuccessful end.
func (s *FileSnapshotSink) Cancel() error {
// Make sure close is idempotent
if s.closed {
return nil
}
s.closed = true
// Close the open handles
if err := s.finalize(); err != nil {
s.logger.Printf("[ERR] snapshot: Failed to finalize snapshot: %v", err)
return err
}
// Attempt to remove all artifacts
return os.RemoveAll(s.dir)
}
// finalize is used to close all of our resources.
func (s *FileSnapshotSink) finalize() error {
// Flush any remaining data
if err := s.buffered.Flush(); err != nil {
return err
}
// Get the file size
stat, statErr := s.stateFile.Stat()
// Close the file
if err := s.stateFile.Close(); err != nil {
return err
}
// Set the file size, check after we close
if statErr != nil {
return statErr
}
s.meta.Size = stat.Size()
// Set the CRC
s.meta.CRC = s.stateHash.Sum(nil)
return nil
}
// writeMeta is used to write out the metadata we have.
func (s *FileSnapshotSink) writeMeta() error {
// Open the meta file
metaPath := filepath.Join(s.dir, metaFilePath)
fh, err := os.Create(metaPath)
if err != nil {
return err
}
defer fh.Close()
// Buffer the file IO
buffered := bufio.NewWriter(fh)
defer buffered.Flush()
// Write out as JSON
enc := json.NewEncoder(buffered)
if err := enc.Encode(&s.meta); err != nil {
return err
}
return nil
}
// Implement the sort interface for []*fileSnapshotMeta.
func (s snapMetaSlice) Len() int {
return len(s)
}
func (s snapMetaSlice) Less(i, j int) bool {
if s[i].Term != s[j].Term {
return s[i].Term < s[j].Term
}
if s[i].Index != s[j].Index {
return s[i].Index < s[j].Index
}
return s[i].ID < s[j].ID
}
func (s snapMetaSlice) Swap(i, j int) {
s[i], s[j] = s[j], s[i]
}

View File

@ -1,136 +0,0 @@
package raft
import (
"fmt"
"io"
"time"
"github.com/armon/go-metrics"
)
// FSM provides an interface that can be implemented by
// clients to make use of the replicated log.
type FSM interface {
// Apply log is invoked once a log entry is committed.
// It returns a value which will be made available in the
// ApplyFuture returned by Raft.Apply method if that
// method was called on the same Raft node as the FSM.
Apply(*Log) interface{}
// Snapshot is used to support log compaction. This call should
// return an FSMSnapshot which can be used to save a point-in-time
// snapshot of the FSM. Apply and Snapshot are not called in multiple
// threads, but Apply will be called concurrently with Persist. This means
// the FSM should be implemented in a fashion that allows for concurrent
// updates while a snapshot is happening.
Snapshot() (FSMSnapshot, error)
// Restore is used to restore an FSM from a snapshot. It is not called
// concurrently with any other command. The FSM must discard all previous
// state.
Restore(io.ReadCloser) error
}
// FSMSnapshot is returned by an FSM in response to a Snapshot
// It must be safe to invoke FSMSnapshot methods with concurrent
// calls to Apply.
type FSMSnapshot interface {
// Persist should dump all necessary state to the WriteCloser 'sink',
// and call sink.Close() when finished or call sink.Cancel() on error.
Persist(sink SnapshotSink) error
// Release is invoked when we are finished with the snapshot.
Release()
}
// runFSM is a long running goroutine responsible for applying logs
// to the FSM. This is done async of other logs since we don't want
// the FSM to block our internal operations.
func (r *Raft) runFSM() {
var lastIndex, lastTerm uint64
commit := func(req *commitTuple) {
// Apply the log if a command
var resp interface{}
if req.log.Type == LogCommand {
start := time.Now()
resp = r.fsm.Apply(req.log)
metrics.MeasureSince([]string{"raft", "fsm", "apply"}, start)
}
// Update the indexes
lastIndex = req.log.Index
lastTerm = req.log.Term
// Invoke the future if given
if req.future != nil {
req.future.response = resp
req.future.respond(nil)
}
}
restore := func(req *restoreFuture) {
// Open the snapshot
meta, source, err := r.snapshots.Open(req.ID)
if err != nil {
req.respond(fmt.Errorf("failed to open snapshot %v: %v", req.ID, err))
return
}
// Attempt to restore
start := time.Now()
if err := r.fsm.Restore(source); err != nil {
req.respond(fmt.Errorf("failed to restore snapshot %v: %v", req.ID, err))
source.Close()
return
}
source.Close()
metrics.MeasureSince([]string{"raft", "fsm", "restore"}, start)
// Update the last index and term
lastIndex = meta.Index
lastTerm = meta.Term
req.respond(nil)
}
snapshot := func(req *reqSnapshotFuture) {
// Is there something to snapshot?
if lastIndex == 0 {
req.respond(ErrNothingNewToSnapshot)
return
}
// Start a snapshot
start := time.Now()
snap, err := r.fsm.Snapshot()
metrics.MeasureSince([]string{"raft", "fsm", "snapshot"}, start)
// Respond to the request
req.index = lastIndex
req.term = lastTerm
req.snapshot = snap
req.respond(err)
}
for {
select {
case ptr := <-r.fsmMutateCh:
switch req := ptr.(type) {
case *commitTuple:
commit(req)
case *restoreFuture:
restore(req)
default:
panic(fmt.Errorf("bad type passed to fsmMutateCh: %#v", ptr))
}
case req := <-r.fsmSnapshotCh:
snapshot(req)
case <-r.shutdownCh:
return
}
}
}

View File

@ -1,289 +0,0 @@
package raft
import (
"fmt"
"io"
"sync"
"time"
)
// Future is used to represent an action that may occur in the future.
type Future interface {
// Error blocks until the future arrives and then
// returns the error status of the future.
// This may be called any number of times - all
// calls will return the same value.
// Note that it is not OK to call this method
// twice concurrently on the same Future instance.
Error() error
}
// IndexFuture is used for future actions that can result in a raft log entry
// being created.
type IndexFuture interface {
Future
// Index holds the index of the newly applied log entry.
// This must not be called until after the Error method has returned.
Index() uint64
}
// ApplyFuture is used for Apply and can return the FSM response.
type ApplyFuture interface {
IndexFuture
// Response returns the FSM response as returned
// by the FSM.Apply method. This must not be called
// until after the Error method has returned.
Response() interface{}
}
// ConfigurationFuture is used for GetConfiguration and can return the
// latest configuration in use by Raft.
type ConfigurationFuture interface {
IndexFuture
// Configuration contains the latest configuration. This must
// not be called until after the Error method has returned.
Configuration() Configuration
}
// SnapshotFuture is used for waiting on a user-triggered snapshot to complete.
type SnapshotFuture interface {
Future
// Open is a function you can call to access the underlying snapshot and
// its metadata. This must not be called until after the Error method
// has returned.
Open() (*SnapshotMeta, io.ReadCloser, error)
}
// errorFuture is used to return a static error.
type errorFuture struct {
err error
}
func (e errorFuture) Error() error {
return e.err
}
func (e errorFuture) Response() interface{} {
return nil
}
func (e errorFuture) Index() uint64 {
return 0
}
// deferError can be embedded to allow a future
// to provide an error in the future.
type deferError struct {
err error
errCh chan error
responded bool
}
func (d *deferError) init() {
d.errCh = make(chan error, 1)
}
func (d *deferError) Error() error {
if d.err != nil {
// Note that when we've received a nil error, this
// won't trigger, but the channel is closed after
// send so we'll still return nil below.
return d.err
}
if d.errCh == nil {
panic("waiting for response on nil channel")
}
d.err = <-d.errCh
return d.err
}
func (d *deferError) respond(err error) {
if d.errCh == nil {
return
}
if d.responded {
return
}
d.errCh <- err
close(d.errCh)
d.responded = true
}
// There are several types of requests that cause a configuration entry to
// be appended to the log. These are encoded here for leaderLoop() to process.
// This is internal to a single server.
type configurationChangeFuture struct {
logFuture
req configurationChangeRequest
}
// bootstrapFuture is used to attempt a live bootstrap of the cluster. See the
// Raft object's BootstrapCluster member function for more details.
type bootstrapFuture struct {
deferError
// configuration is the proposed bootstrap configuration to apply.
configuration Configuration
}
// logFuture is used to apply a log entry and waits until
// the log is considered committed.
type logFuture struct {
deferError
log Log
response interface{}
dispatch time.Time
}
func (l *logFuture) Response() interface{} {
return l.response
}
func (l *logFuture) Index() uint64 {
return l.log.Index
}
type shutdownFuture struct {
raft *Raft
}
func (s *shutdownFuture) Error() error {
if s.raft == nil {
return nil
}
s.raft.waitShutdown()
if closeable, ok := s.raft.trans.(WithClose); ok {
closeable.Close()
}
return nil
}
// userSnapshotFuture is used for waiting on a user-triggered snapshot to
// complete.
type userSnapshotFuture struct {
deferError
// opener is a function used to open the snapshot. This is filled in
// once the future returns with no error.
opener func() (*SnapshotMeta, io.ReadCloser, error)
}
// Open is a function you can call to access the underlying snapshot and its
// metadata.
func (u *userSnapshotFuture) Open() (*SnapshotMeta, io.ReadCloser, error) {
if u.opener == nil {
return nil, nil, fmt.Errorf("no snapshot available")
} else {
// Invalidate the opener so it can't get called multiple times,
// which isn't generally safe.
defer func() {
u.opener = nil
}()
return u.opener()
}
}
// userRestoreFuture is used for waiting on a user-triggered restore of an
// external snapshot to complete.
type userRestoreFuture struct {
deferError
// meta is the metadata that belongs with the snapshot.
meta *SnapshotMeta
// reader is the interface to read the snapshot contents from.
reader io.Reader
}
// reqSnapshotFuture is used for requesting a snapshot start.
// It is only used internally.
type reqSnapshotFuture struct {
deferError
// snapshot details provided by the FSM runner before responding
index uint64
term uint64
snapshot FSMSnapshot
}
// restoreFuture is used for requesting an FSM to perform a
// snapshot restore. Used internally only.
type restoreFuture struct {
deferError
ID string
}
// verifyFuture is used to verify the current node is still
// the leader. This is to prevent a stale read.
type verifyFuture struct {
deferError
notifyCh chan *verifyFuture
quorumSize int
votes int
voteLock sync.Mutex
}
// configurationsFuture is used to retrieve the current configurations. This is
// used to allow safe access to this information outside of the main thread.
type configurationsFuture struct {
deferError
configurations configurations
}
// Configuration returns the latest configuration in use by Raft.
func (c *configurationsFuture) Configuration() Configuration {
return c.configurations.latest
}
// Index returns the index of the latest configuration in use by Raft.
func (c *configurationsFuture) Index() uint64 {
return c.configurations.latestIndex
}
// vote is used to respond to a verifyFuture.
// This may block when responding on the notifyCh.
func (v *verifyFuture) vote(leader bool) {
v.voteLock.Lock()
defer v.voteLock.Unlock()
// Guard against having notified already
if v.notifyCh == nil {
return
}
if leader {
v.votes++
if v.votes >= v.quorumSize {
v.notifyCh <- v
v.notifyCh = nil
}
} else {
v.notifyCh <- v
v.notifyCh = nil
}
}
// appendFuture is used for waiting on a pipelined append
// entries RPC.
type appendFuture struct {
deferError
start time.Time
args *AppendEntriesRequest
resp *AppendEntriesResponse
}
func (a *appendFuture) Start() time.Time {
return a.start
}
func (a *appendFuture) Request() *AppendEntriesRequest {
return a.args
}
func (a *appendFuture) Response() *AppendEntriesResponse {
return a.resp
}

View File

@ -1,106 +0,0 @@
package raft
import (
"bytes"
"fmt"
"io"
"io/ioutil"
"sync"
)
// InmemSnapshotStore implements the SnapshotStore interface and
// retains only the most recent snapshot
type InmemSnapshotStore struct {
latest *InmemSnapshotSink
hasSnapshot bool
sync.RWMutex
}
// InmemSnapshotSink implements SnapshotSink in memory
type InmemSnapshotSink struct {
meta SnapshotMeta
contents *bytes.Buffer
}
// NewInmemSnapshotStore creates a blank new InmemSnapshotStore
func NewInmemSnapshotStore() *InmemSnapshotStore {
return &InmemSnapshotStore{
latest: &InmemSnapshotSink{
contents: &bytes.Buffer{},
},
}
}
// Create replaces the stored snapshot with a new one using the given args
func (m *InmemSnapshotStore) Create(version SnapshotVersion, index, term uint64,
configuration Configuration, configurationIndex uint64, trans Transport) (SnapshotSink, error) {
// We only support version 1 snapshots at this time.
if version != 1 {
return nil, fmt.Errorf("unsupported snapshot version %d", version)
}
name := snapshotName(term, index)
m.Lock()
defer m.Unlock()
sink := &InmemSnapshotSink{
meta: SnapshotMeta{
Version: version,
ID: name,
Index: index,
Term: term,
Peers: encodePeers(configuration, trans),
Configuration: configuration,
ConfigurationIndex: configurationIndex,
},
contents: &bytes.Buffer{},
}
m.hasSnapshot = true
m.latest = sink
return sink, nil
}
// List returns the latest snapshot taken
func (m *InmemSnapshotStore) List() ([]*SnapshotMeta, error) {
m.RLock()
defer m.RUnlock()
if !m.hasSnapshot {
return []*SnapshotMeta{}, nil
}
return []*SnapshotMeta{&m.latest.meta}, nil
}
// Open wraps an io.ReadCloser around the snapshot contents
func (m *InmemSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, error) {
m.RLock()
defer m.RUnlock()
if m.latest.meta.ID != id {
return nil, nil, fmt.Errorf("[ERR] snapshot: failed to open snapshot id: %s", id)
}
return &m.latest.meta, ioutil.NopCloser(m.latest.contents), nil
}
// Write appends the given bytes to the snapshot contents
func (s *InmemSnapshotSink) Write(p []byte) (n int, err error) {
written, err := io.Copy(s.contents, bytes.NewReader(p))
s.meta.Size += written
return int(written), err
}
// Close updates the Size and is otherwise a no-op
func (s *InmemSnapshotSink) Close() error {
return nil
}
func (s *InmemSnapshotSink) ID() string {
return s.meta.ID
}
func (s *InmemSnapshotSink) Cancel() error {
return nil
}

View File

@ -1,125 +0,0 @@
package raft
import (
"sync"
)
// InmemStore implements the LogStore and StableStore interface.
// It should NOT EVER be used for production. It is used only for
// unit tests. Use the MDBStore implementation instead.
type InmemStore struct {
l sync.RWMutex
lowIndex uint64
highIndex uint64
logs map[uint64]*Log
kv map[string][]byte
kvInt map[string]uint64
}
// NewInmemStore returns a new in-memory backend. Do not ever
// use for production. Only for testing.
func NewInmemStore() *InmemStore {
i := &InmemStore{
logs: make(map[uint64]*Log),
kv: make(map[string][]byte),
kvInt: make(map[string]uint64),
}
return i
}
// FirstIndex implements the LogStore interface.
func (i *InmemStore) FirstIndex() (uint64, error) {
i.l.RLock()
defer i.l.RUnlock()
return i.lowIndex, nil
}
// LastIndex implements the LogStore interface.
func (i *InmemStore) LastIndex() (uint64, error) {
i.l.RLock()
defer i.l.RUnlock()
return i.highIndex, nil
}
// GetLog implements the LogStore interface.
func (i *InmemStore) GetLog(index uint64, log *Log) error {
i.l.RLock()
defer i.l.RUnlock()
l, ok := i.logs[index]
if !ok {
return ErrLogNotFound
}
*log = *l
return nil
}
// StoreLog implements the LogStore interface.
func (i *InmemStore) StoreLog(log *Log) error {
return i.StoreLogs([]*Log{log})
}
// StoreLogs implements the LogStore interface.
func (i *InmemStore) StoreLogs(logs []*Log) error {
i.l.Lock()
defer i.l.Unlock()
for _, l := range logs {
i.logs[l.Index] = l
if i.lowIndex == 0 {
i.lowIndex = l.Index
}
if l.Index > i.highIndex {
i.highIndex = l.Index
}
}
return nil
}
// DeleteRange implements the LogStore interface.
func (i *InmemStore) DeleteRange(min, max uint64) error {
i.l.Lock()
defer i.l.Unlock()
for j := min; j <= max; j++ {
delete(i.logs, j)
}
if min <= i.lowIndex {
i.lowIndex = max + 1
}
if max >= i.highIndex {
i.highIndex = min - 1
}
if i.lowIndex > i.highIndex {
i.lowIndex = 0
i.highIndex = 0
}
return nil
}
// Set implements the StableStore interface.
func (i *InmemStore) Set(key []byte, val []byte) error {
i.l.Lock()
defer i.l.Unlock()
i.kv[string(key)] = val
return nil
}
// Get implements the StableStore interface.
func (i *InmemStore) Get(key []byte) ([]byte, error) {
i.l.RLock()
defer i.l.RUnlock()
return i.kv[string(key)], nil
}
// SetUint64 implements the StableStore interface.
func (i *InmemStore) SetUint64(key []byte, val uint64) error {
i.l.Lock()
defer i.l.Unlock()
i.kvInt[string(key)] = val
return nil
}
// GetUint64 implements the StableStore interface.
func (i *InmemStore) GetUint64(key []byte) (uint64, error) {
i.l.RLock()
defer i.l.RUnlock()
return i.kvInt[string(key)], nil
}

View File

@ -1,322 +0,0 @@
package raft
import (
"fmt"
"io"
"sync"
"time"
)
// NewInmemAddr returns a new in-memory addr with
// a randomly generate UUID as the ID.
func NewInmemAddr() ServerAddress {
return ServerAddress(generateUUID())
}
// inmemPipeline is used to pipeline requests for the in-mem transport.
type inmemPipeline struct {
trans *InmemTransport
peer *InmemTransport
peerAddr ServerAddress
doneCh chan AppendFuture
inprogressCh chan *inmemPipelineInflight
shutdown bool
shutdownCh chan struct{}
shutdownLock sync.Mutex
}
type inmemPipelineInflight struct {
future *appendFuture
respCh <-chan RPCResponse
}
// InmemTransport Implements the Transport interface, to allow Raft to be
// tested in-memory without going over a network.
type InmemTransport struct {
sync.RWMutex
consumerCh chan RPC
localAddr ServerAddress
peers map[ServerAddress]*InmemTransport
pipelines []*inmemPipeline
timeout time.Duration
}
// NewInmemTransport is used to initialize a new transport
// and generates a random local address if none is specified
func NewInmemTransport(addr ServerAddress) (ServerAddress, *InmemTransport) {
if string(addr) == "" {
addr = NewInmemAddr()
}
trans := &InmemTransport{
consumerCh: make(chan RPC, 16),
localAddr: addr,
peers: make(map[ServerAddress]*InmemTransport),
timeout: 50 * time.Millisecond,
}
return addr, trans
}
// SetHeartbeatHandler is used to set optional fast-path for
// heartbeats, not supported for this transport.
func (i *InmemTransport) SetHeartbeatHandler(cb func(RPC)) {
}
// Consumer implements the Transport interface.
func (i *InmemTransport) Consumer() <-chan RPC {
return i.consumerCh
}
// LocalAddr implements the Transport interface.
func (i *InmemTransport) LocalAddr() ServerAddress {
return i.localAddr
}
// AppendEntriesPipeline returns an interface that can be used to pipeline
// AppendEntries requests.
func (i *InmemTransport) AppendEntriesPipeline(target ServerAddress) (AppendPipeline, error) {
i.RLock()
peer, ok := i.peers[target]
i.RUnlock()
if !ok {
return nil, fmt.Errorf("failed to connect to peer: %v", target)
}
pipeline := newInmemPipeline(i, peer, target)
i.Lock()
i.pipelines = append(i.pipelines, pipeline)
i.Unlock()
return pipeline, nil
}
// AppendEntries implements the Transport interface.
func (i *InmemTransport) AppendEntries(target ServerAddress, args *AppendEntriesRequest, resp *AppendEntriesResponse) error {
rpcResp, err := i.makeRPC(target, args, nil, i.timeout)
if err != nil {
return err
}
// Copy the result back
out := rpcResp.Response.(*AppendEntriesResponse)
*resp = *out
return nil
}
// RequestVote implements the Transport interface.
func (i *InmemTransport) RequestVote(target ServerAddress, args *RequestVoteRequest, resp *RequestVoteResponse) error {
rpcResp, err := i.makeRPC(target, args, nil, i.timeout)
if err != nil {
return err
}
// Copy the result back
out := rpcResp.Response.(*RequestVoteResponse)
*resp = *out
return nil
}
// InstallSnapshot implements the Transport interface.
func (i *InmemTransport) InstallSnapshot(target ServerAddress, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error {
rpcResp, err := i.makeRPC(target, args, data, 10*i.timeout)
if err != nil {
return err
}
// Copy the result back
out := rpcResp.Response.(*InstallSnapshotResponse)
*resp = *out
return nil
}
func (i *InmemTransport) makeRPC(target ServerAddress, args interface{}, r io.Reader, timeout time.Duration) (rpcResp RPCResponse, err error) {
i.RLock()
peer, ok := i.peers[target]
i.RUnlock()
if !ok {
err = fmt.Errorf("failed to connect to peer: %v", target)
return
}
// Send the RPC over
respCh := make(chan RPCResponse)
peer.consumerCh <- RPC{
Command: args,
Reader: r,
RespChan: respCh,
}
// Wait for a response
select {
case rpcResp = <-respCh:
if rpcResp.Error != nil {
err = rpcResp.Error
}
case <-time.After(timeout):
err = fmt.Errorf("command timed out")
}
return
}
// EncodePeer implements the Transport interface.
func (i *InmemTransport) EncodePeer(p ServerAddress) []byte {
return []byte(p)
}
// DecodePeer implements the Transport interface.
func (i *InmemTransport) DecodePeer(buf []byte) ServerAddress {
return ServerAddress(buf)
}
// Connect is used to connect this transport to another transport for
// a given peer name. This allows for local routing.
func (i *InmemTransport) Connect(peer ServerAddress, t Transport) {
trans := t.(*InmemTransport)
i.Lock()
defer i.Unlock()
i.peers[peer] = trans
}
// Disconnect is used to remove the ability to route to a given peer.
func (i *InmemTransport) Disconnect(peer ServerAddress) {
i.Lock()
defer i.Unlock()
delete(i.peers, peer)
// Disconnect any pipelines
n := len(i.pipelines)
for idx := 0; idx < n; idx++ {
if i.pipelines[idx].peerAddr == peer {
i.pipelines[idx].Close()
i.pipelines[idx], i.pipelines[n-1] = i.pipelines[n-1], nil
idx--
n--
}
}
i.pipelines = i.pipelines[:n]
}
// DisconnectAll is used to remove all routes to peers.
func (i *InmemTransport) DisconnectAll() {
i.Lock()
defer i.Unlock()
i.peers = make(map[ServerAddress]*InmemTransport)
// Handle pipelines
for _, pipeline := range i.pipelines {
pipeline.Close()
}
i.pipelines = nil
}
// Close is used to permanently disable the transport
func (i *InmemTransport) Close() error {
i.DisconnectAll()
return nil
}
func newInmemPipeline(trans *InmemTransport, peer *InmemTransport, addr ServerAddress) *inmemPipeline {
i := &inmemPipeline{
trans: trans,
peer: peer,
peerAddr: addr,
doneCh: make(chan AppendFuture, 16),
inprogressCh: make(chan *inmemPipelineInflight, 16),
shutdownCh: make(chan struct{}),
}
go i.decodeResponses()
return i
}
func (i *inmemPipeline) decodeResponses() {
timeout := i.trans.timeout
for {
select {
case inp := <-i.inprogressCh:
var timeoutCh <-chan time.Time
if timeout > 0 {
timeoutCh = time.After(timeout)
}
select {
case rpcResp := <-inp.respCh:
// Copy the result back
*inp.future.resp = *rpcResp.Response.(*AppendEntriesResponse)
inp.future.respond(rpcResp.Error)
select {
case i.doneCh <- inp.future:
case <-i.shutdownCh:
return
}
case <-timeoutCh:
inp.future.respond(fmt.Errorf("command timed out"))
select {
case i.doneCh <- inp.future:
case <-i.shutdownCh:
return
}
case <-i.shutdownCh:
return
}
case <-i.shutdownCh:
return
}
}
}
func (i *inmemPipeline) AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error) {
// Create a new future
future := &appendFuture{
start: time.Now(),
args: args,
resp: resp,
}
future.init()
// Handle a timeout
var timeout <-chan time.Time
if i.trans.timeout > 0 {
timeout = time.After(i.trans.timeout)
}
// Send the RPC over
respCh := make(chan RPCResponse, 1)
rpc := RPC{
Command: args,
RespChan: respCh,
}
select {
case i.peer.consumerCh <- rpc:
case <-timeout:
return nil, fmt.Errorf("command enqueue timeout")
case <-i.shutdownCh:
return nil, ErrPipelineShutdown
}
// Send to be decoded
select {
case i.inprogressCh <- &inmemPipelineInflight{future, respCh}:
return future, nil
case <-i.shutdownCh:
return nil, ErrPipelineShutdown
}
}
func (i *inmemPipeline) Consumer() <-chan AppendFuture {
return i.doneCh
}
func (i *inmemPipeline) Close() error {
i.shutdownLock.Lock()
defer i.shutdownLock.Unlock()
if i.shutdown {
return nil
}
i.shutdown = true
close(i.shutdownCh)
return nil
}

View File

@ -1,72 +0,0 @@
package raft
// LogType describes various types of log entries.
type LogType uint8
const (
// LogCommand is applied to a user FSM.
LogCommand LogType = iota
// LogNoop is used to assert leadership.
LogNoop
// LogAddPeer is used to add a new peer. This should only be used with
// older protocol versions designed to be compatible with unversioned
// Raft servers. See comments in config.go for details.
LogAddPeerDeprecated
// LogRemovePeer is used to remove an existing peer. This should only be
// used with older protocol versions designed to be compatible with
// unversioned Raft servers. See comments in config.go for details.
LogRemovePeerDeprecated
// LogBarrier is used to ensure all preceding operations have been
// applied to the FSM. It is similar to LogNoop, but instead of returning
// once committed, it only returns once the FSM manager acks it. Otherwise
// it is possible there are operations committed but not yet applied to
// the FSM.
LogBarrier
// LogConfiguration establishes a membership change configuration. It is
// created when a server is added, removed, promoted, etc. Only used
// when protocol version 1 or greater is in use.
LogConfiguration
)
// Log entries are replicated to all members of the Raft cluster
// and form the heart of the replicated state machine.
type Log struct {
// Index holds the index of the log entry.
Index uint64
// Term holds the election term of the log entry.
Term uint64
// Type holds the type of the log entry.
Type LogType
// Data holds the log entry's type-specific data.
Data []byte
}
// LogStore is used to provide an interface for storing
// and retrieving logs in a durable fashion.
type LogStore interface {
// FirstIndex returns the first index written. 0 for no entries.
FirstIndex() (uint64, error)
// LastIndex returns the last index written. 0 for no entries.
LastIndex() (uint64, error)
// GetLog gets a log entry at a given index.
GetLog(index uint64, log *Log) error
// StoreLog stores a log entry.
StoreLog(log *Log) error
// StoreLogs stores multiple log entries.
StoreLogs(logs []*Log) error
// DeleteRange deletes a range of log entries. The range is inclusive.
DeleteRange(min, max uint64) error
}

View File

@ -1,79 +0,0 @@
package raft
import (
"fmt"
"sync"
)
// LogCache wraps any LogStore implementation to provide an
// in-memory ring buffer. This is used to cache access to
// the recently written entries. For implementations that do not
// cache themselves, this can provide a substantial boost by
// avoiding disk I/O on recent entries.
type LogCache struct {
store LogStore
cache []*Log
l sync.RWMutex
}
// NewLogCache is used to create a new LogCache with the
// given capacity and backend store.
func NewLogCache(capacity int, store LogStore) (*LogCache, error) {
if capacity <= 0 {
return nil, fmt.Errorf("capacity must be positive")
}
c := &LogCache{
store: store,
cache: make([]*Log, capacity),
}
return c, nil
}
func (c *LogCache) GetLog(idx uint64, log *Log) error {
// Check the buffer for an entry
c.l.RLock()
cached := c.cache[idx%uint64(len(c.cache))]
c.l.RUnlock()
// Check if entry is valid
if cached != nil && cached.Index == idx {
*log = *cached
return nil
}
// Forward request on cache miss
return c.store.GetLog(idx, log)
}
func (c *LogCache) StoreLog(log *Log) error {
return c.StoreLogs([]*Log{log})
}
func (c *LogCache) StoreLogs(logs []*Log) error {
// Insert the logs into the ring buffer
c.l.Lock()
for _, l := range logs {
c.cache[l.Index%uint64(len(c.cache))] = l
}
c.l.Unlock()
return c.store.StoreLogs(logs)
}
func (c *LogCache) FirstIndex() (uint64, error) {
return c.store.FirstIndex()
}
func (c *LogCache) LastIndex() (uint64, error) {
return c.store.LastIndex()
}
func (c *LogCache) DeleteRange(min, max uint64) error {
// Invalidate the cache on deletes
c.l.Lock()
c.cache = make([]*Log, len(c.cache))
c.l.Unlock()
return c.store.DeleteRange(min, max)
}

View File

@ -1,83 +0,0 @@
Simon (@superfell) and I (@ongardie) talked through reworking this library's cluster membership changes last Friday. We don't see a way to split this into independent patches, so we're taking the next best approach: submitting the plan here for review, then working on an enormous PR. Your feedback would be appreciated. (@superfell is out this week, however, so don't expect him to respond quickly.)
These are the main goals:
- Bringing things in line with the description in my PhD dissertation;
- Catching up new servers prior to granting them a vote, as well as allowing permanent non-voting members; and
- Eliminating the `peers.json` file, to avoid issues of consistency between that and the log/snapshot.
## Data-centric view
We propose to re-define a *configuration* as a set of servers, where each server includes an address (as it does today) and a mode that is either:
- *Voter*: a server whose vote is counted in elections and whose match index is used in advancing the leader's commit index.
- *Nonvoter*: a server that receives log entries but is not considered for elections or commitment purposes.
- *Staging*: a server that acts like a nonvoter with one exception: once a staging server receives enough log entries to catch up sufficiently to the leader's log, the leader will invoke a membership change to change the staging server to a voter.
All changes to the configuration will be done by writing a new configuration to the log. The new configuration will be in affect as soon as it is appended to the log (not when it is committed like a normal state machine command). Note that, per my dissertation, there can be at most one uncommitted configuration at a time (the next configuration may not be created until the prior one has been committed). It's not strictly necessary to follow these same rules for the nonvoter/staging servers, but we think its best to treat all changes uniformly.
Each server will track two configurations:
1. its *committed configuration*: the latest configuration in the log/snapshot that has been committed, along with its index.
2. its *latest configuration*: the latest configuration in the log/snapshot (may be committed or uncommitted), along with its index.
When there's no membership change happening, these two will be the same. The latest configuration is almost always the one used, except:
- When followers truncate the suffix of their logs, they may need to fall back to the committed configuration.
- When snapshotting, the committed configuration is written, to correspond with the committed log prefix that is being snapshotted.
## Application API
We propose the following operations for clients to manipulate the cluster configuration:
- AddVoter: server becomes staging unless voter,
- AddNonvoter: server becomes nonvoter unless staging or voter,
- DemoteVoter: server becomes nonvoter unless absent,
- RemovePeer: server removed from configuration,
- GetConfiguration: waits for latest config to commit, returns committed config.
This diagram, of which I'm quite proud, shows the possible transitions:
```
+-----------------------------------------------------------------------------+
| |
| Start -> +--------+ |
| ,------<------------| | |
| / | absent | |
| / RemovePeer--> | | <---RemovePeer |
| / | +--------+ \ |
| / | | \ |
| AddNonvoter | AddVoter \ |
| | ,->---' `--<-. | \ |
| v / \ v \ |
| +----------+ +----------+ +----------+ |
| | | ---AddVoter--> | | -log caught up --> | | |
| | nonvoter | | staging | | voter | |
| | | <-DemoteVoter- | | ,- | | |
| +----------+ \ +----------+ / +----------+ |
| \ / |
| `--------------<---------------' |
| |
+-----------------------------------------------------------------------------+
```
While these operations aren't quite symmetric, we think they're a good set to capture
the possible intent of the user. For example, if I want to make sure a server doesn't have a vote, but the server isn't part of the configuration at all, it probably shouldn't be added as a nonvoting server.
Each of these application-level operations will be interpreted by the leader and, if it has an effect, will cause the leader to write a new configuration entry to its log. Which particular application-level operation caused the log entry to be written need not be part of the log entry.
## Code implications
This is a non-exhaustive list, but we came up with a few things:
- Remove the PeerStore: the `peers.json` file introduces the possibility of getting out of sync with the log and snapshot, and it's hard to maintain this atomically as the log changes. It's not clear whether it's meant to track the committed or latest configuration, either.
- Servers will have to search their snapshot and log to find the committed configuration and the latest configuration on startup.
- Bootstrap will no longer use `peers.json` but should initialize the log or snapshot with an application-provided configuration entry.
- Snapshots should store the index of their configuration along with the configuration itself. In my experience with LogCabin, the original log index of the configuration is very useful to include in debug log messages.
- As noted in hashicorp/raft#84, configuration change requests should come in via a separate channel, and one may not proceed until the last has been committed.
- As to deciding when a log is sufficiently caught up, implementing a sophisticated algorithm *is* something that can be done in a separate PR. An easy and decent placeholder is: once the staging server has reached 95% of the leader's commit index, promote it.
## Feedback
Again, we're looking for feedback here before we start working on this. Here are some questions to think about:
- Does this seem like where we want things to go?
- Is there anything here that should be left out?
- Is there anything else we're forgetting about?
- Is there a good way to break this up?
- What do we need to worry about in terms of backwards compatibility?
- What implication will this have on current tests?
- What's the best way to test this code, in particular the small changes that will be sprinkled all over the library?

View File

@ -1,622 +0,0 @@
package raft
import (
"bufio"
"errors"
"fmt"
"io"
"log"
"net"
"os"
"sync"
"time"
"github.com/hashicorp/go-msgpack/codec"
)
const (
rpcAppendEntries uint8 = iota
rpcRequestVote
rpcInstallSnapshot
// DefaultTimeoutScale is the default TimeoutScale in a NetworkTransport.
DefaultTimeoutScale = 256 * 1024 // 256KB
// rpcMaxPipeline controls the maximum number of outstanding
// AppendEntries RPC calls.
rpcMaxPipeline = 128
)
var (
// ErrTransportShutdown is returned when operations on a transport are
// invoked after it's been terminated.
ErrTransportShutdown = errors.New("transport shutdown")
// ErrPipelineShutdown is returned when the pipeline is closed.
ErrPipelineShutdown = errors.New("append pipeline closed")
)
/*
NetworkTransport provides a network based transport that can be
used to communicate with Raft on remote machines. It requires
an underlying stream layer to provide a stream abstraction, which can
be simple TCP, TLS, etc.
This transport is very simple and lightweight. Each RPC request is
framed by sending a byte that indicates the message type, followed
by the MsgPack encoded request.
The response is an error string followed by the response object,
both are encoded using MsgPack.
InstallSnapshot is special, in that after the RPC request we stream
the entire state. That socket is not re-used as the connection state
is not known if there is an error.
*/
type NetworkTransport struct {
connPool map[ServerAddress][]*netConn
connPoolLock sync.Mutex
consumeCh chan RPC
heartbeatFn func(RPC)
heartbeatFnLock sync.Mutex
logger *log.Logger
maxPool int
shutdown bool
shutdownCh chan struct{}
shutdownLock sync.Mutex
stream StreamLayer
timeout time.Duration
TimeoutScale int
}
// StreamLayer is used with the NetworkTransport to provide
// the low level stream abstraction.
type StreamLayer interface {
net.Listener
// Dial is used to create a new outgoing connection
Dial(address ServerAddress, timeout time.Duration) (net.Conn, error)
}
type netConn struct {
target ServerAddress
conn net.Conn
r *bufio.Reader
w *bufio.Writer
dec *codec.Decoder
enc *codec.Encoder
}
func (n *netConn) Release() error {
return n.conn.Close()
}
type netPipeline struct {
conn *netConn
trans *NetworkTransport
doneCh chan AppendFuture
inprogressCh chan *appendFuture
shutdown bool
shutdownCh chan struct{}
shutdownLock sync.Mutex
}
// NewNetworkTransport creates a new network transport with the given dialer
// and listener. The maxPool controls how many connections we will pool. The
// timeout is used to apply I/O deadlines. For InstallSnapshot, we multiply
// the timeout by (SnapshotSize / TimeoutScale).
func NewNetworkTransport(
stream StreamLayer,
maxPool int,
timeout time.Duration,
logOutput io.Writer,
) *NetworkTransport {
if logOutput == nil {
logOutput = os.Stderr
}
return NewNetworkTransportWithLogger(stream, maxPool, timeout, log.New(logOutput, "", log.LstdFlags))
}
// NewNetworkTransportWithLogger creates a new network transport with the given dialer
// and listener. The maxPool controls how many connections we will pool. The
// timeout is used to apply I/O deadlines. For InstallSnapshot, we multiply
// the timeout by (SnapshotSize / TimeoutScale).
func NewNetworkTransportWithLogger(
stream StreamLayer,
maxPool int,
timeout time.Duration,
logger *log.Logger,
) *NetworkTransport {
if logger == nil {
logger = log.New(os.Stderr, "", log.LstdFlags)
}
trans := &NetworkTransport{
connPool: make(map[ServerAddress][]*netConn),
consumeCh: make(chan RPC),
logger: logger,
maxPool: maxPool,
shutdownCh: make(chan struct{}),
stream: stream,
timeout: timeout,
TimeoutScale: DefaultTimeoutScale,
}
go trans.listen()
return trans
}
// SetHeartbeatHandler is used to setup a heartbeat handler
// as a fast-pass. This is to avoid head-of-line blocking from
// disk IO.
func (n *NetworkTransport) SetHeartbeatHandler(cb func(rpc RPC)) {
n.heartbeatFnLock.Lock()
defer n.heartbeatFnLock.Unlock()
n.heartbeatFn = cb
}
// Close is used to stop the network transport.
func (n *NetworkTransport) Close() error {
n.shutdownLock.Lock()
defer n.shutdownLock.Unlock()
if !n.shutdown {
close(n.shutdownCh)
n.stream.Close()
n.shutdown = true
}
return nil
}
// Consumer implements the Transport interface.
func (n *NetworkTransport) Consumer() <-chan RPC {
return n.consumeCh
}
// LocalAddr implements the Transport interface.
func (n *NetworkTransport) LocalAddr() ServerAddress {
return ServerAddress(n.stream.Addr().String())
}
// IsShutdown is used to check if the transport is shutdown.
func (n *NetworkTransport) IsShutdown() bool {
select {
case <-n.shutdownCh:
return true
default:
return false
}
}
// getExistingConn is used to grab a pooled connection.
func (n *NetworkTransport) getPooledConn(target ServerAddress) *netConn {
n.connPoolLock.Lock()
defer n.connPoolLock.Unlock()
conns, ok := n.connPool[target]
if !ok || len(conns) == 0 {
return nil
}
var conn *netConn
num := len(conns)
conn, conns[num-1] = conns[num-1], nil
n.connPool[target] = conns[:num-1]
return conn
}
// getConn is used to get a connection from the pool.
func (n *NetworkTransport) getConn(target ServerAddress) (*netConn, error) {
// Check for a pooled conn
if conn := n.getPooledConn(target); conn != nil {
return conn, nil
}
// Dial a new connection
conn, err := n.stream.Dial(target, n.timeout)
if err != nil {
return nil, err
}
// Wrap the conn
netConn := &netConn{
target: target,
conn: conn,
r: bufio.NewReader(conn),
w: bufio.NewWriter(conn),
}
// Setup encoder/decoders
netConn.dec = codec.NewDecoder(netConn.r, &codec.MsgpackHandle{})
netConn.enc = codec.NewEncoder(netConn.w, &codec.MsgpackHandle{})
// Done
return netConn, nil
}
// returnConn returns a connection back to the pool.
func (n *NetworkTransport) returnConn(conn *netConn) {
n.connPoolLock.Lock()
defer n.connPoolLock.Unlock()
key := conn.target
conns, _ := n.connPool[key]
if !n.IsShutdown() && len(conns) < n.maxPool {
n.connPool[key] = append(conns, conn)
} else {
conn.Release()
}
}
// AppendEntriesPipeline returns an interface that can be used to pipeline
// AppendEntries requests.
func (n *NetworkTransport) AppendEntriesPipeline(target ServerAddress) (AppendPipeline, error) {
// Get a connection
conn, err := n.getConn(target)
if err != nil {
return nil, err
}
// Create the pipeline
return newNetPipeline(n, conn), nil
}
// AppendEntries implements the Transport interface.
func (n *NetworkTransport) AppendEntries(target ServerAddress, args *AppendEntriesRequest, resp *AppendEntriesResponse) error {
return n.genericRPC(target, rpcAppendEntries, args, resp)
}
// RequestVote implements the Transport interface.
func (n *NetworkTransport) RequestVote(target ServerAddress, args *RequestVoteRequest, resp *RequestVoteResponse) error {
return n.genericRPC(target, rpcRequestVote, args, resp)
}
// genericRPC handles a simple request/response RPC.
func (n *NetworkTransport) genericRPC(target ServerAddress, rpcType uint8, args interface{}, resp interface{}) error {
// Get a conn
conn, err := n.getConn(target)
if err != nil {
return err
}
// Set a deadline
if n.timeout > 0 {
conn.conn.SetDeadline(time.Now().Add(n.timeout))
}
// Send the RPC
if err = sendRPC(conn, rpcType, args); err != nil {
return err
}
// Decode the response
canReturn, err := decodeResponse(conn, resp)
if canReturn {
n.returnConn(conn)
}
return err
}
// InstallSnapshot implements the Transport interface.
func (n *NetworkTransport) InstallSnapshot(target ServerAddress, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error {
// Get a conn, always close for InstallSnapshot
conn, err := n.getConn(target)
if err != nil {
return err
}
defer conn.Release()
// Set a deadline, scaled by request size
if n.timeout > 0 {
timeout := n.timeout * time.Duration(args.Size/int64(n.TimeoutScale))
if timeout < n.timeout {
timeout = n.timeout
}
conn.conn.SetDeadline(time.Now().Add(timeout))
}
// Send the RPC
if err = sendRPC(conn, rpcInstallSnapshot, args); err != nil {
return err
}
// Stream the state
if _, err = io.Copy(conn.w, data); err != nil {
return err
}
// Flush
if err = conn.w.Flush(); err != nil {
return err
}
// Decode the response, do not return conn
_, err = decodeResponse(conn, resp)
return err
}
// EncodePeer implements the Transport interface.
func (n *NetworkTransport) EncodePeer(p ServerAddress) []byte {
return []byte(p)
}
// DecodePeer implements the Transport interface.
func (n *NetworkTransport) DecodePeer(buf []byte) ServerAddress {
return ServerAddress(buf)
}
// listen is used to handling incoming connections.
func (n *NetworkTransport) listen() {
for {
// Accept incoming connections
conn, err := n.stream.Accept()
if err != nil {
if n.IsShutdown() {
return
}
n.logger.Printf("[ERR] raft-net: Failed to accept connection: %v", err)
continue
}
n.logger.Printf("[DEBUG] raft-net: %v accepted connection from: %v", n.LocalAddr(), conn.RemoteAddr())
// Handle the connection in dedicated routine
go n.handleConn(conn)
}
}
// handleConn is used to handle an inbound connection for its lifespan.
func (n *NetworkTransport) handleConn(conn net.Conn) {
defer conn.Close()
r := bufio.NewReader(conn)
w := bufio.NewWriter(conn)
dec := codec.NewDecoder(r, &codec.MsgpackHandle{})
enc := codec.NewEncoder(w, &codec.MsgpackHandle{})
for {
if err := n.handleCommand(r, dec, enc); err != nil {
if err != io.EOF {
n.logger.Printf("[ERR] raft-net: Failed to decode incoming command: %v", err)
}
return
}
if err := w.Flush(); err != nil {
n.logger.Printf("[ERR] raft-net: Failed to flush response: %v", err)
return
}
}
}
// handleCommand is used to decode and dispatch a single command.
func (n *NetworkTransport) handleCommand(r *bufio.Reader, dec *codec.Decoder, enc *codec.Encoder) error {
// Get the rpc type
rpcType, err := r.ReadByte()
if err != nil {
return err
}
// Create the RPC object
respCh := make(chan RPCResponse, 1)
rpc := RPC{
RespChan: respCh,
}
// Decode the command
isHeartbeat := false
switch rpcType {
case rpcAppendEntries:
var req AppendEntriesRequest
if err := dec.Decode(&req); err != nil {
return err
}
rpc.Command = &req
// Check if this is a heartbeat
if req.Term != 0 && req.Leader != nil &&
req.PrevLogEntry == 0 && req.PrevLogTerm == 0 &&
len(req.Entries) == 0 && req.LeaderCommitIndex == 0 {
isHeartbeat = true
}
case rpcRequestVote:
var req RequestVoteRequest
if err := dec.Decode(&req); err != nil {
return err
}
rpc.Command = &req
case rpcInstallSnapshot:
var req InstallSnapshotRequest
if err := dec.Decode(&req); err != nil {
return err
}
rpc.Command = &req
rpc.Reader = io.LimitReader(r, req.Size)
default:
return fmt.Errorf("unknown rpc type %d", rpcType)
}
// Check for heartbeat fast-path
if isHeartbeat {
n.heartbeatFnLock.Lock()
fn := n.heartbeatFn
n.heartbeatFnLock.Unlock()
if fn != nil {
fn(rpc)
goto RESP
}
}
// Dispatch the RPC
select {
case n.consumeCh <- rpc:
case <-n.shutdownCh:
return ErrTransportShutdown
}
// Wait for response
RESP:
select {
case resp := <-respCh:
// Send the error first
respErr := ""
if resp.Error != nil {
respErr = resp.Error.Error()
}
if err := enc.Encode(respErr); err != nil {
return err
}
// Send the response
if err := enc.Encode(resp.Response); err != nil {
return err
}
case <-n.shutdownCh:
return ErrTransportShutdown
}
return nil
}
// decodeResponse is used to decode an RPC response and reports whether
// the connection can be reused.
func decodeResponse(conn *netConn, resp interface{}) (bool, error) {
// Decode the error if any
var rpcError string
if err := conn.dec.Decode(&rpcError); err != nil {
conn.Release()
return false, err
}
// Decode the response
if err := conn.dec.Decode(resp); err != nil {
conn.Release()
return false, err
}
// Format an error if any
if rpcError != "" {
return true, fmt.Errorf(rpcError)
}
return true, nil
}
// sendRPC is used to encode and send the RPC.
func sendRPC(conn *netConn, rpcType uint8, args interface{}) error {
// Write the request type
if err := conn.w.WriteByte(rpcType); err != nil {
conn.Release()
return err
}
// Send the request
if err := conn.enc.Encode(args); err != nil {
conn.Release()
return err
}
// Flush
if err := conn.w.Flush(); err != nil {
conn.Release()
return err
}
return nil
}
// newNetPipeline is used to construct a netPipeline from a given
// transport and connection.
func newNetPipeline(trans *NetworkTransport, conn *netConn) *netPipeline {
n := &netPipeline{
conn: conn,
trans: trans,
doneCh: make(chan AppendFuture, rpcMaxPipeline),
inprogressCh: make(chan *appendFuture, rpcMaxPipeline),
shutdownCh: make(chan struct{}),
}
go n.decodeResponses()
return n
}
// decodeResponses is a long running routine that decodes the responses
// sent on the connection.
func (n *netPipeline) decodeResponses() {
timeout := n.trans.timeout
for {
select {
case future := <-n.inprogressCh:
if timeout > 0 {
n.conn.conn.SetReadDeadline(time.Now().Add(timeout))
}
_, err := decodeResponse(n.conn, future.resp)
future.respond(err)
select {
case n.doneCh <- future:
case <-n.shutdownCh:
return
}
case <-n.shutdownCh:
return
}
}
}
// AppendEntries is used to pipeline a new append entries request.
func (n *netPipeline) AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error) {
// Create a new future
future := &appendFuture{
start: time.Now(),
args: args,
resp: resp,
}
future.init()
// Add a send timeout
if timeout := n.trans.timeout; timeout > 0 {
n.conn.conn.SetWriteDeadline(time.Now().Add(timeout))
}
// Send the RPC
if err := sendRPC(n.conn, rpcAppendEntries, future.args); err != nil {
return nil, err
}
// Hand-off for decoding, this can also cause back-pressure
// to prevent too many inflight requests
select {
case n.inprogressCh <- future:
return future, nil
case <-n.shutdownCh:
return nil, ErrPipelineShutdown
}
}
// Consumer returns a channel that can be used to consume complete futures.
func (n *netPipeline) Consumer() <-chan AppendFuture {
return n.doneCh
}
// Closed is used to shutdown the pipeline connection.
func (n *netPipeline) Close() error {
n.shutdownLock.Lock()
defer n.shutdownLock.Unlock()
if n.shutdown {
return nil
}
// Release the connection
n.conn.Release()
n.shutdown = true
close(n.shutdownCh)
return nil
}

View File

@ -1,115 +0,0 @@
package raft
import (
"sync/atomic"
)
// Observation is sent along the given channel to observers when an event occurs.
type Observation struct {
// Raft holds the Raft instance generating the observation.
Raft *Raft
// Data holds observation-specific data. Possible types are
// *RequestVoteRequest and RaftState.
Data interface{}
}
// nextObserverId is used to provide a unique ID for each observer to aid in
// deregistration.
var nextObserverID uint64
// FilterFn is a function that can be registered in order to filter observations.
// The function reports whether the observation should be included - if
// it returns false, the observation will be filtered out.
type FilterFn func(o *Observation) bool
// Observer describes what to do with a given observation.
type Observer struct {
// channel receives observations.
channel chan Observation
// blocking, if true, will cause Raft to block when sending an observation
// to this observer. This should generally be set to false.
blocking bool
// filter will be called to determine if an observation should be sent to
// the channel.
filter FilterFn
// id is the ID of this observer in the Raft map.
id uint64
// numObserved and numDropped are performance counters for this observer.
numObserved uint64
numDropped uint64
}
// NewObserver creates a new observer that can be registered
// to make observations on a Raft instance. Observations
// will be sent on the given channel if they satisfy the
// given filter.
//
// If blocking is true, the observer will block when it can't
// send on the channel, otherwise it may discard events.
func NewObserver(channel chan Observation, blocking bool, filter FilterFn) *Observer {
return &Observer{
channel: channel,
blocking: blocking,
filter: filter,
id: atomic.AddUint64(&nextObserverID, 1),
}
}
// GetNumObserved returns the number of observations.
func (or *Observer) GetNumObserved() uint64 {
return atomic.LoadUint64(&or.numObserved)
}
// GetNumDropped returns the number of dropped observations due to blocking.
func (or *Observer) GetNumDropped() uint64 {
return atomic.LoadUint64(&or.numDropped)
}
// RegisterObserver registers a new observer.
func (r *Raft) RegisterObserver(or *Observer) {
r.observersLock.Lock()
defer r.observersLock.Unlock()
r.observers[or.id] = or
}
// DeregisterObserver deregisters an observer.
func (r *Raft) DeregisterObserver(or *Observer) {
r.observersLock.Lock()
defer r.observersLock.Unlock()
delete(r.observers, or.id)
}
// observe sends an observation to every observer.
func (r *Raft) observe(o interface{}) {
// In general observers should not block. But in any case this isn't
// disastrous as we only hold a read lock, which merely prevents
// registration / deregistration of observers.
r.observersLock.RLock()
defer r.observersLock.RUnlock()
for _, or := range r.observers {
// It's wasteful to do this in the loop, but for the common case
// where there are no observers we won't create any objects.
ob := Observation{Raft: r, Data: o}
if or.filter != nil && !or.filter(&ob) {
continue
}
if or.channel == nil {
continue
}
if or.blocking {
or.channel <- ob
atomic.AddUint64(&or.numObserved, 1)
} else {
select {
case or.channel <- ob:
atomic.AddUint64(&or.numObserved, 1)
default:
atomic.AddUint64(&or.numDropped, 1)
}
}
}
}

View File

@ -1,46 +0,0 @@
package raft
import (
"bytes"
"encoding/json"
"io/ioutil"
)
// ReadPeersJSON consumes a legacy peers.json file in the format of the old JSON
// peer store and creates a new-style configuration structure. This can be used
// to migrate this data or perform manual recovery when running protocol versions
// that can interoperate with older, unversioned Raft servers. This should not be
// used once server IDs are in use, because the old peers.json file didn't have
// support for these, nor non-voter suffrage types.
func ReadPeersJSON(path string) (Configuration, error) {
// Read in the file.
buf, err := ioutil.ReadFile(path)
if err != nil {
return Configuration{}, err
}
// Parse it as JSON.
var peers []string
dec := json.NewDecoder(bytes.NewReader(buf))
if err := dec.Decode(&peers); err != nil {
return Configuration{}, err
}
// Map it into the new-style configuration structure. We can only specify
// voter roles here, and the ID has to be the same as the address.
var configuration Configuration
for _, peer := range peers {
server := Server{
Suffrage: Voter,
ID: ServerID(peer),
Address: ServerAddress(peer),
}
configuration.Servers = append(configuration.Servers, server)
}
// We should only ingest valid configurations.
if err := checkConfiguration(configuration); err != nil {
return Configuration{}, err
}
return configuration, nil
}

File diff suppressed because it is too large Load Diff

View File

@ -1,561 +0,0 @@
package raft
import (
"errors"
"fmt"
"sync"
"time"
"github.com/armon/go-metrics"
)
const (
maxFailureScale = 12
failureWait = 10 * time.Millisecond
)
var (
// ErrLogNotFound indicates a given log entry is not available.
ErrLogNotFound = errors.New("log not found")
// ErrPipelineReplicationNotSupported can be returned by the transport to
// signal that pipeline replication is not supported in general, and that
// no error message should be produced.
ErrPipelineReplicationNotSupported = errors.New("pipeline replication not supported")
)
// followerReplication is in charge of sending snapshots and log entries from
// this leader during this particular term to a remote follower.
type followerReplication struct {
// peer contains the network address and ID of the remote follower.
peer Server
// commitment tracks the entries acknowledged by followers so that the
// leader's commit index can advance. It is updated on successsful
// AppendEntries responses.
commitment *commitment
// stopCh is notified/closed when this leader steps down or the follower is
// removed from the cluster. In the follower removed case, it carries a log
// index; replication should be attempted with a best effort up through that
// index, before exiting.
stopCh chan uint64
// triggerCh is notified every time new entries are appended to the log.
triggerCh chan struct{}
// currentTerm is the term of this leader, to be included in AppendEntries
// requests.
currentTerm uint64
// nextIndex is the index of the next log entry to send to the follower,
// which may fall past the end of the log.
nextIndex uint64
// lastContact is updated to the current time whenever any response is
// received from the follower (successful or not). This is used to check
// whether the leader should step down (Raft.checkLeaderLease()).
lastContact time.Time
// lastContactLock protects 'lastContact'.
lastContactLock sync.RWMutex
// failures counts the number of failed RPCs since the last success, which is
// used to apply backoff.
failures uint64
// notifyCh is notified to send out a heartbeat, which is used to check that
// this server is still leader.
notifyCh chan struct{}
// notify is a list of futures to be resolved upon receipt of an
// acknowledgement, then cleared from this list.
notify []*verifyFuture
// notifyLock protects 'notify'.
notifyLock sync.Mutex
// stepDown is used to indicate to the leader that we
// should step down based on information from a follower.
stepDown chan struct{}
// allowPipeline is used to determine when to pipeline the AppendEntries RPCs.
// It is private to this replication goroutine.
allowPipeline bool
}
// notifyAll is used to notify all the waiting verify futures
// if the follower believes we are still the leader.
func (s *followerReplication) notifyAll(leader bool) {
// Clear the waiting notifies minimizing lock time
s.notifyLock.Lock()
n := s.notify
s.notify = nil
s.notifyLock.Unlock()
// Submit our votes
for _, v := range n {
v.vote(leader)
}
}
// LastContact returns the time of last contact.
func (s *followerReplication) LastContact() time.Time {
s.lastContactLock.RLock()
last := s.lastContact
s.lastContactLock.RUnlock()
return last
}
// setLastContact sets the last contact to the current time.
func (s *followerReplication) setLastContact() {
s.lastContactLock.Lock()
s.lastContact = time.Now()
s.lastContactLock.Unlock()
}
// replicate is a long running routine that replicates log entries to a single
// follower.
func (r *Raft) replicate(s *followerReplication) {
// Start an async heartbeating routing
stopHeartbeat := make(chan struct{})
defer close(stopHeartbeat)
r.goFunc(func() { r.heartbeat(s, stopHeartbeat) })
RPC:
shouldStop := false
for !shouldStop {
select {
case maxIndex := <-s.stopCh:
// Make a best effort to replicate up to this index
if maxIndex > 0 {
r.replicateTo(s, maxIndex)
}
return
case <-s.triggerCh:
lastLogIdx, _ := r.getLastLog()
shouldStop = r.replicateTo(s, lastLogIdx)
case <-randomTimeout(r.conf.CommitTimeout): // TODO: what is this?
lastLogIdx, _ := r.getLastLog()
shouldStop = r.replicateTo(s, lastLogIdx)
}
// If things looks healthy, switch to pipeline mode
if !shouldStop && s.allowPipeline {
goto PIPELINE
}
}
return
PIPELINE:
// Disable until re-enabled
s.allowPipeline = false
// Replicates using a pipeline for high performance. This method
// is not able to gracefully recover from errors, and so we fall back
// to standard mode on failure.
if err := r.pipelineReplicate(s); err != nil {
if err != ErrPipelineReplicationNotSupported {
r.logger.Printf("[ERR] raft: Failed to start pipeline replication to %s: %s", s.peer, err)
}
}
goto RPC
}
// replicateTo is a hepler to replicate(), used to replicate the logs up to a
// given last index.
// If the follower log is behind, we take care to bring them up to date.
func (r *Raft) replicateTo(s *followerReplication, lastIndex uint64) (shouldStop bool) {
// Create the base request
var req AppendEntriesRequest
var resp AppendEntriesResponse
var start time.Time
START:
// Prevent an excessive retry rate on errors
if s.failures > 0 {
select {
case <-time.After(backoff(failureWait, s.failures, maxFailureScale)):
case <-r.shutdownCh:
}
}
// Setup the request
if err := r.setupAppendEntries(s, &req, s.nextIndex, lastIndex); err == ErrLogNotFound {
goto SEND_SNAP
} else if err != nil {
return
}
// Make the RPC call
start = time.Now()
if err := r.trans.AppendEntries(s.peer.Address, &req, &resp); err != nil {
r.logger.Printf("[ERR] raft: Failed to AppendEntries to %v: %v", s.peer, err)
s.failures++
return
}
appendStats(string(s.peer.ID), start, float32(len(req.Entries)))
// Check for a newer term, stop running
if resp.Term > req.Term {
r.handleStaleTerm(s)
return true
}
// Update the last contact
s.setLastContact()
// Update s based on success
if resp.Success {
// Update our replication state
updateLastAppended(s, &req)
// Clear any failures, allow pipelining
s.failures = 0
s.allowPipeline = true
} else {
s.nextIndex = max(min(s.nextIndex-1, resp.LastLog+1), 1)
if resp.NoRetryBackoff {
s.failures = 0
} else {
s.failures++
}
r.logger.Printf("[WARN] raft: AppendEntries to %v rejected, sending older logs (next: %d)", s.peer, s.nextIndex)
}
CHECK_MORE:
// Poll the stop channel here in case we are looping and have been asked
// to stop, or have stepped down as leader. Even for the best effort case
// where we are asked to replicate to a given index and then shutdown,
// it's better to not loop in here to send lots of entries to a straggler
// that's leaving the cluster anyways.
select {
case <-s.stopCh:
return true
default:
}
// Check if there are more logs to replicate
if s.nextIndex <= lastIndex {
goto START
}
return
// SEND_SNAP is used when we fail to get a log, usually because the follower
// is too far behind, and we must ship a snapshot down instead
SEND_SNAP:
if stop, err := r.sendLatestSnapshot(s); stop {
return true
} else if err != nil {
r.logger.Printf("[ERR] raft: Failed to send snapshot to %v: %v", s.peer, err)
return
}
// Check if there is more to replicate
goto CHECK_MORE
}
// sendLatestSnapshot is used to send the latest snapshot we have
// down to our follower.
func (r *Raft) sendLatestSnapshot(s *followerReplication) (bool, error) {
// Get the snapshots
snapshots, err := r.snapshots.List()
if err != nil {
r.logger.Printf("[ERR] raft: Failed to list snapshots: %v", err)
return false, err
}
// Check we have at least a single snapshot
if len(snapshots) == 0 {
return false, fmt.Errorf("no snapshots found")
}
// Open the most recent snapshot
snapID := snapshots[0].ID
meta, snapshot, err := r.snapshots.Open(snapID)
if err != nil {
r.logger.Printf("[ERR] raft: Failed to open snapshot %v: %v", snapID, err)
return false, err
}
defer snapshot.Close()
// Setup the request
req := InstallSnapshotRequest{
RPCHeader: r.getRPCHeader(),
SnapshotVersion: meta.Version,
Term: s.currentTerm,
Leader: r.trans.EncodePeer(r.localAddr),
LastLogIndex: meta.Index,
LastLogTerm: meta.Term,
Peers: meta.Peers,
Size: meta.Size,
Configuration: encodeConfiguration(meta.Configuration),
ConfigurationIndex: meta.ConfigurationIndex,
}
// Make the call
start := time.Now()
var resp InstallSnapshotResponse
if err := r.trans.InstallSnapshot(s.peer.Address, &req, &resp, snapshot); err != nil {
r.logger.Printf("[ERR] raft: Failed to install snapshot %v: %v", snapID, err)
s.failures++
return false, err
}
metrics.MeasureSince([]string{"raft", "replication", "installSnapshot", string(s.peer.ID)}, start)
// Check for a newer term, stop running
if resp.Term > req.Term {
r.handleStaleTerm(s)
return true, nil
}
// Update the last contact
s.setLastContact()
// Check for success
if resp.Success {
// Update the indexes
s.nextIndex = meta.Index + 1
s.commitment.match(s.peer.ID, meta.Index)
// Clear any failures
s.failures = 0
// Notify we are still leader
s.notifyAll(true)
} else {
s.failures++
r.logger.Printf("[WARN] raft: InstallSnapshot to %v rejected", s.peer)
}
return false, nil
}
// heartbeat is used to periodically invoke AppendEntries on a peer
// to ensure they don't time out. This is done async of replicate(),
// since that routine could potentially be blocked on disk IO.
func (r *Raft) heartbeat(s *followerReplication, stopCh chan struct{}) {
var failures uint64
req := AppendEntriesRequest{
RPCHeader: r.getRPCHeader(),
Term: s.currentTerm,
Leader: r.trans.EncodePeer(r.localAddr),
}
var resp AppendEntriesResponse
for {
// Wait for the next heartbeat interval or forced notify
select {
case <-s.notifyCh:
case <-randomTimeout(r.conf.HeartbeatTimeout / 10):
case <-stopCh:
return
}
start := time.Now()
if err := r.trans.AppendEntries(s.peer.Address, &req, &resp); err != nil {
r.logger.Printf("[ERR] raft: Failed to heartbeat to %v: %v", s.peer.Address, err)
failures++
select {
case <-time.After(backoff(failureWait, failures, maxFailureScale)):
case <-stopCh:
}
} else {
s.setLastContact()
failures = 0
metrics.MeasureSince([]string{"raft", "replication", "heartbeat", string(s.peer.ID)}, start)
s.notifyAll(resp.Success)
}
}
}
// pipelineReplicate is used when we have synchronized our state with the follower,
// and want to switch to a higher performance pipeline mode of replication.
// We only pipeline AppendEntries commands, and if we ever hit an error, we fall
// back to the standard replication which can handle more complex situations.
func (r *Raft) pipelineReplicate(s *followerReplication) error {
// Create a new pipeline
pipeline, err := r.trans.AppendEntriesPipeline(s.peer.Address)
if err != nil {
return err
}
defer pipeline.Close()
// Log start and stop of pipeline
r.logger.Printf("[INFO] raft: pipelining replication to peer %v", s.peer)
defer r.logger.Printf("[INFO] raft: aborting pipeline replication to peer %v", s.peer)
// Create a shutdown and finish channel
stopCh := make(chan struct{})
finishCh := make(chan struct{})
// Start a dedicated decoder
r.goFunc(func() { r.pipelineDecode(s, pipeline, stopCh, finishCh) })
// Start pipeline sends at the last good nextIndex
nextIndex := s.nextIndex
shouldStop := false
SEND:
for !shouldStop {
select {
case <-finishCh:
break SEND
case maxIndex := <-s.stopCh:
// Make a best effort to replicate up to this index
if maxIndex > 0 {
r.pipelineSend(s, pipeline, &nextIndex, maxIndex)
}
break SEND
case <-s.triggerCh:
lastLogIdx, _ := r.getLastLog()
shouldStop = r.pipelineSend(s, pipeline, &nextIndex, lastLogIdx)
case <-randomTimeout(r.conf.CommitTimeout):
lastLogIdx, _ := r.getLastLog()
shouldStop = r.pipelineSend(s, pipeline, &nextIndex, lastLogIdx)
}
}
// Stop our decoder, and wait for it to finish
close(stopCh)
select {
case <-finishCh:
case <-r.shutdownCh:
}
return nil
}
// pipelineSend is used to send data over a pipeline. It is a helper to
// pipelineReplicate.
func (r *Raft) pipelineSend(s *followerReplication, p AppendPipeline, nextIdx *uint64, lastIndex uint64) (shouldStop bool) {
// Create a new append request
req := new(AppendEntriesRequest)
if err := r.setupAppendEntries(s, req, *nextIdx, lastIndex); err != nil {
return true
}
// Pipeline the append entries
if _, err := p.AppendEntries(req, new(AppendEntriesResponse)); err != nil {
r.logger.Printf("[ERR] raft: Failed to pipeline AppendEntries to %v: %v", s.peer, err)
return true
}
// Increase the next send log to avoid re-sending old logs
if n := len(req.Entries); n > 0 {
last := req.Entries[n-1]
*nextIdx = last.Index + 1
}
return false
}
// pipelineDecode is used to decode the responses of pipelined requests.
func (r *Raft) pipelineDecode(s *followerReplication, p AppendPipeline, stopCh, finishCh chan struct{}) {
defer close(finishCh)
respCh := p.Consumer()
for {
select {
case ready := <-respCh:
req, resp := ready.Request(), ready.Response()
appendStats(string(s.peer.ID), ready.Start(), float32(len(req.Entries)))
// Check for a newer term, stop running
if resp.Term > req.Term {
r.handleStaleTerm(s)
return
}
// Update the last contact
s.setLastContact()
// Abort pipeline if not successful
if !resp.Success {
return
}
// Update our replication state
updateLastAppended(s, req)
case <-stopCh:
return
}
}
}
// setupAppendEntries is used to setup an append entries request.
func (r *Raft) setupAppendEntries(s *followerReplication, req *AppendEntriesRequest, nextIndex, lastIndex uint64) error {
req.RPCHeader = r.getRPCHeader()
req.Term = s.currentTerm
req.Leader = r.trans.EncodePeer(r.localAddr)
req.LeaderCommitIndex = r.getCommitIndex()
if err := r.setPreviousLog(req, nextIndex); err != nil {
return err
}
if err := r.setNewLogs(req, nextIndex, lastIndex); err != nil {
return err
}
return nil
}
// setPreviousLog is used to setup the PrevLogEntry and PrevLogTerm for an
// AppendEntriesRequest given the next index to replicate.
func (r *Raft) setPreviousLog(req *AppendEntriesRequest, nextIndex uint64) error {
// Guard for the first index, since there is no 0 log entry
// Guard against the previous index being a snapshot as well
lastSnapIdx, lastSnapTerm := r.getLastSnapshot()
if nextIndex == 1 {
req.PrevLogEntry = 0
req.PrevLogTerm = 0
} else if (nextIndex - 1) == lastSnapIdx {
req.PrevLogEntry = lastSnapIdx
req.PrevLogTerm = lastSnapTerm
} else {
var l Log
if err := r.logs.GetLog(nextIndex-1, &l); err != nil {
r.logger.Printf("[ERR] raft: Failed to get log at index %d: %v",
nextIndex-1, err)
return err
}
// Set the previous index and term (0 if nextIndex is 1)
req.PrevLogEntry = l.Index
req.PrevLogTerm = l.Term
}
return nil
}
// setNewLogs is used to setup the logs which should be appended for a request.
func (r *Raft) setNewLogs(req *AppendEntriesRequest, nextIndex, lastIndex uint64) error {
// Append up to MaxAppendEntries or up to the lastIndex
req.Entries = make([]*Log, 0, r.conf.MaxAppendEntries)
maxIndex := min(nextIndex+uint64(r.conf.MaxAppendEntries)-1, lastIndex)
for i := nextIndex; i <= maxIndex; i++ {
oldLog := new(Log)
if err := r.logs.GetLog(i, oldLog); err != nil {
r.logger.Printf("[ERR] raft: Failed to get log at index %d: %v", i, err)
return err
}
req.Entries = append(req.Entries, oldLog)
}
return nil
}
// appendStats is used to emit stats about an AppendEntries invocation.
func appendStats(peer string, start time.Time, logs float32) {
metrics.MeasureSince([]string{"raft", "replication", "appendEntries", "rpc", peer}, start)
metrics.IncrCounter([]string{"raft", "replication", "appendEntries", "logs", peer}, logs)
}
// handleStaleTerm is used when a follower indicates that we have a stale term.
func (r *Raft) handleStaleTerm(s *followerReplication) {
r.logger.Printf("[ERR] raft: peer %v has newer term, stopping replication", s.peer)
s.notifyAll(false) // No longer leader
asyncNotifyCh(s.stepDown)
}
// updateLastAppended is used to update follower replication state after a
// successful AppendEntries RPC.
// TODO: This isn't used during InstallSnapshot, but the code there is similar.
func updateLastAppended(s *followerReplication, req *AppendEntriesRequest) {
// Mark any inflight logs as committed
if logs := req.Entries; len(logs) > 0 {
last := logs[len(logs)-1]
s.nextIndex = last.Index + 1
s.commitment.match(s.peer.ID, last.Index)
}
// Notify still leader
s.notifyAll(true)
}

View File

@ -1,239 +0,0 @@
package raft
import (
"fmt"
"io"
"time"
"github.com/armon/go-metrics"
)
// SnapshotMeta is for metadata of a snapshot.
type SnapshotMeta struct {
// Version is the version number of the snapshot metadata. This does not cover
// the application's data in the snapshot, that should be versioned
// separately.
Version SnapshotVersion
// ID is opaque to the store, and is used for opening.
ID string
// Index and Term store when the snapshot was taken.
Index uint64
Term uint64
// Peers is deprecated and used to support version 0 snapshots, but will
// be populated in version 1 snapshots as well to help with upgrades.
Peers []byte
// Configuration and ConfigurationIndex are present in version 1
// snapshots and later.
Configuration Configuration
ConfigurationIndex uint64
// Size is the size of the snapshot in bytes.
Size int64
}
// SnapshotStore interface is used to allow for flexible implementations
// of snapshot storage and retrieval. For example, a client could implement
// a shared state store such as S3, allowing new nodes to restore snapshots
// without streaming from the leader.
type SnapshotStore interface {
// Create is used to begin a snapshot at a given index and term, and with
// the given committed configuration. The version parameter controls
// which snapshot version to create.
Create(version SnapshotVersion, index, term uint64, configuration Configuration,
configurationIndex uint64, trans Transport) (SnapshotSink, error)
// List is used to list the available snapshots in the store.
// It should return then in descending order, with the highest index first.
List() ([]*SnapshotMeta, error)
// Open takes a snapshot ID and provides a ReadCloser. Once close is
// called it is assumed the snapshot is no longer needed.
Open(id string) (*SnapshotMeta, io.ReadCloser, error)
}
// SnapshotSink is returned by StartSnapshot. The FSM will Write state
// to the sink and call Close on completion. On error, Cancel will be invoked.
type SnapshotSink interface {
io.WriteCloser
ID() string
Cancel() error
}
// runSnapshots is a long running goroutine used to manage taking
// new snapshots of the FSM. It runs in parallel to the FSM and
// main goroutines, so that snapshots do not block normal operation.
func (r *Raft) runSnapshots() {
for {
select {
case <-randomTimeout(r.conf.SnapshotInterval):
// Check if we should snapshot
if !r.shouldSnapshot() {
continue
}
// Trigger a snapshot
if _, err := r.takeSnapshot(); err != nil {
r.logger.Printf("[ERR] raft: Failed to take snapshot: %v", err)
}
case future := <-r.userSnapshotCh:
// User-triggered, run immediately
id, err := r.takeSnapshot()
if err != nil {
r.logger.Printf("[ERR] raft: Failed to take snapshot: %v", err)
} else {
future.opener = func() (*SnapshotMeta, io.ReadCloser, error) {
return r.snapshots.Open(id)
}
}
future.respond(err)
case <-r.shutdownCh:
return
}
}
}
// shouldSnapshot checks if we meet the conditions to take
// a new snapshot.
func (r *Raft) shouldSnapshot() bool {
// Check the last snapshot index
lastSnap, _ := r.getLastSnapshot()
// Check the last log index
lastIdx, err := r.logs.LastIndex()
if err != nil {
r.logger.Printf("[ERR] raft: Failed to get last log index: %v", err)
return false
}
// Compare the delta to the threshold
delta := lastIdx - lastSnap
return delta >= r.conf.SnapshotThreshold
}
// takeSnapshot is used to take a new snapshot. This must only be called from
// the snapshot thread, never the main thread. This returns the ID of the new
// snapshot, along with an error.
func (r *Raft) takeSnapshot() (string, error) {
defer metrics.MeasureSince([]string{"raft", "snapshot", "takeSnapshot"}, time.Now())
// Create a request for the FSM to perform a snapshot.
snapReq := &reqSnapshotFuture{}
snapReq.init()
// Wait for dispatch or shutdown.
select {
case r.fsmSnapshotCh <- snapReq:
case <-r.shutdownCh:
return "", ErrRaftShutdown
}
// Wait until we get a response
if err := snapReq.Error(); err != nil {
if err != ErrNothingNewToSnapshot {
err = fmt.Errorf("failed to start snapshot: %v", err)
}
return "", err
}
defer snapReq.snapshot.Release()
// Make a request for the configurations and extract the committed info.
// We have to use the future here to safely get this information since
// it is owned by the main thread.
configReq := &configurationsFuture{}
configReq.init()
select {
case r.configurationsCh <- configReq:
case <-r.shutdownCh:
return "", ErrRaftShutdown
}
if err := configReq.Error(); err != nil {
return "", err
}
committed := configReq.configurations.committed
committedIndex := configReq.configurations.committedIndex
// We don't support snapshots while there's a config change outstanding
// since the snapshot doesn't have a means to represent this state. This
// is a little weird because we need the FSM to apply an index that's
// past the configuration change, even though the FSM itself doesn't see
// the configuration changes. It should be ok in practice with normal
// application traffic flowing through the FSM. If there's none of that
// then it's not crucial that we snapshot, since there's not much going
// on Raft-wise.
if snapReq.index < committedIndex {
return "", fmt.Errorf("cannot take snapshot now, wait until the configuration entry at %v has been applied (have applied %v)",
committedIndex, snapReq.index)
}
// Create a new snapshot.
r.logger.Printf("[INFO] raft: Starting snapshot up to %d", snapReq.index)
start := time.Now()
version := getSnapshotVersion(r.protocolVersion)
sink, err := r.snapshots.Create(version, snapReq.index, snapReq.term, committed, committedIndex, r.trans)
if err != nil {
return "", fmt.Errorf("failed to create snapshot: %v", err)
}
metrics.MeasureSince([]string{"raft", "snapshot", "create"}, start)
// Try to persist the snapshot.
start = time.Now()
if err := snapReq.snapshot.Persist(sink); err != nil {
sink.Cancel()
return "", fmt.Errorf("failed to persist snapshot: %v", err)
}
metrics.MeasureSince([]string{"raft", "snapshot", "persist"}, start)
// Close and check for error.
if err := sink.Close(); err != nil {
return "", fmt.Errorf("failed to close snapshot: %v", err)
}
// Update the last stable snapshot info.
r.setLastSnapshot(snapReq.index, snapReq.term)
// Compact the logs.
if err := r.compactLogs(snapReq.index); err != nil {
return "", err
}
r.logger.Printf("[INFO] raft: Snapshot to %d complete", snapReq.index)
return sink.ID(), nil
}
// compactLogs takes the last inclusive index of a snapshot
// and trims the logs that are no longer needed.
func (r *Raft) compactLogs(snapIdx uint64) error {
defer metrics.MeasureSince([]string{"raft", "compactLogs"}, time.Now())
// Determine log ranges to compact
minLog, err := r.logs.FirstIndex()
if err != nil {
return fmt.Errorf("failed to get first log index: %v", err)
}
// Check if we have enough logs to truncate
lastLogIdx, _ := r.getLastLog()
if lastLogIdx <= r.conf.TrailingLogs {
return nil
}
// Truncate up to the end of the snapshot, or `TrailingLogs`
// back from the head, which ever is further back. This ensures
// at least `TrailingLogs` entries, but does not allow logs
// after the snapshot to be removed.
maxLog := min(snapIdx, lastLogIdx-r.conf.TrailingLogs)
// Log this
r.logger.Printf("[INFO] raft: Compacting logs from %d to %d", minLog, maxLog)
// Compact the logs
if err := r.logs.DeleteRange(minLog, maxLog); err != nil {
return fmt.Errorf("log compaction failed: %v", err)
}
return nil
}

View File

@ -1,15 +0,0 @@
package raft
// StableStore is used to provide stable storage
// of key configurations to ensure safety.
type StableStore interface {
Set(key []byte, val []byte) error
// Get returns the value for key, or an empty byte slice if key was not found.
Get(key []byte) ([]byte, error)
SetUint64(key []byte, val uint64) error
// GetUint64 returns the uint64 value for key, or 0 if key was not found.
GetUint64(key []byte) (uint64, error)
}

View File

@ -1,167 +0,0 @@
package raft
import (
"sync"
"sync/atomic"
)
// RaftState captures the state of a Raft node: Follower, Candidate, Leader,
// or Shutdown.
type RaftState uint32
const (
// Follower is the initial state of a Raft node.
Follower RaftState = iota
// Candidate is one of the valid states of a Raft node.
Candidate
// Leader is one of the valid states of a Raft node.
Leader
// Shutdown is the terminal state of a Raft node.
Shutdown
)
func (s RaftState) String() string {
switch s {
case Follower:
return "Follower"
case Candidate:
return "Candidate"
case Leader:
return "Leader"
case Shutdown:
return "Shutdown"
default:
return "Unknown"
}
}
// raftState is used to maintain various state variables
// and provides an interface to set/get the variables in a
// thread safe manner.
type raftState struct {
// The current term, cache of StableStore
currentTerm uint64
// Highest committed log entry
commitIndex uint64
// Last applied log to the FSM
lastApplied uint64
// protects 4 next fields
lastLock sync.Mutex
// Cache the latest snapshot index/term
lastSnapshotIndex uint64
lastSnapshotTerm uint64
// Cache the latest log from LogStore
lastLogIndex uint64
lastLogTerm uint64
// Tracks running goroutines
routinesGroup sync.WaitGroup
// The current state
state RaftState
}
func (r *raftState) getState() RaftState {
stateAddr := (*uint32)(&r.state)
return RaftState(atomic.LoadUint32(stateAddr))
}
func (r *raftState) setState(s RaftState) {
stateAddr := (*uint32)(&r.state)
atomic.StoreUint32(stateAddr, uint32(s))
}
func (r *raftState) getCurrentTerm() uint64 {
return atomic.LoadUint64(&r.currentTerm)
}
func (r *raftState) setCurrentTerm(term uint64) {
atomic.StoreUint64(&r.currentTerm, term)
}
func (r *raftState) getLastLog() (index, term uint64) {
r.lastLock.Lock()
index = r.lastLogIndex
term = r.lastLogTerm
r.lastLock.Unlock()
return
}
func (r *raftState) setLastLog(index, term uint64) {
r.lastLock.Lock()
r.lastLogIndex = index
r.lastLogTerm = term
r.lastLock.Unlock()
}
func (r *raftState) getLastSnapshot() (index, term uint64) {
r.lastLock.Lock()
index = r.lastSnapshotIndex
term = r.lastSnapshotTerm
r.lastLock.Unlock()
return
}
func (r *raftState) setLastSnapshot(index, term uint64) {
r.lastLock.Lock()
r.lastSnapshotIndex = index
r.lastSnapshotTerm = term
r.lastLock.Unlock()
}
func (r *raftState) getCommitIndex() uint64 {
return atomic.LoadUint64(&r.commitIndex)
}
func (r *raftState) setCommitIndex(index uint64) {
atomic.StoreUint64(&r.commitIndex, index)
}
func (r *raftState) getLastApplied() uint64 {
return atomic.LoadUint64(&r.lastApplied)
}
func (r *raftState) setLastApplied(index uint64) {
atomic.StoreUint64(&r.lastApplied, index)
}
// Start a goroutine and properly handle the race between a routine
// starting and incrementing, and exiting and decrementing.
func (r *raftState) goFunc(f func()) {
r.routinesGroup.Add(1)
go func() {
defer r.routinesGroup.Done()
f()
}()
}
func (r *raftState) waitShutdown() {
r.routinesGroup.Wait()
}
// getLastIndex returns the last index in stable storage.
// Either from the last log or from the last snapshot.
func (r *raftState) getLastIndex() uint64 {
r.lastLock.Lock()
defer r.lastLock.Unlock()
return max(r.lastLogIndex, r.lastSnapshotIndex)
}
// getLastEntry returns the last index and term in stable storage.
// Either from the last log or from the last snapshot.
func (r *raftState) getLastEntry() (uint64, uint64) {
r.lastLock.Lock()
defer r.lastLock.Unlock()
if r.lastLogIndex >= r.lastSnapshotIndex {
return r.lastLogIndex, r.lastLogTerm
}
return r.lastSnapshotIndex, r.lastSnapshotTerm
}

View File

@ -1,105 +0,0 @@
package raft
import (
"errors"
"io"
"log"
"net"
"time"
)
var (
errNotAdvertisable = errors.New("local bind address is not advertisable")
errNotTCP = errors.New("local address is not a TCP address")
)
// TCPStreamLayer implements StreamLayer interface for plain TCP.
type TCPStreamLayer struct {
advertise net.Addr
listener *net.TCPListener
}
// NewTCPTransport returns a NetworkTransport that is built on top of
// a TCP streaming transport layer.
func NewTCPTransport(
bindAddr string,
advertise net.Addr,
maxPool int,
timeout time.Duration,
logOutput io.Writer,
) (*NetworkTransport, error) {
return newTCPTransport(bindAddr, advertise, maxPool, timeout, func(stream StreamLayer) *NetworkTransport {
return NewNetworkTransport(stream, maxPool, timeout, logOutput)
})
}
// NewTCPTransportWithLogger returns a NetworkTransport that is built on top of
// a TCP streaming transport layer, with log output going to the supplied Logger
func NewTCPTransportWithLogger(
bindAddr string,
advertise net.Addr,
maxPool int,
timeout time.Duration,
logger *log.Logger,
) (*NetworkTransport, error) {
return newTCPTransport(bindAddr, advertise, maxPool, timeout, func(stream StreamLayer) *NetworkTransport {
return NewNetworkTransportWithLogger(stream, maxPool, timeout, logger)
})
}
func newTCPTransport(bindAddr string,
advertise net.Addr,
maxPool int,
timeout time.Duration,
transportCreator func(stream StreamLayer) *NetworkTransport) (*NetworkTransport, error) {
// Try to bind
list, err := net.Listen("tcp", bindAddr)
if err != nil {
return nil, err
}
// Create stream
stream := &TCPStreamLayer{
advertise: advertise,
listener: list.(*net.TCPListener),
}
// Verify that we have a usable advertise address
addr, ok := stream.Addr().(*net.TCPAddr)
if !ok {
list.Close()
return nil, errNotTCP
}
if addr.IP.IsUnspecified() {
list.Close()
return nil, errNotAdvertisable
}
// Create the network transport
trans := transportCreator(stream)
return trans, nil
}
// Dial implements the StreamLayer interface.
func (t *TCPStreamLayer) Dial(address ServerAddress, timeout time.Duration) (net.Conn, error) {
return net.DialTimeout("tcp", string(address), timeout)
}
// Accept implements the net.Listener interface.
func (t *TCPStreamLayer) Accept() (c net.Conn, err error) {
return t.listener.Accept()
}
// Close implements the net.Listener interface.
func (t *TCPStreamLayer) Close() (err error) {
return t.listener.Close()
}
// Addr implements the net.Listener interface.
func (t *TCPStreamLayer) Addr() net.Addr {
// Use an advertise addr if provided
if t.advertise != nil {
return t.advertise
}
return t.listener.Addr()
}

View File

@ -1,124 +0,0 @@
package raft
import (
"io"
"time"
)
// RPCResponse captures both a response and a potential error.
type RPCResponse struct {
Response interface{}
Error error
}
// RPC has a command, and provides a response mechanism.
type RPC struct {
Command interface{}
Reader io.Reader // Set only for InstallSnapshot
RespChan chan<- RPCResponse
}
// Respond is used to respond with a response, error or both
func (r *RPC) Respond(resp interface{}, err error) {
r.RespChan <- RPCResponse{resp, err}
}
// Transport provides an interface for network transports
// to allow Raft to communicate with other nodes.
type Transport interface {
// Consumer returns a channel that can be used to
// consume and respond to RPC requests.
Consumer() <-chan RPC
// LocalAddr is used to return our local address to distinguish from our peers.
LocalAddr() ServerAddress
// AppendEntriesPipeline returns an interface that can be used to pipeline
// AppendEntries requests.
AppendEntriesPipeline(target ServerAddress) (AppendPipeline, error)
// AppendEntries sends the appropriate RPC to the target node.
AppendEntries(target ServerAddress, args *AppendEntriesRequest, resp *AppendEntriesResponse) error
// RequestVote sends the appropriate RPC to the target node.
RequestVote(target ServerAddress, args *RequestVoteRequest, resp *RequestVoteResponse) error
// InstallSnapshot is used to push a snapshot down to a follower. The data is read from
// the ReadCloser and streamed to the client.
InstallSnapshot(target ServerAddress, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error
// EncodePeer is used to serialize a peer's address.
EncodePeer(ServerAddress) []byte
// DecodePeer is used to deserialize a peer's address.
DecodePeer([]byte) ServerAddress
// SetHeartbeatHandler is used to setup a heartbeat handler
// as a fast-pass. This is to avoid head-of-line blocking from
// disk IO. If a Transport does not support this, it can simply
// ignore the call, and push the heartbeat onto the Consumer channel.
SetHeartbeatHandler(cb func(rpc RPC))
}
// WithClose is an interface that a transport may provide which
// allows a transport to be shut down cleanly when a Raft instance
// shuts down.
//
// It is defined separately from Transport as unfortunately it wasn't in the
// original interface specification.
type WithClose interface {
// Close permanently closes a transport, stopping
// any associated goroutines and freeing other resources.
Close() error
}
// LoopbackTransport is an interface that provides a loopback transport suitable for testing
// e.g. InmemTransport. It's there so we don't have to rewrite tests.
type LoopbackTransport interface {
Transport // Embedded transport reference
WithPeers // Embedded peer management
WithClose // with a close routine
}
// WithPeers is an interface that a transport may provide which allows for connection and
// disconnection. Unless the transport is a loopback transport, the transport specified to
// "Connect" is likely to be nil.
type WithPeers interface {
Connect(peer ServerAddress, t Transport) // Connect a peer
Disconnect(peer ServerAddress) // Disconnect a given peer
DisconnectAll() // Disconnect all peers, possibly to reconnect them later
}
// AppendPipeline is used for pipelining AppendEntries requests. It is used
// to increase the replication throughput by masking latency and better
// utilizing bandwidth.
type AppendPipeline interface {
// AppendEntries is used to add another request to the pipeline.
// The send may block which is an effective form of back-pressure.
AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error)
// Consumer returns a channel that can be used to consume
// response futures when they are ready.
Consumer() <-chan AppendFuture
// Close closes the pipeline and cancels all inflight RPCs
Close() error
}
// AppendFuture is used to return information about a pipelined AppendEntries request.
type AppendFuture interface {
Future
// Start returns the time that the append request was started.
// It is always OK to call this method.
Start() time.Time
// Request holds the parameters of the AppendEntries call.
// It is always OK to call this method.
Request() *AppendEntriesRequest
// Response holds the results of the AppendEntries call.
// This method must only be called after the Error
// method returns, and will only be valid on success.
Response() *AppendEntriesResponse
}

View File

@ -1,133 +0,0 @@
package raft
import (
"bytes"
crand "crypto/rand"
"fmt"
"math"
"math/big"
"math/rand"
"time"
"github.com/hashicorp/go-msgpack/codec"
)
func init() {
// Ensure we use a high-entropy seed for the psuedo-random generator
rand.Seed(newSeed())
}
// returns an int64 from a crypto random source
// can be used to seed a source for a math/rand.
func newSeed() int64 {
r, err := crand.Int(crand.Reader, big.NewInt(math.MaxInt64))
if err != nil {
panic(fmt.Errorf("failed to read random bytes: %v", err))
}
return r.Int64()
}
// randomTimeout returns a value that is between the minVal and 2x minVal.
func randomTimeout(minVal time.Duration) <-chan time.Time {
if minVal == 0 {
return nil
}
extra := (time.Duration(rand.Int63()) % minVal)
return time.After(minVal + extra)
}
// min returns the minimum.
func min(a, b uint64) uint64 {
if a <= b {
return a
}
return b
}
// max returns the maximum.
func max(a, b uint64) uint64 {
if a >= b {
return a
}
return b
}
// generateUUID is used to generate a random UUID.
func generateUUID() string {
buf := make([]byte, 16)
if _, err := crand.Read(buf); err != nil {
panic(fmt.Errorf("failed to read random bytes: %v", err))
}
return fmt.Sprintf("%08x-%04x-%04x-%04x-%12x",
buf[0:4],
buf[4:6],
buf[6:8],
buf[8:10],
buf[10:16])
}
// asyncNotifyCh is used to do an async channel send
// to a single channel without blocking.
func asyncNotifyCh(ch chan struct{}) {
select {
case ch <- struct{}{}:
default:
}
}
// drainNotifyCh empties out a single-item notification channel without
// blocking, and returns whether it received anything.
func drainNotifyCh(ch chan struct{}) bool {
select {
case <-ch:
return true
default:
return false
}
}
// asyncNotifyBool is used to do an async notification
// on a bool channel.
func asyncNotifyBool(ch chan bool, v bool) {
select {
case ch <- v:
default:
}
}
// Decode reverses the encode operation on a byte slice input.
func decodeMsgPack(buf []byte, out interface{}) error {
r := bytes.NewBuffer(buf)
hd := codec.MsgpackHandle{}
dec := codec.NewDecoder(r, &hd)
return dec.Decode(out)
}
// Encode writes an encoded object to a new bytes buffer.
func encodeMsgPack(in interface{}) (*bytes.Buffer, error) {
buf := bytes.NewBuffer(nil)
hd := codec.MsgpackHandle{}
enc := codec.NewEncoder(buf, &hd)
err := enc.Encode(in)
return buf, err
}
// backoff is used to compute an exponential backoff
// duration. Base time is scaled by the current round,
// up to some maximum scale factor.
func backoff(base time.Duration, round, limit uint64) time.Duration {
power := min(round, limit)
for power > 2 {
base *= 2
power--
}
return base
}
// Needed for sorting []uint64, used to determine commitment
type uint64Slice []uint64
func (p uint64Slice) Len() int { return len(p) }
func (p uint64Slice) Less(i, j int) bool { return p[i] < p[j] }
func (p uint64Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }

23
vendor/github.com/pkg/errors/LICENSE generated vendored Normal file
View File

@ -0,0 +1,23 @@
Copyright (c) 2015, Dave Cheney <dave@cheney.net>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

52
vendor/github.com/pkg/errors/README.md generated vendored Normal file
View File

@ -0,0 +1,52 @@
# errors [![Travis-CI](https://travis-ci.org/pkg/errors.svg)](https://travis-ci.org/pkg/errors) [![AppVeyor](https://ci.appveyor.com/api/projects/status/b98mptawhudj53ep/branch/master?svg=true)](https://ci.appveyor.com/project/davecheney/errors/branch/master) [![GoDoc](https://godoc.org/github.com/pkg/errors?status.svg)](http://godoc.org/github.com/pkg/errors) [![Report card](https://goreportcard.com/badge/github.com/pkg/errors)](https://goreportcard.com/report/github.com/pkg/errors)
Package errors provides simple error handling primitives.
`go get github.com/pkg/errors`
The traditional error handling idiom in Go is roughly akin to
```go
if err != nil {
return err
}
```
which applied recursively up the call stack results in error reports without context or debugging information. The errors package allows programmers to add context to the failure path in their code in a way that does not destroy the original value of the error.
## Adding context to an error
The errors.Wrap function returns a new error that adds context to the original error. For example
```go
_, err := ioutil.ReadAll(r)
if err != nil {
return errors.Wrap(err, "read failed")
}
```
## Retrieving the cause of an error
Using `errors.Wrap` constructs a stack of errors, adding context to the preceding error. Depending on the nature of the error it may be necessary to reverse the operation of errors.Wrap to retrieve the original error for inspection. Any error value which implements this interface can be inspected by `errors.Cause`.
```go
type causer interface {
Cause() error
}
```
`errors.Cause` will recursively retrieve the topmost error which does not implement `causer`, which is assumed to be the original cause. For example:
```go
switch err := errors.Cause(err).(type) {
case *MyError:
// handle specifically
default:
// unknown error
}
```
[Read the package documentation for more information](https://godoc.org/github.com/pkg/errors).
## Contributing
We welcome pull requests, bug fixes and issue reports. With that said, the bar for adding new symbols to this package is intentionally set high.
Before proposing a change, please discuss your change by raising an issue.
## Licence
BSD-2-Clause

32
vendor/github.com/pkg/errors/appveyor.yml generated vendored Normal file
View File

@ -0,0 +1,32 @@
version: build-{build}.{branch}
clone_folder: C:\gopath\src\github.com\pkg\errors
shallow_clone: true # for startup speed
environment:
GOPATH: C:\gopath
platform:
- x64
# http://www.appveyor.com/docs/installed-software
install:
# some helpful output for debugging builds
- go version
- go env
# pre-installed MinGW at C:\MinGW is 32bit only
# but MSYS2 at C:\msys64 has mingw64
- set PATH=C:\msys64\mingw64\bin;%PATH%
- gcc --version
- g++ --version
build_script:
- go install -v ./...
test_script:
- set PATH=C:\gopath\bin;%PATH%
- go test -v ./...
#artifacts:
# - path: '%GOPATH%\bin\*.exe'
deploy: off

269
vendor/github.com/pkg/errors/errors.go generated vendored Normal file
View File

@ -0,0 +1,269 @@
// Package errors provides simple error handling primitives.
//
// The traditional error handling idiom in Go is roughly akin to
//
// if err != nil {
// return err
// }
//
// which applied recursively up the call stack results in error reports
// without context or debugging information. The errors package allows
// programmers to add context to the failure path in their code in a way
// that does not destroy the original value of the error.
//
// Adding context to an error
//
// The errors.Wrap function returns a new error that adds context to the
// original error by recording a stack trace at the point Wrap is called,
// and the supplied message. For example
//
// _, err := ioutil.ReadAll(r)
// if err != nil {
// return errors.Wrap(err, "read failed")
// }
//
// If additional control is required the errors.WithStack and errors.WithMessage
// functions destructure errors.Wrap into its component operations of annotating
// an error with a stack trace and an a message, respectively.
//
// Retrieving the cause of an error
//
// Using errors.Wrap constructs a stack of errors, adding context to the
// preceding error. Depending on the nature of the error it may be necessary
// to reverse the operation of errors.Wrap to retrieve the original error
// for inspection. Any error value which implements this interface
//
// type causer interface {
// Cause() error
// }
//
// can be inspected by errors.Cause. errors.Cause will recursively retrieve
// the topmost error which does not implement causer, which is assumed to be
// the original cause. For example:
//
// switch err := errors.Cause(err).(type) {
// case *MyError:
// // handle specifically
// default:
// // unknown error
// }
//
// causer interface is not exported by this package, but is considered a part
// of stable public API.
//
// Formatted printing of errors
//
// All error values returned from this package implement fmt.Formatter and can
// be formatted by the fmt package. The following verbs are supported
//
// %s print the error. If the error has a Cause it will be
// printed recursively
// %v see %s
// %+v extended format. Each Frame of the error's StackTrace will
// be printed in detail.
//
// Retrieving the stack trace of an error or wrapper
//
// New, Errorf, Wrap, and Wrapf record a stack trace at the point they are
// invoked. This information can be retrieved with the following interface.
//
// type stackTracer interface {
// StackTrace() errors.StackTrace
// }
//
// Where errors.StackTrace is defined as
//
// type StackTrace []Frame
//
// The Frame type represents a call site in the stack trace. Frame supports
// the fmt.Formatter interface that can be used for printing information about
// the stack trace of this error. For example:
//
// if err, ok := err.(stackTracer); ok {
// for _, f := range err.StackTrace() {
// fmt.Printf("%+s:%d", f)
// }
// }
//
// stackTracer interface is not exported by this package, but is considered a part
// of stable public API.
//
// See the documentation for Frame.Format for more details.
package errors
import (
"fmt"
"io"
)
// New returns an error with the supplied message.
// New also records the stack trace at the point it was called.
func New(message string) error {
return &fundamental{
msg: message,
stack: callers(),
}
}
// Errorf formats according to a format specifier and returns the string
// as a value that satisfies error.
// Errorf also records the stack trace at the point it was called.
func Errorf(format string, args ...interface{}) error {
return &fundamental{
msg: fmt.Sprintf(format, args...),
stack: callers(),
}
}
// fundamental is an error that has a message and a stack, but no caller.
type fundamental struct {
msg string
*stack
}
func (f *fundamental) Error() string { return f.msg }
func (f *fundamental) Format(s fmt.State, verb rune) {
switch verb {
case 'v':
if s.Flag('+') {
io.WriteString(s, f.msg)
f.stack.Format(s, verb)
return
}
fallthrough
case 's':
io.WriteString(s, f.msg)
case 'q':
fmt.Fprintf(s, "%q", f.msg)
}
}
// WithStack annotates err with a stack trace at the point WithStack was called.
// If err is nil, WithStack returns nil.
func WithStack(err error) error {
if err == nil {
return nil
}
return &withStack{
err,
callers(),
}
}
type withStack struct {
error
*stack
}
func (w *withStack) Cause() error { return w.error }
func (w *withStack) Format(s fmt.State, verb rune) {
switch verb {
case 'v':
if s.Flag('+') {
fmt.Fprintf(s, "%+v", w.Cause())
w.stack.Format(s, verb)
return
}
fallthrough
case 's':
io.WriteString(s, w.Error())
case 'q':
fmt.Fprintf(s, "%q", w.Error())
}
}
// Wrap returns an error annotating err with a stack trace
// at the point Wrap is called, and the supplied message.
// If err is nil, Wrap returns nil.
func Wrap(err error, message string) error {
if err == nil {
return nil
}
err = &withMessage{
cause: err,
msg: message,
}
return &withStack{
err,
callers(),
}
}
// Wrapf returns an error annotating err with a stack trace
// at the point Wrapf is call, and the format specifier.
// If err is nil, Wrapf returns nil.
func Wrapf(err error, format string, args ...interface{}) error {
if err == nil {
return nil
}
err = &withMessage{
cause: err,
msg: fmt.Sprintf(format, args...),
}
return &withStack{
err,
callers(),
}
}
// WithMessage annotates err with a new message.
// If err is nil, WithMessage returns nil.
func WithMessage(err error, message string) error {
if err == nil {
return nil
}
return &withMessage{
cause: err,
msg: message,
}
}
type withMessage struct {
cause error
msg string
}
func (w *withMessage) Error() string { return w.msg + ": " + w.cause.Error() }
func (w *withMessage) Cause() error { return w.cause }
func (w *withMessage) Format(s fmt.State, verb rune) {
switch verb {
case 'v':
if s.Flag('+') {
fmt.Fprintf(s, "%+v\n", w.Cause())
io.WriteString(s, w.msg)
return
}
fallthrough
case 's', 'q':
io.WriteString(s, w.Error())
}
}
// Cause returns the underlying cause of the error, if possible.
// An error value has a cause if it implements the following
// interface:
//
// type causer interface {
// Cause() error
// }
//
// If the error does not implement Cause, the original error will
// be returned. If the error is nil, nil will be returned without further
// investigation.
func Cause(err error) error {
type causer interface {
Cause() error
}
for err != nil {
cause, ok := err.(causer)
if !ok {
break
}
err = cause.Cause()
}
return err
}

186
vendor/github.com/pkg/errors/stack.go generated vendored Normal file
View File

@ -0,0 +1,186 @@
package errors
import (
"fmt"
"io"
"path"
"runtime"
"strings"
)
// Frame represents a program counter inside a stack frame.
type Frame uintptr
// pc returns the program counter for this frame;
// multiple frames may have the same PC value.
func (f Frame) pc() uintptr { return uintptr(f) - 1 }
// file returns the full path to the file that contains the
// function for this Frame's pc.
func (f Frame) file() string {
fn := runtime.FuncForPC(f.pc())
if fn == nil {
return "unknown"
}
file, _ := fn.FileLine(f.pc())
return file
}
// line returns the line number of source code of the
// function for this Frame's pc.
func (f Frame) line() int {
fn := runtime.FuncForPC(f.pc())
if fn == nil {
return 0
}
_, line := fn.FileLine(f.pc())
return line
}
// Format formats the frame according to the fmt.Formatter interface.
//
// %s source file
// %d source line
// %n function name
// %v equivalent to %s:%d
//
// Format accepts flags that alter the printing of some verbs, as follows:
//
// %+s path of source file relative to the compile time GOPATH
// %+v equivalent to %+s:%d
func (f Frame) Format(s fmt.State, verb rune) {
switch verb {
case 's':
switch {
case s.Flag('+'):
pc := f.pc()
fn := runtime.FuncForPC(pc)
if fn == nil {
io.WriteString(s, "unknown")
} else {
file, _ := fn.FileLine(pc)
fmt.Fprintf(s, "%s\n\t%s", fn.Name(), file)
}
default:
io.WriteString(s, path.Base(f.file()))
}
case 'd':
fmt.Fprintf(s, "%d", f.line())
case 'n':
name := runtime.FuncForPC(f.pc()).Name()
io.WriteString(s, funcname(name))
case 'v':
f.Format(s, 's')
io.WriteString(s, ":")
f.Format(s, 'd')
}
}
// StackTrace is stack of Frames from innermost (newest) to outermost (oldest).
type StackTrace []Frame
// Format formats the stack of Frames according to the fmt.Formatter interface.
//
// %s lists source files for each Frame in the stack
// %v lists the source file and line number for each Frame in the stack
//
// Format accepts flags that alter the printing of some verbs, as follows:
//
// %+v Prints filename, function, and line number for each Frame in the stack.
func (st StackTrace) Format(s fmt.State, verb rune) {
switch verb {
case 'v':
switch {
case s.Flag('+'):
for _, f := range st {
fmt.Fprintf(s, "\n%+v", f)
}
case s.Flag('#'):
fmt.Fprintf(s, "%#v", []Frame(st))
default:
fmt.Fprintf(s, "%v", []Frame(st))
}
case 's':
fmt.Fprintf(s, "%s", []Frame(st))
}
}
// stack represents a stack of program counters.
type stack []uintptr
func (s *stack) Format(st fmt.State, verb rune) {
switch verb {
case 'v':
switch {
case st.Flag('+'):
for _, pc := range *s {
f := Frame(pc)
fmt.Fprintf(st, "\n%+v", f)
}
}
}
}
func (s *stack) StackTrace() StackTrace {
f := make([]Frame, len(*s))
for i := 0; i < len(f); i++ {
f[i] = Frame((*s)[i])
}
return f
}
func callers() *stack {
const depth = 32
var pcs [depth]uintptr
n := runtime.Callers(3, pcs[:])
var st stack = pcs[0:n]
return &st
}
// funcname removes the path prefix component of a function's name reported by func.Name().
func funcname(name string) string {
i := strings.LastIndex(name, "/")
name = name[i+1:]
i = strings.Index(name, ".")
return name[i+1:]
}
func trimGOPATH(name, file string) string {
// Here we want to get the source file path relative to the compile time
// GOPATH. As of Go 1.6.x there is no direct way to know the compiled
// GOPATH at runtime, but we can infer the number of path segments in the
// GOPATH. We note that fn.Name() returns the function name qualified by
// the import path, which does not include the GOPATH. Thus we can trim
// segments from the beginning of the file path until the number of path
// separators remaining is one more than the number of path separators in
// the function name. For example, given:
//
// GOPATH /home/user
// file /home/user/src/pkg/sub/file.go
// fn.Name() pkg/sub.Type.Method
//
// We want to produce:
//
// pkg/sub/file.go
//
// From this we can easily see that fn.Name() has one less path separator
// than our desired output. We count separators from the end of the file
// path until it finds two more than in the function name and then move
// one character forward to preserve the initial path segment without a
// leading separator.
const sep = "/"
goal := strings.Count(name, sep) + 2
i := len(file)
for n := 0; n < goal; n++ {
i = strings.LastIndex(file[:i], sep)
if i == -1 {
// not enough separators found, set i so that the slice expression
// below leaves file unmodified
i = -len(sep)
break
}
}
// get back to 0 or trim the leading separator
file = file[i+len(sep):]
return file
}

50
vendor/vendor.json vendored
View File

@ -1177,35 +1177,29 @@
"revisionTime": "2016-11-07T20:49:10Z"
},
{
"checksumSHA1": "jfELEMRhiTcppZmRH+ZwtkVS5Uw=",
"path": "github.com/hashicorp/consul/acl",
"revision": "144a5e5340893a5e726e831c648f26dc19fef1e7",
"revisionTime": "2017-03-10T23:35:18Z"
},
{
"checksumSHA1": "ygEjA1d52B1RDmZu8+1WTwkrYDQ=",
"checksumSHA1": "IYuLg7xUzsf/P9rMpdEh1n9rbIY=",
"comment": "v0.6.3-28-g3215b87",
"path": "github.com/hashicorp/consul/api",
"revision": "48d7b069ad443a48ffa93364048ff8909b5d1fa2",
"revisionTime": "2017-02-07T15:38:46Z"
"revision": "b79d951ced8c5f18fe73d35b2806f3435e40cd64",
"revisionTime": "2017-07-20T03:19:26Z",
"version": "v0.9.0",
"versionExact": "v0.9.0"
},
{
"checksumSHA1": "nomqbPd9j3XelMMcv7+vTEPsdr4=",
"path": "github.com/hashicorp/consul/consul/structs",
"revision": "48d7b069ad443a48ffa93364048ff8909b5d1fa2",
"revisionTime": "2017-02-07T15:38:46Z"
},
{
"checksumSHA1": "dgYoWTG7nIL9CUBuktDvMZqYDR8=",
"checksumSHA1": "++0PVBxbpylmllyCxSa7cdc6dDc=",
"path": "github.com/hashicorp/consul/testutil",
"revision": "48d7b069ad443a48ffa93364048ff8909b5d1fa2",
"revisionTime": "2017-02-07T15:38:46Z"
"revision": "b79d951ced8c5f18fe73d35b2806f3435e40cd64",
"revisionTime": "2017-07-20T03:19:26Z",
"version": "v0.9.0",
"versionExact": "v0.9.0"
},
{
"checksumSHA1": "ZPDLNuKJGZJFV9HlJ/V0O4/c/Ko=",
"path": "github.com/hashicorp/consul/types",
"revision": "48d7b069ad443a48ffa93364048ff8909b5d1fa2",
"revisionTime": "2017-02-07T15:38:46Z"
"checksumSHA1": "J8TTDc84MvAyXE/FrfgS+xc/b6s=",
"path": "github.com/hashicorp/consul/testutil/retry",
"revision": "b79d951ced8c5f18fe73d35b2806f3435e40cd64",
"revisionTime": "2017-07-20T03:19:26Z",
"version": "v0.9.0",
"versionExact": "v0.9.0"
},
{
"checksumSHA1": "cdOCt0Yb+hdErz8NAQqayxPmRsY=",
@ -1383,12 +1377,6 @@
"revision": "0dc08b1671f34c4250ce212759ebd880f743d883",
"revisionTime": "2015-06-09T07:04:31Z"
},
{
"checksumSHA1": "wpirHJV/6VEbbD+HyAP2/6Xc0ek=",
"path": "github.com/hashicorp/raft",
"revision": "aaad9f10266e089bd401e7a6487651a69275641b",
"revisionTime": "2016-11-10T00:52:40Z"
},
{
"checksumSHA1": "o8In5byYGDCY/mnTuV4Tfmci+3w=",
"comment": "v0.7.0-12-ge4ec8cc",
@ -1659,6 +1647,12 @@
"path": "github.com/packer-community/winrmcp/winrmcp",
"revision": "3d184cea22ee1c41ec1697e0d830ff0c78f7ea97"
},
{
"checksumSHA1": "rJab1YdNhQooDiBWNnt7TLWPyBU=",
"path": "github.com/pkg/errors",
"revision": "c605e284fe17294bda444b34710735b29d1a9d90",
"revisionTime": "2017-05-05T04:36:39Z"
},
{
"checksumSHA1": "Yqr9OQ7AuIB1N0HMbSxqEoVQG+k=",
"comment": "v2.0.1-8-g983d3a5",