package nebula import ( "bytes" "crypto/rand" "encoding/binary" "errors" "net" "time" "github.com/sirupsen/logrus" ) const ( // Total time to try a handshake = sequence of HandshakeTryInterval * HandshakeRetries // With 100ms interval and 20 retries is 23.5 seconds DefaultHandshakeTryInterval = time.Millisecond * 100 DefaultHandshakeRetries = 20 // DefaultHandshakeWaitRotation is the number of handshake attempts to do before starting to use other ips addresses DefaultHandshakeWaitRotation = 5 DefaultHandshakeTriggerBuffer = 64 ) var ( defaultHandshakeConfig = HandshakeConfig{ tryInterval: DefaultHandshakeTryInterval, retries: DefaultHandshakeRetries, waitRotation: DefaultHandshakeWaitRotation, triggerBuffer: DefaultHandshakeTriggerBuffer, } ) type HandshakeConfig struct { tryInterval time.Duration retries int waitRotation int triggerBuffer int messageMetrics *MessageMetrics } type HandshakeManager struct { pendingHostMap *HostMap mainHostMap *HostMap lightHouse *LightHouse outside *udpConn config HandshakeConfig // can be used to trigger outbound handshake for the given vpnIP trigger chan uint32 OutboundHandshakeTimer *SystemTimerWheel InboundHandshakeTimer *SystemTimerWheel messageMetrics *MessageMetrics l *logrus.Logger } func NewHandshakeManager(l *logrus.Logger, tunCidr *net.IPNet, preferredRanges []*net.IPNet, mainHostMap *HostMap, lightHouse *LightHouse, outside *udpConn, config HandshakeConfig) *HandshakeManager { return &HandshakeManager{ pendingHostMap: NewHostMap(l, "pending", tunCidr, preferredRanges), mainHostMap: mainHostMap, lightHouse: lightHouse, outside: outside, config: config, trigger: make(chan uint32, config.triggerBuffer), OutboundHandshakeTimer: NewSystemTimerWheel(config.tryInterval, config.tryInterval*time.Duration(config.retries)), InboundHandshakeTimer: NewSystemTimerWheel(config.tryInterval, config.tryInterval*time.Duration(config.retries)), messageMetrics: config.messageMetrics, l: l, } } func (c *HandshakeManager) Run(f EncWriter) { clockSource := time.Tick(c.config.tryInterval) for { select { case vpnIP := <-c.trigger: c.l.WithField("vpnIp", IntIp(vpnIP)).Debug("HandshakeManager: triggered") c.handleOutbound(vpnIP, f, true) case now := <-clockSource: c.NextOutboundHandshakeTimerTick(now, f) c.NextInboundHandshakeTimerTick(now) } } } func (c *HandshakeManager) NextOutboundHandshakeTimerTick(now time.Time, f EncWriter) { c.OutboundHandshakeTimer.advance(now) for { ep := c.OutboundHandshakeTimer.Purge() if ep == nil { break } vpnIP := ep.(uint32) c.handleOutbound(vpnIP, f, false) } } func (c *HandshakeManager) handleOutbound(vpnIP uint32, f EncWriter, lighthouseTriggered bool) { hostinfo, err := c.pendingHostMap.QueryVpnIP(vpnIP) if err != nil { return } hostinfo.Lock() defer hostinfo.Unlock() // If we haven't finished the handshake and we haven't hit max retries, query // lighthouse and then send the handshake packet again. if hostinfo.HandshakeCounter < c.config.retries && !hostinfo.HandshakeComplete { if hostinfo.remote == nil { // We continue to query the lighthouse because hosts may // come online during handshake retries. If the query // succeeds (no error), add the lighthouse info to hostinfo ips := c.lightHouse.QueryCache(vpnIP) // If we have no responses yet, or only one IP (the host hadn't // finished reporting its own IPs yet), then send another query to // the LH. if len(ips) <= 1 { ips, err = c.lightHouse.Query(vpnIP, f) } if err == nil { for _, ip := range ips { hostinfo.AddRemote(ip) } hostinfo.ForcePromoteBest(c.mainHostMap.preferredRanges) } } else if lighthouseTriggered { // We were triggered by a lighthouse HostQueryReply packet, but // we have already picked a remote for this host (this can happen // if we are configured with multiple lighthouses). So we can skip // this trigger and let the timerwheel handle the rest of the // process return } hostinfo.HandshakeCounter++ // We want to use the "best" calculated ip for the first 5 attempts, after that we just blindly rotate through // all the others until we can stand up a connection. if hostinfo.HandshakeCounter > c.config.waitRotation { hostinfo.rotateRemote() } // Ensure the handshake is ready to avoid a race in timer tick and stage 0 handshake generation if hostinfo.HandshakeReady && hostinfo.remote != nil { c.messageMetrics.Tx(handshake, NebulaMessageSubType(hostinfo.HandshakePacket[0][1]), 1) err := c.outside.WriteTo(hostinfo.HandshakePacket[0], hostinfo.remote) if err != nil { hostinfo.logger(c.l).WithField("udpAddr", hostinfo.remote). WithField("initiatorIndex", hostinfo.localIndexId). WithField("remoteIndex", hostinfo.remoteIndexId). WithField("handshake", m{"stage": 1, "style": "ix_psk0"}). WithError(err).Error("Failed to send handshake message") } else { //TODO: this log line is assuming a lot of stuff around the cached stage 0 handshake packet, we should // keep the real packet struct around for logging purposes hostinfo.logger(c.l).WithField("udpAddr", hostinfo.remote). WithField("initiatorIndex", hostinfo.localIndexId). WithField("remoteIndex", hostinfo.remoteIndexId). WithField("handshake", m{"stage": 1, "style": "ix_psk0"}). Info("Handshake message sent") } } // Readd to the timer wheel so we continue trying wait HandshakeTryInterval * counter longer for next try if !lighthouseTriggered { //l.Infoln("Interval: ", HandshakeTryInterval*time.Duration(hostinfo.HandshakeCounter)) c.OutboundHandshakeTimer.Add(vpnIP, c.config.tryInterval*time.Duration(hostinfo.HandshakeCounter)) } } else { c.pendingHostMap.DeleteHostInfo(hostinfo) } } func (c *HandshakeManager) NextInboundHandshakeTimerTick(now time.Time) { c.InboundHandshakeTimer.advance(now) for { ep := c.InboundHandshakeTimer.Purge() if ep == nil { break } index := ep.(uint32) c.pendingHostMap.DeleteIndex(index) } } func (c *HandshakeManager) AddVpnIP(vpnIP uint32) *HostInfo { hostinfo := c.pendingHostMap.AddVpnIP(vpnIP) // We lock here and use an array to insert items to prevent locking the // main receive thread for very long by waiting to add items to the pending map c.OutboundHandshakeTimer.Add(vpnIP, c.config.tryInterval) return hostinfo } var ( ErrExistingHostInfo = errors.New("existing hostinfo") ErrAlreadySeen = errors.New("already seen") ErrLocalIndexCollision = errors.New("local index collision") ) // CheckAndComplete checks for any conflicts in the main and pending hostmap // before adding hostinfo to main. If err is nil, it was added. Otherwise err will be: // ErrAlreadySeen if we already have an entry in the hostmap that has seen the // exact same handshake packet // // ErrExistingHostInfo if we already have an entry in the hostmap for this // VpnIP and overwrite was false. // // ErrLocalIndexCollision if we already have an entry in the main or pending // hostmap for the hostinfo.localIndexId. func (c *HandshakeManager) CheckAndComplete(hostinfo *HostInfo, handshakePacket uint8, overwrite bool, f *Interface) (*HostInfo, error) { c.pendingHostMap.RLock() defer c.pendingHostMap.RUnlock() c.mainHostMap.Lock() defer c.mainHostMap.Unlock() existingHostInfo, found := c.mainHostMap.Hosts[hostinfo.hostId] if found && existingHostInfo != nil { if bytes.Equal(hostinfo.HandshakePacket[handshakePacket], existingHostInfo.HandshakePacket[handshakePacket]) { return existingHostInfo, ErrAlreadySeen } if !overwrite { return existingHostInfo, ErrExistingHostInfo } } existingIndex, found := c.mainHostMap.Indexes[hostinfo.localIndexId] if found { // We have a collision, but for a different hostinfo return existingIndex, ErrLocalIndexCollision } existingIndex, found = c.pendingHostMap.Indexes[hostinfo.localIndexId] if found && existingIndex != hostinfo { // We have a collision, but for a different hostinfo return existingIndex, ErrLocalIndexCollision } existingRemoteIndex, found := c.mainHostMap.RemoteIndexes[hostinfo.remoteIndexId] if found && existingRemoteIndex != nil && existingRemoteIndex.hostId != hostinfo.hostId { // We have a collision, but this can happen since we can't control // the remote ID. Just log about the situation as a note. hostinfo.logger(c.l). WithField("remoteIndex", hostinfo.remoteIndexId).WithField("collision", IntIp(existingRemoteIndex.hostId)). Info("New host shadows existing host remoteIndex") } if existingHostInfo != nil { // We are going to overwrite this entry, so remove the old references delete(c.mainHostMap.Hosts, existingHostInfo.hostId) delete(c.mainHostMap.Indexes, existingHostInfo.localIndexId) delete(c.mainHostMap.RemoteIndexes, existingHostInfo.remoteIndexId) } c.mainHostMap.addHostInfo(hostinfo, f) return existingHostInfo, nil } // Complete is a simpler version of CheckAndComplete when we already know we // won't have a localIndexId collision because we already have an entry in the // pendingHostMap func (c *HandshakeManager) Complete(hostinfo *HostInfo, f *Interface) { c.mainHostMap.Lock() defer c.mainHostMap.Unlock() existingHostInfo, found := c.mainHostMap.Hosts[hostinfo.hostId] if found && existingHostInfo != nil { // We are going to overwrite this entry, so remove the old references delete(c.mainHostMap.Hosts, existingHostInfo.hostId) delete(c.mainHostMap.Indexes, existingHostInfo.localIndexId) delete(c.mainHostMap.RemoteIndexes, existingHostInfo.remoteIndexId) } existingRemoteIndex, found := c.mainHostMap.RemoteIndexes[hostinfo.remoteIndexId] if found && existingRemoteIndex != nil { // We have a collision, but this can happen since we can't control // the remote ID. Just log about the situation as a note. hostinfo.logger(c.l). WithField("remoteIndex", hostinfo.remoteIndexId).WithField("collision", IntIp(existingRemoteIndex.hostId)). Info("New host shadows existing host remoteIndex") } c.mainHostMap.addHostInfo(hostinfo, f) } // AddIndexHostInfo generates a unique localIndexId for this HostInfo // and adds it to the pendingHostMap. Will error if we are unable to generate // a unique localIndexId func (c *HandshakeManager) AddIndexHostInfo(h *HostInfo) error { c.pendingHostMap.Lock() defer c.pendingHostMap.Unlock() c.mainHostMap.RLock() defer c.mainHostMap.RUnlock() for i := 0; i < 32; i++ { index, err := generateIndex(c.l) if err != nil { return err } _, inPending := c.pendingHostMap.Indexes[index] _, inMain := c.mainHostMap.Indexes[index] if !inMain && !inPending { h.localIndexId = index c.pendingHostMap.Indexes[index] = h return nil } } return errors.New("failed to generate unique localIndexId") } func (c *HandshakeManager) addRemoteIndexHostInfo(index uint32, h *HostInfo) { c.pendingHostMap.addRemoteIndexHostInfo(index, h) } func (c *HandshakeManager) DeleteHostInfo(hostinfo *HostInfo) { //l.Debugln("Deleting pending hostinfo :", hostinfo) c.pendingHostMap.DeleteHostInfo(hostinfo) } func (c *HandshakeManager) QueryIndex(index uint32) (*HostInfo, error) { return c.pendingHostMap.QueryIndex(index) } func (c *HandshakeManager) EmitStats() { c.pendingHostMap.EmitStats("pending") c.mainHostMap.EmitStats("main") } // Utility functions below func generateIndex(l *logrus.Logger) (uint32, error) { b := make([]byte, 4) // Let zero mean we don't know the ID, so don't generate zero var index uint32 for index == 0 { _, err := rand.Read(b) if err != nil { l.Errorln(err) return 0, err } index = binary.BigEndian.Uint32(b) } if l.Level >= logrus.DebugLevel { l.WithField("index", index). Debug("Generated index") } return index, nil }