derp, derphttp, magicsock: send new unknown peer frame when destination is unknown (#7552)

* wgengine/magicsock: add envknob to send CallMeMaybe to non-existent peer

For testing older client version responses to the PeerGone packet format change.

Updates #4326

Signed-off-by: Val <valerie@tailscale.com>

* derp: remove dead sclient struct member replaceLimiter

Leftover from an previous solution to the duplicate client problem.

Updates #2751

Signed-off-by: Val <valerie@tailscale.com>

* derp, derp/derphttp, wgengine/magicsock: add new PeerGone message type Not Here

Extend the PeerGone message type by adding a reason byte. Send a
PeerGone "Not Here" message when an endpoint sends a disco message to
a peer that this server has no record of.

Fixes #4326

Signed-off-by: Val <valerie@tailscale.com>

---------

Signed-off-by: Val <valerie@tailscale.com>
This commit is contained in:
valscale
2023-03-24 19:11:48 -07:00
committed by GitHub
parent 09d0b632d4
commit 74eb99aed1
12 changed files with 211 additions and 67 deletions

View File

@ -34,12 +34,12 @@ import (
"go4.org/mem"
"golang.org/x/sync/errgroup"
"golang.org/x/time/rate"
"tailscale.com/client/tailscale"
"tailscale.com/disco"
"tailscale.com/envknob"
"tailscale.com/metrics"
"tailscale.com/syncs"
"tailscale.com/tstime/rate"
"tailscale.com/types/key"
"tailscale.com/types/logger"
"tailscale.com/version"
@ -122,7 +122,8 @@ type Server struct {
_ align64
packetsForwardedOut expvar.Int
packetsForwardedIn expvar.Int
peerGoneFrames expvar.Int // number of peer gone frames sent
peerGoneDisconnectedFrames expvar.Int // number of peer disconnected frames sent
peerGoneNotHereFrames expvar.Int // number of peer not here frames sent
gotPing expvar.Int // number of ping frames from client
sentPong expvar.Int // number of pong frames enqueued to client
accepts expvar.Int
@ -324,7 +325,8 @@ func NewServer(privateKey key.NodePrivate, logf logger.Logf) *Server {
s.packetsDroppedReasonCounters = []*expvar.Int{
s.packetsDroppedReason.Get("unknown_dest"),
s.packetsDroppedReason.Get("unknown_dest_on_fwd"),
s.packetsDroppedReason.Get("gone"),
s.packetsDroppedReason.Get("gone_disconnected"),
s.packetsDroppedReason.Get("gone_not_here"),
s.packetsDroppedReason.Get("queue_head"),
s.packetsDroppedReason.Get("queue_tail"),
s.packetsDroppedReason.Get("write_error"),
@ -615,13 +617,26 @@ func (s *Server) notePeerGoneFromRegionLocked(key key.NodePublic) {
}
set.ForeachClient(func(peer *sclient) {
if peer.connNum == connNum {
go peer.requestPeerGoneWrite(key)
go peer.requestPeerGoneWrite(key, PeerGoneReasonDisconnected)
}
})
}
delete(s.sentTo, key)
}
// requestPeerGoneWriteLimited sends a request to write a "peer gone"
// frame, but only in reply to a disco packet, and only if we haven't
// sent one recently.
func (c *sclient) requestPeerGoneWriteLimited(peer key.NodePublic, contents []byte, reason PeerGoneReasonType) {
if disco.LooksLikeDiscoWrapper(contents) != true {
return
}
if c.peerGoneLim.Allow() {
go c.requestPeerGoneWrite(peer, reason)
}
}
func (s *Server) addWatcher(c *sclient) {
if !c.canMesh {
panic("invariant: addWatcher called without permissions")
@ -686,8 +701,9 @@ func (s *Server) accept(ctx context.Context, nc Conn, brw *bufio.ReadWriter, rem
sendQueue: make(chan pkt, perClientSendQueueDepth),
discoSendQueue: make(chan pkt, perClientSendQueueDepth),
sendPongCh: make(chan [8]byte, 1),
peerGone: make(chan key.NodePublic),
peerGone: make(chan peerGoneMsg),
canMesh: clientInfo.MeshKey != "" && clientInfo.MeshKey == s.meshKey,
peerGoneLim: rate.NewLimiter(rate.Every(time.Second), 3),
}
if c.canMesh {
@ -887,6 +903,8 @@ func (c *sclient) handleFrameForwardPacket(ft frameType, fl uint32) error {
reason := dropReasonUnknownDestOnFwd
if dstLen > 1 {
reason = dropReasonDupClient
} else {
c.requestPeerGoneWriteLimited(dstKey, contents, PeerGoneReasonNotHere)
}
s.recordDrop(contents, srcKey, dstKey, reason)
return nil
@ -952,6 +970,8 @@ func (c *sclient) handleFrameSendPacket(ft frameType, fl uint32) error {
reason := dropReasonUnknownDest
if dstLen > 1 {
reason = dropReasonDupClient
} else {
c.requestPeerGoneWriteLimited(dstKey, contents, PeerGoneReasonNotHere)
}
s.recordDrop(contents, c.key, dstKey, reason)
c.debug("SendPacket for %s, dropping with reason=%s", dstKey.ShortString(), reason)
@ -981,7 +1001,7 @@ type dropReason int
const (
dropReasonUnknownDest dropReason = iota // unknown destination pubkey
dropReasonUnknownDestOnFwd // unknown destination pubkey on a derp-forwarded packet
dropReasonGone // destination tailscaled disconnected before we could send
dropReasonGoneDisconnected // destination tailscaled disconnected before we could send
dropReasonQueueHead // destination queue is full, dropped packet at queue head
dropReasonQueueTail // destination queue is full, dropped packet at queue tail
dropReasonWriteError // OS write() failed
@ -1023,7 +1043,7 @@ func (c *sclient) sendPkt(dst *sclient, p pkt) error {
for attempt := 0; attempt < 3; attempt++ {
select {
case <-dst.done:
s.recordDrop(p.bs, c.key, dstKey, dropReasonGone)
s.recordDrop(p.bs, c.key, dstKey, dropReasonGoneDisconnected)
dst.debug("sendPkt attempt %d dropped, dst gone", attempt)
return nil
default:
@ -1052,11 +1072,14 @@ func (c *sclient) sendPkt(dst *sclient, p pkt) error {
}
// requestPeerGoneWrite sends a request to write a "peer gone" frame
// that the provided peer has disconnected. It blocks until either the
// with an explanation of why it is gone. It blocks until either the
// write request is scheduled, or the client has closed.
func (c *sclient) requestPeerGoneWrite(peer key.NodePublic) {
func (c *sclient) requestPeerGoneWrite(peer key.NodePublic, reason PeerGoneReasonType) {
select {
case c.peerGone <- peer:
case c.peerGone <- peerGoneMsg{
peer: peer,
reason: reason,
}:
case <-c.done:
}
}
@ -1270,25 +1293,20 @@ type sclient struct {
key key.NodePublic
info clientInfo
logf logger.Logf
done <-chan struct{} // closed when connection closes
remoteAddr string // usually ip:port from net.Conn.RemoteAddr().String()
remoteIPPort netip.AddrPort // zero if remoteAddr is not ip:port.
sendQueue chan pkt // packets queued to this client; never closed
discoSendQueue chan pkt // important packets queued to this client; never closed
sendPongCh chan [8]byte // pong replies to send to the client; never closed
peerGone chan key.NodePublic // write request that a previous sender has disconnected (not used by mesh peers)
meshUpdate chan struct{} // write request to write peerStateChange
canMesh bool // clientInfo had correct mesh token for inter-region routing
isDup atomic.Bool // whether more than 1 sclient for key is connected
isDisabled atomic.Bool // whether sends to this peer are disabled due to active/active dups
done <-chan struct{} // closed when connection closes
remoteAddr string // usually ip:port from net.Conn.RemoteAddr().String()
remoteIPPort netip.AddrPort // zero if remoteAddr is not ip:port.
sendQueue chan pkt // packets queued to this client; never closed
discoSendQueue chan pkt // important packets queued to this client; never closed
sendPongCh chan [8]byte // pong replies to send to the client; never closed
peerGone chan peerGoneMsg // write request that a peer is not at this server (not used by mesh peers)
meshUpdate chan struct{} // write request to write peerStateChange
canMesh bool // clientInfo had correct mesh token for inter-region routing
isDup atomic.Bool // whether more than 1 sclient for key is connected
isDisabled atomic.Bool // whether sends to this peer are disabled due to active/active dups
debugLogging bool
// replaceLimiter controls how quickly two connections with
// the same client key can kick each other off the server by
// taking over ownership of a key.
replaceLimiter *rate.Limiter
// Owned by run, not thread-safe.
br *bufio.Reader
connectedAt time.Time
@ -1304,6 +1322,11 @@ type sclient struct {
// the client for them to update their map of who's connected
// to this node.
peerStateChange []peerConnState
// peerGoneLimiter limits how often the server will inform a
// client that it's trying to establish a direct connection
// through us with a peer we have no record of.
peerGoneLim *rate.Limiter
}
// peerConnState represents whether a peer is connected to the server
@ -1327,6 +1350,12 @@ type pkt struct {
bs []byte
}
// peerGoneMsg is a request to write a peerGone frame to an sclient
type peerGoneMsg struct {
peer key.NodePublic
reason PeerGoneReasonType
}
func (c *sclient) setPreferred(v bool) {
if c.preferred == v {
return
@ -1381,9 +1410,9 @@ func (c *sclient) sendLoop(ctx context.Context) error {
for {
select {
case pkt := <-c.sendQueue:
c.s.recordDrop(pkt.bs, pkt.src, c.key, dropReasonGone)
c.s.recordDrop(pkt.bs, pkt.src, c.key, dropReasonGoneDisconnected)
case pkt := <-c.discoSendQueue:
c.s.recordDrop(pkt.bs, pkt.src, c.key, dropReasonGone)
c.s.recordDrop(pkt.bs, pkt.src, c.key, dropReasonGoneDisconnected)
default:
return
}
@ -1404,8 +1433,8 @@ func (c *sclient) sendLoop(ctx context.Context) error {
select {
case <-ctx.Done():
return nil
case peer := <-c.peerGone:
werr = c.sendPeerGone(peer)
case msg := <-c.peerGone:
werr = c.sendPeerGone(msg.peer, msg.reason)
continue
case <-c.meshUpdate:
werr = c.sendMeshUpdates()
@ -1436,8 +1465,8 @@ func (c *sclient) sendLoop(ctx context.Context) error {
select {
case <-ctx.Done():
return nil
case peer := <-c.peerGone:
werr = c.sendPeerGone(peer)
case msg := <-c.peerGone:
werr = c.sendPeerGone(msg.peer, msg.reason)
case <-c.meshUpdate:
werr = c.sendMeshUpdates()
continue
@ -1478,13 +1507,22 @@ func (c *sclient) sendPong(data [8]byte) error {
}
// sendPeerGone sends a peerGone frame, without flushing.
func (c *sclient) sendPeerGone(peer key.NodePublic) error {
c.s.peerGoneFrames.Add(1)
func (c *sclient) sendPeerGone(peer key.NodePublic, reason PeerGoneReasonType) error {
switch reason {
case PeerGoneReasonDisconnected:
c.s.peerGoneDisconnectedFrames.Add(1)
case PeerGoneReasonNotHere:
c.s.peerGoneNotHereFrames.Add(1)
}
c.setWriteDeadline()
if err := writeFrameHeader(c.bw.bw(), framePeerGone, keyLen); err != nil {
data := make([]byte, 0, keyLen+1)
data = peer.AppendTo(data)
data = append(data, byte(reason))
if err := writeFrameHeader(c.bw.bw(), framePeerGone, uint32(len(data))); err != nil {
return err
}
_, err := c.bw.Write(peer.AppendTo(nil))
_, err := c.bw.Write(data)
return err
}
@ -1515,7 +1553,7 @@ func (c *sclient) sendMeshUpdates() error {
if pcs.present {
err = c.sendPeerPresent(pcs.peer)
} else {
err = c.sendPeerGone(pcs.peer)
err = c.sendPeerGone(pcs.peer, PeerGoneReasonDisconnected)
}
if err != nil {
// Shouldn't happen, though, as we're writing
@ -1756,7 +1794,8 @@ func (s *Server) ExpVar() expvar.Var {
m.Set("home_moves_out", &s.homeMovesOut)
m.Set("got_ping", &s.gotPing)
m.Set("sent_pong", &s.sentPong)
m.Set("peer_gone_frames", &s.peerGoneFrames)
m.Set("peer_gone_disconnected_frames", &s.peerGoneDisconnectedFrames)
m.Set("peer_gone_not_here_frames", &s.peerGoneNotHereFrames)
m.Set("packets_forwarded_out", &s.packetsForwardedOut)
m.Set("packets_forwarded_in", &s.packetsForwardedIn)
m.Set("multiforwarder_created", &s.multiForwarderCreated)