wgengine: remove all peer status from open timeout diagnostics

Avoid contention from fetching status for all peers, and instead fetch
status for a single peer.

Updates tailscale/coral#72
Signed-off-by: James Tucker <james@tailscale.com>
This commit is contained in:
James Tucker
2022-08-26 13:36:55 -07:00
committed by James Tucker
parent ad1cc6cff9
commit 81dba3738e
3 changed files with 22 additions and 97 deletions

View File

@ -9,7 +9,6 @@ import (
"runtime"
"time"
"tailscale.com/ipn/ipnstate"
"tailscale.com/net/flowtrack"
"tailscale.com/net/packet"
"tailscale.com/net/tsaddr"
@ -157,28 +156,8 @@ func (e *userspaceEngine) onOpenTimeout(flow flowtrack.Tuple) {
return
}
// We don't care if this information is perfectly up-to-date, since
// we're just using it to print debug information.
//
// In tailscale/coral#72, we see a goroutine profile with thousands of
// goroutines blocked on the mutex in getStatus here, so we wrap it in
// a singleflight and accept stale information to reduce contention.
st, err, _ := e.getStatusSf.Do(struct{}{}, e.getStatus)
var ps *ipnstate.PeerStatusLite
if err == nil {
for _, v := range st.Peers {
if v.NodeKey == n.Key {
v := v // copy
ps = &v
break
}
}
} else {
e.logf("open-conn-track: timeout opening %v to node %v; failed to get engine status: %v", flow, n.Key.ShortString(), err)
return
}
if ps == nil {
ps, found := e.getPeerStatusLite(n.Key)
if !found {
onlyZeroRoute := true // whether peerForIP returned n only because its /0 route matched
for _, r := range n.AllowedIPs {
if r.Bits() != 0 && r.Contains(flow.Dst.Addr()) {