wgengine: remove all peer status from open timeout diagnostics

Avoid contention from fetching status for all peers, and instead fetch
status for a single peer.

Updates tailscale/coral#72
Signed-off-by: James Tucker <james@tailscale.com>
This commit is contained in:
James Tucker
2022-08-26 13:36:55 -07:00
committed by James Tucker
parent ad1cc6cff9
commit 81dba3738e
3 changed files with 22 additions and 97 deletions

View File

@ -5,14 +5,9 @@
package wgengine
import (
"fmt"
"regexp"
"runtime"
"strings"
"testing"
"time"
"tailscale.com/tstest"
)
func TestWatchdog(t *testing.T) {
@ -42,49 +37,4 @@ func TestWatchdog(t *testing.T) {
e.RequestStatus()
e.Close()
})
// Subtest: when a wrapped engine call stays blocked for longer than maxWait,
// the watchdog must log a goroutine profile and the list of in-flight calls,
// and then invoke fatalf.
t.Run("watchdog fires on blocked getStatus", func(t *testing.T) {
	t.Parallel()

	e, err := NewFakeUserspaceEngine(t.Logf, 0)
	if err != nil {
		t.Fatal(err)
	}
	t.Cleanup(e.Close)

	// Keep a handle on the underlying engine so its lock can be held below,
	// then wrap it with the watchdog under test.
	usEngine := e.(*userspaceEngine)
	e = NewWatchdog(e)
	wdEngine := e.(*watchdogEngine)
	wdEngine.maxWait = maxWaitMultiple * 100 * time.Millisecond

	logBuf := new(tstest.MemLogger)
	// One slot of buffer plus a non-blocking send: the fatalf hook must never
	// block its caller, even if it runs after the select below has timed out
	// or if it is invoked more than once.
	fatalCalled := make(chan struct{}, 1)
	wdEngine.logf = logBuf.Logf
	wdEngine.fatalf = func(format string, args ...any) {
		t.Logf("FATAL: %s", fmt.Sprintf(format, args...))
		select {
		case fatalCalled <- struct{}{}:
		default:
		}
	}

	usEngine.wgLock.Lock() // blocks getStatus so the watchdog will fire
	go e.RequestStatus()

	select {
	case <-fatalCalled:
		// Expected path: the watchdog fired. Verify that it logged its
		// diagnostics before calling fatalf.
		logs := logBuf.String()
		if !strings.Contains(logs, "goroutine profile: total ") {
			t.Errorf("fatal called without watchdog stacks, got: %s", logs)
		}
		re := regexp.MustCompile(`(?m)^\s*in-flight\[\d+\]: name=RequestStatus duration=.* start=.*$`)
		if !re.MatchString(logs) {
			t.Errorf("fatal called without in-flight list, got: %s", logs)
		}
	case <-time.After(3 * time.Second):
		// Release the lock before failing so the blocked RequestStatus
		// goroutine is not left stuck and the cleanup registered above does
		// not run with wgLock still held.
		usEngine.wgLock.Unlock()
		t.Fatalf("watchdog failed to fire")
	}

	usEngine.wgLock.Unlock()
	// From here on any further watchdog firing is unexpected.
	wdEngine.fatalf = t.Fatalf
	wdEngine.Close()
})
}