wgengine: print in-flight operations on watchdog trigger (#5447)

In addition to printing goroutine stacks, explicitly track all in-flight
operations and print them when the watchdog triggers (along with the
time they were started at). This should make debugging watchdog failures
easier, since we can look at the longest-running operation(s) first.

Signed-off-by: Andrew Dunham <andrew@tailscale.com>

Signed-off-by: Andrew Dunham <andrew@tailscale.com>
This commit is contained in:
Andrew Dunham
2022-08-27 22:06:18 -04:00
committed by GitHub
parent e1738ea78e
commit 9b77ac128a
2 changed files with 59 additions and 6 deletions

View File

@@ -6,6 +6,7 @@ package wgengine
import (
"fmt"
"regexp"
"runtime"
"strings"
"testing"
@@ -68,9 +69,15 @@ func TestWatchdog(t *testing.T) {
select {
case <-fatalCalled:
if !strings.Contains(logBuf.String(), "goroutine profile: total ") {
t.Errorf("fatal called without watchdog stacks, got: %s", logBuf.String())
logs := logBuf.String()
if !strings.Contains(logs, "goroutine profile: total ") {
t.Errorf("fatal called without watchdog stacks, got: %s", logs)
}
re := regexp.MustCompile(`(?m)^\s*in-flight\[\d+\]: name=RequestStatus duration=.* start=.*$`)
if !re.MatchString(logs) {
t.Errorf("fatal called without in-flight list, got: %s", logs)
}
// expected
case <-time.After(3 * time.Second):
t.Fatalf("watchdog failed to fire")