appc: fix a deadlock in route advertisements (#15031)

`routeAdvertiser` is the `iplocal.LocalBackend`. Calls to
`Advertise/UnadvertiseRoute` end up calling `EditPrefs` which in turn
calls `authReconfig` which finally calls `readvertiseAppConnectorRoutes`
which calls `AppConnector.DomainRoutes` and gets stuck on a mutex that
was already held when `routeAdvertiser` was called.

Make all calls to `routeAdvertiser` in `app.AppConnector` go through the
execqueue instead as a short-term fix.

Updates tailscale/corp#25965

Signed-off-by: Andrew Lytvynov <awly@tailscale.com>
Co-authored-by: Irbe Krumina <irbe@tailscale.com>
This commit is contained in:
Andrew Lytvynov
2025-02-18 11:31:14 -08:00
committed by GitHub
parent 052eefbcce
commit ec5f04b274
3 changed files with 84 additions and 11 deletions

View File

@ -8,6 +8,7 @@ import (
"net/netip"
"reflect"
"slices"
"sync/atomic"
"testing"
"time"
@ -86,6 +87,7 @@ func TestUpdateRoutes(t *testing.T) {
routes := []netip.Prefix{netip.MustParsePrefix("192.0.2.0/24"), netip.MustParsePrefix("192.0.0.1/32")}
a.updateRoutes(routes)
a.Wait(ctx)
slices.SortFunc(rc.Routes(), prefixCompare)
rc.SetRoutes(slices.Compact(rc.Routes()))
@ -105,6 +107,7 @@ func TestUpdateRoutes(t *testing.T) {
}
func TestUpdateRoutesUnadvertisesContainedRoutes(t *testing.T) {
ctx := context.Background()
for _, shouldStore := range []bool{false, true} {
rc := &appctest.RouteCollector{}
var a *AppConnector
@ -117,6 +120,7 @@ func TestUpdateRoutesUnadvertisesContainedRoutes(t *testing.T) {
rc.SetRoutes([]netip.Prefix{netip.MustParsePrefix("192.0.2.1/32")})
routes := []netip.Prefix{netip.MustParsePrefix("192.0.2.0/24")}
a.updateRoutes(routes)
a.Wait(ctx)
if !slices.EqualFunc(routes, rc.Routes(), prefixEqual) {
t.Fatalf("got %v, want %v", rc.Routes(), routes)
@ -636,3 +640,57 @@ func TestMetricBucketsAreSorted(t *testing.T) {
t.Errorf("metricStoreRoutesNBuckets must be in order")
}
}
// TestUpdateRoutesDeadlock is a regression test for a deadlock in
// LocalBackend<->AppConnector interaction. When using real LocalBackend as the
// routeAdvertiser, calls to Advertise/UnadvertiseRoutes can end up calling
// back into AppConnector via authReconfig. If everything is called
// synchronously, this results in a deadlock on AppConnector.mu.
func TestUpdateRoutesDeadlock(t *testing.T) {
ctx := context.Background()
rc := &appctest.RouteCollector{}
a := NewAppConnector(t.Logf, rc, &RouteInfo{}, fakeStoreRoutes)
advertiseCalled := new(atomic.Bool)
unadvertiseCalled := new(atomic.Bool)
rc.AdvertiseCallback = func() {
// Call something that requires a.mu to be held.
a.DomainRoutes()
advertiseCalled.Store(true)
}
rc.UnadvertiseCallback = func() {
// Call something that requires a.mu to be held.
a.DomainRoutes()
unadvertiseCalled.Store(true)
}
a.updateDomains([]string{"example.com"})
a.Wait(ctx)
// Trigger rc.AdveriseRoute.
a.updateRoutes(
[]netip.Prefix{
netip.MustParsePrefix("127.0.0.1/32"),
netip.MustParsePrefix("127.0.0.2/32"),
},
)
a.Wait(ctx)
// Trigger rc.UnadveriseRoute.
a.updateRoutes(
[]netip.Prefix{
netip.MustParsePrefix("127.0.0.1/32"),
},
)
a.Wait(ctx)
if !advertiseCalled.Load() {
t.Error("AdvertiseRoute was not called")
}
if !unadvertiseCalled.Load() {
t.Error("UnadvertiseRoute was not called")
}
if want := []netip.Prefix{netip.MustParsePrefix("127.0.0.1/32")}; !slices.Equal(slices.Compact(rc.Routes()), want) {
t.Fatalf("got %v, want %v", rc.Routes(), want)
}
}