From d05a572db446ffe7d66b840f07b3acdf479bdb32 Mon Sep 17 00:00:00 2001 From: Andrew Dunham Date: Fri, 15 Dec 2023 23:28:32 -0500 Subject: [PATCH] net/portmapper: handle multiple UPnP discovery responses Instead of taking the first UPnP response we receive and using that to create port mappings, store all received UPnP responses, sort and deduplicate them, and then try all of them to obtain an external address. Updates #10602 Signed-off-by: Andrew Dunham Change-Id: I783ccb1834834ee2a9ecbae2b16d801f2354302f --- cmd/tailscale/depaware.txt | 2 +- cmd/tailscaled/depaware.txt | 2 +- net/portmapper/disabled_stubs.go | 4 + net/portmapper/portmapper.go | 95 ++++++++++++-- net/portmapper/upnp.go | 215 +++++++++++++++++++++++-------- net/portmapper/upnp_test.go | 169 ++++++++++++++++++++---- 6 files changed, 399 insertions(+), 88 deletions(-) diff --git a/cmd/tailscale/depaware.txt b/cmd/tailscale/depaware.txt index 53bd2005c..50571b2e9 100644 --- a/cmd/tailscale/depaware.txt +++ b/cmd/tailscale/depaware.txt @@ -210,7 +210,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep archive/tar from tailscale.com/clientupdate bufio from compress/flate+ bytes from bufio+ - cmp from slices + cmp from slices+ compress/flate from compress/gzip+ compress/gzip from net/http+ compress/zlib from image/png+ diff --git a/cmd/tailscaled/depaware.txt b/cmd/tailscaled/depaware.txt index 9d6fb9d42..b5ac27975 100644 --- a/cmd/tailscaled/depaware.txt +++ b/cmd/tailscaled/depaware.txt @@ -442,7 +442,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de archive/tar from tailscale.com/clientupdate bufio from compress/flate+ bytes from bufio+ - cmp from slices + cmp from slices+ compress/flate from compress/gzip+ compress/gzip from golang.org/x/net/http2+ W compress/zlib from debug/pe diff --git a/net/portmapper/disabled_stubs.go b/net/portmapper/disabled_stubs.go index e08b54376..a1324c20b 100644 --- a/net/portmapper/disabled_stubs.go +++ b/net/portmapper/disabled_stubs.go @@ -18,6 +18,10 @@ func parseUPnPDiscoResponse([]byte) (uPnPDiscoResponse, error) { return uPnPDiscoResponse{}, nil } +func processUPnPResponses(metas []uPnPDiscoResponse) []uPnPDiscoResponse { + return metas +} + func (c *Client) getUPnPPortMapping( ctx context.Context, gw netip.Addr, diff --git a/net/portmapper/portmapper.go b/net/portmapper/portmapper.go index 1105c947d..f346028d2 100644 --- a/net/portmapper/portmapper.go +++ b/net/portmapper/portmapper.go @@ -14,7 +14,9 @@ import ( "net" "net/http" "net/netip" + "slices" "sync" + "sync/atomic" "time" "go4.org/mem" @@ -94,15 +96,21 @@ type Client struct { pcpSawTime time.Time // time we last saw PCP was available - uPnPSawTime time.Time // time we last saw UPnP was available - uPnPMeta uPnPDiscoResponse // Location header from UPnP UDP discovery response - uPnPHTTPClient *http.Client // netns-configured HTTP client for UPnP; nil until needed + uPnPSawTime time.Time // time we last saw UPnP was available + uPnPMetas []uPnPDiscoResponse // UPnP UDP discovery responses + uPnPHTTPClient *http.Client // netns-configured HTTP client for UPnP; nil until needed localPort uint16 mapping mapping // non-nil if we have a mapping } +func (c *Client) vlogf(format string, args ...any) { + if c.debug.VerboseLogs { + c.logf(format, args...) + } +} + // mapping represents a created port-mapping over some protocol. It specifies a lease duration, // how to release the mapping, and whether the map is still valid. // @@ -307,7 +315,7 @@ func (c *Client) invalidateMappingsLocked(releaseOld bool) { c.pmpPubIPTime = time.Time{} c.pcpSawTime = time.Time{} c.uPnPSawTime = time.Time{} - c.uPnPMeta = uPnPDiscoResponse{} + c.uPnPMetas = nil } func (c *Client) sawPMPRecently() bool { @@ -803,12 +811,69 @@ func (c *Client) Probe(ctx context.Context) (res ProbeResult, err error) { uc.WriteToUDPAddrPort(uPnPIGDPacket, upnpMulticastAddr) } + // We can see multiple UPnP responses from LANs with multiple + // UPnP-capable routers. Rather than randomly picking whichever arrives + // first, let's collect all UPnP responses and choose at the end. + // + // We do this by starting a 50ms timer from when the first UDP packet + // is received, and waiting at least that long for more UPnP responses + // to arrive before returning (as long as the first packet is seen + // within the first 200ms of the context creation, which is likely in + // the common case). + // + // This 50ms timer is distinct from the context timeout; it is used to + // delay an early return in the case where we see all three portmapping + // responses (PCP, PMP, UPnP), whereas the context timeout causes the + // loop to exit regardless of what portmapping responses we've seen. + // + // We use an atomic value to signal that the timer has finished. + var ( + upnpTimer *time.Timer + upnpTimerDone atomic.Bool + ) + defer func() { + if upnpTimer != nil { + upnpTimer.Stop() + } + }() + + // Store all returned UPnP responses until we're done, at which point + // we select from all available options. + var upnpResponses []uPnPDiscoResponse + defer func() { + if !res.UPnP || len(upnpResponses) == 0 { + // Either we didn't discover any UPnP responses or + // c.sawUPnPRecently() is true; don't change anything. + return + } + + // Deduplicate and sort responses + upnpResponses = processUPnPResponses(upnpResponses) + + c.mu.Lock() + c.uPnPSawTime = time.Now() + if !slices.Equal(c.uPnPMetas, upnpResponses) { + c.logf("UPnP meta changed: %+v", upnpResponses) + c.uPnPMetas = upnpResponses + metricUPnPUpdatedMeta.Add(1) + } + c.mu.Unlock() + }() + + // This is the main loop that receives UDP packets and parses them into + // PCP, PMP, or UPnP responses, updates our ProbeResult, and stores + // data for use in GetCachedMappingOrStartCreatingOne. buf := make([]byte, 1500) pcpHeard := false // true when we get any PCP response for { if pcpHeard && res.PMP && res.UPnP { - // Nothing more to discover. - return res, nil + if upnpTimerDone.Load() { + // Nothing more to discover. + return res, nil + } + + // UPnP timer still running; fall through and keep + // receiving packets. } n, src, err := uc.ReadFromUDPAddrPort(buf) if err != nil { @@ -817,6 +882,11 @@ func (c *Client) Probe(ctx context.Context) (res ProbeResult, err error) { } return res, err } + // Start timer after we get the first response. + if upnpTimer == nil { + upnpTimer = time.AfterFunc(50*time.Millisecond, func() { upnpTimerDone.Store(true) }) + } + ip := src.Addr().Unmap() handleUPnPResponse := func() { @@ -834,15 +904,14 @@ func (c *Client) Probe(ctx context.Context) (res ProbeResult, err error) { } metricUPnPOK.Add(1) c.logf("[v1] UPnP reply %+v, %q", meta, buf[:n]) + + // Store the UPnP response for later selection res.UPnP = true - c.mu.Lock() - c.uPnPSawTime = time.Now() - if c.uPnPMeta != meta { - c.logf("UPnP meta changed: %+v", meta) - c.uPnPMeta = meta - metricUPnPUpdatedMeta.Add(1) + if len(upnpResponses) > 10 { + c.logf("too many UPnP responses: skipping") + } else { + upnpResponses = append(upnpResponses, meta) } - c.mu.Unlock() } port := src.Port() diff --git a/net/portmapper/upnp.go b/net/portmapper/upnp.go index bc705dc20..b0a243138 100644 --- a/net/portmapper/upnp.go +++ b/net/portmapper/upnp.go @@ -10,6 +10,7 @@ package portmapper import ( "bufio" "bytes" + "cmp" "context" "encoding/xml" "fmt" @@ -19,6 +20,7 @@ import ( "net/http" "net/netip" "net/url" + "slices" "strings" "sync/atomic" "time" @@ -421,38 +423,133 @@ func (c *Client) getUPnPPortMapping( internal: internal, } - var ( - rootDev *goupnp.RootDevice - loc *url.URL - err error - ) + // We can have multiple UPnP "meta" values (which correspond to the + // UPnP discovery responses received). We want to try all of them when + // obtaining a mapping, but also prefer any existing mapping's root + // device (if present), since that will allow us to renew an existing + // mapping instead of creating a new one. + // Start by grabbing the list of metas, any existing mapping, and + // creating a HTTP client for use. c.mu.Lock() oldMapping, ok := c.mapping.(*upnpMapping) - meta := c.uPnPMeta + metas := c.uPnPMetas ctx = goupnp.WithHTTPClient(ctx, c.upnpHTTPClientLocked()) c.mu.Unlock() - if ok && oldMapping != nil { - rootDev = oldMapping.rootDev - loc = oldMapping.loc - } else { - rootDev, loc, err = getUPnPRootDevice(ctx, c.logf, c.debug, gw, meta) - if c.debug.VerboseLogs { - c.logf("getUPnPRootDevice: loc=%q err=%v", loc, err) - } - if err != nil { - return netip.AddrPort{}, false - } + + // Wrapper for a uPnPDiscoResponse with an optional existing root + // device + URL (if we've got a previous cached mapping). + type step struct { + rootDev *goupnp.RootDevice // if nil, use 'meta' + loc *url.URL // non-nil if rootDev is non-nil + meta uPnPDiscoResponse } - if rootDev == nil { - return netip.AddrPort{}, false + var steps []step + + // Now, if we have an existing mapping, swap that mapping's entry to + // the first entry in our "metas" list so we try it first. + haveOldMapping := ok && oldMapping != nil + if haveOldMapping && oldMapping.rootDev != nil { + steps = append(steps, step{rootDev: oldMapping.rootDev, loc: oldMapping.loc}) + } + // Note: this includes the meta for a previously-cached mapping, in + // case the rootDev changes. + for _, meta := range metas { + steps = append(steps, step{meta: meta}) } - // Now that we have a root device, select the best mapping service from - // it. This makes network requests, and can vary from mapping to - // mapping if the upstream device's connection status changes. + // Now, iterate through every meta that we have trying to get an + // external IP address. If we succeed, we'll return; if we fail, we + // continue this loop. + var errs []error + for _, step := range steps { + var ( + rootDev *goupnp.RootDevice + loc *url.URL + err error + ) + if step.rootDev != nil { + rootDev = step.rootDev + loc = step.loc + } else { + rootDev, loc, err = getUPnPRootDevice(ctx, c.logf, c.debug, gw, step.meta) + c.vlogf("getUPnPRootDevice: loc=%q err=%v", loc, err) + if err != nil { + errs = append(errs, err) + continue + } + } + if rootDev == nil { + continue + } + + // This actually performs the port mapping operation using this + // root device. + // + // TODO(andrew-d): this can successfully perform a portmap and + // return an externalAddrPort that refers to a non-public IP + // address if the first selected RootDevice is a device that is + // connected to another internal network. This is still better + // than randomly flapping between multiple devices, but we + // should probably split this up further to try the best + // service (one with an external IP) first, instead of + // iterating by device. + // + // This is probably sufficiently unlikely that I'm leaving that + // as a follow-up task if it's necessary. + externalAddrPort, client, err := c.tryUPnPPortmapWithDevice(ctx, internal, prevPort, rootDev, loc) + if err != nil { + errs = append(errs, err) + continue + } + + // If we get here, we're successful; we can cache this mapping, + // update our local port, and then return. + // + // NOTE: this time might not technically be accurate if we created a + // permanent lease above, but we should still re-check the presence of + // the lease on a regular basis so we use it anyway. + d := time.Duration(pmpMapLifetimeSec) * time.Second + upnp.goodUntil = now.Add(d) + upnp.renewAfter = now.Add(d / 2) + upnp.external = externalAddrPort + upnp.rootDev = rootDev + upnp.loc = loc + upnp.client = client + + c.mu.Lock() + defer c.mu.Unlock() + c.mapping = upnp + c.localPort = externalAddrPort.Port() + return upnp.external, true + } + + // If we get here, we didn't get anything. + // TODO(andrew-d): use or log errs? + _ = errs + return netip.AddrPort{}, false +} + +// tryUPnPPortmapWithDevice attempts to perform a port forward from the given +// UPnP device to the 'internal' address. It tries to re-use the previous port, +// if a non-zero value is provided, and handles retries and errors about +// unsupported features. +// +// It returns the external address and port that was mapped (i.e. the +// address+port that another Tailscale node can use to make a connection to +// this one) and the UPnP client that was used to obtain that mapping. +func (c *Client) tryUPnPPortmapWithDevice( + ctx context.Context, + internal netip.AddrPort, + prevPort uint16, + rootDev *goupnp.RootDevice, + loc *url.URL, +) (netip.AddrPort, upnpClient, error) { + // Select the best mapping service from the given root device. This + // makes network requests, and can vary from mapping to mapping if the + // upstream device's connection status changes. client, err := selectBestService(ctx, c.logf, rootDev, loc) if err != nil { - return netip.AddrPort{}, false + return netip.AddrPort{}, nil, err } // Start by trying to make a temporary lease with a duration. @@ -465,9 +562,7 @@ func (c *Client) getUPnPPortMapping( internal.Addr().String(), pmpMapLifetimeSec*time.Second, ) - if c.debug.VerboseLogs { - c.logf("addAnyPortMapping: %v, err=%q", newPort, err) - } + c.vlogf("addAnyPortMapping: %v, err=%q", newPort, err) // If this is an error and the code is // "OnlyPermanentLeasesSupported", then we retry with no lease @@ -490,45 +585,63 @@ func (c *Client) getUPnPPortMapping( internal.Addr().String(), 0, // permanent ) - if c.debug.VerboseLogs { - c.logf("addAnyPortMapping: 725 retry %v, err=%q", newPort, err) - } + c.vlogf("addAnyPortMapping: 725 retry %v, err=%q", newPort, err) } } if err != nil { - return netip.AddrPort{}, false + return netip.AddrPort{}, nil, err } // TODO cache this ip somewhere? extIP, err := client.GetExternalIPAddress(ctx) - if c.debug.VerboseLogs { - c.logf("client.GetExternalIPAddress: %v, %v", extIP, err) - } + c.vlogf("client.GetExternalIPAddress: %v, %v", extIP, err) if err != nil { - // TODO this doesn't seem right - return netip.AddrPort{}, false + return netip.AddrPort{}, nil, err } externalIP, err := netip.ParseAddr(extIP) if err != nil { - return netip.AddrPort{}, false + return netip.AddrPort{}, nil, err } - upnp.external = netip.AddrPortFrom(externalIP, newPort) + return netip.AddrPortFrom(externalIP, newPort), client, nil +} - // NOTE: this time might not technically be accurate if we created a - // permanent lease above, but we should still re-check the presence of - // the lease on a regular basis so we use it anyway. - d := time.Duration(pmpMapLifetimeSec) * time.Second - upnp.goodUntil = now.Add(d) - upnp.renewAfter = now.Add(d / 2) - upnp.rootDev = rootDev - upnp.loc = loc - upnp.client = client - c.mu.Lock() - defer c.mu.Unlock() - c.mapping = upnp - c.localPort = newPort - return upnp.external, true +// processUPnPResponses sorts and deduplicates a list of UPnP discovery +// responses, returning the possibly-reduced list. +// +// It will perform a consistent sort of the provided responses, so if we have +// multiple valid UPnP destinations a consistent option will be picked every +// time. +func processUPnPResponses(metas []uPnPDiscoResponse) []uPnPDiscoResponse { + // Sort and compact all responses to remove duplicates; since + // we send multiple probes, we often get duplicate responses. + slices.SortFunc(metas, func(a, b uPnPDiscoResponse) int { + // Sort the USN in reverse, so that + // "InternetGatewayDevice:2" sorts before + // "InternetGatewayDevice:1". + if ii := cmp.Compare(a.USN, b.USN); ii != 0 { + return -ii + } + if ii := cmp.Compare(a.Location, b.Location); ii != 0 { + return ii + } + return cmp.Compare(a.Server, b.Server) + }) + + // We can get multiple responses that point to a single Location, since + // we probe for both ssdp:all and InternetGatewayDevice:1 as + // independent packets. Compact by comparing the Location and Server, + // but not the USN (which contains the device being offered). + // + // Since the slices are sorted in reverse above, this means that if we + // get a discovery response for both InternetGatewayDevice:1 and + // InternetGatewayDevice:2, we'll keep the first + // (InternetGatewayDevice:2) response, which is what we want. + metas = slices.CompactFunc(metas, func(a, b uPnPDiscoResponse) bool { + return a.Location == b.Location && a.Server == b.Server + }) + + return metas } // getUPnPErrorCode returns the UPnP error code from the given response, if the diff --git a/net/portmapper/upnp_test.go b/net/portmapper/upnp_test.go index ec1446e91..8748cf427 100644 --- a/net/portmapper/upnp_test.go +++ b/net/portmapper/upnp_test.go @@ -14,6 +14,7 @@ import ( "net/netip" "reflect" "regexp" + "slices" "sync/atomic" "testing" @@ -332,32 +333,156 @@ func TestGetUPnPPortMapping(t *testing.T) { c.debug.VerboseLogs = true - sawRequestWithLease.Store(false) - res, err := c.Probe(ctx) - if err != nil { - t.Fatalf("Probe: %v", err) - } - if !res.UPnP { - t.Errorf("didn't detect UPnP") - } + // Try twice to test the "cache previous mapping" logic. + var ( + firstResponse netip.AddrPort + prevPort uint16 + ) + for i := 0; i < 2; i++ { + sawRequestWithLease.Store(false) + res, err := c.Probe(ctx) + if err != nil { + t.Fatalf("Probe: %v", err) + } + if !res.UPnP { + t.Errorf("didn't detect UPnP") + } - gw, myIP, ok := c.gatewayAndSelfIP() - if !ok { - t.Fatalf("could not get gateway and self IP") - } - t.Logf("gw=%v myIP=%v", gw, myIP) + gw, myIP, ok := c.gatewayAndSelfIP() + if !ok { + t.Fatalf("could not get gateway and self IP") + } + t.Logf("gw=%v myIP=%v", gw, myIP) - ext, ok := c.getUPnPPortMapping(ctx, gw, netip.AddrPortFrom(myIP, 12345), 0) - if !ok { - t.Fatal("could not get UPnP port mapping") + ext, ok := c.getUPnPPortMapping(ctx, gw, netip.AddrPortFrom(myIP, 12345), prevPort) + if !ok { + t.Fatal("could not get UPnP port mapping") + } + if got, want := ext.Addr(), netip.MustParseAddr("123.123.123.123"); got != want { + t.Errorf("bad external address; got %v want %v", got, want) + } + if !sawRequestWithLease.Load() { + t.Errorf("wanted request with lease, but didn't see one") + } + if i == 0 { + firstResponse = ext + prevPort = ext.Port() + } else if firstResponse != ext { + t.Errorf("got different response on second attempt: (got) %v != %v (want)", ext, firstResponse) + } + t.Logf("external IP: %v", ext) } - if got, want := ext.Addr(), netip.MustParseAddr("123.123.123.123"); got != want { - t.Errorf("bad external address; got %v want %v", got, want) + } +} + +func TestGetUPnPPortMappingNoResponses(t *testing.T) { + igd, err := NewTestIGD(t.Logf, TestIGDOptions{UPnP: true}) + if err != nil { + t.Fatal(err) + } + defer igd.Close() + + c := newTestClient(t, igd) + t.Logf("Listening on upnp=%v", c.testUPnPPort) + defer c.Close() + + c.debug.VerboseLogs = true + + // Do this before setting uPnPMetas since it invalidates those mappings + // if gw/myIP change. + gw, myIP, _ := c.gatewayAndSelfIP() + + t.Run("ErrorContactingUPnP", func(t *testing.T) { + c.mu.Lock() + c.uPnPMetas = []uPnPDiscoResponse{{ + Location: "http://127.0.0.1:1/does-not-exist.xml", + Server: "Tailscale-Test/1.0 UPnP/1.1 MiniUPnPd/2.2.1", + USN: "uuid:bee7052b-49e8-3597-b545-55a1e38ac11::urn:schemas-upnp-org:device:InternetGatewayDevice:2", + }} + c.mu.Unlock() + + _, ok := c.getUPnPPortMapping(context.Background(), gw, netip.AddrPortFrom(myIP, 12345), 0) + if ok { + t.Errorf("expected no mapping when there are no responses") } - if !sawRequestWithLease.Load() { - t.Errorf("wanted request with lease, but didn't see one") - } - t.Logf("external IP: %v", ext) + }) +} + +func TestProcessUPnPResponses(t *testing.T) { + testCases := []struct { + name string + responses []uPnPDiscoResponse + want []uPnPDiscoResponse + }{ + { + name: "single", + responses: []uPnPDiscoResponse{{ + Location: "http://192.168.1.1:2828/control.xml", + Server: "Tailscale-Test/1.0 UPnP/1.1 MiniUPnPd/2.2.1", + USN: "uuid:bee7052b-49e8-3597-b545-55a1e38ac11::urn:schemas-upnp-org:device:InternetGatewayDevice:1", + }}, + want: []uPnPDiscoResponse{{ + Location: "http://192.168.1.1:2828/control.xml", + Server: "Tailscale-Test/1.0 UPnP/1.1 MiniUPnPd/2.2.1", + USN: "uuid:bee7052b-49e8-3597-b545-55a1e38ac11::urn:schemas-upnp-org:device:InternetGatewayDevice:1", + }}, + }, + { + name: "multiple_with_same_location", + responses: []uPnPDiscoResponse{ + { + Location: "http://192.168.1.1:2828/control.xml", + Server: "Tailscale-Test/1.0 UPnP/1.1 MiniUPnPd/2.2.1", + USN: "uuid:bee7052b-49e8-3597-b545-55a1e38ac11::urn:schemas-upnp-org:device:InternetGatewayDevice:1", + }, + { + Location: "http://192.168.1.1:2828/control.xml", + Server: "Tailscale-Test/1.0 UPnP/1.1 MiniUPnPd/2.2.1", + USN: "uuid:bee7052b-49e8-3597-b545-55a1e38ac11::urn:schemas-upnp-org:device:InternetGatewayDevice:2", + }, + }, + want: []uPnPDiscoResponse{{ + Location: "http://192.168.1.1:2828/control.xml", + Server: "Tailscale-Test/1.0 UPnP/1.1 MiniUPnPd/2.2.1", + USN: "uuid:bee7052b-49e8-3597-b545-55a1e38ac11::urn:schemas-upnp-org:device:InternetGatewayDevice:2", + }}, + }, + { + name: "multiple_with_different_location", + responses: []uPnPDiscoResponse{ + { + Location: "http://192.168.1.1:2828/control.xml", + Server: "Tailscale-Test/1.0 UPnP/1.1 MiniUPnPd/2.2.1", + USN: "uuid:bee7052b-49e8-3597-b545-55a1e38ac11::urn:schemas-upnp-org:device:InternetGatewayDevice:1", + }, + { + Location: "http://192.168.100.1:2828/control.xml", + Server: "Tailscale-Test/1.0 UPnP/1.1 MiniUPnPd/2.2.1", + USN: "uuid:bee7052b-49e8-3597-b545-55a1e38ac11::urn:schemas-upnp-org:device:InternetGatewayDevice:2", + }, + }, + want: []uPnPDiscoResponse{ + // note: this sorts first because we prefer "InternetGatewayDevice:2" + { + Location: "http://192.168.100.1:2828/control.xml", + Server: "Tailscale-Test/1.0 UPnP/1.1 MiniUPnPd/2.2.1", + USN: "uuid:bee7052b-49e8-3597-b545-55a1e38ac11::urn:schemas-upnp-org:device:InternetGatewayDevice:2", + }, + { + Location: "http://192.168.1.1:2828/control.xml", + Server: "Tailscale-Test/1.0 UPnP/1.1 MiniUPnPd/2.2.1", + USN: "uuid:bee7052b-49e8-3597-b545-55a1e38ac11::urn:schemas-upnp-org:device:InternetGatewayDevice:1", + }, + }, + }, + } + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + got := processUPnPResponses(slices.Clone(tt.responses)) + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("unexpected result:\n got: %+v\nwant: %+v\n", got, tt.want) + } + }) } }