cmd/tailscale,controlclient,ipnlocal: fix 'up', deflake tests more

The CLI's "up" is kinda chaotic and LocalBackend.Start is kinda
chaotic and they both need to be redone/deleted (respectively), but
this fixes some buggy behavior meanwhile. We were previously calling
StartLoginInteractive (to start the controlclient's RegisterRequest)
redundantly in some cases, causing test flakes depending on timing and
up's weird state machine.

We only need to call StartLoginInteractive in the client if Start itself
doesn't. But Start doesn't tell us that. So cheat a bit and a put the
information about whether there's a current NodeKey in the ipn.Status.
It used to be accessible over LocalAPI via GetPrefs as a private key but
we removed that for security. But a bool is fine.

So then only call StartLoginInteractive if that bool is false and don't
do it in the WatchIPNBus loop.

Fixes #12028
Updates #12042

Change-Id: I0923c3f704a9d6afd825a858eb9a63ca7c1df294
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick
2024-05-06 21:33:37 -07:00
committed by Brad Fitzpatrick
parent e5ef35857f
commit e968b0ecd7
6 changed files with 85 additions and 20 deletions

View File

@ -80,6 +80,7 @@ type Direct struct {
onClientVersion func(*tailcfg.ClientVersion) // or nil
onControlTime func(time.Time) // or nil
onTailnetDefaultAutoUpdate func(bool) // or nil
panicOnUse bool // if true, panic if client is used (for testing)
dialPlan ControlDialPlanner // can be nil
@ -310,6 +311,9 @@ func NewDirect(opts Options) (*Direct, error) {
}
c.serverNoiseKey = key.NewMachine().Public() // prevent early error before hitting test client
}
if strings.Contains(opts.ServerURL, "controlplane.tailscale.com") && envknob.Bool("TS_PANIC_IF_HIT_MAIN_CONTROL") {
c.panicOnUse = true
}
return c, nil
}
@ -399,7 +403,7 @@ func (c *Direct) TryLogout(ctx context.Context) error {
func (c *Direct) TryLogin(ctx context.Context, t *tailcfg.Oauth2Token, flags LoginFlags) (url string, err error) {
if strings.Contains(c.serverURL, "controlplane.tailscale.com") && envknob.Bool("TS_PANIC_IF_HIT_MAIN_CONTROL") {
panic("[unexpected] controlclient: TryLogin called on " + c.serverURL)
panic(fmt.Sprintf("[unexpected] controlclient: TryLogin called on %s; tainted=%v", c.serverURL, c.panicOnUse))
}
c.logf("[v1] direct.TryLogin(token=%v, flags=%v)", t != nil, flags)
return c.doLoginOrRegen(ctx, loginOpt{Token: t, Flags: flags})
@ -462,6 +466,9 @@ func (c *Direct) hostInfoLocked() *tailcfg.Hostinfo {
}
func (c *Direct) doLogin(ctx context.Context, opt loginOpt) (mustRegen bool, newURL string, nks tkatype.MarshaledSignature, err error) {
if c.panicOnUse {
panic("tainted client")
}
c.mu.Lock()
persist := c.persist.AsStruct()
tryingNewKey := c.tryingNewKey
@ -835,6 +842,9 @@ const watchdogTimeout = 120 * time.Second
//
// If nu is nil, OmitPeers will be set to true.
func (c *Direct) sendMapRequest(ctx context.Context, isStreaming bool, nu NetmapUpdater) error {
if c.panicOnUse {
panic("tainted client")
}
if isStreaming && nu == nil {
panic("cb must be non-nil if isStreaming is true")
}
@ -1531,6 +1541,9 @@ func (c *Direct) SetDNS(ctx context.Context, req *tailcfg.SetDNSRequest) (err er
}
func (c *Direct) DoNoiseRequest(req *http.Request) (*http.Response, error) {
if c.panicOnUse {
panic("tainted client")
}
nc, err := c.getNoiseClient()
if err != nil {
return nil, err
@ -1627,6 +1640,9 @@ func (c *Direct) ReportHealthChange(sys health.Subsystem, sysErr error) {
if !ok {
return
}
if c.panicOnUse {
panic("tainted client")
}
req := &tailcfg.HealthChangeRequest{
Subsys: string(sys),
NodeKey: nodeKey,