diff --git a/cmd/tailscaled/tailscaled_windows.go b/cmd/tailscaled/tailscaled_windows.go index 786c5d833..7208e03da 100644 --- a/cmd/tailscaled/tailscaled_windows.go +++ b/cmd/tailscaled/tailscaled_windows.go @@ -55,6 +55,7 @@ "tailscale.com/util/osdiag" "tailscale.com/util/syspolicy" "tailscale.com/util/winutil" + "tailscale.com/util/winutil/gp" "tailscale.com/version" "tailscale.com/wf" ) @@ -70,6 +71,22 @@ func init() { } } +// permitPolicyLocks is a function to be called to lift the restriction on acquiring +// [gp.PolicyLock]s once the service is running. +// It is safe to be called multiple times. +var permitPolicyLocks = func() {} + +func init() { + if isWindowsService() { + // We prevent [gp.PolicyLock]s from being acquired until the service enters the running state. + // Otherwise, if tailscaled starts due to a GPSI policy installing Tailscale, it may deadlock + // while waiting for the write counterpart of the GP lock to be released by Group Policy, + // which is itself waiting for the installation to complete and tailscaled to start. + // See tailscale/tailscale#14416 for more information. + permitPolicyLocks = gp.RestrictPolicyLocks() + } +} + const serviceName = "Tailscale" // Application-defined command codes between 128 and 255 @@ -109,13 +126,13 @@ func tstunNewWithWindowsRetries(logf logger.Logf, tunName string) (_ tun.Device, } } -func isWindowsService() bool { +var isWindowsService = sync.OnceValue(func() bool { v, err := svc.IsWindowsService() if err != nil { log.Fatalf("svc.IsWindowsService failed: %v", err) } return v -} +}) // syslogf is a logger function that writes to the Windows event log (ie, the // one that you see in the Windows Event Viewer). tailscaled may optionally @@ -180,6 +197,10 @@ func (service *ipnService) Execute(args []string, r <-chan svc.ChangeRequest, ch changes <- svc.Status{State: svc.Running, Accepts: svcAccepts} syslogf("Service running") + // It is safe to allow GP locks to be acquired now that the service + // is running. + permitPolicyLocks() + for { select { case <-doneCh: diff --git a/tstest/integration/tailscaled_deps_test_windows.go b/tstest/integration/tailscaled_deps_test_windows.go index 5eda22327..b0d1c8968 100644 --- a/tstest/integration/tailscaled_deps_test_windows.go +++ b/tstest/integration/tailscaled_deps_test_windows.go @@ -60,6 +60,7 @@ _ "tailscale.com/util/osshare" _ "tailscale.com/util/syspolicy" _ "tailscale.com/util/winutil" + _ "tailscale.com/util/winutil/gp" _ "tailscale.com/version" _ "tailscale.com/version/distro" _ "tailscale.com/wf" diff --git a/util/syspolicy/source/policy_store_windows.go b/util/syspolicy/source/policy_store_windows.go index 86e2254e0..621701e84 100644 --- a/util/syspolicy/source/policy_store_windows.go +++ b/util/syspolicy/source/policy_store_windows.go @@ -12,6 +12,7 @@ "golang.org/x/sys/windows" "golang.org/x/sys/windows/registry" "tailscale.com/util/set" + "tailscale.com/util/syspolicy/internal/loggerx" "tailscale.com/util/syspolicy/setting" "tailscale.com/util/winutil/gp" ) @@ -29,6 +30,18 @@ _ Expirable = (*PlatformPolicyStore)(nil) ) +// lockableCloser is a [Lockable] that can also be closed. +// It is implemented by [gp.PolicyLock] and [optionalPolicyLock]. +type lockableCloser interface { + Lockable + Close() error +} + +var ( + _ lockableCloser = (*gp.PolicyLock)(nil) + _ lockableCloser = (*optionalPolicyLock)(nil) +) + // PlatformPolicyStore implements [Store] by providing read access to // Registry-based Tailscale policies, such as those configured via Group Policy or MDM. // For better performance and consistency, it is recommended to lock it when @@ -55,7 +68,7 @@ type PlatformPolicyStore struct { // they are being read. // // When both policyLock and mu need to be taken, mu must be taken before policyLock. - policyLock *gp.PolicyLock + policyLock lockableCloser mu sync.Mutex tsKeys []registry.Key // or nil if the [PlatformPolicyStore] hasn't been locked. @@ -108,7 +121,7 @@ func newPlatformPolicyStore(scope gp.Scope, softwareKey registry.Key, policyLock scope: scope, softwareKey: softwareKey, done: make(chan struct{}), - policyLock: policyLock, + policyLock: &optionalPolicyLock{PolicyLock: policyLock}, } } @@ -448,3 +461,68 @@ func tailscaleKeyNamesFor(scope gp.Scope) []string { panic("unreachable") } } + +type gpLockState int + +const ( + gpUnlocked = gpLockState(iota) + gpLocked + gpLockRestricted // the lock could not be acquired due to a restriction in place +) + +// optionalPolicyLock is a wrapper around [gp.PolicyLock] that locks +// and unlocks the underlying [gp.PolicyLock]. +// +// If the [gp.PolicyLock.Lock] returns [gp.ErrLockRestricted], the error is ignored, +// and calling [optionalPolicyLock.Unlock] is a no-op. +// +// The underlying GP lock is kinda optional: it is safe to read policy settings +// from the Registry without acquiring it, but it is recommended to lock it anyway +// when reading multiple policy settings to avoid potentially inconsistent results. +// +// It is not safe for concurrent use. +type optionalPolicyLock struct { + *gp.PolicyLock + state gpLockState +} + +// Lock acquires the underlying [gp.PolicyLock], returning an error on failure. +// If the lock cannot be acquired due to a restriction in place +// (e.g., attempting to acquire a lock while the service is starting), +// the lock is considered to be held, the method returns nil, and a subsequent +// call to [Unlock] is a no-op. +// It is a runtime error to call Lock when the lock is already held. +func (o *optionalPolicyLock) Lock() error { + if o.state != gpUnlocked { + panic("already locked") + } + switch err := o.PolicyLock.Lock(); err { + case nil: + o.state = gpLocked + return nil + case gp.ErrLockRestricted: + loggerx.Errorf("GP lock not acquired: %v", err) + o.state = gpLockRestricted + return nil + default: + return err + } +} + +// Unlock releases the underlying [gp.PolicyLock], if it was previously acquired. +// It is a runtime error to call Unlock when the lock is not held. +func (o *optionalPolicyLock) Unlock() { + switch o.state { + case gpLocked: + o.PolicyLock.Unlock() + case gpLockRestricted: + // The GP lock wasn't acquired due to a restriction in place + // when [optionalPolicyLock.Lock] was called. Unlock is a no-op. + case gpUnlocked: + panic("not locked") + default: + panic("unreachable") + } + + o.state = gpUnlocked +} diff --git a/util/winutil/gp/policylock_windows.go b/util/winutil/gp/policylock_windows.go index 95453aa16..69c5ff016 100644 --- a/util/winutil/gp/policylock_windows.go +++ b/util/winutil/gp/policylock_windows.go @@ -48,10 +48,35 @@ type policyLockResult struct { } var ( - // ErrInvalidLockState is returned by (*PolicyLock).Lock if the lock has a zero value or has already been closed. + // ErrInvalidLockState is returned by [PolicyLock.Lock] if the lock has a zero value or has already been closed. ErrInvalidLockState = errors.New("the lock has not been created or has already been closed") + // ErrLockRestricted is returned by [PolicyLock.Lock] if the lock cannot be acquired due to a restriction in place, + // such as when [RestrictPolicyLocks] has been called. + ErrLockRestricted = errors.New("the lock cannot be acquired due to a restriction in place") ) +var policyLockRestricted atomic.Int32 + +// RestrictPolicyLocks forces all [PolicyLock.Lock] calls to return [ErrLockRestricted] +// until the returned function is called to remove the restriction. +// +// It is safe to call the returned function multiple times, but the restriction will only +// be removed once. If [RestrictPolicyLocks] is called multiple times, each call must be +// matched by a corresponding call to the returned function to fully remove the restrictions. +// +// It is primarily used to prevent certain deadlocks, such as when tailscaled attempts to acquire +// a policy lock during startup. If the service starts due to Tailscale being installed by GPSI, +// the write lock will be held by the Group Policy service throughout the installation, +// preventing tailscaled from acquiring the read lock. Since Group Policy waits for the installation +// to complete, and therefore for tailscaled to start, before releasing the write lock, this scenario +// would result in a deadlock. See tailscale/tailscale#14416 for more information. +func RestrictPolicyLocks() (removeRestriction func()) { + policyLockRestricted.Add(1) + return sync.OnceFunc(func() { + policyLockRestricted.Add(-1) + }) +} + // NewMachinePolicyLock creates a PolicyLock that facilitates pausing the // application of computer policy. To avoid deadlocks when acquiring both // machine and user locks, acquire the user lock before the machine lock. @@ -103,13 +128,18 @@ func NewUserPolicyLock(token windows.Token) (*PolicyLock, error) { } // Lock locks l. -// It returns ErrNotInitialized if l has a zero value or has already been closed, -// or an Errno if the underlying Group Policy lock cannot be acquired. +// It returns [ErrInvalidLockState] if l has a zero value or has already been closed, +// [ErrLockRestricted] if the lock cannot be acquired due to a restriction in place, +// or a [syscall.Errno] if the underlying Group Policy lock cannot be acquired. // -// As a special case, it fails with windows.ERROR_ACCESS_DENIED +// As a special case, it fails with [windows.ERROR_ACCESS_DENIED] // if l is a user policy lock, and the corresponding user is not logged in // interactively at the time of the call. func (l *PolicyLock) Lock() error { + if policyLockRestricted.Load() > 0 { + return ErrLockRestricted + } + l.mu.Lock() defer l.mu.Unlock() if l.lockCnt.Add(2)&1 == 0 {