diff --git a/cmd/containerboot/main.go b/cmd/containerboot/main.go
index 7411ea949..895be108b 100644
--- a/cmd/containerboot/main.go
+++ b/cmd/containerboot/main.go
@@ -359,6 +359,12 @@ authLoop:
 			log.Fatalf("rewatching tailscaled for updates after auth: %v", err)
 		}
 
+	// If tailscaled config was read from a mounted file, watch the file for updates and reload.
+	cfgWatchErrChan := make(chan error)
+	if cfg.TailscaledConfigFilePath != "" {
+		go watchTailscaledConfigChanges(ctx, cfg.TailscaledConfigFilePath, client, cfgWatchErrChan)
+	}
+
 	var (
 		startupTasksDone = false
 		currentIPs       deephash.Sum // tailscale IPs assigned to device
@@ -452,6 +458,8 @@ runLoop:
 			break runLoop
 		case err := <-errChan:
 			log.Fatalf("failed to read from tailscaled: %v", err)
+		case err := <-cfgWatchErrChan:
+			log.Fatalf("failed to watch tailscaled config: %v", err)
 		case n := <-notifyChan:
 			if n.State != nil && *n.State != ipn.Running {
 				// Something's gone wrong and we've left the authenticated state.
diff --git a/cmd/containerboot/tailscaled.go b/cmd/containerboot/tailscaled.go
index d8da49b03..fc2092477 100644
--- a/cmd/containerboot/tailscaled.go
+++ b/cmd/containerboot/tailscaled.go
@@ -13,10 +13,13 @@ import (
 	"log"
 	"os"
 	"os/exec"
+	"path/filepath"
+	"reflect"
 	"strings"
 	"syscall"
 	"time"
 
+	"github.com/fsnotify/fsnotify"
 	"tailscale.com/client/tailscale"
 )
 
@@ -166,3 +169,70 @@ func tailscaleSet(ctx context.Context, cfg *settings) error {
 	}
 	return nil
 }
+
+func watchTailscaledConfigChanges(ctx context.Context, path string, lc *tailscale.LocalClient, errCh chan<- error) {
+	var (
+		tickChan          <-chan time.Time
+		tailscaledCfgDir  = filepath.Dir(path)
+		prevTailscaledCfg []byte
+	)
+	w, err := fsnotify.NewWatcher()
+	if err != nil {
+		log.Printf("tailscaled config watch: failed to create fsnotify watcher, timer-only mode: %v", err)
+		ticker := time.NewTicker(5 * time.Second)
+		defer ticker.Stop()
+		tickChan = ticker.C
+	} else {
+		defer w.Close()
+		if err := w.Add(tailscaledCfgDir); err != nil {
+			errCh <- fmt.Errorf("failed to add fsnotify watch: %w", err)
+			return
+		}
+	}
+	b, err := os.ReadFile(path)
+	if err != nil {
+		errCh <- fmt.Errorf("error reading configfile: %w", err)
+		return
+	}
+	prevTailscaledCfg = b
+	// kubelet mounts Secrets to Pods using a series of symlinks, one of
+	// which is <mount-dir>/..data, that Kubernetes recommends consumers
+	// use if they need to monitor changes:
+	// https://github.com/kubernetes/kubernetes/blob/v1.28.1/pkg/volume/util/atomic_writer.go#L39-L61
+	const kubeletMountedCfg = "..data"
+	toWatch := filepath.Join(tailscaledCfgDir, kubeletMountedCfg)
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case err := <-w.Errors:
+			errCh <- fmt.Errorf("watcher error: %w", err)
+			return
+		case <-tickChan:
+		case event := <-w.Events:
+			if event.Name != toWatch {
+				continue
+			}
+		}
+		b, err := os.ReadFile(path)
+		if err != nil {
+			errCh <- fmt.Errorf("error reading configfile: %w", err)
+			return
+		}
+		// For some proxy types the mounted volume also contains tailscaled state and other files. We
+		// don't want to reload config unnecessarily on unrelated changes to these files.
+		if reflect.DeepEqual(b, prevTailscaledCfg) {
+			continue
+		}
+		prevTailscaledCfg = b
+		log.Printf("tailscaled config watch: ensuring that config is up to date")
+		ok, err := lc.ReloadConfig(ctx)
+		if err != nil {
+			errCh <- fmt.Errorf("error reloading tailscaled config: %w", err)
+			return
+		}
+		if ok {
+			log.Printf("tailscaled config watch: config was reloaded")
+		}
+	}
+}
diff --git a/cmd/k8s-operator/operator_test.go b/cmd/k8s-operator/operator_test.go
index d53269f05..1998fe3bc 100644
--- a/cmd/k8s-operator/operator_test.go
+++ b/cmd/k8s-operator/operator_test.go
@@ -1379,6 +1379,7 @@ func TestTailscaledConfigfileHash(t *testing.T) {
 		},
 	})
 
+	expectReconciled(t, sr, "default", "test")
 	expectReconciled(t, sr, "default", "test")
 
 	fullName, shortName := findGenName(t, fc, "default", "test", "svc")
@@ -1389,7 +1390,7 @@ func TestTailscaledConfigfileHash(t *testing.T) {
 		parentType:      "svc",
 		hostname:        "default-test",
 		clusterTargetIP: "10.20.30.40",
-		confFileHash:    "acf3467364b0a3ba9b8ee0dd772cb7c2f0bf585e288fa99b7fe4566009ed6041",
+		confFileHash:    "848bff4b5ba83ac999e6984c8464e597156daba961ae045e7dbaef606d54ab5e",
 		app:             kubetypes.AppIngressProxy,
 	}
 	expectEqual(t, fc, expectedSTS(t, fc, o), nil)
diff --git a/cmd/k8s-operator/proxygroup.go b/cmd/k8s-operator/proxygroup.go
index 194474fb2..a4befa039 100644
--- a/cmd/k8s-operator/proxygroup.go
+++ b/cmd/k8s-operator/proxygroup.go
@@ -261,17 +261,44 @@ func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, pg *tsapi.Pro
 			return fmt.Errorf("error provisioning ConfigMap: %w", err)
 		}
 	}
-	ss, err := pgStatefulSet(pg, r.tsNamespace, r.proxyImage, r.tsFirewallMode, cfgHash)
+	ss, err := pgStatefulSet(pg, r.tsNamespace, r.proxyImage, r.tsFirewallMode)
 	if err != nil {
 		return fmt.Errorf("error generating StatefulSet spec: %w", err)
 	}
 	ss = applyProxyClassToStatefulSet(proxyClass, ss, nil, logger)
-	if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, ss, func(s *appsv1.StatefulSet) {
+	capver, err := r.capVerForPG(ctx, pg, logger)
+	if err != nil {
+		return fmt.Errorf("error getting device info: %w", err)
+	}
+
+	updateSS := func(s *appsv1.StatefulSet) {
+
+		// This is a temporary workaround to ensure that egress ProxyGroup proxies with capver older than 110
+		// are restarted when tailscaled configfile contents have changed.
+		// This workaround ensures that:
+		// 1. The hash mechanism is used to trigger pod restarts for proxies below capver 110.
+		// 2. Proxies at capver 110 or above are not unnecessarily restarted when the configfile contents change.
+		// 3. If the hash has already been set, but the capver is 110 or above, the old hash is preserved to avoid
+		// unnecessary pod restarts that could result in an update loop where capver cannot be determined for a
+		// restarting Pod and the hash is re-added again.
+		// Note that this workaround is only applied to egress ProxyGroups, because ingress ProxyGroup support was added after capver 110.
+		// Note also that the hash annotation is only set on updates, not creation, because if the StatefulSet is
+		// being created, there is no need for a restart.
+		// TODO(irbekrm): remove this in 1.84.
+		hash := cfgHash
+		if capver >= 110 {
+			hash = s.Spec.Template.GetAnnotations()[podAnnotationLastSetConfigFileHash]
+		}
+		s.Spec = ss.Spec
+		if hash != "" && pg.Spec.Type == tsapi.ProxyGroupTypeEgress {
+			mak.Set(&s.Spec.Template.Annotations, podAnnotationLastSetConfigFileHash, hash)
+		}
+
 		s.ObjectMeta.Labels = ss.ObjectMeta.Labels
 		s.ObjectMeta.Annotations = ss.ObjectMeta.Annotations
 		s.ObjectMeta.OwnerReferences = ss.ObjectMeta.OwnerReferences
-		s.Spec = ss.Spec
-	}); err != nil {
+	}
+	if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, ss, updateSS); err != nil {
 		return fmt.Errorf("error provisioning StatefulSet: %w", err)
 	}
 	mo := &metricsOpts{
@@ -564,12 +591,19 @@ func (r *ProxyGroupReconciler) getNodeMetadata(ctx context.Context, pg *tsapi.Pr
 			continue
 		}
 
-		metadata = append(metadata, nodeMetadata{
+		nm := nodeMetadata{
 			ordinal:     ordinal,
 			stateSecret: &secret,
 			tsID:        id,
 			dnsName:     dnsName,
-		})
+		}
+		pod := &corev1.Pod{}
+		if err := r.Get(ctx, client.ObjectKey{Namespace: r.tsNamespace, Name: secret.Name}, pod); err != nil && !apierrors.IsNotFound(err) {
+			return nil, err
+		} else if err == nil {
+			nm.podUID = string(pod.UID)
+		}
+		metadata = append(metadata, nm)
 	}
 
 	return metadata, nil
@@ -601,6 +635,29 @@ func (r *ProxyGroupReconciler) getDeviceInfo(ctx context.Context, pg *tsapi.Prox
 type nodeMetadata struct {
 	ordinal     int
 	stateSecret *corev1.Secret
-	tsID        tailcfg.StableNodeID
-	dnsName     string
+	// podUID is the UID of the current Pod or empty if the Pod does not exist.
+	podUID  string
+	tsID    tailcfg.StableNodeID
+	dnsName string
+}
+
+// capVerForPG returns the best-effort capability version for the given ProxyGroup. It attempts to find it by looking at the
+// Secret + Pod for the replica with ordinal 0. Returns -1 if it is not possible to determine the capability version
+// (i.e. there is no Pod yet).
+func (r *ProxyGroupReconciler) capVerForPG(ctx context.Context, pg *tsapi.ProxyGroup, logger *zap.SugaredLogger) (tailcfg.CapabilityVersion, error) {
+	metas, err := r.getNodeMetadata(ctx, pg)
+	if err != nil {
+		return -1, fmt.Errorf("error getting node metadata: %w", err)
+	}
+	if len(metas) == 0 {
+		return -1, nil
+	}
+	dev, err := deviceInfo(metas[0].stateSecret, metas[0].podUID, logger)
+	if err != nil {
+		return -1, fmt.Errorf("error getting device info: %w", err)
+	}
+	if dev == nil {
+		return -1, nil
+	}
+	return dev.capver, nil
 }
diff --git a/cmd/k8s-operator/proxygroup_specs.go b/cmd/k8s-operator/proxygroup_specs.go
index d602be814..dc58b9f0e 100644
--- a/cmd/k8s-operator/proxygroup_specs.go
+++ b/cmd/k8s-operator/proxygroup_specs.go
@@ -21,7 +21,7 @@ import (
 // Returns the base StatefulSet definition for a ProxyGroup. A ProxyClass may be
 // applied over the top after.
-func pgStatefulSet(pg *tsapi.ProxyGroup, namespace, image, tsFirewallMode, cfgHash string) (*appsv1.StatefulSet, error) {
+func pgStatefulSet(pg *tsapi.ProxyGroup, namespace, image, tsFirewallMode string) (*appsv1.StatefulSet, error) {
 	ss := new(appsv1.StatefulSet)
 	if err := yaml.Unmarshal(proxyYaml, &ss); err != nil {
 		return nil, fmt.Errorf("failed to unmarshal proxy spec: %w", err)
 	}
@@ -53,9 +53,6 @@ func pgStatefulSet(pg *tsapi.ProxyGroup, namespace, image, tsFirewallMode, cfgHa
 		Namespace:                  namespace,
 		Labels:                     pgLabels(pg.Name, nil),
 		DeletionGracePeriodSeconds: ptr.To[int64](10),
-		Annotations: map[string]string{
-			podAnnotationLastSetConfigFileHash: cfgHash,
-		},
 	}
 	tmpl.Spec.ServiceAccountName = pg.Name
 	tmpl.Spec.InitContainers[0].Image = image
diff --git a/cmd/k8s-operator/proxygroup_test.go b/cmd/k8s-operator/proxygroup_test.go
index 6464a0b2d..96ffefbed 100644
--- a/cmd/k8s-operator/proxygroup_test.go
+++ b/cmd/k8s-operator/proxygroup_test.go
@@ -29,6 +29,7 @@ import (
 	"tailscale.com/kube/kubetypes"
 	"tailscale.com/tstest"
 	"tailscale.com/types/ptr"
+	"tailscale.com/util/mak"
 )
 
 const testProxyImage = "tailscale/tailscale:test"
@@ -117,11 +118,11 @@ func TestProxyGroup(t *testing.T) {
 		tsoperator.SetProxyGroupCondition(pg, tsapi.ProxyGroupReady, metav1.ConditionFalse, reasonProxyGroupCreating, "0/2 ProxyGroup pods running", 0, cl, zl.Sugar())
 		expectEqual(t, fc, pg, nil)
-		expectProxyGroupResources(t, fc, pg, true, initialCfgHash)
+		expectProxyGroupResources(t, fc, pg, true, "")
 		if expected := 1; reconciler.egressProxyGroups.Len() != expected {
 			t.Fatalf("expected %d egress ProxyGroups, got %d", expected, reconciler.egressProxyGroups.Len())
 		}
-		expectProxyGroupResources(t, fc, pg, true, initialCfgHash)
+		expectProxyGroupResources(t, fc, pg, true, "")
 
 		keyReq := tailscale.KeyCapabilities{
 			Devices: tailscale.KeyDeviceCapabilities{
 				Create: tailscale.KeyDeviceCreateCapabilities{
@@ -378,11 +379,14 @@ func expectProxyGroupResources(t *testing.T, fc client.WithWatch, pg *tsapi.Prox
 	role := pgRole(pg, tsNamespace)
 	roleBinding := pgRoleBinding(pg, tsNamespace)
 	serviceAccount := pgServiceAccount(pg, tsNamespace)
-	statefulSet, err := pgStatefulSet(pg, tsNamespace, testProxyImage, "auto", cfgHash)
+	statefulSet, err := pgStatefulSet(pg, tsNamespace, testProxyImage, "auto")
 	if err != nil {
 		t.Fatal(err)
 	}
 	statefulSet.Annotations = defaultProxyClassAnnotations
+	if cfgHash != "" {
+		mak.Set(&statefulSet.Spec.Template.Annotations, podAnnotationLastSetConfigFileHash, cfgHash)
+	}
 
 	if shouldExist {
 		expectEqual(t, fc, role, nil)
diff --git a/cmd/k8s-operator/sts.go b/cmd/k8s-operator/sts.go
index b861bdfff..c2b925058 100644
--- a/cmd/k8s-operator/sts.go
+++ b/cmd/k8s-operator/sts.go
@@ -437,10 +437,11 @@ func sanitizeConfigBytes(c ipn.ConfigVAlpha) string {
 	return string(sanitizedBytes)
 }
 
-// DeviceInfo returns the device ID, hostname and IPs for the Tailscale device
-// that acts as an operator proxy. It retrieves info from a Kubernetes Secret
-// labeled with the provided labels.
-// Either of device ID, hostname and IPs can be empty string if not found in the Secret.
+// DeviceInfo returns the device ID, hostname, IPs and capver for the Tailscale device that acts as an operator proxy.
+// It retrieves info from a Kubernetes Secret labeled with the provided labels. Capver is cross-validated against the
+// Pod to ensure that it is the currently running Pod that set the capver. If the Pod or the Secret does not exist, the
+// returned capver is -1.
+// Either of device ID, hostname and IPs can be empty string if not found in the Secret.
 func (a *tailscaleSTSReconciler) DeviceInfo(ctx context.Context, childLabels map[string]string, logger *zap.SugaredLogger) (dev *device, err error) {
 	sec, err := getSingleObject[corev1.Secret](ctx, a.Client, a.operatorNamespace, childLabels)
 	if err != nil {
@@ -449,12 +449,14 @@ func (a *tailscaleSTSReconciler) DeviceInfo(ctx context.Context, childLabels map
 	if sec == nil {
 		return dev, nil
 	}
+	podUID := ""
 	pod := new(corev1.Pod)
 	if err := a.Get(ctx, types.NamespacedName{Namespace: sec.Namespace, Name: sec.Name}, pod); err != nil && !apierrors.IsNotFound(err) {
-		return dev, nil
+		return dev, err
+	} else if err == nil {
+		podUID = string(pod.ObjectMeta.UID)
 	}
-
-	return deviceInfo(sec, pod, logger)
+	return deviceInfo(sec, podUID, logger)
 }
 
 // device contains tailscale state of a proxy device as gathered from its tailscale state Secret.
@@ -465,9 +467,10 @@ type device struct {
 	// ingressDNSName is the L7 Ingress DNS name. In practice this will be the same value as hostname, but only set
 	// when the device has been configured to serve traffic on it via 'tailscale serve'.
 	ingressDNSName string
+	capver         tailcfg.CapabilityVersion
 }
 
-func deviceInfo(sec *corev1.Secret, pod *corev1.Pod, log *zap.SugaredLogger) (dev *device, err error) {
+func deviceInfo(sec *corev1.Secret, podUID string, log *zap.SugaredLogger) (dev *device, err error) {
 	id := tailcfg.StableNodeID(sec.Data[kubetypes.KeyDeviceID])
 	if id == "" {
 		return dev, nil
@@ -484,10 +487,12 @@ func deviceInfo(sec *corev1.Secret, pod *corev1.Pod, log *zap.SugaredLogger) (de
 		// operator to clean up such devices.
 		return dev, nil
 	}
+	dev.ingressDNSName = dev.hostname
+	pcv := proxyCapVer(sec, podUID, log)
+	dev.capver = pcv
 	// TODO(irbekrm): we fall back to using the hostname field to determine Ingress's hostname to ensure backwards
 	// compatibility. In 1.82 we can remove this fallback mechanism.
-	dev.ingressDNSName = dev.hostname
-	if proxyCapVer(sec, pod, log) >= 109 {
+	if pcv >= 109 {
 		dev.ingressDNSName = strings.TrimSuffix(string(sec.Data[kubetypes.KeyHTTPSEndpoint]), ".")
 		if strings.EqualFold(dev.ingressDNSName, kubetypes.ValueNoHTTPS) {
 			dev.ingressDNSName = ""
@@ -584,8 +589,6 @@ func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.S
 			Value: "true",
 		})
 	}
-	// Configure containeboot to run tailscaled with a configfile read from the state Secret.
-	mak.Set(&ss.Spec.Template.Annotations, podAnnotationLastSetConfigFileHash, tsConfigHash)
 
 	configVolume := corev1.Volume{
 		Name: "tailscaledconfig",
@@ -655,6 +658,12 @@ func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.S
 			},
 		})
 	}
+
+	dev, err := a.DeviceInfo(ctx, sts.ChildResourceLabels, logger)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get device info: %w", err)
+	}
+
 	app, err := appInfoForProxy(sts)
 	if err != nil {
 		// No need to error out if now or in future we end up in a
@@ -673,7 +682,25 @@ func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.S
 		ss = applyProxyClassToStatefulSet(sts.ProxyClass, ss, sts, logger)
 	}
 	updateSS := func(s *appsv1.StatefulSet) {
+		// This is a temporary workaround to ensure that proxies with capver older than 110
+		// are restarted when tailscaled configfile contents have changed.
+		// This workaround ensures that:
+		// 1. The hash mechanism is used to trigger pod restarts for proxies below capver 110.
+		// 2. Proxies at capver 110 or above are not unnecessarily restarted when the configfile contents change.
+		// 3. If the hash has already been set, but the capver is 110 or above, the old hash is preserved to avoid
+		// unnecessary pod restarts that could result in an update loop where capver cannot be determined for a
+		// restarting Pod and the hash is re-added again.
+		// Note that the hash annotation is only set on updates, not creation, because if the StatefulSet is
+		// being created, there is no need for a restart.
+		// TODO(irbekrm): remove this in 1.84.
+		hash := tsConfigHash
+		if dev != nil && dev.capver >= 110 {
+			hash = s.Spec.Template.GetAnnotations()[podAnnotationLastSetConfigFileHash]
+		}
 		s.Spec = ss.Spec
+		if hash != "" {
+			mak.Set(&s.Spec.Template.Annotations, podAnnotationLastSetConfigFileHash, hash)
+		}
 		s.ObjectMeta.Labels = ss.Labels
 		s.ObjectMeta.Annotations = ss.Annotations
 	}
@@ -1112,10 +1139,11 @@ func isValidFirewallMode(m string) bool {
 	return m == "auto" || m == "nftables" || m == "iptables"
 }
 
-// proxyCapVer accepts a proxy state Secret and a proxy Pod returns the capability version of a proxy Pod.
-// This is best effort - if the capability version can not (currently) be determined, it returns -1.
-func proxyCapVer(sec *corev1.Secret, pod *corev1.Pod, log *zap.SugaredLogger) tailcfg.CapabilityVersion {
-	if sec == nil || pod == nil {
+// proxyCapVer accepts a proxy state Secret and the UID of the current proxy Pod, and returns the capability version of
+// the tailscale instance running in that Pod. This is best effort - if the capability version cannot (currently) be
+// determined, it returns -1.
+func proxyCapVer(sec *corev1.Secret, podUID string, log *zap.SugaredLogger) tailcfg.CapabilityVersion {
+	if sec == nil || podUID == "" {
 		return tailcfg.CapabilityVersion(-1)
 	}
 	if len(sec.Data[kubetypes.KeyCapVer]) == 0 || len(sec.Data[kubetypes.KeyPodUID]) == 0 {
@@ -1126,7 +1154,7 @@ func proxyCapVer(sec *corev1.Secret, pod *corev1.Pod, log *zap.SugaredLogger) ta
 		log.Infof("[unexpected]: unexpected capability version in proxy's state Secret, expected an integer, got %q", string(sec.Data[kubetypes.KeyCapVer]))
 		return tailcfg.CapabilityVersion(-1)
 	}
-	if !strings.EqualFold(string(pod.ObjectMeta.UID), string(sec.Data[kubetypes.KeyPodUID])) {
+	if !strings.EqualFold(podUID, string(sec.Data[kubetypes.KeyPodUID])) {
 		return tailcfg.CapabilityVersion(-1)
 	}
 	return tailcfg.CapabilityVersion(capVer)
diff --git a/cmd/k8s-operator/testutils_test.go b/cmd/k8s-operator/testutils_test.go
index d43e75b1e..277bd16df 100644
--- a/cmd/k8s-operator/testutils_test.go
+++ b/cmd/k8s-operator/testutils_test.go
@@ -95,7 +95,7 @@ func expectedSTS(t *testing.T, cl client.Client, opts configOpts) *appsv1.Statef
 			Value: "true",
 		})
 	}
-	annots := make(map[string]string)
+	var annots map[string]string
 	var volumes []corev1.Volume
 	volumes = []corev1.Volume{
 		{
@@ -113,7 +113,7 @@ func expectedSTS(t *testing.T, cl client.Client, opts configOpts) *appsv1.Statef
 			MountPath: "/etc/tsconfig",
 		}}
 	if opts.confFileHash != "" {
-		annots["tailscale.com/operator-last-set-config-file-hash"] = opts.confFileHash
+		mak.Set(&annots, "tailscale.com/operator-last-set-config-file-hash", opts.confFileHash)
 	}
 	if opts.firewallMode != "" {
 		tsContainer.Env = append(tsContainer.Env, corev1.EnvVar{
@@ -122,13 +122,13 @@ func expectedSTS(t *testing.T, cl client.Client, opts configOpts) *appsv1.Statef
 		})
 	}
 	if opts.tailnetTargetIP != "" {
-		annots["tailscale.com/operator-last-set-ts-tailnet-target-ip"] = opts.tailnetTargetIP
+		mak.Set(&annots, "tailscale.com/operator-last-set-ts-tailnet-target-ip", opts.tailnetTargetIP)
"tailscale.com/operator-last-set-ts-tailnet-target-ip", opts.tailnetTargetIP) tsContainer.Env = append(tsContainer.Env, corev1.EnvVar{ Name: "TS_TAILNET_TARGET_IP", Value: opts.tailnetTargetIP, }) } else if opts.tailnetTargetFQDN != "" { - annots["tailscale.com/operator-last-set-ts-tailnet-target-fqdn"] = opts.tailnetTargetFQDN + mak.Set(&annots, "tailscale.com/operator-last-set-ts-tailnet-target-fqdn", opts.tailnetTargetFQDN) tsContainer.Env = append(tsContainer.Env, corev1.EnvVar{ Name: "TS_TAILNET_TARGET_FQDN", Value: opts.tailnetTargetFQDN, @@ -139,13 +139,13 @@ func expectedSTS(t *testing.T, cl client.Client, opts configOpts) *appsv1.Statef Name: "TS_DEST_IP", Value: opts.clusterTargetIP, }) - annots["tailscale.com/operator-last-set-cluster-ip"] = opts.clusterTargetIP + mak.Set(&annots, "tailscale.com/operator-last-set-cluster-ip", opts.clusterTargetIP) } else if opts.clusterTargetDNS != "" { tsContainer.Env = append(tsContainer.Env, corev1.EnvVar{ Name: "TS_EXPERIMENTAL_DEST_DNS_NAME", Value: opts.clusterTargetDNS, }) - annots["tailscale.com/operator-last-set-cluster-dns-name"] = opts.clusterTargetDNS + mak.Set(&annots, "tailscale.com/operator-last-set-cluster-dns-name", opts.clusterTargetDNS) } if opts.serveConfig != nil { tsContainer.Env = append(tsContainer.Env, corev1.EnvVar{ @@ -794,6 +794,9 @@ func (c *fakeTSClient) Deleted() []string { // change to the configfile contents). func removeHashAnnotation(sts *appsv1.StatefulSet) { delete(sts.Spec.Template.Annotations, podAnnotationLastSetConfigFileHash) + if len(sts.Spec.Template.Annotations) == 0 { + sts.Spec.Template.Annotations = nil + } } func removeTargetPortsFromSvc(svc *corev1.Service) {