From 8d84720edb471c639e0f6de3addf3490e78b7748 Mon Sep 17 00:00:00 2001 From: Tom Proctor Date: Wed, 19 Mar 2025 06:49:36 -0700 Subject: [PATCH] cmd/k8s-operator: update ProxyGroup config Secrets instead of patch (#15353) There was a flaky failure case where renaming a TLS hostname for an ingress might leave the old hostname dangling in tailscaled config. This happened when the proxygroup reconciler loop had an outdated resource version of the config Secret in its cache after the ingress-pg-reconciler loop had very recently written it to delete the old hostname. As the proxygroup reconciler then did a patch, there was no conflict and it reinstated the old hostname. This commit updates the patch to an update operation so that if the resource version is out of date it will fail with an optimistic lock error. It also checks for equality to reduce the likelihood that we make the update API call in the first place, because most of the time the proxygroup reconciler is not even making an update to the Secret in the case that the hostname has changed. Updates tailscale/corp#24795 Change-Id: Ie23a97440063976c9a8475d24ab18253e1f89050 Signed-off-by: Tom Proctor --- cmd/k8s-operator/proxygroup.go | 16 +++++++++------- cmd/k8s-operator/proxygroup_test.go | 16 +++------------- 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/cmd/k8s-operator/proxygroup.go b/cmd/k8s-operator/proxygroup.go index c961c0471..112e5e2b0 100644 --- a/cmd/k8s-operator/proxygroup.go +++ b/cmd/k8s-operator/proxygroup.go @@ -461,7 +461,7 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p var existingCfgSecret *corev1.Secret // unmodified copy of secret if err := r.Get(ctx, client.ObjectKeyFromObject(cfgSecret), cfgSecret); err == nil { - logger.Debugf("secret %s/%s already exists", cfgSecret.GetNamespace(), cfgSecret.GetName()) + logger.Debugf("Secret %s/%s already exists", cfgSecret.GetNamespace(), cfgSecret.GetName()) existingCfgSecret = cfgSecret.DeepCopy() } else if !apierrors.IsNotFound(err) { return "", err @@ -469,7 +469,7 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p var authKey string if existingCfgSecret == nil { - logger.Debugf("creating authkey for new ProxyGroup proxy") + logger.Debugf("Creating authkey for new ProxyGroup proxy") tags := pg.Spec.Tags.Stringify() if len(tags) == 0 { tags = r.defaultTags @@ -490,7 +490,7 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p if err != nil { return "", fmt.Errorf("error marshalling tailscaled config: %w", err) } - mak.Set(&cfgSecret.StringData, tsoperator.TailscaledConfigFileName(cap), string(cfgJSON)) + mak.Set(&cfgSecret.Data, tsoperator.TailscaledConfigFileName(cap), cfgJSON) } // The config sha256 sum is a value for a hash annotation used to trigger @@ -520,12 +520,14 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p } if existingCfgSecret != nil { - logger.Debugf("patching the existing ProxyGroup config Secret %s", cfgSecret.Name) - if err := r.Patch(ctx, cfgSecret, client.MergeFrom(existingCfgSecret)); err != nil { - return "", err + if !apiequality.Semantic.DeepEqual(existingCfgSecret, cfgSecret) { + logger.Debugf("Updating the existing ProxyGroup config Secret %s", cfgSecret.Name) + if err := r.Update(ctx, cfgSecret); err != nil { + return "", err + } } } else { - logger.Debugf("creating a new config Secret %s for the ProxyGroup", cfgSecret.Name) + logger.Debugf("Creating a new config Secret %s for the ProxyGroup", cfgSecret.Name) if err := r.Create(ctx, cfgSecret); err != nil { return "", err } diff --git a/cmd/k8s-operator/proxygroup_test.go b/cmd/k8s-operator/proxygroup_test.go index 5b690a485..1f1a39ab0 100644 --- a/cmd/k8s-operator/proxygroup_test.go +++ b/cmd/k8s-operator/proxygroup_test.go @@ -475,8 +475,6 @@ func TestIngressAdvertiseServicesConfigPreserved(t *testing.T) { Name: pgConfigSecretName(pgName, 0), Namespace: tsNamespace, }, - // Write directly to Data because the fake client doesn't copy the write-only - // StringData field across to Data for us. Data: map[string][]byte{ tsoperator.TailscaledConfigFileName(106): existingConfigBytes, }, @@ -514,10 +512,10 @@ func TestIngressAdvertiseServicesConfigPreserved(t *testing.T) { Namespace: tsNamespace, ResourceVersion: "2", }, - StringData: map[string]string{ - tsoperator.TailscaledConfigFileName(106): string(expectedConfigBytes), + Data: map[string][]byte{ + tsoperator.TailscaledConfigFileName(106): expectedConfigBytes, }, - }, omitSecretData) + }) } func verifyProxyGroupCounts(t *testing.T, r *ProxyGroupReconciler, wantIngress, wantEgress int) { @@ -620,11 +618,3 @@ func addNodeIDToStateSecrets(t *testing.T, fc client.WithWatch, pg *tsapi.ProxyG }) } } - -// The operator mostly writes to StringData and reads from Data, but the fake -// client doesn't copy StringData across to Data on write. When comparing actual -// vs expected Secrets, use this function to only check what the operator writes -// to StringData. -func omitSecretData(secret *corev1.Secret) { - secret.Data = nil -}