cmd/k8s-operator: update ProxyGroup config Secrets instead of patch (#15353)

There was a flaky failure case where renaming a TLS hostname for an
ingress might leave the old hostname dangling in tailscaled config. This
happened when the proxygroup reconciler loop had an outdated resource
version of the config Secret in its cache after the
ingress-pg-reconciler loop had very recently written it to delete the
old hostname. As the proxygroup reconciler then did a patch, there was
no conflict and it reinstated the old hostname.

This commit updates the patch to an update operation so that if the
resource version is out of date it will fail with an optimistic lock
error. It also checks for equality to reduce the likelihood that we make
the update API call in the first place, because most of the time the
proxygroup reconciler is not even making an update to the Secret in the
case that the hostname has changed.

Updates tailscale/corp#24795

Change-Id: Ie23a97440063976c9a8475d24ab18253e1f89050
Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
This commit is contained in:
Tom Proctor 2025-03-19 06:49:36 -07:00 committed by GitHub
parent 25d5f78c6e
commit 8d84720edb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 12 additions and 20 deletions

View File

@ -461,7 +461,7 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p
var existingCfgSecret *corev1.Secret // unmodified copy of secret var existingCfgSecret *corev1.Secret // unmodified copy of secret
if err := r.Get(ctx, client.ObjectKeyFromObject(cfgSecret), cfgSecret); err == nil { if err := r.Get(ctx, client.ObjectKeyFromObject(cfgSecret), cfgSecret); err == nil {
logger.Debugf("secret %s/%s already exists", cfgSecret.GetNamespace(), cfgSecret.GetName()) logger.Debugf("Secret %s/%s already exists", cfgSecret.GetNamespace(), cfgSecret.GetName())
existingCfgSecret = cfgSecret.DeepCopy() existingCfgSecret = cfgSecret.DeepCopy()
} else if !apierrors.IsNotFound(err) { } else if !apierrors.IsNotFound(err) {
return "", err return "", err
@ -469,7 +469,7 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p
var authKey string var authKey string
if existingCfgSecret == nil { if existingCfgSecret == nil {
logger.Debugf("creating authkey for new ProxyGroup proxy") logger.Debugf("Creating authkey for new ProxyGroup proxy")
tags := pg.Spec.Tags.Stringify() tags := pg.Spec.Tags.Stringify()
if len(tags) == 0 { if len(tags) == 0 {
tags = r.defaultTags tags = r.defaultTags
@ -490,7 +490,7 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p
if err != nil { if err != nil {
return "", fmt.Errorf("error marshalling tailscaled config: %w", err) return "", fmt.Errorf("error marshalling tailscaled config: %w", err)
} }
mak.Set(&cfgSecret.StringData, tsoperator.TailscaledConfigFileName(cap), string(cfgJSON)) mak.Set(&cfgSecret.Data, tsoperator.TailscaledConfigFileName(cap), cfgJSON)
} }
// The config sha256 sum is a value for a hash annotation used to trigger // The config sha256 sum is a value for a hash annotation used to trigger
@ -520,12 +520,14 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p
} }
if existingCfgSecret != nil { if existingCfgSecret != nil {
logger.Debugf("patching the existing ProxyGroup config Secret %s", cfgSecret.Name) if !apiequality.Semantic.DeepEqual(existingCfgSecret, cfgSecret) {
if err := r.Patch(ctx, cfgSecret, client.MergeFrom(existingCfgSecret)); err != nil { logger.Debugf("Updating the existing ProxyGroup config Secret %s", cfgSecret.Name)
if err := r.Update(ctx, cfgSecret); err != nil {
return "", err return "", err
} }
}
} else { } else {
logger.Debugf("creating a new config Secret %s for the ProxyGroup", cfgSecret.Name) logger.Debugf("Creating a new config Secret %s for the ProxyGroup", cfgSecret.Name)
if err := r.Create(ctx, cfgSecret); err != nil { if err := r.Create(ctx, cfgSecret); err != nil {
return "", err return "", err
} }

View File

@ -475,8 +475,6 @@ func TestIngressAdvertiseServicesConfigPreserved(t *testing.T) {
Name: pgConfigSecretName(pgName, 0), Name: pgConfigSecretName(pgName, 0),
Namespace: tsNamespace, Namespace: tsNamespace,
}, },
// Write directly to Data because the fake client doesn't copy the write-only
// StringData field across to Data for us.
Data: map[string][]byte{ Data: map[string][]byte{
tsoperator.TailscaledConfigFileName(106): existingConfigBytes, tsoperator.TailscaledConfigFileName(106): existingConfigBytes,
}, },
@ -514,10 +512,10 @@ func TestIngressAdvertiseServicesConfigPreserved(t *testing.T) {
Namespace: tsNamespace, Namespace: tsNamespace,
ResourceVersion: "2", ResourceVersion: "2",
}, },
StringData: map[string]string{ Data: map[string][]byte{
tsoperator.TailscaledConfigFileName(106): string(expectedConfigBytes), tsoperator.TailscaledConfigFileName(106): expectedConfigBytes,
}, },
}, omitSecretData) })
} }
func verifyProxyGroupCounts(t *testing.T, r *ProxyGroupReconciler, wantIngress, wantEgress int) { func verifyProxyGroupCounts(t *testing.T, r *ProxyGroupReconciler, wantIngress, wantEgress int) {
@ -620,11 +618,3 @@ func addNodeIDToStateSecrets(t *testing.T, fc client.WithWatch, pg *tsapi.ProxyG
}) })
} }
} }
// The operator mostly writes to StringData and reads from Data, but the fake
// client doesn't copy StringData across to Data on write. When comparing actual
// vs expected Secrets, use this function to only check what the operator writes
// to StringData.
func omitSecretData(secret *corev1.Secret) {
secret.Data = nil
}