cmd/{k8s-operator,containerboot},kube: ensure egress ProxyGroup proxies don't terminate while cluster traffic is still routed to them (#14436)
Some checks are pending
checklocks / checklocks (push) Waiting to run
CodeQL / Analyze (go) (push) Waiting to run
Dockerfile build / deploy (push) Waiting to run
CI / race-root-integration (1/4) (push) Waiting to run
CI / race-root-integration (2/4) (push) Waiting to run
CI / race-root-integration (3/4) (push) Waiting to run
CI / race-root-integration (4/4) (push) Waiting to run
CI / test (-coverprofile=/tmp/coverage.out, amd64) (push) Waiting to run
CI / test (-race, amd64, 1/3) (push) Waiting to run
CI / test (-race, amd64, 2/3) (push) Waiting to run
CI / test (-race, amd64, 3/3) (push) Waiting to run
CI / test (386) (push) Waiting to run
CI / windows (push) Waiting to run
CI / privileged (push) Waiting to run
CI / vm (push) Waiting to run
CI / race-build (push) Waiting to run
CI / cross (386, linux) (push) Waiting to run
CI / cross (amd64, darwin) (push) Waiting to run
CI / cross (amd64, freebsd) (push) Waiting to run
CI / cross (amd64, openbsd) (push) Waiting to run
CI / cross (amd64, windows) (push) Waiting to run
CI / cross (arm, 5, linux) (push) Waiting to run
CI / cross (arm, 7, linux) (push) Waiting to run
CI / cross (arm64, darwin) (push) Waiting to run
CI / cross (arm64, linux) (push) Waiting to run
CI / cross (arm64, windows) (push) Waiting to run
CI / cross (loong64, linux) (push) Waiting to run
CI / ios (push) Waiting to run
CI / crossmin (amd64, illumos) (push) Waiting to run
CI / crossmin (amd64, plan9) (push) Waiting to run
CI / crossmin (amd64, solaris) (push) Waiting to run
CI / crossmin (ppc64, aix) (push) Waiting to run
CI / android (push) Waiting to run
CI / wasm (push) Waiting to run
CI / tailscale_go (push) Waiting to run
CI / fuzz (push) Waiting to run
CI / depaware (push) Waiting to run
CI / go_generate (push) Waiting to run
CI / go_mod_tidy (push) Waiting to run
CI / licenses (push) Waiting to run
CI / staticcheck (386, windows) (push) Waiting to run
CI / staticcheck (amd64, darwin) (push) Waiting to run
CI / staticcheck (amd64, linux) (push) Waiting to run
CI / staticcheck (amd64, windows) (push) Waiting to run
CI / notify_slack (push) Blocked by required conditions
CI / check_mergeability (push) Blocked by required conditions
Some checks are pending
checklocks / checklocks (push) Waiting to run
CodeQL / Analyze (go) (push) Waiting to run
Dockerfile build / deploy (push) Waiting to run
CI / race-root-integration (1/4) (push) Waiting to run
CI / race-root-integration (2/4) (push) Waiting to run
CI / race-root-integration (3/4) (push) Waiting to run
CI / race-root-integration (4/4) (push) Waiting to run
CI / test (-coverprofile=/tmp/coverage.out, amd64) (push) Waiting to run
CI / test (-race, amd64, 1/3) (push) Waiting to run
CI / test (-race, amd64, 2/3) (push) Waiting to run
CI / test (-race, amd64, 3/3) (push) Waiting to run
CI / test (386) (push) Waiting to run
CI / windows (push) Waiting to run
CI / privileged (push) Waiting to run
CI / vm (push) Waiting to run
CI / race-build (push) Waiting to run
CI / cross (386, linux) (push) Waiting to run
CI / cross (amd64, darwin) (push) Waiting to run
CI / cross (amd64, freebsd) (push) Waiting to run
CI / cross (amd64, openbsd) (push) Waiting to run
CI / cross (amd64, windows) (push) Waiting to run
CI / cross (arm, 5, linux) (push) Waiting to run
CI / cross (arm, 7, linux) (push) Waiting to run
CI / cross (arm64, darwin) (push) Waiting to run
CI / cross (arm64, linux) (push) Waiting to run
CI / cross (arm64, windows) (push) Waiting to run
CI / cross (loong64, linux) (push) Waiting to run
CI / ios (push) Waiting to run
CI / crossmin (amd64, illumos) (push) Waiting to run
CI / crossmin (amd64, plan9) (push) Waiting to run
CI / crossmin (amd64, solaris) (push) Waiting to run
CI / crossmin (ppc64, aix) (push) Waiting to run
CI / android (push) Waiting to run
CI / wasm (push) Waiting to run
CI / tailscale_go (push) Waiting to run
CI / fuzz (push) Waiting to run
CI / depaware (push) Waiting to run
CI / go_generate (push) Waiting to run
CI / go_mod_tidy (push) Waiting to run
CI / licenses (push) Waiting to run
CI / staticcheck (386, windows) (push) Waiting to run
CI / staticcheck (amd64, darwin) (push) Waiting to run
CI / staticcheck (amd64, linux) (push) Waiting to run
CI / staticcheck (amd64, windows) (push) Waiting to run
CI / notify_slack (push) Blocked by required conditions
CI / check_mergeability (push) Blocked by required conditions
cmd/{containerboot,k8s-operator},kube: add preshutdown hook for egress PG proxies This change is part of work towards minimizing downtime during update rollouts of egress ProxyGroup replicas. This change: - updates the containerboot health check logic to return Pod IP in headers, if set - always runs the health check for egress PG proxies - updates ClusterIP Services created for PG egress endpoints to include the health check endpoint - implements preshutdown endpoint in proxies. The preshutdown endpoint logic waits till, for all currently configured egress services, the ClusterIP Service health check endpoint is no longer returned by the shutting-down Pod (by looking at the new Pod IP header). - ensures that kubelet is configured to call the preshutdown endpoint This reduces the possibility that, as replicas are terminated during an update, a replica gets terminated to which cluster traffic is still being routed via the ClusterIP Service because kube proxy has not yet updated routig rules. This is not a perfect check as in practice, it only checks that the kube proxy on the node on which the proxy runs has updated rules. However, overall this might be good enough. The preshutdown logic is disabled if users have configured a custom health check port via TS_LOCAL_ADDR_PORT env var. This change throws a warnign if so and in future setting of that env var for operator proxies might be disallowed (as users shouldn't need to configure this for a Pod directly). This is backwards compatible with earlier proxy versions. Updates tailscale/tailscale#14326 Signed-off-by: Irbe Krumina <irbe@tailscale.com>
This commit is contained in:
@ -32,6 +32,8 @@ import (
|
||||
"golang.org/x/sys/unix"
|
||||
"tailscale.com/ipn"
|
||||
"tailscale.com/kube/egressservices"
|
||||
"tailscale.com/kube/kubeclient"
|
||||
"tailscale.com/kube/kubetypes"
|
||||
"tailscale.com/tailcfg"
|
||||
"tailscale.com/tstest"
|
||||
"tailscale.com/types/netmap"
|
||||
@ -54,20 +56,9 @@ func TestContainerBoot(t *testing.T) {
|
||||
defer kube.Close()
|
||||
|
||||
tailscaledConf := &ipn.ConfigVAlpha{AuthKey: ptr.To("foo"), Version: "alpha0"}
|
||||
tailscaledConfBytes, err := json.Marshal(tailscaledConf)
|
||||
if err != nil {
|
||||
t.Fatalf("error unmarshaling tailscaled config: %v", err)
|
||||
}
|
||||
serveConf := ipn.ServeConfig{TCP: map[uint16]*ipn.TCPPortHandler{80: {HTTP: true}}}
|
||||
serveConfBytes, err := json.Marshal(serveConf)
|
||||
if err != nil {
|
||||
t.Fatalf("error unmarshaling serve config: %v", err)
|
||||
}
|
||||
egressSvcsCfg := egressservices.Configs{"foo": {TailnetTarget: egressservices.TailnetTarget{FQDN: "foo.tailnetxyx.ts.net"}}}
|
||||
egressSvcsCfgBytes, err := json.Marshal(egressSvcsCfg)
|
||||
if err != nil {
|
||||
t.Fatalf("error unmarshaling egress services config: %v", err)
|
||||
}
|
||||
egressCfg := egressSvcConfig("foo", "foo.tailnetxyz.ts.net")
|
||||
egressStatus := egressSvcStatus("foo", "foo.tailnetxyz.ts.net")
|
||||
|
||||
dirs := []string{
|
||||
"var/lib",
|
||||
@ -84,16 +75,17 @@ func TestContainerBoot(t *testing.T) {
|
||||
}
|
||||
}
|
||||
files := map[string][]byte{
|
||||
"usr/bin/tailscaled": fakeTailscaled,
|
||||
"usr/bin/tailscale": fakeTailscale,
|
||||
"usr/bin/iptables": fakeTailscale,
|
||||
"usr/bin/ip6tables": fakeTailscale,
|
||||
"dev/net/tun": []byte(""),
|
||||
"proc/sys/net/ipv4/ip_forward": []byte("0"),
|
||||
"proc/sys/net/ipv6/conf/all/forwarding": []byte("0"),
|
||||
"etc/tailscaled/cap-95.hujson": tailscaledConfBytes,
|
||||
"etc/tailscaled/serve-config.json": serveConfBytes,
|
||||
"etc/tailscaled/egress-services-config.json": egressSvcsCfgBytes,
|
||||
"usr/bin/tailscaled": fakeTailscaled,
|
||||
"usr/bin/tailscale": fakeTailscale,
|
||||
"usr/bin/iptables": fakeTailscale,
|
||||
"usr/bin/ip6tables": fakeTailscale,
|
||||
"dev/net/tun": []byte(""),
|
||||
"proc/sys/net/ipv4/ip_forward": []byte("0"),
|
||||
"proc/sys/net/ipv6/conf/all/forwarding": []byte("0"),
|
||||
"etc/tailscaled/cap-95.hujson": mustJSON(t, tailscaledConf),
|
||||
"etc/tailscaled/serve-config.json": mustJSON(t, serveConf),
|
||||
filepath.Join("etc/tailscaled/", egressservices.KeyEgressServices): mustJSON(t, egressCfg),
|
||||
filepath.Join("etc/tailscaled/", egressservices.KeyHEPPings): []byte("4"),
|
||||
}
|
||||
resetFiles := func() {
|
||||
for path, content := range files {
|
||||
@ -132,6 +124,9 @@ func TestContainerBoot(t *testing.T) {
|
||||
healthURL := func(port int) string {
|
||||
return fmt.Sprintf("http://127.0.0.1:%d/healthz", port)
|
||||
}
|
||||
egressSvcTerminateURL := func(port int) string {
|
||||
return fmt.Sprintf("http://127.0.0.1:%d%s", port, kubetypes.EgessServicesPreshutdownEP)
|
||||
}
|
||||
|
||||
capver := fmt.Sprintf("%d", tailcfg.CurrentCapabilityVersion)
|
||||
|
||||
@ -896,9 +891,10 @@ func TestContainerBoot(t *testing.T) {
|
||||
{
|
||||
Name: "egress_svcs_config_kube",
|
||||
Env: map[string]string{
|
||||
"KUBERNETES_SERVICE_HOST": kube.Host,
|
||||
"KUBERNETES_SERVICE_PORT_HTTPS": kube.Port,
|
||||
"TS_EGRESS_SERVICES_CONFIG_PATH": filepath.Join(d, "etc/tailscaled/egress-services-config.json"),
|
||||
"KUBERNETES_SERVICE_HOST": kube.Host,
|
||||
"KUBERNETES_SERVICE_PORT_HTTPS": kube.Port,
|
||||
"TS_EGRESS_PROXIES_CONFIG_PATH": filepath.Join(d, "etc/tailscaled"),
|
||||
"TS_LOCAL_ADDR_PORT": fmt.Sprintf("[::]:%d", localAddrPort),
|
||||
},
|
||||
KubeSecret: map[string]string{
|
||||
"authkey": "tskey-key",
|
||||
@ -912,28 +908,35 @@ func TestContainerBoot(t *testing.T) {
|
||||
WantKubeSecret: map[string]string{
|
||||
"authkey": "tskey-key",
|
||||
},
|
||||
EndpointStatuses: map[string]int{
|
||||
egressSvcTerminateURL(localAddrPort): 200,
|
||||
},
|
||||
},
|
||||
{
|
||||
Notify: runningNotify,
|
||||
WantKubeSecret: map[string]string{
|
||||
"egress-services": mustBase64(t, egressStatus),
|
||||
"authkey": "tskey-key",
|
||||
"device_fqdn": "test-node.test.ts.net",
|
||||
"device_id": "myID",
|
||||
"device_ips": `["100.64.0.1"]`,
|
||||
"tailscale_capver": capver,
|
||||
},
|
||||
EndpointStatuses: map[string]int{
|
||||
egressSvcTerminateURL(localAddrPort): 200,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "egress_svcs_config_no_kube",
|
||||
Env: map[string]string{
|
||||
"TS_EGRESS_SERVICES_CONFIG_PATH": filepath.Join(d, "etc/tailscaled/egress-services-config.json"),
|
||||
"TS_AUTHKEY": "tskey-key",
|
||||
"TS_EGRESS_PROXIES_CONFIG_PATH": filepath.Join(d, "etc/tailscaled"),
|
||||
"TS_AUTHKEY": "tskey-key",
|
||||
},
|
||||
Phases: []phase{
|
||||
{
|
||||
WantFatalLog: "TS_EGRESS_SERVICES_CONFIG_PATH is only supported for Tailscale running on Kubernetes",
|
||||
WantFatalLog: "TS_EGRESS_PROXIES_CONFIG_PATH is only supported for Tailscale running on Kubernetes",
|
||||
},
|
||||
},
|
||||
},
|
||||
@ -1394,13 +1397,31 @@ func (k *kubeServer) serveSecret(w http.ResponseWriter, r *http.Request) {
|
||||
panic(fmt.Sprintf("json decode failed: %v. Body:\n\n%s", err, string(bs)))
|
||||
}
|
||||
for _, op := range req {
|
||||
if op.Op != "remove" {
|
||||
if op.Op == "remove" {
|
||||
if !strings.HasPrefix(op.Path, "/data/") {
|
||||
panic(fmt.Sprintf("unsupported json-patch path %q", op.Path))
|
||||
}
|
||||
delete(k.secret, strings.TrimPrefix(op.Path, "/data/"))
|
||||
} else if op.Op == "replace" {
|
||||
path, ok := strings.CutPrefix(op.Path, "/data/")
|
||||
if !ok {
|
||||
panic(fmt.Sprintf("unsupported json-patch path %q", op.Path))
|
||||
}
|
||||
req := make([]kubeclient.JSONPatch, 0)
|
||||
if err := json.Unmarshal(bs, &req); err != nil {
|
||||
panic(fmt.Sprintf("json decode failed: %v. Body:\n\n%s", err, string(bs)))
|
||||
}
|
||||
|
||||
for _, patch := range req {
|
||||
val, ok := patch.Value.(string)
|
||||
if !ok {
|
||||
panic(fmt.Sprintf("unsupported json patch value %v: cannot be converted to string", patch.Value))
|
||||
}
|
||||
k.secret[path] = val
|
||||
}
|
||||
} else {
|
||||
panic(fmt.Sprintf("unsupported json-patch op %q", op.Op))
|
||||
}
|
||||
if !strings.HasPrefix(op.Path, "/data/") {
|
||||
panic(fmt.Sprintf("unsupported json-patch path %q", op.Path))
|
||||
}
|
||||
delete(k.secret, strings.TrimPrefix(op.Path, "/data/"))
|
||||
}
|
||||
case "application/strategic-merge-patch+json":
|
||||
req := struct {
|
||||
@ -1419,3 +1440,41 @@ func (k *kubeServer) serveSecret(w http.ResponseWriter, r *http.Request) {
|
||||
panic(fmt.Sprintf("unhandled HTTP method %q", r.Method))
|
||||
}
|
||||
}
|
||||
|
||||
func mustBase64(t *testing.T, v any) string {
|
||||
b := mustJSON(t, v)
|
||||
s := base64.StdEncoding.WithPadding('=').EncodeToString(b)
|
||||
return s
|
||||
}
|
||||
|
||||
func mustJSON(t *testing.T, v any) []byte {
|
||||
b, err := json.Marshal(v)
|
||||
if err != nil {
|
||||
t.Fatalf("error converting %v to json: %v", v, err)
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// egress services status given one named tailnet target specified by FQDN. As written by the proxy to its state Secret.
|
||||
func egressSvcStatus(name, fqdn string) egressservices.Status {
|
||||
return egressservices.Status{
|
||||
Services: map[string]*egressservices.ServiceStatus{
|
||||
name: {
|
||||
TailnetTarget: egressservices.TailnetTarget{
|
||||
FQDN: fqdn,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// egress config given one named tailnet target specified by FQDN.
|
||||
func egressSvcConfig(name, fqdn string) egressservices.Configs {
|
||||
return egressservices.Configs{
|
||||
name: egressservices.Config{
|
||||
TailnetTarget: egressservices.TailnetTarget{
|
||||
FQDN: fqdn,
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user