cmd/containerboot: serve health on local endpoint (#14246)
* cmd/containerboot: serve health on local endpoint We introduced stable (user) metrics in #14035, and `TS_LOCAL_ADDR_PORT` with it. Rather than requiring users to specify a new addr/port combination for each new local endpoint they want the container to serve, this combines the health check endpoint onto the local addr/port used by metrics if `TS_ENABLE_HEALTH_CHECK` is used instead of `TS_HEALTHCHECK_ADDR_PORT`. `TS_LOCAL_ADDR_PORT` now defaults to binding to all interfaces on 9002 so that it works more seamlessly and with less configuration in environments other than Kubernetes, where the operator always overrides the default anyway. In particular, listening on localhost would not be accessible from outside the container, and many scripted container environments do not know the IP address of the container before it's started. Listening on all interfaces allows users to just set one env var (`TS_ENABLE_METRICS` or `TS_ENABLE_HEALTH_CHECK`) to get a fully functioning local endpoint they can query from outside the container. Updates #14035, #12898 Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
This commit is contained in:
@ -52,11 +52,17 @@
|
||||
// ${TS_CERT_DOMAIN}, it will be replaced with the value of the available FQDN.
|
||||
// It cannot be used in conjunction with TS_DEST_IP. The file is watched for changes,
|
||||
// and will be re-applied when it changes.
|
||||
// - TS_HEALTHCHECK_ADDR_PORT: if specified, an HTTP health endpoint will be
|
||||
// served at /healthz at the provided address, which should be in form [<address>]:<port>.
|
||||
// If not set, no health check will be run. If set to :<port>, addr will default to 0.0.0.0
|
||||
// The health endpoint will return 200 OK if this node has at least one tailnet IP address,
|
||||
// otherwise returns 503.
|
||||
// - TS_HEALTHCHECK_ADDR_PORT: deprecated, use TS_ENABLE_HEALTH_CHECK instead and optionally
|
||||
// set TS_LOCAL_ADDR_PORT. Will be removed in 1.82.0.
|
||||
// - TS_LOCAL_ADDR_PORT: the address and port to serve local metrics and health
|
||||
// check endpoints if enabled via TS_ENABLE_METRICS and/or TS_ENABLE_HEALTH_CHECK.
|
||||
// Defaults to [::]:9002, serving on all available interfaces.
|
||||
// - TS_ENABLE_METRICS: if true, a metrics endpoint will be served at /metrics on
|
||||
// the address specified by TS_LOCAL_ADDR_PORT. See https://tailscale.com/kb/1482/client-metrics
|
||||
// for more information on the metrics exposed.
|
||||
// - TS_ENABLE_HEALTH_CHECK: if true, a health check endpoint will be served at /healthz on
|
||||
// the address specified by TS_LOCAL_ADDR_PORT. The health endpoint will return 200
|
||||
// OK if this node has at least one tailnet IP address, otherwise returns 503.
|
||||
// NB: the health criteria might change in the future.
|
||||
// - TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR: if specified, a path to a
|
||||
// directory that containers tailscaled config in file. The config file needs to be
|
||||
@ -99,6 +105,7 @@ import (
|
||||
"log"
|
||||
"math"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/netip"
|
||||
"os"
|
||||
"os/signal"
|
||||
@ -178,12 +185,32 @@ func main() {
|
||||
}
|
||||
defer killTailscaled()
|
||||
|
||||
if cfg.LocalAddrPort != "" && cfg.MetricsEnabled {
|
||||
m := &metrics{
|
||||
lc: client,
|
||||
debugEndpoint: cfg.DebugAddrPort,
|
||||
var healthCheck *healthz
|
||||
if cfg.HealthCheckAddrPort != "" {
|
||||
mux := http.NewServeMux()
|
||||
|
||||
log.Printf("Running healthcheck endpoint at %s/healthz", cfg.HealthCheckAddrPort)
|
||||
healthCheck = healthHandlers(mux)
|
||||
|
||||
close := runHTTPServer(mux, cfg.HealthCheckAddrPort)
|
||||
defer close()
|
||||
}
|
||||
|
||||
if cfg.localMetricsEnabled() || cfg.localHealthEnabled() {
|
||||
mux := http.NewServeMux()
|
||||
|
||||
if cfg.localMetricsEnabled() {
|
||||
log.Printf("Running metrics endpoint at %s/metrics", cfg.LocalAddrPort)
|
||||
metricsHandlers(mux, client, cfg.DebugAddrPort)
|
||||
}
|
||||
runMetrics(cfg.LocalAddrPort, m)
|
||||
|
||||
if cfg.localHealthEnabled() {
|
||||
log.Printf("Running healthcheck endpoint at %s/healthz", cfg.LocalAddrPort)
|
||||
healthCheck = healthHandlers(mux)
|
||||
}
|
||||
|
||||
close := runHTTPServer(mux, cfg.LocalAddrPort)
|
||||
defer close()
|
||||
}
|
||||
|
||||
if cfg.EnableForwardingOptimizations {
|
||||
@ -328,9 +355,6 @@ authLoop:
|
||||
|
||||
certDomain = new(atomic.Pointer[string])
|
||||
certDomainChanged = make(chan bool, 1)
|
||||
|
||||
h = &healthz{} // http server for the healthz endpoint
|
||||
healthzRunner = sync.OnceFunc(func() { runHealthz(cfg.HealthCheckAddrPort, h) })
|
||||
)
|
||||
if cfg.ServeConfigPath != "" {
|
||||
go watchServeConfigChanges(ctx, cfg.ServeConfigPath, certDomainChanged, certDomain, client)
|
||||
@ -556,11 +580,8 @@ runLoop:
|
||||
}
|
||||
}
|
||||
|
||||
if cfg.HealthCheckAddrPort != "" {
|
||||
h.Lock()
|
||||
h.hasAddrs = len(addrs) != 0
|
||||
h.Unlock()
|
||||
healthzRunner()
|
||||
if healthCheck != nil {
|
||||
healthCheck.update(len(addrs) != 0)
|
||||
}
|
||||
if egressSvcsNotify != nil {
|
||||
egressSvcsNotify <- n
|
||||
@ -751,3 +772,22 @@ func tailscaledConfigFilePath() string {
|
||||
log.Printf("Using tailscaled config file %q to match current capability version %d", filePath, tailcfg.CurrentCapabilityVersion)
|
||||
return filePath
|
||||
}
|
||||
|
||||
func runHTTPServer(mux *http.ServeMux, addr string) (close func() error) {
|
||||
ln, err := net.Listen("tcp", addr)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to listen on addr %q: %v", addr, err)
|
||||
}
|
||||
srv := &http.Server{Handler: mux}
|
||||
|
||||
go func() {
|
||||
if err := srv.Serve(ln); err != nil {
|
||||
log.Fatalf("failed running server: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
return func() error {
|
||||
err := srv.Shutdown(context.Background())
|
||||
return errors.Join(err, ln.Close())
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user