clientv3: use grpc reconnection logic
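This commit drops the client's hand-rolled reconnect machinery (connMonitor, retryConnection, connWait, dialEndpointList) and leans on the reconnection handling that grpc.ClientConn already provides: the client dials once and keeps reusing the same connection while grpc re-establishes the transport after failures. A minimal sketch of that pattern, with an illustrative address and dial options that are not taken from this diff:

package main

import (
	"log"

	"google.golang.org/grpc"
)

func main() {
	// Dial once; the address and options here are assumptions for the sketch.
	conn, err := grpc.Dial("localhost:2379", grpc.WithInsecure())
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	// The returned *grpc.ClientConn transparently re-establishes its
	// transport after failures, so callers never redial it themselves.
}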
@@ -23,13 +23,11 @@ import (
	"net"
	"net/url"
	"strings"
	"sync"
	"time"

	"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"

	"golang.org/x/net/context"
	"golang.org/x/time/rate"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials"
	"google.golang.org/grpc/metadata"
@@ -37,9 +35,6 @@ import (

var (
	ErrNoAvailableEndpoints = errors.New("etcdclient: no available endpoints")

	// minConnRetryWait is the minimum time between reconnects to avoid flooding
	minConnRetryWait = time.Second
)

// Client provides and manages an etcd v3 client session.
@@ -54,20 +49,10 @@ type Client struct {
	conn   *grpc.ClientConn
	cfg    Config
	creds  *credentials.TransportAuthenticator
	mu     sync.RWMutex // protects connection selection and error list
	errors []error      // errors passed to retryConnection

	ctx    context.Context
	cancel context.CancelFunc

	// fields below are managed by connMonitor

	// reconnc accepts writes which signal the client should reconnect
	reconnc chan error
	// newconnc is closed on successful connect and set to a fresh channel
	newconnc    chan struct{}
	lastConnErr error

	// Username is a username for authentication
	Username string
	// Password is a password for authentication
@@ -76,9 +61,6 @@ type Client struct {

// New creates a new etcdv3 client from a given configuration.
func New(cfg Config) (*Client, error) {
	if cfg.retryDialer == nil {
		cfg.retryDialer = dialEndpointList
	}
	if len(cfg.Endpoints) == 0 {
		return nil, ErrNoAvailableEndpoints
	}
@@ -101,39 +83,9 @@ func NewFromConfigFile(path string) (*Client, error) {
}

// Close shuts down the client's etcd connections.
func (c *Client) Close() (err error) {
	c.mu.Lock()
	defer c.mu.Unlock()

	// acquire the cancel
	if c.cancel == nil {
		// already canceled
		if c.lastConnErr != c.ctx.Err() {
			err = c.lastConnErr
		}
		return
	}
	cancel := c.cancel
	c.cancel = nil
	c.mu.Unlock()

	// close watcher and lease before terminating connection
	// so they don't retry on a closed client
	c.Watcher.Close()
	c.Lease.Close()

	// cancel reconnection loop
	cancel()
	c.mu.Lock()
	connc := c.newconnc
	c.mu.Unlock()
	// connc on cancel() is left closed
	<-connc
	c.mu.Lock()
	if c.lastConnErr != c.ctx.Err() {
		err = c.lastConnErr
	}
	return
func (c *Client) Close() error {
	c.cancel()
	return toErr(c.ctx, c.conn.Close())
}

// Ctx is a context for "out of band" messages (e.g., for sending
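With the reconnect monitor gone, Close only cancels the client's context and closes the single grpc connection, normalizing any error through toErr. A hedged usage sketch (the endpoint value is illustrative, not taken from this diff):

package main

import (
	"log"

	"github.com/coreos/etcd/clientv3"
)

func main() {
	cli, err := clientv3.New(clientv3.Config{
		Endpoints: []string{"localhost:2379"}, // illustrative endpoint
	})
	if err != nil {
		log.Fatal(err)
	}
	// Close cancels the client context and closes the underlying
	// *grpc.ClientConn; the returned error is passed through toErr.
	if err := cli.Close(); err != nil {
		log.Println("close:", err)
	}
}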
@@ -144,15 +96,6 @@ func (c *Client) Ctx() context.Context { return c.ctx }
// Endpoints lists the registered endpoints for the client.
func (c *Client) Endpoints() []string { return c.cfg.Endpoints }

// Errors returns all errors that have been observed since called last.
func (c *Client) Errors() (errs []error) {
	c.mu.Lock()
	defer c.mu.Unlock()
	errs = c.errors
	c.errors = nil
	return errs
}

type authTokenCredential struct {
	token string
}
@@ -241,7 +184,7 @@ func WithRequireLeader(ctx context.Context) context.Context {

func newClient(cfg *Config) (*Client, error) {
	if cfg == nil {
		cfg = &Config{retryDialer: dialEndpointList}
		cfg = &Config{}
	}
	var creds *credentials.TransportAuthenticator
	if cfg.TLS != nil {
@@ -251,26 +194,23 @@ func newClient(cfg *Config) (*Client, error) {

	// use a temporary skeleton client to bootstrap first connection
	ctx, cancel := context.WithCancel(context.TODO())
	conn, err := cfg.retryDialer(&Client{cfg: *cfg, creds: creds, ctx: ctx, Username: cfg.Username, Password: cfg.Password})
	if err != nil {
		return nil, err
	}
	client := &Client{
		conn:     conn,
		cfg:      *cfg,
		creds:    creds,
		ctx:      ctx,
		cancel:   cancel,
		reconnc:  make(chan error, 1),
		newconnc: make(chan struct{}),
		conn:   nil,
		cfg:    *cfg,
		creds:  creds,
		ctx:    ctx,
		cancel: cancel,
	}

	if cfg.Username != "" && cfg.Password != "" {
		client.Username = cfg.Username
		client.Password = cfg.Password
	}

	go client.connMonitor()
	// TODO: use grpc balancer
	conn, err := client.Dial(cfg.Endpoints[0])
	if err != nil {
		return nil, err
	}
	client.conn = conn

	client.Cluster = NewCluster(client)
	client.KV = NewKV(client)
@@ -289,126 +229,30 @@ func newClient(cfg *Config) (*Client, error) {
}

// ActiveConnection returns the current in-use connection
func (c *Client) ActiveConnection() *grpc.ClientConn {
	c.mu.RLock()
	defer c.mu.RUnlock()
	if c.conn == nil {
		panic("trying to return nil active connection")
	}
	return c.conn
}

// retryConnection establishes a new connection
func (c *Client) retryConnection(err error) {
	oldconn := c.conn

	// return holding lock so old connection can be cleaned up in this defer
	defer func() {
		if oldconn != nil {
			oldconn.Close()
			if st, _ := oldconn.State(); st != grpc.Shutdown {
				// wait so grpc doesn't leak sleeping goroutines
				oldconn.WaitForStateChange(context.Background(), st)
			}
		}
		c.mu.Unlock()
	}()

	c.mu.Lock()
	if err != nil {
		c.errors = append(c.errors, err)
	}
	if c.cancel == nil {
		// client has called Close() so don't try to dial out
		return
	}
	c.mu.Unlock()

	nc, dialErr := c.cfg.retryDialer(c)

	c.mu.Lock()
	if nc != nil {
		c.conn = nc
	}
	if dialErr != nil {
		c.errors = append(c.errors, dialErr)
	}
	c.lastConnErr = dialErr
}

// connStartRetry schedules a reconnect if one is not already running
func (c *Client) connStartRetry(err error) {
	c.mu.Lock()
	ch := c.reconnc
	defer c.mu.Unlock()
	select {
	case ch <- err:
	default:
	}
}

// connWait waits for a reconnect to be processed
func (c *Client) connWait(ctx context.Context, err error) (*grpc.ClientConn, error) {
	c.mu.RLock()
	ch := c.newconnc
	c.mu.RUnlock()
	c.connStartRetry(err)
	select {
	case <-ctx.Done():
		return nil, ctx.Err()
	case <-ch:
	}
	c.mu.RLock()
	defer c.mu.RUnlock()
	if c.cancel == nil {
		return c.conn, rpctypes.ErrConnClosed
	}
	return c.conn, c.lastConnErr
}

// connMonitor monitors the connection and handles retries
func (c *Client) connMonitor() {
	var err error

	defer func() {
		c.retryConnection(c.ctx.Err())
		close(c.newconnc)
	}()

	limiter := rate.NewLimiter(rate.Every(minConnRetryWait), 1)
	for limiter.Wait(c.ctx) == nil {
		select {
		case err = <-c.reconnc:
		case <-c.ctx.Done():
			return
		}
		c.retryConnection(err)
		c.mu.Lock()
		close(c.newconnc)
		c.newconnc = make(chan struct{})
		c.reconnc = make(chan error, 1)
		c.mu.Unlock()
	}
}

// dialEndpointList attempts to connect to each endpoint in order until a
// connection is established.
func dialEndpointList(c *Client) (*grpc.ClientConn, error) {
	var err error
	for _, ep := range c.Endpoints() {
		conn, curErr := c.Dial(ep)
		if curErr != nil {
			err = curErr
		} else {
			return conn, nil
		}
	}
	return nil, err
}
func (c *Client) ActiveConnection() *grpc.ClientConn { return c.conn }

// isHaltErr returns true if the given error and context indicate no forward
// progress can be made, even after reconnecting.
func isHaltErr(ctx context.Context, err error) bool {
	isRPCError := strings.HasPrefix(grpc.ErrorDesc(err), "etcdserver: ")
	return isRPCError || ctx.Err() != nil || err == rpctypes.ErrConnClosed
	if ctx != nil && ctx.Err() != nil {
		return true
	}
	if err == nil {
		return false
	}
	return strings.HasPrefix(grpc.ErrorDesc(err), "etcdserver: ") ||
		strings.Contains(err.Error(), grpc.ErrClientConnClosing.Error())
}

func toErr(ctx context.Context, err error) error {
	if err == nil {
		return nil
	}
	err = rpctypes.Error(err)
	if ctx.Err() != nil && strings.Contains(err.Error(), "context") {
		err = ctx.Err()
	} else if strings.Contains(err.Error(), grpc.ErrClientConnClosing.Error()) {
		err = grpc.ErrClientConnClosing
	}
	return err
}
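isHaltErr and toErr are what the package's RPC wrappers use to decide whether a failed call can be reissued on the same connection. A minimal sketch of that pattern, using a hypothetical retryUnary helper that is not part of this diff:

// retryUnary is a hypothetical illustration: retry transient transport
// failures (grpc reconnects underneath), stop on server-side errors,
// context cancellation, or a closed client.
func retryUnary(ctx context.Context, do func(context.Context) error) error {
	for {
		err := do(ctx)
		if err == nil {
			return nil
		}
		if isHaltErr(ctx, err) {
			return toErr(ctx, err) // no forward progress possible
		}
		// transient failure: loop and reissue the RPC on the same client
	}
}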