client/v3: Add backoff before retry when watch stream returns unavailable

The client retries the connection without any backoff when the server
goes away after the watch stream has been established. This results in
high CPU usage in the client process. This change introduces a backoff
before retrying when the stream fails with an unavailable error.

Signed-off-by: Hisanobu Tomari <posco.grubb@gmail.com>
This commit is contained in:
Hisanobu Tomari 2022-10-08 06:57:30 +09:00
parent acc7463fb2
commit d3da22fb1f

View File

@ -541,6 +541,7 @@ func (w *watchGrpcStream) run() {
cancelSet := make(map[int64]struct{}) cancelSet := make(map[int64]struct{})
var cur *pb.WatchResponse var cur *pb.WatchResponse
backoff := time.Millisecond
for { for {
select { select {
// Watch() requested // Watch() requested
@ -677,6 +678,7 @@ func (w *watchGrpcStream) run() {
closeErr = err closeErr = err
return return
} }
backoff = w.backoffIfUnavailable(backoff, err)
if wc, closeErr = w.newWatchClient(); closeErr != nil { if wc, closeErr = w.newWatchClient(); closeErr != nil {
return return
} }
@ -996,6 +998,21 @@ func (w *watchGrpcStream) joinSubstreams() {
var maxBackoff = 100 * time.Millisecond var maxBackoff = 100 * time.Millisecond
// backoffIfUnavailable delays the caller before a retry when err is classified
// as an "unavailable" error by isUnavailableErr, and returns the backoff
// duration the caller should pass in on its next attempt.
//
// When err is not an unavailable error, the function returns backoff unchanged
// and does not sleep. Otherwise the delay is grown by 25% (capped at
// maxBackoff) unless it has already reached maxBackoff, the calling goroutine
// sleeps for that delay, and the delay is returned.
func (w *watchGrpcStream) backoffIfUnavailable(backoff time.Duration, err error) time.Duration {
	if !isUnavailableErr(w.ctx, err) {
		// Not a retry-with-backoff situation: leave the delay untouched.
		return backoff
	}

	if backoff < maxBackoff {
		// Grow by a factor of 1.25, never exceeding the ceiling.
		next := backoff + backoff/4
		if next > maxBackoff {
			next = maxBackoff
		}
		backoff = next
	}

	// Blocks the calling goroutine for the full delay before it retries.
	time.Sleep(backoff)
	return backoff
}
// openWatchClient retries opening a watch client until success or halt. // openWatchClient retries opening a watch client until success or halt.
// manually retry in case "ws==nil && err==nil" // manually retry in case "ws==nil && err==nil"
// TODO: remove FailFast=false // TODO: remove FailFast=false
@ -1016,17 +1033,7 @@ func (w *watchGrpcStream) openWatchClient() (ws pb.Watch_WatchClient, err error)
if isHaltErr(w.ctx, err) { if isHaltErr(w.ctx, err) {
return nil, v3rpc.Error(err) return nil, v3rpc.Error(err)
} }
if isUnavailableErr(w.ctx, err) { backoff = w.backoffIfUnavailable(backoff, err)
// retry, but backoff
if backoff < maxBackoff {
// 25% backoff factor
backoff = backoff + backoff/4
if backoff > maxBackoff {
backoff = maxBackoff
}
}
time.Sleep(backoff)
}
} }
return ws, nil return ws, nil
} }