feat(server): make the RetryInterval of PeerServer tunable

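In tests and other environments it is useful to be able to tune how long
the PeerServer sleeps between attempts to join the cluster. The interval is
now configurable (in seconds) and still defaults to 10.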
This commit is contained in:
Brandon Philips
2014-02-06 20:12:20 -08:00
parent da3fe920cb
commit 468a68c96c
4 changed files with 14 additions and 10 deletions

View File

@ -64,6 +64,7 @@ type Config struct {
MaxClusterSize int `toml:"max_cluster_size" env:"ETCD_MAX_CLUSTER_SIZE"` MaxClusterSize int `toml:"max_cluster_size" env:"ETCD_MAX_CLUSTER_SIZE"`
MaxResultBuffer int `toml:"max_result_buffer" env:"ETCD_MAX_RESULT_BUFFER"` MaxResultBuffer int `toml:"max_result_buffer" env:"ETCD_MAX_RESULT_BUFFER"`
MaxRetryAttempts int `toml:"max_retry_attempts" env:"ETCD_MAX_RETRY_ATTEMPTS"` MaxRetryAttempts int `toml:"max_retry_attempts" env:"ETCD_MAX_RETRY_ATTEMPTS"`
RetryInterval float64 `toml:"retry_interval" env:"ETCD_RETRY_INTERVAL"`
Name string `toml:"name" env:"ETCD_NAME"` Name string `toml:"name" env:"ETCD_NAME"`
Snapshot bool `toml:"snapshot" env:"ETCD_SNAPSHOT"` Snapshot bool `toml:"snapshot" env:"ETCD_SNAPSHOT"`
SnapshotCount int `toml:"snapshot_count" env:"ETCD_SNAPSHOTCOUNT"` SnapshotCount int `toml:"snapshot_count" env:"ETCD_SNAPSHOTCOUNT"`
@ -93,6 +94,7 @@ func New() *Config {
c.MaxClusterSize = 9 c.MaxClusterSize = 9
c.MaxResultBuffer = 1024 c.MaxResultBuffer = 1024
c.MaxRetryAttempts = 3 c.MaxRetryAttempts = 3
c.RetryInterval = 10.0
c.Snapshot = true c.Snapshot = true
c.SnapshotCount = 10000 c.SnapshotCount = 10000
c.Peer.Addr = "127.0.0.1:7001" c.Peer.Addr = "127.0.0.1:7001"
@ -282,6 +284,7 @@ func (c *Config) LoadFlags(arguments []string) error {
f.StringVar(&c.DataDir, "data-dir", c.DataDir, "") f.StringVar(&c.DataDir, "data-dir", c.DataDir, "")
f.IntVar(&c.MaxResultBuffer, "max-result-buffer", c.MaxResultBuffer, "") f.IntVar(&c.MaxResultBuffer, "max-result-buffer", c.MaxResultBuffer, "")
f.IntVar(&c.MaxRetryAttempts, "max-retry-attempts", c.MaxRetryAttempts, "") f.IntVar(&c.MaxRetryAttempts, "max-retry-attempts", c.MaxRetryAttempts, "")
f.Float64Var(&c.RetryInterval, "retry-interval", c.RetryInterval, "")
f.IntVar(&c.MaxClusterSize, "max-cluster-size", c.MaxClusterSize, "") f.IntVar(&c.MaxClusterSize, "max-cluster-size", c.MaxClusterSize, "")
f.IntVar(&c.Peer.HeartbeatTimeout, "peer-heartbeat-timeout", c.Peer.HeartbeatTimeout, "") f.IntVar(&c.Peer.HeartbeatTimeout, "peer-heartbeat-timeout", c.Peer.HeartbeatTimeout, "")
f.IntVar(&c.Peer.ElectionTimeout, "peer-election-timeout", c.Peer.ElectionTimeout, "") f.IntVar(&c.Peer.ElectionTimeout, "peer-election-timeout", c.Peer.ElectionTimeout, "")

View File

@ -122,6 +122,7 @@ func main() {
SnapshotCount: config.SnapshotCount, SnapshotCount: config.SnapshotCount,
MaxClusterSize: config.MaxClusterSize, MaxClusterSize: config.MaxClusterSize,
RetryTimes: config.MaxRetryAttempts, RetryTimes: config.MaxRetryAttempts,
RetryInterval: config.RetryInterval,
} }
ps := server.NewPeerServer(psConfig, registry, store, &mb, followersStats, serverStats) ps := server.NewPeerServer(psConfig, registry, store, &mb, followersStats, serverStats)

View File

@ -20,17 +20,16 @@ import (
"github.com/coreos/etcd/store" "github.com/coreos/etcd/store"
) )
const retryInterval = 10
const ThresholdMonitorTimeout = 5 * time.Second const ThresholdMonitorTimeout = 5 * time.Second
type PeerServerConfig struct { type PeerServerConfig struct {
Name string Name string
Scheme string Scheme string
URL string URL string
SnapshotCount int SnapshotCount int
MaxClusterSize int MaxClusterSize int
RetryTimes int RetryTimes int
RetryInterval float64
} }
type PeerServer struct { type PeerServer struct {
@ -209,8 +208,8 @@ func (s *PeerServer) startAsFollower(cluster []string) {
if ok { if ok {
return return
} }
log.Warnf("Unable to join the cluster using any of the peers %v. Retrying in %d seconds", cluster, retryInterval) log.Warnf("Unable to join the cluster using any of the peers %v. Retrying in %.1f seconds", cluster, s.Config.RetryInterval)
time.Sleep(time.Second * retryInterval) time.Sleep(time.Second * time.Duration(s.Config.RetryInterval))
} }
log.Fatalf("Cannot join the cluster via given peers after %x retries", s.Config.RetryTimes) log.Fatalf("Cannot join the cluster via given peers after %x retries", s.Config.RetryTimes)

View File

@ -52,6 +52,7 @@ Peer Communication Options:
Other Options: Other Options:
-max-result-buffer Max size of the result buffer. -max-result-buffer Max size of the result buffer.
-max-retry-attempts Number of times a node will try to join a cluster. -max-retry-attempts Number of times a node will try to join a cluster.
-retry-interval Seconds to wait between cluster join retry attempts.
-max-cluster-size Maximum number of nodes in the cluster. -max-cluster-size Maximum number of nodes in the cluster.
-snapshot=false Disable log snapshots -snapshot=false Disable log snapshots
-snapshot-count Number of transactions before issuing a snapshot. -snapshot-count Number of transactions before issuing a snapshot.