server: Make corruption check optional and period configurable
Signed-off-by: Marek Siarkowicz <siarkowicz@google.com>
This commit is contained in:
parent
6697fca97d
commit
d44bbff278
@ -149,6 +149,8 @@ type ServerConfig struct {
|
|||||||
// before serving any peer/client traffic.
|
// before serving any peer/client traffic.
|
||||||
InitialCorruptCheck bool
|
InitialCorruptCheck bool
|
||||||
CorruptCheckTime time.Duration
|
CorruptCheckTime time.Duration
|
||||||
|
CompactHashCheckEnabled bool
|
||||||
|
CompactHashCheckTime time.Duration
|
||||||
|
|
||||||
// PreVote is true to enable Raft Pre-Vote.
|
// PreVote is true to enable Raft Pre-Vote.
|
||||||
PreVote bool
|
PreVote bool
|
||||||
|
@ -322,6 +322,9 @@ type Config struct {
|
|||||||
|
|
||||||
ExperimentalInitialCorruptCheck bool `json:"experimental-initial-corrupt-check"`
|
ExperimentalInitialCorruptCheck bool `json:"experimental-initial-corrupt-check"`
|
||||||
ExperimentalCorruptCheckTime time.Duration `json:"experimental-corrupt-check-time"`
|
ExperimentalCorruptCheckTime time.Duration `json:"experimental-corrupt-check-time"`
|
||||||
|
ExperimentalCompactHashCheckEnabled bool `json:"experimental-compact-hash-check-enabled"`
|
||||||
|
ExperimentalCompactHashCheckTime time.Duration `json:"experimental-compact-hash-check-time"`
|
||||||
|
|
||||||
// ExperimentalEnableLeaseCheckpoint enables leader to send regular checkpoints to other members to prevent reset of remaining TTL on leader change.
|
// ExperimentalEnableLeaseCheckpoint enables leader to send regular checkpoints to other members to prevent reset of remaining TTL on leader change.
|
||||||
ExperimentalEnableLeaseCheckpoint bool `json:"experimental-enable-lease-checkpoint"`
|
ExperimentalEnableLeaseCheckpoint bool `json:"experimental-enable-lease-checkpoint"`
|
||||||
// ExperimentalEnableLeaseCheckpointPersist enables persisting remainingTTL to prevent indefinite auto-renewal of long lived leases. Always enabled in v3.6. Should be used to ensure smooth upgrade from v3.5 clusters with this feature enabled.
|
// ExperimentalEnableLeaseCheckpointPersist enables persisting remainingTTL to prevent indefinite auto-renewal of long lived leases. Always enabled in v3.6. Should be used to ensure smooth upgrade from v3.5 clusters with this feature enabled.
|
||||||
@ -521,6 +524,9 @@ func NewConfig() *Config {
|
|||||||
ExperimentalTxnModeWriteWithSharedBuffer: true,
|
ExperimentalTxnModeWriteWithSharedBuffer: true,
|
||||||
ExperimentalMaxLearners: membership.DefaultMaxLearners,
|
ExperimentalMaxLearners: membership.DefaultMaxLearners,
|
||||||
|
|
||||||
|
ExperimentalCompactHashCheckEnabled: false,
|
||||||
|
ExperimentalCompactHashCheckTime: time.Minute,
|
||||||
|
|
||||||
V2Deprecation: config.V2_DEPR_DEFAULT,
|
V2Deprecation: config.V2_DEPR_DEFAULT,
|
||||||
|
|
||||||
DiscoveryCfg: v3discovery.DiscoveryConfig{
|
DiscoveryCfg: v3discovery.DiscoveryConfig{
|
||||||
@ -759,6 +765,10 @@ func (cfg *Config) Validate() error {
|
|||||||
return fmt.Errorf("setting experimental-enable-lease-checkpoint-persist requires experimental-enable-lease-checkpoint")
|
return fmt.Errorf("setting experimental-enable-lease-checkpoint-persist requires experimental-enable-lease-checkpoint")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if cfg.ExperimentalCompactHashCheckTime <= 0 {
|
||||||
|
return fmt.Errorf("--experimental-compact-hash-check-time must be >0 (set to %v)", cfg.ExperimentalCompactHashCheckTime)
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -202,6 +202,8 @@ func StartEtcd(inCfg *Config) (e *Etcd, err error) {
|
|||||||
HostWhitelist: cfg.HostWhitelist,
|
HostWhitelist: cfg.HostWhitelist,
|
||||||
InitialCorruptCheck: cfg.ExperimentalInitialCorruptCheck,
|
InitialCorruptCheck: cfg.ExperimentalInitialCorruptCheck,
|
||||||
CorruptCheckTime: cfg.ExperimentalCorruptCheckTime,
|
CorruptCheckTime: cfg.ExperimentalCorruptCheckTime,
|
||||||
|
CompactHashCheckEnabled: cfg.ExperimentalCompactHashCheckEnabled,
|
||||||
|
CompactHashCheckTime: cfg.ExperimentalCompactHashCheckTime,
|
||||||
PreVote: cfg.PreVote,
|
PreVote: cfg.PreVote,
|
||||||
Logger: cfg.logger,
|
Logger: cfg.logger,
|
||||||
ForceNewCluster: cfg.ForceNewCluster,
|
ForceNewCluster: cfg.ForceNewCluster,
|
||||||
@ -344,6 +346,8 @@ func print(lg *zap.Logger, ec Config, sc config.ServerConfig, memberInitialized
|
|||||||
zap.Bool("pre-vote", sc.PreVote),
|
zap.Bool("pre-vote", sc.PreVote),
|
||||||
zap.Bool("initial-corrupt-check", sc.InitialCorruptCheck),
|
zap.Bool("initial-corrupt-check", sc.InitialCorruptCheck),
|
||||||
zap.String("corrupt-check-time-interval", sc.CorruptCheckTime.String()),
|
zap.String("corrupt-check-time-interval", sc.CorruptCheckTime.String()),
|
||||||
|
zap.Bool("compact-check-time-enabled", sc.CompactHashCheckEnabled),
|
||||||
|
zap.Duration("compact-check-time-interval", sc.CompactHashCheckTime),
|
||||||
zap.String("auto-compaction-mode", sc.AutoCompactionMode),
|
zap.String("auto-compaction-mode", sc.AutoCompactionMode),
|
||||||
zap.Duration("auto-compaction-retention", sc.AutoCompactionRetention),
|
zap.Duration("auto-compaction-retention", sc.AutoCompactionRetention),
|
||||||
zap.String("auto-compaction-interval", sc.AutoCompactionRetention.String()),
|
zap.String("auto-compaction-interval", sc.AutoCompactionRetention.String()),
|
||||||
|
@ -259,6 +259,8 @@ func newConfig() *config {
|
|||||||
// experimental
|
// experimental
|
||||||
fs.BoolVar(&cfg.ec.ExperimentalInitialCorruptCheck, "experimental-initial-corrupt-check", cfg.ec.ExperimentalInitialCorruptCheck, "Enable to check data corruption before serving any client/peer traffic.")
|
fs.BoolVar(&cfg.ec.ExperimentalInitialCorruptCheck, "experimental-initial-corrupt-check", cfg.ec.ExperimentalInitialCorruptCheck, "Enable to check data corruption before serving any client/peer traffic.")
|
||||||
fs.DurationVar(&cfg.ec.ExperimentalCorruptCheckTime, "experimental-corrupt-check-time", cfg.ec.ExperimentalCorruptCheckTime, "Duration of time between cluster corruption check passes.")
|
fs.DurationVar(&cfg.ec.ExperimentalCorruptCheckTime, "experimental-corrupt-check-time", cfg.ec.ExperimentalCorruptCheckTime, "Duration of time between cluster corruption check passes.")
|
||||||
|
fs.BoolVar(&cfg.ec.ExperimentalCompactHashCheckEnabled, "experimental-compact-hash-check-enabled", cfg.ec.ExperimentalCompactHashCheckEnabled, "Enable leader to periodically check followers compaction hashes.")
|
||||||
|
fs.DurationVar(&cfg.ec.ExperimentalCompactHashCheckTime, "experimental-compact-hash-check-time", cfg.ec.ExperimentalCompactHashCheckTime, "Duration of time between leader checks followers compaction hashes.")
|
||||||
|
|
||||||
fs.BoolVar(&cfg.ec.ExperimentalEnableLeaseCheckpoint, "experimental-enable-lease-checkpoint", false, "Enable leader to send regular checkpoints to other members to prevent reset of remaining TTL on leader change.")
|
fs.BoolVar(&cfg.ec.ExperimentalEnableLeaseCheckpoint, "experimental-enable-lease-checkpoint", false, "Enable leader to send regular checkpoints to other members to prevent reset of remaining TTL on leader change.")
|
||||||
// TODO: delete in v3.7
|
// TODO: delete in v3.7
|
||||||
|
@ -301,14 +301,13 @@ func (cm *corruptionChecker) CompactHashCheck() {
|
|||||||
cm.mux.Unlock()
|
cm.mux.Unlock()
|
||||||
cm.lg.Info("finished compaction hash check", zap.Int("number-of-hashes-checked", i+1))
|
cm.lg.Info("finished compaction hash check", zap.Int("number-of-hashes-checked", i+1))
|
||||||
return
|
return
|
||||||
} else {
|
}
|
||||||
cm.lg.Warn("skipped checking hash; was not able to check all peers",
|
cm.lg.Warn("skipped revision in compaction hash check; was not able to check all peers",
|
||||||
zap.Int("number-of-peers-checked", peersChecked),
|
zap.Int("number-of-peers-checked", peersChecked),
|
||||||
zap.Int("number-of-peers", len(peers)),
|
zap.Int("number-of-peers", len(peers)),
|
||||||
zap.Int64("revision", hash.Revision),
|
zap.Int64("revision", hash.Revision),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
|
||||||
cm.lg.Info("finished compaction hash check", zap.Int("number-of-hashes-checked", len(hashes)))
|
cm.lg.Info("finished compaction hash check", zap.Int("number-of-hashes-checked", len(hashes)))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -112,7 +112,6 @@ var (
|
|||||||
// on the connection. Or we will not be able to reuse the connection
|
// on the connection. Or we will not be able to reuse the connection
|
||||||
// (since it will timeout).
|
// (since it will timeout).
|
||||||
monitorVersionInterval = rafthttp.ConnWriteTimeout - time.Second
|
monitorVersionInterval = rafthttp.ConnWriteTimeout - time.Second
|
||||||
CompactHashCheckInterval = 15 * time.Second
|
|
||||||
|
|
||||||
recommendedMaxRequestBytesString = humanize.Bytes(uint64(recommendedMaxRequestBytes))
|
recommendedMaxRequestBytesString = humanize.Bytes(uint64(recommendedMaxRequestBytes))
|
||||||
storeMemberAttributeRegexp = regexp.MustCompile(path.Join(membership.StoreMembersPrefix, "[[:xdigit:]]{1,16}", "attributes"))
|
storeMemberAttributeRegexp = regexp.MustCompile(path.Join(membership.StoreMembersPrefix, "[[:xdigit:]]{1,16}", "attributes"))
|
||||||
@ -2219,9 +2218,13 @@ func (s *EtcdServer) monitorKVHash() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *EtcdServer) monitorCompactHash() {
|
func (s *EtcdServer) monitorCompactHash() {
|
||||||
|
if !s.Cfg.CompactHashCheckEnabled {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
t := s.Cfg.CompactHashCheckTime
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-time.After(CompactHashCheckInterval):
|
case <-time.After(t):
|
||||||
case <-s.stopping:
|
case <-s.stopping:
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -23,7 +23,6 @@ import (
|
|||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"go.etcd.io/etcd/api/v3/etcdserverpb"
|
"go.etcd.io/etcd/api/v3/etcdserverpb"
|
||||||
"go.etcd.io/etcd/client/v3"
|
"go.etcd.io/etcd/client/v3"
|
||||||
"go.etcd.io/etcd/server/v3/etcdserver"
|
|
||||||
"go.etcd.io/etcd/server/v3/storage/datadir"
|
"go.etcd.io/etcd/server/v3/storage/datadir"
|
||||||
"go.etcd.io/etcd/server/v3/storage/mvcc/testutil"
|
"go.etcd.io/etcd/server/v3/storage/mvcc/testutil"
|
||||||
"go.etcd.io/etcd/tests/v3/framework/config"
|
"go.etcd.io/etcd/tests/v3/framework/config"
|
||||||
@ -136,10 +135,13 @@ func TestPeriodicCheckDetectsCorruption(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestCompactHashCheckDetectCorruption(t *testing.T) {
|
func TestCompactHashCheckDetectCorruption(t *testing.T) {
|
||||||
|
checkTime := time.Second
|
||||||
e2e.BeforeTest(t)
|
e2e.BeforeTest(t)
|
||||||
epc, err := e2e.NewEtcdProcessCluster(t, &e2e.EtcdProcessClusterConfig{
|
epc, err := e2e.NewEtcdProcessCluster(t, &e2e.EtcdProcessClusterConfig{
|
||||||
ClusterSize: 3,
|
ClusterSize: 3,
|
||||||
KeepDataDir: true,
|
KeepDataDir: true,
|
||||||
|
CompactHashCheckEnabled: true,
|
||||||
|
CompactHashCheckTime: checkTime,
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("could not start etcd process cluster (%v)", err)
|
t.Fatalf("could not start etcd process cluster (%v)", err)
|
||||||
@ -173,7 +175,7 @@ func TestCompactHashCheckDetectCorruption(t *testing.T) {
|
|||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
_, err = cc.Compact(5, config.CompactOption{})
|
_, err = cc.Compact(5, config.CompactOption{})
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
time.Sleep(etcdserver.CompactHashCheckInterval * 11 / 10)
|
time.Sleep(checkTime * 11 / 10)
|
||||||
alarmResponse, err := cc.AlarmList()
|
alarmResponse, err := cc.AlarmList()
|
||||||
assert.NoError(t, err, "error on alarm list")
|
assert.NoError(t, err, "error on alarm list")
|
||||||
assert.Equal(t, []*etcdserverpb.AlarmMember{{Alarm: etcdserverpb.AlarmType_CORRUPT, MemberID: memberID}}, alarmResponse.Alarms)
|
assert.Equal(t, []*etcdserverpb.AlarmMember{{Alarm: etcdserverpb.AlarmType_CORRUPT, MemberID: memberID}}, alarmResponse.Alarms)
|
||||||
|
@ -179,6 +179,8 @@ type EtcdProcessClusterConfig struct {
|
|||||||
|
|
||||||
MaxConcurrentStreams uint32 // default is math.MaxUint32
|
MaxConcurrentStreams uint32 // default is math.MaxUint32
|
||||||
CorruptCheckTime time.Duration
|
CorruptCheckTime time.Duration
|
||||||
|
CompactHashCheckEnabled bool
|
||||||
|
CompactHashCheckTime time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewEtcdProcessCluster launches a new cluster from etcd processes, returning
|
// NewEtcdProcessCluster launches a new cluster from etcd processes, returning
|
||||||
@ -351,6 +353,12 @@ func (cfg *EtcdProcessClusterConfig) EtcdServerProcessConfigs(tb testing.TB) []*
|
|||||||
if cfg.CorruptCheckTime != 0 {
|
if cfg.CorruptCheckTime != 0 {
|
||||||
args = append(args, "--experimental-corrupt-check-time", fmt.Sprintf("%s", cfg.CorruptCheckTime))
|
args = append(args, "--experimental-corrupt-check-time", fmt.Sprintf("%s", cfg.CorruptCheckTime))
|
||||||
}
|
}
|
||||||
|
if cfg.CompactHashCheckEnabled {
|
||||||
|
args = append(args, "--experimental-compact-hash-check-enabled")
|
||||||
|
}
|
||||||
|
if cfg.CompactHashCheckTime != 0 {
|
||||||
|
args = append(args, "--experimental-compact-hash-check-time", cfg.CompactHashCheckTime.String())
|
||||||
|
}
|
||||||
|
|
||||||
etcdCfgs[i] = &EtcdServerProcessConfig{
|
etcdCfgs[i] = &EtcdServerProcessConfig{
|
||||||
lg: lg,
|
lg: lg,
|
||||||
|
Loading…
Reference in New Issue
Block a user