diff --git a/server/config/config.go b/server/config/config.go index 4c82cfae2..575dd96d4 100644 --- a/server/config/config.go +++ b/server/config/config.go @@ -177,6 +177,10 @@ type ServerConfig struct { // Currently all etcd memory gets mlocked, but in future the flag can // be refined to mlock in-use area of bbolt only. ExperimentalMemoryMlock bool `json:"experimental-memory-mlock"` + + // ExperimentalTxnModeWriteWithSharedBuffer enable write transaction to use + // a shared buffer in its readonly check operations. + ExperimentalTxnModeWriteWithSharedBuffer bool `json:"experimental-txn-mode-write-with-shared-buffer"` } // VerifyBootstrap sanity-checks the initial config for bootstrap case diff --git a/server/embed/config.go b/server/embed/config.go index 0dbaaa6b2..83ff19210 100644 --- a/server/embed/config.go +++ b/server/embed/config.go @@ -355,6 +355,9 @@ type Config struct { // Currently all etcd memory gets mlocked, but in future the flag can // be refined to mlock in-use area of bbolt only. ExperimentalMemoryMlock bool `json:"experimental-memory-mlock"` + + // ExperimentalTxnModeWriteWithSharedBuffer enables write transaction to use a shared buffer in its readonly check operations. + ExperimentalTxnModeWriteWithSharedBuffer bool `json:"experimental-txn-mode-write-with-shared-buffer"` } // configYAML holds the config suitable for yaml parsing @@ -444,8 +447,9 @@ func NewConfig() *Config { LogLevel: logutil.DefaultLogLevel, EnableGRPCGateway: true, - ExperimentalDowngradeCheckTime: DefaultDowngradeCheckTime, - ExperimentalMemoryMlock: false, + ExperimentalDowngradeCheckTime: DefaultDowngradeCheckTime, + ExperimentalMemoryMlock: false, + ExperimentalTxnModeWriteWithSharedBuffer: true, } cfg.InitialCluster = cfg.InitialClusterFromName(cfg.Name) return cfg diff --git a/server/embed/etcd.go b/server/embed/etcd.go index c53450cd8..464a253a4 100644 --- a/server/embed/etcd.go +++ b/server/embed/etcd.go @@ -163,56 +163,57 @@ func StartEtcd(inCfg *Config) (e *Etcd, err error) { backendFreelistType := parseBackendFreelistType(cfg.BackendFreelistType) srvcfg := config.ServerConfig{ - Name: cfg.Name, - ClientURLs: cfg.ACUrls, - PeerURLs: cfg.APUrls, - DataDir: cfg.Dir, - DedicatedWALDir: cfg.WalDir, - SnapshotCount: cfg.SnapshotCount, - SnapshotCatchUpEntries: cfg.SnapshotCatchUpEntries, - MaxSnapFiles: cfg.MaxSnapFiles, - MaxWALFiles: cfg.MaxWalFiles, - InitialPeerURLsMap: urlsmap, - InitialClusterToken: token, - DiscoveryURL: cfg.Durl, - DiscoveryProxy: cfg.Dproxy, - NewCluster: cfg.IsNewCluster(), - PeerTLSInfo: cfg.PeerTLSInfo, - TickMs: cfg.TickMs, - ElectionTicks: cfg.ElectionTicks(), - InitialElectionTickAdvance: cfg.InitialElectionTickAdvance, - AutoCompactionRetention: autoCompactionRetention, - AutoCompactionMode: cfg.AutoCompactionMode, - QuotaBackendBytes: cfg.QuotaBackendBytes, - BackendBatchLimit: cfg.BackendBatchLimit, - BackendFreelistType: backendFreelistType, - BackendBatchInterval: cfg.BackendBatchInterval, - MaxTxnOps: cfg.MaxTxnOps, - MaxRequestBytes: cfg.MaxRequestBytes, - SocketOpts: cfg.SocketOpts, - StrictReconfigCheck: cfg.StrictReconfigCheck, - ClientCertAuthEnabled: cfg.ClientTLSInfo.ClientCertAuth, - AuthToken: cfg.AuthToken, - BcryptCost: cfg.BcryptCost, - TokenTTL: cfg.AuthTokenTTL, - CORS: cfg.CORS, - HostWhitelist: cfg.HostWhitelist, - InitialCorruptCheck: cfg.ExperimentalInitialCorruptCheck, - CorruptCheckTime: cfg.ExperimentalCorruptCheckTime, - PreVote: cfg.PreVote, - Logger: cfg.logger, - LoggerConfig: cfg.loggerConfig, - LoggerCore: cfg.loggerCore, - LoggerWriteSyncer: cfg.loggerWriteSyncer, - ForceNewCluster: cfg.ForceNewCluster, - EnableGRPCGateway: cfg.EnableGRPCGateway, - UnsafeNoFsync: cfg.UnsafeNoFsync, - EnableLeaseCheckpoint: cfg.ExperimentalEnableLeaseCheckpoint, - CompactionBatchLimit: cfg.ExperimentalCompactionBatchLimit, - WatchProgressNotifyInterval: cfg.ExperimentalWatchProgressNotifyInterval, - DowngradeCheckTime: cfg.ExperimentalDowngradeCheckTime, - WarningApplyDuration: cfg.ExperimentalWarningApplyDuration, - ExperimentalMemoryMlock: cfg.ExperimentalMemoryMlock, + Name: cfg.Name, + ClientURLs: cfg.ACUrls, + PeerURLs: cfg.APUrls, + DataDir: cfg.Dir, + DedicatedWALDir: cfg.WalDir, + SnapshotCount: cfg.SnapshotCount, + SnapshotCatchUpEntries: cfg.SnapshotCatchUpEntries, + MaxSnapFiles: cfg.MaxSnapFiles, + MaxWALFiles: cfg.MaxWalFiles, + InitialPeerURLsMap: urlsmap, + InitialClusterToken: token, + DiscoveryURL: cfg.Durl, + DiscoveryProxy: cfg.Dproxy, + NewCluster: cfg.IsNewCluster(), + PeerTLSInfo: cfg.PeerTLSInfo, + TickMs: cfg.TickMs, + ElectionTicks: cfg.ElectionTicks(), + InitialElectionTickAdvance: cfg.InitialElectionTickAdvance, + AutoCompactionRetention: autoCompactionRetention, + AutoCompactionMode: cfg.AutoCompactionMode, + QuotaBackendBytes: cfg.QuotaBackendBytes, + BackendBatchLimit: cfg.BackendBatchLimit, + BackendFreelistType: backendFreelistType, + BackendBatchInterval: cfg.BackendBatchInterval, + MaxTxnOps: cfg.MaxTxnOps, + MaxRequestBytes: cfg.MaxRequestBytes, + SocketOpts: cfg.SocketOpts, + StrictReconfigCheck: cfg.StrictReconfigCheck, + ClientCertAuthEnabled: cfg.ClientTLSInfo.ClientCertAuth, + AuthToken: cfg.AuthToken, + BcryptCost: cfg.BcryptCost, + TokenTTL: cfg.AuthTokenTTL, + CORS: cfg.CORS, + HostWhitelist: cfg.HostWhitelist, + InitialCorruptCheck: cfg.ExperimentalInitialCorruptCheck, + CorruptCheckTime: cfg.ExperimentalCorruptCheckTime, + PreVote: cfg.PreVote, + Logger: cfg.logger, + LoggerConfig: cfg.loggerConfig, + LoggerCore: cfg.loggerCore, + LoggerWriteSyncer: cfg.loggerWriteSyncer, + ForceNewCluster: cfg.ForceNewCluster, + EnableGRPCGateway: cfg.EnableGRPCGateway, + UnsafeNoFsync: cfg.UnsafeNoFsync, + EnableLeaseCheckpoint: cfg.ExperimentalEnableLeaseCheckpoint, + CompactionBatchLimit: cfg.ExperimentalCompactionBatchLimit, + WatchProgressNotifyInterval: cfg.ExperimentalWatchProgressNotifyInterval, + DowngradeCheckTime: cfg.ExperimentalDowngradeCheckTime, + WarningApplyDuration: cfg.ExperimentalWarningApplyDuration, + ExperimentalMemoryMlock: cfg.ExperimentalMemoryMlock, + ExperimentalTxnModeWriteWithSharedBuffer: cfg.ExperimentalTxnModeWriteWithSharedBuffer, } print(e.cfg.logger, *cfg, srvcfg, memberInitialized) if e.Server, err = etcdserver.NewServer(srvcfg); err != nil { diff --git a/server/etcdmain/config.go b/server/etcdmain/config.go index 3a6cee036..64ed7f1cb 100644 --- a/server/etcdmain/config.go +++ b/server/etcdmain/config.go @@ -267,6 +267,7 @@ func newConfig() *config { fs.DurationVar(&cfg.ec.ExperimentalDowngradeCheckTime, "experimental-downgrade-check-time", cfg.ec.ExperimentalDowngradeCheckTime, "Duration of time between two downgrade status check.") fs.DurationVar(&cfg.ec.ExperimentalWarningApplyDuration, "experimental-warning-apply-duration", cfg.ec.ExperimentalWarningApplyDuration, "Time duration after which a warning is generated if request takes more time.") fs.BoolVar(&cfg.ec.ExperimentalMemoryMlock, "experimental-memory-mlock", cfg.ec.ExperimentalMemoryMlock, "Enable to enforce etcd pages (in particular bbolt) to stay in RAM.") + fs.BoolVar(&cfg.ec.ExperimentalTxnModeWriteWithSharedBuffer, "experimental-txn-mode-write-with-shared-buffer", true, "Enable the write transaction to use a shared buffer in its readonly check operations.") // unsafe fs.BoolVar(&cfg.ec.UnsafeNoFsync, "unsafe-no-fsync", false, "Disables fsync, unsafe, will cause data loss.") diff --git a/server/etcdmain/help.go b/server/etcdmain/help.go index aaad8aea8..726c18cab 100644 --- a/server/etcdmain/help.go +++ b/server/etcdmain/help.go @@ -220,6 +220,8 @@ Experimental feature: Duration of periodical watch progress notification. --experimental-warning-apply-duration '100ms' Warning is generated if requests take more than this duration. + --experimental-txn-mode-write-with-shared-buffer 'true' + Enable the write transaction to use a shared buffer in its readonly check operations. Unsafe feature: --force-new-cluster 'false' diff --git a/server/etcdserver/apply.go b/server/etcdserver/apply.go index 46da09a0b..cf281d1a9 100644 --- a/server/etcdserver/apply.go +++ b/server/etcdserver/apply.go @@ -438,7 +438,7 @@ func (a *applierV3backend) Txn(ctx context.Context, rt *pb.TxnRequest) (*pb.TxnR // When the transaction contains write operations, we use ReadTx instead of // ConcurrentReadTx to avoid extra overhead of copying buffer. var txn mvcc.TxnWrite - if isWrite { + if isWrite && a.s.Cfg.ExperimentalTxnModeWriteWithSharedBuffer { txn = mvcc.NewReadOnlyTxnWrite(a.s.KV().Read(mvcc.SharedBufReadTxMode, trace)) } else { txn = mvcc.NewReadOnlyTxnWrite(a.s.KV().Read(mvcc.ConcurrentReadTxMode, trace))