fix the data inconsistency issue by adding a txPostLockHook into the backend
Previously the SetConsistentIndex() is called during the apply workflow, but it's outside the db transaction. If a commit happens between SetConsistentIndex and the following apply workflow, and etcd crashes for whatever reason right after the commit, then etcd commits an incomplete transaction to db. Eventually etcd runs into the data inconsistency issue. In this commit, we move the SetConsistentIndex into a txPostLockHook, so it will be executed inside the transaction lock.
This commit is contained in:
@ -68,6 +68,9 @@ type Backend interface {
|
||||
Defrag() error
|
||||
ForceCommit()
|
||||
Close() error
|
||||
|
||||
// SetTxPostLockInsideApplyHook sets a txPostLockInsideApplyHook.
|
||||
SetTxPostLockInsideApplyHook(func())
|
||||
}
|
||||
|
||||
type Snapshot interface {
|
||||
@ -120,6 +123,9 @@ type backend struct {
|
||||
|
||||
hooks Hooks
|
||||
|
||||
// txPostLockInsideApplyHook is called each time right after locking the tx.
|
||||
txPostLockInsideApplyHook func()
|
||||
|
||||
lg *zap.Logger
|
||||
}
|
||||
|
||||
@ -231,6 +237,14 @@ func (b *backend) BatchTx() BatchTx {
|
||||
return b.batchTx
|
||||
}
|
||||
|
||||
func (b *backend) SetTxPostLockInsideApplyHook(hook func()) {
|
||||
// It needs to lock the batchTx, because the periodic commit
|
||||
// may be accessing the txPostLockInsideApplyHook at the moment.
|
||||
b.batchTx.lock()
|
||||
defer b.batchTx.Unlock()
|
||||
b.txPostLockInsideApplyHook = hook
|
||||
}
|
||||
|
||||
func (b *backend) ReadTx() ReadTx { return b.readTx }
|
||||
|
||||
// ConcurrentReadTx creates and returns a new ReadTx, which:
|
||||
@ -440,7 +454,7 @@ func (b *backend) defrag() error {
|
||||
// TODO: make this non-blocking?
|
||||
// lock batchTx to ensure nobody is using previous tx, and then
|
||||
// close previous ongoing tx.
|
||||
b.batchTx.Lock()
|
||||
b.batchTx.LockOutsideApply()
|
||||
defer b.batchTx.Unlock()
|
||||
|
||||
// lock database after lock tx to avoid deadlock.
|
||||
|
Reference in New Issue
Block a user