add a txPostLockHook into the backend

Previously, SetConsistentIndex() was called during the apply workflow,
but outside the db transaction. If a commit happens between SetConsistentIndex
and the following apply workflow, and etcd crashes for whatever reason right
after the commit, then etcd has committed an incomplete transaction to the db.
Eventually etcd runs into a data inconsistency issue.

In this commit, we move the SetConsistentIndex call into a txPostLockHook, so
that it is executed while the transaction lock is held.
This commit is contained in:
ahrtr
2022-03-30 15:26:31 +08:00
parent c4d055fe7b
commit bfd5170f66
19 changed files with 104 additions and 44 deletions

View File

@ -67,6 +67,9 @@ type Backend interface {
Defrag() error
ForceCommit()
Close() error
// SetTxPostLockHook sets a txPostLockHook.
SetTxPostLockHook(func())
}
type Snapshot interface {
@ -119,6 +122,9 @@ type backend struct {
hooks Hooks
// txPostLockHook is called each time right after locking the tx.
txPostLockHook func()
lg *zap.Logger
}
@ -227,6 +233,14 @@ func (b *backend) BatchTx() BatchTx {
return b.batchTx
}
// SetTxPostLockHook installs hook as the backend's txPostLockHook, replacing
// any hook that was set earlier.
func (b *backend) SetTxPostLockHook(hook func()) {
// The batchTx must be held while the field is written, because the
// periodic commit may be accessing the txPostLockHook at the moment.
// NOTE(review): LockWithoutHook is presumably used so that acquiring the
// lock here does not itself invoke the hook being replaced; confirm
// against the batchTx implementation.
b.batchTx.LockWithoutHook()
defer b.batchTx.Unlock()
b.txPostLockHook = hook
}
// ReadTx returns the read transaction stored in the backend's readTx field.
func (b *backend) ReadTx() ReadTx {
	return b.readTx
}
// ConcurrentReadTx creates and returns a new ReadTx, which:
@ -438,7 +452,7 @@ func (b *backend) defrag() error {
// TODO: make this non-blocking?
// lock batchTx to ensure nobody is using previous tx, and then
// close previous ongoing tx.
b.batchTx.Lock()
b.batchTx.LockWithoutHook()
defer b.batchTx.Unlock()
// lock database after lock tx to avoid deadlock.