fix the data inconsistency issue by adding a txPostLockHook into the backend

Previously the SetConsistentIndex() is called during the apply workflow,
but it's outside the db transaction. If a commit happens between SetConsistentIndex
and the following apply workflow, and etcd crashes for whatever reason right
after the commit, then etcd commits an incomplete transaction to db.
Eventually etcd runs into the data inconsistency issue.

In this commit, we move the SetConsistentIndex into a txPostLockHook, so
it will be executed inside the transaction lock.
This commit is contained in:
ahrtr
2022-04-08 14:39:23 +08:00
parent 3ace622792
commit 66c7aab4d3
21 changed files with 216 additions and 83 deletions

View File

@ -15,7 +15,6 @@
package backend_test
import (
"fmt"
"os"
"testing"
"time"
@ -26,40 +25,60 @@ import (
func TestLockVerify(t *testing.T) {
tcs := []struct {
insideApply bool
lock func(tx backend.BatchTx)
expectPanic bool
name string
insideApply bool
lock func(tx backend.BatchTx)
txPostLockInsideApplyHook func()
expectPanic bool
}{
{
name: "call lockInsideApply from inside apply",
insideApply: true,
lock: lockInsideApply,
expectPanic: false,
},
{
name: "call lockInsideApply from outside apply (without txPostLockInsideApplyHook)",
insideApply: false,
lock: lockInsideApply,
expectPanic: true,
expectPanic: false,
},
{
name: "call lockInsideApply from outside apply (with txPostLockInsideApplyHook)",
insideApply: false,
lock: lockInsideApply,
txPostLockInsideApplyHook: func() {},
expectPanic: true,
},
{
name: "call lockOutsideApply from outside apply",
insideApply: false,
lock: lockOutsideApply,
expectPanic: false,
},
{
name: "call lockOutsideApply from inside apply",
insideApply: true,
lock: lockOutsideApply,
expectPanic: true,
},
{
name: "call Lock from unit test",
insideApply: false,
lock: lockFromUT,
expectPanic: false,
},
}
env := os.Getenv("ETCD_VERIFY")
os.Setenv("ETCD_VERIFY", "lock")
defer func() {
os.Setenv("ETCD_VERIFY", env)
}()
for i, tc := range tcs {
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
for _, tc := range tcs {
t.Run(tc.name, func(t *testing.T) {
be, _ := betesting.NewTmpBackend(t, time.Hour, 10000)
be.SetTxPostLockInsideApplyHook(tc.txPostLockInsideApplyHook)
hasPaniced := handlePanic(func() {
if tc.insideApply {
@ -89,3 +108,4 @@ func applyEntries(be backend.Backend, f func(tx backend.BatchTx)) {
func lockInsideApply(tx backend.BatchTx) { tx.LockInsideApply() }
func lockOutsideApply(tx backend.BatchTx) { tx.LockOutsideApply() }
func lockFromUT(tx backend.BatchTx) { tx.Lock() }