fix(defrag): handle defragdb failure
Signed-off-by: Thomas Gosteli <thomas.gosteli@protonmail.ch>
This commit is contained in:
@ -490,8 +490,8 @@ func (b *backend) defrag() error {
|
||||
options = *boltOpenOptions
|
||||
}
|
||||
options.OpenFile = func(_ string, _ int, _ os.FileMode) (file *os.File, err error) {
|
||||
// gofail: var defragNoSpace string
|
||||
// return nil, fmt.Errorf(defragNoSpace)
|
||||
// gofail: var defragOpenFileError string
|
||||
// return nil, fmt.Errorf(defragOpenFileError)
|
||||
return temp, nil
|
||||
}
|
||||
// Don't load tmp db into memory regardless of opening options
|
||||
@ -526,6 +526,11 @@ func (b *backend) defrag() error {
|
||||
if rmErr := os.RemoveAll(tmpdb.Path()); rmErr != nil {
|
||||
b.lg.Error("failed to remove db.tmp after defragmentation completed", zap.Error(rmErr))
|
||||
}
|
||||
|
||||
// Defragmentation failed: begin new bbolt batch and read transactions before returning the error, so the backend stays usable.
|
||||
b.batchTx.tx = b.unsafeBegin(true)
|
||||
b.readTx.tx = b.unsafeBegin(false)
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
@ -578,6 +583,9 @@ func (b *backend) defrag() error {
|
||||
}
|
||||
|
||||
func defragdb(odb, tmpdb *bolt.DB, limit int) error {
|
||||
// gofail: var defragdbFail string
|
||||
// return fmt.Errorf(defragdbFail)
|
||||
|
||||
// open a tx on tmpdb for writes
|
||||
tmptx, err := tmpdb.Begin(true)
|
||||
if err != nil {
|
||||
|
@ -16,6 +16,7 @@ package e2e
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@ -26,24 +27,45 @@ import (
|
||||
)
|
||||
|
||||
func TestDefragNoSpace(t *testing.T) {
|
||||
e2e.BeforeTest(t)
|
||||
tests := []struct {
|
||||
name string
|
||||
failpoint string
|
||||
err string
|
||||
}{
|
||||
{
|
||||
name: "no space (#18810) - can't open/create new bbolt db",
|
||||
failpoint: "defragOpenFileError",
|
||||
err: "no space",
|
||||
},
|
||||
{
|
||||
name: "defragdb failure",
|
||||
failpoint: "defragdbFail",
|
||||
err: "some random error",
|
||||
},
|
||||
}
|
||||
|
||||
clus, err := e2e.NewEtcdProcessCluster(context.TODO(), t,
|
||||
e2e.WithClusterSize(1),
|
||||
e2e.WithGoFailEnabled(true),
|
||||
)
|
||||
require.NoError(t, err)
|
||||
t.Cleanup(func() { clus.Stop() })
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
e2e.BeforeTest(t)
|
||||
|
||||
member := clus.Procs[0]
|
||||
clus, err := e2e.NewEtcdProcessCluster(context.TODO(), t,
|
||||
e2e.WithClusterSize(1),
|
||||
e2e.WithGoFailEnabled(true),
|
||||
)
|
||||
require.NoError(t, err)
|
||||
t.Cleanup(func() { clus.Stop() })
|
||||
|
||||
require.NoError(t, member.Failpoints().SetupHTTP(context.Background(), "defragNoSpace", `return("no space")`))
|
||||
require.ErrorContains(t, member.Etcdctl().Defragment(context.Background(), config.DefragOption{Timeout: time.Minute}), "no space")
|
||||
member := clus.Procs[0]
|
||||
|
||||
// Make sure etcd continues to run even after the failed defrag attempt
|
||||
require.NoError(t, member.Etcdctl().Put(context.Background(), "foo", "bar", config.PutOptions{}))
|
||||
value, err := member.Etcdctl().Get(context.Background(), "foo", config.GetOptions{})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, value.Kvs, 1)
|
||||
require.Equal(t, "bar", string(value.Kvs[0].Value))
|
||||
require.NoError(t, member.Failpoints().SetupHTTP(context.Background(), tc.failpoint, fmt.Sprintf(`return("%s")`, tc.err)))
|
||||
require.ErrorContains(t, member.Etcdctl().Defragment(context.Background(), config.DefragOption{Timeout: time.Minute}), tc.err)
|
||||
|
||||
// Make sure etcd continues to run even after the failed defrag attempt
|
||||
require.NoError(t, member.Etcdctl().Put(context.Background(), "foo", "bar", config.PutOptions{}))
|
||||
value, err := member.Etcdctl().Get(context.Background(), "foo", config.GetOptions{})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, value.Kvs, 1)
|
||||
require.Equal(t, "bar", string(value.Kvs[0].Value))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user