
*: Update cases related to Downgrade

Fixes: 65159a2b96

Signed-off-by: Wei Fu <fuweid89@gmail.com>
// Copyright 2021 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package e2e

import (
	"context"
	"fmt"
	"math/rand"
	"testing"
	"time"

	"github.com/coreos/go-semver/semver"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	pb "go.etcd.io/etcd/api/v3/etcdserverpb"
	"go.etcd.io/etcd/api/v3/version"
	"go.etcd.io/etcd/client/pkg/v3/fileutil"
	"go.etcd.io/etcd/client/pkg/v3/types"
	clientv3 "go.etcd.io/etcd/client/v3"
	"go.etcd.io/etcd/server/v3/etcdserver"
	"go.etcd.io/etcd/server/v3/etcdserver/api/membership"
	"go.etcd.io/etcd/server/v3/etcdserver/api/snap"
	"go.etcd.io/etcd/server/v3/etcdserver/api/v2store"
	"go.etcd.io/etcd/server/v3/storage/datadir"
	"go.etcd.io/etcd/tests/v3/framework/config"
	"go.etcd.io/etcd/tests/v3/framework/e2e"
)

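// CancellationState marks the point in the test flow, if any, at which the
// in-progress downgrade is cancelled.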
type CancellationState int

const (
	noCancellation CancellationState = iota
	cancelRightBeforeEnable
	cancelRightAfterEnable
	cancelAfterDowngrading
)

func TestDowngradeUpgradeClusterOf1(t *testing.T) {
	testDowngradeUpgrade(t, 1, 1, false, noCancellation)
}

func TestDowngradeUpgrade2InClusterOf3(t *testing.T) {
	testDowngradeUpgrade(t, 2, 3, false, noCancellation)
}

func TestDowngradeUpgradeClusterOf3(t *testing.T) {
	testDowngradeUpgrade(t, 3, 3, false, noCancellation)
}

func TestDowngradeUpgradeClusterOf1WithSnapshot(t *testing.T) {
	testDowngradeUpgrade(t, 1, 1, true, noCancellation)
}

func TestDowngradeUpgradeClusterOf3WithSnapshot(t *testing.T) {
	testDowngradeUpgrade(t, 3, 3, true, noCancellation)
}

func TestDowngradeCancellationWithoutEnablingClusterOf1(t *testing.T) {
	testDowngradeUpgrade(t, 0, 1, false, cancelRightBeforeEnable)
}

func TestDowngradeCancellationRightAfterEnablingClusterOf1(t *testing.T) {
	testDowngradeUpgrade(t, 0, 1, false, cancelRightAfterEnable)
}

func TestDowngradeCancellationWithoutEnablingClusterOf3(t *testing.T) {
	testDowngradeUpgrade(t, 0, 3, false, cancelRightBeforeEnable)
}

func TestDowngradeCancellationRightAfterEnablingClusterOf3(t *testing.T) {
	testDowngradeUpgrade(t, 0, 3, false, cancelRightAfterEnable)
}

func TestDowngradeCancellationAfterDowngrading1InClusterOf3(t *testing.T) {
	testDowngradeUpgrade(t, 1, 3, false, cancelAfterDowngrading)
}

func TestDowngradeCancellationAfterDowngrading2InClusterOf3(t *testing.T) {
	testDowngradeUpgrade(t, 2, 3, false, cancelAfterDowngrading)
}

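// testDowngradeUpgrade downgrades numberOfMembersToDowngrade members of a
// clusterSize cluster to the previous minor release and then upgrades them
// back to the current version, optionally forcing a snapshot in between and
// optionally cancelling the downgrade at the stage given by triggerCancellation.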
func testDowngradeUpgrade(t *testing.T, numberOfMembersToDowngrade int, clusterSize int, triggerSnapshot bool, triggerCancellation CancellationState) {
	currentEtcdBinary := e2e.BinPath.Etcd
	lastReleaseBinary := e2e.BinPath.EtcdLastRelease
	if !fileutil.Exist(lastReleaseBinary) {
		t.Skipf("%q does not exist", lastReleaseBinary)
	}

	currentVersion, err := e2e.GetVersionFromBinary(currentEtcdBinary)
	require.NoError(t, err)
	// wipe any pre-release suffix like -alpha.0 we see commonly in builds
	currentVersion.PreRelease = ""

	lastVersion, err := e2e.GetVersionFromBinary(lastReleaseBinary)
	require.NoError(t, err)

	require.Equalf(t, lastVersion.Minor, currentVersion.Minor-1, "unexpected minor version difference")
	currentVersionStr := currentVersion.String()
	lastVersionStr := lastVersion.String()

	lastClusterVersion := semver.New(lastVersionStr)
	lastClusterVersion.Patch = 0

	e2e.BeforeTest(t)

	t.Logf("Create cluster with version %s", currentVersionStr)
	var snapshotCount uint64 = 10
	epc := newCluster(t, clusterSize, snapshotCount)
	for i := 0; i < len(epc.Procs); i++ {
		e2e.ValidateVersion(t, epc.Cfg, epc.Procs[i], version.Versions{
			Cluster: currentVersionStr,
			Server:  version.Version,
			Storage: currentVersionStr,
		})
	}
	cc := epc.Etcdctl()
	t.Logf("Cluster created")
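	// etcd may reject membership changes until the existing members have been
	// healthy for HealthInterval, so give multi-member clusters time before the
	// learner add/remove below.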
	if len(epc.Procs) > 1 {
		t.Log("Waiting the health interval required before making membership changes")
		time.Sleep(etcdserver.HealthInterval)
	}

	t.Log("Downgrade should be disabled")
	e2e.ValidateDowngradeInfo(t, epc, &pb.DowngradeInfo{Enabled: false})

	t.Log("Adding a learner member to test membership changes without breaking quorum")
	resp, err := cc.MemberAddAsLearner(context.Background(), "fake1", []string{"http://127.0.0.1:1001"})
	require.NoError(t, err)
	if triggerSnapshot {
		t.Logf("Generating snapshot")
		generateSnapshot(t, snapshotCount, cc)
		verifySnapshot(t, epc)
	}
	t.Log("Removing learner to test membership")
	_, err = cc.MemberRemove(context.Background(), resp.Member.ID)
	require.NoError(t, err)
	beforeMembers, beforeKV := getMembersAndKeys(t, cc)

	if triggerCancellation == cancelRightBeforeEnable {
		t.Logf("Cancelling downgrade before enabling")
		e2e.DowngradeCancel(t, epc)
		t.Log("Downgrade cancelled, validating that the cluster is in the expected state")
		e2e.ValidateMemberVersions(t, epc, generateIdenticalVersions(clusterSize, currentVersion))

		return // No need to perform downgrading, end the test here
	}
	e2e.DowngradeEnable(t, epc, lastVersion)

	t.Log("Downgrade should be enabled")
	e2e.ValidateDowngradeInfo(t, epc, &pb.DowngradeInfo{Enabled: true, TargetVersion: lastClusterVersion.String()})

	if triggerCancellation == cancelRightAfterEnable {
		t.Logf("Cancelling downgrade right after enabling (no node is downgraded yet)")
		e2e.DowngradeCancel(t, epc)
		t.Log("Downgrade cancelled, validating that the cluster is in the expected state")
		e2e.ValidateMemberVersions(t, epc, generateIdenticalVersions(clusterSize, currentVersion))
		return // No need to perform downgrading, end the test here
	}

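	// Pick a random subset of members to downgrade (and later upgrade).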
	membersToChange := rand.Perm(len(epc.Procs))[:numberOfMembersToDowngrade]
	t.Logf("Selected members for downgrade/upgrade operations: %v", membersToChange)

	t.Logf("Starting downgrade process to %q", lastVersionStr)
	err = e2e.DowngradeUpgradeMembersByID(t, nil, epc, membersToChange, true, currentVersion, lastClusterVersion)
	require.NoError(t, err)
	if len(membersToChange) == len(epc.Procs) {
		e2e.AssertProcessLogs(t, epc.Procs[epc.WaitLeader(t)], "the cluster has been downgraded")
	}

	t.Log("Downgrade complete")
	afterMembers, afterKV := getMembersAndKeys(t, cc)
	assert.Equal(t, beforeKV.Kvs, afterKV.Kvs)
	assert.Equal(t, beforeMembers.Members, afterMembers.Members)

	if len(epc.Procs) > 1 {
		t.Log("Waiting the health interval required before making membership changes")
		time.Sleep(etcdserver.HealthInterval)
	}

	if triggerCancellation == cancelAfterDowngrading {
		e2e.DowngradeCancel(t, epc)
		t.Log("Downgrade cancelled, validating that the cluster is in the expected state")
		e2e.ValidateMemberVersions(t, epc, generatePartialCancellationVersions(clusterSize, membersToChange, lastClusterVersion))
	}

	t.Log("Adding a learner member to test membership changes without breaking quorum")
	resp, err = cc.MemberAddAsLearner(context.Background(), "fake2", []string{"http://127.0.0.1:1002"})
	require.NoError(t, err)
	if triggerSnapshot {
		t.Logf("Generating snapshot")
		generateSnapshot(t, snapshotCount, cc)
		verifySnapshot(t, epc)
	}
	t.Log("Removing learner to test membership")
	_, err = cc.MemberRemove(context.Background(), resp.Member.ID)
	require.NoError(t, err)
	beforeMembers, beforeKV = getMembersAndKeys(t, cc)

	t.Logf("Starting upgrade process to %q", currentVersionStr)
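	// The downgrade job is expected to still be enabled after the upgrade only
	// when it was never cancelled and only part of the cluster was downgraded;
	// a full downgrade completes the job and the cluster disables it on its own.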
	downgradeEnabled := triggerCancellation == noCancellation && numberOfMembersToDowngrade < clusterSize
	err = e2e.DowngradeUpgradeMembersByID(t, nil, epc, membersToChange, downgradeEnabled, lastClusterVersion, currentVersion)
	require.NoError(t, err)
	t.Log("Upgrade complete")

	if downgradeEnabled {
		t.Log("Downgrade should still be enabled")
		e2e.ValidateDowngradeInfo(t, epc, &pb.DowngradeInfo{Enabled: true, TargetVersion: lastClusterVersion.String()})
	} else {
		t.Log("Downgrade should be disabled")
		e2e.ValidateDowngradeInfo(t, epc, &pb.DowngradeInfo{Enabled: false})
	}

	afterMembers, afterKV = getMembersAndKeys(t, cc)
	assert.Equal(t, beforeKV.Kvs, afterKV.Kvs)
	assert.Equal(t, beforeMembers.Members, afterMembers.Members)
}

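// newCluster starts a cluster of clusterSize members with the given snapshot
// count, keeping the data directories so members can later be restarted with a
// different binary.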
func newCluster(t *testing.T, clusterSize int, snapshotCount uint64) *e2e.EtcdProcessCluster {
	epc, err := e2e.NewEtcdProcessCluster(context.TODO(), t,
		e2e.WithClusterSize(clusterSize),
		e2e.WithSnapshotCount(snapshotCount),
		e2e.WithKeepDataDir(true),
	)
	if err != nil {
		t.Fatalf("could not start etcd process cluster (%v)", err)
	}
	t.Cleanup(func() {
		if errC := epc.Close(); errC != nil {
			t.Fatalf("error closing etcd processes (%v)", errC)
		}
	})
	return epc
}

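// generateSnapshot writes enough keys (three times the snapshot count) to make
// every member take at least one raft snapshot.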
func generateSnapshot(t *testing.T, snapshotCount uint64, cc *e2e.EtcdctlV3) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	var i uint64
	t.Logf("Adding keys")
	for i = 0; i < snapshotCount*3; i++ {
		err := cc.Put(ctx, fmt.Sprintf("%d", i), "1", config.PutOptions{})
		assert.NoError(t, err)
	}
}

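// verifySnapshot checks that every member has a loadable raft snapshot on disk.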
func verifySnapshot(t *testing.T, epc *e2e.EtcdProcessCluster) {
	for i := range epc.Procs {
		t.Logf("Verifying snapshot for member %d", i)
		ss := snap.New(epc.Cfg.Logger, datadir.ToSnapDir(epc.Procs[i].Config().DataDirPath))
		_, err := ss.Load()
		require.NoError(t, err)
	}
	t.Logf("All members have a valid snapshot")
}

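// verifySnapshotMembers loads each member's snapshot and checks that it
// contains every member listed in expectedMembers.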
func verifySnapshotMembers(t *testing.T, epc *e2e.EtcdProcessCluster, expectedMembers *clientv3.MemberListResponse) {
	for i := range epc.Procs {
		t.Logf("Verifying snapshot for member %d", i)
		ss := snap.New(epc.Cfg.Logger, datadir.ToSnapDir(epc.Procs[i].Config().DataDirPath))
		snapshot, err := ss.Load()
		require.NoError(t, err)
		st := v2store.New(etcdserver.StoreClusterPrefix, etcdserver.StoreKeysPrefix)
		err = st.Recovery(snapshot.Data)
		require.NoError(t, err)
		for _, m := range expectedMembers.Members {
			_, err := st.Get(membership.MemberStoreKey(types.ID(m.ID)), true, true)
			require.NoError(t, err)
		}
		t.Logf("Verified snapshot for member %d", i)
	}
	t.Log("All members have a valid snapshot")
}

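// getMembersAndKeys returns the current member list and all keys in the store,
// used to compare cluster state before and after a downgrade or upgrade.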
func getMembersAndKeys(t *testing.T, cc *e2e.EtcdctlV3) (*clientv3.MemberListResponse, *clientv3.GetResponse) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	kvs, err := cc.Get(ctx, "", config.GetOptions{Prefix: true})
	require.NoError(t, err)

	members, err := cc.MemberList(ctx, false)
	require.NoError(t, err)

	return members, kvs
}

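// generateIdenticalVersions returns the expected per-member versions for a
// cluster in which every member runs, and stores data in, the given version.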
func generateIdenticalVersions(clusterSize int, ver *semver.Version) []*version.Versions {
	ret := make([]*version.Versions, clusterSize)

	for i := range clusterSize {
		ret[i] = &version.Versions{
			Cluster: ver.String(),
			Server:  ver.String(),
			Storage: ver.String(),
		}
	}

	return ret
}

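// generatePartialCancellationVersions returns the expected per-member versions
// after a cancelled partial downgrade: the cluster version stays at ver, the
// members in membersToChange serve ver, and the remaining members serve one
// minor version above it.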
func generatePartialCancellationVersions(clusterSize int, membersToChange []int, ver *semver.Version) []*version.Versions {
	ret := make([]*version.Versions, clusterSize)

	for i := range clusterSize {
		ret[i] = &version.Versions{
			Cluster: ver.String(),
			Server:  e2e.OffsetMinor(ver, 1).String(),
			Storage: "",
		}
	}

	for i := range membersToChange {
		ret[membersToChange[i]].Server = ver.String()
	}

	return ret
}