etcd-tester: set advertise ports, delay w/ network faults
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
@@ -30,10 +30,12 @@ import (
 
 // agentConfig holds information needed to interact/configure an agent and its etcd process
 type agentConfig struct {
     endpoint string
     clientPort int
-    peerPort int
-    failpointPort int
+    advertiseClientPort int
+    peerPort int
+    advertisePeerPort int
+    failpointPort int
 }
 
 type cluster struct {
@@ -61,12 +63,14 @@ func (c *cluster) bootstrap() error {
             return err
         }
         members[i] = &member{
             Agent: agent,
             Endpoint: a.endpoint,
             Name: fmt.Sprintf("etcd-%d", i),
             ClientURL: fmt.Sprintf("http://%s:%d", host, a.clientPort),
-            PeerURL: fmt.Sprintf("http://%s:%d", host, a.peerPort),
-            FailpointURL: fmt.Sprintf("http://%s:%d", host, a.failpointPort),
+            AdvertiseClientURL: fmt.Sprintf("http://%s:%d", host, a.advertiseClientPort),
+            PeerURL: fmt.Sprintf("http://%s:%d", host, a.peerPort),
+            AdvertisePeerURL: fmt.Sprintf("http://%s:%d", host, a.advertisePeerPort),
+            FailpointURL: fmt.Sprintf("http://%s:%d", host, a.failpointPort),
         }
         memberNameURLs[i] = members[i].ClusterEntry()
     }
@@ -128,7 +128,10 @@ func (f *failureDelay) Inject(c *cluster, round int) error {
     if err := f.failure.Inject(c, round); err != nil {
         return err
     }
-    time.Sleep(f.delayDuration)
+    if f.delayDuration > 0 {
+        plog.Infof("sleeping delay duration %v for %q", f.delayDuration, f.failure.Desc())
+        time.Sleep(f.delayDuration)
+    }
     return nil
 }
 
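Note: failureDelay itself is defined elsewhere in etcd-tester and is not shown in this diff. Below is a minimal sketch of the types this hunk relies on, under the assumption that the failure interface exposes Inject, Recover, and Desc (Inject and Desc appear in the hunk above; Recover is inferred from the recoverMember fields in the constructors further down). The real definitions may differ in detail.

// sketch only; "faults" is a hypothetical package name, and cluster is a stub
package faults

import "time"

type cluster struct{} // stand-in for the real tester cluster type

// Assumed shape of the failure interface used throughout etcd-tester.
type failure interface {
    Inject(c *cluster, round int) error
    Recover(c *cluster, round int) error
    Desc() string
}

// failureDelay embeds another failure. Recover and Desc are promoted
// unchanged, while Inject (shown in the hunk above) first injects the
// inner failure and then holds the fault for delayDuration.
type failureDelay struct {
    failure
    delayDuration time.Duration
}

For the network faults below, delayDuration is set to triggerElectionDur (5s), so the fault stays in place long enough for the default 1s election timeout to fire before recovery.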
@@ -24,6 +24,9 @@ const (
     slowNetworkLatency = 500 // 500 millisecond
     randomVariation = 50
 
+    // delay duration to trigger leader election (default election timeout 1s)
+    triggerElectionDur = 5 * time.Second
+
     // Wait more when it recovers from slow network, because network layer
     // needs extra time to propagate traffic control (tc command) change.
     // Otherwise, we get different hash values from the previous revision.
@@ -82,19 +85,27 @@ func injectDropPort(m *member) error { return m.Agent.DropPort(m.peerPort()) }
 func recoverDropPort(m *member) error { return m.Agent.RecoverPort(m.peerPort()) }
 
 func newFailureIsolate() failure {
-    return &failureOne{
+    f := &failureOne{
         description: "isolate one member",
         injectMember: injectDropPort,
         recoverMember: recoverDropPort,
     }
+    return &failureDelay{
+        failure: f,
+        delayDuration: triggerElectionDur,
+    }
 }
 
 func newFailureIsolateAll() failure {
-    return &failureAll{
+    f := &failureAll{
         description: "isolate all members",
         injectMember: injectDropPort,
         recoverMember: recoverDropPort,
     }
+    return &failureDelay{
+        failure: f,
+        delayDuration: triggerElectionDur,
+    }
 }
 
 func injectLatency(m *member) error {
@@ -115,11 +126,15 @@ func recoverLatency(m *member) error {
 
 func newFailureSlowNetworkOneMember() failure {
     desc := fmt.Sprintf("slow down one member's network by adding %d ms latency", slowNetworkLatency)
-    return &failureOne{
+    f := &failureOne{
         description: description(desc),
         injectMember: injectLatency,
         recoverMember: recoverLatency,
     }
+    return &failureDelay{
+        failure: f,
+        delayDuration: triggerElectionDur,
+    }
 }
 
 func newFailureSlowNetworkLeader() failure {
@@ -129,15 +144,23 @@ func newFailureSlowNetworkLeader() failure {
         injectMember: injectLatency,
         recoverMember: recoverLatency,
     }
-    return &failureLeader{ff, 0}
+    f := &failureLeader{ff, 0}
+    return &failureDelay{
+        failure: f,
+        delayDuration: triggerElectionDur,
+    }
 }
 
 func newFailureSlowNetworkAll() failure {
-    return &failureAll{
+    f := &failureAll{
         description: "slow down all members' network",
         injectMember: injectLatency,
         recoverMember: recoverLatency,
     }
+    return &failureDelay{
+        failure: f,
+        delayDuration: triggerElectionDur,
+    }
 }
 
 func newFailureNop() failure {
@@ -41,7 +41,9 @@ const (
 func main() {
     endpointStr := flag.String("agent-endpoints", "localhost:9027", "HTTP RPC endpoints of agents. Do not specify the schema.")
     clientPorts := flag.String("client-ports", "", "etcd client port for each agent endpoint")
+    advertiseClientPorts := flag.String("advertise-client-ports", "", "etcd advertise client port for each agent endpoint")
     peerPorts := flag.String("peer-ports", "", "etcd peer port for each agent endpoint")
+    advertisePeerPorts := flag.String("advertise-peer-ports", "", "etcd advertise peer port for each agent endpoint")
     failpointPorts := flag.String("failpoint-ports", "", "etcd failpoint port for each agent endpoint")
 
     stressKeyLargeSize := flag.Uint("stress-key-large-size", 32*1024+1, "the size of each large key written into etcd.")
@@ -67,14 +69,18 @@ func main() {
 
     eps := strings.Split(*endpointStr, ",")
     cports := portsFromArg(*clientPorts, len(eps), defaultClientPort)
+    acports := portsFromArg(*advertiseClientPorts, len(eps), defaultClientPort)
     pports := portsFromArg(*peerPorts, len(eps), defaultPeerPort)
+    apports := portsFromArg(*advertisePeerPorts, len(eps), defaultPeerPort)
     fports := portsFromArg(*failpointPorts, len(eps), defaultFailpointPort)
    agents := make([]agentConfig, len(eps))
 
     for i := range eps {
         agents[i].endpoint = eps[i]
         agents[i].clientPort = cports[i]
+        agents[i].advertiseClientPort = acports[i]
         agents[i].peerPort = pports[i]
+        agents[i].advertisePeerPort = apports[i]
         agents[i].failpointPort = fports[i]
     }
 
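portsFromArg is not part of this diff. The sketch below is an assumption inferred from its call sites and the flag help text: an empty flag value means every endpoint falls back to the default port (so leaving the new advertise flags unset keeps the advertise ports equal to the client/peer defaults), otherwise one port per comma-separated entry is used. The real helper may handle errors and edge cases differently.

// stand-alone sketch, not the actual etcd-tester implementation
package main

import (
    "log"
    "strconv"
    "strings"
)

// portsFromArg (assumed behavior): return n ports, either the default for
// every endpoint when arg is empty, or one parsed port per comma-separated
// entry in arg.
func portsFromArg(arg string, n, defaultPort int) []int {
    ports := make([]int, n)
    if len(arg) == 0 {
        for i := range ports {
            ports[i] = defaultPort
        }
        return ports
    }
    ss := strings.Split(arg, ",")
    if len(ss) != n {
        log.Fatalf("expected %d ports, got %d (%q)", n, len(ss), arg)
    }
    for i := range ss {
        p, err := strconv.Atoi(ss[i])
        if err != nil {
            log.Fatalf("invalid port %q (%v)", ss[i], err)
        }
        ports[i] = p
    }
    return ports
}

func main() {
    log.Println(portsFromArg("", 3, 2379))                  // [2379 2379 2379]
    log.Println(portsFromArg("12379,22379,32379", 3, 2379)) // [12379 22379 32379]
}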
@@ -29,23 +29,25 @@ import (
 )
 
 type member struct {
     Agent client.Agent
     Endpoint string
     Name string
     ClientURL string
-    PeerURL string
-    FailpointURL string
+    AdvertiseClientURL string
+    PeerURL string
+    AdvertisePeerURL string
+    FailpointURL string
 }
 
-func (m *member) ClusterEntry() string { return m.Name + "=" + m.PeerURL }
+func (m *member) ClusterEntry() string { return m.Name + "=" + m.AdvertisePeerURL }
 
 func (m *member) Flags() []string {
     return []string{
         "--name", m.Name,
         "--listen-client-urls", m.ClientURL,
-        "--advertise-client-urls", m.ClientURL,
+        "--advertise-client-urls", m.AdvertiseClientURL,
         "--listen-peer-urls", m.PeerURL,
-        "--initial-advertise-peer-urls", m.PeerURL,
+        "--initial-advertise-peer-urls", m.AdvertisePeerURL,
         "--initial-cluster-state", "new",
         "--experimental-initial-corrupt-check",
     }
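To make the listen vs. advertise split concrete, here is a small stand-alone sketch using a trimmed copy of the member type above; the host names and port numbers are illustrative only, not taken from this commit.

package main

import "fmt"

// trimmed copy of member, keeping only the fields used by Flags here
type member struct {
    Name               string
    ClientURL          string
    AdvertiseClientURL string
    PeerURL            string
    AdvertisePeerURL   string
}

func (m *member) Flags() []string {
    return []string{
        "--name", m.Name,
        "--listen-client-urls", m.ClientURL,
        "--advertise-client-urls", m.AdvertiseClientURL,
        "--listen-peer-urls", m.PeerURL,
        "--initial-advertise-peer-urls", m.AdvertisePeerURL,
    }
}

func main() {
    m := member{
        Name:               "etcd-0",
        ClientURL:          "http://127.0.0.1:1379", // locally bound client port
        AdvertiseClientURL: "http://10.0.0.5:2379",  // port clients and the tester dial
        PeerURL:            "http://127.0.0.1:1380", // locally bound peer port
        AdvertisePeerURL:   "http://10.0.0.5:2380",  // port other members dial
    }
    fmt.Println(m.Flags())
}

The tester itself now dials the advertise client URL as well (see newClientV3, grpcAddr, and the Status/Defragment calls in the hunks below), so the listen ports no longer need to be reachable from outside the agent host.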
@@ -54,7 +56,7 @@ func (m *member) Flags() []string {
 func (m *member) CheckCompact(rev int64) error {
     cli, err := m.newClientV3()
     if err != nil {
-        return fmt.Errorf("%v (endpoint %s)", err, m.ClientURL)
+        return fmt.Errorf("%v (endpoint %s)", err, m.AdvertiseClientURL)
     }
     defer cli.Close()
 
@@ -64,29 +66,29 @@ func (m *member) CheckCompact(rev int64) error {
     cancel()
 
     if !ok {
-        return fmt.Errorf("watch channel terminated (endpoint %s)", m.ClientURL)
+        return fmt.Errorf("watch channel terminated (endpoint %s)", m.AdvertiseClientURL)
     }
     if wr.CompactRevision != rev {
-        return fmt.Errorf("got compact revision %v, wanted %v (endpoint %s)", wr.CompactRevision, rev, m.ClientURL)
+        return fmt.Errorf("got compact revision %v, wanted %v (endpoint %s)", wr.CompactRevision, rev, m.AdvertiseClientURL)
     }
 
     return nil
 }
 
 func (m *member) Defrag() error {
-    plog.Printf("defragmenting %s\n", m.ClientURL)
+    plog.Printf("defragmenting %s\n", m.AdvertiseClientURL)
     cli, err := m.newClientV3()
     if err != nil {
         return err
     }
     defer cli.Close()
     ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
-    _, err = cli.Defragment(ctx, m.ClientURL)
+    _, err = cli.Defragment(ctx, m.AdvertiseClientURL)
     cancel()
     if err != nil {
         return err
     }
-    plog.Printf("defragmented %s\n", m.ClientURL)
+    plog.Printf("defragmented %s\n", m.AdvertiseClientURL)
     return nil
 }
 
@@ -114,7 +116,7 @@ func (m *member) Rev(ctx context.Context) (int64, error) {
         return 0, err
     }
     defer cli.Close()
-    resp, err := cli.Status(ctx, m.ClientURL)
+    resp, err := cli.Status(ctx, m.AdvertiseClientURL)
     if err != nil {
         return 0, err
     }
@@ -127,7 +129,7 @@ func (m *member) IsLeader() (bool, error) {
         return false, err
     }
     defer cli.Close()
-    resp, err := cli.Status(context.Background(), m.ClientURL)
+    resp, err := cli.Status(context.Background(), m.AdvertiseClientURL)
     if err != nil {
         return false, err
     }
@@ -137,7 +139,7 @@ func (m *member) IsLeader() (bool, error) {
 func (m *member) SetHealthKeyV3() error {
     cli, err := m.newClientV3()
     if err != nil {
-        return fmt.Errorf("%v (%s)", err, m.ClientURL)
+        return fmt.Errorf("%v (%s)", err, m.AdvertiseClientURL)
     }
     defer cli.Close()
     // give enough time-out in case expensive requests (range/delete) are pending
@@ -145,14 +147,14 @@ func (m *member) SetHealthKeyV3() error {
     _, err = cli.Put(ctx, "health", "good")
     cancel()
     if err != nil {
-        return fmt.Errorf("%v (%s)", err, m.ClientURL)
+        return fmt.Errorf("%v (%s)", err, m.AdvertiseClientURL)
     }
     return nil
 }
 
 func (m *member) newClientV3() (*clientv3.Client, error) {
     return clientv3.New(clientv3.Config{
-        Endpoints: []string{m.ClientURL},
+        Endpoints: []string{m.AdvertiseClientURL},
         DialTimeout: 5 * time.Second,
     })
 }
@@ -163,7 +165,7 @@ func (m *member) dialGRPC() (*grpc.ClientConn, error) {
 
 // grpcAddr gets the host from clientURL so it works with grpc.Dial()
 func (m *member) grpcAddr() string {
-    u, err := url.Parse(m.ClientURL)
+    u, err := url.Parse(m.AdvertiseClientURL)
     if err != nil {
         panic(err)
     }
@@ -171,7 +173,7 @@ func (m *member) grpcAddr() string {
 }
 
 func (m *member) peerPort() (port int) {
-    u, err := url.Parse(m.PeerURL)
+    u, err := url.Parse(m.AdvertisePeerURL)
     if err != nil {
         panic(err)
     }