Compare commits
74 Commits
Commit SHAs in this comparison:

2d1e2e8e64
6412758177
836c8159f6
e406e6e8f4
2fa2c6284e
2862c4fa12
6f89fbf8b5
6ae7ec9a3f
4a35b1b20a
c859c97ee2
a091c629e1
96de94a584
e9cd8410d7
e37ede1d2e
4420a29ac4
0544d4bfd0
fe7379f102
c76df5052b
3299cad1c3
d9ab018c49
e853451cd2
1becf9d2f5
1a712cf187
023f335f67
bf0da78b63
e8473850a2
b836d187fd
9b09229c4d
546c0f7ed6
adbad1c9b5
273b986751
5b205729b9
fe900b09dd
494c012659
4abc381ebe
73c8fdac53
ee2717493a
2435eb9ecd
8fb533dabe
2f0f5ac504
9ab811d478
e0a99fb4ba
d40982fc91
fe3a1cc31b
70713706a1
0054e7e89b
97f718b504
202da9270e
6e83ec0ed7
5c44cdfdaa
09a239f040
3faff8b2e2
2345fda18e
5695120efc
183293e061
4b48876f0e
5089bf58fb
480a347179
59e560c7a7
0bd9bea2e9
bd7581ac59
db378c3d26
23740162dc
96422a955f
6fd996fdac
9efa00d103
72d30f4c34
2e92779777
404415b1e3
07e421d245
a7d6e29275
1a8b295dab
ffc45cc066
0db1ba8093
@@ -427,6 +427,7 @@ Empty field.

| Field | Description | Type |
| ----- | ----------- | ---- |
| key | key is the first key to delete in the range. | bytes |
| range_end | range_end is the key following the last key to delete for the range [key, range_end). If range_end is not given, the range is defined to contain only the key argument. If range_end is '\0', the range is all keys greater than or equal to the key argument. | bytes |
| prev_kv | If prev_kv is set, etcd gets the previous key-value pairs before deleting it. The previous key-value pairs will be returned in the delete response. | bool |

@@ -436,6 +437,7 @@ Empty field.

| Field | Description | Type |
| ----- | ----------- | ---- |
| header | | ResponseHeader |
| deleted | deleted is the number of keys deleted by the delete range request. | int64 |
| prev_kvs | if prev_kv is set in the request, the previous key-value pairs will be returned. | (slice of) mvccpb.KeyValue |

@@ -591,6 +593,7 @@ Empty field.

| Field | Description | Type |
| ----- | ----------- | ---- |
| key | key is the key, in bytes, to put into the key-value store. | bytes |
| value | value is the value, in bytes, to associate with the key in the key-value store. | bytes |
| lease | lease is the lease ID to associate with the key in the key-value store. A lease value of 0 indicates no lease. | int64 |
| prev_kv | If prev_kv is set, etcd gets the previous key-value pair before changing it. The previous key-value pair will be returned in the put response. | bool |

@@ -599,6 +602,7 @@ Empty field.

| Field | Description | Type |
| ----- | ----------- | ---- |
| header | | ResponseHeader |
| prev_kv | if prev_kv is set in the request, the previous key-value pair will be returned. | mvccpb.KeyValue |

@@ -735,6 +739,7 @@ From google paxosdb paper: Our implementation hinges around a powerful primitive

| Field | Description | Type |
| ----- | ----------- | ---- |
| range_end | range_end is the end of the range [key, range_end) to watch. If range_end is not given, only the key argument is watched. If range_end is equal to '\0', all keys greater than or equal to the key argument are watched. | bytes |
| start_revision | start_revision is an optional revision to watch from (inclusive). No start_revision is "now". | int64 |
| progress_notify | progress_notify is set so that the etcd server will periodically send a WatchResponse with no events to the new watcher if there are no recent events. It is useful when clients wish to recover a disconnected watcher starting from a recent known revision. The etcd server may decide how often it will send notifications based on current load. | bool |
| prev_kv | If prev_kv is set, created watcher gets the previous KV before the event happens. If the previous KV is already compacted, nothing will be returned. | bool |

@@ -767,6 +772,7 @@ From google paxosdb paper: Our implementation hinges around a powerful primitive

| Field | Description | Type |
| ----- | ----------- | ---- |
| type | type is the kind of event. If type is a PUT, it indicates new data has been stored to the key. If type is a DELETE, it indicates the key was deleted. | EventType |
| kv | kv holds the KeyValue for the event. A PUT event contains current kv pair. A PUT event with kv.Version=1 indicates the creation of a key. A DELETE/EXPIRE event contains the deleted key with its modification revision set to the revision of deletion. | KeyValue |
| prev_kv | prev_kv holds the key-value pair before the event happens. | KeyValue |
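To make the new fields concrete, here is a minimal clientv3 sketch of a delete that asks for the prior pairs. The endpoint and key names are illustrative, not from this PR; `Deleted` and `PrevKvs` mirror the DeleteRangeResponse fields documented above.

```go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/coreos/etcd/clientv3"
)

func main() {
	cli, err := clientv3.New(clientv3.Config{Endpoints: []string{"localhost:2379"}})
	if err != nil {
		log.Fatal(err)
	}
	defer cli.Close()

	// Delete the range ["foo", "foo0") and request the previous key-value
	// pairs; the response reports how many keys were removed and what they held.
	resp, err := cli.Delete(context.TODO(), "foo",
		clientv3.WithRange("foo0"), clientv3.WithPrevKV())
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("deleted %d keys; previous pairs: %v\n", resp.Deleted, resp.PrevKvs)
}
```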
@@ -1474,6 +1474,11 @@
          "format": "byte",
          "description": "key is the first key to delete in the range."
        },
        "prev_kv": {
          "type": "boolean",
          "format": "boolean",
          "description": "If prev_kv is set, etcd gets the previous key-value pairs before deleting it.\nThe previous key-value pairs will be returned in the delete response."
        },
        "range_end": {
          "type": "string",
          "format": "byte",
@@ -1491,6 +1496,13 @@
        },
        "header": {
          "$ref": "#/definitions/etcdserverpbResponseHeader"
        },
        "prev_kvs": {
          "type": "array",
          "items": {
            "$ref": "#/definitions/mvccpbKeyValue"
          },
          "description": "if prev_kv is set in the request, the previous key-value pairs will be returned."
        }
      }
    },
@@ -1724,6 +1736,11 @@
          "format": "int64",
          "description": "lease is the lease ID to associate with the key in the key-value store. A lease\nvalue of 0 indicates no lease."
        },
        "prev_kv": {
          "type": "boolean",
          "format": "boolean",
          "description": "If prev_kv is set, etcd gets the previous key-value pair before changing it.\nThe previous key-value pair will be returned in the put response."
        },
        "value": {
          "type": "string",
          "format": "byte",
@@ -1736,6 +1753,10 @@
      "properties": {
        "header": {
          "$ref": "#/definitions/etcdserverpbResponseHeader"
        },
        "prev_kv": {
          "$ref": "#/definitions/mvccpbKeyValue",
          "description": "if prev_kv is set in the request, the previous key-value pair will be returned."
        }
      }
    },
@@ -1988,6 +2009,11 @@
          "format": "byte",
          "description": "key is the key to register for watching."
        },
        "prev_kv": {
          "type": "boolean",
          "format": "boolean",
          "description": "If prev_kv is set, created watcher gets the previous KV before the event happens.\nIf the previous KV is already compacted, nothing will be returned."
        },
        "progress_notify": {
          "type": "boolean",
          "format": "boolean",
@@ -2057,6 +2083,10 @@
          "$ref": "#/definitions/mvccpbKeyValue",
          "description": "kv holds the KeyValue for the event.\nA PUT event contains current kv pair.\nA PUT event with kv.Version=1 indicates the creation of a key.\nA DELETE/EXPIRE event contains the deleted key with\nits modification revision set to the revision of deletion."
        },
        "prev_kv": {
          "$ref": "#/definitions/mvccpbKeyValue",
          "description": "prev_kv holds the key-value pair before the event happens."
        },
        "type": {
          "$ref": "#/definitions/EventEventType",
          "description": "type is the kind of event. If type is a PUT, it indicates\nnew data has been stored to the key. If type is a DELETE,\nit indicates the key was deleted."
@@ -21,9 +21,9 @@ import (
	proto "github.com/golang/protobuf/proto"

	math "math"
)

import io "io"
	io "io"
)

// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
@@ -22,7 +22,10 @@ import (
	"github.com/coreos/etcd/mvcc/backend"
)

// isSubset returns true if a is a subset of b
// isSubset returns true if a is a subset of b.
// If a is a prefix of b, then a is a subset of b.
// Given intervals [a1,a2) and [b1,b2), is
// the a interval a subset of b?
func isSubset(a, b *rangePerm) bool {
	switch {
	case len(a.end) == 0 && len(b.end) == 0:
@@ -32,9 +35,11 @@ func isSubset(a, b *rangePerm) bool {
		// b is a key, a is a range
		return false
	case len(a.end) == 0:
		return 0 <= bytes.Compare(a.begin, b.begin) && bytes.Compare(a.begin, b.end) <= 0
		// a is a key, b is a range. need b1 <= a1 and a1 < b2
		return bytes.Compare(b.begin, a.begin) <= 0 && bytes.Compare(a.begin, b.end) < 0
	default:
		return 0 <= bytes.Compare(a.begin, b.begin) && bytes.Compare(a.end, b.end) <= 0
		// both are ranges. need b1 <= a1 and a2 <= b2
		return bytes.Compare(b.begin, a.begin) <= 0 && bytes.Compare(a.end, b.end) <= 0
	}
}
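The corrected key-in-range case is end-exclusive: the key must satisfy b1 <= a1 and a1 < b2, whereas the old code accepted a1 == b2. A small self-contained sketch of that comparison (the `contains` helper is ours, not etcd's):

```go
package main

import (
	"bytes"
	"fmt"
)

// contains reports whether key k lies in the half-open interval [begin, end),
// mirroring the fixed "a is a key, b is a range" case above.
func contains(k, begin, end []byte) bool {
	return bytes.Compare(begin, k) <= 0 && bytes.Compare(k, end) < 0
}

func main() {
	fmt.Println(contains([]byte("b"), []byte("a"), []byte("c"))) // true
	fmt.Println(contains([]byte("c"), []byte("a"), []byte("c"))) // false: end is exclusive
}
```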
@@ -88,12 +93,18 @@ func mergeRangePerms(perms []*rangePerm) []*rangePerm {
	i := 0
	for i < len(perms) {
		begin, next := i, i
		for next+1 < len(perms) && bytes.Compare(perms[next].end, perms[next+1].begin) != -1 {
		for next+1 < len(perms) && bytes.Compare(perms[next].end, perms[next+1].begin) >= 0 {
			next++
		}

		merged = append(merged, &rangePerm{begin: perms[begin].begin, end: perms[next].end})

		// don't merge ["a", "b") with ["b", ""), because perms[next+1].end is empty.
		if next != begin && len(perms[next].end) > 0 {
			merged = append(merged, &rangePerm{begin: perms[begin].begin, end: perms[next].end})
		} else {
			merged = append(merged, perms[begin])
			if next != begin {
				merged = append(merged, perms[next])
			}
		}
		i = next + 1
	}
@@ -46,6 +46,10 @@ func TestGetMergedPerms(t *testing.T) {
			[]*rangePerm{{[]byte("a"), []byte("b")}},
			[]*rangePerm{{[]byte("a"), []byte("b")}},
		},
		{
			[]*rangePerm{{[]byte("a"), []byte("b")}, {[]byte("b"), []byte("")}},
			[]*rangePerm{{[]byte("a"), []byte("b")}, {[]byte("b"), []byte("")}},
		},
		{
			[]*rangePerm{{[]byte("a"), []byte("b")}, {[]byte("b"), []byte("c")}},
			[]*rangePerm{{[]byte("a"), []byte("c")}},
@@ -106,7 +110,7 @@ func TestGetMergedPerms(t *testing.T) {
		},
		{
			[]*rangePerm{{[]byte("a"), []byte("")}, {[]byte("b"), []byte("c")}, {[]byte("b"), []byte("")}, {[]byte("c"), []byte("")}, {[]byte("d"), []byte("")}},
			[]*rangePerm{{[]byte("a"), []byte("")}, {[]byte("b"), []byte("c")}, {[]byte("d"), []byte("")}},
			[]*rangePerm{{[]byte("a"), []byte("")}, {[]byte("b"), []byte("c")}, {[]byte("c"), []byte("")}, {[]byte("d"), []byte("")}},
		},
		// duplicate ranges
		{
@@ -45,6 +45,8 @@ type simpleBalancer struct {
	// pinAddr is the currently pinned address; set to the empty string on
	// initialization and shutdown.
	pinAddr string

	closed bool
}

func newSimpleBalancer(eps []string) *simpleBalancer {
@@ -74,15 +76,25 @@ func (b *simpleBalancer) ConnectNotify() <-chan struct{} {

func (b *simpleBalancer) Up(addr grpc.Address) func(error) {
	b.mu.Lock()
	defer b.mu.Unlock()

	// gRPC might call Up after it called Close. We add this check
	// to "fix" it up at the application layer. Otherwise our simpleBalancer
	// might panic since b.upc is closed.
	if b.closed {
		return func(err error) {}
	}

	if len(b.upEps) == 0 {
		// notify waiting Get()s and pin first connected address
		close(b.upc)
		b.pinAddr = addr.Addr
	}
	b.upEps[addr.Addr] = struct{}{}
	b.mu.Unlock()

	// notify client that a connection is up
	b.readyOnce.Do(func() { close(b.readyc) })

	return func(err error) {
		b.mu.Lock()
		delete(b.upEps, addr.Addr)
@@ -128,13 +140,19 @@ func (b *simpleBalancer) Notify() <-chan []grpc.Address { return b.notifyCh }

func (b *simpleBalancer) Close() error {
	b.mu.Lock()
	defer b.mu.Unlock()
	// In case gRPC calls close twice. TODO: remove the check
	// when we are sure that gRPC won't call close twice.
	if b.closed {
		return nil
	}
	b.closed = true
	close(b.notifyCh)
	// terminate all waiting Get()s
	b.pinAddr = ""
	if len(b.upEps) == 0 {
		close(b.upc)
	}
	b.mu.Unlock()
	return nil
}
@@ -40,7 +40,7 @@ type Election struct {

// NewElection returns a new election on a given key prefix.
func NewElection(client *v3.Client, pfx string) *Election {
	return &Election{client: client, keyPrefix: pfx}
	return &Election{client: client, keyPrefix: pfx + "/"}
}

// Campaign puts a value as eligible for the election. It blocks until
@@ -59,7 +59,6 @@ func (e *Election) Campaign(ctx context.Context, val string) error {
	if err != nil {
		return err
	}

	e.leaderKey, e.leaderRev, e.leaderSession = k, resp.Header.Revision, s
	if !resp.Succeeded {
		kv := resp.Responses[0].GetResponseRange().Kvs[0]
@@ -32,7 +32,7 @@ type Mutex struct {
}

func NewMutex(client *v3.Client, pfx string) *Mutex {
	return &Mutex{client, pfx, "", -1}
	return &Mutex{client, pfx + "/", "", -1}
}

// Lock locks the mutex with a cancellable context. If the context is cancelled
@@ -43,7 +43,7 @@ func (m *Mutex) Lock(ctx context.Context) error {
		return serr
	}

	m.myKey = fmt.Sprintf("%s/%x", m.pfx, s.Lease())
	m.myKey = fmt.Sprintf("%s%x", m.pfx, s.Lease())
	cmp := v3.Compare(v3.CreateRevision(m.myKey), "=", 0)
	// put self in lock waiters via myKey; oldest waiter holds lock
	put := v3.OpPut(m.myKey, "", v3.WithLease(s.Lease()))
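For context, a hedged usage sketch of the lock API this changes (the endpoint and key prefix are illustrative, and `Unlock` is assumed from the same concurrency package). With the trailing slash now appended inside NewMutex, a prefix like "app/lock" no longer shares a key space with "app/lock2":

```go
package main

import (
	"context"
	"log"

	"github.com/coreos/etcd/clientv3"
	"github.com/coreos/etcd/clientv3/concurrency"
)

func main() {
	cli, err := clientv3.New(clientv3.Config{Endpoints: []string{"localhost:2379"}})
	if err != nil {
		log.Fatal(err)
	}
	defer cli.Close()

	// Waiter keys are created under "app/lock/" after this change.
	m := concurrency.NewMutex(cli, "app/lock")
	if err := m.Lock(context.TODO()); err != nil {
		log.Fatal(err)
	}
	defer m.Unlock(context.TODO())
	// ... critical section ...
}
```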
@@ -673,3 +673,112 @@ func TestWatchWithRequireLeader(t *testing.T) {
		t.Fatalf("expected response, got closed channel")
	}
}

// TestWatchOverlapContextCancel stresses the watcher stream teardown path by
// creating/canceling watchers to ensure that new watchers are not taken down
// by a torn down watch stream. The sort of race that's being detected:
// 1. create w1 using a cancelable ctx with %v as "ctx"
// 2. cancel ctx
// 3. watcher client begins tearing down watcher grpc stream since no more watchers
// 4. start creating watcher w2 using a new "ctx" (not canceled), attaches to old grpc stream
// 5. watcher client finishes tearing down stream on "ctx"
// 6. w2 comes back canceled
func TestWatchOverlapContextCancel(t *testing.T) {
	f := func(clus *integration.ClusterV3) {}
	testWatchOverlapContextCancel(t, f)
}

func TestWatchOverlapDropConnContextCancel(t *testing.T) {
	f := func(clus *integration.ClusterV3) {
		clus.Members[0].DropConnections()
	}
	testWatchOverlapContextCancel(t, f)
}

func testWatchOverlapContextCancel(t *testing.T, f func(*integration.ClusterV3)) {
	defer testutil.AfterTest(t)
	clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
	defer clus.Terminate(t)

	// each unique context "%v" has a unique grpc stream
	n := 100
	ctxs, ctxc := make([]context.Context, 5), make([]chan struct{}, 5)
	for i := range ctxs {
		// make "%v" unique
		ctxs[i] = context.WithValue(context.TODO(), "key", i)
		// limits the maximum number of outstanding watchers per stream
		ctxc[i] = make(chan struct{}, 2)
	}

	// issue concurrent watches on "abc" with cancel
	cli := clus.RandClient()
	if _, err := cli.Put(context.TODO(), "abc", "def"); err != nil {
		t.Fatal(err)
	}
	ch := make(chan struct{}, n)
	for i := 0; i < n; i++ {
		go func() {
			defer func() { ch <- struct{}{} }()
			idx := rand.Intn(len(ctxs))
			ctx, cancel := context.WithCancel(ctxs[idx])
			ctxc[idx] <- struct{}{}
			wch := cli.Watch(ctx, "abc", clientv3.WithRev(1))
			f(clus)
			select {
			case _, ok := <-wch:
				if !ok {
					t.Fatalf("unexpected closed channel %p", wch)
				}
			// may take a second or two to reestablish a watcher because of
			// grpc backoff policies for disconnects
			case <-time.After(5 * time.Second):
				t.Errorf("timed out waiting for watch on %p", wch)
			}
			// randomize how cancel overlaps with watch creation
			if rand.Intn(2) == 0 {
				<-ctxc[idx]
				cancel()
			} else {
				cancel()
				<-ctxc[idx]
			}
		}()
	}
	// join on watches
	for i := 0; i < n; i++ {
		select {
		case <-ch:
		case <-time.After(5 * time.Second):
			t.Fatalf("timed out waiting for completed watch")
		}
	}
}

// TestWatchCancelAndCloseClient ensures that canceling a watcher then immediately
// closing the client does not return a client closing error.
func TestWatchCancelAndCloseClient(t *testing.T) {
	defer testutil.AfterTest(t)
	clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
	defer clus.Terminate(t)
	cli := clus.Client(0)
	ctx, cancel := context.WithCancel(context.Background())
	wch := cli.Watch(ctx, "abc")
	donec := make(chan struct{})
	go func() {
		defer close(donec)
		select {
		case wr, ok := <-wch:
			if ok {
				t.Fatalf("expected closed watch after cancel(), got resp=%+v err=%v", wr, wr.Err())
			}
		case <-time.After(5 * time.Second):
			t.Fatal("timed out waiting for closed channel")
		}
	}()
	cancel()
	if err := cli.Close(); err != nil {
		t.Fatal(err)
	}
	<-donec
	clus.TakeClient(0)
}
@@ -157,14 +157,14 @@ func (kv *kv) do(ctx context.Context, op Op) (OpResponse, error) {
		}
	case tPut:
		var resp *pb.PutResponse
		r := &pb.PutRequest{Key: op.key, Value: op.val, Lease: int64(op.leaseID)}
		r := &pb.PutRequest{Key: op.key, Value: op.val, Lease: int64(op.leaseID), PrevKv: op.prevKV}
		resp, err = kv.remote.Put(ctx, r)
		if err == nil {
			return OpResponse{put: (*PutResponse)(resp)}, nil
		}
	case tDeleteRange:
		var resp *pb.DeleteRangeResponse
		r := &pb.DeleteRangeRequest{Key: op.key, RangeEnd: op.end}
		r := &pb.DeleteRangeRequest{Key: op.key, RangeEnd: op.end, PrevKv: op.prevKV}
		resp, err = kv.remote.DeleteRange(ctx, r)
		if err == nil {
			return OpResponse{del: (*DeleteResponse)(resp)}, nil
@@ -47,6 +47,9 @@ type Op struct {
	// for range, watch
	rev int64

	// for watch, put, delete
	prevKV bool

	// progressNotify is for progress updates.
	progressNotify bool

@@ -73,10 +76,10 @@ func (op Op) toRequestOp() *pb.RequestOp {
		}
		return &pb.RequestOp{Request: &pb.RequestOp_RequestRange{RequestRange: r}}
	case tPut:
		r := &pb.PutRequest{Key: op.key, Value: op.val, Lease: int64(op.leaseID)}
		r := &pb.PutRequest{Key: op.key, Value: op.val, Lease: int64(op.leaseID), PrevKv: op.prevKV}
		return &pb.RequestOp{Request: &pb.RequestOp_RequestPut{RequestPut: r}}
	case tDeleteRange:
		r := &pb.DeleteRangeRequest{Key: op.key, RangeEnd: op.end}
		r := &pb.DeleteRangeRequest{Key: op.key, RangeEnd: op.end, PrevKv: op.prevKV}
		return &pb.RequestOp{Request: &pb.RequestOp_RequestDeleteRange{RequestDeleteRange: r}}
	default:
		panic("Unknown Op")
@@ -271,3 +274,11 @@ func WithProgressNotify() OpOption {
		op.progressNotify = true
	}
}

// WithPrevKV gets the previous key-value pair before the event happens. If the previous KV is already compacted,
// nothing will be returned.
func WithPrevKV() OpOption {
	return func(op *Op) {
		op.prevKV = true
	}
}
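A minimal usage sketch of the new option (the endpoint and key are illustrative): a put that reports the pair it overwrote, using the PutResponse.prev_kv field this PR plumbs through.

```go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/coreos/etcd/clientv3"
)

func main() {
	cli, err := clientv3.New(clientv3.Config{Endpoints: []string{"localhost:2379"}})
	if err != nil {
		log.Fatal(err)
	}
	defer cli.Close()

	// WithPrevKV sets PrevKv on the PutRequest, so the response carries the
	// pair the put replaced (nil on the first write to the key).
	resp, err := cli.Put(context.TODO(), "greeting", "hello", clientv3.WithPrevKV())
	if err != nil {
		log.Fatal(err)
	}
	if resp.PrevKv != nil {
		fmt.Printf("replaced %s=%s\n", resp.PrevKv.Key, resp.PrevKv.Value)
	}
}
```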
@@ -61,6 +61,9 @@ type WatchResponse struct {
	// the channel sends a final response that has Canceled set to true with a non-nil Err().
	Canceled bool

	// created is used to indicate the creation of the watcher.
	created bool

	closeErr error
}

@@ -89,7 +92,7 @@ func (wr *WatchResponse) Err() error {

// IsProgressNotify returns true if the WatchResponse is progress notification.
func (wr *WatchResponse) IsProgressNotify() bool {
	return len(wr.Events) == 0 && !wr.Canceled
	return len(wr.Events) == 0 && !wr.Canceled && !wr.created && wr.CompactRevision == 0 && wr.Header.Revision != 0
}

// watcher implements the Watcher interface
@@ -102,6 +105,7 @@ type watcher struct {
	streams map[string]*watchGrpcStream
}

// watchGrpcStream tracks all watch resources attached to a single grpc stream.
type watchGrpcStream struct {
	owner  *watcher
	remote pb.WatchClient
@@ -112,10 +116,10 @@ type watchGrpcStream struct {
	ctxKey string
	cancel context.CancelFunc

	// mu protects the streams map
	mu sync.RWMutex
	// streams holds all active watchers
	streams map[int64]*watcherStream
	// substreams holds all active watchers on this grpc stream
	substreams map[int64]*watcherStream
	// resuming holds all resuming watchers on this grpc stream
	resuming []*watcherStream

	// reqc sends a watch request from Watch() to the main goroutine
	reqc chan *watchRequest
@@ -127,8 +131,12 @@ type watchGrpcStream struct {
	donec chan struct{}
	// errc transmits errors from grpc Recv to the watch stream reconn logic
	errc chan error
	// closingc gets the watcherStream of closing watchers
	closingc chan *watcherStream

	// the error that closed the watch stream
	// resumec closes to signal that all substreams should begin resuming
	resumec chan struct{}
	// closeErr is the error that closed the watch stream
	closeErr error
}

@@ -140,6 +148,8 @@ type watchRequest struct {
	rev int64
	// progressNotify is for progress updates.
	progressNotify bool
	// get the previous key-value pair before the event happens
	prevKV bool
	// retc receives a chan WatchResponse once the watcher is established
	retc chan chan WatchResponse
}
@@ -150,15 +160,18 @@ type watcherStream struct {
	initReq watchRequest

	// outc publishes watch responses to subscriber
	outc chan<- WatchResponse
	outc chan WatchResponse
	// recvc buffers watch responses before publishing
	recvc chan *WatchResponse
	id    int64
	// donec closes when the watcherStream goroutine stops.
	donec chan struct{}
	// closing is set to true when stream should be scheduled to shutdown.
	closing bool
	// id is the registered watch id on the grpc stream
	id int64

	// lastRev is revision last successfully sent over outc
	lastRev int64
	// resumec indicates the stream must recover at a given revision
	resumec chan int64
	// buf holds all events received from etcd but not yet consumed by the client
	buf []*WatchResponse
}

func NewWatcher(c *Client) Watcher {
@@ -182,18 +195,20 @@ func (vc *valCtx) Err() error { return nil }
func (w *watcher) newWatcherGrpcStream(inctx context.Context) *watchGrpcStream {
	ctx, cancel := context.WithCancel(&valCtx{inctx})
	wgs := &watchGrpcStream{
		owner:   w,
		remote:  w.remote,
		ctx:     ctx,
		ctxKey:  fmt.Sprintf("%v", inctx),
		cancel:  cancel,
		streams: make(map[int64]*watcherStream),
		owner:      w,
		remote:     w.remote,
		ctx:        ctx,
		ctxKey:     fmt.Sprintf("%v", inctx),
		cancel:     cancel,
		substreams: make(map[int64]*watcherStream),

		respc: make(chan *pb.WatchResponse),
		reqc:  make(chan *watchRequest),
		stopc: make(chan struct{}),
		donec: make(chan struct{}),
		errc:  make(chan error, 1),
		respc:    make(chan *pb.WatchResponse),
		reqc:     make(chan *watchRequest),
		stopc:    make(chan struct{}),
		donec:    make(chan struct{}),
		errc:     make(chan error, 1),
		closingc: make(chan *watcherStream),
		resumec:  make(chan struct{}),
	}
	go wgs.run()
	return wgs
@@ -203,14 +218,14 @@ func (w *watcher) newWatcherGrpcStream(inctx context.Context) *watchGrpcStream {
func (w *watcher) Watch(ctx context.Context, key string, opts ...OpOption) WatchChan {
	ow := opWatch(key, opts...)

	retc := make(chan chan WatchResponse, 1)
	wr := &watchRequest{
		ctx:            ctx,
		key:            string(ow.key),
		end:            string(ow.end),
		rev:            ow.rev,
		progressNotify: ow.progressNotify,
		retc:           retc,
		prevKV:         ow.prevKV,
		retc:           make(chan chan WatchResponse, 1),
	}

	ok := false
@@ -242,7 +257,6 @@ func (w *watcher) Watch(ctx context.Context, key string, opts ...OpOption) Watch
		case reqc <- wr:
			ok = true
		case <-wr.ctx.Done():
			wgs.stopIfEmpty()
		case <-donec:
			if wgs.closeErr != nil {
				closeCh <- WatchResponse{closeErr: wgs.closeErr}
@@ -255,7 +269,7 @@ func (w *watcher) Watch(ctx context.Context, key string, opts ...OpOption) Watch
		// receive channel
		if ok {
			select {
			case ret := <-retc:
			case ret := <-wr.retc:
				return ret
			case <-ctx.Done():
			case <-donec:
@@ -286,12 +300,7 @@ func (w *watcher) Close() (err error) {
}

func (w *watchGrpcStream) Close() (err error) {
	w.mu.Lock()
	if w.stopc != nil {
		close(w.stopc)
		w.stopc = nil
	}
	w.mu.Unlock()
	close(w.stopc)
	<-w.donec
	select {
	case err = <-w.errc:
@@ -300,67 +309,57 @@ func (w *watchGrpcStream) Close() (err error) {
	return toErr(w.ctx, err)
}

func (w *watchGrpcStream) addStream(resp *pb.WatchResponse, pendingReq *watchRequest) {
	if pendingReq == nil {
		// no pending request; ignore
		return
	}
	if resp.Canceled || resp.CompactRevision != 0 {
		// a cancel at id creation time means the start revision has
		// been compacted out of the store
		ret := make(chan WatchResponse, 1)
		ret <- WatchResponse{
			Header:          *resp.Header,
			CompactRevision: resp.CompactRevision,
			Canceled:        true}
		close(ret)
		pendingReq.retc <- ret
		return
	}

	ret := make(chan WatchResponse)
	if resp.WatchId == -1 {
		// failed; no channel
		close(ret)
		pendingReq.retc <- ret
		return
	}

	ws := &watcherStream{
		initReq: *pendingReq,
		id:      resp.WatchId,
		outc:    ret,
		// buffered so unlikely to block on sending while holding mu
		recvc:   make(chan *WatchResponse, 4),
		resumec: make(chan int64),
	}

	if pendingReq.rev == 0 {
		// note the header revision so that a put following a current watcher
		// disconnect will arrive on the watcher channel after reconnect
		ws.initReq.rev = resp.Header.Revision
	}

func (w *watcher) closeStream(wgs *watchGrpcStream) {
	w.mu.Lock()
	w.streams[ws.id] = ws
	close(wgs.donec)
	wgs.cancel()
	if w.streams != nil {
		delete(w.streams, wgs.ctxKey)
	}
	w.mu.Unlock()

	// pass back the subscriber channel for the watcher
	pendingReq.retc <- ret

	// send messages to subscriber
	go w.serveStream(ws)
}

// closeStream closes the watcher resources and removes it
func (w *watchGrpcStream) closeStream(ws *watcherStream) {
	w.mu.Lock()
	// cancels request stream; subscriber receives nil channel
	close(ws.initReq.retc)
	// close subscriber's channel
func (w *watchGrpcStream) addSubstream(resp *pb.WatchResponse, ws *watcherStream) {
	if resp.WatchId == -1 {
		// failed; no channel
		close(ws.recvc)
		return
	}
	ws.id = resp.WatchId
	w.substreams[ws.id] = ws
}

func (w *watchGrpcStream) sendCloseSubstream(ws *watcherStream, resp *WatchResponse) {
	select {
	case ws.outc <- *resp:
	case <-ws.initReq.ctx.Done():
	case <-time.After(closeSendErrTimeout):
	}
	close(ws.outc)
	delete(w.streams, ws.id)
	w.mu.Unlock()
}

func (w *watchGrpcStream) closeSubstream(ws *watcherStream) {
	// send channel response in case stream was never established
	select {
	case ws.initReq.retc <- ws.outc:
	default:
	}
	// close subscriber's channel
	if closeErr := w.closeErr; closeErr != nil && ws.initReq.ctx.Err() == nil {
		go w.sendCloseSubstream(ws, &WatchResponse{closeErr: w.closeErr})
	} else {
		close(ws.outc)
	}
	if ws.id != -1 {
		delete(w.substreams, ws.id)
		return
	}
	for i := range w.resuming {
		if w.resuming[i] == ws {
			w.resuming[i] = nil
			return
		}
	}
}

// run is the root of the goroutines for managing a watcher client
@@ -368,66 +367,79 @@ func (w *watchGrpcStream) run() {
	var wc pb.Watch_WatchClient
	var closeErr error

	defer func() {
		w.owner.mu.Lock()
		w.closeErr = closeErr
		if w.owner.streams != nil {
			delete(w.owner.streams, w.ctxKey)
		}
		close(w.donec)
		w.owner.mu.Unlock()
		w.cancel()
	}()
	// substreams marked to close but goroutine still running; needed for
	// avoiding double-closing recvc on grpc stream teardown
	closing := make(map[*watcherStream]struct{})

	// already stopped?
	w.mu.RLock()
	stopc := w.stopc
	w.mu.RUnlock()
	if stopc == nil {
		return
	}
	defer func() {
		w.closeErr = closeErr
		// shutdown substreams and resuming substreams
		for _, ws := range w.substreams {
			if _, ok := closing[ws]; !ok {
				close(ws.recvc)
			}
		}
		for _, ws := range w.resuming {
			if _, ok := closing[ws]; ws != nil && !ok {
				close(ws.recvc)
			}
		}
		w.joinSubstreams()
		for toClose := len(w.substreams) + len(w.resuming); toClose > 0; toClose-- {
			w.closeSubstream(<-w.closingc)
		}

		w.owner.closeStream(w)
	}()

	// start a stream with the etcd grpc server
	if wc, closeErr = w.newWatchClient(); closeErr != nil {
		return
	}

	var pendingReq, failedReq *watchRequest
	curReqC := w.reqc
	cancelSet := make(map[int64]struct{})

	for {
		select {
		// Watch() requested
		case pendingReq = <-curReqC:
			// no more watch requests until there's a response
			curReqC = nil
			if err := wc.Send(pendingReq.toPB()); err == nil {
				// pendingReq now waits on w.respc
				break
		case wreq := <-w.reqc:
			outc := make(chan WatchResponse, 1)
			ws := &watcherStream{
				initReq: *wreq,
				id:      -1,
				outc:    outc,
				// unbuffered so resumes won't cause repeat events
				recvc: make(chan *WatchResponse),
			}

			ws.donec = make(chan struct{})
			go w.serveSubstream(ws, w.resumec)

			// queue up for watcher creation/resume
			w.resuming = append(w.resuming, ws)
			if len(w.resuming) == 1 {
				// head of resume queue, can register a new watcher
				wc.Send(ws.initReq.toPB())
			}
			failedReq = pendingReq
		// New events from the watch client
		case pbresp := <-w.respc:
			switch {
			case pbresp.Created:
				// response to pending req, try to add
				w.addStream(pbresp, pendingReq)
				pendingReq = nil
				curReqC = w.reqc
				// response to head of queue creation
				if ws := w.resuming[0]; ws != nil {
					w.addSubstream(pbresp, ws)
					w.dispatchEvent(pbresp)
					w.resuming[0] = nil
				}
				if ws := w.nextResume(); ws != nil {
					wc.Send(ws.initReq.toPB())
				}
			case pbresp.Canceled:
				delete(cancelSet, pbresp.WatchId)
				// shutdown serveStream, if any
				w.mu.Lock()
				if ws, ok := w.streams[pbresp.WatchId]; ok {
				if ws, ok := w.substreams[pbresp.WatchId]; ok {
					// signal to stream goroutine to update closingc
					close(ws.recvc)
					delete(w.streams, ws.id)
				}
				numStreams := len(w.streams)
				w.mu.Unlock()
				if numStreams == 0 {
					// don't leak watcher streams
					return
					closing[ws] = struct{}{}
				}
			default:
				// dispatch to appropriate watch stream
@@ -448,7 +460,6 @@ func (w *watchGrpcStream) run() {
				wc.Send(req)
			}
		// watch client failed to recv; spawn another if possible
		// TODO report watch client errors from errc?
		case err := <-w.errc:
			if toErr(w.ctx, err) == v3rpc.ErrNoLeader {
				closeErr = err
@@ -457,48 +468,58 @@ func (w *watchGrpcStream) run() {
			if wc, closeErr = w.newWatchClient(); closeErr != nil {
				return
			}
			curReqC = w.reqc
			if pendingReq != nil {
				failedReq = pendingReq
			if ws := w.nextResume(); ws != nil {
				wc.Send(ws.initReq.toPB())
			}
			cancelSet = make(map[int64]struct{})
		case <-stopc:
		case <-w.stopc:
			return
		}

		// send failed; queue for retry
		if failedReq != nil {
			go func(wr *watchRequest) {
				select {
				case w.reqc <- wr:
				case <-wr.ctx.Done():
				case <-w.donec:
				}
			}(pendingReq)
			failedReq = nil
			pendingReq = nil
		case ws := <-w.closingc:
			w.closeSubstream(ws)
			delete(closing, ws)
			if len(w.substreams)+len(w.resuming) == 0 {
				// no more watchers on this stream, shutdown
				return
			}
		}
	}
}

// nextResume chooses the next resuming to register with the grpc stream. Abandoned
// streams are marked as nil in the queue since the head must wait for its inflight registration.
func (w *watchGrpcStream) nextResume() *watcherStream {
	for len(w.resuming) != 0 {
		if w.resuming[0] != nil {
			return w.resuming[0]
		}
		w.resuming = w.resuming[1:len(w.resuming)]
	}
	return nil
}

// dispatchEvent sends a WatchResponse to the appropriate watcher stream
func (w *watchGrpcStream) dispatchEvent(pbresp *pb.WatchResponse) bool {
	w.mu.RLock()
	defer w.mu.RUnlock()
	ws, ok := w.streams[pbresp.WatchId]
	ws, ok := w.substreams[pbresp.WatchId]
	if !ok {
		return false
	}
	events := make([]*Event, len(pbresp.Events))
	for i, ev := range pbresp.Events {
		events[i] = (*Event)(ev)
	}
	if ok {
		wr := &WatchResponse{
			Header:          *pbresp.Header,
			Events:          events,
			CompactRevision: pbresp.CompactRevision,
			Canceled:        pbresp.Canceled}
		ws.recvc <- wr
	wr := &WatchResponse{
		Header:          *pbresp.Header,
		Events:          events,
		CompactRevision: pbresp.CompactRevision,
		created:         pbresp.Created,
		Canceled:        pbresp.Canceled,
	}
	return ok
	select {
	case ws.recvc <- wr:
	case <-ws.donec:
		return false
	}
	return true
}

// serveWatchClient forwards messages from the grpc stream to run()
@@ -520,134 +541,123 @@ func (w *watchGrpcStream) serveWatchClient(wc pb.Watch_WatchClient) {
	}
}

// serveStream forwards watch responses from run() to the subscriber
func (w *watchGrpcStream) serveStream(ws *watcherStream) {
	var closeErr error
	emptyWr := &WatchResponse{}
	wrs := []*WatchResponse{}
// serveSubstream forwards watch responses from run() to the subscriber
func (w *watchGrpcStream) serveSubstream(ws *watcherStream, resumec chan struct{}) {
	if ws.closing {
		panic("created substream goroutine but substream is closing")
	}

	// nextRev is the minimum expected next revision
	nextRev := ws.initReq.rev
	resuming := false
	closing := false
	for !closing {
	defer func() {
		if !resuming {
			ws.closing = true
		}
		close(ws.donec)
		if !resuming {
			w.closingc <- ws
		}
	}()

	emptyWr := &WatchResponse{}
	for {
		curWr := emptyWr
		outc := ws.outc
		if len(wrs) > 0 {
			curWr = wrs[0]

		if len(ws.buf) > 0 && ws.buf[0].created {
			select {
			case ws.initReq.retc <- ws.outc:
			default:
			}
			ws.buf = ws.buf[1:]
		}

		if len(ws.buf) > 0 {
			curWr = ws.buf[0]
		} else {
			outc = nil
		}
		select {
		case outc <- *curWr:
			if wrs[0].Err() != nil {
				closing = true
				break
			}
			var newRev int64
			if len(wrs[0].Events) > 0 {
				newRev = wrs[0].Events[len(wrs[0].Events)-1].Kv.ModRevision
			} else {
				newRev = wrs[0].Header.Revision
			}
			if newRev != ws.lastRev {
				ws.lastRev = newRev
			}
			wrs[0] = nil
			wrs = wrs[1:]
		case wr, ok := <-ws.recvc:
			if !ok {
				// shutdown from closeStream
			if ws.buf[0].Err() != nil {
				return
			}
			// resume up to last seen event if disconnected
			if resuming && wr.Err() == nil {
				resuming = false
				// trim events already seen
				for i := 0; i < len(wr.Events); i++ {
					if wr.Events[i].Kv.ModRevision > ws.lastRev {
						wr.Events = wr.Events[i:]
						break
					}
				}
				// only forward new events
				if wr.Events[0].Kv.ModRevision == ws.lastRev {
					break
				}
			ws.buf[0] = nil
			ws.buf = ws.buf[1:]
		case wr, ok := <-ws.recvc:
			if !ok {
				// shutdown from closeSubstream
				return
			}
			resuming = false
			// TODO don't keep buffering if subscriber stops reading
			wrs = append(wrs, wr)
		case resumeRev := <-ws.resumec:
			wrs = nil
			resuming = true
			if resumeRev == -1 {
				// pause serving stream while resume gets set up
				break
			// TODO pause channel if buffer gets too large
			ws.buf = append(ws.buf, wr)
			nextRev = wr.Header.Revision
			if len(wr.Events) > 0 {
				nextRev = wr.Events[len(wr.Events)-1].Kv.ModRevision + 1
			}
			if resumeRev != ws.lastRev {
				panic("unexpected resume revision")
			}
		case <-w.donec:
			closing = true
			closeErr = w.closeErr
			ws.initReq.rev = nextRev
		case <-ws.initReq.ctx.Done():
			closing = true
			return
		case <-resumec:
			resuming = true
			return
		}
	}

	// try to send off close error
	if closeErr != nil {
		select {
		case ws.outc <- WatchResponse{closeErr: w.closeErr}:
		case <-w.donec:
		case <-time.After(closeSendErrTimeout):
		}
	}

	w.closeStream(ws)
	w.stopIfEmpty()
	// lazily send cancel message if events on missing id
}

func (wgs *watchGrpcStream) stopIfEmpty() {
	wgs.mu.Lock()
	if len(wgs.streams) == 0 && wgs.stopc != nil {
		close(wgs.stopc)
		wgs.stopc = nil
	}
	wgs.mu.Unlock()
}

func (w *watchGrpcStream) newWatchClient() (pb.Watch_WatchClient, error) {
	ws, rerr := w.resume()
	if rerr != nil {
		return nil, rerr
	// connect to grpc stream
	wc, err := w.openWatchClient()
	if err != nil {
		return nil, v3rpc.Error(err)
	}
	go w.serveWatchClient(ws)
	return ws, nil
}

// resume creates a new WatchClient with all current watchers reestablished
func (w *watchGrpcStream) resume() (ws pb.Watch_WatchClient, err error) {
	for {
		if ws, err = w.openWatchClient(); err != nil {
			break
		} else if err = w.resumeWatchers(ws); err == nil {
			break
	// mark all substreams as resuming
	if len(w.substreams)+len(w.resuming) > 0 {
		close(w.resumec)
		w.resumec = make(chan struct{})
		w.joinSubstreams()
		for _, ws := range w.substreams {
			ws.id = -1
			w.resuming = append(w.resuming, ws)
		}
		for _, ws := range w.resuming {
			if ws == nil || ws.closing {
				continue
			}
			ws.donec = make(chan struct{})
			go w.serveSubstream(ws, w.resumec)
		}
	}
	w.substreams = make(map[int64]*watcherStream)
	// receive data from new grpc stream
	go w.serveWatchClient(wc)
	return wc, nil
}

// joinSubstreams waits for all substream goroutines to complete
func (w *watchGrpcStream) joinSubstreams() {
	for _, ws := range w.substreams {
		<-ws.donec
	}
	for _, ws := range w.resuming {
		if ws != nil {
			<-ws.donec
		}
	}
	return ws, v3rpc.Error(err)
}

// openWatchClient retries opening a watchclient until retryConnection fails
func (w *watchGrpcStream) openWatchClient() (ws pb.Watch_WatchClient, err error) {
	for {
		w.mu.Lock()
		stopc := w.stopc
		w.mu.Unlock()
		if stopc == nil {
		select {
		case <-w.stopc:
			if err == nil {
				err = context.Canceled
				return nil, context.Canceled
			}
			return nil, err
		default:
		}
		if ws, err = w.remote.Watch(w.ctx, grpc.FailFast(false)); ws != nil && err == nil {
			break
@@ -659,48 +669,6 @@ func (w *watchGrpcStream) openWatchClient() (ws pb.Watch_WatchClient, err error)
	return ws, nil
}

// resumeWatchers rebuilds every registered watcher on a new client
func (w *watchGrpcStream) resumeWatchers(wc pb.Watch_WatchClient) error {
	w.mu.RLock()
	streams := make([]*watcherStream, 0, len(w.streams))
	for _, ws := range w.streams {
		streams = append(streams, ws)
	}
	w.mu.RUnlock()

	for _, ws := range streams {
		// pause serveStream
		ws.resumec <- -1

		// reconstruct watcher from initial request
		if ws.lastRev != 0 {
			ws.initReq.rev = ws.lastRev
		}
		if err := wc.Send(ws.initReq.toPB()); err != nil {
			return err
		}

		// wait for request ack
		resp, err := wc.Recv()
		if err != nil {
			return err
		} else if len(resp.Events) != 0 || !resp.Created {
			return fmt.Errorf("watcher: unexpected response (%+v)", resp)
		}

		// id may be different since new remote watcher; update map
		w.mu.Lock()
		delete(w.streams, ws.id)
		ws.id = resp.WatchId
		w.streams[ws.id] = ws
		w.mu.Unlock()

		// unpause serveStream
		ws.resumec <- ws.lastRev
	}
	return nil
}

// toPB converts an internal watch request structure to its protobuf message
func (wr *watchRequest) toPB() *pb.WatchRequest {
	req := &pb.WatchCreateRequest{
@@ -708,6 +676,7 @@ func (wr *watchRequest) toPB() *pb.WatchRequest {
		Key:            []byte(wr.key),
		RangeEnd:       []byte(wr.end),
		ProgressNotify: wr.progressNotify,
		PrevKv:         wr.prevKV,
	}
	cr := &pb.WatchRequest_CreateRequest{CreateRequest: req}
	return &pb.WatchRequest{RequestUnion: cr}
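Since toPB now carries PrevKv on the WatchCreateRequest, a watcher can observe both sides of each change. A hedged sketch (endpoint and key are illustrative) using the option added in this PR:

```go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/coreos/etcd/clientv3"
)

func main() {
	cli, err := clientv3.New(clientv3.Config{Endpoints: []string{"localhost:2379"}})
	if err != nil {
		log.Fatal(err)
	}
	defer cli.Close()

	// With WithPrevKV, each event carries the key-value pair as it was
	// before the change (nil if the key was new or the pair was compacted).
	for wresp := range cli.Watch(context.TODO(), "greeting", clientv3.WithPrevKV()) {
		for _, ev := range wresp.Events {
			if ev.PrevKv != nil {
				fmt.Printf("%s: %s -> %s\n", ev.Kv.Key, ev.PrevKv.Value, ev.Kv.Value)
			}
		}
	}
}
```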
cmd/Godeps/Godeps.json (generated; 36 lines changed)

@@ -237,48 +237,48 @@
		},
		{
			"ImportPath": "google.golang.org/grpc",
			"Comment": "v1.0.0-174-gc278196",
			"Rev": "c2781963b3af261a37e0f14fdcb7c1fa13259e1f"
			"Comment": "v1.0.0-183-g231b4cf",
			"Rev": "231b4cfea0e79843053a33f5fe90bd4d84b23cd3"
		},
		{
			"ImportPath": "google.golang.org/grpc/codes",
			"Comment": "v1.0.0-174-gc278196",
			"Rev": "c2781963b3af261a37e0f14fdcb7c1fa13259e1f"
			"Comment": "v1.0.0-183-g231b4cf",
			"Rev": "231b4cfea0e79843053a33f5fe90bd4d84b23cd3"
		},
		{
			"ImportPath": "google.golang.org/grpc/credentials",
			"Comment": "v1.0.0-174-gc278196",
			"Rev": "c2781963b3af261a37e0f14fdcb7c1fa13259e1f"
			"Comment": "v1.0.0-183-g231b4cf",
			"Rev": "231b4cfea0e79843053a33f5fe90bd4d84b23cd3"
		},
		{
			"ImportPath": "google.golang.org/grpc/grpclog",
			"Comment": "v1.0.0-174-gc278196",
			"Rev": "c2781963b3af261a37e0f14fdcb7c1fa13259e1f"
			"Comment": "v1.0.0-183-g231b4cf",
			"Rev": "231b4cfea0e79843053a33f5fe90bd4d84b23cd3"
		},
		{
			"ImportPath": "google.golang.org/grpc/internal",
			"Comment": "v1.0.0-174-gc278196",
			"Rev": "c2781963b3af261a37e0f14fdcb7c1fa13259e1f"
			"Comment": "v1.0.0-183-g231b4cf",
			"Rev": "231b4cfea0e79843053a33f5fe90bd4d84b23cd3"
		},
		{
			"ImportPath": "google.golang.org/grpc/metadata",
			"Comment": "v1.0.0-174-gc278196",
			"Rev": "c2781963b3af261a37e0f14fdcb7c1fa13259e1f"
			"Comment": "v1.0.0-183-g231b4cf",
			"Rev": "231b4cfea0e79843053a33f5fe90bd4d84b23cd3"
		},
		{
			"ImportPath": "google.golang.org/grpc/naming",
			"Comment": "v1.0.0-174-gc278196",
			"Rev": "c2781963b3af261a37e0f14fdcb7c1fa13259e1f"
			"Comment": "v1.0.0-183-g231b4cf",
			"Rev": "231b4cfea0e79843053a33f5fe90bd4d84b23cd3"
		},
		{
			"ImportPath": "google.golang.org/grpc/peer",
			"Comment": "v1.0.0-174-gc278196",
			"Rev": "c2781963b3af261a37e0f14fdcb7c1fa13259e1f"
			"Comment": "v1.0.0-183-g231b4cf",
			"Rev": "231b4cfea0e79843053a33f5fe90bd4d84b23cd3"
		},
		{
			"ImportPath": "google.golang.org/grpc/transport",
			"Comment": "v1.0.0-174-gc278196",
			"Rev": "c2781963b3af261a37e0f14fdcb7c1fa13259e1f"
			"Comment": "v1.0.0-183-g231b4cf",
			"Rev": "231b4cfea0e79843053a33f5fe90bd4d84b23cd3"
		},
		{
			"ImportPath": "gopkg.in/cheggaaa/pb.v1",
cmd/vendor/google.golang.org/grpc/call.go (generated, vendored; 4 lines changed)

@@ -170,9 +170,9 @@ func Invoke(ctx context.Context, method string, args, reply interface{}, cc *Cli
		if _, ok := err.(*rpcError); ok {
			return err
		}
		if err == errConnClosing {
		if err == errConnClosing || err == errConnUnavailable {
			if c.failFast {
				return Errorf(codes.Unavailable, "%v", errConnClosing)
				return Errorf(codes.Unavailable, "%v", err)
			}
			continue
		}
cmd/vendor/google.golang.org/grpc/clientconn.go (generated, vendored; 67 lines changed)

@@ -73,7 +73,9 @@ var (
	errConnDrain = errors.New("grpc: the connection is drained")
	// errConnClosing indicates that the connection is closing.
	errConnClosing = errors.New("grpc: the connection is closing")
	errNoAddr      = errors.New("grpc: there is no address available to dial")
	// errConnUnavailable indicates that the connection is unavailable.
	errConnUnavailable = errors.New("grpc: the connection is unavailable")
	errNoAddr          = errors.New("grpc: there is no address available to dial")
	// minimum time to give a connection to complete
	minConnectTimeout = 20 * time.Second
)
@@ -213,9 +215,14 @@ func WithUserAgent(s string) DialOption {
	}
}

// Dial creates a client connection the given target.
// Dial creates a client connection to the given target.
func Dial(target string, opts ...DialOption) (*ClientConn, error) {
	ctx := context.Background()
	return DialContext(context.Background(), target, opts...)
}

// DialContext creates a client connection to the given target
// using the supplied context.
func DialContext(ctx context.Context, target string, opts ...DialOption) (*ClientConn, error) {
	cc := &ClientConn{
		target: target,
		conns:  make(map[Address]*addrConn),
@@ -472,6 +479,10 @@ func (cc *ClientConn) getTransport(ctx context.Context, opts BalancerGetOptions)
	if cc.dopts.balancer == nil {
		// If balancer is nil, there should be only one addrConn available.
		cc.mu.RLock()
		if cc.conns == nil {
			cc.mu.RUnlock()
			return nil, nil, toRPCErr(ErrClientConnClosing)
		}
		for _, ac = range cc.conns {
			// Break after the first iteration to get the first addrConn.
			ok = true
@@ -501,11 +512,7 @@ func (cc *ClientConn) getTransport(ctx context.Context, opts BalancerGetOptions)
		}
		return nil, nil, errConnClosing
	}
	// ac.wait should block on transient failure only if balancer is nil and RPC is non-failfast.
	// - If RPC is failfast, ac.wait should not block.
	// - If balancer is not nil, ac.wait should return errConnClosing on transient failure
	//   so that non-failfast RPCs will try to get a new transport instead of waiting on ac.
	t, err := ac.wait(ctx, cc.dopts.balancer == nil && opts.BlockingWait)
	t, err := ac.wait(ctx, cc.dopts.balancer != nil, !opts.BlockingWait)
	if err != nil {
		if put != nil {
			put()
@@ -757,36 +764,42 @@ func (ac *addrConn) transportMonitor() {
}

// wait blocks until i) the new transport is up or ii) ctx is done or iii) ac is closed or
// iv) transport is in TransientFailure and blocking is false.
func (ac *addrConn) wait(ctx context.Context, blocking bool) (transport.ClientTransport, error) {
// iv) transport is in TransientFailure and there's no balancer/failfast is true.
func (ac *addrConn) wait(ctx context.Context, hasBalancer, failfast bool) (transport.ClientTransport, error) {
	for {
		ac.mu.Lock()
		switch {
		case ac.state == Shutdown:
			err := ac.tearDownErr
			if failfast || !hasBalancer {
				// RPC is failfast or balancer is nil. This RPC should fail with ac.tearDownErr.
				err := ac.tearDownErr
				ac.mu.Unlock()
				return nil, err
			}
			ac.mu.Unlock()
			return nil, err
			return nil, errConnClosing
		case ac.state == Ready:
			ct := ac.transport
			ac.mu.Unlock()
			return ct, nil
		case ac.state == TransientFailure && !blocking:
			ac.mu.Unlock()
			return nil, errConnClosing
		default:
			ready := ac.ready
			if ready == nil {
				ready = make(chan struct{})
				ac.ready = ready
			}
			ac.mu.Unlock()
			select {
			case <-ctx.Done():
				return nil, toRPCErr(ctx.Err())
			// Wait until the new transport is ready or failed.
			case <-ready:
		case ac.state == TransientFailure:
			if failfast || hasBalancer {
				ac.mu.Unlock()
				return nil, errConnUnavailable
			}
		}
		ready := ac.ready
		if ready == nil {
			ready = make(chan struct{})
			ac.ready = ready
		}
		ac.mu.Unlock()
		select {
		case <-ctx.Done():
			return nil, toRPCErr(ctx.Err())
		// Wait until the new transport is ready or failed.
		case <-ready:
		}
	}
}
cmd/vendor/google.golang.org/grpc/stream.go (generated, vendored; 4 lines changed)

@@ -146,9 +146,9 @@ func NewClientStream(ctx context.Context, desc *StreamDesc, cc *ClientConn, meth
		if _, ok := err.(*rpcError); ok {
			return nil, err
		}
		if err == errConnClosing {
		if err == errConnClosing || err == errConnUnavailable {
			if c.failFast {
				return nil, Errorf(codes.Unavailable, "%v", errConnClosing)
				return nil, Errorf(codes.Unavailable, "%v", err)
			}
			continue
		}
@@ -53,8 +53,8 @@ func SRVGetCluster(name, dns string, defaultToken string, apurls types.URLs) (st
			return err
		}
		for _, srv := range addrs {
			target := strings.TrimSuffix(srv.Target, ".")
			host := net.JoinHostPort(target, fmt.Sprintf("%d", srv.Port))
			port := fmt.Sprintf("%d", srv.Port)
			host := net.JoinHostPort(srv.Target, port)
			tcpAddr, err := resolveTCPAddr("tcp", host)
			if err != nil {
				plog.Warningf("couldn't resolve host %s during SRV discovery", host)
@@ -70,8 +70,11 @@ func SRVGetCluster(name, dns string, defaultToken string, apurls types.URLs) (st
				n = fmt.Sprintf("%d", tempName)
				tempName += 1
			}
			stringParts = append(stringParts, fmt.Sprintf("%s=%s%s", n, prefix, host))
			plog.Noticef("got bootstrap from DNS for %s at %s%s", service, prefix, host)
			// SRV records have a trailing dot but URL shouldn't.
			shortHost := strings.TrimSuffix(srv.Target, ".")
			urlHost := net.JoinHostPort(shortHost, port)
			stringParts = append(stringParts, fmt.Sprintf("%s=%s%s", n, prefix, urlHost))
			plog.Noticef("got bootstrap from DNS for %s at %s%s", service, prefix, urlHost)
		}
		return nil
	}
@ -17,6 +17,7 @@ package discovery
|
||||
import (
|
||||
"errors"
|
||||
"net"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/coreos/etcd/pkg/testutil"
|
||||
@ -29,11 +30,22 @@ func TestSRVGetCluster(t *testing.T) {
|
||||
}()
|
||||
|
||||
name := "dnsClusterTest"
|
||||
dns := map[string]string{
|
||||
"1.example.com.:2480": "10.0.0.1:2480",
|
||||
"2.example.com.:2480": "10.0.0.2:2480",
|
||||
"3.example.com.:2480": "10.0.0.3:2480",
|
||||
"4.example.com.:2380": "10.0.0.3:2380",
|
||||
}
|
||||
srvAll := []*net.SRV{
|
||||
{Target: "1.example.com.", Port: 2480},
|
||||
{Target: "2.example.com.", Port: 2480},
|
||||
{Target: "3.example.com.", Port: 2480},
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
withSSL []*net.SRV
|
||||
withoutSSL []*net.SRV
|
||||
urls []string
|
||||
dns map[string]string
|
||||
|
||||
expected string
|
||||
}{
|
||||
@ -41,61 +53,50 @@ func TestSRVGetCluster(t *testing.T) {
|
||||
[]*net.SRV{},
|
||||
[]*net.SRV{},
|
||||
nil,
|
||||
nil,
|
||||
|
||||
"",
|
||||
},
|
||||
{
|
||||
[]*net.SRV{
|
||||
{Target: "10.0.0.1", Port: 2480},
|
||||
{Target: "10.0.0.2", Port: 2480},
|
||||
{Target: "10.0.0.3", Port: 2480},
|
||||
},
|
||||
srvAll,
|
||||
[]*net.SRV{},
|
||||
nil,
|
||||
|
||||
"0=https://1.example.com:2480,1=https://2.example.com:2480,2=https://3.example.com:2480",
|
||||
},
|
||||
{
|
||||
srvAll,
|
||||
[]*net.SRV{{Target: "4.example.com.", Port: 2380}},
|
||||
nil,
|
||||
|
||||
"0=https://10.0.0.1:2480,1=https://10.0.0.2:2480,2=https://10.0.0.3:2480",
|
||||
"0=https://1.example.com:2480,1=https://2.example.com:2480,2=https://3.example.com:2480,3=http://4.example.com:2380",
|
||||
},
|
||||
{
|
||||
[]*net.SRV{
|
||||
{Target: "10.0.0.1", Port: 2480},
|
||||
{Target: "10.0.0.2", Port: 2480},
|
||||
{Target: "10.0.0.3", Port: 2480},
|
||||
},
|
||||
[]*net.SRV{
|
||||
{Target: "10.0.0.1", Port: 2380},
|
||||
},
|
||||
nil,
|
||||
nil,
|
||||
"0=https://10.0.0.1:2480,1=https://10.0.0.2:2480,2=https://10.0.0.3:2480,3=http://10.0.0.1:2380",
|
||||
},
|
||||
{
|
||||
[]*net.SRV{
|
||||
{Target: "10.0.0.1", Port: 2480},
|
||||
{Target: "10.0.0.2", Port: 2480},
|
||||
{Target: "10.0.0.3", Port: 2480},
|
||||
},
|
||||
[]*net.SRV{
|
||||
{Target: "10.0.0.1", Port: 2380},
|
||||
},
|
||||
srvAll,
|
||||
[]*net.SRV{{Target: "4.example.com.", Port: 2380}},
|
||||
[]string{"https://10.0.0.1:2480"},
|
||||
nil,
|
||||
"dnsClusterTest=https://10.0.0.1:2480,0=https://10.0.0.2:2480,1=https://10.0.0.3:2480,2=http://10.0.0.1:2380",
|
||||
|
||||
"dnsClusterTest=https://1.example.com:2480,0=https://2.example.com:2480,1=https://3.example.com:2480,2=http://4.example.com:2380",
|
||||
},
|
||||
// matching local member with resolved addr and returning unresolved hostnames
|
||||
{
|
||||
[]*net.SRV{
|
||||
{Target: "1.example.com.", Port: 2480},
|
||||
{Target: "2.example.com.", Port: 2480},
|
||||
{Target: "3.example.com.", Port: 2480},
|
||||
},
|
||||
srvAll,
|
||||
nil,
|
||||
[]string{"https://10.0.0.1:2480"},
|
||||
map[string]string{"1.example.com:2480": "10.0.0.1:2480", "2.example.com:2480": "10.0.0.2:2480", "3.example.com:2480": "10.0.0.3:2480"},
|
||||
|
||||
"dnsClusterTest=https://1.example.com:2480,0=https://2.example.com:2480,1=https://3.example.com:2480",
|
||||
},
|
||||
// invalid
|
||||
}
|
||||
|
||||
resolveTCPAddr = func(network, addr string) (*net.TCPAddr, error) {
|
||||
if strings.Contains(addr, "10.0.0.") {
|
||||
// accept IP addresses when resolving apurls
|
||||
return net.ResolveTCPAddr(network, addr)
|
||||
}
|
||||
if dns[addr] == "" {
|
||||
return nil, errors.New("missing dns record")
|
||||
}
|
||||
return net.ResolveTCPAddr(network, dns[addr])
|
||||
}
|
||||
|
||||
for i, tt := range tests {
|
||||
@ -108,12 +109,6 @@ func TestSRVGetCluster(t *testing.T) {
|
||||
}
|
||||
return "", nil, errors.New("Unknown service in mock")
|
||||
}
|
||||
resolveTCPAddr = func(network, addr string) (*net.TCPAddr, error) {
|
||||
if tt.dns == nil || tt.dns[addr] == "" {
|
||||
return net.ResolveTCPAddr(network, addr)
|
||||
}
|
||||
return net.ResolveTCPAddr(network, tt.dns[addr])
|
||||
}
|
||||
urls := testutil.MustNewURLs(t, tt.urls)
|
||||
str, token, err := SRVGetCluster(name, "example.com", "token", urls)
|
||||
if err != nil {
|
||||
|
@ -39,15 +39,23 @@ func txnTestSuccess(cx ctlCtx) {
|
||||
if err := ctlV3Put(cx, "key2", "value2", ""); err != nil {
|
||||
cx.t.Fatalf("txnTestSuccess ctlV3Put error (%v)", err)
|
||||
}
|
||||
|
||||
rqs := txnRequests{
|
||||
compare: []string{`version("key1") = "1"`, `version("key2") = "1"`},
|
||||
ifSucess: []string{"get key1", "get key2"},
|
||||
ifFail: []string{`put key1 "fail"`, `put key2 "fail"`},
|
||||
results: []string{"SUCCESS", "key1", "value1", "key2", "value2"},
|
||||
rqs := []txnRequests{
|
||||
{
|
||||
compare: []string{`version("key1") = "1"`, `version("key2") = "1"`},
|
||||
ifSucess: []string{"get key1", "get key2", `put "key \"with\" space" "value \x23"`},
|
||||
ifFail: []string{`put key1 "fail"`, `put key2 "fail"`},
|
||||
results: []string{"SUCCESS", "key1", "value1", "key2", "value2"},
|
||||
},
|
||||
{
|
||||
compare: []string{`version("key \"with\" space") = "1"`},
|
||||
ifSucess: []string{`get "key \"with\" space"`},
|
||||
results: []string{"SUCCESS", `key "with" space`, "value \x23"},
|
||||
},
|
||||
}
|
||||
if err := ctlV3Txn(cx, rqs); err != nil {
|
||||
cx.t.Fatal(err)
|
||||
for _, rq := range rqs {
|
||||
if err := ctlV3Txn(cx, rq); err != nil {
|
||||
cx.t.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -231,6 +231,8 @@ Watch watches events stream on keys or prefixes, [key or prefix, range_end) if `
|
||||
|
||||
- prefix -- watch on a prefix if prefix is set.
|
||||
|
||||
- prev-kv -- get the previous key-value pair before the event happens.
|
||||
|
||||
- rev -- the revision to start watching. Specifying a revision is useful for observing past events.
|
||||
|
||||
#### Input Format
|
||||
@ -245,7 +247,7 @@ watch [options] <key or prefix>\n
|
||||
|
||||
##### Simple reply
|
||||
|
||||
- \<event\>\n\<key\>\n\<value\>\n\<event\>\n\<next_key\>\n\<next_value\>\n...
|
||||
- \<event\>[\n\<old_key\>\n\<old_value\>]\n\<key\>\n\<value\>\n\<event\>\n\<next_key\>\n\<next_value\>\n...
|
||||
|
||||
- Additional error string if WATCH failed. Exit code is non-zero.
|
||||
|
||||
|
@ -23,6 +23,7 @@ import (
|
||||
|
||||
var (
|
||||
delPrefix bool
|
||||
delPrevKV bool
|
||||
)
|
||||
|
||||
// NewDelCommand returns the cobra command for "del".
|
||||
@ -34,6 +35,7 @@ func NewDelCommand() *cobra.Command {
|
||||
}
|
||||
|
||||
cmd.Flags().BoolVar(&delPrefix, "prefix", false, "delete keys with matching prefix")
|
||||
cmd.Flags().BoolVar(&delPrevKV, "prev-kv", false, "return deleted key-value pairs")
|
||||
return cmd
|
||||
}
|
||||
|
||||
@ -65,6 +67,9 @@ func getDelOp(cmd *cobra.Command, args []string) (string, []clientv3.OpOption) {
|
||||
if delPrefix {
|
||||
opts = append(opts, clientv3.WithPrefix())
|
||||
}
|
||||
if delPrevKV {
|
||||
opts = append(opts, clientv3.WithPrevKV())
|
||||
}
|
||||
|
||||
return key, opts
|
||||
}
|
||||
|
@ -243,7 +243,7 @@ func authCfgFromCmd(cmd *cobra.Command) *authCfg {
|
||||
var cfg authCfg
|
||||
|
||||
splitted := strings.SplitN(userFlag, ":", 2)
|
||||
if len(splitted) == 0 {
|
||||
if len(splitted) < 2 {
|
||||
cfg.username = userFlag
|
||||
cfg.password, err = speakeasy.Ask("Password: ")
|
||||
if err != nil {
|
||||
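The corrected guard relies on strings.SplitN semantics, which a two-line check makes obvious: SplitN with a non-empty separator never returns an empty slice, so the old `len(splitted) == 0` branch was dead and the password prompt never fired.

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	fmt.Println(len(strings.SplitN("user:pass", ":", 2))) // 2: password given inline
	fmt.Println(len(strings.SplitN("user", ":", 2)))      // 1: fall through to the prompt
}
```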
|
@ -108,6 +108,9 @@ type simplePrinter struct {
|
||||
|
||||
func (s *simplePrinter) Del(resp v3.DeleteResponse) {
|
||||
fmt.Println(resp.Deleted)
|
||||
for _, kv := range resp.PrevKvs {
|
||||
printKV(s.isHex, kv)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *simplePrinter) Get(resp v3.GetResponse) {
|
||||
@ -116,7 +119,12 @@ func (s *simplePrinter) Get(resp v3.GetResponse) {
|
||||
}
|
||||
}
|
||||
|
||||
func (s *simplePrinter) Put(r v3.PutResponse) { fmt.Println("OK") }
|
||||
func (s *simplePrinter) Put(r v3.PutResponse) {
|
||||
fmt.Println("OK")
|
||||
if r.PrevKv != nil {
|
||||
printKV(s.isHex, r.PrevKv)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *simplePrinter) Txn(resp v3.TxnResponse) {
|
||||
if resp.Succeeded {
|
||||
@ -143,6 +151,9 @@ func (s *simplePrinter) Txn(resp v3.TxnResponse) {
|
||||
func (s *simplePrinter) Watch(resp v3.WatchResponse) {
|
||||
for _, e := range resp.Events {
|
||||
fmt.Println(e.Type)
|
||||
if e.PrevKv != nil {
|
||||
printKV(s.isHex, e.PrevKv)
|
||||
}
|
||||
printKV(s.isHex, e.Kv)
|
||||
}
|
||||
}
|
||||
|
@ -24,7 +24,8 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
leaseStr string
|
||||
leaseStr string
|
||||
putPrevKV bool
|
||||
)
|
||||
|
||||
// NewPutCommand returns the cobra command for "put".
|
||||
@ -49,6 +50,7 @@ will store the content of the file to <key>.
|
||||
Run: putCommandFunc,
|
||||
}
|
||||
cmd.Flags().StringVar(&leaseStr, "lease", "0", "lease ID (in hexadecimal) to attach to the key")
|
||||
cmd.Flags().BoolVar(&putPrevKV, "prev-kv", false, "return changed key-value pairs")
|
||||
return cmd
|
||||
}
|
||||
|
||||
@ -85,6 +87,9 @@ func getPutOp(cmd *cobra.Command, args []string) (string, string, []clientv3.OpO
|
||||
if id != 0 {
|
||||
opts = append(opts, clientv3.WithLease(clientv3.LeaseID(id)))
|
||||
}
|
||||
if putPrevKV {
|
||||
opts = append(opts, clientv3.WithPrevKV())
|
||||
}
|
||||
|
||||
return key, value, opts
|
||||
}
|
||||
|
@ -36,7 +36,10 @@ import (
|
||||
"github.com/coreos/etcd/pkg/types"
|
||||
"github.com/coreos/etcd/raft"
|
||||
"github.com/coreos/etcd/raft/raftpb"
|
||||
"github.com/coreos/etcd/snap"
|
||||
"github.com/coreos/etcd/store"
|
||||
"github.com/coreos/etcd/wal"
|
||||
"github.com/coreos/etcd/wal/walpb"
|
||||
"github.com/spf13/cobra"
|
||||
"golang.org/x/net/context"
|
||||
)
|
||||
@ -112,7 +115,7 @@ func snapshotSaveCommandFunc(cmd *cobra.Command, args []string) {
|
||||
|
||||
partpath := path + ".part"
|
||||
f, err := os.Create(partpath)
|
||||
defer f.Close()
|
||||
|
||||
if err != nil {
|
||||
exiterr := fmt.Errorf("could not open %s (%v)", partpath, err)
|
||||
ExitWithError(ExitBadArgs, exiterr)
|
||||
@ -131,6 +134,8 @@ func snapshotSaveCommandFunc(cmd *cobra.Command, args []string) {
|
||||
|
||||
fileutil.Fsync(f)
|
||||
|
||||
f.Close()
|
||||
|
||||
if rerr := os.Rename(partpath, path); rerr != nil {
|
||||
exiterr := fmt.Errorf("could not rename %s to %s (%v)", partpath, path, rerr)
|
||||
ExitWithError(ExitIO, exiterr)
|
||||
@ -186,8 +191,8 @@ func snapshotRestoreCommandFunc(cmd *cobra.Command, args []string) {
|
||||
ExitWithError(ExitInvalidInput, fmt.Errorf("data-dir %q exists", basedir))
|
||||
}
|
||||
|
||||
makeDB(snapdir, args[0])
|
||||
makeWAL(waldir, cl)
|
||||
makeDB(snapdir, args[0], len(cl.Members()))
|
||||
makeWALAndSnap(waldir, snapdir, cl)
|
||||
}
|
||||
|
||||
func initialClusterFromName(name string) string {
|
||||
@ -199,11 +204,18 @@ func initialClusterFromName(name string) string {
|
||||
}
|
||||
|
||||
// makeWALAndSnap creates a WAL and a raft snapshot for the initial cluster
|
||||
func makeWAL(waldir string, cl *membership.RaftCluster) {
|
||||
func makeWALAndSnap(waldir, snapdir string, cl *membership.RaftCluster) {
|
||||
if err := fileutil.CreateDirAll(waldir); err != nil {
|
||||
ExitWithError(ExitIO, err)
|
||||
}
|
||||
|
||||
// add members again to persist them to the store we create.
|
||||
st := store.New(etcdserver.StoreClusterPrefix, etcdserver.StoreKeysPrefix)
|
||||
cl.SetStore(st)
|
||||
for _, m := range cl.Members() {
|
||||
cl.AddMember(m)
|
||||
}
|
||||
|
||||
m := cl.MemberByName(restoreName)
|
||||
md := &etcdserverpb.Metadata{NodeID: uint64(m.ID), ClusterID: uint64(cl.ID())}
|
||||
metadata, merr := md.Marshal()
|
||||
@ -227,7 +239,9 @@ func makeWAL(waldir string, cl *membership.RaftCluster) {
|
||||
}
|
||||
|
||||
ents := make([]raftpb.Entry, len(peers))
|
||||
nodeIDs := make([]uint64, len(peers))
|
||||
for i, p := range peers {
|
||||
nodeIDs[i] = p.ID
|
||||
cc := raftpb.ConfChange{
|
||||
Type: raftpb.ConfChangeAddNode,
|
||||
NodeID: p.ID,
|
||||
@ -245,20 +259,48 @@ func makeWAL(waldir string, cl *membership.RaftCluster) {
|
||||
ents[i] = e
|
||||
}
|
||||
|
||||
w.Save(raftpb.HardState{
|
||||
Term: 1,
|
||||
commit, term := uint64(len(ents)), uint64(1)
|
||||
|
||||
if err := w.Save(raftpb.HardState{
|
||||
Term: term,
|
||||
Vote: peers[0].ID,
|
||||
Commit: uint64(len(ents))}, ents)
|
||||
Commit: commit}, ents); err != nil {
|
||||
ExitWithError(ExitIO, err)
|
||||
}
|
||||
|
||||
b, berr := st.Save()
|
||||
if berr != nil {
|
||||
ExitWithError(ExitError, berr)
|
||||
}
|
||||
|
||||
raftSnap := raftpb.Snapshot{
|
||||
Data: b,
|
||||
Metadata: raftpb.SnapshotMetadata{
|
||||
Index: commit,
|
||||
Term: term,
|
||||
ConfState: raftpb.ConfState{
|
||||
Nodes: nodeIDs,
|
||||
},
|
||||
},
|
||||
}
|
||||
snapshotter := snap.New(snapdir)
|
||||
if err := snapshotter.SaveSnap(raftSnap); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
if err := w.SaveSnapshot(walpb.Snapshot{Index: commit, Term: term}); err != nil {
|
||||
ExitWithError(ExitIO, err)
|
||||
}
|
||||
}
|
||||
|
||||
// initIndex implements ConsistentIndexGetter so the snapshot won't block
|
||||
// the new raft instance by waiting for a future raft index.
|
||||
type initIndex struct{}
|
||||
type initIndex int
|
||||
|
||||
func (*initIndex) ConsistentIndex() uint64 { return 1 }
|
||||
func (i *initIndex) ConsistentIndex() uint64 { return uint64(*i) }
|
||||
|
||||
// makeDB copies the database snapshot to the snapshot directory
|
||||
func makeDB(snapdir, dbfile string) {
|
||||
func makeDB(snapdir, dbfile string, commit int) {
|
||||
f, ferr := os.OpenFile(dbfile, os.O_RDONLY, 0600)
|
||||
if ferr != nil {
|
||||
ExitWithError(ExitInvalidInput, ferr)
|
||||
@ -329,7 +371,7 @@ func makeDB(snapdir, dbfile string) {
|
||||
// update consistentIndex so applies go through on etcdserver despite
|
||||
// having a new raft instance
|
||||
be := backend.NewDefaultBackend(dbpath)
|
||||
s := mvcc.NewStore(be, nil, &initIndex{})
|
||||
s := mvcc.NewStore(be, nil, (*initIndex)(&commit))
|
||||
id := s.TxnBegin()
|
||||
btx := be.BatchTx()
|
||||
del := func(k, v []byte) error {
|
||||
@ -339,6 +381,7 @@ func makeDB(snapdir, dbfile string) {
|
||||
|
||||
// delete stored members from old cluster since using new members
|
||||
btx.UnsafeForEach([]byte("members"), del)
|
||||
// todo: add back new members when we start to deprecate old snap file.
|
||||
btx.UnsafeForEach([]byte("members_removed"), del)
|
||||
// trigger write-out of new consistent index
|
||||
s.TxnEnd(id)
|
||||
|
@ -77,12 +77,13 @@ func readCompares(r *bufio.Reader) (cmps []clientv3.Cmp) {
|
||||
if err != nil {
|
||||
ExitWithError(ExitInvalidInput, err)
|
||||
}
|
||||
if len(line) == 1 {
|
||||
|
||||
// remove space from the line
|
||||
line = strings.TrimSpace(line)
|
||||
if len(line) == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
// remove trailing \n
|
||||
line = line[:len(line)-1]
|
||||
cmp, err := parseCompare(line)
|
||||
if err != nil {
|
||||
ExitWithError(ExitInvalidInput, err)
|
||||
@ -99,12 +100,13 @@ func readOps(r *bufio.Reader) (ops []clientv3.Op) {
|
||||
if err != nil {
|
||||
ExitWithError(ExitInvalidInput, err)
|
||||
}
|
||||
if len(line) == 1 {
|
||||
|
||||
// remove space from the line
|
||||
line = strings.TrimSpace(line)
|
||||
if len(line) == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
// remove trailing \n
|
||||
line = line[:len(line)-1]
|
||||
op, err := parseRequestUnion(line)
|
||||
if err != nil {
|
||||
ExitWithError(ExitInvalidInput, err)
|
||||
|
@ -46,8 +46,23 @@ func addHexPrefix(s string) string {
|
||||
}
|
||||
|
||||
func argify(s string) []string {
|
||||
r := regexp.MustCompile("'.+'|\".+\"|\\S+")
|
||||
return r.FindAllString(s, -1)
|
||||
r := regexp.MustCompile(`"(?:[^"\\]|\\.)*"|'[^']*'|[^'"\s]\S*[^'"\s]?`)
|
||||
args := r.FindAllString(s, -1)
|
||||
for i := range args {
|
||||
if len(args[i]) == 0 {
|
||||
continue
|
||||
}
|
||||
if args[i][0] == '\'' {
|
||||
// 'single-quoted string'
|
||||
args[i] = args[i][1 : len(args[i])-1]
|
||||
} else if args[i][0] == '"' {
|
||||
// "double quoted string"
|
||||
if _, err := fmt.Sscanf(args[i], "%q", &args[i]); err != nil {
|
||||
ExitWithError(ExitInvalidInput, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
return args
|
||||
}
|
||||
|
||||
func commandCtx(cmd *cobra.Command) (context.Context, context.CancelFunc) {
|
||||
|
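To make the new quoting rules concrete, here is a hedged standalone copy of the argify logic (the real function exits the process on a bad double-quoted token; this sketch simply ignores the scan error):

```go
package main

import (
	"fmt"
	"regexp"
)

// argifyDemo mirrors the quoting rules above: double-quoted tokens may
// contain escaped characters, single-quoted tokens are taken literally.
func argifyDemo(s string) []string {
	r := regexp.MustCompile(`"(?:[^"\\]|\\.)*"|'[^']*'|[^'"\s]\S*[^'"\s]?`)
	args := r.FindAllString(s, -1)
	for i := range args {
		if len(args[i]) == 0 {
			continue
		}
		if args[i][0] == '\'' {
			args[i] = args[i][1 : len(args[i])-1] // strip the single quotes
		} else if args[i][0] == '"' {
			fmt.Sscanf(args[i], "%q", &args[i]) // unescape \" and \xNN
		}
	}
	return args
}

func main() {
	fmt.Printf("%q\n", argifyDemo(`put "key \"with\" space" 'v a l'`))
	// ["put" "key \"with\" space" "v a l"]
}
```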
@ -29,6 +29,7 @@ var (
|
||||
watchRev int64
|
||||
watchPrefix bool
|
||||
watchInteractive bool
|
||||
watchPrevKey bool
|
||||
)
|
||||
|
||||
// NewWatchCommand returns the cobra command for "watch".
|
||||
@ -42,6 +43,7 @@ func NewWatchCommand() *cobra.Command {
|
||||
cmd.Flags().BoolVarP(&watchInteractive, "interactive", "i", false, "Interactive mode")
|
||||
cmd.Flags().BoolVar(&watchPrefix, "prefix", false, "Watch on a prefix if prefix is set")
|
||||
cmd.Flags().Int64Var(&watchRev, "rev", 0, "Revision to start watching")
|
||||
cmd.Flags().BoolVar(&watchPrevKey, "prev-kv", false, "get the previous key-value pair before the event happens")
|
||||
|
||||
return cmd
|
||||
}
|
||||
@ -52,30 +54,18 @@ func watchCommandFunc(cmd *cobra.Command, args []string) {
|
||||
watchInteractiveFunc(cmd, args)
|
||||
return
|
||||
}
|
||||
if len(args) < 1 || len(args) > 2 {
|
||||
ExitWithError(ExitBadArgs, fmt.Errorf("watch in non-interactive mode requires one or two arguments as key or prefix, with range end"))
|
||||
}
|
||||
|
||||
opts := []clientv3.OpOption{clientv3.WithRev(watchRev)}
|
||||
key := args[0]
|
||||
if len(args) == 2 {
|
||||
if watchPrefix {
|
||||
ExitWithError(ExitBadArgs, fmt.Errorf("`range_end` and `--prefix` cannot be set at the same time, choose one"))
|
||||
}
|
||||
opts = append(opts, clientv3.WithRange(args[1]))
|
||||
}
|
||||
|
||||
if watchPrefix {
|
||||
opts = append(opts, clientv3.WithPrefix())
|
||||
}
|
||||
c := mustClientFromCmd(cmd)
|
||||
wc := c.Watch(context.TODO(), key, opts...)
|
||||
printWatchCh(wc)
|
||||
err := c.Close()
|
||||
if err == nil {
|
||||
ExitWithError(ExitInterrupted, fmt.Errorf("watch is canceled by the server"))
|
||||
wc, err := getWatchChan(c, args)
|
||||
if err != nil {
|
||||
ExitWithError(ExitBadArgs, err)
|
||||
}
|
||||
ExitWithError(ExitBadConnection, err)
|
||||
|
||||
printWatchCh(wc)
|
||||
if err = c.Close(); err != nil {
|
||||
ExitWithError(ExitBadConnection, err)
|
||||
}
|
||||
ExitWithError(ExitInterrupted, fmt.Errorf("watch is canceled by the server"))
|
||||
}
|
||||
|
||||
func watchInteractiveFunc(cmd *cobra.Command, args []string) {
|
||||
@ -107,32 +97,36 @@ func watchInteractiveFunc(cmd *cobra.Command, args []string) {
|
||||
fmt.Fprintf(os.Stderr, "Invalid command %s (%v)\n", l, err)
|
||||
continue
|
||||
}
|
||||
moreargs := flagset.Args()
|
||||
if len(moreargs) < 1 || len(moreargs) > 2 {
|
||||
fmt.Fprintf(os.Stderr, "Invalid command %s (Too few or many arguments)\n", l)
|
||||
ch, err := getWatchChan(c, flagset.Args())
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Invalid command %s (%v)\n", l, err)
|
||||
continue
|
||||
}
|
||||
var key string
|
||||
_, err = fmt.Sscanf(moreargs[0], "%q", &key)
|
||||
if err != nil {
|
||||
key = moreargs[0]
|
||||
}
|
||||
opts := []clientv3.OpOption{clientv3.WithRev(watchRev)}
|
||||
if len(moreargs) == 2 {
|
||||
if watchPrefix {
|
||||
fmt.Fprintf(os.Stderr, "`range_end` and `--prefix` cannot be set at the same time, choose one\n")
|
||||
continue
|
||||
}
|
||||
opts = append(opts, clientv3.WithRange(moreargs[1]))
|
||||
}
|
||||
if watchPrefix {
|
||||
opts = append(opts, clientv3.WithPrefix())
|
||||
}
|
||||
ch := c.Watch(context.TODO(), key, opts...)
|
||||
go printWatchCh(ch)
|
||||
}
|
||||
}
|
||||
|
||||
func getWatchChan(c *clientv3.Client, args []string) (clientv3.WatchChan, error) {
|
||||
if len(args) < 1 || len(args) > 2 {
|
||||
return nil, fmt.Errorf("bad number of arguments")
|
||||
}
|
||||
key := args[0]
|
||||
opts := []clientv3.OpOption{clientv3.WithRev(watchRev)}
|
||||
if len(args) == 2 {
|
||||
if watchPrefix {
|
||||
return nil, fmt.Errorf("`range_end` and `--prefix` are mutually exclusive")
|
||||
}
|
||||
opts = append(opts, clientv3.WithRange(args[1]))
|
||||
}
|
||||
if watchPrefix {
|
||||
opts = append(opts, clientv3.WithPrefix())
|
||||
}
|
||||
if watchPrevKey {
|
||||
opts = append(opts, clientv3.WithPrevKV())
|
||||
}
|
||||
return c.Watch(context.TODO(), key, opts...), nil
|
||||
}
|
||||
|
||||
func printWatchCh(ch clientv3.WatchChan) {
|
||||
for resp := range ch {
|
||||
display.Watch(resp)
|
||||
|
@ -20,6 +20,7 @@ import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net"
|
||||
"net/url"
|
||||
"os"
|
||||
"runtime"
|
||||
@ -410,6 +411,13 @@ func (cfg *config) configFromFile() error {
|
||||
}
|
||||
|
||||
func (cfg *config) validateConfig(isSet func(field string) bool) error {
|
||||
if err := checkBindURLs(cfg.lpurls); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := checkBindURLs(cfg.lcurls); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// when etcd runs in member mode user needs to set --advertise-client-urls if --listen-client-urls is set.
|
||||
// TODO(yichengq): check this for joining through discovery service case
|
||||
mayFallbackToProxy := isSet("discovery") && cfg.fallback.String() == fallbackFlagProxy
|
||||
@ -456,3 +464,27 @@ func (cfg config) isReadonlyProxy() bool { return cfg.proxy.String() == pr
|
||||
func (cfg config) shouldFallbackToProxy() bool { return cfg.fallback.String() == fallbackFlagProxy }
|
||||
|
||||
func (cfg config) electionTicks() int { return int(cfg.ElectionMs / cfg.TickMs) }
|
||||
|
||||
// checkBindURLs warns if any URL uses a domain name.
|
||||
// TODO: return error in 3.2.0
|
||||
func checkBindURLs(urls []url.URL) error {
|
||||
for _, url := range urls {
|
||||
if url.Scheme == "unix" || url.Scheme == "unixs" {
|
||||
continue
|
||||
}
|
||||
host, _, err := net.SplitHostPort(url.Host)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if host == "localhost" {
|
||||
// special case for local address
|
||||
// TODO: support /etc/hosts ?
|
||||
continue
|
||||
}
|
||||
if net.ParseIP(host) == nil {
|
||||
err := fmt.Errorf("expected IP in URL for binding (%s)", url.String())
|
||||
plog.Warning(err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
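A small illustration of the binding rule above, as a hypothetical standalone predicate (the real checkBindURLs only logs a warning for domain names, per the TODO):

```go
package main

import (
	"fmt"
	"net"
	"net/url"
)

// isBindableURL is a hypothetical condensed version of the check above:
// unix sockets and localhost pass, everything else must be an IP literal.
func isBindableURL(raw string) bool {
	u, err := url.Parse(raw)
	if err != nil {
		return false
	}
	if u.Scheme == "unix" || u.Scheme == "unixs" {
		return true
	}
	host, _, err := net.SplitHostPort(u.Host)
	if err != nil {
		return false
	}
	return host == "localhost" || net.ParseIP(host) != nil
}

func main() {
	for _, s := range []string{
		"http://10.0.0.1:2379",          // true: IP literal
		"http://localhost:2379",         // true: special case
		"http://etcd1.example.com:2379", // false: domain name, warned against
	} {
		fmt.Println(s, isBindableURL(s))
	}
}
```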
@ -32,7 +32,7 @@ type watchServer struct {
|
||||
clusterID int64
|
||||
memberID int64
|
||||
raftTimer etcdserver.RaftTimer
|
||||
watchable mvcc.Watchable
|
||||
watchable mvcc.WatchableKV
|
||||
}
|
||||
|
||||
func NewWatchServer(s *etcdserver.EtcdServer) pb.WatchServer {
|
||||
@ -82,6 +82,8 @@ type serverWatchStream struct {
|
||||
memberID int64
|
||||
raftTimer etcdserver.RaftTimer
|
||||
|
||||
watchable mvcc.WatchableKV
|
||||
|
||||
gRPCStream pb.Watch_WatchServer
|
||||
watchStream mvcc.WatchStream
|
||||
ctrlStream chan *pb.WatchResponse
|
||||
@ -91,6 +93,7 @@ type serverWatchStream struct {
|
||||
// progress tracks the watchID that stream might need to send
|
||||
// progress to.
|
||||
progress map[mvcc.WatchID]bool
|
||||
prevKV map[mvcc.WatchID]bool
|
||||
|
||||
// closec indicates the stream is closed.
|
||||
closec chan struct{}
|
||||
@ -101,14 +104,18 @@ type serverWatchStream struct {
|
||||
|
||||
func (ws *watchServer) Watch(stream pb.Watch_WatchServer) (err error) {
|
||||
sws := serverWatchStream{
|
||||
clusterID: ws.clusterID,
|
||||
memberID: ws.memberID,
|
||||
raftTimer: ws.raftTimer,
|
||||
clusterID: ws.clusterID,
|
||||
memberID: ws.memberID,
|
||||
raftTimer: ws.raftTimer,
|
||||
|
||||
watchable: ws.watchable,
|
||||
|
||||
gRPCStream: stream,
|
||||
watchStream: ws.watchable.NewWatchStream(),
|
||||
// chan for sending control response like watcher created and canceled.
|
||||
ctrlStream: make(chan *pb.WatchResponse, ctrlStreamBufLen),
|
||||
progress: make(map[mvcc.WatchID]bool),
|
||||
prevKV: make(map[mvcc.WatchID]bool),
|
||||
closec: make(chan struct{}),
|
||||
}
|
||||
|
||||
@ -170,9 +177,14 @@ func (sws *serverWatchStream) recvLoop() error {
|
||||
rev = wsrev + 1
|
||||
}
|
||||
id := sws.watchStream.Watch(creq.Key, creq.RangeEnd, rev)
|
||||
if id != -1 && creq.ProgressNotify {
|
||||
if id != -1 {
|
||||
sws.mu.Lock()
|
||||
sws.progress[id] = true
|
||||
if creq.ProgressNotify {
|
||||
sws.progress[id] = true
|
||||
}
|
||||
if creq.PrevKv {
|
||||
sws.prevKV[id] = true
|
||||
}
|
||||
sws.mu.Unlock()
|
||||
}
|
||||
wr := &pb.WatchResponse{
|
||||
@ -198,6 +210,7 @@ func (sws *serverWatchStream) recvLoop() error {
|
||||
}
|
||||
sws.mu.Lock()
|
||||
delete(sws.progress, mvcc.WatchID(id))
|
||||
delete(sws.prevKV, mvcc.WatchID(id))
|
||||
sws.mu.Unlock()
|
||||
}
|
||||
}
|
||||
@ -244,8 +257,19 @@ func (sws *serverWatchStream) sendLoop() {
|
||||
// or define protocol buffer with []mvccpb.Event.
|
||||
evs := wresp.Events
|
||||
events := make([]*mvccpb.Event, len(evs))
|
||||
sws.mu.Lock()
|
||||
needPrevKV := sws.prevKV[wresp.WatchID]
|
||||
sws.mu.Unlock()
|
||||
for i := range evs {
|
||||
events[i] = &evs[i]
|
||||
|
||||
if needPrevKV {
|
||||
opt := mvcc.RangeOptions{Rev: evs[i].Kv.ModRevision - 1}
|
||||
r, err := sws.watchable.Range(evs[i].Kv.Key, nil, opt)
|
||||
if err == nil && len(r.KVs) != 0 {
|
||||
events[i].PrevKv = &(r.KVs[0])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
wr := &pb.WatchResponse{
|
||||
|
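The previous-pair lookup above exploits MVCC history: the value a key held just before an event is an ordinary ranged read at one revision earlier. A minimal sketch of that idea, assuming any mvcc.WatchableKV:

```go
package example

import (
	"github.com/coreos/etcd/mvcc"
	"github.com/coreos/etcd/mvcc/mvccpb"
)

// prevPair returns the key-value pair as it existed immediately before the
// given event by reading at ModRevision-1. It returns nil when the event
// created the key or when the older revision has been compacted away.
func prevPair(kv mvcc.WatchableKV, ev mvccpb.Event) *mvccpb.KeyValue {
	opt := mvcc.RangeOptions{Rev: ev.Kv.ModRevision - 1}
	r, err := kv.Range(ev.Kv.Key, nil, opt)
	if err != nil || len(r.KVs) == 0 {
		return nil
	}
	return &r.KVs[0]
}
```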
@ -159,6 +159,22 @@ func (a *applierV3backend) Put(txnID int64, p *pb.PutRequest) (*pb.PutResponse,
|
||||
rev int64
|
||||
err error
|
||||
)
|
||||
|
||||
var rr *mvcc.RangeResult
|
||||
if p.PrevKv {
|
||||
if txnID != noTxn {
|
||||
rr, err = a.s.KV().TxnRange(txnID, p.Key, nil, mvcc.RangeOptions{})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
rr, err = a.s.KV().Range(p.Key, nil, mvcc.RangeOptions{})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if txnID != noTxn {
|
||||
rev, err = a.s.KV().TxnPut(txnID, p.Key, p.Value, lease.LeaseID(p.Lease))
|
||||
if err != nil {
|
||||
@ -174,6 +190,9 @@ func (a *applierV3backend) Put(txnID int64, p *pb.PutRequest) (*pb.PutResponse,
|
||||
rev = a.s.KV().Put(p.Key, p.Value, leaseID)
|
||||
}
|
||||
resp.Header.Revision = rev
|
||||
if rr != nil && len(rr.KVs) != 0 {
|
||||
resp.PrevKv = &rr.KVs[0]
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
@ -191,6 +210,21 @@ func (a *applierV3backend) DeleteRange(txnID int64, dr *pb.DeleteRangeRequest) (
|
||||
dr.RangeEnd = []byte{}
|
||||
}
|
||||
|
||||
var rr *mvcc.RangeResult
|
||||
if dr.PrevKv {
|
||||
if txnID != noTxn {
|
||||
rr, err = a.s.KV().TxnRange(txnID, dr.Key, dr.RangeEnd, mvcc.RangeOptions{})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
rr, err = a.s.KV().Range(dr.Key, dr.RangeEnd, mvcc.RangeOptions{})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if txnID != noTxn {
|
||||
n, rev, err = a.s.KV().TxnDeleteRange(txnID, dr.Key, dr.RangeEnd)
|
||||
if err != nil {
|
||||
@ -201,6 +235,11 @@ func (a *applierV3backend) DeleteRange(txnID int64, dr *pb.DeleteRangeRequest) (
|
||||
}
|
||||
|
||||
resp.Deleted = n
|
||||
if rr != nil {
|
||||
for i := range rr.KVs {
|
||||
resp.PrevKvs = append(resp.PrevKvs, &rr.KVs[i])
|
||||
}
|
||||
}
|
||||
resp.Header.Revision = rev
|
||||
return resp, nil
|
||||
}
|
||||
|
@ -56,6 +56,9 @@ func (aa *authApplierV3) Put(txnID int64, r *pb.PutRequest) (*pb.PutResponse, er
|
||||
if !aa.as.IsPutPermitted(aa.user, r.Key) {
|
||||
return nil, auth.ErrPermissionDenied
|
||||
}
|
||||
if r.PrevKv && !aa.as.IsRangePermitted(aa.user, r.Key, nil) {
|
||||
return nil, auth.ErrPermissionDenied
|
||||
}
|
||||
return aa.applierV3.Put(txnID, r)
|
||||
}
|
||||
|
||||
@ -70,6 +73,9 @@ func (aa *authApplierV3) DeleteRange(txnID int64, r *pb.DeleteRangeRequest) (*pb
|
||||
if !aa.as.IsDeleteRangePermitted(aa.user, r.Key, r.RangeEnd) {
|
||||
return nil, auth.ErrPermissionDenied
|
||||
}
|
||||
if r.PrevKv && !aa.as.IsRangePermitted(aa.user, r.Key, r.RangeEnd) {
|
||||
return nil, auth.ErrPermissionDenied
|
||||
}
|
||||
return aa.applierV3.DeleteRange(txnID, r)
|
||||
}
|
||||
|
||||
@ -99,7 +105,7 @@ func (aa *authApplierV3) checkTxnReqsPermission(reqs []*pb.RequestOp) bool {
|
||||
continue
|
||||
}
|
||||
|
||||
if !aa.as.IsDeleteRangePermitted(aa.user, tv.RequestDeleteRange.Key, tv.RequestDeleteRange.RangeEnd) {
|
||||
if tv.RequestDeleteRange.PrevKv && !aa.as.IsRangePermitted(aa.user, tv.RequestDeleteRange.Key, tv.RequestDeleteRange.RangeEnd) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
@ -102,9 +102,9 @@ import (
|
||||
proto "github.com/golang/protobuf/proto"
|
||||
|
||||
math "math"
|
||||
)
|
||||
|
||||
import io "io"
|
||||
io "io"
|
||||
)
|
||||
|
||||
// Reference imports to suppress errors if they are not otherwise used.
|
||||
var _ = proto.Marshal
|
||||
|
@ -10,9 +10,9 @@ import (
|
||||
proto "github.com/golang/protobuf/proto"
|
||||
|
||||
math "math"
|
||||
)
|
||||
|
||||
import io "io"
|
||||
io "io"
|
||||
)
|
||||
|
||||
// Reference imports to suppress errors if they are not otherwise used.
|
||||
var _ = proto.Marshal
|
||||
|
File diff suppressed because it is too large
@ -396,10 +396,16 @@ message PutRequest {
|
||||
// lease is the lease ID to associate with the key in the key-value store. A lease
|
||||
// value of 0 indicates no lease.
|
||||
int64 lease = 3;
|
||||
|
||||
// If prev_kv is set, etcd gets the previous key-value pair before changing it.
|
||||
// The previous key-value pair will be returned in the put response.
|
||||
bool prev_kv = 4;
|
||||
}
|
||||
|
||||
message PutResponse {
|
||||
ResponseHeader header = 1;
|
||||
// if prev_kv is set in the request, the previous key-value pair will be returned.
|
||||
mvccpb.KeyValue prev_kv = 2;
|
||||
}
|
||||
|
||||
message DeleteRangeRequest {
|
||||
@ -409,12 +415,17 @@ message DeleteRangeRequest {
|
||||
// If range_end is not given, the range is defined to contain only the key argument.
|
||||
// If range_end is '\0', the range is all keys greater than or equal to the key argument.
|
||||
bytes range_end = 2;
|
||||
// If prev_kv is set, etcd gets the previous key-value pairs before deleting them.
|
||||
// The previous key-value pairs will be returned in the delete response.
|
||||
bool prev_kv = 3;
|
||||
}
|
||||
|
||||
message DeleteRangeResponse {
|
||||
ResponseHeader header = 1;
|
||||
// deleted is the number of keys deleted by the delete range request.
|
||||
int64 deleted = 2;
|
||||
// if prev_kv is set in the request, the previous key-value pairs will be returned.
|
||||
repeated mvccpb.KeyValue prev_kvs = 3;
|
||||
}
|
||||
|
||||
message RequestOp {
|
||||
@ -563,6 +574,9 @@ message WatchCreateRequest {
|
||||
// wish to recover a disconnected watcher starting from a recent known revision.
|
||||
// The etcd server may decide how often it will send notifications based on current load.
|
||||
bool progress_notify = 4;
|
||||
// If prev_kv is set, created watcher gets the previous KV before the event happens.
|
||||
// If the previous KV is already compacted, nothing will be returned.
|
||||
bool prev_kv = 6;
|
||||
}
|
||||
|
||||
message WatchCancelRequest {
|
||||
|
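Taken together, the three prev_kv fields surface in the Go client as a single option; a minimal usage sketch (endpoint and keys are placeholders, and most errors are elided for brevity):

```go
package main

import (
	"context"
	"fmt"

	"github.com/coreos/etcd/clientv3"
)

func main() {
	cli, err := clientv3.New(clientv3.Config{Endpoints: []string{"localhost:2379"}})
	if err != nil {
		panic(err)
	}
	defer cli.Close()
	ctx := context.TODO()

	// Watch with prev-kv: every event carries the pair before the change.
	wch := cli.Watch(ctx, "foo", clientv3.WithPrevKV())

	cli.Put(ctx, "foo", "v1")

	// Put with prev-kv: the response carries the overwritten pair.
	presp, _ := cli.Put(ctx, "foo", "v2", clientv3.WithPrevKV())
	if presp.PrevKv != nil {
		fmt.Printf("overwrote %s=%s\n", presp.PrevKv.Key, presp.PrevKv.Value)
	}

	// Delete with prev-kv: the response carries every deleted pair.
	dresp, _ := cli.Delete(ctx, "foo", clientv3.WithPrevKV())
	fmt.Println("deleted", dresp.Deleted, "prev pairs:", len(dresp.PrevKvs))

	for wresp := range wch {
		for _, ev := range wresp.Events {
			if ev.PrevKv != nil {
				fmt.Printf("%s: was %s\n", ev.Type, ev.PrevKv.Value)
			}
		}
	}
}
```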
@ -405,14 +405,24 @@ func NewServer(cfg *ServerConfig) (srv *EtcdServer, err error) {
|
||||
|
||||
srv.be = be
|
||||
srv.lessor = lease.NewLessor(srv.be)
|
||||
|
||||
// always recover lessor before kv. When we recover the mvcc.KV it will reattach keys to its leases.
|
||||
// If we recover mvcc.KV first, it will attach the keys to the wrong lessor before it recovers.
|
||||
srv.lessor = lease.NewLessor(srv.be)
|
||||
srv.kv = mvcc.New(srv.be, srv.lessor, &srv.consistIndex)
|
||||
if beExist {
|
||||
kvindex := srv.kv.ConsistentIndex()
|
||||
// TODO: remove kvindex != 0 checking when we do not expect users to upgrade
|
||||
// etcd from pre-3.0 release.
|
||||
if snapshot != nil && kvindex < snapshot.Metadata.Index {
|
||||
return nil, fmt.Errorf("database file (%v index %d) does not match with snapshot (index %d).", bepath, kvindex, snapshot.Metadata.Index)
|
||||
if kvindex != 0 {
|
||||
return nil, fmt.Errorf("database file (%v index %d) does not match with snapshot (index %d).", bepath, kvindex, snapshot.Metadata.Index)
|
||||
}
|
||||
plog.Warningf("consistent index never saved (snapshot index=%d)", snapshot.Metadata.Index)
|
||||
}
|
||||
}
|
||||
srv.consistIndex.setConsistentIndex(srv.kv.ConsistentIndex())
|
||||
|
||||
srv.authStore = auth.NewAuthStore(srv.be)
|
||||
if h := cfg.AutoCompactionRetention; h != 0 {
|
||||
srv.compactor = compactor.NewPeriodic(h, srv.kv, srv)
|
||||
@ -658,6 +668,14 @@ func (s *EtcdServer) applySnapshot(ep *etcdProgress, apply *apply) {
|
||||
|
||||
newbe := backend.NewDefaultBackend(fn)
|
||||
|
||||
// always recover lessor before kv. When we recover the mvcc.KV it will reattach keys to its leases.
|
||||
// If we recover mvcc.KV first, it will attach the keys to the wrong lessor before it recovers.
|
||||
if s.lessor != nil {
|
||||
plog.Info("recovering lessor...")
|
||||
s.lessor.Recover(newbe, s.kv)
|
||||
plog.Info("finished recovering lessor")
|
||||
}
|
||||
|
||||
plog.Info("restoring mvcc store...")
|
||||
|
||||
if err := s.kv.Restore(newbe); err != nil {
|
||||
@ -684,12 +702,6 @@ func (s *EtcdServer) applySnapshot(ep *etcdProgress, apply *apply) {
|
||||
s.be = newbe
|
||||
s.bemu.Unlock()
|
||||
|
||||
if s.lessor != nil {
|
||||
plog.Info("recovering lessor...")
|
||||
s.lessor.Recover(newbe, s.kv)
|
||||
plog.Info("finished recovering lessor")
|
||||
}
|
||||
|
||||
plog.Info("recovering alarms...")
|
||||
if err := s.restoreAlarms(); err != nil {
|
||||
plog.Panicf("restore alarms error: %v", err)
|
||||
|
@ -551,4 +551,4 @@ func (s *EtcdServer) processInternalRaftRequest(ctx context.Context, r pb.Intern
|
||||
}
|
||||
|
||||
// Watchable returns a watchable interface attached to the etcdserver.
|
||||
func (s *EtcdServer) Watchable() mvcc.Watchable { return s.KV() }
|
||||
func (s *EtcdServer) Watchable() mvcc.WatchableKV { return s.KV() }
|
||||
|
@ -174,3 +174,28 @@ func TestElectionSessionRecampaign(t *testing.T) {
|
||||
t.Fatalf("expected value=%q, got response %v", "def", resp)
|
||||
}
|
||||
}
|
||||
|
||||
// TestElectionOnPrefixOfExistingKey checks that a single
|
||||
// candidate can be elected on a new key that is a prefix
|
||||
// of an existing key. To wit, check for regression
|
||||
// of bug #6278. https://github.com/coreos/etcd/issues/6278
|
||||
//
|
||||
func TestElectionOnPrefixOfExistingKey(t *testing.T) {
|
||||
clus := NewClusterV3(t, &ClusterConfig{Size: 1})
|
||||
defer clus.Terminate(t)
|
||||
|
||||
cli := clus.RandClient()
|
||||
if _, err := cli.Put(context.TODO(), "testa", "value"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
e := concurrency.NewElection(cli, "test")
|
||||
ctx, cancel := context.WithTimeout(context.TODO(), 5*time.Second)
|
||||
err := e.Campaign(ctx, "abc")
|
||||
cancel()
|
||||
if err != nil {
|
||||
// after 5 seconds, deadlock results in
|
||||
// 'context deadline exceeded' here.
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
@ -379,6 +379,7 @@ func TestV3DeleteRange(t *testing.T) {
|
||||
keySet []string
|
||||
begin string
|
||||
end string
|
||||
prevKV bool
|
||||
|
||||
wantSet [][]byte
|
||||
deleted int64
|
||||
@ -386,39 +387,45 @@ func TestV3DeleteRange(t *testing.T) {
|
||||
// delete middle
|
||||
{
|
||||
[]string{"foo", "foo/abc", "fop"},
|
||||
"foo/", "fop",
|
||||
"foo/", "fop", false,
|
||||
[][]byte{[]byte("foo"), []byte("fop")}, 1,
|
||||
},
|
||||
// no delete
|
||||
{
|
||||
[]string{"foo", "foo/abc", "fop"},
|
||||
"foo/", "foo/",
|
||||
"foo/", "foo/", false,
|
||||
[][]byte{[]byte("foo"), []byte("foo/abc"), []byte("fop")}, 0,
|
||||
},
|
||||
// delete first
|
||||
{
|
||||
[]string{"foo", "foo/abc", "fop"},
|
||||
"fo", "fop",
|
||||
"fo", "fop", false,
|
||||
[][]byte{[]byte("fop")}, 2,
|
||||
},
|
||||
// delete tail
|
||||
{
|
||||
[]string{"foo", "foo/abc", "fop"},
|
||||
"foo/", "fos",
|
||||
"foo/", "fos", false,
|
||||
[][]byte{[]byte("foo")}, 2,
|
||||
},
|
||||
// delete exact
|
||||
{
|
||||
[]string{"foo", "foo/abc", "fop"},
|
||||
"foo/abc", "",
|
||||
"foo/abc", "", false,
|
||||
[][]byte{[]byte("foo"), []byte("fop")}, 1,
|
||||
},
|
||||
// delete none, [x,x)
|
||||
{
|
||||
[]string{"foo"},
|
||||
"foo", "foo",
|
||||
"foo", "foo", false,
|
||||
[][]byte{[]byte("foo")}, 0,
|
||||
},
|
||||
// delete middle with prevKV set
|
||||
{
|
||||
[]string{"foo", "foo/abc", "fop"},
|
||||
"foo/", "fop", true,
|
||||
[][]byte{[]byte("foo"), []byte("fop")}, 1,
|
||||
},
|
||||
}
|
||||
|
||||
for i, tt := range tests {
|
||||
@ -436,7 +443,9 @@ func TestV3DeleteRange(t *testing.T) {
|
||||
|
||||
dreq := &pb.DeleteRangeRequest{
|
||||
Key: []byte(tt.begin),
|
||||
RangeEnd: []byte(tt.end)}
|
||||
RangeEnd: []byte(tt.end),
|
||||
PrevKv: tt.prevKV,
|
||||
}
|
||||
dresp, err := kvc.DeleteRange(context.TODO(), dreq)
|
||||
if err != nil {
|
||||
t.Fatalf("couldn't delete range on test %d (%v)", i, err)
|
||||
@ -444,6 +453,11 @@ func TestV3DeleteRange(t *testing.T) {
|
||||
if tt.deleted != dresp.Deleted {
|
||||
t.Errorf("expected %d on test %v, got %d", tt.deleted, i, dresp.Deleted)
|
||||
}
|
||||
if tt.prevKV {
|
||||
if len(dresp.PrevKvs) != int(dresp.Deleted) {
|
||||
t.Errorf("preserve %d keys, want %d", len(dresp.PrevKvs), dresp.Deleted)
|
||||
}
|
||||
}
|
||||
|
||||
rreq := &pb.RangeRequest{Key: []byte{0x0}, RangeEnd: []byte{0xff}}
|
||||
rresp, err := kvc.Range(context.TODO(), rreq)
|
||||
|
@ -19,9 +19,9 @@ import (
|
||||
proto "github.com/golang/protobuf/proto"
|
||||
|
||||
math "math"
|
||||
)
|
||||
|
||||
import io "io"
|
||||
io "io"
|
||||
)
|
||||
|
||||
// Reference imports to suppress errors if they are not otherwise used.
|
||||
var _ = proto.Marshal
|
||||
|
@ -45,13 +45,18 @@ var (
|
||||
|
||||
type LeaseID int64
|
||||
|
||||
// RangeDeleter defines an interface with DeleteRange method.
|
||||
// RangeDeleter defines an interface with Txn and DeleteRange methods.
|
||||
// We define this interface only for lessor to limit the number
|
||||
// of methods of mvcc.KV to what lessor actually needs.
|
||||
//
|
||||
// Having a minimum interface makes testing easy.
|
||||
type RangeDeleter interface {
|
||||
DeleteRange(key, end []byte) (int64, int64)
|
||||
// TxnBegin see comments on mvcc.KV
|
||||
TxnBegin() int64
|
||||
// TxnEnd see comments on mvcc.KV
|
||||
TxnEnd(txnID int64) error
|
||||
// TxnDeleteRange see comments on mvcc.KV
|
||||
TxnDeleteRange(txnID int64, key, end []byte) (n, rev int64, err error)
|
||||
}
|
||||
|
||||
// Lessor owns leases. It can grant, revoke, renew and modify leases for lessee.
|
||||
@ -211,16 +216,30 @@ func (le *lessor) Revoke(id LeaseID) error {
|
||||
// unlock before doing external work
|
||||
le.mu.Unlock()
|
||||
|
||||
if le.rd != nil {
|
||||
for item := range l.itemSet {
|
||||
le.rd.DeleteRange([]byte(item.Key), nil)
|
||||
if le.rd == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
tid := le.rd.TxnBegin()
|
||||
for item := range l.itemSet {
|
||||
_, _, err := le.rd.TxnDeleteRange(tid, []byte(item.Key), nil)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
le.mu.Lock()
|
||||
defer le.mu.Unlock()
|
||||
delete(le.leaseMap, l.ID)
|
||||
l.removeFrom(le.b)
|
||||
// lease deletion needs to be in the same backend transaction with the
|
||||
// kv deletion. Or we might end up with not executing the revoke or not
|
||||
// deleting the keys if etcdserver fails in between.
|
||||
le.b.BatchTx().UnsafeDelete(leaseBucketName, int64ToBytes(int64(l.ID)))
|
||||
|
||||
err := le.rd.TxnEnd(tid)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@ -443,16 +462,7 @@ func (l Lease) persistTo(b backend.Backend) {
|
||||
b.BatchTx().Unlock()
|
||||
}
|
||||
|
||||
func (l Lease) removeFrom(b backend.Backend) {
|
||||
key := int64ToBytes(int64(l.ID))
|
||||
|
||||
b.BatchTx().Lock()
|
||||
b.BatchTx().UnsafeDelete(leaseBucketName, key)
|
||||
b.BatchTx().Unlock()
|
||||
}
|
||||
|
||||
// refresh refreshes the expiry of the lease. It extends the expiry at least
|
||||
// minLeaseTTL second.
|
||||
// refresh refreshes the expiry of the lease.
|
||||
func (l *Lease) refresh(extend time.Duration) {
|
||||
if l.TTL < minLeaseTTL {
|
||||
l.TTL = minLeaseTTL
|
||||
|
@ -223,9 +223,17 @@ type fakeDeleter struct {
|
||||
deleted []string
|
||||
}
|
||||
|
||||
func (fd *fakeDeleter) DeleteRange(key, end []byte) (int64, int64) {
|
||||
func (fd *fakeDeleter) TxnBegin() int64 {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (fd *fakeDeleter) TxnEnd(txnID int64) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (fd *fakeDeleter) TxnDeleteRange(tid int64, key, end []byte) (int64, int64, error) {
|
||||
fd.deleted = append(fd.deleted, string(key)+"_"+string(end))
|
||||
return 0, 0
|
||||
return 0, 0, nil
|
||||
}
|
||||
|
||||
func NewTestBackend(t *testing.T) (string, backend.Backend) {
|
||||
|
@ -367,6 +367,8 @@ func (s *store) restore() error {
|
||||
revToBytes(revision{main: 1}, min)
|
||||
revToBytes(revision{main: math.MaxInt64, sub: math.MaxInt64}, max)
|
||||
|
||||
keyToLease := make(map[string]lease.LeaseID)
|
||||
|
||||
// restore index
|
||||
tx := s.b.BatchTx()
|
||||
tx.Lock()
|
||||
@ -390,26 +392,15 @@ func (s *store) restore() error {
|
||||
switch {
|
||||
case isTombstone(key):
|
||||
s.kvindex.Tombstone(kv.Key, rev)
|
||||
if lease.LeaseID(kv.Lease) != lease.NoLease {
|
||||
err := s.le.Detach(lease.LeaseID(kv.Lease), []lease.LeaseItem{{Key: string(kv.Key)}})
|
||||
if err != nil && err != lease.ErrLeaseNotFound {
|
||||
plog.Fatalf("unexpected Detach error %v", err)
|
||||
}
|
||||
}
|
||||
delete(keyToLease, string(kv.Key))
|
||||
|
||||
default:
|
||||
s.kvindex.Restore(kv.Key, revision{kv.CreateRevision, 0}, rev, kv.Version)
|
||||
if lease.LeaseID(kv.Lease) != lease.NoLease {
|
||||
if s.le == nil {
|
||||
panic("no lessor to attach lease")
|
||||
}
|
||||
err := s.le.Attach(lease.LeaseID(kv.Lease), []lease.LeaseItem{{Key: string(kv.Key)}})
|
||||
// We are walking through the kv history here. It is possible that we attached a key to
|
||||
// the lease and the lease was revoked later.
|
||||
// Thus attaching an old version of a key to a non-existent lease is possible here, and
|
||||
// we should just ignore the error.
|
||||
if err != nil && err != lease.ErrLeaseNotFound {
|
||||
panic("unexpected Attach error")
|
||||
}
|
||||
|
||||
if lid := lease.LeaseID(kv.Lease); lid != lease.NoLease {
|
||||
keyToLease[string(kv.Key)] = lid
|
||||
} else {
|
||||
delete(keyToLease, string(kv.Key))
|
||||
}
|
||||
}
|
||||
|
||||
@ -417,6 +408,16 @@ func (s *store) restore() error {
|
||||
s.currentRev = rev
|
||||
}
|
||||
|
||||
for key, lid := range keyToLease {
|
||||
if s.le == nil {
|
||||
panic("no lessor to attach lease")
|
||||
}
|
||||
err := s.le.Attach(lid, []lease.LeaseItem{{Key: key}})
|
||||
if err != nil {
|
||||
plog.Errorf("unexpected Attach error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
_, scheduledCompactBytes := tx.UnsafeRange(metaBucketName, scheduledCompactKeyName, nil, 0)
|
||||
scheduledCompact := int64(0)
|
||||
if len(scheduledCompactBytes) != 0 {
|
||||
@ -550,7 +551,7 @@ func (s *store) put(key, value []byte, leaseID lease.LeaseID) {
|
||||
|
||||
err = s.le.Detach(oldLease, []lease.LeaseItem{{Key: string(key)}})
|
||||
if err != nil {
|
||||
panic("unexpected error from lease detach")
|
||||
plog.Errorf("unexpected error from lease detach: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
@ -619,7 +620,7 @@ func (s *store) delete(key []byte, rev revision) {
|
||||
if lease.LeaseID(kv.Lease) != lease.NoLease {
|
||||
err = s.le.Detach(lease.LeaseID(kv.Lease), []lease.LeaseItem{{Key: string(kv.Key)}})
|
||||
if err != nil {
|
||||
plog.Fatalf("cannot detach %v", err)
|
||||
plog.Errorf("cannot detach %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -20,9 +20,9 @@ import (
|
||||
proto "github.com/golang/protobuf/proto"
|
||||
|
||||
math "math"
|
||||
)
|
||||
|
||||
import io "io"
|
||||
io "io"
|
||||
)
|
||||
|
||||
// Reference imports to suppress errors if they are not otherwise used.
|
||||
var _ = proto.Marshal
|
||||
@ -89,6 +89,8 @@ type Event struct {
|
||||
// A DELETE/EXPIRE event contains the deleted key with
|
||||
// its modification revision set to the revision of deletion.
|
||||
Kv *KeyValue `protobuf:"bytes,2,opt,name=kv" json:"kv,omitempty"`
|
||||
// prev_kv holds the key-value pair before the event happens.
|
||||
PrevKv *KeyValue `protobuf:"bytes,3,opt,name=prev_kv,json=prevKv" json:"prev_kv,omitempty"`
|
||||
}
|
||||
|
||||
func (m *Event) Reset() { *m = Event{} }
|
||||
@ -181,6 +183,16 @@ func (m *Event) MarshalTo(data []byte) (int, error) {
|
||||
}
|
||||
i += n1
|
||||
}
|
||||
if m.PrevKv != nil {
|
||||
data[i] = 0x1a
|
||||
i++
|
||||
i = encodeVarintKv(data, i, uint64(m.PrevKv.Size()))
|
||||
n2, err := m.PrevKv.MarshalTo(data[i:])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
i += n2
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
|
||||
@ -247,6 +259,10 @@ func (m *Event) Size() (n int) {
|
||||
l = m.Kv.Size()
|
||||
n += 1 + l + sovKv(uint64(l))
|
||||
}
|
||||
if m.PrevKv != nil {
|
||||
l = m.PrevKv.Size()
|
||||
n += 1 + l + sovKv(uint64(l))
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
@ -532,6 +548,39 @@ func (m *Event) Unmarshal(data []byte) error {
|
||||
return err
|
||||
}
|
||||
iNdEx = postIndex
|
||||
case 3:
|
||||
if wireType != 2 {
|
||||
return fmt.Errorf("proto: wrong wireType = %d for field PrevKv", wireType)
|
||||
}
|
||||
var msglen int
|
||||
for shift := uint(0); ; shift += 7 {
|
||||
if shift >= 64 {
|
||||
return ErrIntOverflowKv
|
||||
}
|
||||
if iNdEx >= l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
b := data[iNdEx]
|
||||
iNdEx++
|
||||
msglen |= (int(b) & 0x7F) << shift
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
}
|
||||
if msglen < 0 {
|
||||
return ErrInvalidLengthKv
|
||||
}
|
||||
postIndex := iNdEx + msglen
|
||||
if postIndex > l {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
if m.PrevKv == nil {
|
||||
m.PrevKv = &KeyValue{}
|
||||
}
|
||||
if err := m.PrevKv.Unmarshal(data[iNdEx:postIndex]); err != nil {
|
||||
return err
|
||||
}
|
||||
iNdEx = postIndex
|
||||
default:
|
||||
iNdEx = preIndex
|
||||
skippy, err := skipKv(data[iNdEx:])
|
||||
@ -659,23 +708,24 @@ var (
|
||||
)
|
||||
|
||||
var fileDescriptorKv = []byte{
|
||||
// 285 bytes of a gzipped FileDescriptorProto
|
||||
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0x44, 0x90, 0x41, 0x4a, 0xc3, 0x40,
|
||||
0x14, 0x86, 0x33, 0x4d, 0x9b, 0xd6, 0xd7, 0x52, 0xc3, 0x50, 0x70, 0x70, 0x31, 0xc4, 0x6e, 0x2c,
|
||||
0x08, 0x11, 0xea, 0x0d, 0xc4, 0xac, 0x74, 0x21, 0x21, 0xba, 0x95, 0x34, 0x7d, 0x94, 0x92, 0xa6,
|
||||
0x13, 0xd2, 0x38, 0x98, 0x9b, 0x78, 0x0a, 0xcf, 0xd1, 0x65, 0x8f, 0x60, 0xe3, 0x45, 0x24, 0x6f,
|
||||
0x4c, 0xdd, 0x0c, 0xef, 0xff, 0xff, 0x6f, 0x98, 0xff, 0x0d, 0x0c, 0x52, 0xed, 0xe7, 0x85, 0x2a,
|
||||
0x15, 0x77, 0x32, 0x9d, 0x24, 0xf9, 0xe2, 0x72, 0xb2, 0x52, 0x2b, 0x45, 0xd6, 0x6d, 0x33, 0x99,
|
||||
0x74, 0xfa, 0xc5, 0x60, 0xf0, 0x88, 0xd5, 0x6b, 0xbc, 0x79, 0x47, 0xee, 0x82, 0x9d, 0x62, 0x25,
|
||||
0x98, 0xc7, 0x66, 0xa3, 0xb0, 0x19, 0xf9, 0x35, 0x9c, 0x27, 0x05, 0xc6, 0x25, 0xbe, 0x15, 0xa8,
|
||||
0xd7, 0xbb, 0xb5, 0xda, 0x8a, 0x8e, 0xc7, 0x66, 0x76, 0x38, 0x36, 0x76, 0xf8, 0xe7, 0xf2, 0x2b,
|
||||
0x18, 0x65, 0x6a, 0xf9, 0x4f, 0xd9, 0x44, 0x0d, 0x33, 0xb5, 0x3c, 0x21, 0x02, 0xfa, 0x1a, 0x0b,
|
||||
0x4a, 0xbb, 0x94, 0xb6, 0x92, 0x4f, 0xa0, 0xa7, 0x9b, 0x02, 0xa2, 0x47, 0x2f, 0x1b, 0xd1, 0xb8,
|
||||
0x1b, 0x8c, 0x77, 0x28, 0x1c, 0xa2, 0x8d, 0x98, 0x7e, 0x40, 0x2f, 0xd0, 0xb8, 0x2d, 0xf9, 0x0d,
|
||||
0x74, 0xcb, 0x2a, 0x47, 0x6a, 0x3b, 0x9e, 0x5f, 0xf8, 0x66, 0x4d, 0x9f, 0x42, 0x73, 0x46, 0x55,
|
||||
0x8e, 0x21, 0x41, 0xdc, 0x83, 0x4e, 0xaa, 0xa9, 0xfa, 0x70, 0xee, 0xb6, 0x68, 0xbb, 0x77, 0xd8,
|
||||
0x49, 0xf5, 0xd4, 0x83, 0xb3, 0xd3, 0x25, 0xde, 0x07, 0xfb, 0xf9, 0x25, 0x72, 0x2d, 0x0e, 0xe0,
|
||||
0x3c, 0x04, 0x4f, 0x41, 0x14, 0xb8, 0xec, 0x5e, 0xec, 0x8f, 0xd2, 0x3a, 0x1c, 0xa5, 0xb5, 0xaf,
|
||||
0x25, 0x3b, 0xd4, 0x92, 0x7d, 0xd7, 0x92, 0x7d, 0xfe, 0x48, 0x6b, 0xe1, 0xd0, 0x5f, 0xde, 0xfd,
|
||||
0x06, 0x00, 0x00, 0xff, 0xff, 0xd6, 0x21, 0x8f, 0x2c, 0x75, 0x01, 0x00, 0x00,
|
||||
// 303 bytes of a gzipped FileDescriptorProto
|
||||
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0x6c, 0x90, 0x41, 0x4e, 0xc2, 0x40,
|
||||
0x14, 0x86, 0x3b, 0x14, 0x0a, 0x3e, 0x08, 0x36, 0x13, 0x12, 0x27, 0x2e, 0x26, 0x95, 0x8d, 0x18,
|
||||
0x13, 0x4c, 0xf0, 0x06, 0xc6, 0xae, 0x70, 0x61, 0x1a, 0x74, 0x4b, 0x4a, 0x79, 0x21, 0xa4, 0x94,
|
||||
0x69, 0x4a, 0x9d, 0xa4, 0x37, 0x71, 0xef, 0xde, 0x73, 0xb0, 0xe4, 0x08, 0x52, 0x2f, 0x62, 0xfa,
|
||||
0xc6, 0xe2, 0xc6, 0xcd, 0xe4, 0xfd, 0xff, 0xff, 0x65, 0xe6, 0x7f, 0x03, 0x9d, 0x58, 0x8f, 0xd3,
|
||||
0x4c, 0xe5, 0x8a, 0x3b, 0x89, 0x8e, 0xa2, 0x74, 0x71, 0x39, 0x58, 0xa9, 0x95, 0x22, 0xeb, 0xae,
|
||||
0x9a, 0x4c, 0x3a, 0xfc, 0x64, 0xd0, 0x99, 0x62, 0xf1, 0x1a, 0x6e, 0xde, 0x90, 0xbb, 0x60, 0xc7,
|
||||
0x58, 0x08, 0xe6, 0xb1, 0x51, 0x2f, 0xa8, 0x46, 0x7e, 0x0d, 0xe7, 0x51, 0x86, 0x61, 0x8e, 0xf3,
|
||||
0x0c, 0xf5, 0x7a, 0xb7, 0x56, 0x5b, 0xd1, 0xf0, 0xd8, 0xc8, 0x0e, 0xfa, 0xc6, 0x0e, 0x7e, 0x5d,
|
||||
0x7e, 0x05, 0xbd, 0x44, 0x2d, 0xff, 0x28, 0x9b, 0xa8, 0x6e, 0xa2, 0x96, 0x27, 0x44, 0x40, 0x5b,
|
||||
0x63, 0x46, 0x69, 0x93, 0xd2, 0x5a, 0xf2, 0x01, 0xb4, 0x74, 0x55, 0x40, 0xb4, 0xe8, 0x65, 0x23,
|
||||
0x2a, 0x77, 0x83, 0xe1, 0x0e, 0x85, 0x43, 0xb4, 0x11, 0xc3, 0x0f, 0x06, 0x2d, 0x5f, 0xe3, 0x36,
|
||||
0xe7, 0xb7, 0xd0, 0xcc, 0x8b, 0x14, 0xa9, 0x6e, 0x7f, 0x72, 0x31, 0x36, 0x7b, 0x8e, 0x29, 0x34,
|
||||
0xe7, 0xac, 0x48, 0x31, 0x20, 0x88, 0x7b, 0xd0, 0x88, 0x35, 0x75, 0xef, 0x4e, 0xdc, 0x1a, 0xad,
|
||||
0x17, 0x0f, 0x1a, 0xb1, 0xe6, 0x37, 0xd0, 0x4e, 0x33, 0xd4, 0xf3, 0x58, 0x53, 0xf9, 0xff, 0x30,
|
||||
0xa7, 0x02, 0xa6, 0x7a, 0xe8, 0xc1, 0xd9, 0xe9, 0x7e, 0xde, 0x06, 0xfb, 0xf9, 0x65, 0xe6, 0x5a,
|
||||
0x1c, 0xc0, 0x79, 0xf4, 0x9f, 0xfc, 0x99, 0xef, 0xb2, 0x07, 0xb1, 0x3f, 0x4a, 0xeb, 0x70, 0x94,
|
||||
0xd6, 0xbe, 0x94, 0xec, 0x50, 0x4a, 0xf6, 0x55, 0x4a, 0xf6, 0xfe, 0x2d, 0xad, 0x85, 0x43, 0xff,
|
||||
0x7e, 0xff, 0x13, 0x00, 0x00, 0xff, 0xff, 0xb5, 0x45, 0x92, 0x5d, 0xa1, 0x01, 0x00, 0x00,
|
||||
}
|
||||
|
@ -43,4 +43,6 @@ message Event {
|
||||
// A DELETE/EXPIRE event contains the deleted key with
|
||||
// its modification revision set to the revision of deletion.
|
||||
KeyValue kv = 2;
|
||||
// prev_kv holds the key-value pair before the event happens.
|
||||
KeyValue prev_kv = 3;
|
||||
}
|
||||
|
22
pkg/fileutil/dir_unix.go
Normal file
@ -0,0 +1,22 @@
|
||||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// +build !windows
|
||||
|
||||
package fileutil
|
||||
|
||||
import "os"
|
||||
|
||||
// OpenDir opens a directory for syncing.
|
||||
func OpenDir(path string) (*os.File, error) { return os.Open(path) }
|
46
pkg/fileutil/dir_windows.go
Normal file
@ -0,0 +1,46 @@
|
||||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// +build windows
|
||||
|
||||
package fileutil
|
||||
|
||||
import (
|
||||
"os"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// OpenDir opens a directory in Windows with write access for syncing.
|
||||
func OpenDir(path string) (*os.File, error) {
|
||||
fd, err := openDir(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return os.NewFile(uintptr(fd), path), nil
|
||||
}
|
||||
|
||||
func openDir(path string) (fd syscall.Handle, err error) {
|
||||
if len(path) == 0 {
|
||||
return syscall.InvalidHandle, syscall.ERROR_FILE_NOT_FOUND
|
||||
}
|
||||
pathp, err := syscall.UTF16PtrFromString(path)
|
||||
if err != nil {
|
||||
return syscall.InvalidHandle, err
|
||||
}
|
||||
access := uint32(syscall.GENERIC_READ | syscall.GENERIC_WRITE)
|
||||
sharemode := uint32(syscall.FILE_SHARE_READ | syscall.FILE_SHARE_WRITE)
|
||||
createmode := uint32(syscall.OPEN_EXISTING)
|
||||
fl := uint32(syscall.FILE_FLAG_BACKUP_SEMANTICS)
|
||||
return syscall.CreateFile(pathp, access, sharemode, nil, createmode, fl, 0)
|
||||
}
|
@ -96,3 +96,26 @@ func Exist(name string) bool {
|
||||
_, err := os.Stat(name)
|
||||
return err == nil
|
||||
}
|
||||
|
||||
// ZeroToEnd zeros a file starting from SEEK_CUR to its SEEK_END. May temporarily
|
||||
// shorten the length of the file.
|
||||
func ZeroToEnd(f *os.File) error {
|
||||
// TODO: support FALLOC_FL_ZERO_RANGE
|
||||
off, err := f.Seek(0, os.SEEK_CUR)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
lenf, lerr := f.Seek(0, os.SEEK_END)
|
||||
if lerr != nil {
|
||||
return lerr
|
||||
}
|
||||
if err = f.Truncate(off); err != nil {
|
||||
return err
|
||||
}
|
||||
// make sure blocks remain allocated
|
||||
if err = Preallocate(f, lenf, true); err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = f.Seek(off, os.SEEK_SET)
|
||||
return err
|
||||
}
|
||||
|
@ -118,3 +118,42 @@ func TestExist(t *testing.T) {
|
||||
t.Errorf("exist = %v, want false", g)
|
||||
}
|
||||
}
|
||||
|
||||
func TestZeroToEnd(t *testing.T) {
|
||||
f, err := ioutil.TempFile(os.TempDir(), "fileutil")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
b := make([]byte, 1024)
|
||||
for i := range b {
|
||||
b[i] = 12
|
||||
}
|
||||
if _, err = f.Write(b); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if _, err = f.Seek(512, os.SEEK_SET); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = ZeroToEnd(f); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
off, serr := f.Seek(0, os.SEEK_CUR)
|
||||
if serr != nil {
|
||||
t.Fatal(serr)
|
||||
}
|
||||
if off != 512 {
|
||||
t.Fatalf("expected offset 512, got %d", off)
|
||||
}
|
||||
|
||||
b = make([]byte, 512)
|
||||
if _, err = f.Read(b); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
for i := range b {
|
||||
if b[i] != 0 {
|
||||
t.Errorf("expected b[%d] = 0, got %d", i, b[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
106
pkg/ioutil/pagewriter.go
Normal file
@ -0,0 +1,106 @@
|
||||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package ioutil
|
||||
|
||||
import (
|
||||
"io"
|
||||
)
|
||||
|
||||
var defaultBufferBytes = 128 * 1024
|
||||
|
||||
// PageWriter implements the io.Writer interface so that writes to the
|
||||
// underlying writer happen either in page-aligned chunks or on an explicit flush.
|
||||
type PageWriter struct {
|
||||
w io.Writer
|
||||
// pageOffset tracks the page offset of the base of the buffer
|
||||
pageOffset int
|
||||
// pageBytes is the number of bytes per page
|
||||
pageBytes int
|
||||
// bufferedBytes counts the number of bytes pending for write in the buffer
|
||||
bufferedBytes int
|
||||
// buf holds the write buffer
|
||||
buf []byte
|
||||
// bufWatermarkBytes is the number of bytes the buffer can hold before it needs
|
||||
// to be flushed. It is less than len(buf) so there is space for slack writes
|
||||
// to bring the writer to page alignment.
|
||||
bufWatermarkBytes int
|
||||
}
|
||||
|
||||
// NewPageWriter creates a new PageWriter. pageBytes is the number of bytes
|
||||
// to write per page. pageOffset is the starting offset of io.Writer.
|
||||
func NewPageWriter(w io.Writer, pageBytes, pageOffset int) *PageWriter {
|
||||
return &PageWriter{
|
||||
w: w,
|
||||
pageOffset: pageOffset,
|
||||
pageBytes: pageBytes,
|
||||
buf: make([]byte, defaultBufferBytes+pageBytes),
|
||||
bufWatermarkBytes: defaultBufferBytes,
|
||||
}
|
||||
}
|
||||
|
||||
func (pw *PageWriter) Write(p []byte) (n int, err error) {
|
||||
if len(p)+pw.bufferedBytes <= pw.bufWatermarkBytes {
|
||||
// no overflow
|
||||
copy(pw.buf[pw.bufferedBytes:], p)
|
||||
pw.bufferedBytes += len(p)
|
||||
return len(p), nil
|
||||
}
|
||||
// complete the slack page in the buffer if unaligned
|
||||
slack := pw.pageBytes - ((pw.pageOffset + pw.bufferedBytes) % pw.pageBytes)
|
||||
if slack != pw.pageBytes {
|
||||
partial := slack > len(p)
|
||||
if partial {
|
||||
// not enough data to complete the slack page
|
||||
slack = len(p)
|
||||
}
|
||||
// special case: writing to slack page in buffer
|
||||
copy(pw.buf[pw.bufferedBytes:], p[:slack])
|
||||
pw.bufferedBytes += slack
|
||||
n = slack
|
||||
p = p[slack:]
|
||||
if partial {
|
||||
// avoid forcing an unaligned flush
|
||||
return n, nil
|
||||
}
|
||||
}
|
||||
// buffer contents are now page-aligned; clear out
|
||||
if err = pw.Flush(); err != nil {
|
||||
return n, err
|
||||
}
|
||||
// directly write all complete pages without copying
|
||||
if len(p) > pw.pageBytes {
|
||||
pages := len(p) / pw.pageBytes
|
||||
c, werr := pw.w.Write(p[:pages*pw.pageBytes])
|
||||
n += c
|
||||
if werr != nil {
|
||||
return n, werr
|
||||
}
|
||||
p = p[pages*pw.pageBytes:]
|
||||
}
|
||||
// write remaining tail to buffer
|
||||
c, werr := pw.Write(p)
|
||||
n += c
|
||||
return n, werr
|
||||
}
|
||||
|
||||
func (pw *PageWriter) Flush() error {
|
||||
if pw.bufferedBytes == 0 {
|
||||
return nil
|
||||
}
|
||||
_, err := pw.w.Write(pw.buf[:pw.bufferedBytes])
|
||||
pw.pageOffset = (pw.pageOffset + pw.bufferedBytes) % pw.pageBytes
|
||||
pw.bufferedBytes = 0
|
||||
return err
|
||||
}
|
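A short usage sketch for the new PageWriter (the path is a placeholder): small writes accumulate in the buffer and reach the underlying file only as page-aligned chunks, with Flush draining the unaligned tail.

```go
package main

import (
	"os"

	"github.com/coreos/etcd/pkg/ioutil"
)

func main() {
	f, err := os.Create("/tmp/pagewriter-demo") // placeholder path
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// 4096-byte pages, starting at offset 0 in the file.
	pw := ioutil.NewPageWriter(f, 4096, 0)

	// These small writes are buffered; the file only sees page-aligned
	// chunks until Flush is called.
	for i := 0; i < 100; i++ {
		if _, err := pw.Write(make([]byte, 123)); err != nil {
			panic(err)
		}
	}
	if err := pw.Flush(); err != nil { // drain the unaligned tail
		panic(err)
	}
}
```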
129
pkg/ioutil/pagewriter_test.go
Normal file
@ -0,0 +1,129 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package ioutil

import (
	"math/rand"
	"testing"
)

func TestPageWriterRandom(t *testing.T) {
	// smaller buffer for stress testing
	defaultBufferBytes = 8 * 1024
	pageBytes := 128
	buf := make([]byte, 4*defaultBufferBytes)
	cw := &checkPageWriter{pageBytes: pageBytes, t: t}
	w := NewPageWriter(cw, pageBytes, 0)
	n := 0
	for i := 0; i < 4096; i++ {
		c, err := w.Write(buf[:rand.Intn(len(buf))])
		if err != nil {
			t.Fatal(err)
		}
		n += c
	}
	if cw.writeBytes > n {
		t.Fatalf("wrote %d bytes to io.Writer, but only %d bytes were requested", cw.writeBytes, n)
	}
	// pending bytes are bounded by the watermark plus one page of slack
	if maxPending := defaultBufferBytes + pageBytes; n-cw.writeBytes > maxPending {
		t.Fatalf("got %d bytes pending, expected less than %d bytes", n-cw.writeBytes, maxPending)
	}
	t.Logf("total writes: %d", cw.writes)
	t.Logf("total write bytes: %d (of %d)", cw.writeBytes, n)
}

// TestPageWriterPartialSlack tests the case where a write overflows the buffer
// but there is not enough data to complete the slack write.
func TestPageWriterPartialSlack(t *testing.T) {
	defaultBufferBytes = 1024
	pageBytes := 128
	buf := make([]byte, defaultBufferBytes)
	cw := &checkPageWriter{pageBytes: 64, t: t}
	w := NewPageWriter(cw, pageBytes, 0)
	// put writer in non-zero page offset
	if _, err := w.Write(buf[:64]); err != nil {
		t.Fatal(err)
	}
	if err := w.Flush(); err != nil {
		t.Fatal(err)
	}
	if cw.writes != 1 {
		t.Fatalf("got %d writes, expected 1", cw.writes)
	}
	// nearly fill buffer
	if _, err := w.Write(buf[:1022]); err != nil {
		t.Fatal(err)
	}
	// overflow buffer, but without enough to write as aligned
	if _, err := w.Write(buf[:8]); err != nil {
		t.Fatal(err)
	}
	if cw.writes != 1 {
		t.Fatalf("got %d writes, expected 1", cw.writes)
	}
	// finish writing slack space
	if _, err := w.Write(buf[:128]); err != nil {
		t.Fatal(err)
	}
	if cw.writes != 2 {
		t.Fatalf("got %d writes, expected 2", cw.writes)
	}
}
// TestPageWriterOffset tests that the page writer correctly repositions when
// it is created at a non-zero offset.
func TestPageWriterOffset(t *testing.T) {
	defaultBufferBytes = 1024
	pageBytes := 128
	buf := make([]byte, defaultBufferBytes)
	cw := &checkPageWriter{pageBytes: 64, t: t}
	w := NewPageWriter(cw, pageBytes, 0)
	if _, err := w.Write(buf[:64]); err != nil {
		t.Fatal(err)
	}
	if err := w.Flush(); err != nil {
		t.Fatal(err)
	}
	if w.pageOffset != 64 {
		t.Fatalf("w.pageOffset expected 64, got %d", w.pageOffset)
	}

	w = NewPageWriter(cw, pageBytes, w.pageOffset)
	if _, err := w.Write(buf[:64]); err != nil {
		t.Fatal(err)
	}
	if err := w.Flush(); err != nil {
		t.Fatal(err)
	}
	if w.pageOffset != 0 {
		t.Fatalf("w.pageOffset expected 0, got %d", w.pageOffset)
	}
}

// checkPageWriter implements an io.Writer that fails a test on unaligned writes.
type checkPageWriter struct {
	pageBytes  int
	writes     int
	writeBytes int
	t          *testing.T
}

func (cw *checkPageWriter) Write(p []byte) (int, error) {
	if len(p)%cw.pageBytes != 0 {
		cw.t.Fatalf("got write len(p) = %d, expected len(p) == k*cw.pageBytes", len(p))
	}
	cw.writes++
	cw.writeBytes += len(p)
	return len(p), nil
}
@ -25,9 +25,9 @@ import (
	proto "github.com/golang/protobuf/proto"

	math "math"
-)
-
-import io "io"
+
+	io "io"
+)

// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
@ -49,6 +49,7 @@ var (
	"2.1.0": {streamTypeMsgAppV2, streamTypeMessage},
	"2.2.0": {streamTypeMsgAppV2, streamTypeMessage},
	"2.3.0": {streamTypeMsgAppV2, streamTypeMessage},
+	"3.0.0": {streamTypeMsgAppV2, streamTypeMessage},
	}
)
@ -332,7 +333,16 @@ func (cr *streamReader) decodeLoop(rc io.ReadCloser, t streamType) error {
	default:
		plog.Panicf("unhandled stream type %s", t)
	}
-	cr.closer = rc
+	select {
+	case <-cr.stopc:
+		cr.mu.Unlock()
+		if err := rc.Close(); err != nil {
+			return err
+		}
+		return io.EOF
+	default:
+		cr.closer = rc
+	}
	cr.mu.Unlock()

	for {
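The new select closes the window where stop() fires after a successful dial but before the reader records the connection; without the stopc check the freshly dialed ReadCloser would leak. A stripped-down sketch of the same guard, with hypothetical names (registerOrClose is not etcd's API):

package example

import "io"

// registerOrClose keeps rc as the active closer unless shutdown has already
// been requested, in which case it releases rc instead of leaking it.
func registerOrClose(stopc <-chan struct{}, rc io.ReadCloser, active *io.ReadCloser) error {
	select {
	case <-stopc:
		// shutdown won the race: close the fresh connection immediately
		if err := rc.Close(); err != nil {
			return err
		}
		return io.EOF
	default:
		*active = rc
		return nil
	}
}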
@ -17,6 +17,7 @@ package rafthttp
import (
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/http/httptest"
	"reflect"
@ -180,6 +181,60 @@ func TestStreamReaderDialResult(t *testing.T) {
	}
}

// TestStreamReaderStopOnDial tests that a stream reader closes the connection on stop.
func TestStreamReaderStopOnDial(t *testing.T) {
	defer testutil.AfterTest(t)
	h := http.Header{}
	h.Add("X-Server-Version", version.Version)
	tr := &respWaitRoundTripper{rrt: &respRoundTripper{code: http.StatusOK, header: h}}
	sr := &streamReader{
		peerID: types.ID(2),
		tr:     &Transport{streamRt: tr, ClusterID: types.ID(1)},
		picker: mustNewURLPicker(t, []string{"http://localhost:2380"}),
		errorc: make(chan error, 1),
		typ:    streamTypeMessage,
		status: newPeerStatus(types.ID(2)),
	}
	tr.onResp = func() {
		// stop() waits for the run() goroutine to exit, but that exit
		// needs a response from RoundTrip() first; use a goroutine
		go sr.stop()
		// wait so that stop() is blocked on run() exiting
		time.Sleep(10 * time.Millisecond)
		// sr.run() completes dialing then begins decoding while stopped
	}
	sr.start()
	select {
	case <-sr.done:
	case <-time.After(time.Second):
		t.Fatal("streamReader did not stop in time")
	}
}

type respWaitRoundTripper struct {
	rrt    *respRoundTripper
	onResp func()
}

func (t *respWaitRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
	resp, err := t.rrt.RoundTrip(req)
	resp.Body = newWaitReadCloser()
	t.onResp()
	return resp, err
}

type waitReadCloser struct{ closec chan struct{} }

func newWaitReadCloser() *waitReadCloser { return &waitReadCloser{make(chan struct{})} }
func (wrc *waitReadCloser) Read(p []byte) (int, error) {
	<-wrc.closec
	return 0, io.EOF
}
func (wrc *waitReadCloser) Close() error {
	close(wrc.closec)
	return nil
}

// TestStreamReaderDialDetectUnsupport tests that the dial func can detect
// that the stream type is not supported by the remote.
func TestStreamReaderDialDetectUnsupport(t *testing.T) {
@ -19,9 +19,9 @@ import (
	proto "github.com/golang/protobuf/proto"

	math "math"
-)
-
-import io "io"
+
+	io "io"
+)

// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
@ -29,7 +29,7 @@ import (
var (
	// MinClusterVersion is the min cluster version this etcd binary is compatible with.
	MinClusterVersion = "2.3.0"
-	Version = "3.0.5"
+	Version = "3.0.12"

	// Git SHA Value will be set during build
	GitSHA = "Not provided (use ./build instead of go build)"
@ -15,28 +15,34 @@
package wal

import (
-	"bufio"
	"encoding/binary"
	"hash"
	"io"
+	"os"
	"sync"

	"github.com/coreos/etcd/pkg/crc"
+	"github.com/coreos/etcd/pkg/ioutil"
	"github.com/coreos/etcd/wal/walpb"
)

+// walPageBytes is the alignment for flushing records to the backing Writer.
+// It should be a multiple of the minimum sector size so that WAL repair can
+// safely distinguish between torn writes and ordinary data corruption.
+const walPageBytes = 8 * minSectorSize

type encoder struct {
	mu sync.Mutex
-	bw *bufio.Writer
+	bw *ioutil.PageWriter

	crc       hash.Hash32
	buf       []byte
	uint64buf []byte
}

-func newEncoder(w io.Writer, prevCrc uint32) *encoder {
+func newEncoder(w io.Writer, prevCrc uint32, pageOffset int) *encoder {
	return &encoder{
-		bw:  bufio.NewWriter(w),
+		bw:  ioutil.NewPageWriter(w, walPageBytes, pageOffset),
		crc: crc.New(prevCrc, crcTable),
		// 1MB buffer
		buf: make([]byte, 1024*1024),
@ -44,6 +50,15 @@ func newEncoder(w io.Writer, prevCrc uint32) *encoder {
	}
}

+// newFileEncoder creates a new encoder with current file offset for the page writer.
+func newFileEncoder(f *os.File, prevCrc uint32) (*encoder, error) {
+	offset, err := f.Seek(0, os.SEEK_CUR)
+	if err != nil {
+		return nil, err
+	}
+	return newEncoder(f, prevCrc, int(offset)), nil
+}

func (e *encoder) encode(rec *walpb.Record) error {
	e.mu.Lock()
	defer e.mu.Unlock()
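newFileEncoder seeds the PageWriter with the file's current offset so flushes stay sector-aligned even when a WAL file is reopened mid-page. A small sketch of the slack arithmetic the PageWriter relies on (the 512-byte minSectorSize matches etcd's value elsewhere in the wal package; treat the concrete numbers as illustrative):

package example

// slackToPageBoundary reports how many bytes must still be written before
// the next flush lands on a page boundary, given the offset the encoder
// started at and the bytes buffered since then.
func slackToPageBoundary(pageBytes, pageOffset, buffered int) int {
	return pageBytes - ((pageOffset + buffered) % pageBytes)
}

// e.g. walPageBytes = 8 * 512 = 4096; reopening a file at offset 1000 with
// 24 bytes buffered leaves 4096 - (1024 % 4096) = 3072 bytes of slack.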
@ -69,7 +69,7 @@ func TestWriteRecord(t *testing.T) {
	typ := int64(0xABCD)
	d := []byte("Hello world!")
	buf := new(bytes.Buffer)
-	e := newEncoder(buf, 0)
+	e := newEncoder(buf, 0, 0)
	e.encode(&walpb.Record{Type: typ, Data: d})
	e.flush()
	decoder := newDecoder(ioutil.NopCloser(buf))
89  wal/wal.go
@ -67,7 +67,11 @@ var (
// A just opened WAL is in read mode, and ready for reading records.
// The WAL will be ready for appending after reading out all the previous records.
type WAL struct {
-	dir string // the living directory of the underlay files
+	dir string // the living directory of the underlying files
+
+	// dirFile is a fd for the wal directory for syncing on Rename
+	dirFile *os.File

	metadata []byte           // metadata recorded at the head of each WAL
	state    raftpb.HardState // hardstate recorded at the head of WAL
@ -106,45 +110,49 @@ func Create(dirpath string, metadata []byte) (*WAL, error) {
	if err != nil {
		return nil, err
	}
-	if _, err := f.Seek(0, os.SEEK_END); err != nil {
+	if _, err = f.Seek(0, os.SEEK_END); err != nil {
		return nil, err
	}
-	if err := fileutil.Preallocate(f.File, segmentSizeBytes, true); err != nil {
+	if err = fileutil.Preallocate(f.File, segmentSizeBytes, true); err != nil {
		return nil, err
	}

	w := &WAL{
		dir:      dirpath,
		metadata: metadata,
-		encoder:  newEncoder(f, 0),
	}
+	w.encoder, err = newFileEncoder(f.File, 0)
+	if err != nil {
+		return nil, err
+	}
	w.locks = append(w.locks, f)
-	if err := w.saveCrc(0); err != nil {
+	if err = w.saveCrc(0); err != nil {
		return nil, err
	}
-	if err := w.encoder.encode(&walpb.Record{Type: metadataType, Data: metadata}); err != nil {
+	if err = w.encoder.encode(&walpb.Record{Type: metadataType, Data: metadata}); err != nil {
		return nil, err
	}
-	if err := w.SaveSnapshot(walpb.Snapshot{}); err != nil {
+	if err = w.SaveSnapshot(walpb.Snapshot{}); err != nil {
		return nil, err
	}

-	// rename of directory with locked files doesn't work on windows; close
-	// the WAL to release the locks so the directory can be renamed
-	w.Close()
-	if err := os.Rename(tmpdirpath, dirpath); err != nil {
+	if w, err = w.renameWal(tmpdirpath); err != nil {
		return nil, err
	}
-	// reopen and relock
-	newWAL, oerr := Open(dirpath, walpb.Snapshot{})
-	if oerr != nil {
-		return nil, oerr
+
+	// directory was renamed; sync parent dir to persist rename
+	pdir, perr := fileutil.OpenDir(path.Dir(w.dir))
+	if perr != nil {
+		return nil, perr
	}
-	if _, _, _, err := newWAL.ReadAll(); err != nil {
-		newWAL.Close()
-		return nil, err
+	if perr = fileutil.Fsync(pdir); perr != nil {
+		return nil, perr
	}
-	return newWAL, nil
+	if perr = pdir.Close(); perr != nil {
+		return nil, perr
+	}
+
+	return w, nil
}

// Open opens the WAL at the given snap.
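The new tail of Create is the classic rename-durability pattern: a rename only survives a crash once the parent directory's entry is itself synced. A generic sketch in plain os calls (not etcd's fileutil wrappers):

package example

import "os"

// renameDurably renames oldpath to newpath, then fsyncs the parent
// directory so the new directory entry is on disk, not just in cache.
func renameDurably(oldpath, newpath, parent string) error {
	if err := os.Rename(oldpath, newpath); err != nil {
		return err
	}
	dir, err := os.Open(parent)
	if err != nil {
		return err
	}
	defer dir.Close()
	return dir.Sync() // fsync on a directory persists its entries
}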
@ -154,7 +162,14 @@ func Create(dirpath string, metadata []byte) (*WAL, error) {
// the given snap. The WAL cannot be appended to before reading out all of its
// previous records.
func Open(dirpath string, snap walpb.Snapshot) (*WAL, error) {
-	return openAtIndex(dirpath, snap, true)
+	w, err := openAtIndex(dirpath, snap, true)
+	if err != nil {
+		return nil, err
+	}
+	if w.dirFile, err = fileutil.OpenDir(w.dir); err != nil {
+		return nil, err
+	}
+	return w, nil
}

// OpenForRead only opens the wal files for read.
@ -299,6 +314,18 @@ func (w *WAL) ReadAll() (metadata []byte, state raftpb.HardState, ents []raftpb.
		state.Reset()
		return nil, state, nil, err
	}
+	// decodeRecord() will return io.EOF if it detects a zero record,
+	// but this zero record may be followed by non-zero records from
+	// a torn write. Overwriting some of these non-zero records, but
+	// not all, will cause CRC errors on WAL open. Since the records
+	// were never fully synced to disk in the first place, it's safe
+	// to zero them out to avoid any CRC errors from new writes.
+	if _, err = w.tail().Seek(w.decoder.lastOffset(), os.SEEK_SET); err != nil {
+		return nil, state, nil, err
+	}
+	if err = fileutil.ZeroToEnd(w.tail().File); err != nil {
+		return nil, state, nil, err
+	}
	}

	err = nil
@ -317,8 +344,10 @@ func (w *WAL) ReadAll() (metadata []byte, state raftpb.HardState, ents []raftpb.

	if w.tail() != nil {
		// create encoder (chain crc with the decoder), enable appending
-		_, err = w.tail().Seek(w.decoder.lastOffset(), os.SEEK_SET)
-		w.encoder = newEncoder(w.tail(), w.decoder.lastCRC())
+		w.encoder, err = newFileEncoder(w.tail().File, w.decoder.lastCRC())
+		if err != nil {
+			return
+		}
	}
	w.decoder = nil
@ -352,7 +381,10 @@ func (w *WAL) cut() error {
	// update writer and save the previous crc
	w.locks = append(w.locks, newTail)
	prevCrc := w.encoder.crc.Sum32()
-	w.encoder = newEncoder(w.tail(), prevCrc)
+	w.encoder, err = newFileEncoder(w.tail().File, prevCrc)
+	if err != nil {
+		return err
+	}
	if err = w.saveCrc(prevCrc); err != nil {
		return err
	}
@ -375,6 +407,10 @@ func (w *WAL) cut() error {
	if err = os.Rename(newTail.Name(), fpath); err != nil {
		return err
	}
+	if err = fileutil.Fsync(w.dirFile); err != nil {
+		return err
+	}

	newTail.Close()

	if newTail, err = fileutil.LockFile(fpath, os.O_WRONLY, fileutil.PrivateFileMode); err != nil {
@ -387,7 +423,10 @@ func (w *WAL) cut() error {
	w.locks[len(w.locks)-1] = newTail

	prevCrc = w.encoder.crc.Sum32()
-	w.encoder = newEncoder(w.tail(), prevCrc)
+	w.encoder, err = newFileEncoder(w.tail().File, prevCrc)
+	if err != nil {
+		return err
+	}

	plog.Infof("segmented wal file %v is created", fpath)
	return nil
@ -477,7 +516,7 @@ func (w *WAL) Close() error {
			plog.Errorf("failed to unlock during closing wal: %s", err)
		}
	}
-	return nil
+	return w.dirFile.Close()
}

func (w *WAL) saveEntry(e *raftpb.Entry) error {
@ -61,7 +61,7 @@ func TestNew(t *testing.T) {
	}

	var wb bytes.Buffer
-	e := newEncoder(&wb, 0)
+	e := newEncoder(&wb, 0, 0)
	err = e.encode(&walpb.Record{Type: crcType, Crc: 0})
	if err != nil {
		t.Fatalf("err = %v, want nil", err)
@ -465,7 +465,7 @@ func TestSaveEmpty(t *testing.T) {
	var buf bytes.Buffer
	var est raftpb.HardState
	w := WAL{
-		encoder: newEncoder(&buf, 0),
+		encoder: newEncoder(&buf, 0, 0),
	}
	if err := w.saveState(&est); err != nil {
		t.Errorf("err = %v, want nil", err)
@ -636,3 +636,89 @@ func TestRestartCreateWal(t *testing.T) {
		t.Fatalf("got error %v and meta %q, expected nil and %q", rerr, meta, "abc")
	}
}

// TestOpenOnTornWrite ensures that entries past the torn write are truncated.
func TestOpenOnTornWrite(t *testing.T) {
	maxEntries := 40
	clobberIdx := 20
	overwriteEntries := 5

	p, err := ioutil.TempDir(os.TempDir(), "waltest")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(p)
	w, err := Create(p, nil)
	if err != nil {
		t.Fatal(err)
	}
	defer w.Close()

	// get offset of end of each saved entry
	offsets := make([]int64, maxEntries)
	for i := range offsets {
		es := []raftpb.Entry{{Index: uint64(i)}}
		if err = w.Save(raftpb.HardState{}, es); err != nil {
			t.Fatal(err)
		}
		if offsets[i], err = w.tail().Seek(0, os.SEEK_CUR); err != nil {
			t.Fatal(err)
		}
	}

	fn := path.Join(p, path.Base(w.tail().Name()))
	w.Close()

	// clobber some entry with 0's to simulate a torn write
	f, ferr := os.OpenFile(fn, os.O_WRONLY, fileutil.PrivateFileMode)
	if ferr != nil {
		t.Fatal(ferr)
	}
	defer f.Close()
	_, err = f.Seek(offsets[clobberIdx], os.SEEK_SET)
	if err != nil {
		t.Fatal(err)
	}
	zeros := make([]byte, offsets[clobberIdx+1]-offsets[clobberIdx])
	_, err = f.Write(zeros)
	if err != nil {
		t.Fatal(err)
	}
	f.Close()

	w, err = Open(p, walpb.Snapshot{})
	if err != nil {
		t.Fatal(err)
	}
	// seek up to clobbered entry
	_, _, _, err = w.ReadAll()
	if err != nil {
		t.Fatal(err)
	}

	// write a few entries past the clobbered entry
	for i := 0; i < overwriteEntries; i++ {
		// Index is different from old, truncated entries
		es := []raftpb.Entry{{Index: uint64(i + clobberIdx), Data: []byte("new")}}
		if err = w.Save(raftpb.HardState{}, es); err != nil {
			t.Fatal(err)
		}
	}
	w.Close()

	// read back the entries, confirm number of entries matches expectation
	w, err = OpenForRead(p, walpb.Snapshot{})
	if err != nil {
		t.Fatal(err)
	}

	_, _, ents, rerr := w.ReadAll()
	if rerr != nil {
		// CRC error? the old entries were likely never truncated away
		t.Fatal(rerr)
	}
	wEntries := (clobberIdx - 1) + overwriteEntries
	if len(ents) != wEntries {
		t.Fatalf("expected len(ents) = %d, got %d", wEntries, len(ents))
	}
}
44  wal/wal_unix.go  Normal file
@ -0,0 +1,44 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build !windows

package wal

import (
	"os"

	"github.com/coreos/etcd/pkg/fileutil"
)

func (w *WAL) renameWal(tmpdirpath string) (*WAL, error) {
	// On non-Windows platforms, hold the lock while renaming. Releasing
	// the lock and trying to reacquire it quickly can be flaky because
	// it's possible the process will fork to spawn a process while this is
	// happening. The fds are set up as close-on-exec by the Go runtime,
	// but there is a window between the fork and the exec where another
	// process holds the lock.
	if err := os.RemoveAll(w.dir); err != nil {
		return nil, err
	}
	if err := os.Rename(tmpdirpath, w.dir); err != nil {
		return nil, err
	}

	w.fp = newFilePipeline(w.dir, segmentSizeBytes)
	df, err := fileutil.OpenDir(w.dir)
	w.dirFile = df
	return w, err
}
41  wal/wal_windows.go  Normal file
@ -0,0 +1,41 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package wal

import (
	"os"

	"github.com/coreos/etcd/wal/walpb"
)

func (w *WAL) renameWal(tmpdirpath string) (*WAL, error) {
	// rename of directory with locked files doesn't work on
	// windows; close the WAL to release the locks so the directory
	// can be renamed
	w.Close()
	if err := os.Rename(tmpdirpath, w.dir); err != nil {
		return nil, err
	}
	// reopen and relock
	newWAL, oerr := Open(w.dir, walpb.Snapshot{})
	if oerr != nil {
		return nil, oerr
	}
	if _, _, _, err := newWAL.ReadAll(); err != nil {
		newWAL.Close()
		return nil, err
	}
	return newWAL, nil
}
@ -20,9 +20,9 @@ import (
	proto "github.com/golang/protobuf/proto"

	math "math"
-)
-
-import io "io"
+
+	io "io"
+)

// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal