Compare commits

...

66 Commits

Author SHA1 Message Date
a23109a0c6 version: bump to v3.0.16 2017-01-13 11:29:12 -08:00
219a4e9ad5 clientv3: don't reset keepalive stream on grant failure
Was triggering cancelation errors on outstanding KeepAlives if Grant
had to retry.
2017-01-13 11:28:17 -08:00
3d050630f4 v3api, rpctypes: add ErrTimeoutDueToConnectionLost
Lack of GRPC code was causing this to look like a halting error to the client.
2017-01-13 11:27:56 -08:00
9c66ed2798 clientv3: don't reset stream on keepaliveonce or revoke failure
Would cause the keepalive loop to cancel out.

Fixes #7082
2017-01-13 10:42:01 -08:00
a9e2d3d4d3 *: remove 'tools/etcd-top' to drop pcap.h 2016-12-07 10:34:34 -08:00
41e329cd35 *: drop breaking-changes from master branch 2016-12-07 10:34:28 -08:00
3a8b524d36 travis, test: use Go 1.6.4, skip 'gosimple' 2016-12-07 10:28:53 -08:00
11668f53db integration: use RequireLeader for TestV3LeaseFailover
Giving Renew() the default request timeout causes TestV3LeaseFailover
to miss its timing constraints. Since it only needs to wait until the
leader recognizes the leader is lost, use RequireLeader to cancel the
keepalive stream before the request times out.
2016-12-07 10:06:28 -08:00
7ceca7e046 clientv3/integration: test lease keepalive works following quorum loss 2016-12-07 10:06:28 -08:00
395bd2313c v3rpc, etcdserver, leasehttp: ctxize Renew with request timeout
Would retry a few times before returning a not primary error that
the client should never see. Instead, use proper timeouts and
then return a request timeout error on failure.

Fixes #6922
2016-12-07 10:06:19 -08:00
b357569bc6 version: bump to v3.0.15+git 2016-11-11 11:17:31 -08:00
fc00305a2e version: bump to v3.0.15 2016-11-10 13:12:43 -08:00
f322fe7f0d clientv3, ctlv3: document range end requirement 2016-11-10 13:10:18 -08:00
049fcd30ea integration: test wrong watcher range 2016-11-10 13:09:13 -08:00
1b702e79db mvcc: return -1 for wrong watcher range key >= end
Fix https://github.com/coreos/etcd/issues/6819.
2016-11-10 13:08:51 -08:00
b87190d9dc integration: test canceling a watcher on disconnected stream 2016-11-10 13:07:24 -08:00
83b493f945 clientv3: let watchers cancel when reconnecting 2016-11-10 13:06:47 -08:00
9b69cbd989 version: bump to v3.0.14+git 2016-11-04 13:06:36 -07:00
8a37349097 version: bump to v3.0.14 2016-11-04 10:54:14 -07:00
9a0e4dfe4f ctlv3: fix migration 2016-11-03 09:47:41 -07:00
f60469af16 ctlv3: Add a no-ttl flag to etcdctl migrate to discard keys on transform. 2016-11-03 09:47:39 -07:00
932370d8ca version: bump to v3.0.13+git 2016-10-24 11:22:50 -07:00
c99d0d4b25 version: bump to v3.0.13 2016-10-24 11:04:43 -07:00
d78216f528 e2e: remove 'ctlV3GetFailPerm' 2016-10-24 11:04:13 -07:00
c05c027a24 etcdctl: fix migrate in outputing client.Node to json
Using printf will try to parse the string and replace special
characters. In migrate code, we want to just output the raw
json string of client.Node.
For example,
    Printf("%\\") => %!\(MISSING)
    Print("%\\") => %\
Thus, we should use print instead.
2016-10-20 10:51:16 -07:00
3fd64f913a auth: fix return type on 'hasRootRole' 2016-10-12 13:59:27 -07:00
f935290bbc mvcc: fix rev inconsistency
Try:

./etcdctl put foo bar
./etcdctl del foo
./etcdctl compact 3

restart etcd

./etcdctl get foo
mvcc: required revision has been compacted

The error is unexpected when range over the head revision.

Internally, we incorrectly set current revision smaller than the
compacted revision when we remove all keys around compacted revision.

This commit fixes the issue by recovering the current revision at least
to compacted revision.
2016-10-12 13:08:26 -07:00
ca91f898a2 auth, e2e, clientv3: the root role should be granted access to every key
This commit changes the semantics of the root role. The role should be
able to access to every key.

Partially fixes https://github.com/coreos/etcd/issues/6355
2016-10-11 12:19:46 -07:00
fcbada7798 Merge pull request #6622 from luxas/backport_arm_fixes
Backport arm fixes
2016-10-11 12:15:58 -07:00
fad9bdc3e1 etcdserver: atomic access alignment
Most fields accessed with sync/atomic functions are 64bit aligned, but a couple
are not.  This makes comments out of date and therefore misleading.

Affected fields reordered, comments scrubbed and updated.
2016-10-11 11:48:43 +03:00
198ccb8b7b raftpb: atomic access alignment
The Entry struct has misaligned fields that are accessed atomically.  The
misalignment is caused by the EntryType enum which the Protocol Buffers
spec forces to be a 32bit int.

Moving the order of the fields without renumbering them in the .proto file
seems to align the go structure without changing the wire format.
2016-10-11 11:48:43 +03:00
dc5d5c6ac8 raft: atomic access alignment
The relevant structures are properly aligned, however, there is no comment
highlighting the need to keep it aligned as is present elsewhere in the
codebase.

Adding note to keep alignment, in line with similar comments in the codebase.
2016-10-11 11:48:43 +03:00
f771eaca47 version: bump to v3.0.12+git 2016-10-07 16:42:12 -07:00
2d1e2e8e64 version: bump to v3.0.12 2016-10-07 15:14:25 -07:00
6412758177 v3rpc: remove redundant locks 2016-10-07 15:13:56 -07:00
836c8159f6 v3rpc: lock progress and prevKV map correctly 2016-10-07 15:13:12 -07:00
e406e6e8f4 etcdctl/ctlv3: add 'prev-kv' flag to watch command 2016-10-07 14:23:09 -07:00
2fa2c6284e clientv3: add 'prevKV' field to watch request 2016-10-07 14:22:58 -07:00
2862c4fa12 v3rpc: implement 'prev-kv' watch 2016-10-07 14:22:19 -07:00
6f89fbf8b5 etcdserver: use mvcc.WatchableKV for prev-kv watch 2016-10-07 14:22:00 -07:00
6ae7ec9a3f *: regenerate proto 2016-10-07 14:21:19 -07:00
4a35b1b20a etcdserverpb: add 'prev_kb' to WatchCreateRequest 2016-10-07 14:20:46 -07:00
c859c97ee2 mvccpb: add 'prev_kv' field 2016-10-07 14:19:59 -07:00
a091c629e1 version: bump to v3.0.11+git 2016-10-07 13:25:21 -07:00
96de94a584 version: bump to v3.0.11 2016-10-07 11:27:48 -07:00
e9cd8410d7 integration: add 'prevKV' to TestV3DeleteRange 2016-10-07 11:03:19 -07:00
e37ede1d2e etcdserver: handle 'PrevKV' 2016-10-07 11:00:48 -07:00
4420a29ac4 etcdctl/ctlv3: add 'prev-kv' flag 2016-10-07 10:56:06 -07:00
0544d4bfd0 clientv3: add WithPrevKV OpOption 2016-10-07 10:54:45 -07:00
fe7379f102 clientv3: add Op.prevKV 2016-10-07 10:51:01 -07:00
c76df5052b *: update proto to add 'prev_kv' 2016-10-07 10:47:47 -07:00
3299cad1c3 *: add put prevkv 2016-10-07 10:39:08 -07:00
d9ab018c49 integration: test a canceled watch won't return a closing error 2016-10-05 14:19:36 -07:00
e853451cd2 clientv3: only return closing error to watcher if context is not canceled
Fixes #6503
2016-10-05 14:19:32 -07:00
1becf9d2f5 clientv3: fix race on watch initial revision
The initial revision was being updated in the substream goroutine defer;
this was racing with the resume path fetching the initial revision when
the substream closes during resume. Instead, update the initial revision
whenever the substream processes a new watch response. Since the substream
cannot receive a watch response while it is resuming, the write to the
initial revision is ordered to always happen after the resume read.

Fixes #6586
2016-10-05 10:56:36 -07:00
1a712cf187 clientv3: make IsProgressNotify() false on compact event and closed channel
Fixes #6549
2016-10-04 15:13:02 -07:00
023f335f67 wal: set PageWriter offset in file encoder 2016-10-04 15:12:47 -07:00
bf0da78b63 pkg/ioutil: configure pageOffset in NewPageWriter 2016-10-04 15:12:46 -07:00
e8473850a2 integration: test canceling watchers when disconnected 2016-10-04 15:12:37 -07:00
b836d187fd clientv3: simplify watch synchronization
Was more complicated than it needed to be and didn't really work in the
first place. Restructured watcher registation to use a queue.
2016-10-04 15:12:18 -07:00
9b09229c4d version: bump to v3.0.10+git 2016-09-23 11:13:45 -07:00
546c0f7ed6 version: bump to v3.0.10 2016-09-23 10:49:03 -07:00
adbad1c9b5 ctlv3: close snapshot file before rename (Windows) 2016-09-23 09:11:02 -07:00
273b986751 clientv3: process closed watcherStreams in watcherGrpcStream run loop
Was racing with Watch() when closing the grpc stream on no watchers.

Fixes #6476
2016-09-21 15:52:20 -07:00
5b205729b9 rafthttp: add v3.0.0 to supported streams 2016-09-16 21:54:55 +09:00
fe900b09dd version: bump to v3.0.9+git 2016-09-15 15:10:23 -07:00
69 changed files with 1756 additions and 2157 deletions

View File

@ -4,8 +4,7 @@ go_import_path: github.com/coreos/etcd
sudo: false
go:
- 1.6
- tip
- 1.6.4
env:
global:

View File

@ -427,6 +427,7 @@ Empty field.
| ----- | ----------- | ---- |
| key | key is the first key to delete in the range. | bytes |
| range_end | range_end is the key following the last key to delete for the range [key, range_end). If range_end is not given, the range is defined to contain only the key argument. If range_end is '\0', the range is all keys greater than or equal to the key argument. | bytes |
| prev_kv | If prev_kv is set, etcd gets the previous key-value pairs before deleting it. The previous key-value pairs will be returned in the delte response. | bool |
@ -436,6 +437,7 @@ Empty field.
| ----- | ----------- | ---- |
| header | | ResponseHeader |
| deleted | deleted is the number of keys deleted by the delete range request. | int64 |
| prev_kvs | if prev_kv is set in the request, the previous key-value pairs will be returned. | (slice of) mvccpb.KeyValue |
@ -591,6 +593,7 @@ Empty field.
| key | key is the key, in bytes, to put into the key-value store. | bytes |
| value | value is the value, in bytes, to associate with the key in the key-value store. | bytes |
| lease | lease is the lease ID to associate with the key in the key-value store. A lease value of 0 indicates no lease. | int64 |
| prev_kv | If prev_kv is set, etcd gets the previous key-value pair before changing it. The previous key-value pair will be returned in the put response. | bool |
@ -599,6 +602,7 @@ Empty field.
| Field | Description | Type |
| ----- | ----------- | ---- |
| header | | ResponseHeader |
| prev_kv | if prev_kv is set in the request, the previous key-value pair will be returned. | mvccpb.KeyValue |
@ -735,6 +739,7 @@ From google paxosdb paper: Our implementation hinges around a powerful primitive
| range_end | range_end is the end of the range [key, range_end) to watch. If range_end is not given, only the key argument is watched. If range_end is equal to '\0', all keys greater than or equal to the key argument are watched. | bytes |
| start_revision | start_revision is an optional revision to watch from (inclusive). No start_revision is "now". | int64 |
| progress_notify | progress_notify is set so that the etcd server will periodically send a WatchResponse with no events to the new watcher if there are no recent events. It is useful when clients wish to recover a disconnected watcher starting from a recent known revision. The etcd server may decide how often it will send notifications based on current load. | bool |
| prev_kv | If prev_kv is set, created watcher gets the previous KV before the event happens. If the previous KV is already compacted, nothing will be returned. | bool |
@ -767,6 +772,7 @@ From google paxosdb paper: Our implementation hinges around a powerful primitive
| ----- | ----------- | ---- |
| type | type is the kind of event. If type is a PUT, it indicates new data has been stored to the key. If type is a DELETE, it indicates the key was deleted. | EventType |
| kv | kv holds the KeyValue for the event. A PUT event contains current kv pair. A PUT event with kv.Version=1 indicates the creation of a key. A DELETE/EXPIRE event contains the deleted key with its modification revision set to the revision of deletion. | KeyValue |
| prev_kv | prev_kv holds the key-value pair before the event happens. | KeyValue |

View File

@ -1474,6 +1474,11 @@
"format": "byte",
"description": "key is the first key to delete in the range."
},
"prev_kv": {
"type": "boolean",
"format": "boolean",
"description": "If prev_kv is set, etcd gets the previous key-value pairs before deleting it.\nThe previous key-value pairs will be returned in the delte response."
},
"range_end": {
"type": "string",
"format": "byte",
@ -1491,6 +1496,13 @@
},
"header": {
"$ref": "#/definitions/etcdserverpbResponseHeader"
},
"prev_kvs": {
"type": "array",
"items": {
"$ref": "#/definitions/mvccpbKeyValue"
},
"description": "if prev_kv is set in the request, the previous key-value pairs will be returned."
}
}
},
@ -1724,6 +1736,11 @@
"format": "int64",
"description": "lease is the lease ID to associate with the key in the key-value store. A lease\nvalue of 0 indicates no lease."
},
"prev_kv": {
"type": "boolean",
"format": "boolean",
"description": "If prev_kv is set, etcd gets the previous key-value pair before changing it.\nThe previous key-value pair will be returned in the put response."
},
"value": {
"type": "string",
"format": "byte",
@ -1736,6 +1753,10 @@
"properties": {
"header": {
"$ref": "#/definitions/etcdserverpbResponseHeader"
},
"prev_kv": {
"$ref": "#/definitions/mvccpbKeyValue",
"description": "if prev_kv is set in the request, the previous key-value pair will be returned."
}
}
},
@ -1988,6 +2009,11 @@
"format": "byte",
"description": "key is the key to register for watching."
},
"prev_kv": {
"type": "boolean",
"format": "boolean",
"description": "If prev_kv is set, created watcher gets the previous KV before the event happens.\nIf the previous KV is already compacted, nothing will be returned."
},
"progress_notify": {
"type": "boolean",
"format": "boolean",
@ -2057,6 +2083,10 @@
"$ref": "#/definitions/mvccpbKeyValue",
"description": "kv holds the KeyValue for the event.\nA PUT event contains current kv pair.\nA PUT event with kv.Version=1 indicates the creation of a key.\nA DELETE/EXPIRE event contains the deleted key with\nits modification revision set to the revision of deletion."
},
"prev_kv": {
"$ref": "#/definitions/mvccpbKeyValue",
"description": "prev_kv holds the key-value pair before the event happens."
},
"type": {
"$ref": "#/definitions/EventEventType",
"description": "type is the kind of event. If type is a PUT, it indicates\nnew data has been stored to the key. If type is a DELETE,\nit indicates the key was deleted."

View File

@ -21,9 +21,9 @@ import (
proto "github.com/golang/protobuf/proto"
math "math"
)
import io "io"
io "io"
)
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal

View File

@ -603,6 +603,11 @@ func (as *authStore) isOpPermitted(userName string, key, rangeEnd []byte, permTy
return false
}
// root role should have permission on all ranges
if hasRootRole(user) {
return true
}
if as.isRangeOpPermitted(tx, userName, key, rangeEnd, permTyp) {
return true
}

View File

@ -32,35 +32,63 @@ func ExampleAuth() {
}
defer cli.Close()
authapi := clientv3.NewAuth(cli)
if _, err = authapi.RoleAdd(context.TODO(), "root"); err != nil {
if _, err = cli.RoleAdd(context.TODO(), "root"); err != nil {
log.Fatal(err)
}
if _, err = cli.UserAdd(context.TODO(), "root", "123"); err != nil {
log.Fatal(err)
}
if _, err = cli.UserGrantRole(context.TODO(), "root", "root"); err != nil {
log.Fatal(err)
}
if _, err = authapi.RoleGrantPermission(
if _, err = cli.RoleAdd(context.TODO(), "r"); err != nil {
log.Fatal(err)
}
if _, err = cli.RoleGrantPermission(
context.TODO(),
"root", // role name
"foo", // key
"zoo", // range end
"r", // role name
"foo", // key
"zoo", // range end
clientv3.PermissionType(clientv3.PermReadWrite),
); err != nil {
log.Fatal(err)
}
if _, err = authapi.UserAdd(context.TODO(), "root", "123"); err != nil {
if _, err = cli.UserAdd(context.TODO(), "u", "123"); err != nil {
log.Fatal(err)
}
if _, err = authapi.UserGrantRole(context.TODO(), "root", "root"); err != nil {
if _, err = cli.UserGrantRole(context.TODO(), "u", "r"); err != nil {
log.Fatal(err)
}
if _, err = authapi.AuthEnable(context.TODO()); err != nil {
if _, err = cli.AuthEnable(context.TODO()); err != nil {
log.Fatal(err)
}
cliAuth, err := clientv3.New(clientv3.Config{
Endpoints: endpoints,
DialTimeout: dialTimeout,
Username: "u",
Password: "123",
})
if err != nil {
log.Fatal(err)
}
defer cliAuth.Close()
if _, err = cliAuth.Put(context.TODO(), "foo1", "bar"); err != nil {
log.Fatal(err)
}
_, err = cliAuth.Txn(context.TODO()).
If(clientv3.Compare(clientv3.Value("zoo1"), ">", "abc")).
Then(clientv3.OpPut("zoo1", "XYZ")).
Else(clientv3.OpPut("zoo1", "ABC")).
Commit()
fmt.Println(err)
// now check the permission with the root account
rootCli, err := clientv3.New(clientv3.Config{
Endpoints: endpoints,
DialTimeout: dialTimeout,
Username: "root",
@ -69,31 +97,17 @@ func ExampleAuth() {
if err != nil {
log.Fatal(err)
}
defer cliAuth.Close()
defer rootCli.Close()
kv := clientv3.NewKV(cliAuth)
if _, err = kv.Put(context.TODO(), "foo1", "bar"); err != nil {
log.Fatal(err)
}
_, err = kv.Txn(context.TODO()).
If(clientv3.Compare(clientv3.Value("zoo1"), ">", "abc")).
Then(clientv3.OpPut("zoo1", "XYZ")).
Else(clientv3.OpPut("zoo1", "ABC")).
Commit()
fmt.Println(err)
// now check the permission
authapi2 := clientv3.NewAuth(cliAuth)
resp, err := authapi2.RoleGet(context.TODO(), "root")
resp, err := rootCli.RoleGet(context.TODO(), "r")
if err != nil {
log.Fatal(err)
}
fmt.Printf("root user permission: key %q, range end %q\n", resp.Perm[0].Key, resp.Perm[0].RangeEnd)
fmt.Printf("user u permission: key %q, range end %q\n", resp.Perm[0].Key, resp.Perm[0].RangeEnd)
if _, err = authapi2.AuthDisable(context.TODO()); err != nil {
if _, err = rootCli.AuthDisable(context.TODO()); err != nil {
log.Fatal(err)
}
// Output: etcdserver: permission denied
// root user permission: key "foo", range end "zoo"
// user u permission: key "foo", range end "zoo"
}

View File

@ -455,3 +455,46 @@ func TestLeaseKeepAliveTTLTimeout(t *testing.T) {
clus.Members[0].Restart(t)
}
// TestLeaseRenewLostQuorum ensures keepalives work after losing quorum
// for a while.
func TestLeaseRenewLostQuorum(t *testing.T) {
defer testutil.AfterTest(t)
clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 3})
defer clus.Terminate(t)
cli := clus.Client(0)
r, err := cli.Grant(context.TODO(), 4)
if err != nil {
t.Fatal(err)
}
kctx, kcancel := context.WithCancel(context.Background())
defer kcancel()
ka, err := cli.KeepAlive(kctx, r.ID)
if err != nil {
t.Fatal(err)
}
// consume first keepalive so next message sends when cluster is down
<-ka
// force keepalive stream message to timeout
clus.Members[1].Stop(t)
clus.Members[2].Stop(t)
// Use TTL-1 since the client closes the keepalive channel if no
// keepalive arrives before the lease deadline.
// The cluster has 1 second to recover and reply to the keepalive.
time.Sleep(time.Duration(r.TTL-1) * time.Second)
clus.Members[1].Restart(t)
clus.Members[2].Restart(t)
select {
case _, ok := <-ka:
if !ok {
t.Fatalf("keepalive closed")
}
case <-time.After(time.Duration(r.TTL) * time.Second):
t.Fatalf("timed out waiting for keepalive")
}
}

View File

@ -673,3 +673,131 @@ func TestWatchWithRequireLeader(t *testing.T) {
t.Fatalf("expected response, got closed channel")
}
}
// TestWatchOverlapContextCancel stresses the watcher stream teardown path by
// creating/canceling watchers to ensure that new watchers are not taken down
// by a torn down watch stream. The sort of race that's being detected:
// 1. create w1 using a cancelable ctx with %v as "ctx"
// 2. cancel ctx
// 3. watcher client begins tearing down watcher grpc stream since no more watchers
// 3. start creating watcher w2 using a new "ctx" (not canceled), attaches to old grpc stream
// 4. watcher client finishes tearing down stream on "ctx"
// 5. w2 comes back canceled
func TestWatchOverlapContextCancel(t *testing.T) {
f := func(clus *integration.ClusterV3) {}
testWatchOverlapContextCancel(t, f)
}
func TestWatchOverlapDropConnContextCancel(t *testing.T) {
f := func(clus *integration.ClusterV3) {
clus.Members[0].DropConnections()
}
testWatchOverlapContextCancel(t, f)
}
func testWatchOverlapContextCancel(t *testing.T, f func(*integration.ClusterV3)) {
defer testutil.AfterTest(t)
clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
defer clus.Terminate(t)
// each unique context "%v" has a unique grpc stream
n := 100
ctxs, ctxc := make([]context.Context, 5), make([]chan struct{}, 5)
for i := range ctxs {
// make "%v" unique
ctxs[i] = context.WithValue(context.TODO(), "key", i)
// limits the maximum number of outstanding watchers per stream
ctxc[i] = make(chan struct{}, 2)
}
// issue concurrent watches on "abc" with cancel
cli := clus.RandClient()
if _, err := cli.Put(context.TODO(), "abc", "def"); err != nil {
t.Fatal(err)
}
ch := make(chan struct{}, n)
for i := 0; i < n; i++ {
go func() {
defer func() { ch <- struct{}{} }()
idx := rand.Intn(len(ctxs))
ctx, cancel := context.WithCancel(ctxs[idx])
ctxc[idx] <- struct{}{}
wch := cli.Watch(ctx, "abc", clientv3.WithRev(1))
f(clus)
select {
case _, ok := <-wch:
if !ok {
t.Fatalf("unexpected closed channel %p", wch)
}
// may take a second or two to reestablish a watcher because of
// grpc backoff policies for disconnects
case <-time.After(5 * time.Second):
t.Errorf("timed out waiting for watch on %p", wch)
}
// randomize how cancel overlaps with watch creation
if rand.Intn(2) == 0 {
<-ctxc[idx]
cancel()
} else {
cancel()
<-ctxc[idx]
}
}()
}
// join on watches
for i := 0; i < n; i++ {
select {
case <-ch:
case <-time.After(5 * time.Second):
t.Fatalf("timed out waiting for completed watch")
}
}
}
// TestWatchCanelAndCloseClient ensures that canceling a watcher then immediately
// closing the client does not return a client closing error.
func TestWatchCancelAndCloseClient(t *testing.T) {
defer testutil.AfterTest(t)
clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
defer clus.Terminate(t)
cli := clus.Client(0)
ctx, cancel := context.WithCancel(context.Background())
wch := cli.Watch(ctx, "abc")
donec := make(chan struct{})
go func() {
defer close(donec)
select {
case wr, ok := <-wch:
if ok {
t.Fatalf("expected closed watch after cancel(), got resp=%+v err=%v", wr, wr.Err())
}
case <-time.After(5 * time.Second):
t.Fatal("timed out waiting for closed channel")
}
}()
cancel()
if err := cli.Close(); err != nil {
t.Fatal(err)
}
<-donec
clus.TakeClient(0)
}
// TestWatchCancelDisconnected ensures canceling a watcher works when
// its grpc stream is disconnected / reconnecting.
func TestWatchCancelDisconnected(t *testing.T) {
defer testutil.AfterTest(t)
clus := integration.NewClusterV3(t, &integration.ClusterConfig{Size: 1})
defer clus.Terminate(t)
cli := clus.Client(0)
ctx, cancel := context.WithCancel(context.Background())
// add more watches than can be resumed before the cancel
wch := cli.Watch(ctx, "abc")
clus.Members[0].Stop(t)
cancel()
select {
case <-wch:
case <-time.After(time.Second):
t.Fatal("took too long to cancel disconnected watcher")
}
}

View File

@ -157,14 +157,14 @@ func (kv *kv) do(ctx context.Context, op Op) (OpResponse, error) {
}
case tPut:
var resp *pb.PutResponse
r := &pb.PutRequest{Key: op.key, Value: op.val, Lease: int64(op.leaseID)}
r := &pb.PutRequest{Key: op.key, Value: op.val, Lease: int64(op.leaseID), PrevKv: op.prevKV}
resp, err = kv.remote.Put(ctx, r)
if err == nil {
return OpResponse{put: (*PutResponse)(resp)}, nil
}
case tDeleteRange:
var resp *pb.DeleteRangeResponse
r := &pb.DeleteRangeRequest{Key: op.key, RangeEnd: op.end}
r := &pb.DeleteRangeRequest{Key: op.key, RangeEnd: op.end, PrevKv: op.prevKV}
resp, err = kv.remote.DeleteRange(ctx, r)
if err == nil {
return OpResponse{del: (*DeleteResponse)(resp)}, nil

View File

@ -143,9 +143,6 @@ func (l *lessor) Grant(ctx context.Context, ttl int64) (*LeaseGrantResponse, err
if isHaltErr(cctx, err) {
return nil, toErr(ctx, err)
}
if nerr := l.newStream(); nerr != nil {
return nil, nerr
}
}
}
@ -164,9 +161,6 @@ func (l *lessor) Revoke(ctx context.Context, id LeaseID) (*LeaseRevokeResponse,
if isHaltErr(ctx, err) {
return nil, toErr(ctx, err)
}
if nerr := l.newStream(); nerr != nil {
return nil, nerr
}
}
}
@ -213,10 +207,6 @@ func (l *lessor) KeepAliveOnce(ctx context.Context, id LeaseID) (*LeaseKeepAlive
if isHaltErr(ctx, err) {
return nil, toErr(ctx, err)
}
if nerr := l.newStream(); nerr != nil {
return nil, nerr
}
}
}
@ -312,10 +302,23 @@ func (l *lessor) recvKeepAliveLoop() {
// resetRecv opens a new lease stream and starts sending LeaseKeepAliveRequests
func (l *lessor) resetRecv() (pb.Lease_LeaseKeepAliveClient, error) {
if err := l.newStream(); err != nil {
sctx, cancel := context.WithCancel(l.stopCtx)
stream, err := l.remote.LeaseKeepAlive(sctx, grpc.FailFast(false))
if err = toErr(sctx, err); err != nil {
cancel()
return nil, err
}
stream := l.getKeepAliveStream()
l.mu.Lock()
defer l.mu.Unlock()
if l.stream != nil && l.streamCancel != nil {
l.stream.CloseSend()
l.streamCancel()
}
l.streamCancel = cancel
l.stream = stream
go l.sendKeepAliveLoop(stream)
return stream, nil
}
@ -411,32 +414,6 @@ func (l *lessor) sendKeepAliveLoop(stream pb.Lease_LeaseKeepAliveClient) {
}
}
func (l *lessor) getKeepAliveStream() pb.Lease_LeaseKeepAliveClient {
l.mu.Lock()
defer l.mu.Unlock()
return l.stream
}
func (l *lessor) newStream() error {
sctx, cancel := context.WithCancel(l.stopCtx)
stream, err := l.remote.LeaseKeepAlive(sctx, grpc.FailFast(false))
if err != nil {
cancel()
return toErr(sctx, err)
}
l.mu.Lock()
defer l.mu.Unlock()
if l.stream != nil && l.streamCancel != nil {
l.stream.CloseSend()
l.streamCancel()
}
l.streamCancel = cancel
l.stream = stream
return nil
}
func (ka *keepAlive) Close() {
close(ka.donec)
for _, ch := range ka.chs {

View File

@ -47,6 +47,9 @@ type Op struct {
// for range, watch
rev int64
// for watch, put, delete
prevKV bool
// progressNotify is for progress updates.
progressNotify bool
@ -73,10 +76,10 @@ func (op Op) toRequestOp() *pb.RequestOp {
}
return &pb.RequestOp{Request: &pb.RequestOp_RequestRange{RequestRange: r}}
case tPut:
r := &pb.PutRequest{Key: op.key, Value: op.val, Lease: int64(op.leaseID)}
r := &pb.PutRequest{Key: op.key, Value: op.val, Lease: int64(op.leaseID), PrevKv: op.prevKV}
return &pb.RequestOp{Request: &pb.RequestOp_RequestPut{RequestPut: r}}
case tDeleteRange:
r := &pb.DeleteRangeRequest{Key: op.key, RangeEnd: op.end}
r := &pb.DeleteRangeRequest{Key: op.key, RangeEnd: op.end, PrevKv: op.prevKV}
return &pb.RequestOp{Request: &pb.RequestOp_RequestDeleteRange{RequestDeleteRange: r}}
default:
panic("Unknown Op")
@ -212,14 +215,15 @@ func WithPrefix() OpOption {
}
}
// WithRange specifies the range of 'Get' or 'Delete' requests.
// WithRange specifies the range of 'Get', 'Delete', 'Watch' requests.
// For example, 'Get' requests with 'WithRange(end)' returns
// the keys in the range [key, end).
// endKey must be lexicographically greater than start key.
func WithRange(endKey string) OpOption {
return func(op *Op) { op.end = []byte(endKey) }
}
// WithFromKey specifies the range of 'Get' or 'Delete' requests
// WithFromKey specifies the range of 'Get', 'Delete', 'Watch' requests
// to be equal or greater than the key in the argument.
func WithFromKey() OpOption { return WithRange("\x00") }
@ -271,3 +275,11 @@ func WithProgressNotify() OpOption {
op.progressNotify = true
}
}
// WithPrevKV gets the previous key-value pair before the event happens. If the previous KV is already compacted,
// nothing will be returned.
func WithPrevKV() OpOption {
return func(op *Op) {
op.prevKV = true
}
}

View File

@ -61,6 +61,9 @@ type WatchResponse struct {
// the channel sends a final response that has Canceled set to true with a non-nil Err().
Canceled bool
// created is used to indicate the creation of the watcher.
created bool
closeErr error
}
@ -89,7 +92,7 @@ func (wr *WatchResponse) Err() error {
// IsProgressNotify returns true if the WatchResponse is progress notification.
func (wr *WatchResponse) IsProgressNotify() bool {
return len(wr.Events) == 0 && !wr.Canceled
return len(wr.Events) == 0 && !wr.Canceled && !wr.created && wr.CompactRevision == 0 && wr.Header.Revision != 0
}
// watcher implements the Watcher interface
@ -102,6 +105,7 @@ type watcher struct {
streams map[string]*watchGrpcStream
}
// watchGrpcStream tracks all watch resources attached to a single grpc stream.
type watchGrpcStream struct {
owner *watcher
remote pb.WatchClient
@ -112,23 +116,25 @@ type watchGrpcStream struct {
ctxKey string
cancel context.CancelFunc
// mu protects the streams map
mu sync.RWMutex
// streams holds all active watchers
streams map[int64]*watcherStream
// substreams holds all active watchers on this grpc stream
substreams map[int64]*watcherStream
// resuming holds all resuming watchers on this grpc stream
resuming []*watcherStream
// reqc sends a watch request from Watch() to the main goroutine
reqc chan *watchRequest
// respc receives data from the watch client
respc chan *pb.WatchResponse
// stopc is sent to the main goroutine to stop all processing
stopc chan struct{}
// donec closes to broadcast shutdown
donec chan struct{}
// errc transmits errors from grpc Recv to the watch stream reconn logic
errc chan error
// closingc gets the watcherStream of closing watchers
closingc chan *watcherStream
// the error that closed the watch stream
// resumec closes to signal that all substreams should begin resuming
resumec chan struct{}
// closeErr is the error that closed the watch stream
closeErr error
}
@ -140,6 +146,8 @@ type watchRequest struct {
rev int64
// progressNotify is for progress updates.
progressNotify bool
// get the previous key-value pair before the event happens
prevKV bool
// retc receives a chan WatchResponse once the watcher is established
retc chan chan WatchResponse
}
@ -150,15 +158,18 @@ type watcherStream struct {
initReq watchRequest
// outc publishes watch responses to subscriber
outc chan<- WatchResponse
outc chan WatchResponse
// recvc buffers watch responses before publishing
recvc chan *WatchResponse
id int64
// donec closes when the watcherStream goroutine stops.
donec chan struct{}
// closing is set to true when stream should be scheduled to shutdown.
closing bool
// id is the registered watch id on the grpc stream
id int64
// lastRev is revision last successfully sent over outc
lastRev int64
// resumec indicates the stream must recover at a given revision
resumec chan int64
// buf holds all events received from etcd but not yet consumed by the client
buf []*WatchResponse
}
func NewWatcher(c *Client) Watcher {
@ -182,18 +193,19 @@ func (vc *valCtx) Err() error { return nil }
func (w *watcher) newWatcherGrpcStream(inctx context.Context) *watchGrpcStream {
ctx, cancel := context.WithCancel(&valCtx{inctx})
wgs := &watchGrpcStream{
owner: w,
remote: w.remote,
ctx: ctx,
ctxKey: fmt.Sprintf("%v", inctx),
cancel: cancel,
streams: make(map[int64]*watcherStream),
owner: w,
remote: w.remote,
ctx: ctx,
ctxKey: fmt.Sprintf("%v", inctx),
cancel: cancel,
substreams: make(map[int64]*watcherStream),
respc: make(chan *pb.WatchResponse),
reqc: make(chan *watchRequest),
stopc: make(chan struct{}),
donec: make(chan struct{}),
errc: make(chan error, 1),
respc: make(chan *pb.WatchResponse),
reqc: make(chan *watchRequest),
donec: make(chan struct{}),
errc: make(chan error, 1),
closingc: make(chan *watcherStream),
resumec: make(chan struct{}),
}
go wgs.run()
return wgs
@ -203,14 +215,14 @@ func (w *watcher) newWatcherGrpcStream(inctx context.Context) *watchGrpcStream {
func (w *watcher) Watch(ctx context.Context, key string, opts ...OpOption) WatchChan {
ow := opWatch(key, opts...)
retc := make(chan chan WatchResponse, 1)
wr := &watchRequest{
ctx: ctx,
key: string(ow.key),
end: string(ow.end),
rev: ow.rev,
progressNotify: ow.progressNotify,
retc: retc,
prevKV: ow.prevKV,
retc: make(chan chan WatchResponse, 1),
}
ok := false
@ -242,7 +254,6 @@ func (w *watcher) Watch(ctx context.Context, key string, opts ...OpOption) Watch
case reqc <- wr:
ok = true
case <-wr.ctx.Done():
wgs.stopIfEmpty()
case <-donec:
if wgs.closeErr != nil {
closeCh <- WatchResponse{closeErr: wgs.closeErr}
@ -255,7 +266,7 @@ func (w *watcher) Watch(ctx context.Context, key string, opts ...OpOption) Watch
// receive channel
if ok {
select {
case ret := <-retc:
case ret := <-wr.retc:
return ret
case <-ctx.Done():
case <-donec:
@ -286,12 +297,7 @@ func (w *watcher) Close() (err error) {
}
func (w *watchGrpcStream) Close() (err error) {
w.mu.Lock()
if w.stopc != nil {
close(w.stopc)
w.stopc = nil
}
w.mu.Unlock()
w.cancel()
<-w.donec
select {
case err = <-w.errc:
@ -300,67 +306,57 @@ func (w *watchGrpcStream) Close() (err error) {
return toErr(w.ctx, err)
}
func (w *watchGrpcStream) addStream(resp *pb.WatchResponse, pendingReq *watchRequest) {
if pendingReq == nil {
// no pending request; ignore
return
}
if resp.Canceled || resp.CompactRevision != 0 {
// a cancel at id creation time means the start revision has
// been compacted out of the store
ret := make(chan WatchResponse, 1)
ret <- WatchResponse{
Header: *resp.Header,
CompactRevision: resp.CompactRevision,
Canceled: true}
close(ret)
pendingReq.retc <- ret
return
}
ret := make(chan WatchResponse)
if resp.WatchId == -1 {
// failed; no channel
close(ret)
pendingReq.retc <- ret
return
}
ws := &watcherStream{
initReq: *pendingReq,
id: resp.WatchId,
outc: ret,
// buffered so unlikely to block on sending while holding mu
recvc: make(chan *WatchResponse, 4),
resumec: make(chan int64),
}
if pendingReq.rev == 0 {
// note the header revision so that a put following a current watcher
// disconnect will arrive on the watcher channel after reconnect
ws.initReq.rev = resp.Header.Revision
}
func (w *watcher) closeStream(wgs *watchGrpcStream) {
w.mu.Lock()
w.streams[ws.id] = ws
close(wgs.donec)
wgs.cancel()
if w.streams != nil {
delete(w.streams, wgs.ctxKey)
}
w.mu.Unlock()
// pass back the subscriber channel for the watcher
pendingReq.retc <- ret
// send messages to subscriber
go w.serveStream(ws)
}
// closeStream closes the watcher resources and removes it
func (w *watchGrpcStream) closeStream(ws *watcherStream) {
w.mu.Lock()
// cancels request stream; subscriber receives nil channel
close(ws.initReq.retc)
// close subscriber's channel
func (w *watchGrpcStream) addSubstream(resp *pb.WatchResponse, ws *watcherStream) {
if resp.WatchId == -1 {
// failed; no channel
close(ws.recvc)
return
}
ws.id = resp.WatchId
w.substreams[ws.id] = ws
}
func (w *watchGrpcStream) sendCloseSubstream(ws *watcherStream, resp *WatchResponse) {
select {
case ws.outc <- *resp:
case <-ws.initReq.ctx.Done():
case <-time.After(closeSendErrTimeout):
}
close(ws.outc)
delete(w.streams, ws.id)
w.mu.Unlock()
}
func (w *watchGrpcStream) closeSubstream(ws *watcherStream) {
// send channel response in case stream was never established
select {
case ws.initReq.retc <- ws.outc:
default:
}
// close subscriber's channel
if closeErr := w.closeErr; closeErr != nil && ws.initReq.ctx.Err() == nil {
go w.sendCloseSubstream(ws, &WatchResponse{closeErr: w.closeErr})
} else if ws.outc != nil {
close(ws.outc)
}
if ws.id != -1 {
delete(w.substreams, ws.id)
return
}
for i := range w.resuming {
if w.resuming[i] == ws {
w.resuming[i] = nil
return
}
}
}
// run is the root of the goroutines for managing a watcher client
@ -368,66 +364,79 @@ func (w *watchGrpcStream) run() {
var wc pb.Watch_WatchClient
var closeErr error
defer func() {
w.owner.mu.Lock()
w.closeErr = closeErr
if w.owner.streams != nil {
delete(w.owner.streams, w.ctxKey)
}
close(w.donec)
w.owner.mu.Unlock()
w.cancel()
}()
// substreams marked to close but goroutine still running; needed for
// avoiding double-closing recvc on grpc stream teardown
closing := make(map[*watcherStream]struct{})
// already stopped?
w.mu.RLock()
stopc := w.stopc
w.mu.RUnlock()
if stopc == nil {
return
}
defer func() {
w.closeErr = closeErr
// shutdown substreams and resuming substreams
for _, ws := range w.substreams {
if _, ok := closing[ws]; !ok {
close(ws.recvc)
}
}
for _, ws := range w.resuming {
if _, ok := closing[ws]; ws != nil && !ok {
close(ws.recvc)
}
}
w.joinSubstreams()
for toClose := len(w.substreams) + len(w.resuming); toClose > 0; toClose-- {
w.closeSubstream(<-w.closingc)
}
w.owner.closeStream(w)
}()
// start a stream with the etcd grpc server
if wc, closeErr = w.newWatchClient(); closeErr != nil {
return
}
var pendingReq, failedReq *watchRequest
curReqC := w.reqc
cancelSet := make(map[int64]struct{})
for {
select {
// Watch() requested
case pendingReq = <-curReqC:
// no more watch requests until there's a response
curReqC = nil
if err := wc.Send(pendingReq.toPB()); err == nil {
// pendingReq now waits on w.respc
break
case wreq := <-w.reqc:
outc := make(chan WatchResponse, 1)
ws := &watcherStream{
initReq: *wreq,
id: -1,
outc: outc,
// unbufffered so resumes won't cause repeat events
recvc: make(chan *WatchResponse),
}
ws.donec = make(chan struct{})
go w.serveSubstream(ws, w.resumec)
// queue up for watcher creation/resume
w.resuming = append(w.resuming, ws)
if len(w.resuming) == 1 {
// head of resume queue, can register a new watcher
wc.Send(ws.initReq.toPB())
}
failedReq = pendingReq
// New events from the watch client
case pbresp := <-w.respc:
switch {
case pbresp.Created:
// response to pending req, try to add
w.addStream(pbresp, pendingReq)
pendingReq = nil
curReqC = w.reqc
// response to head of queue creation
if ws := w.resuming[0]; ws != nil {
w.addSubstream(pbresp, ws)
w.dispatchEvent(pbresp)
w.resuming[0] = nil
}
if ws := w.nextResume(); ws != nil {
wc.Send(ws.initReq.toPB())
}
case pbresp.Canceled:
delete(cancelSet, pbresp.WatchId)
// shutdown serveStream, if any
w.mu.Lock()
if ws, ok := w.streams[pbresp.WatchId]; ok {
if ws, ok := w.substreams[pbresp.WatchId]; ok {
// signal to stream goroutine to update closingc
close(ws.recvc)
delete(w.streams, ws.id)
}
numStreams := len(w.streams)
w.mu.Unlock()
if numStreams == 0 {
// don't leak watcher streams
return
closing[ws] = struct{}{}
}
default:
// dispatch to appropriate watch stream
@ -448,7 +457,6 @@ func (w *watchGrpcStream) run() {
wc.Send(req)
}
// watch client failed to recv; spawn another if possible
// TODO report watch client errors from errc?
case err := <-w.errc:
if toErr(w.ctx, err) == v3rpc.ErrNoLeader {
closeErr = err
@ -457,48 +465,58 @@ func (w *watchGrpcStream) run() {
if wc, closeErr = w.newWatchClient(); closeErr != nil {
return
}
curReqC = w.reqc
if pendingReq != nil {
failedReq = pendingReq
if ws := w.nextResume(); ws != nil {
wc.Send(ws.initReq.toPB())
}
cancelSet = make(map[int64]struct{})
case <-stopc:
case <-w.ctx.Done():
return
}
// send failed; queue for retry
if failedReq != nil {
go func(wr *watchRequest) {
select {
case w.reqc <- wr:
case <-wr.ctx.Done():
case <-w.donec:
}
}(pendingReq)
failedReq = nil
pendingReq = nil
case ws := <-w.closingc:
w.closeSubstream(ws)
delete(closing, ws)
if len(w.substreams)+len(w.resuming) == 0 {
// no more watchers on this stream, shutdown
return
}
}
}
}
// nextResume chooses the next resuming to register with the grpc stream. Abandoned
// streams are marked as nil in the queue since the head must wait for its inflight registration.
func (w *watchGrpcStream) nextResume() *watcherStream {
for len(w.resuming) != 0 {
if w.resuming[0] != nil {
return w.resuming[0]
}
w.resuming = w.resuming[1:len(w.resuming)]
}
return nil
}
// dispatchEvent sends a WatchResponse to the appropriate watcher stream
func (w *watchGrpcStream) dispatchEvent(pbresp *pb.WatchResponse) bool {
w.mu.RLock()
defer w.mu.RUnlock()
ws, ok := w.streams[pbresp.WatchId]
ws, ok := w.substreams[pbresp.WatchId]
if !ok {
return false
}
events := make([]*Event, len(pbresp.Events))
for i, ev := range pbresp.Events {
events[i] = (*Event)(ev)
}
if ok {
wr := &WatchResponse{
Header: *pbresp.Header,
Events: events,
CompactRevision: pbresp.CompactRevision,
Canceled: pbresp.Canceled}
ws.recvc <- wr
wr := &WatchResponse{
Header: *pbresp.Header,
Events: events,
CompactRevision: pbresp.CompactRevision,
created: pbresp.Created,
Canceled: pbresp.Canceled,
}
return ok
select {
case ws.recvc <- wr:
case <-ws.donec:
return false
}
return true
}
// serveWatchClient forwards messages from the grpc stream to run()
@ -520,134 +538,169 @@ func (w *watchGrpcStream) serveWatchClient(wc pb.Watch_WatchClient) {
}
}
// serveStream forwards watch responses from run() to the subscriber
func (w *watchGrpcStream) serveStream(ws *watcherStream) {
var closeErr error
emptyWr := &WatchResponse{}
wrs := []*WatchResponse{}
// serveSubstream forwards watch responses from run() to the subscriber
func (w *watchGrpcStream) serveSubstream(ws *watcherStream, resumec chan struct{}) {
if ws.closing {
panic("created substream goroutine but substream is closing")
}
// nextRev is the minimum expected next revision
nextRev := ws.initReq.rev
resuming := false
closing := false
for !closing {
defer func() {
if !resuming {
ws.closing = true
}
close(ws.donec)
if !resuming {
w.closingc <- ws
}
}()
emptyWr := &WatchResponse{}
for {
curWr := emptyWr
outc := ws.outc
if len(wrs) > 0 {
curWr = wrs[0]
if len(ws.buf) > 0 && ws.buf[0].created {
select {
case ws.initReq.retc <- ws.outc:
default:
}
ws.buf = ws.buf[1:]
}
if len(ws.buf) > 0 {
curWr = ws.buf[0]
} else {
outc = nil
}
select {
case outc <- *curWr:
if wrs[0].Err() != nil {
closing = true
break
}
var newRev int64
if len(wrs[0].Events) > 0 {
newRev = wrs[0].Events[len(wrs[0].Events)-1].Kv.ModRevision
} else {
newRev = wrs[0].Header.Revision
}
if newRev != ws.lastRev {
ws.lastRev = newRev
}
wrs[0] = nil
wrs = wrs[1:]
case wr, ok := <-ws.recvc:
if !ok {
// shutdown from closeStream
if ws.buf[0].Err() != nil {
return
}
// resume up to last seen event if disconnected
if resuming && wr.Err() == nil {
resuming = false
// trim events already seen
for i := 0; i < len(wr.Events); i++ {
if wr.Events[i].Kv.ModRevision > ws.lastRev {
wr.Events = wr.Events[i:]
break
}
}
// only forward new events
if wr.Events[0].Kv.ModRevision == ws.lastRev {
break
}
ws.buf[0] = nil
ws.buf = ws.buf[1:]
case wr, ok := <-ws.recvc:
if !ok {
// shutdown from closeSubstream
return
}
resuming = false
// TODO don't keep buffering if subscriber stops reading
wrs = append(wrs, wr)
case resumeRev := <-ws.resumec:
wrs = nil
resuming = true
if resumeRev == -1 {
// pause serving stream while resume gets set up
break
// TODO pause channel if buffer gets too large
ws.buf = append(ws.buf, wr)
nextRev = wr.Header.Revision
if len(wr.Events) > 0 {
nextRev = wr.Events[len(wr.Events)-1].Kv.ModRevision + 1
}
if resumeRev != ws.lastRev {
panic("unexpected resume revision")
}
case <-w.donec:
closing = true
closeErr = w.closeErr
ws.initReq.rev = nextRev
case <-w.ctx.Done():
return
case <-ws.initReq.ctx.Done():
closing = true
return
case <-resumec:
resuming = true
return
}
}
// try to send off close error
if closeErr != nil {
select {
case ws.outc <- WatchResponse{closeErr: w.closeErr}:
case <-w.donec:
case <-time.After(closeSendErrTimeout):
}
}
w.closeStream(ws)
w.stopIfEmpty()
// lazily send cancel message if events on missing id
}
func (wgs *watchGrpcStream) stopIfEmpty() {
wgs.mu.Lock()
if len(wgs.streams) == 0 && wgs.stopc != nil {
close(wgs.stopc)
wgs.stopc = nil
}
wgs.mu.Unlock()
}
func (w *watchGrpcStream) newWatchClient() (pb.Watch_WatchClient, error) {
ws, rerr := w.resume()
if rerr != nil {
return nil, rerr
// mark all substreams as resuming
close(w.resumec)
w.resumec = make(chan struct{})
w.joinSubstreams()
for _, ws := range w.substreams {
ws.id = -1
w.resuming = append(w.resuming, ws)
}
go w.serveWatchClient(ws)
return ws, nil
}
// resume creates a new WatchClient with all current watchers reestablished
func (w *watchGrpcStream) resume() (ws pb.Watch_WatchClient, err error) {
for {
if ws, err = w.openWatchClient(); err != nil {
break
} else if err = w.resumeWatchers(ws); err == nil {
break
// strip out nils, if any
var resuming []*watcherStream
for _, ws := range w.resuming {
if ws != nil {
resuming = append(resuming, ws)
}
}
w.resuming = resuming
w.substreams = make(map[int64]*watcherStream)
// connect to grpc stream while accepting watcher cancelation
stopc := make(chan struct{})
donec := w.waitCancelSubstreams(stopc)
wc, err := w.openWatchClient()
close(stopc)
<-donec
// serve all non-closing streams, even if there's a client error
// so that the teardown path can shutdown the streams as expected.
for _, ws := range w.resuming {
if ws.closing {
continue
}
ws.donec = make(chan struct{})
go w.serveSubstream(ws, w.resumec)
}
if err != nil {
return nil, v3rpc.Error(err)
}
// receive data from new grpc stream
go w.serveWatchClient(wc)
return wc, nil
}
func (w *watchGrpcStream) waitCancelSubstreams(stopc <-chan struct{}) <-chan struct{} {
var wg sync.WaitGroup
wg.Add(len(w.resuming))
donec := make(chan struct{})
for i := range w.resuming {
go func(ws *watcherStream) {
defer wg.Done()
if ws.closing {
return
}
select {
case <-ws.initReq.ctx.Done():
// closed ws will be removed from resuming
ws.closing = true
close(ws.outc)
ws.outc = nil
go func() { w.closingc <- ws }()
case <-stopc:
}
}(w.resuming[i])
}
go func() {
defer close(donec)
wg.Wait()
}()
return donec
}
// joinSubstream waits for all substream goroutines to complete
func (w *watchGrpcStream) joinSubstreams() {
for _, ws := range w.substreams {
<-ws.donec
}
for _, ws := range w.resuming {
if ws != nil {
<-ws.donec
}
}
return ws, v3rpc.Error(err)
}
// openWatchClient retries opening a watchclient until retryConnection fails
func (w *watchGrpcStream) openWatchClient() (ws pb.Watch_WatchClient, err error) {
for {
w.mu.Lock()
stopc := w.stopc
w.mu.Unlock()
if stopc == nil {
select {
case <-w.ctx.Done():
if err == nil {
err = context.Canceled
return nil, w.ctx.Err()
}
return nil, err
default:
}
if ws, err = w.remote.Watch(w.ctx, grpc.FailFast(false)); ws != nil && err == nil {
break
@ -659,63 +712,6 @@ func (w *watchGrpcStream) openWatchClient() (ws pb.Watch_WatchClient, err error)
return ws, nil
}
// resumeWatchers rebuilds every registered watcher on a new client
func (w *watchGrpcStream) resumeWatchers(wc pb.Watch_WatchClient) error {
w.mu.RLock()
streams := make([]*watcherStream, 0, len(w.streams))
for _, ws := range w.streams {
streams = append(streams, ws)
}
w.mu.RUnlock()
for _, ws := range streams {
// drain recvc so no old WatchResponses (e.g., Created messages)
// are processed while resuming
ws.drain()
// pause serveStream
ws.resumec <- -1
// reconstruct watcher from initial request
if ws.lastRev != 0 {
ws.initReq.rev = ws.lastRev
}
if err := wc.Send(ws.initReq.toPB()); err != nil {
return err
}
// wait for request ack
resp, err := wc.Recv()
if err != nil {
return err
} else if len(resp.Events) != 0 || !resp.Created {
return fmt.Errorf("watcher: unexpected response (%+v)", resp)
}
// id may be different since new remote watcher; update map
w.mu.Lock()
delete(w.streams, ws.id)
ws.id = resp.WatchId
w.streams[ws.id] = ws
w.mu.Unlock()
// unpause serveStream
ws.resumec <- ws.lastRev
}
return nil
}
// drain removes all buffered WatchResponses from the stream's receive channel.
func (ws *watcherStream) drain() {
for {
select {
case <-ws.recvc:
default:
return
}
}
}
// toPB converts an internal watch request structure to its protobuf messagefunc (wr *watchRequest)
func (wr *watchRequest) toPB() *pb.WatchRequest {
req := &pb.WatchCreateRequest{
@ -723,6 +719,7 @@ func (wr *watchRequest) toPB() *pb.WatchRequest {
Key: []byte(wr.key),
RangeEnd: []byte(wr.end),
ProgressNotify: wr.progressNotify,
PrevKv: wr.prevKV,
}
cr := &pb.WatchRequest_CreateRequest{CreateRequest: req}
return &pb.WatchRequest{RequestUnion: cr}

View File

@ -11,10 +11,6 @@
"Comment": "null-5",
"Rev": "'75cd24fc2f2c2a2088577d12123ddee5f54e0675'"
},
{
"ImportPath": "github.com/akrennmair/gopcap",
"Rev": "00e11033259acb75598ba416495bb708d864a010"
},
{
"ImportPath": "github.com/beorn7/perks/quantile",
"Rev": "b965b613227fddccbfffe13eae360ed3fa822f8d"

View File

@ -1,5 +0,0 @@
#*
*~
/tools/pass/pass
/tools/pcaptest/pcaptest
/tools/tcpdump/tcpdump

View File

@ -1,27 +0,0 @@
Copyright (c) 2009-2011 Andreas Krennmair. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Andreas Krennmair nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -1,11 +0,0 @@
# PCAP
This is a simple wrapper around libpcap for Go. Originally written by Andreas
Krennmair <ak@synflood.at> and only minorly touched up by Mark Smith <mark@qq.is>.
Please see the included pcaptest.go and tcpdump.go programs for instructions on
how to use this library.
Miek Gieben <miek@miek.nl> has created a more Go-like package and replaced functionality
with standard functions from the standard library. The package has also been renamed to
pcap.

View File

@ -1,527 +0,0 @@
package pcap
import (
"encoding/binary"
"fmt"
"net"
"reflect"
"strings"
)
const (
TYPE_IP = 0x0800
TYPE_ARP = 0x0806
TYPE_IP6 = 0x86DD
TYPE_VLAN = 0x8100
IP_ICMP = 1
IP_INIP = 4
IP_TCP = 6
IP_UDP = 17
)
const (
ERRBUF_SIZE = 256
// According to pcap-linktype(7).
LINKTYPE_NULL = 0
LINKTYPE_ETHERNET = 1
LINKTYPE_TOKEN_RING = 6
LINKTYPE_ARCNET = 7
LINKTYPE_SLIP = 8
LINKTYPE_PPP = 9
LINKTYPE_FDDI = 10
LINKTYPE_ATM_RFC1483 = 100
LINKTYPE_RAW = 101
LINKTYPE_PPP_HDLC = 50
LINKTYPE_PPP_ETHER = 51
LINKTYPE_C_HDLC = 104
LINKTYPE_IEEE802_11 = 105
LINKTYPE_FRELAY = 107
LINKTYPE_LOOP = 108
LINKTYPE_LINUX_SLL = 113
LINKTYPE_LTALK = 104
LINKTYPE_PFLOG = 117
LINKTYPE_PRISM_HEADER = 119
LINKTYPE_IP_OVER_FC = 122
LINKTYPE_SUNATM = 123
LINKTYPE_IEEE802_11_RADIO = 127
LINKTYPE_ARCNET_LINUX = 129
LINKTYPE_LINUX_IRDA = 144
LINKTYPE_LINUX_LAPD = 177
)
type addrHdr interface {
SrcAddr() string
DestAddr() string
Len() int
}
type addrStringer interface {
String(addr addrHdr) string
}
func decodemac(pkt []byte) uint64 {
mac := uint64(0)
for i := uint(0); i < 6; i++ {
mac = (mac << 8) + uint64(pkt[i])
}
return mac
}
// Decode decodes the headers of a Packet.
func (p *Packet) Decode() {
if len(p.Data) <= 14 {
return
}
p.Type = int(binary.BigEndian.Uint16(p.Data[12:14]))
p.DestMac = decodemac(p.Data[0:6])
p.SrcMac = decodemac(p.Data[6:12])
if len(p.Data) >= 15 {
p.Payload = p.Data[14:]
}
switch p.Type {
case TYPE_IP:
p.decodeIp()
case TYPE_IP6:
p.decodeIp6()
case TYPE_ARP:
p.decodeArp()
case TYPE_VLAN:
p.decodeVlan()
}
}
func (p *Packet) headerString(headers []interface{}) string {
// If there's just one header, return that.
if len(headers) == 1 {
if hdr, ok := headers[0].(fmt.Stringer); ok {
return hdr.String()
}
}
// If there are two headers (IPv4/IPv6 -> TCP/UDP/IP..)
if len(headers) == 2 {
// Commonly the first header is an address.
if addr, ok := p.Headers[0].(addrHdr); ok {
if hdr, ok := p.Headers[1].(addrStringer); ok {
return fmt.Sprintf("%s %s", p.Time, hdr.String(addr))
}
}
}
// For IP in IP, we do a recursive call.
if len(headers) >= 2 {
if addr, ok := headers[0].(addrHdr); ok {
if _, ok := headers[1].(addrHdr); ok {
return fmt.Sprintf("%s > %s IP in IP: ",
addr.SrcAddr(), addr.DestAddr(), p.headerString(headers[1:]))
}
}
}
var typeNames []string
for _, hdr := range headers {
typeNames = append(typeNames, reflect.TypeOf(hdr).String())
}
return fmt.Sprintf("unknown [%s]", strings.Join(typeNames, ","))
}
// String prints a one-line representation of the packet header.
// The output is suitable for use in a tcpdump program.
func (p *Packet) String() string {
// If there are no headers, print "unsupported protocol".
if len(p.Headers) == 0 {
return fmt.Sprintf("%s unsupported protocol %d", p.Time, int(p.Type))
}
return fmt.Sprintf("%s %s", p.Time, p.headerString(p.Headers))
}
// Arphdr is a ARP packet header.
type Arphdr struct {
Addrtype uint16
Protocol uint16
HwAddressSize uint8
ProtAddressSize uint8
Operation uint16
SourceHwAddress []byte
SourceProtAddress []byte
DestHwAddress []byte
DestProtAddress []byte
}
func (arp *Arphdr) String() (s string) {
switch arp.Operation {
case 1:
s = "ARP request"
case 2:
s = "ARP Reply"
}
if arp.Addrtype == LINKTYPE_ETHERNET && arp.Protocol == TYPE_IP {
s = fmt.Sprintf("%012x (%s) > %012x (%s)",
decodemac(arp.SourceHwAddress), arp.SourceProtAddress,
decodemac(arp.DestHwAddress), arp.DestProtAddress)
} else {
s = fmt.Sprintf("addrtype = %d protocol = %d", arp.Addrtype, arp.Protocol)
}
return
}
func (p *Packet) decodeArp() {
if len(p.Payload) < 8 {
return
}
pkt := p.Payload
arp := new(Arphdr)
arp.Addrtype = binary.BigEndian.Uint16(pkt[0:2])
arp.Protocol = binary.BigEndian.Uint16(pkt[2:4])
arp.HwAddressSize = pkt[4]
arp.ProtAddressSize = pkt[5]
arp.Operation = binary.BigEndian.Uint16(pkt[6:8])
if len(pkt) < int(8+2*arp.HwAddressSize+2*arp.ProtAddressSize) {
return
}
arp.SourceHwAddress = pkt[8 : 8+arp.HwAddressSize]
arp.SourceProtAddress = pkt[8+arp.HwAddressSize : 8+arp.HwAddressSize+arp.ProtAddressSize]
arp.DestHwAddress = pkt[8+arp.HwAddressSize+arp.ProtAddressSize : 8+2*arp.HwAddressSize+arp.ProtAddressSize]
arp.DestProtAddress = pkt[8+2*arp.HwAddressSize+arp.ProtAddressSize : 8+2*arp.HwAddressSize+2*arp.ProtAddressSize]
p.Headers = append(p.Headers, arp)
if len(pkt) >= int(8+2*arp.HwAddressSize+2*arp.ProtAddressSize) {
p.Payload = p.Payload[8+2*arp.HwAddressSize+2*arp.ProtAddressSize:]
}
}
// IPadr is the header of an IP packet.
type Iphdr struct {
Version uint8
Ihl uint8
Tos uint8
Length uint16
Id uint16
Flags uint8
FragOffset uint16
Ttl uint8
Protocol uint8
Checksum uint16
SrcIp []byte
DestIp []byte
}
func (p *Packet) decodeIp() {
if len(p.Payload) < 20 {
return
}
pkt := p.Payload
ip := new(Iphdr)
ip.Version = uint8(pkt[0]) >> 4
ip.Ihl = uint8(pkt[0]) & 0x0F
ip.Tos = pkt[1]
ip.Length = binary.BigEndian.Uint16(pkt[2:4])
ip.Id = binary.BigEndian.Uint16(pkt[4:6])
flagsfrags := binary.BigEndian.Uint16(pkt[6:8])
ip.Flags = uint8(flagsfrags >> 13)
ip.FragOffset = flagsfrags & 0x1FFF
ip.Ttl = pkt[8]
ip.Protocol = pkt[9]
ip.Checksum = binary.BigEndian.Uint16(pkt[10:12])
ip.SrcIp = pkt[12:16]
ip.DestIp = pkt[16:20]
pEnd := int(ip.Length)
if pEnd > len(pkt) {
pEnd = len(pkt)
}
if len(pkt) >= pEnd && int(ip.Ihl*4) < pEnd {
p.Payload = pkt[ip.Ihl*4 : pEnd]
} else {
p.Payload = []byte{}
}
p.Headers = append(p.Headers, ip)
p.IP = ip
switch ip.Protocol {
case IP_TCP:
p.decodeTcp()
case IP_UDP:
p.decodeUdp()
case IP_ICMP:
p.decodeIcmp()
case IP_INIP:
p.decodeIp()
}
}
func (ip *Iphdr) SrcAddr() string { return net.IP(ip.SrcIp).String() }
func (ip *Iphdr) DestAddr() string { return net.IP(ip.DestIp).String() }
func (ip *Iphdr) Len() int { return int(ip.Length) }
type Vlanhdr struct {
Priority byte
DropEligible bool
VlanIdentifier int
Type int // Not actually part of the vlan header, but the type of the actual packet
}
func (v *Vlanhdr) String() {
fmt.Sprintf("VLAN Priority:%d Drop:%v Tag:%d", v.Priority, v.DropEligible, v.VlanIdentifier)
}
func (p *Packet) decodeVlan() {
pkt := p.Payload
vlan := new(Vlanhdr)
if len(pkt) < 4 {
return
}
vlan.Priority = (pkt[2] & 0xE0) >> 13
vlan.DropEligible = pkt[2]&0x10 != 0
vlan.VlanIdentifier = int(binary.BigEndian.Uint16(pkt[:2])) & 0x0FFF
vlan.Type = int(binary.BigEndian.Uint16(p.Payload[2:4]))
p.Headers = append(p.Headers, vlan)
if len(pkt) >= 5 {
p.Payload = p.Payload[4:]
}
switch vlan.Type {
case TYPE_IP:
p.decodeIp()
case TYPE_IP6:
p.decodeIp6()
case TYPE_ARP:
p.decodeArp()
}
}
type Tcphdr struct {
SrcPort uint16
DestPort uint16
Seq uint32
Ack uint32
DataOffset uint8
Flags uint16
Window uint16
Checksum uint16
Urgent uint16
Data []byte
}
const (
TCP_FIN = 1 << iota
TCP_SYN
TCP_RST
TCP_PSH
TCP_ACK
TCP_URG
TCP_ECE
TCP_CWR
TCP_NS
)
func (p *Packet) decodeTcp() {
if len(p.Payload) < 20 {
return
}
pkt := p.Payload
tcp := new(Tcphdr)
tcp.SrcPort = binary.BigEndian.Uint16(pkt[0:2])
tcp.DestPort = binary.BigEndian.Uint16(pkt[2:4])
tcp.Seq = binary.BigEndian.Uint32(pkt[4:8])
tcp.Ack = binary.BigEndian.Uint32(pkt[8:12])
tcp.DataOffset = (pkt[12] & 0xF0) >> 4
tcp.Flags = binary.BigEndian.Uint16(pkt[12:14]) & 0x1FF
tcp.Window = binary.BigEndian.Uint16(pkt[14:16])
tcp.Checksum = binary.BigEndian.Uint16(pkt[16:18])
tcp.Urgent = binary.BigEndian.Uint16(pkt[18:20])
if len(pkt) >= int(tcp.DataOffset*4) {
p.Payload = pkt[tcp.DataOffset*4:]
}
p.Headers = append(p.Headers, tcp)
p.TCP = tcp
}
func (tcp *Tcphdr) String(hdr addrHdr) string {
return fmt.Sprintf("TCP %s:%d > %s:%d %s SEQ=%d ACK=%d LEN=%d",
hdr.SrcAddr(), int(tcp.SrcPort), hdr.DestAddr(), int(tcp.DestPort),
tcp.FlagsString(), int64(tcp.Seq), int64(tcp.Ack), hdr.Len())
}
func (tcp *Tcphdr) FlagsString() string {
var sflags []string
if 0 != (tcp.Flags & TCP_SYN) {
sflags = append(sflags, "syn")
}
if 0 != (tcp.Flags & TCP_FIN) {
sflags = append(sflags, "fin")
}
if 0 != (tcp.Flags & TCP_ACK) {
sflags = append(sflags, "ack")
}
if 0 != (tcp.Flags & TCP_PSH) {
sflags = append(sflags, "psh")
}
if 0 != (tcp.Flags & TCP_RST) {
sflags = append(sflags, "rst")
}
if 0 != (tcp.Flags & TCP_URG) {
sflags = append(sflags, "urg")
}
if 0 != (tcp.Flags & TCP_NS) {
sflags = append(sflags, "ns")
}
if 0 != (tcp.Flags & TCP_CWR) {
sflags = append(sflags, "cwr")
}
if 0 != (tcp.Flags & TCP_ECE) {
sflags = append(sflags, "ece")
}
return fmt.Sprintf("[%s]", strings.Join(sflags, " "))
}
type Udphdr struct {
SrcPort uint16
DestPort uint16
Length uint16
Checksum uint16
}
func (p *Packet) decodeUdp() {
if len(p.Payload) < 8 {
return
}
pkt := p.Payload
udp := new(Udphdr)
udp.SrcPort = binary.BigEndian.Uint16(pkt[0:2])
udp.DestPort = binary.BigEndian.Uint16(pkt[2:4])
udp.Length = binary.BigEndian.Uint16(pkt[4:6])
udp.Checksum = binary.BigEndian.Uint16(pkt[6:8])
p.Headers = append(p.Headers, udp)
p.UDP = udp
if len(p.Payload) >= 8 {
p.Payload = pkt[8:]
}
}
func (udp *Udphdr) String(hdr addrHdr) string {
return fmt.Sprintf("UDP %s:%d > %s:%d LEN=%d CHKSUM=%d",
hdr.SrcAddr(), int(udp.SrcPort), hdr.DestAddr(), int(udp.DestPort),
int(udp.Length), int(udp.Checksum))
}
type Icmphdr struct {
Type uint8
Code uint8
Checksum uint16
Id uint16
Seq uint16
Data []byte
}
func (p *Packet) decodeIcmp() *Icmphdr {
if len(p.Payload) < 8 {
return nil
}
pkt := p.Payload
icmp := new(Icmphdr)
icmp.Type = pkt[0]
icmp.Code = pkt[1]
icmp.Checksum = binary.BigEndian.Uint16(pkt[2:4])
icmp.Id = binary.BigEndian.Uint16(pkt[4:6])
icmp.Seq = binary.BigEndian.Uint16(pkt[6:8])
p.Payload = pkt[8:]
p.Headers = append(p.Headers, icmp)
return icmp
}
func (icmp *Icmphdr) String(hdr addrHdr) string {
return fmt.Sprintf("ICMP %s > %s Type = %d Code = %d ",
hdr.SrcAddr(), hdr.DestAddr(), icmp.Type, icmp.Code)
}
func (icmp *Icmphdr) TypeString() (result string) {
switch icmp.Type {
case 0:
result = fmt.Sprintf("Echo reply seq=%d", icmp.Seq)
case 3:
switch icmp.Code {
case 0:
result = "Network unreachable"
case 1:
result = "Host unreachable"
case 2:
result = "Protocol unreachable"
case 3:
result = "Port unreachable"
default:
result = "Destination unreachable"
}
case 8:
result = fmt.Sprintf("Echo request seq=%d", icmp.Seq)
case 30:
result = "Traceroute"
}
return
}
type Ip6hdr struct {
// http://www.networksorcery.com/enp/protocol/ipv6.htm
Version uint8 // 4 bits
TrafficClass uint8 // 8 bits
FlowLabel uint32 // 20 bits
Length uint16 // 16 bits
NextHeader uint8 // 8 bits, same as Protocol in Iphdr
HopLimit uint8 // 8 bits
SrcIp []byte // 16 bytes
DestIp []byte // 16 bytes
}
func (p *Packet) decodeIp6() {
if len(p.Payload) < 40 {
return
}
pkt := p.Payload
ip6 := new(Ip6hdr)
ip6.Version = uint8(pkt[0]) >> 4
ip6.TrafficClass = uint8((binary.BigEndian.Uint16(pkt[0:2]) >> 4) & 0x00FF)
ip6.FlowLabel = binary.BigEndian.Uint32(pkt[0:4]) & 0x000FFFFF
ip6.Length = binary.BigEndian.Uint16(pkt[4:6])
ip6.NextHeader = pkt[6]
ip6.HopLimit = pkt[7]
ip6.SrcIp = pkt[8:24]
ip6.DestIp = pkt[24:40]
if len(p.Payload) >= 40 {
p.Payload = pkt[40:]
}
p.Headers = append(p.Headers, ip6)
switch ip6.NextHeader {
case IP_TCP:
p.decodeTcp()
case IP_UDP:
p.decodeUdp()
case IP_ICMP:
p.decodeIcmp()
case IP_INIP:
p.decodeIp()
}
}
func (ip6 *Ip6hdr) SrcAddr() string { return net.IP(ip6.SrcIp).String() }
func (ip6 *Ip6hdr) DestAddr() string { return net.IP(ip6.DestIp).String() }
func (ip6 *Ip6hdr) Len() int { return int(ip6.Length) }

View File

@ -1,206 +0,0 @@
package pcap
import (
"encoding/binary"
"fmt"
"io"
"time"
)
// FileHeader is the parsed header of a pcap file.
// http://wiki.wireshark.org/Development/LibpcapFileFormat
type FileHeader struct {
MagicNumber uint32
VersionMajor uint16
VersionMinor uint16
TimeZone int32
SigFigs uint32
SnapLen uint32
Network uint32
}
type PacketTime struct {
Sec int32
Usec int32
}
// Convert the PacketTime to a go Time struct.
func (p *PacketTime) Time() time.Time {
return time.Unix(int64(p.Sec), int64(p.Usec)*1000)
}
// Packet is a single packet parsed from a pcap file.
//
// Convenient access to IP, TCP, and UDP headers is provided after Decode()
// is called if the packet is of the appropriate type.
type Packet struct {
Time time.Time // packet send/receive time
Caplen uint32 // bytes stored in the file (caplen <= len)
Len uint32 // bytes sent/received
Data []byte // packet data
Type int // protocol type, see LINKTYPE_*
DestMac uint64
SrcMac uint64
Headers []interface{} // decoded headers, in order
Payload []byte // remaining non-header bytes
IP *Iphdr // IP header (for IP packets, after decoding)
TCP *Tcphdr // TCP header (for TCP packets, after decoding)
UDP *Udphdr // UDP header (for UDP packets after decoding)
}
// Reader parses pcap files.
type Reader struct {
flip bool
buf io.Reader
err error
fourBytes []byte
twoBytes []byte
sixteenBytes []byte
Header FileHeader
}
// NewReader reads pcap data from an io.Reader.
func NewReader(reader io.Reader) (*Reader, error) {
r := &Reader{
buf: reader,
fourBytes: make([]byte, 4),
twoBytes: make([]byte, 2),
sixteenBytes: make([]byte, 16),
}
switch magic := r.readUint32(); magic {
case 0xa1b2c3d4:
r.flip = false
case 0xd4c3b2a1:
r.flip = true
default:
return nil, fmt.Errorf("pcap: bad magic number: %0x", magic)
}
r.Header = FileHeader{
MagicNumber: 0xa1b2c3d4,
VersionMajor: r.readUint16(),
VersionMinor: r.readUint16(),
TimeZone: r.readInt32(),
SigFigs: r.readUint32(),
SnapLen: r.readUint32(),
Network: r.readUint32(),
}
return r, nil
}
// Next returns the next packet or nil if no more packets can be read.
func (r *Reader) Next() *Packet {
d := r.sixteenBytes
r.err = r.read(d)
if r.err != nil {
return nil
}
timeSec := asUint32(d[0:4], r.flip)
timeUsec := asUint32(d[4:8], r.flip)
capLen := asUint32(d[8:12], r.flip)
origLen := asUint32(d[12:16], r.flip)
data := make([]byte, capLen)
if r.err = r.read(data); r.err != nil {
return nil
}
return &Packet{
Time: time.Unix(int64(timeSec), int64(timeUsec)),
Caplen: capLen,
Len: origLen,
Data: data,
}
}
func (r *Reader) read(data []byte) error {
var err error
n, err := r.buf.Read(data)
for err == nil && n != len(data) {
var chunk int
chunk, err = r.buf.Read(data[n:])
n += chunk
}
if len(data) == n {
return nil
}
return err
}
func (r *Reader) readUint32() uint32 {
data := r.fourBytes
if r.err = r.read(data); r.err != nil {
return 0
}
return asUint32(data, r.flip)
}
func (r *Reader) readInt32() int32 {
data := r.fourBytes
if r.err = r.read(data); r.err != nil {
return 0
}
return int32(asUint32(data, r.flip))
}
func (r *Reader) readUint16() uint16 {
data := r.twoBytes
if r.err = r.read(data); r.err != nil {
return 0
}
return asUint16(data, r.flip)
}
// Writer writes a pcap file.
type Writer struct {
writer io.Writer
buf []byte
}
// NewWriter creates a Writer that stores output in an io.Writer.
// The FileHeader is written immediately.
func NewWriter(writer io.Writer, header *FileHeader) (*Writer, error) {
w := &Writer{
writer: writer,
buf: make([]byte, 24),
}
binary.LittleEndian.PutUint32(w.buf, header.MagicNumber)
binary.LittleEndian.PutUint16(w.buf[4:], header.VersionMajor)
binary.LittleEndian.PutUint16(w.buf[6:], header.VersionMinor)
binary.LittleEndian.PutUint32(w.buf[8:], uint32(header.TimeZone))
binary.LittleEndian.PutUint32(w.buf[12:], header.SigFigs)
binary.LittleEndian.PutUint32(w.buf[16:], header.SnapLen)
binary.LittleEndian.PutUint32(w.buf[20:], header.Network)
if _, err := writer.Write(w.buf); err != nil {
return nil, err
}
return w, nil
}
// Writer writes a packet to the underlying writer.
func (w *Writer) Write(pkt *Packet) error {
binary.LittleEndian.PutUint32(w.buf, uint32(pkt.Time.Unix()))
binary.LittleEndian.PutUint32(w.buf[4:], uint32(pkt.Time.Nanosecond()))
binary.LittleEndian.PutUint32(w.buf[8:], uint32(pkt.Time.Unix()))
binary.LittleEndian.PutUint32(w.buf[12:], pkt.Len)
if _, err := w.writer.Write(w.buf[:16]); err != nil {
return err
}
_, err := w.writer.Write(pkt.Data)
return err
}
func asUint32(data []byte, flip bool) uint32 {
if flip {
return binary.BigEndian.Uint32(data)
}
return binary.LittleEndian.Uint32(data)
}
func asUint16(data []byte, flip bool) uint16 {
if flip {
return binary.BigEndian.Uint16(data)
}
return binary.LittleEndian.Uint16(data)
}

View File

@ -1,266 +0,0 @@
// Interface to both live and offline pcap parsing.
package pcap
/*
#cgo linux LDFLAGS: -lpcap
#cgo freebsd LDFLAGS: -lpcap
#cgo darwin LDFLAGS: -lpcap
#cgo windows CFLAGS: -I C:/WpdPack/Include
#cgo windows,386 LDFLAGS: -L C:/WpdPack/Lib -lwpcap
#cgo windows,amd64 LDFLAGS: -L C:/WpdPack/Lib/x64 -lwpcap
#include <stdlib.h>
#include <pcap.h>
// Workaround for not knowing how to cast to const u_char**
int hack_pcap_next_ex(pcap_t *p, struct pcap_pkthdr **pkt_header,
u_char **pkt_data) {
return pcap_next_ex(p, pkt_header, (const u_char **)pkt_data);
}
*/
import "C"
import (
"errors"
"net"
"syscall"
"time"
"unsafe"
)
type Pcap struct {
cptr *C.pcap_t
}
type Stat struct {
PacketsReceived uint32
PacketsDropped uint32
PacketsIfDropped uint32
}
type Interface struct {
Name string
Description string
Addresses []IFAddress
// TODO: add more elements
}
type IFAddress struct {
IP net.IP
Netmask net.IPMask
// TODO: add broadcast + PtP dst ?
}
func (p *Pcap) Next() (pkt *Packet) {
rv, _ := p.NextEx()
return rv
}
// Openlive opens a device and returns a *Pcap handler
func Openlive(device string, snaplen int32, promisc bool, timeout_ms int32) (handle *Pcap, err error) {
var buf *C.char
buf = (*C.char)(C.calloc(ERRBUF_SIZE, 1))
h := new(Pcap)
var pro int32
if promisc {
pro = 1
}
dev := C.CString(device)
defer C.free(unsafe.Pointer(dev))
h.cptr = C.pcap_open_live(dev, C.int(snaplen), C.int(pro), C.int(timeout_ms), buf)
if nil == h.cptr {
handle = nil
err = errors.New(C.GoString(buf))
} else {
handle = h
}
C.free(unsafe.Pointer(buf))
return
}
func Openoffline(file string) (handle *Pcap, err error) {
var buf *C.char
buf = (*C.char)(C.calloc(ERRBUF_SIZE, 1))
h := new(Pcap)
cf := C.CString(file)
defer C.free(unsafe.Pointer(cf))
h.cptr = C.pcap_open_offline(cf, buf)
if nil == h.cptr {
handle = nil
err = errors.New(C.GoString(buf))
} else {
handle = h
}
C.free(unsafe.Pointer(buf))
return
}
func (p *Pcap) NextEx() (pkt *Packet, result int32) {
var pkthdr *C.struct_pcap_pkthdr
var buf_ptr *C.u_char
var buf unsafe.Pointer
result = int32(C.hack_pcap_next_ex(p.cptr, &pkthdr, &buf_ptr))
buf = unsafe.Pointer(buf_ptr)
if nil == buf {
return
}
pkt = new(Packet)
pkt.Time = time.Unix(int64(pkthdr.ts.tv_sec), int64(pkthdr.ts.tv_usec)*1000)
pkt.Caplen = uint32(pkthdr.caplen)
pkt.Len = uint32(pkthdr.len)
pkt.Data = C.GoBytes(buf, C.int(pkthdr.caplen))
return
}
func (p *Pcap) Close() {
C.pcap_close(p.cptr)
}
func (p *Pcap) Geterror() error {
return errors.New(C.GoString(C.pcap_geterr(p.cptr)))
}
func (p *Pcap) Getstats() (stat *Stat, err error) {
var cstats _Ctype_struct_pcap_stat
if -1 == C.pcap_stats(p.cptr, &cstats) {
return nil, p.Geterror()
}
stats := new(Stat)
stats.PacketsReceived = uint32(cstats.ps_recv)
stats.PacketsDropped = uint32(cstats.ps_drop)
stats.PacketsIfDropped = uint32(cstats.ps_ifdrop)
return stats, nil
}
func (p *Pcap) Setfilter(expr string) (err error) {
var bpf _Ctype_struct_bpf_program
cexpr := C.CString(expr)
defer C.free(unsafe.Pointer(cexpr))
if -1 == C.pcap_compile(p.cptr, &bpf, cexpr, 1, 0) {
return p.Geterror()
}
if -1 == C.pcap_setfilter(p.cptr, &bpf) {
C.pcap_freecode(&bpf)
return p.Geterror()
}
C.pcap_freecode(&bpf)
return nil
}
func Version() string {
return C.GoString(C.pcap_lib_version())
}
func (p *Pcap) Datalink() int {
return int(C.pcap_datalink(p.cptr))
}
func (p *Pcap) Setdatalink(dlt int) error {
if -1 == C.pcap_set_datalink(p.cptr, C.int(dlt)) {
return p.Geterror()
}
return nil
}
func DatalinkValueToName(dlt int) string {
if name := C.pcap_datalink_val_to_name(C.int(dlt)); name != nil {
return C.GoString(name)
}
return ""
}
func DatalinkValueToDescription(dlt int) string {
if desc := C.pcap_datalink_val_to_description(C.int(dlt)); desc != nil {
return C.GoString(desc)
}
return ""
}
func Findalldevs() (ifs []Interface, err error) {
var buf *C.char
buf = (*C.char)(C.calloc(ERRBUF_SIZE, 1))
defer C.free(unsafe.Pointer(buf))
var alldevsp *C.pcap_if_t
if -1 == C.pcap_findalldevs((**C.pcap_if_t)(&alldevsp), buf) {
return nil, errors.New(C.GoString(buf))
}
defer C.pcap_freealldevs((*C.pcap_if_t)(alldevsp))
dev := alldevsp
var i uint32
for i = 0; dev != nil; dev = (*C.pcap_if_t)(dev.next) {
i++
}
ifs = make([]Interface, i)
dev = alldevsp
for j := uint32(0); dev != nil; dev = (*C.pcap_if_t)(dev.next) {
var iface Interface
iface.Name = C.GoString(dev.name)
iface.Description = C.GoString(dev.description)
iface.Addresses = findalladdresses(dev.addresses)
// TODO: add more elements
ifs[j] = iface
j++
}
return
}
func findalladdresses(addresses *_Ctype_struct_pcap_addr) (retval []IFAddress) {
// TODO - make it support more than IPv4 and IPv6?
retval = make([]IFAddress, 0, 1)
for curaddr := addresses; curaddr != nil; curaddr = (*_Ctype_struct_pcap_addr)(curaddr.next) {
var a IFAddress
var err error
if a.IP, err = sockaddr_to_IP((*syscall.RawSockaddr)(unsafe.Pointer(curaddr.addr))); err != nil {
continue
}
if a.Netmask, err = sockaddr_to_IP((*syscall.RawSockaddr)(unsafe.Pointer(curaddr.addr))); err != nil {
continue
}
retval = append(retval, a)
}
return
}
func sockaddr_to_IP(rsa *syscall.RawSockaddr) (IP []byte, err error) {
switch rsa.Family {
case syscall.AF_INET:
pp := (*syscall.RawSockaddrInet4)(unsafe.Pointer(rsa))
IP = make([]byte, 4)
for i := 0; i < len(IP); i++ {
IP[i] = pp.Addr[i]
}
return
case syscall.AF_INET6:
pp := (*syscall.RawSockaddrInet6)(unsafe.Pointer(rsa))
IP = make([]byte, 16)
for i := 0; i < len(IP); i++ {
IP[i] = pp.Addr[i]
}
return
}
err = errors.New("Unsupported address type")
return
}
func (p *Pcap) Inject(data []byte) (err error) {
buf := (*C.char)(C.malloc((C.size_t)(len(data))))
for i := 0; i < len(data); i++ {
*(*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(buf)) + uintptr(i))) = data[i]
}
if -1 == C.pcap_sendpacket(p.cptr, (*C.u_char)(unsafe.Pointer(buf)), (C.int)(len(data))) {
err = p.Geterror()
}
C.free(unsafe.Pointer(buf))
return
}

View File

@ -75,11 +75,11 @@ func authCredWriteKeyTest(cx ctlCtx) {
cx.user, cx.pass = "root", "root"
authSetupTestUser(cx)
// confirm root role doesn't grant access to all keys
if err := ctlV3PutFailPerm(cx, "foo", "bar"); err != nil {
// confirm root role can access to all keys
if err := ctlV3Put(cx, "foo", "bar", ""); err != nil {
cx.t.Fatal(err)
}
if err := ctlV3GetFailPerm(cx, "foo"); err != nil {
if err := ctlV3Get(cx, []string{"foo"}, []kv{{"foo", "bar"}}...); err != nil {
cx.t.Fatal(err)
}
@ -90,17 +90,17 @@ func authCredWriteKeyTest(cx ctlCtx) {
}
// confirm put failed
cx.user, cx.pass = "test-user", "pass"
if err := ctlV3Get(cx, []string{"foo"}, []kv{{"foo", "a"}}...); err != nil {
if err := ctlV3Get(cx, []string{"foo"}, []kv{{"foo", "bar"}}...); err != nil {
cx.t.Fatal(err)
}
// try good user
cx.user, cx.pass = "test-user", "pass"
if err := ctlV3Put(cx, "foo", "bar", ""); err != nil {
if err := ctlV3Put(cx, "foo", "bar2", ""); err != nil {
cx.t.Fatal(err)
}
// confirm put succeeded
if err := ctlV3Get(cx, []string{"foo"}, []kv{{"foo", "bar"}}...); err != nil {
if err := ctlV3Get(cx, []string{"foo"}, []kv{{"foo", "bar2"}}...); err != nil {
cx.t.Fatal(err)
}
@ -111,7 +111,7 @@ func authCredWriteKeyTest(cx ctlCtx) {
}
// confirm put failed
cx.user, cx.pass = "test-user", "pass"
if err := ctlV3Get(cx, []string{"foo"}, []kv{{"foo", "bar"}}...); err != nil {
if err := ctlV3Get(cx, []string{"foo"}, []kv{{"foo", "bar2"}}...); err != nil {
cx.t.Fatal(err)
}
}
@ -282,10 +282,6 @@ func ctlV3PutFailPerm(cx ctlCtx, key, val string) error {
return spawnWithExpect(append(cx.PrefixArgs(), "put", key, val), "permission denied")
}
func ctlV3GetFailPerm(cx ctlCtx, key string) error {
return spawnWithExpect(append(cx.PrefixArgs(), "get", key), "permission denied")
}
func authSetupTestUser(cx ctlCtx) {
if err := ctlV3User(cx, []string{"add", "test-user", "--interactive=false"}, "User test-user created", []string{"pass"}); err != nil {
cx.t.Fatal(err)

View File

@ -89,8 +89,8 @@ func TestCtlV3Migrate(t *testing.T) {
if len(resp.Kvs) != 1 {
t.Fatalf("len(resp.Kvs) expected 1, got %+v", resp.Kvs)
}
if resp.Kvs[0].CreateRevision != 4 {
t.Fatalf("resp.Kvs[0].CreateRevision expected 4, got %d", resp.Kvs[0].CreateRevision)
if resp.Kvs[0].CreateRevision != 7 {
t.Fatalf("resp.Kvs[0].CreateRevision expected 7, got %d", resp.Kvs[0].CreateRevision)
}
}

View File

@ -221,7 +221,7 @@ OK
### WATCH [options] [key or prefix] [range_end]
Watch watches events stream on keys or prefixes, [key or prefix, range_end) if `range-end` is given. The watch command runs until it encounters an error or is terminated by the user.
Watch watches events stream on keys or prefixes, [key or prefix, range_end) if `range-end` is given. The watch command runs until it encounters an error or is terminated by the user. If range_end is given, it must be lexicographically greater than key or "\x00".
#### Options
@ -231,6 +231,8 @@ Watch watches events stream on keys or prefixes, [key or prefix, range_end) if `
- prefix -- watch on a prefix if prefix is set.
- prev-kv -- get the previous key-value pair before the event happens.
- rev -- the revision to start watching. Specifying a revision is useful for observing past events.
#### Input Format
@ -245,7 +247,7 @@ watch [options] <key or prefix>\n
##### Simple reply
- \<event\>\n\<key\>\n\<value\>\n\<event\>\n\<next_key\>\n\<next_value\>\n...
- \<event\>[\n\<old_key\>\n\<old_value\>]\n\<key\>\n\<value\>\n\<event\>\n\<next_key\>\n\<next_value\>\n...
- Additional error string if WATCH failed. Exit code is non-zero.

View File

@ -23,6 +23,7 @@ import (
var (
delPrefix bool
delPrevKV bool
)
// NewDelCommand returns the cobra command for "del".
@ -34,6 +35,7 @@ func NewDelCommand() *cobra.Command {
}
cmd.Flags().BoolVar(&delPrefix, "prefix", false, "delete keys with matching prefix")
cmd.Flags().BoolVar(&delPrevKV, "prev-kv", false, "return deleted key-value pairs")
return cmd
}
@ -65,6 +67,9 @@ func getDelOp(cmd *cobra.Command, args []string) (string, []clientv3.OpOption) {
if delPrefix {
opts = append(opts, clientv3.WithPrefix())
}
if delPrevKV {
opts = append(opts, clientv3.WithPrevKV())
}
return key, opts
}

View File

@ -27,11 +27,14 @@ import (
"github.com/coreos/etcd/client"
etcdErr "github.com/coreos/etcd/error"
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/api"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/etcdserver/membership"
"github.com/coreos/etcd/mvcc"
"github.com/coreos/etcd/mvcc/backend"
"github.com/coreos/etcd/mvcc/mvccpb"
"github.com/coreos/etcd/pkg/pbutil"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/raft/raftpb"
"github.com/coreos/etcd/snap"
"github.com/coreos/etcd/store"
@ -42,9 +45,10 @@ import (
)
var (
migrateDatadir string
migrateWALdir string
migrateTransformer string
migrateExcludeTTLKey bool
migrateDatadir string
migrateWALdir string
migrateTransformer string
)
// NewMigrateCommand returns the cobra command for "migrate".
@ -55,6 +59,7 @@ func NewMigrateCommand() *cobra.Command {
Run: migrateCommandFunc,
}
mc.Flags().BoolVar(&migrateExcludeTTLKey, "no-ttl", false, "Do not convert TTL keys")
mc.Flags().StringVar(&migrateDatadir, "data-dir", "", "Path to the data directory")
mc.Flags().StringVar(&migrateWALdir, "wal-dir", "", "Path to the WAL directory")
mc.Flags().StringVar(&migrateTransformer, "transformer", "", "Path to the user-provided transformer program")
@ -74,18 +79,17 @@ func migrateCommandFunc(cmd *cobra.Command, args []string) {
writer, reader, errc = defaultTransformer()
}
st := rebuildStoreV2()
st, index := rebuildStoreV2()
be := prepareBackend()
defer be.Close()
maxIndexc := make(chan uint64, 1)
go func() {
maxIndexc <- writeStore(writer, st)
writeStore(writer, st)
writer.Close()
}()
readKeys(reader, be)
mvcc.UpdateConsistentIndex(be, <-maxIndexc)
mvcc.UpdateConsistentIndex(be, index)
err := <-errc
if err != nil {
fmt.Println("failed to transform keys")
@ -106,7 +110,10 @@ func prepareBackend() backend.Backend {
return be
}
func rebuildStoreV2() store.Store {
func rebuildStoreV2() (store.Store, uint64) {
var index uint64
cl := membership.NewCluster("")
waldir := migrateWALdir
if len(waldir) == 0 {
waldir = path.Join(migrateDatadir, "member", "wal")
@ -122,6 +129,7 @@ func rebuildStoreV2() store.Store {
var walsnap walpb.Snapshot
if snapshot != nil {
walsnap.Index, walsnap.Term = snapshot.Metadata.Index, snapshot.Metadata.Term
index = snapshot.Metadata.Index
}
w, err := wal.OpenForRead(waldir, walsnap)
@ -143,9 +151,15 @@ func rebuildStoreV2() store.Store {
}
}
applier := etcdserver.NewApplierV2(st, nil)
cl.SetStore(st)
cl.Recover(api.UpdateCapability)
applier := etcdserver.NewApplierV2(st, cl)
for _, ent := range ents {
if ent.Type != raftpb.EntryNormal {
if ent.Type == raftpb.EntryConfChange {
var cc raftpb.ConfChange
pbutil.MustUnmarshal(&cc, ent.Data)
applyConf(cc, cl)
continue
}
@ -160,9 +174,34 @@ func rebuildStoreV2() store.Store {
applyRequest(req, applier)
}
}
if ent.Index > index {
index = ent.Index
}
}
return st
return st, index
}
func applyConf(cc raftpb.ConfChange, cl *membership.RaftCluster) {
if err := cl.ValidateConfigurationChange(cc); err != nil {
return
}
switch cc.Type {
case raftpb.ConfChangeAddNode:
m := new(membership.Member)
if err := json.Unmarshal(cc.Context, m); err != nil {
panic(err)
}
cl.AddMember(m)
case raftpb.ConfChangeRemoveNode:
cl.RemoveMember(types.ID(cc.NodeID))
case raftpb.ConfChangeUpdateNode:
m := new(membership.Member)
if err := json.Unmarshal(cc.Context, m); err != nil {
panic(err)
}
cl.UpdateRaftAttributes(m.ID, m.RaftAttributes)
}
}
func applyRequest(r *pb.Request, applyV2 etcdserver.ApplierV2) {
@ -216,11 +255,13 @@ func writeKeys(w io.Writer, n *store.NodeExtern) uint64 {
if n.Dir {
n.Nodes = nil
}
b, err := json.Marshal(n)
if err != nil {
ExitWithError(ExitError, err)
if !migrateExcludeTTLKey || n.TTL == 0 {
b, err := json.Marshal(n)
if err != nil {
ExitWithError(ExitError, err)
}
fmt.Fprint(w, string(b))
}
fmt.Fprintf(w, string(b))
for _, nn := range nodes {
max := writeKeys(w, nn)
if max > maxIndex {

View File

@ -108,6 +108,9 @@ type simplePrinter struct {
func (s *simplePrinter) Del(resp v3.DeleteResponse) {
fmt.Println(resp.Deleted)
for _, kv := range resp.PrevKvs {
printKV(s.isHex, kv)
}
}
func (s *simplePrinter) Get(resp v3.GetResponse) {
@ -116,7 +119,12 @@ func (s *simplePrinter) Get(resp v3.GetResponse) {
}
}
func (s *simplePrinter) Put(r v3.PutResponse) { fmt.Println("OK") }
func (s *simplePrinter) Put(r v3.PutResponse) {
fmt.Println("OK")
if r.PrevKv != nil {
printKV(s.isHex, r.PrevKv)
}
}
func (s *simplePrinter) Txn(resp v3.TxnResponse) {
if resp.Succeeded {
@ -143,6 +151,9 @@ func (s *simplePrinter) Txn(resp v3.TxnResponse) {
func (s *simplePrinter) Watch(resp v3.WatchResponse) {
for _, e := range resp.Events {
fmt.Println(e.Type)
if e.PrevKv != nil {
printKV(s.isHex, e.PrevKv)
}
printKV(s.isHex, e.Kv)
}
}

View File

@ -24,7 +24,8 @@ import (
)
var (
leaseStr string
leaseStr string
putPrevKV bool
)
// NewPutCommand returns the cobra command for "put".
@ -49,6 +50,7 @@ will store the content of the file to <key>.
Run: putCommandFunc,
}
cmd.Flags().StringVar(&leaseStr, "lease", "0", "lease ID (in hexadecimal) to attach to the key")
cmd.Flags().BoolVar(&putPrevKV, "prev-kv", false, "return changed key-value pairs")
return cmd
}
@ -85,6 +87,9 @@ func getPutOp(cmd *cobra.Command, args []string) (string, string, []clientv3.OpO
if id != 0 {
opts = append(opts, clientv3.WithLease(clientv3.LeaseID(id)))
}
if putPrevKV {
opts = append(opts, clientv3.WithPrevKV())
}
return key, value, opts
}

View File

@ -115,7 +115,7 @@ func snapshotSaveCommandFunc(cmd *cobra.Command, args []string) {
partpath := path + ".part"
f, err := os.Create(partpath)
defer f.Close()
if err != nil {
exiterr := fmt.Errorf("could not open %s (%v)", partpath, err)
ExitWithError(ExitBadArgs, exiterr)
@ -134,6 +134,8 @@ func snapshotSaveCommandFunc(cmd *cobra.Command, args []string) {
fileutil.Fsync(f)
f.Close()
if rerr := os.Rename(partpath, path); rerr != nil {
exiterr := fmt.Errorf("could not rename %s to %s (%v)", partpath, path, rerr)
ExitWithError(ExitIO, exiterr)

View File

@ -29,6 +29,7 @@ var (
watchRev int64
watchPrefix bool
watchInteractive bool
watchPrevKey bool
)
// NewWatchCommand returns the cobra command for "watch".
@ -42,6 +43,7 @@ func NewWatchCommand() *cobra.Command {
cmd.Flags().BoolVarP(&watchInteractive, "interactive", "i", false, "Interactive mode")
cmd.Flags().BoolVar(&watchPrefix, "prefix", false, "Watch on a prefix if prefix is set")
cmd.Flags().Int64Var(&watchRev, "rev", 0, "Revision to start watching")
cmd.Flags().BoolVar(&watchPrevKey, "prev-kv", false, "get the previous key-value pair before the event happens")
return cmd
}
@ -119,6 +121,9 @@ func getWatchChan(c *clientv3.Client, args []string) (clientv3.WatchChan, error)
if watchPrefix {
opts = append(opts, clientv3.WithPrefix())
}
if watchPrevKey {
opts = append(opts, clientv3.WithPrevKV())
}
return c.Watch(context.TODO(), key, opts...), nil
}

View File

@ -73,14 +73,14 @@ func (ls *LeaseServer) LeaseKeepAlive(stream pb.Lease_LeaseKeepAliveServer) erro
resp := &pb.LeaseKeepAliveResponse{ID: req.ID, Header: &pb.ResponseHeader{}}
ls.hdr.fill(resp.Header)
ttl, err := ls.le.LeaseRenew(lease.LeaseID(req.ID))
ttl, err := ls.le.LeaseRenew(stream.Context(), lease.LeaseID(req.ID))
if err == lease.ErrLeaseNotFound {
err = nil
ttl = 0
}
if err != nil {
return err
return togRPCError(err)
}
resp.TTL = ttl

View File

@ -49,9 +49,13 @@ var (
ErrGRPCRoleNotGranted = grpc.Errorf(codes.FailedPrecondition, "etcdserver: role is not granted to the user")
ErrGRPCPermissionNotGranted = grpc.Errorf(codes.FailedPrecondition, "etcdserver: permission is not granted to the role")
ErrGRPCNoLeader = grpc.Errorf(codes.Unavailable, "etcdserver: no leader")
ErrGRPCNotCapable = grpc.Errorf(codes.Unavailable, "etcdserver: not capable")
ErrGRPCStopped = grpc.Errorf(codes.Unavailable, "etcdserver: server stopped")
ErrGRPCNoLeader = grpc.Errorf(codes.Unavailable, "etcdserver: no leader")
ErrGRPCNotCapable = grpc.Errorf(codes.Unavailable, "etcdserver: not capable")
ErrGRPCStopped = grpc.Errorf(codes.Unavailable, "etcdserver: server stopped")
ErrGRPCTimeout = grpc.Errorf(codes.Unavailable, "etcdserver: request timed out")
ErrGRPCTimeoutDueToLeaderFail = grpc.Errorf(codes.Unavailable, "etcdserver: request timed out, possibly due to previous leader failure")
ErrGRPCTimeoutDueToConnectionLost = grpc.Errorf(codes.Unavailable, "etcdserver: request timed out, possibly due to connection lost")
ErrGRPCUnhealthy = grpc.Errorf(codes.Unavailable, "etcdserver: unhealthy cluster")
errStringToError = map[string]error{
grpc.ErrorDesc(ErrGRPCEmptyKey): ErrGRPCEmptyKey,
@ -82,9 +86,13 @@ var (
grpc.ErrorDesc(ErrGRPCRoleNotGranted): ErrGRPCRoleNotGranted,
grpc.ErrorDesc(ErrGRPCPermissionNotGranted): ErrGRPCPermissionNotGranted,
grpc.ErrorDesc(ErrGRPCNoLeader): ErrGRPCNoLeader,
grpc.ErrorDesc(ErrGRPCNotCapable): ErrGRPCNotCapable,
grpc.ErrorDesc(ErrGRPCStopped): ErrGRPCStopped,
grpc.ErrorDesc(ErrGRPCNoLeader): ErrGRPCNoLeader,
grpc.ErrorDesc(ErrGRPCNotCapable): ErrGRPCNotCapable,
grpc.ErrorDesc(ErrGRPCStopped): ErrGRPCStopped,
grpc.ErrorDesc(ErrGRPCTimeout): ErrGRPCTimeout,
grpc.ErrorDesc(ErrGRPCTimeoutDueToLeaderFail): ErrGRPCTimeoutDueToLeaderFail,
grpc.ErrorDesc(ErrGRPCTimeoutDueToConnectionLost): ErrGRPCTimeoutDueToConnectionLost,
grpc.ErrorDesc(ErrGRPCUnhealthy): ErrGRPCUnhealthy,
}
// client-side error
@ -116,9 +124,13 @@ var (
ErrRoleNotGranted = Error(ErrGRPCRoleNotGranted)
ErrPermissionNotGranted = Error(ErrGRPCPermissionNotGranted)
ErrNoLeader = Error(ErrGRPCNoLeader)
ErrNotCapable = Error(ErrGRPCNotCapable)
ErrStopped = Error(ErrGRPCStopped)
ErrNoLeader = Error(ErrGRPCNoLeader)
ErrNotCapable = Error(ErrGRPCNotCapable)
ErrStopped = Error(ErrGRPCStopped)
ErrTimeout = Error(ErrGRPCTimeout)
ErrTimeoutDueToLeaderFail = Error(ErrGRPCTimeoutDueToLeaderFail)
ErrTimeoutDueToConnectionLost = Error(ErrGRPCTimeoutDueToConnectionLost)
ErrUnhealthy = Error(ErrGRPCUnhealthy)
)
// EtcdError defines gRPC server errors.

View File

@ -38,6 +38,17 @@ func togRPCError(err error) error {
case etcdserver.ErrNoSpace:
return rpctypes.ErrGRPCNoSpace
case etcdserver.ErrNoLeader:
return rpctypes.ErrGRPCNoLeader
case etcdserver.ErrStopped:
return rpctypes.ErrGRPCStopped
case etcdserver.ErrTimeout:
return rpctypes.ErrGRPCTimeout
case etcdserver.ErrTimeoutDueToLeaderFail:
return rpctypes.ErrGRPCTimeoutDueToLeaderFail
case etcdserver.ErrTimeoutDueToConnectionLost:
return rpctypes.ErrGRPCTimeoutDueToConnectionLost
case auth.ErrRootUserNotExist:
return rpctypes.ErrGRPCRootUserNotExist
case auth.ErrRootRoleNotExist:

View File

@ -32,7 +32,7 @@ type watchServer struct {
clusterID int64
memberID int64
raftTimer etcdserver.RaftTimer
watchable mvcc.Watchable
watchable mvcc.WatchableKV
}
func NewWatchServer(s *etcdserver.EtcdServer) pb.WatchServer {
@ -82,6 +82,8 @@ type serverWatchStream struct {
memberID int64
raftTimer etcdserver.RaftTimer
watchable mvcc.WatchableKV
gRPCStream pb.Watch_WatchServer
watchStream mvcc.WatchStream
ctrlStream chan *pb.WatchResponse
@ -91,6 +93,7 @@ type serverWatchStream struct {
// progress tracks the watchID that stream might need to send
// progress to.
progress map[mvcc.WatchID]bool
prevKV map[mvcc.WatchID]bool
// closec indicates the stream is closed.
closec chan struct{}
@ -101,14 +104,18 @@ type serverWatchStream struct {
func (ws *watchServer) Watch(stream pb.Watch_WatchServer) (err error) {
sws := serverWatchStream{
clusterID: ws.clusterID,
memberID: ws.memberID,
raftTimer: ws.raftTimer,
clusterID: ws.clusterID,
memberID: ws.memberID,
raftTimer: ws.raftTimer,
watchable: ws.watchable,
gRPCStream: stream,
watchStream: ws.watchable.NewWatchStream(),
// chan for sending control response like watcher created and canceled.
ctrlStream: make(chan *pb.WatchResponse, ctrlStreamBufLen),
progress: make(map[mvcc.WatchID]bool),
prevKV: make(map[mvcc.WatchID]bool),
closec: make(chan struct{}),
}
@ -170,9 +177,14 @@ func (sws *serverWatchStream) recvLoop() error {
rev = wsrev + 1
}
id := sws.watchStream.Watch(creq.Key, creq.RangeEnd, rev)
if id != -1 && creq.ProgressNotify {
if id != -1 {
sws.mu.Lock()
sws.progress[id] = true
if creq.ProgressNotify {
sws.progress[id] = true
}
if creq.PrevKv {
sws.prevKV[id] = true
}
sws.mu.Unlock()
}
wr := &pb.WatchResponse{
@ -198,6 +210,7 @@ func (sws *serverWatchStream) recvLoop() error {
}
sws.mu.Lock()
delete(sws.progress, mvcc.WatchID(id))
delete(sws.prevKV, mvcc.WatchID(id))
sws.mu.Unlock()
}
}
@ -244,8 +257,19 @@ func (sws *serverWatchStream) sendLoop() {
// or define protocol buffer with []mvccpb.Event.
evs := wresp.Events
events := make([]*mvccpb.Event, len(evs))
sws.mu.Lock()
needPrevKV := sws.prevKV[wresp.WatchID]
sws.mu.Unlock()
for i := range evs {
events[i] = &evs[i]
if needPrevKV {
opt := mvcc.RangeOptions{Rev: evs[i].Kv.ModRevision - 1}
r, err := sws.watchable.Range(evs[i].Kv.Key, nil, opt)
if err == nil && len(r.KVs) != 0 {
events[i].PrevKv = &(r.KVs[0])
}
}
}
wr := &pb.WatchResponse{

View File

@ -159,6 +159,22 @@ func (a *applierV3backend) Put(txnID int64, p *pb.PutRequest) (*pb.PutResponse,
rev int64
err error
)
var rr *mvcc.RangeResult
if p.PrevKv {
if txnID != noTxn {
rr, err = a.s.KV().TxnRange(txnID, p.Key, nil, mvcc.RangeOptions{})
if err != nil {
return nil, err
}
} else {
rr, err = a.s.KV().Range(p.Key, nil, mvcc.RangeOptions{})
if err != nil {
return nil, err
}
}
}
if txnID != noTxn {
rev, err = a.s.KV().TxnPut(txnID, p.Key, p.Value, lease.LeaseID(p.Lease))
if err != nil {
@ -174,6 +190,9 @@ func (a *applierV3backend) Put(txnID int64, p *pb.PutRequest) (*pb.PutResponse,
rev = a.s.KV().Put(p.Key, p.Value, leaseID)
}
resp.Header.Revision = rev
if rr != nil && len(rr.KVs) != 0 {
resp.PrevKv = &rr.KVs[0]
}
return resp, nil
}
@ -191,6 +210,21 @@ func (a *applierV3backend) DeleteRange(txnID int64, dr *pb.DeleteRangeRequest) (
dr.RangeEnd = []byte{}
}
var rr *mvcc.RangeResult
if dr.PrevKv {
if txnID != noTxn {
rr, err = a.s.KV().TxnRange(txnID, dr.Key, dr.RangeEnd, mvcc.RangeOptions{})
if err != nil {
return nil, err
}
} else {
rr, err = a.s.KV().Range(dr.Key, dr.RangeEnd, mvcc.RangeOptions{})
if err != nil {
return nil, err
}
}
}
if txnID != noTxn {
n, rev, err = a.s.KV().TxnDeleteRange(txnID, dr.Key, dr.RangeEnd)
if err != nil {
@ -201,6 +235,11 @@ func (a *applierV3backend) DeleteRange(txnID int64, dr *pb.DeleteRangeRequest) (
}
resp.Deleted = n
if rr != nil {
for i := range rr.KVs {
resp.PrevKvs = append(resp.PrevKvs, &rr.KVs[i])
}
}
resp.Header.Revision = rev
return resp, nil
}

View File

@ -56,6 +56,9 @@ func (aa *authApplierV3) Put(txnID int64, r *pb.PutRequest) (*pb.PutResponse, er
if !aa.as.IsPutPermitted(aa.user, r.Key) {
return nil, auth.ErrPermissionDenied
}
if r.PrevKv && !aa.as.IsRangePermitted(aa.user, r.Key, nil) {
return nil, auth.ErrPermissionDenied
}
return aa.applierV3.Put(txnID, r)
}
@ -70,6 +73,9 @@ func (aa *authApplierV3) DeleteRange(txnID int64, r *pb.DeleteRangeRequest) (*pb
if !aa.as.IsDeleteRangePermitted(aa.user, r.Key, r.RangeEnd) {
return nil, auth.ErrPermissionDenied
}
if r.PrevKv && !aa.as.IsRangePermitted(aa.user, r.Key, r.RangeEnd) {
return nil, auth.ErrPermissionDenied
}
return aa.applierV3.DeleteRange(txnID, r)
}
@ -99,7 +105,7 @@ func (aa *authApplierV3) checkTxnReqsPermission(reqs []*pb.RequestOp) bool {
continue
}
if !aa.as.IsDeleteRangePermitted(aa.user, tv.RequestDeleteRange.Key, tv.RequestDeleteRange.RangeEnd) {
if tv.RequestDeleteRange.PrevKv && !aa.as.IsRangePermitted(aa.user, tv.RequestDeleteRange.Key, tv.RequestDeleteRange.RangeEnd) {
return false
}
}

View File

@ -102,9 +102,9 @@ import (
proto "github.com/golang/protobuf/proto"
math "math"
)
import io "io"
io "io"
)
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal

View File

@ -10,9 +10,9 @@ import (
proto "github.com/golang/protobuf/proto"
math "math"
)
import io "io"
io "io"
)
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal

File diff suppressed because it is too large Load Diff

View File

@ -396,10 +396,16 @@ message PutRequest {
// lease is the lease ID to associate with the key in the key-value store. A lease
// value of 0 indicates no lease.
int64 lease = 3;
// If prev_kv is set, etcd gets the previous key-value pair before changing it.
// The previous key-value pair will be returned in the put response.
bool prev_kv = 4;
}
message PutResponse {
ResponseHeader header = 1;
// if prev_kv is set in the request, the previous key-value pair will be returned.
mvccpb.KeyValue prev_kv = 2;
}
message DeleteRangeRequest {
@ -409,12 +415,17 @@ message DeleteRangeRequest {
// If range_end is not given, the range is defined to contain only the key argument.
// If range_end is '\0', the range is all keys greater than or equal to the key argument.
bytes range_end = 2;
// If prev_kv is set, etcd gets the previous key-value pairs before deleting it.
// The previous key-value pairs will be returned in the delte response.
bool prev_kv = 3;
}
message DeleteRangeResponse {
ResponseHeader header = 1;
// deleted is the number of keys deleted by the delete range request.
int64 deleted = 2;
// if prev_kv is set in the request, the previous key-value pairs will be returned.
repeated mvccpb.KeyValue prev_kvs = 3;
}
message RequestOp {
@ -563,6 +574,9 @@ message WatchCreateRequest {
// wish to recover a disconnected watcher starting from a recent known revision.
// The etcd server may decide how often it will send notifications based on current load.
bool progress_notify = 4;
// If prev_kv is set, created watcher gets the previous KV before the event happens.
// If the previous KV is already compacted, nothing will be returned.
bool prev_kv = 6;
}
message WatchCancelRequest {

View File

@ -154,13 +154,13 @@ type Server interface {
// EtcdServer is the production implementation of the Server interface
type EtcdServer struct {
// r and inflightSnapshots must be the first elements to keep 64-bit alignment for atomic
// access to fields
// count the number of inflight snapshots.
// MUST use atomic operation to access this field.
inflightSnapshots int64
Cfg *ServerConfig
// inflightSnapshots holds count the number of snapshots currently inflight.
inflightSnapshots int64 // must use atomic operations to access; keep 64-bit aligned.
appliedIndex uint64 // must use atomic operations to access; keep 64-bit aligned.
// consistIndex used to hold the offset of current executing entry
// It is initialized to 0 before executing any entry.
consistIndex consistentIndex // must use atomic operations to access; keep 64-bit aligned.
Cfg *ServerConfig
readych chan struct{}
r raftNode
@ -195,10 +195,6 @@ type EtcdServer struct {
// compactor is used to auto-compact the KV.
compactor *compactor.Periodic
// consistent index used to hold the offset of current executing entry
// It is initialized to 0 before executing any entry.
consistIndex consistentIndex
// peerRt used to send requests (version, lease) to peers.
peerRt http.RoundTripper
reqIDGen *idutil.Generator
@ -212,8 +208,6 @@ type EtcdServer struct {
// wg is used to wait for the go routines that depends on the server state
// to exit when stopping the server.
wg sync.WaitGroup
appliedIndex uint64
}
// NewServer creates a new EtcdServer from the supplied configuration. The

View File

@ -20,6 +20,7 @@ import (
"time"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/etcdserver/membership"
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/lease/leasehttp"
"github.com/coreos/etcd/mvcc"
@ -54,7 +55,7 @@ type Lessor interface {
// LeaseRenew renews the lease with given ID. The renewed TTL is returned. Or an error
// is returned.
LeaseRenew(id lease.LeaseID) (int64, error)
LeaseRenew(ctx context.Context, id lease.LeaseID) (int64, error)
}
type Authenticator interface {
@ -218,7 +219,7 @@ func (s *EtcdServer) LeaseRevoke(ctx context.Context, r *pb.LeaseRevokeRequest)
return result.resp.(*pb.LeaseRevokeResponse), nil
}
func (s *EtcdServer) LeaseRenew(id lease.LeaseID) (int64, error) {
func (s *EtcdServer) LeaseRenew(ctx context.Context, id lease.LeaseID) (int64, error) {
ttl, err := s.lessor.Renew(id)
if err == nil {
return ttl, nil
@ -228,29 +229,44 @@ func (s *EtcdServer) LeaseRenew(id lease.LeaseID) (int64, error) {
}
// renewals don't go through raft; forward to leader manually
cctx, cancel := context.WithTimeout(ctx, s.Cfg.ReqTimeout())
defer cancel()
// renewals don't go through raft; forward to leader manually
for cctx.Err() == nil && err != nil {
leader, lerr := s.waitLeader(cctx)
if lerr != nil {
return -1, lerr
}
for _, url := range leader.PeerURLs {
lurl := url + "/leases"
ttl, err = leasehttp.RenewHTTP(cctx, id, lurl, s.peerRt)
if err == nil || err == lease.ErrLeaseNotFound {
return ttl, err
}
}
}
return -1, ErrTimeout
}
func (s *EtcdServer) waitLeader(ctx context.Context) (*membership.Member, error) {
leader := s.cluster.Member(s.Leader())
for i := 0; i < 5 && leader == nil; i++ {
for leader == nil {
// wait an election
dur := time.Duration(s.Cfg.ElectionTicks) * time.Duration(s.Cfg.TickMs) * time.Millisecond
select {
case <-time.After(dur):
leader = s.cluster.Member(s.Leader())
case <-s.done:
return -1, ErrStopped
return nil, ErrStopped
case <-ctx.Done():
return nil, ErrNoLeader
}
}
if leader == nil || len(leader.PeerURLs) == 0 {
return -1, ErrNoLeader
return nil, ErrNoLeader
}
for _, url := range leader.PeerURLs {
lurl := url + "/leases"
ttl, err = leasehttp.RenewHTTP(id, lurl, s.peerRt, s.Cfg.peerDialTimeout())
if err == nil {
break
}
}
return ttl, err
return leader, nil
}
func (s *EtcdServer) Alarm(ctx context.Context, r *pb.AlarmRequest) (*pb.AlarmResponse, error) {
@ -551,4 +567,4 @@ func (s *EtcdServer) processInternalRaftRequest(ctx context.Context, r pb.Intern
}
// Watchable returns a watchable interface attached to the etcdserver.
func (s *EtcdServer) Watchable() mvcc.Watchable { return s.KV() }
func (s *EtcdServer) Watchable() mvcc.WatchableKV { return s.KV() }

View File

@ -379,6 +379,7 @@ func TestV3DeleteRange(t *testing.T) {
keySet []string
begin string
end string
prevKV bool
wantSet [][]byte
deleted int64
@ -386,39 +387,45 @@ func TestV3DeleteRange(t *testing.T) {
// delete middle
{
[]string{"foo", "foo/abc", "fop"},
"foo/", "fop",
"foo/", "fop", false,
[][]byte{[]byte("foo"), []byte("fop")}, 1,
},
// no delete
{
[]string{"foo", "foo/abc", "fop"},
"foo/", "foo/",
"foo/", "foo/", false,
[][]byte{[]byte("foo"), []byte("foo/abc"), []byte("fop")}, 0,
},
// delete first
{
[]string{"foo", "foo/abc", "fop"},
"fo", "fop",
"fo", "fop", false,
[][]byte{[]byte("fop")}, 2,
},
// delete tail
{
[]string{"foo", "foo/abc", "fop"},
"foo/", "fos",
"foo/", "fos", false,
[][]byte{[]byte("foo")}, 2,
},
// delete exact
{
[]string{"foo", "foo/abc", "fop"},
"foo/abc", "",
"foo/abc", "", false,
[][]byte{[]byte("foo"), []byte("fop")}, 1,
},
// delete none, [x,x)
{
[]string{"foo"},
"foo", "foo",
"foo", "foo", false,
[][]byte{[]byte("foo")}, 0,
},
// delete middle with preserveKVs set
{
[]string{"foo", "foo/abc", "fop"},
"foo/", "fop", true,
[][]byte{[]byte("foo"), []byte("fop")}, 1,
},
}
for i, tt := range tests {
@ -436,7 +443,9 @@ func TestV3DeleteRange(t *testing.T) {
dreq := &pb.DeleteRangeRequest{
Key: []byte(tt.begin),
RangeEnd: []byte(tt.end)}
RangeEnd: []byte(tt.end),
PrevKv: tt.prevKV,
}
dresp, err := kvc.DeleteRange(context.TODO(), dreq)
if err != nil {
t.Fatalf("couldn't delete range on test %d (%v)", i, err)
@ -444,6 +453,11 @@ func TestV3DeleteRange(t *testing.T) {
if tt.deleted != dresp.Deleted {
t.Errorf("expected %d on test %v, got %d", tt.deleted, i, dresp.Deleted)
}
if tt.prevKV {
if len(dresp.PrevKvs) != int(dresp.Deleted) {
t.Errorf("preserve %d keys, want %d", len(dresp.PrevKvs), dresp.Deleted)
}
}
rreq := &pb.RangeRequest{Key: []byte{0x0}, RangeEnd: []byte{0xff}}
rresp, err := kvc.Range(context.TODO(), rreq)

View File

@ -19,11 +19,13 @@ import (
"testing"
"time"
"golang.org/x/net/context"
"google.golang.org/grpc/metadata"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/mvcc/mvccpb"
"github.com/coreos/etcd/pkg/testutil"
"golang.org/x/net/context"
)
// TestV3LeasePrmote ensures the newly elected leader can promote itself
@ -332,7 +334,9 @@ func TestV3LeaseFailover(t *testing.T) {
lreq := &pb.LeaseKeepAliveRequest{ID: lresp.ID}
ctx, cancel := context.WithCancel(context.Background())
md := metadata.Pairs(rpctypes.MetadataRequireLeaderKey, rpctypes.MetadataHasLeader)
mctx := metadata.NewContext(context.Background(), md)
ctx, cancel := context.WithCancel(mctx)
defer cancel()
lac, err := lc.LeaseKeepAlive(ctx)
if err != nil {

View File

@ -348,6 +348,51 @@ func TestV3WatchFutureRevision(t *testing.T) {
}
}
// TestV3WatchWrongRange tests wrong range does not create watchers.
func TestV3WatchWrongRange(t *testing.T) {
defer testutil.AfterTest(t)
clus := NewClusterV3(t, &ClusterConfig{Size: 1})
defer clus.Terminate(t)
wAPI := toGRPC(clus.RandClient()).Watch
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
wStream, err := wAPI.Watch(ctx)
if err != nil {
t.Fatalf("wAPI.Watch error: %v", err)
}
tests := []struct {
key []byte
end []byte
canceled bool
}{
{[]byte("a"), []byte("a"), true}, // wrong range end
{[]byte("b"), []byte("a"), true}, // wrong range end
{[]byte("foo"), []byte{0}, false}, // watch request with 'WithFromKey'
}
for i, tt := range tests {
if err := wStream.Send(&pb.WatchRequest{RequestUnion: &pb.WatchRequest_CreateRequest{
CreateRequest: &pb.WatchCreateRequest{Key: tt.key, RangeEnd: tt.end, StartRevision: 1}}}); err != nil {
t.Fatalf("#%d: wStream.Send error: %v", i, err)
}
cresp, err := wStream.Recv()
if err != nil {
t.Fatalf("#%d: wStream.Recv error: %v", i, err)
}
if !cresp.Created {
t.Fatalf("#%d: create %v, want %v", i, cresp.Created, true)
}
if cresp.Canceled != tt.canceled {
t.Fatalf("#%d: canceled %v, want %v", i, tt.canceled, cresp.Canceled)
}
if tt.canceled && cresp.WatchId != -1 {
t.Fatalf("#%d: canceled watch ID %d, want -1", i, cresp.WatchId)
}
}
}
// TestV3WatchCancelSynced tests Watch APIs cancellation from synced map.
func TestV3WatchCancelSynced(t *testing.T) {
defer testutil.AfterTest(t)

View File

@ -19,10 +19,10 @@ import (
"fmt"
"io/ioutil"
"net/http"
"time"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/lease"
"golang.org/x/net/context"
)
// NewHandler returns an http Handler for lease renewals
@ -75,15 +75,22 @@ func (h *leaseHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
// RenewHTTP renews a lease at a given primary server.
// TODO: Batch request in future?
func RenewHTTP(id lease.LeaseID, url string, rt http.RoundTripper, timeout time.Duration) (int64, error) {
func RenewHTTP(ctx context.Context, id lease.LeaseID, url string, rt http.RoundTripper) (int64, error) {
// will post lreq protobuf to leader
lreq, err := (&pb.LeaseKeepAliveRequest{ID: int64(id)}).Marshal()
if err != nil {
return -1, err
}
cc := &http.Client{Transport: rt, Timeout: timeout}
resp, err := cc.Post(url, "application/protobuf", bytes.NewReader(lreq))
cc := &http.Client{Transport: rt}
req, err := http.NewRequest("POST", url, bytes.NewReader(lreq))
if err != nil {
return -1, err
}
req.Header.Set("Content-Type", "application/protobuf")
req.Cancel = ctx.Done()
resp, err := cc.Do(req)
if err != nil {
// TODO detect if leader failed and retry?
return -1, err

View File

@ -0,0 +1,51 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package leasehttp
import (
"net/http"
"net/http/httptest"
"os"
"testing"
"time"
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/mvcc/backend"
"golang.org/x/net/context"
)
func TestRenewHTTP(t *testing.T) {
be, tmpPath := backend.NewTmpBackend(time.Hour, 10000)
defer os.Remove(tmpPath)
defer be.Close()
le := lease.NewLessor(be)
le.Promote(time.Second)
l, err := le.Grant(1, int64(5))
if err != nil {
t.Fatalf("failed to create lease: %v", err)
}
ts := httptest.NewServer(NewHandler(le))
defer ts.Close()
ttl, err := RenewHTTP(context.TODO(), l.ID, ts.URL+"/leases", http.DefaultTransport)
if err != nil {
t.Fatal(err)
}
if ttl != 5 {
t.Fatalf("ttl expected 5, got %d", ttl)
}
}

View File

@ -19,9 +19,9 @@ import (
proto "github.com/golang/protobuf/proto"
math "math"
)
import io "io"
io "io"
)
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal

View File

@ -408,6 +408,13 @@ func (s *store) restore() error {
s.currentRev = rev
}
// keys in the range [compacted revision -N, compaction] might all be deleted due to compaction.
// the correct revision should be set to compaction revision in the case, not the largest revision
// we have seen.
if s.currentRev.main < s.compactMainRev {
s.currentRev.main = s.compactMainRev
}
for key, lid := range keyToLease {
if s.le == nil {
panic("no lessor to attach lease")

View File

@ -15,8 +15,10 @@
package mvcc
import (
"os"
"reflect"
"testing"
"time"
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/mvcc/backend"
@ -93,3 +95,41 @@ func TestScheduleCompaction(t *testing.T) {
cleanup(s, b, tmpPath)
}
}
func TestCompactAllAndRestore(t *testing.T) {
b, tmpPath := backend.NewDefaultTmpBackend()
s0 := NewStore(b, &lease.FakeLessor{}, nil)
defer os.Remove(tmpPath)
s0.Put([]byte("foo"), []byte("bar"), lease.NoLease)
s0.Put([]byte("foo"), []byte("bar1"), lease.NoLease)
s0.Put([]byte("foo"), []byte("bar2"), lease.NoLease)
s0.DeleteRange([]byte("foo"), nil)
rev := s0.Rev()
// compact all keys
done, err := s0.Compact(rev)
if err != nil {
t.Fatal(err)
}
select {
case <-done:
case <-time.After(10 * time.Second):
t.Fatal("timeout waiting for compaction to finish")
}
err = s0.Close()
if err != nil {
t.Fatal(err)
}
s1 := NewStore(b, &lease.FakeLessor{}, nil)
if s1.Rev() != rev {
t.Errorf("rev = %v, want %v", s1.Rev(), rev)
}
_, err = s1.Range([]byte("foo"), nil, RangeOptions{})
if err != nil {
t.Errorf("unexpect range error %v", err)
}
}

View File

@ -20,9 +20,9 @@ import (
proto "github.com/golang/protobuf/proto"
math "math"
)
import io "io"
io "io"
)
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
@ -89,6 +89,8 @@ type Event struct {
// A DELETE/EXPIRE event contains the deleted key with
// its modification revision set to the revision of deletion.
Kv *KeyValue `protobuf:"bytes,2,opt,name=kv" json:"kv,omitempty"`
// prev_kv holds the key-value pair before the event happens.
PrevKv *KeyValue `protobuf:"bytes,3,opt,name=prev_kv,json=prevKv" json:"prev_kv,omitempty"`
}
func (m *Event) Reset() { *m = Event{} }
@ -181,6 +183,16 @@ func (m *Event) MarshalTo(data []byte) (int, error) {
}
i += n1
}
if m.PrevKv != nil {
data[i] = 0x1a
i++
i = encodeVarintKv(data, i, uint64(m.PrevKv.Size()))
n2, err := m.PrevKv.MarshalTo(data[i:])
if err != nil {
return 0, err
}
i += n2
}
return i, nil
}
@ -247,6 +259,10 @@ func (m *Event) Size() (n int) {
l = m.Kv.Size()
n += 1 + l + sovKv(uint64(l))
}
if m.PrevKv != nil {
l = m.PrevKv.Size()
n += 1 + l + sovKv(uint64(l))
}
return n
}
@ -532,6 +548,39 @@ func (m *Event) Unmarshal(data []byte) error {
return err
}
iNdEx = postIndex
case 3:
if wireType != 2 {
return fmt.Errorf("proto: wrong wireType = %d for field PrevKv", wireType)
}
var msglen int
for shift := uint(0); ; shift += 7 {
if shift >= 64 {
return ErrIntOverflowKv
}
if iNdEx >= l {
return io.ErrUnexpectedEOF
}
b := data[iNdEx]
iNdEx++
msglen |= (int(b) & 0x7F) << shift
if b < 0x80 {
break
}
}
if msglen < 0 {
return ErrInvalidLengthKv
}
postIndex := iNdEx + msglen
if postIndex > l {
return io.ErrUnexpectedEOF
}
if m.PrevKv == nil {
m.PrevKv = &KeyValue{}
}
if err := m.PrevKv.Unmarshal(data[iNdEx:postIndex]); err != nil {
return err
}
iNdEx = postIndex
default:
iNdEx = preIndex
skippy, err := skipKv(data[iNdEx:])
@ -659,23 +708,24 @@ var (
)
var fileDescriptorKv = []byte{
// 285 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0x44, 0x90, 0x41, 0x4a, 0xc3, 0x40,
0x14, 0x86, 0x33, 0x4d, 0x9b, 0xd6, 0xd7, 0x52, 0xc3, 0x50, 0x70, 0x70, 0x31, 0xc4, 0x6e, 0x2c,
0x08, 0x11, 0xea, 0x0d, 0xc4, 0xac, 0x74, 0x21, 0x21, 0xba, 0x95, 0x34, 0x7d, 0x94, 0x92, 0xa6,
0x13, 0xd2, 0x38, 0x98, 0x9b, 0x78, 0x0a, 0xcf, 0xd1, 0x65, 0x8f, 0x60, 0xe3, 0x45, 0x24, 0x6f,
0x4c, 0xdd, 0x0c, 0xef, 0xff, 0xff, 0x6f, 0x98, 0xff, 0x0d, 0x0c, 0x52, 0xed, 0xe7, 0x85, 0x2a,
0x15, 0x77, 0x32, 0x9d, 0x24, 0xf9, 0xe2, 0x72, 0xb2, 0x52, 0x2b, 0x45, 0xd6, 0x6d, 0x33, 0x99,
0x74, 0xfa, 0xc5, 0x60, 0xf0, 0x88, 0xd5, 0x6b, 0xbc, 0x79, 0x47, 0xee, 0x82, 0x9d, 0x62, 0x25,
0x98, 0xc7, 0x66, 0xa3, 0xb0, 0x19, 0xf9, 0x35, 0x9c, 0x27, 0x05, 0xc6, 0x25, 0xbe, 0x15, 0xa8,
0xd7, 0xbb, 0xb5, 0xda, 0x8a, 0x8e, 0xc7, 0x66, 0x76, 0x38, 0x36, 0x76, 0xf8, 0xe7, 0xf2, 0x2b,
0x18, 0x65, 0x6a, 0xf9, 0x4f, 0xd9, 0x44, 0x0d, 0x33, 0xb5, 0x3c, 0x21, 0x02, 0xfa, 0x1a, 0x0b,
0x4a, 0xbb, 0x94, 0xb6, 0x92, 0x4f, 0xa0, 0xa7, 0x9b, 0x02, 0xa2, 0x47, 0x2f, 0x1b, 0xd1, 0xb8,
0x1b, 0x8c, 0x77, 0x28, 0x1c, 0xa2, 0x8d, 0x98, 0x7e, 0x40, 0x2f, 0xd0, 0xb8, 0x2d, 0xf9, 0x0d,
0x74, 0xcb, 0x2a, 0x47, 0x6a, 0x3b, 0x9e, 0x5f, 0xf8, 0x66, 0x4d, 0x9f, 0x42, 0x73, 0x46, 0x55,
0x8e, 0x21, 0x41, 0xdc, 0x83, 0x4e, 0xaa, 0xa9, 0xfa, 0x70, 0xee, 0xb6, 0x68, 0xbb, 0x77, 0xd8,
0x49, 0xf5, 0xd4, 0x83, 0xb3, 0xd3, 0x25, 0xde, 0x07, 0xfb, 0xf9, 0x25, 0x72, 0x2d, 0x0e, 0xe0,
0x3c, 0x04, 0x4f, 0x41, 0x14, 0xb8, 0xec, 0x5e, 0xec, 0x8f, 0xd2, 0x3a, 0x1c, 0xa5, 0xb5, 0xaf,
0x25, 0x3b, 0xd4, 0x92, 0x7d, 0xd7, 0x92, 0x7d, 0xfe, 0x48, 0x6b, 0xe1, 0xd0, 0x5f, 0xde, 0xfd,
0x06, 0x00, 0x00, 0xff, 0xff, 0xd6, 0x21, 0x8f, 0x2c, 0x75, 0x01, 0x00, 0x00,
// 303 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0x6c, 0x90, 0x41, 0x4e, 0xc2, 0x40,
0x14, 0x86, 0x3b, 0x14, 0x0a, 0x3e, 0x08, 0x36, 0x13, 0x12, 0x27, 0x2e, 0x26, 0x95, 0x8d, 0x18,
0x13, 0x4c, 0xf0, 0x06, 0xc6, 0xae, 0x70, 0x61, 0x1a, 0x74, 0x4b, 0x4a, 0x79, 0x21, 0xa4, 0x94,
0x69, 0x4a, 0x9d, 0xa4, 0x37, 0x71, 0xef, 0xde, 0x73, 0xb0, 0xe4, 0x08, 0x52, 0x2f, 0x62, 0xfa,
0xc6, 0xe2, 0xc6, 0xcd, 0xe4, 0xfd, 0xff, 0xff, 0x65, 0xe6, 0x7f, 0x03, 0x9d, 0x58, 0x8f, 0xd3,
0x4c, 0xe5, 0x8a, 0x3b, 0x89, 0x8e, 0xa2, 0x74, 0x71, 0x39, 0x58, 0xa9, 0x95, 0x22, 0xeb, 0xae,
0x9a, 0x4c, 0x3a, 0xfc, 0x64, 0xd0, 0x99, 0x62, 0xf1, 0x1a, 0x6e, 0xde, 0x90, 0xbb, 0x60, 0xc7,
0x58, 0x08, 0xe6, 0xb1, 0x51, 0x2f, 0xa8, 0x46, 0x7e, 0x0d, 0xe7, 0x51, 0x86, 0x61, 0x8e, 0xf3,
0x0c, 0xf5, 0x7a, 0xb7, 0x56, 0x5b, 0xd1, 0xf0, 0xd8, 0xc8, 0x0e, 0xfa, 0xc6, 0x0e, 0x7e, 0x5d,
0x7e, 0x05, 0xbd, 0x44, 0x2d, 0xff, 0x28, 0x9b, 0xa8, 0x6e, 0xa2, 0x96, 0x27, 0x44, 0x40, 0x5b,
0x63, 0x46, 0x69, 0x93, 0xd2, 0x5a, 0xf2, 0x01, 0xb4, 0x74, 0x55, 0x40, 0xb4, 0xe8, 0x65, 0x23,
0x2a, 0x77, 0x83, 0xe1, 0x0e, 0x85, 0x43, 0xb4, 0x11, 0xc3, 0x0f, 0x06, 0x2d, 0x5f, 0xe3, 0x36,
0xe7, 0xb7, 0xd0, 0xcc, 0x8b, 0x14, 0xa9, 0x6e, 0x7f, 0x72, 0x31, 0x36, 0x7b, 0x8e, 0x29, 0x34,
0xe7, 0xac, 0x48, 0x31, 0x20, 0x88, 0x7b, 0xd0, 0x88, 0x35, 0x75, 0xef, 0x4e, 0xdc, 0x1a, 0xad,
0x17, 0x0f, 0x1a, 0xb1, 0xe6, 0x37, 0xd0, 0x4e, 0x33, 0xd4, 0xf3, 0x58, 0x53, 0xf9, 0xff, 0x30,
0xa7, 0x02, 0xa6, 0x7a, 0xe8, 0xc1, 0xd9, 0xe9, 0x7e, 0xde, 0x06, 0xfb, 0xf9, 0x65, 0xe6, 0x5a,
0x1c, 0xc0, 0x79, 0xf4, 0x9f, 0xfc, 0x99, 0xef, 0xb2, 0x07, 0xb1, 0x3f, 0x4a, 0xeb, 0x70, 0x94,
0xd6, 0xbe, 0x94, 0xec, 0x50, 0x4a, 0xf6, 0x55, 0x4a, 0xf6, 0xfe, 0x2d, 0xad, 0x85, 0x43, 0xff,
0x7e, 0xff, 0x13, 0x00, 0x00, 0xff, 0xff, 0xb5, 0x45, 0x92, 0x5d, 0xa1, 0x01, 0x00, 0x00,
}

View File

@ -43,4 +43,6 @@ message Event {
// A DELETE/EXPIRE event contains the deleted key with
// its modification revision set to the revision of deletion.
KeyValue kv = 2;
// prev_kv holds the key-value pair before the event happens.
KeyValue prev_kv = 3;
}

View File

@ -15,6 +15,7 @@
package mvcc
import (
"bytes"
"errors"
"sync"
@ -96,6 +97,12 @@ type watchStream struct {
// Watch creates a new watcher in the stream and returns its WatchID.
// TODO: return error if ws is closed?
func (ws *watchStream) Watch(key, end []byte, startRev int64) WatchID {
// prevent wrong range where key >= end lexicographically
// watch request with 'WithFromKey' has empty-byte range end
if len(end) != 0 && bytes.Compare(key, end) != -1 {
return -1
}
ws.mu.Lock()
defer ws.mu.Unlock()
if ws.closed {

View File

@ -153,6 +153,28 @@ func TestWatcherWatchPrefix(t *testing.T) {
}
}
// TestWatcherWatchWrongRange ensures that watcher with wrong 'end' range
// does not create watcher, which panics when canceling in range tree.
func TestWatcherWatchWrongRange(t *testing.T) {
b, tmpPath := backend.NewDefaultTmpBackend()
s := WatchableKV(newWatchableStore(b, &lease.FakeLessor{}, nil))
defer cleanup(s, b, tmpPath)
w := s.NewWatchStream()
defer w.Close()
if id := w.Watch([]byte("foa"), []byte("foa"), 1); id != -1 {
t.Fatalf("key == end range given; id expected -1, got %d", id)
}
if id := w.Watch([]byte("fob"), []byte("foa"), 1); id != -1 {
t.Fatalf("key > end range given; id expected -1, got %d", id)
}
// watch request with 'WithFromKey' has empty-byte range end
if id := w.Watch([]byte("foo"), []byte{}, 1); id != 0 {
t.Fatalf("\x00 is range given; id expected 0, got %d", id)
}
}
func TestWatchDeleteRange(t *testing.T) {
b, tmpPath := backend.NewDefaultTmpBackend()
s := newWatchableStore(b, &lease.FakeLessor{}, nil)

View File

@ -38,9 +38,12 @@ type PageWriter struct {
bufWatermarkBytes int
}
func NewPageWriter(w io.Writer, pageBytes int) *PageWriter {
// NewPageWriter creates a new PageWriter. pageBytes is the number of bytes
// to write per page. pageOffset is the starting offset of io.Writer.
func NewPageWriter(w io.Writer, pageBytes, pageOffset int) *PageWriter {
return &PageWriter{
w: w,
pageOffset: pageOffset,
pageBytes: pageBytes,
buf: make([]byte, defaultBufferBytes+pageBytes),
bufWatermarkBytes: defaultBufferBytes,

View File

@ -25,7 +25,7 @@ func TestPageWriterRandom(t *testing.T) {
pageBytes := 128
buf := make([]byte, 4*defaultBufferBytes)
cw := &checkPageWriter{pageBytes: pageBytes, t: t}
w := NewPageWriter(cw, pageBytes)
w := NewPageWriter(cw, pageBytes, 0)
n := 0
for i := 0; i < 4096; i++ {
c, err := w.Write(buf[:rand.Intn(len(buf))])
@ -51,7 +51,7 @@ func TestPageWriterPartialSlack(t *testing.T) {
pageBytes := 128
buf := make([]byte, defaultBufferBytes)
cw := &checkPageWriter{pageBytes: 64, t: t}
w := NewPageWriter(cw, pageBytes)
w := NewPageWriter(cw, pageBytes, 0)
// put writer in non-zero page offset
if _, err := w.Write(buf[:64]); err != nil {
t.Fatal(err)
@ -82,6 +82,35 @@ func TestPageWriterPartialSlack(t *testing.T) {
}
}
// TestPageWriterOffset tests if page writer correctly repositions when offset is given.
func TestPageWriterOffset(t *testing.T) {
defaultBufferBytes = 1024
pageBytes := 128
buf := make([]byte, defaultBufferBytes)
cw := &checkPageWriter{pageBytes: 64, t: t}
w := NewPageWriter(cw, pageBytes, 0)
if _, err := w.Write(buf[:64]); err != nil {
t.Fatal(err)
}
if err := w.Flush(); err != nil {
t.Fatal(err)
}
if w.pageOffset != 64 {
t.Fatalf("w.pageOffset expected 64, got %d", w.pageOffset)
}
w = NewPageWriter(cw, w.pageOffset, pageBytes)
if _, err := w.Write(buf[:64]); err != nil {
t.Fatal(err)
}
if err := w.Flush(); err != nil {
t.Fatal(err)
}
if w.pageOffset != 0 {
t.Fatalf("w.pageOffset expected 0, got %d", w.pageOffset)
}
}
// checkPageWriter implements an io.Writer that fails a test on unaligned writes.
type checkPageWriter struct {
pageBytes int

View File

@ -38,7 +38,7 @@ var (
// SoftState provides state that is useful for logging and debugging.
// The state is volatile and does not need to be persisted to the WAL.
type SoftState struct {
Lead uint64
Lead uint64 // must use atomic operations to access; keep 64-bit aligned.
RaftState StateType
}

View File

@ -25,9 +25,9 @@ import (
proto "github.com/golang/protobuf/proto"
math "math"
)
import io "io"
io "io"
)
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
@ -183,9 +183,9 @@ func (x *ConfChangeType) UnmarshalJSON(data []byte) error {
func (ConfChangeType) EnumDescriptor() ([]byte, []int) { return fileDescriptorRaft, []int{2} }
type Entry struct {
Type EntryType `protobuf:"varint,1,opt,name=Type,json=type,enum=raftpb.EntryType" json:"Type"`
Term uint64 `protobuf:"varint,2,opt,name=Term,json=term" json:"Term"`
Index uint64 `protobuf:"varint,3,opt,name=Index,json=index" json:"Index"`
Type EntryType `protobuf:"varint,1,opt,name=Type,json=type,enum=raftpb.EntryType" json:"Type"`
Data []byte `protobuf:"bytes,4,opt,name=Data,json=data" json:"Data,omitempty"`
XXX_unrecognized []byte `json:"-"`
}

View File

@ -15,9 +15,9 @@ enum EntryType {
}
message Entry {
optional uint64 Term = 2 [(gogoproto.nullable) = false]; // must be 64-bit aligned for atomic operations
optional uint64 Index = 3 [(gogoproto.nullable) = false]; // must be 64-bit aligned for atomic operations
optional EntryType Type = 1 [(gogoproto.nullable) = false];
optional uint64 Term = 2 [(gogoproto.nullable) = false];
optional uint64 Index = 3 [(gogoproto.nullable) = false];
optional bytes Data = 4;
}

View File

@ -49,6 +49,7 @@ var (
"2.1.0": {streamTypeMsgAppV2, streamTypeMessage},
"2.2.0": {streamTypeMsgAppV2, streamTypeMessage},
"2.3.0": {streamTypeMsgAppV2, streamTypeMessage},
"3.0.0": {streamTypeMsgAppV2, streamTypeMessage},
}
)

View File

@ -19,9 +19,9 @@ import (
proto "github.com/golang/protobuf/proto"
math "math"
)
import io "io"
io "io"
)
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal

9
test
View File

@ -125,15 +125,6 @@ function fmt_tests {
fi
if which gosimple >/dev/null; then
echo "Checking gosimple..."
for path in $GOSIMPLE_UNUSED_PATHS; do
simplResult=`gosimple $REPO_PATH/${path} || true`
if [ -n "${simplResult}" ]; then
echo -e "gosimple checking ${path} failed:\n${simplResult}"
exit 255
fi
done
else
echo "Skipping gosimple..."
fi

View File

@ -1,23 +0,0 @@
# etcd-top
etcd realtime workload analyzer. Useful for rapid diagnosis of production usage issues and analysis of production request distributions.
usage:
```
-iface="eth0": interface for sniffing traffic on
-period=1: seconds between submissions
-ports="2379": etcd listening ports
-promiscuous=true: whether to perform promiscuous sniffing or not.
-topk=10: submit stats for the top <K> sniffed paths
```
result:
```
go run etcd-top.go --period=1 -topk=3
1440035702 sniffed 1074 requests over last 1 seconds
Top 3 most popular http requests:
Sum Rate Verb Path
1305 22 GET /v2/keys/c
1302 8 GET /v2/keys/S
1297 10 GET /v2/keys/h
```

View File

@ -1,16 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// etcd-top is a utility for analyzing etcd v2 API workload traffic.
package main

View File

@ -1,229 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"bufio"
"bytes"
"flag"
"fmt"
"math"
"net/http"
"os"
"runtime"
"sort"
"strconv"
"strings"
"time"
"github.com/akrennmair/gopcap"
"github.com/spacejam/loghisto"
)
type nameSum struct {
Name string
Sum float64
Rate float64
}
type nameSums []nameSum
func (n nameSums) Len() int {
return len(n)
}
func (n nameSums) Less(i, j int) bool {
return n[i].Sum > n[j].Sum
}
func (n nameSums) Swap(i, j int) {
n[i], n[j] = n[j], n[i]
}
// This function listens for periodic metrics from the loghisto metric system,
// and upon receipt of a batch of them it will print out the desired topK.
func statPrinter(metricStream chan *loghisto.ProcessedMetricSet, topK, period uint) {
for m := range metricStream {
requestCounter := float64(0)
nvs := nameSums{}
for k, v := range m.Metrics {
// loghisto adds _rate suffixed metrics for counters and histograms
if strings.HasSuffix(k, "_rate") && !strings.HasSuffix(k, "_rate_rate") {
continue
}
nvs = append(nvs, nameSum{
Name: k,
Sum: v,
Rate: m.Metrics[k+"_rate"],
})
requestCounter += m.Metrics[k+"_rate"]
}
fmt.Printf("\n%d sniffed %d requests over last %d seconds\n\n", time.Now().Unix(),
uint(requestCounter), period)
if len(nvs) == 0 {
continue
}
sort.Sort(nvs)
fmt.Printf("Top %d most popular http requests:\n", topK)
fmt.Println("Total Sum Period Sum Verb Path")
for _, nv := range nvs[0:int(math.Min(float64(len(nvs)), float64(topK)))] {
fmt.Printf("%9.1d %7.1d %s\n", int(nv.Sum), int(nv.Rate), nv.Name)
}
}
}
// packetDecoder decodes packets and hands them off to the streamRouter
func packetDecoder(packetsIn chan *pcap.Packet, packetsOut chan *pcap.Packet) {
for pkt := range packetsIn {
pkt.Decode()
select {
case packetsOut <- pkt:
default:
fmt.Fprint(os.Stderr, "shedding at decoder!")
}
}
}
// processor tries to parse an http request from each packet, and if
// successful it records metrics about it in the loghisto metric system.
func processor(ms *loghisto.MetricSystem, packetsIn chan *pcap.Packet) {
for pkt := range packetsIn {
req, reqErr := http.ReadRequest(bufio.NewReader(bytes.NewReader(pkt.Payload)))
if reqErr == nil {
ms.Counter(req.Method+" "+req.URL.Path, 1)
}
}
}
// streamRouter takes a decoded packet and routes it to a processor that can deal with all requests
// and responses for this particular TCP connection. This allows the processor to own a local map
// of requests so that it can avoid coordinating with other goroutines to perform analysis.
func streamRouter(ports []uint16, parsedPackets chan *pcap.Packet, processors []chan *pcap.Packet) {
for pkt := range parsedPackets {
if pkt.TCP == nil {
continue
}
clientPort := uint16(0)
for _, p := range ports {
if pkt.TCP.SrcPort == p {
clientPort = pkt.TCP.DestPort
break
}
if pkt.TCP.DestPort == p {
clientPort = pkt.TCP.SrcPort
break
}
}
if clientPort != 0 {
// client Port can be assumed to have sufficient entropy for
// distribution among processors, and we want the same
// tcp stream to go to the same processor every time
// so that if we do proper packet reconstruction it will
// be easier.
select {
case processors[int(clientPort)%len(processors)] <- pkt:
default:
fmt.Fprint(os.Stderr, "Shedding load at router!")
}
}
}
}
// 1. parse args
// 2. start the loghisto metric system
// 3. start the processing and printing goroutines
// 4. open the pcap handler
// 5. hand off packets from the handler to the decoder
func main() {
portsArg := flag.String("ports", "2379", "etcd listening ports")
iface := flag.String("iface", "eth0", "interface for sniffing traffic on")
promisc := flag.Bool("promiscuous", true, "promiscuous mode")
period := flag.Uint("period", 1, "seconds between submissions")
topK := flag.Uint("topk", 10, "submit stats for the top <K> sniffed paths")
flag.Parse()
numCPU := runtime.NumCPU()
runtime.GOMAXPROCS(numCPU)
ms := loghisto.NewMetricSystem(time.Duration(*period)*time.Second, false)
ms.Start()
metricStream := make(chan *loghisto.ProcessedMetricSet, 2)
ms.SubscribeToProcessedMetrics(metricStream)
defer ms.UnsubscribeFromProcessedMetrics(metricStream)
go statPrinter(metricStream, *topK, *period)
ports := []uint16{}
for _, p := range strings.Split(*portsArg, ",") {
port, err := strconv.Atoi(p)
if err == nil {
ports = append(ports, uint16(port))
} else {
fmt.Fprintf(os.Stderr, "Failed to parse port \"%s\": %v\n", p, err)
os.Exit(1)
}
}
if len(ports) == 0 {
fmt.Fprint(os.Stderr, "No ports given! Exiting.\n")
os.Exit(1)
}
// We choose 1518 for the snaplen because it's the default
// ethernet MTU at the link layer. We choose 1000 for the
// timeout based on a measurement for its impact on latency
// impact, but it is less precise.
h, err := pcap.Openlive(*iface, 1518, *promisc, 1000)
if err != nil {
fmt.Fprintf(os.Stderr, "%v", err)
os.Exit(1)
}
defer h.Close()
portArray := strings.Split(*portsArg, ",")
dst := strings.Join(portArray, " or dst port ")
src := strings.Join(portArray, " or src port ")
filter := fmt.Sprintf("tcp and (dst port %s or src port %s)", dst, src)
fmt.Println("using bpf filter: ", filter)
if err := h.Setfilter(filter); err != nil {
fmt.Fprintf(os.Stderr, "%v", err)
os.Exit(1)
}
unparsedPackets := make(chan *pcap.Packet, 16384)
parsedPackets := make(chan *pcap.Packet, 16384)
for i := 0; i < int(math.Max(2, float64(numCPU/4))); i++ {
go packetDecoder(unparsedPackets, parsedPackets)
}
processors := []chan *pcap.Packet{}
for i := 0; i < int(math.Max(2, float64(numCPU/4))); i++ {
p := make(chan *pcap.Packet, 16384)
processors = append(processors, p)
go processor(ms, p)
}
go streamRouter(ports, parsedPackets, processors)
for {
pkt := h.Next()
if pkt != nil {
select {
case unparsedPackets <- pkt:
default:
fmt.Fprint(os.Stderr, "SHEDDING IN MAIN")
}
}
}
}

View File

@ -29,7 +29,7 @@ import (
var (
// MinClusterVersion is the min cluster version this etcd binary is compatible with.
MinClusterVersion = "2.3.0"
Version = "3.0.9"
Version = "3.0.16"
// Git SHA Value will be set during build
GitSHA = "Not provided (use ./build instead of go build)"

View File

@ -18,6 +18,7 @@ import (
"encoding/binary"
"hash"
"io"
"os"
"sync"
"github.com/coreos/etcd/pkg/crc"
@ -39,9 +40,9 @@ type encoder struct {
uint64buf []byte
}
func newEncoder(w io.Writer, prevCrc uint32) *encoder {
func newEncoder(w io.Writer, prevCrc uint32, pageOffset int) *encoder {
return &encoder{
bw: ioutil.NewPageWriter(w, walPageBytes),
bw: ioutil.NewPageWriter(w, walPageBytes, pageOffset),
crc: crc.New(prevCrc, crcTable),
// 1MB buffer
buf: make([]byte, 1024*1024),
@ -49,6 +50,15 @@ func newEncoder(w io.Writer, prevCrc uint32) *encoder {
}
}
// newFileEncoder creates a new encoder with current file offset for the page writer.
func newFileEncoder(f *os.File, prevCrc uint32) (*encoder, error) {
offset, err := f.Seek(0, os.SEEK_CUR)
if err != nil {
return nil, err
}
return newEncoder(f, prevCrc, int(offset)), nil
}
func (e *encoder) encode(rec *walpb.Record) error {
e.mu.Lock()
defer e.mu.Unlock()

View File

@ -69,7 +69,7 @@ func TestWriteRecord(t *testing.T) {
typ := int64(0xABCD)
d := []byte("Hello world!")
buf := new(bytes.Buffer)
e := newEncoder(buf, 0)
e := newEncoder(buf, 0, 0)
e.encode(&walpb.Record{Type: typ, Data: d})
e.flush()
decoder := newDecoder(ioutil.NopCloser(buf))

View File

@ -120,7 +120,10 @@ func Create(dirpath string, metadata []byte) (*WAL, error) {
w := &WAL{
dir: dirpath,
metadata: metadata,
encoder: newEncoder(f, 0),
}
w.encoder, err = newFileEncoder(f.File, 0)
if err != nil {
return nil, err
}
w.locks = append(w.locks, f)
if err = w.saveCrc(0); err != nil {
@ -341,7 +344,10 @@ func (w *WAL) ReadAll() (metadata []byte, state raftpb.HardState, ents []raftpb.
if w.tail() != nil {
// create encoder (chain crc with the decoder), enable appending
w.encoder = newEncoder(w.tail(), w.decoder.lastCRC())
w.encoder, err = newFileEncoder(w.tail().File, w.decoder.lastCRC())
if err != nil {
return
}
}
w.decoder = nil
@ -375,7 +381,10 @@ func (w *WAL) cut() error {
// update writer and save the previous crc
w.locks = append(w.locks, newTail)
prevCrc := w.encoder.crc.Sum32()
w.encoder = newEncoder(w.tail(), prevCrc)
w.encoder, err = newFileEncoder(w.tail().File, prevCrc)
if err != nil {
return err
}
if err = w.saveCrc(prevCrc); err != nil {
return err
}
@ -414,7 +423,10 @@ func (w *WAL) cut() error {
w.locks[len(w.locks)-1] = newTail
prevCrc = w.encoder.crc.Sum32()
w.encoder = newEncoder(w.tail(), prevCrc)
w.encoder, err = newFileEncoder(w.tail().File, prevCrc)
if err != nil {
return err
}
plog.Infof("segmented wal file %v is created", fpath)
return nil

View File

@ -61,7 +61,7 @@ func TestNew(t *testing.T) {
}
var wb bytes.Buffer
e := newEncoder(&wb, 0)
e := newEncoder(&wb, 0, 0)
err = e.encode(&walpb.Record{Type: crcType, Crc: 0})
if err != nil {
t.Fatalf("err = %v, want nil", err)
@ -465,7 +465,7 @@ func TestSaveEmpty(t *testing.T) {
var buf bytes.Buffer
var est raftpb.HardState
w := WAL{
encoder: newEncoder(&buf, 0),
encoder: newEncoder(&buf, 0, 0),
}
if err := w.saveState(&est); err != nil {
t.Errorf("err = %v, want nil", err)

View File

@ -20,9 +20,9 @@ import (
proto "github.com/golang/protobuf/proto"
math "math"
)
import io "io"
io "io"
)
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal