Compare commits

...

328 Commits

Author SHA1 Message Date
420a452267 version: bump up to 3.2.24 2018-07-24 10:24:31 -07:00
348edfeae6 etcdserver: add "etcd_server_go_version" metric
Currently, one has to look at server logs manually,
to see what Go version was used to build etcd server.

Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-07-23 16:38:52 -07:00
0d5497a107 clientv3: fix keepalive send interval when response queue is full
client should update next keepalive send time
even when lease keepalive response queue becomes full.

Otherwise, client sends keepalive request every 500ms
regardless of TTL when the send is only expected to happen
with the interval of TTL / 3 at minimum.

Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-07-23 08:50:44 -07:00
87418c3432 Merge pull request #9942 from wenjiaswe/automated-cherry-pick-of-#9761-upstream-release-3.2
Automated cherry pick of #9761
2018-07-20 14:26:13 -07:00
8c9fd1b5e6 remove hashRevDurations 2018-07-20 13:48:35 -07:00
a3c0a99067 remove hashRevDurations 2018-07-20 13:45:33 -07:00
b3ab14ca9a remove HashByRev 2018-07-20 13:44:15 -07:00
8798c5cd43 etcdserver: rename to "heartbeat_send_failures_total"
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-07-20 09:58:32 -07:00
4e08898571 mvcc: add "etcd_mvcc_hash_(rev)_duration_seconds"
etcd_mvcc_hash_duration_seconds
etcd_mvcc_hash_rev_duration_seconds

Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-07-20 09:57:47 -07:00
8ac6c888cd mvcc/backend: fix defrag duration scale
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-07-20 09:52:46 -07:00
aca5c8f4b6 mvcc/backend: add "etcd_disk_backend_defrag_duration_seconds"
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-07-20 09:52:46 -07:00
3535f7a61f mvcc/backend: document metrics ExponentialBuckets
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-07-20 09:44:15 -07:00
fae9b6f667 mvcc/backend: clean up mutex, logging
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-07-20 09:44:15 -07:00
66d8194e4d etcdserver: add "etcd_server_slow_apply_total"
{"level":"warn","ts":1527101858.6985068,"caller":"etcdserver/util.go:115","msg":"apply request took too long","took":0.114101529,"expected-duration":0.1,"prefix":"","request":"header:<ID:1029181977902852337> put:<key:\"\\000\\000...

Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-07-20 09:42:52 -07:00
2f0e3fd2df etcdserver: add "etcd_server_heartbeat_failures_total"
{"level":"warn","ts":1527101858.4149103,"caller":"etcdserver/raft.go:370","msg":"failed to send out heartbeat; took too long, server is overloaded likely from slow disk","heartbeat-interval":0.1,"expected-duration":0.2,"exceeded-duration":0.025771662}
{"level":"warn","ts":1527101858.4149644,"caller":"etcdserver/raft.go:370","msg":"failed to send out heartbeat; took too long, server is overloaded likely from slow disk","heartbeat-interval":0.1,"expected-duration":0.2,"exceeded-duration":0.034015766}

Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-07-20 09:37:04 -07:00
cad3cf7b11 mvcc/backend: avoid unnecessary metrics update
https://github.com/coreos/etcd/pull/9300

Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-07-03 14:52:16 -07:00
bedba66c69 mvcc: add "etcd_mvcc_db_total_size_in_use_in_bytes"
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-07-03 14:32:56 -07:00
9bc1e15386 mvcc: add "etcd_mvcc_db_total_size_in_bytes"
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-07-03 14:24:56 -07:00
6e0131e83b etcdserver: add "etcd_server_quota_backend_bytes"
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-07-03 13:27:15 -07:00
c0e9e14248 etcdserver: add "etcd_server_slow_read_indexes_total"
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-07-03 12:59:53 -07:00
b763b506ab etcdserver: clarify read index warnings
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-07-03 12:54:42 -07:00
d22ee8423d Merge pull request #9894 from xmudrii/3.2-grpcproxy-tls
etcdmain: backport support for different certs for etcd-gRPC proxy
2018-07-02 10:57:39 -07:00
e5531a4d54 etcdmain/grpc-proxy: add 'metrics-addr' option
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2018-07-02 12:06:25 +02:00
8dabfe12ca etcdmain: cleanup grpcproxy; support different certs for proxy/etcd
Enables TLS termination in grpcproxy.
2018-07-02 11:20:14 +02:00
360484a3f0 tests: update test scripts
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-06-18 14:14:15 -07:00
f8af50a8d8 version: bump up to 3.2.23+git 2018-06-15 09:45:59 -07:00
c9504f61fc version: bump up to 3.2.23 2018-06-15 09:40:41 -07:00
75c159baa8 clientv3: backoff on reestablishing watches when Unavailable errors are encountered 2018-06-14 10:52:52 -07:00
41ece2cf2d e2e: do not test cipher suite in release-3.2
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-06-13 16:04:21 -07:00
5e6adfac06 Merge pull request #9845 from wenjiaswe/automated-cherry-pick-of-#8960-upstream-release-3.2
Automated cherry pick of #8960
2018-06-13 16:02:48 -07:00
b163084a5f metrics: Add server_version metric 2018-06-13 15:03:10 -07:00
ad7db2bb1e tests/semaphore.test.bash: update
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-06-13 14:41:17 -07:00
b5dc2266a6 Makefile: update
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-06-13 14:40:42 -07:00
ba233791e1 Merge pull request #9821 from jpbetz/automated-cherry-pick-of-#9288-origin-release-3.2
Automated cherry pick of detailed "took too long" warnings to release-3.2
2018-06-12 12:51:18 -07:00
0ce2ef14a1 etcdserver: Fix txn request 'took too long' warnings to use loggable request stringer 2018-06-12 12:31:38 -07:00
4db8b94cca etcdserver: Add response byte size and range response count to took too long warning 2018-06-11 16:23:31 -07:00
734e4cf8e6 etcdserver: Replace value contents with value_size in request took too long warning 2018-06-11 15:58:03 -07:00
dcf30b1c54 etcdserver: not print password in the warning message of expensive request
Fix https://github.com/coreos/etcd/issues/9635
2018-06-11 15:50:55 -07:00
065053d859 etcdserver: Fix to backport of #9288 for pre-RequestV2 code 2018-06-07 11:02:00 -07:00
1935a663df etcdserver: improve request took too long warning 2018-06-07 10:29:29 -07:00
2c7eb87c85 version: bump up to 3.2.22+git 2018-06-06 10:48:55 -07:00
1674e682fe version: 3.2.22
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-06-05 19:53:43 -07:00
7c47afd7d2 e2e: test client-side cipher suites with curl
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-06-05 19:53:43 -07:00
3e0cc1e717 etcdmain: add "--cipher-suites" flag
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-06-05 19:53:43 -07:00
6fa95eb497 embed: support custom cipher suites
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-06-05 19:53:43 -07:00
ba4a7e004b integration: test client-side TLS cipher suites
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-06-05 19:53:43 -07:00
4bd81d0933 pkg/transport: add "TLSInfo.CipherSuites" field
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-06-05 19:53:43 -07:00
f690f3a425 pkg/tlsutil: add "GetCipherSuite"
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-06-05 19:53:40 -07:00
af6f459a23 version: bump up to 3.2.21+git 2018-05-31 12:50:14 -07:00
3ac81f3ae2 version: bump up to 3.2.21 2018-05-31 12:43:49 -07:00
4ace7c7d77 mvcc: fix panic by allowing future revision watcher from restore operation
This also happens without gRPC proxy.

Fix panic when gRPC proxy leader watcher is restored:

```
go test -v -tags cluster_proxy -cpu 4 -race -run TestV3WatchRestoreSnapshotUnsync

=== RUN   TestV3WatchRestoreSnapshotUnsync
panic: watcher minimum revision 9223372036854775805 should not exceed current revision 16

goroutine 156 [running]:
github.com/coreos/etcd/mvcc.(*watcherGroup).chooseAll(0xc4202b8720, 0x10, 0xffffffffffffffff, 0x1)
	/home/gyuho/go/src/github.com/coreos/etcd/mvcc/watcher_group.go:242 +0x3b5
github.com/coreos/etcd/mvcc.(*watcherGroup).choose(0xc4202b8720, 0x200, 0x10, 0xffffffffffffffff, 0xc420253378, 0xc420253378)
	/home/gyuho/go/src/github.com/coreos/etcd/mvcc/watcher_group.go:225 +0x289
github.com/coreos/etcd/mvcc.(*watchableStore).syncWatchers(0xc4202b86e0, 0x0)
	/home/gyuho/go/src/github.com/coreos/etcd/mvcc/watchable_store.go:340 +0x237
github.com/coreos/etcd/mvcc.(*watchableStore).syncWatchersLoop(0xc4202b86e0)
	/home/gyuho/go/src/github.com/coreos/etcd/mvcc/watchable_store.go:214 +0x280
created by github.com/coreos/etcd/mvcc.newWatchableStore
	/home/gyuho/go/src/github.com/coreos/etcd/mvcc/watchable_store.go:90 +0x477
exit status 2
FAIL	github.com/coreos/etcd/integration	2.551s
```

gRPC proxy spawns a watcher with a key "proxy-namespace__lostleader"
and watch revision "int64(math.MaxInt64 - 2)" to detect leader loss.
But, when the partitioned node restores, this watcher triggers
panic with "watcher minimum revision ... should not exceed current ...".

This check was added a long time ago, by my PR, when there was no gRPC proxy:

https://github.com/coreos/etcd/pull/4043#discussion_r48457145

> we can remove this checking actually. it is impossible for a unsynced watching to have a future rev. or we should just panic here.

However, now it's possible that an unsynced watcher has a future
revision, when it was moved from a synced watcher group through
restore operation.

This PR adds "restore" flag to indicate that a watcher was moved
from the synced watcher group with restore operation. Otherwise,
the watcher with future revision in an unsynced watcher group
would still panic.

Example logs with future revision watcher from restore operation:

```
{"level":"info","ts":1527196358.9057755,"caller":"mvcc/watcher_group.go:261","msg":"choosing future revision watcher from restore operation","watch-key":"proxy-namespace__lostleader","watch-revision":9223372036854775805,"current-revision":16}
{"level":"info","ts":1527196358.910349,"caller":"mvcc/watcher_group.go:261","msg":"choosing future revision watcher from restore operation","watch-key":"proxy-namespace__lostleader","watch-revision":9223372036854775805,"current-revision":16}
```

Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-05-31 11:42:50 -07:00
a09874b40c auth: Fix simpleToken to respect disabled state for assign 2018-05-23 15:45:34 -07:00
a5437f246b version: bump up to 3.2.20+git 2018-05-09 10:19:54 -07:00
f272557516 version: bump up to 3.2.20 2018-05-09 10:03:42 -07:00
71eba353d2 Merge pull request #9694 from mohitsoni/release-3.2
Cherry-picking PR 7967 to release-3.2
2018-05-04 12:16:09 -07:00
557eee826f etcdserver: purge old snap.db files
Lots of garbage db files in #7957. Should purge.
2018-05-04 10:51:59 -07:00
b71df1f814 version: bump up to 3.2.19+git 2018-04-24 14:25:10 -07:00
8a9b3d5385 version: bump up to 3.2.19 2018-04-24 14:15:36 -07:00
4e7af272b5 etcdmain: fix "InitialElectionTickAdvance"
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-23 11:09:46 -07:00
8ba6bf466f etcdserver: log skipping initial election tick
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-23 10:58:11 -07:00
d549256dd9 etcdmain: add "--initial-election-tick-advance"
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-23 10:57:47 -07:00
40aee7bdf8 embed: add "InitialElectionTickAdvance"
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-23 10:56:03 -07:00
7de2064559 integration: set InitialElectionTickAdvance to true by default
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-23 10:53:49 -07:00
0d2fe21d8e etcdserver: add "InitialElectionTickAdvance"
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-23 10:53:33 -07:00
d45053c068 etcdserver: add is_leader prometheus metric that is 1 on the leader.
Before this change, we had no way to find a leader using /metrics
endpoint. This commit adds a metric to do that.
2018-04-19 14:59:53 -07:00
dfcdaa5cc9 integration: fix peer TLS tests
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-17 15:15:02 -07:00
f9d58d2c9f integration: re-overwrite "httptest.Server" TLS.Certificates
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-17 06:16:52 -07:00
7f1225a128 pkg/transport: don't set certificates on tls config 2018-04-17 06:16:15 -07:00
21e7a30d31 functional/tester: remove Txn stresser in 3.2
Nested Txn is not supported

Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-12 19:42:33 -07:00
4e11cea8cb functional: disable auto TLS
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-12 19:13:50 -07:00
8f59849ca2 vendor: add "gogo/protobuf/gogoproto"
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-12 19:00:01 -07:00
3b770ee8b4 test: set up gopath in 3.2
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-12 18:26:15 -07:00
71a5f77032 functional: create symlinks for build
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-12 16:05:13 -07:00
14ce0ea9ba travis: run "build" tests for "functional"
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-12 15:57:15 -07:00
7b1d09023b functional/rpcpb: remove "InsecureSkipVerify"
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-12 15:55:20 -07:00
6efde070b8 functional: disable TLS in release-3.2
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-12 15:32:14 -07:00
244b3b3d3c snapshot: remove tests
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-12 15:23:35 -07:00
5bc5c49193 functional: initial commit (copied from master)
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-12 13:20:06 -07:00
2a42b47400 snapshot: initial commit (for functional tests)
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-12 13:20:06 -07:00
df90e3ce21 test: simplify
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-12 11:08:01 -07:00
1dfce6b565 etcdserver/stats: make all fields guarded by mutex. 2018-04-11 19:49:31 -07:00
67a97c9f1a etcdserver/stats: fix stats data race. 2018-04-11 19:49:31 -07:00
7f1d94d5e2 test: remove build flag "-a"
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-11 10:17:13 -07:00
a43ae13106 cmd/vendor: add "go.uber.org/zap"
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-10 23:48:34 -07:00
748d2204a2 pkg/proxy: initial commit (for functional tests)
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-10 23:47:47 -07:00
9dea2f7f1f tools: remove
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-10 23:46:52 -07:00
c3c88f49bd Makefile: sync with master
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-10 23:34:50 -07:00
9e88e0c017 tests/*: clean up travis, semaphore scripts
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-10 23:32:41 -07:00
487c8d3d61 etcdserver: fix "lease_expired_total" metrics
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-10 17:59:07 -07:00
a5dc3b7cb1 tests: move test scripts
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-04-09 11:34:44 -07:00
71e96522dc version: bump up to 3.2.18+git 2018-03-29 10:57:58 -07:00
eddf599c68 version: bump up to 3.2.18 2018-03-29 10:45:17 -07:00
a00b652460 semaphore: run release tests with v3.2.17
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-03-29 09:23:55 -07:00
a089a747b5 e2e: remove "/v3beta" endpoints
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-03-28 12:12:44 -07:00
dd64080eac e2e: remove "authHeader"
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-03-28 11:38:45 -07:00
3f8213a7af e2e: remove some tests from master branch
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-03-28 11:34:55 -07:00
2160c476a2 clientv3: skip "TestDialTimeout"
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-03-28 11:28:14 -07:00
29185da0e0 Merge pull request #9502 from jpbetz/automated-cherry-pick-of-#9415-release-3.2
Automated cherry pick of #9415
2018-03-28 11:21:36 -07:00
4acfe50869 Merge pull request #9503 from jpbetz/automated-cherry-pick-of-#9437-release-3.2
Automated cherry pick of #9437
2018-03-28 11:04:17 -07:00
e6c5cdf935 etcdserver: adjust election ticks on restart
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-03-28 10:58:09 -07:00
0a4560319f rafthttp: add missing "peer_sent_failures_total" metrics call
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-03-28 10:09:50 -07:00
6d7f592c38 etcdserver: make "advanceTicks" method
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-03-28 10:06:57 -07:00
431fd391da rafthttp: add "ActivePeers" to "Transport"
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-03-28 10:06:28 -07:00
39ea00bc92 Documentation/upgrades: backport all upgrade guides
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-03-27 10:31:57 -07:00
6f848b3fd3 version: bump up to 3.2.17+git 2018-03-08 14:16:24 -08:00
28c47bb2f8 version: bump up to 3.2.17 2018-03-08 13:45:31 -08:00
ea0fda66eb clientv3/integration: test "rpctypes.ErrLeaseTTLTooLarge"
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-03-08 10:38:18 -08:00
6e5e3d134e *: enforce max lease TTL with 9,000,000,000 seconds
math.MaxInt64 / time.Second is 9,223,372,036. 9,000,000,000 is easier to
remember/document.

Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-03-08 10:38:07 -08:00
58f9080f60 Merge pull request #9404 from jpbetz/automated-cherry-pick-of-#9379-origin-release-3.2
Automated cherry pick of #9379
2018-03-08 08:21:35 -08:00
f8fc817ce8 *: remove unused env vars
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-03-08 01:36:21 -08:00
3bb8edc6aa e2e: fix missing "apiPrefix"
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-03-08 01:08:26 -08:00
4e7b9d223d e2e: add "Election" grpc-gateway test cases
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-03-08 01:07:19 -08:00
fe90bc448c Merge pull request #9405 from jpbetz/automated-cherry-pick-of-#9347-origin-release-3.2
Automated cherry pick of #9347
2018-03-08 00:59:26 -08:00
3710c249eb Merge pull request #9403 from jpbetz/automated-cherry-pick-of-#9336-origin-release-3.2
Automated cherry pick of #9336
2018-03-08 00:58:52 -08:00
cbea4efaf2 etcdserver: enable "CheckQuorum" when starting with "ForceNewCluster"
We enable "raft.Config.CheckQuorum" by default in other
Raft initial starts. So should start with "ForceNewCluster".

Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-03-07 23:12:32 -08:00
273a43d4d8 api/v3election: error on missing "leader" field
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-03-07 23:08:28 -08:00
ceaa55e57e httpproxy: cancel requests when client closes a connection 2018-03-07 23:01:10 -08:00
a537163e9e hack/scripts-dev: fix indentation in run.sh
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-03-07 14:32:08 -08:00
660f7fd8a0 hack/scripts-dev: sync with master
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-03-07 14:22:18 -08:00
e48a18256f travis: use Go 1.8.7, sync with master
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-03-07 14:21:02 -08:00
a61ba42918 Documentation/op-guide: highlight defrag operation "--endpoints" flag
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-03-05 11:13:54 -08:00
83c94e9e58 etcdctl: highlight "defrag" command caveats
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-03-05 11:12:41 -08:00
55e008f64b Documentation: make "Consul" section more objective
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-03-02 10:42:13 -08:00
a4827447be travis: update Go version 2018-02-27 11:28:37 -08:00
41830ca523 semaphore: update Go, release test version 2018-02-27 11:27:47 -08:00
4a620e2013 version: 3.2.16+git 2018-02-12 14:29:08 -08:00
121edf0467 version: 3.2.16 2018-02-12 09:43:33 -08:00
b5abfe1858 Merge pull request #9297 from jpbetz/automated-cherry-pick-of-#9281-origin-release-3.2
Automated cherry pick of #9281
2018-02-07 22:32:59 -08:00
33633da64c mvcc: fix watchable store test for 3.2 cherrypick of #9281 2018-02-07 15:57:34 -08:00
e08abbeae4 mvcc: restore unsynced watchers
In case syncWatchersLoop() starts before Restore() is called,
watchers already added by that moment are moved to s.synced by the loop.
However, there is broken logic that moves watchers from s.synced
to s.unsynced without setting keyWatchers of the watcherGroup.
Eventually syncWatchers() fails to pick up those watchers from s.unsynced
and no events are sent to the watchers, because newWatcherBatch() called
in the function uses wg.watcherSetByKey() internally that requires
a proper keyWatchers value.
2018-02-07 15:34:21 -08:00
bdc3ed1970 version: 3.2.15+git
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-01-23 14:04:11 -08:00
1b3ac99e8a version: 3.2.15
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-01-22 11:31:16 -08:00
fd4595aa04 clientv3/integration: add TestMemberAddUpdateWrongURLs
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-01-22 11:31:01 -08:00
e5f63b64c3 clientv3: prevent no-scheme URLs to cluster APIs
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-01-22 11:27:04 -08:00
68d27b2d84 etcdserver/api/v3rpc: debug-log client disconnect on TLS, http/2 stream CANCEL
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-01-19 12:50:06 -08:00
7c4274be05 version: 3.2.14+git
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-01-11 14:15:36 -08:00
fb5cd6f1c7 version: 3.2.14
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-01-11 11:19:37 -08:00
6999bbb47b mvcc: check null before set FillPercent not to panic
Since CreateBucketIfNotExists() can return nil when it gets an error,
accessing FillPercent must be done after a nil check, not to cause
a panic.
2018-01-08 17:46:06 -08:00
df4036ab73 etcdserver/api/v3rpc: debug user cancellation and log warning for rest
The context error with cancel code is typically for user cancellation which
should be at debug level. For other error codes we should display a warning.

Fixes #9085
2018-01-08 17:46:01 -08:00
848590e99e version: bump up to 3.2.13+git
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-01-02 14:32:36 -08:00
95a726a27e version: bump up to 3.2.13
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-01-02 13:29:56 -08:00
288ef7d6fc embed: fix gRPC server panic on GracefulStop
Cherry-pick https://github.com/coreos/etcd/pull/8987.

Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-01-02 13:29:40 -08:00
7b7722ed97 integration: test GracefulStop on secure embedded server
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-01-02 12:44:28 -08:00
8a358f832a clientv3/integration: fix TestKVLargeRequests with -tags cluster_proxy
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-01-02 11:12:44 -08:00
2a63909648 tools/functional-tester: remove duplicate grpclog set
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-01-02 11:12:38 -08:00
7fb1fafe0c etcdserver/api/v3rpc: set grpclog once
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>

Conflicts:
	etcdserver/api/v3rpc/grpc.go
2018-01-02 11:12:33 -08:00
7025d7c665 etcdserver,embed: discard gRPC info logs when debug is off
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>

Conflicts:
	embed/etcd.go
	etcdserver/api/v3rpc/grpc.go
	etcdserver/config.go
2018-01-02 11:12:26 -08:00
4ab213a4ec etcdserver/api/v3rpc: log stream error with debug level
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-01-02 11:11:38 -08:00
bb27a63e64 version: bump up to 3.2.12+git
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2018-01-02 11:11:25 -08:00
b19dae0065 version: bump up to 3.2.12
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2017-12-20 12:49:55 -08:00
c8915bdb04 integration: bump up wait leader timeout for slow CIs
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2017-12-20 12:47:58 -08:00
b6896aa951 clientv3/integration: fix TestKVPutError
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2017-12-20 12:47:49 -08:00
452ccd693d clientv3/integration: test large KV requests
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2017-12-20 12:47:30 -08:00
348b25f3dc clientv3: call other APIs with default gRPC call options
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2017-12-20 12:41:57 -08:00
c67e6d5f5e clientv3: call KV/Txn APIs with default gRPC call options
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2017-12-20 12:39:11 -08:00
e82f0557ac clientv3: configure gRPC message limits in Config
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2017-12-20 12:22:48 -08:00
4cebdd274c integration: remove typo in "TestV3LargeRequests"
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2017-12-20 10:10:45 -08:00
0363c4b1ef integration: test large request response back from server
Address https://github.com/coreos/etcd/issues/9043.
Won't fix it, but we need test coverage on response back
from server as well.

Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2017-12-20 10:10:15 -08:00
5579dc200d test: bump up clientv3/integration timeout
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2017-12-20 10:09:47 -08:00
3fd6e7e1de vendor: pin grpc v1.7.5, grpc-gateway v1.3.0 (no code change)
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2017-12-19 13:04:45 -08:00
1fa227da71 integration: add "TestV3AuthWithLeaseRevokeWithRoot"
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2017-12-15 09:17:30 -08:00
47f6d32e3e Merge pull request #9013 from gyuho/automated-cherry-pick-of-#8999-origin-release-3.2
Automated cherry pick of #8999
2017-12-14 09:40:21 -08:00
0265457183 compactor: fix error message of Revision compactor
Reorder the parameters so that Noticef can output the error properly.
2017-12-14 08:39:22 -08:00
04ec94f8d1 semaphore: run upgrade tests against v3.2.11
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2017-12-07 14:36:30 -08:00
ed4d70888c semaphore.sh: do not fail on "Too many goroutines"
To not fail on "pkg/testutil" unit tests.

Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2017-12-05 17:03:32 -08:00
b9aa507f66 hack: sync with master branch (needed for release)
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2017-12-05 11:11:58 -08:00
4ed57689cb gitignore: sync with master branch
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2017-12-05 11:09:26 -08:00
a2850218b2 scripts/build-docker: build both gcr.io and quay.io images
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2017-12-05 11:08:34 -08:00
fc25300cf0 version: bump up to v3.2.11+git
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2017-12-05 11:07:43 -08:00
1e1dbb2392 version: bump up to v3.2.11
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2017-12-04 14:24:14 -08:00
ff1f08c93f vendor: upgrade grpc/grpc-go to v1.7.4
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2017-12-04 14:23:43 -08:00
78fb932156 Merge pull request #8947 from gyuho/automated-cherry-pick-of-#8939-origin-release-3.2
Automated cherry pick of #8939
2017-12-04 09:35:30 -08:00
c142134a28 Documentation/op-guide: remove non-released flag in monitoring.md
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
2017-12-04 09:27:45 -08:00
b44b91462e etcdmain: add more details to TLS HandshakeFailure
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-12-01 09:50:57 -08:00
5921b2c035 api/v3rpc: log grpc stream send/recv errors in server-side
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-11-30 11:21:48 -08:00
a19672befc version: bump up to v3.2.10+git
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-11-17 11:10:48 -08:00
694728c496 version: bump up to v3.2.10
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-11-16 13:20:19 -08:00
1557f8b534 Merge pull request #8813 from jpbetz/bbolt-3.2
vendor: Backport bbolt freelist corruption and fragmentation fixes to 3.2 branch
2017-11-16 13:15:04 -08:00
4b9bfa17ee test: Clean agent directories on disk before functional test runs, not after
This is primarily so CI tooling can capture the agent logs after the functional tester runs.
2017-11-16 12:44:30 -08:00
8de0c0419a vendor: Switch from boltdb v1.3.0 to coreos/bbolt v1.3.1-coreos.3 2017-11-16 12:43:17 -08:00
3039c639c0 Merge pull request #8867 from gyuho/clientv3-backport-to-release-3.2
clientv3: backport new balancer to release-3.2, upgrade gRPC to v1.7.3
2017-11-16 10:12:40 -08:00
91335d01bb proxy/grpcproxy: wait until register before Serve
It was fatal-ing with:

grpclog.Fatalf("grpc: Server.RegisterService after Server.Serve for %q", sd.ServiceName)

Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-11-16 09:34:11 -08:00
a8c84ffc93 clientv3: fix client balancer with gRPC v1.7
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-11-16 09:05:06 -08:00
939337f450 *: add max requests bytes, keepalive to server, blackhole methods to integration
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-11-16 09:05:06 -08:00
2a6d50470d *: use grpclog.NewLoggerV2
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-11-16 09:05:06 -08:00
d62e39d5ca *: deprecate "metadata.NewContext"
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-11-16 09:05:06 -08:00
7f0f5e2b3c bill-of-materials: regenerate
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-11-16 09:05:06 -08:00
eb1589ad35 *: regenerate proto
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-11-16 09:05:06 -08:00
546d5fe835 scripts/genproto: update protobuf, grpc-gateway gen
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-11-16 09:05:06 -08:00
fddae84ce2 vendor: update grpc, grpc-gateway, protobuf
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-11-16 09:05:02 -08:00
6d406285e6 glide: update grpc, grpc-gateway
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-11-16 05:32:02 -08:00
8dc20ead31 Merge pull request #8886 from gyuho/qqq
release-3.2: Revert "embed: fix HTTPs + DNS SRV discovery"
2017-11-15 14:47:57 -08:00
d3a3c3154e Revert "embed: fix HTTPs + DNS SRV discovery"
This reverts commit f79d5aaca4.
2017-11-15 14:46:32 -08:00
d5572964e1 Merge pull request #8874 from gyuho/release-branch
release-3.2: fix unit test script, remove old tests, backport functional testing data dir commands
2017-11-15 14:41:41 -08:00
ea51c25030 clientv3: remove balancer tests
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-11-15 00:15:06 -08:00
d1447a8f5a test: fix unit tests, remove some unnecessary tests
Unit tests weren't running in CIs.
And removing some unnecessary tests (v2 client, Examples)
in release branch.

Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-11-15 00:15:02 -08:00
c28c14a5f4 test: Clean agent directories on disk before functional test runs, not after
This is primarily so CI tooling can capture the agent logs after the functional tester runs.

Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-11-14 17:08:26 -08:00
f9eb75044a semaphore: prioritize time out test fails
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-11-14 17:04:31 -08:00
2250f71e23 semaphore: manually pin v3.2.9 for release upgrade tests
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-11-08 12:45:00 -08:00
52be1d7b19 hack/scripts-dev: add Makefile, Dockerfile-test
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-11-06 14:13:10 -08:00
712024d3e5 semaphore.sh: fail tests with "(--- FAIL:|leak)"
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-11-03 10:59:01 -07:00
7d99afdc7c test: fail tests with "--- FAIL:"
To differentiate from gRPC client log "TRANSIENT_FAILURE"

Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-11-03 10:58:43 -07:00
5ceea41af4 travis: upgrade Go to 1.8.5, and use container
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-10-25 19:45:33 -07:00
2f74456443 semaphore.sh: add to release-3.2 branch
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-10-25 19:44:05 -07:00
fc87ae4202 version: bump up to v3.2.9+git
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-10-25 19:43:30 -07:00
f1d7dd87da version: bump up to v3.2.9
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-10-06 08:58:06 -07:00
ad212d339b Makefile: sync with master branch on test commands
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-10-06 08:57:42 -07:00
9f49665284 Documentation/op-guide: remove git merge line in monitoring.md
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-10-06 08:55:56 -07:00
78f8d6e185 embed: fix HTTPs + DNS SRV discovery 2017-10-05 16:03:22 -07:00
a954a0de53 Makefile: initial commit, update Dockerfile
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-10-05 10:31:24 -07:00
0c3defdd2b vendor: update 'golang.org/x/crypto'
To include 6c586e17d9.

Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-10-05 09:49:00 -07:00
814588d166 travis: use Go 1.8.4
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-10-05 09:19:29 -07:00
2d2932822c version: bump up to 3.2.8+git
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-10-05 09:18:48 -07:00
e211fb6de3 version: bump up to 3.2.8
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-09-26 02:41:18 +09:00
fb7e274309 Documentation/op-guide: remove grafana demo link
The dashboard was removed during Tectonic migration
in AWS, while the Grafana still runs in GCP.

Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-09-26 02:40:59 +09:00
4a61fcf42d docs: remove link-breaking space 2017-09-20 08:11:02 +09:00
4c8fa30dda e2e: test no value is returned in TestCtlV3GetKeysOnly
Test was checking key name is returned, but was not correctly checking
no value is returned.
2017-09-14 04:42:06 +09:00
01c4f35b30 grpcproxy: respect KeysOnly flag
Fixes #8478
2017-09-14 04:41:58 +09:00
15e9510d2c client: fail over to next endpoint on oneshot failure
Fixes #8515
2017-09-08 13:28:55 -07:00
09b7fd4975 version: bump up to 3.2.7+git
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-09-01 14:03:26 -07:00
bb66589f8c version: bump up to 3.2.7
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-09-01 09:15:15 -07:00
267a2fc8c9 integration: check concurrent auth ops don't cause old rev errors 2017-08-25 13:13:56 -07:00
1fc300ecbd testutil: don't panic on AssertNil on non-nil errors 2017-08-25 13:13:26 -07:00
877d0ce469 etcdserver: consolidate error checking for v3_server functions
Duplicated error checking code moved into raftRequest/raftRequestOnce.
2017-08-23 14:39:59 -07:00
2188513161 concurrency: retry snapshot serializable stm if writes since first header rev
Was checking the rset key mod rev, which does not work.
2017-08-22 20:53:47 -07:00
5c7cff66b6 integration: test serializable snapshot STM with old readset revisions
Was hanging.
2017-08-22 20:53:41 -07:00
8c99ab80bd version: bump up to 3.2.6+git
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-08-21 13:07:06 -07:00
9d43462d17 version: bump up to 3.2.6
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-08-21 10:40:55 -07:00
78d68226e6 mvcc: sending events after restore
Fixes: #8411
2017-08-21 10:39:46 -07:00
e9d576c3d6 Documentation: fix broken link on FAQ
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-08-18 08:43:14 -07:00
1c578cd442 e2e: test booting etcd with multiple peer listeners 2017-08-18 08:43:06 -07:00
b97714b3e6 embed: associate peer serve() listener with corresponding peer
Fixes #8383
2017-08-17 14:16:56 -07:00
ce0a61ff67 dl_build: fix minor typo 2017-08-17 11:44:02 -07:00
74783a38ae docs: revising to match sidebar structure. 2017-08-16 17:03:28 -07:00
d372ff96a0 docs: link fix. 2017-08-16 17:03:12 -07:00
8c7b9db9cc docs: slight rearranging of top two sections. 2017-08-16 17:02:05 -07:00
f1fb342305 docs: adding an index for upgrade pages. 2017-08-16 17:01:49 -07:00
fa0f278783 embed: add 'enable-pprof' tag for config file
Fix https://github.com/coreos/etcd/issues/8402.

Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-08-15 11:52:48 -07:00
e197c14847 mvcc: test keys gauge is reloaded correctly on restore 2017-08-10 12:59:24 -07:00
e7bf5477de mvcc: reset keys gauge on restore
Fixes #8388
2017-08-10 12:59:19 -07:00
f81b72fd93 version: bump up to v3.2.5+git
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-08-04 11:30:57 -07:00
d0d1a87aa9 version: bump up to v3.2.5
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-08-04 08:40:50 -07:00
7c6a9a7317 contrib/raftexample: use bytes.Buffer.String (no 'string()')
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-08-04 08:40:29 -07:00
be8f102efb grpcproxy: forward PrevKv flag in Put 2017-08-04 07:32:17 -07:00
3003901447 integration: test Put with PrevKey=true
Was missing in proxy.
2017-08-04 07:32:11 -07:00
157cfac31b ctlv3/command: remove double-quote typos in fields printer
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-08-01 17:25:53 -07:00
40a1704e6f ctlv3: exit non-zero on unhealthy ep command 2017-07-31 16:00:23 -07:00
30981ecb0a e2e/docker: docker image for testing wildcard DNS 2017-07-24 09:54:55 -07:00
f65a11ced5 fixtures: generate wildcard DNS SAN cert
DNS: *.etcd.local
2017-07-24 09:54:55 -07:00
db4838d4eb transport: use reverse lookup to match wildcard DNS SAN
Fixes #8268
2017-07-24 09:54:55 -07:00
8ab42fb045 *: move v2http handlers without /v2 prefix to etcdhttp
Lets --enable-v2=false configurations provide /metrics, /health, etc.

Fixes #8167
2017-07-24 09:54:48 -07:00
ff9a0a3527 version: bump up to 3.2.4+git
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-07-24 09:14:34 -07:00
c31bec0f29 version: bump up to 3.2.4
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-07-19 08:37:30 -07:00
19fe4b0cac grpcproxy: return nil on receiving snapshot EOF
Gets "code = OutOfRange desc = EOF" errors otherwise.
2017-07-19 08:33:44 -07:00
a5d94fe229 integration: test embed.Etcd.Close with watch
Ensure 'Close' returns in time when there are open
connections (watch streams).

Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-07-14 18:52:20 -07:00
e8f3cbf1c6 embed: wait up to request timeout for pending RPCs when closing
Both grpc.Server.Stop and grpc.Server.GracefulStop close the listeners
first, to stop accepting the new connections. GracefulStop blocks until
all clients close their open transports(connections). Unary RPCs
only take a few seconds to finish. Stream RPCs, like watch, might never
close the connections from client side, thus making gRPC server wait
forever.

This patch still calls GracefulStop, but waits up to 10s before manually
closing the open transports.

Address https://github.com/coreos/etcd/issues/8224.

Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-07-14 18:52:20 -07:00
856502f788 version: bump up to 3.2.3+git
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-07-14 16:04:54 -07:00
ae23b0ef2f version: bump up to 3.2.3
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-07-13 12:09:48 -07:00
5ee89be616 testutil: whitelist WaitGroup.Done
Calling a WaitGroup.Done() in a defer will sometimes trigger the leak
detector since the WaitGroup.Wait() will unblock before the defer
block completes. If the leak detector runs before the Done() is
rescheduled, it will spuriously report the finishing Done() as a leak.
This happens enough in CI to be irritating; whitelist it and ignore.
2017-07-13 11:14:12 -07:00
38373b342d test: sync with etcd-agent start in functional_pass
Fix https://github.com/coreos/etcd/issues/8211.

Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-07-13 11:14:03 -07:00
536a5f594b v3rpc: Let clients establish unlimited streams
From go-grpc v1.2.0, the number of max streams per client is set to 100
by default by the server side. This change makes it impossible
for third party proxies and custom clients to establish many streams.
2017-07-12 10:46:33 -07:00
49e6916e66 dev-guide: document using range_end for prefixes with json
Lack of a range_end example has caused some confusion.
2017-07-12 10:40:37 -07:00
b9b6f6f7c4 Documentation: refer to LeaseKeepAliveRequest for lease refresh 2017-07-12 10:40:26 -07:00
6ecbb3bbc5 version: bump up to 3.2.2+git
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-07-12 10:36:16 -07:00
cb2a496c4d version: bump up to 3.2.2
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-07-07 09:01:47 -07:00
fdf525a3fd dev-guide: update experimental APIs
No experimental APIs at the moment.

Fixes #8212
2017-07-07 09:01:30 -07:00
40468ab11f transport: accept connection if matched IP SAN but no DNS match
The IP SAN check would always do a DNS SAN check if DNS is given
and the connection's IP is verified. Instead, don't check DNS
entries if there's a matching IP.

Fixes #8206
2017-07-07 09:01:11 -07:00
f8f79666d4 embed: connect json gateway with user-provided listen address
net.Listener says its address is [::] when given 0.0.0.0, breaking
hosts that have ipv6 disabled.

Fixes #8151
Fixes #7961
2017-07-07 09:00:40 -07:00
fefcf348f1 embed: share grpc connection for grpc json services 2017-07-07 09:00:32 -07:00
81d39a75ff fixtures: add gencerts.sh, generate CRL 2017-07-07 09:00:01 -07:00
8f2b48465f lease: stop lessors after tests
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-06-30 11:18:55 -07:00
026c1734b2 Documentation/faq: fix typo in flag names
Signed-off-by: Hui Kang <kangh@us.ibm.com>
2017-06-30 01:28:44 -07:00
81e1d03d02 Documentation/v2: 'etcd v2' to the title
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-06-30 01:28:20 -07:00
6171334595 benchmark: refactor watch benchmark 2017-06-27 07:35:01 -07:00
55de54a757 lessor: extend leases on promote if expires will be rate limited
Instead of unconditionally randomizing, extend leases on promotion
if too many leases expire within the same time span. If the server
has few leases or spread out expires, there will be no extension.

Squashed previous commits for https://github.com/coreos/etcd/pull/8149.

Author: Anthony Romano <anthony.romano@coreos.com>

This is a combination of 4 commits below:

lease: randomize expiry on initial refresh call

Randomize the very first expiry on lease recovery
to prevent recovered leases from expiring all at
the same time.

Address https://github.com/coreos/etcd/issues/8096.

integration: remove lease exist checking on randomized expiry

Lease with TTL 5 should be renewed with randomization,
thus it's still possible to exist after 3 seconds.

lessor: extend leases on promote if expires will be rate limited

Instead of unconditionally randomizing, extend leases on promotion
if too many leases expire within the same time span. If the server
has few leases or spread out expires, there will be no extension.

Revert "integration: remove lease exist checking on randomized expiry"

This reverts commit 95bc33f37f. The new
lease extension algorithm should pass this test.
2017-06-23 13:31:59 -07:00
c14aad0ba6 lease: rate limit revoke runLoop
Fix https://github.com/coreos/etcd/issues/8097.

Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-06-23 13:28:33 -07:00
91ccc93042 version: bump up to v3.2.1+git
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-06-23 10:33:24 -07:00
61fc123e7a version: bump up to 3.2.1
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-06-22 09:47:21 -07:00
71d2008385 mvcc: use GaugeFunc metric to load db size when requested
Relying on mvcc to set the db size metric can cause it to
miss size changes when a txn commits after the last write
completes before a quiescent period. Instead, load the
db size on demand.

Fixes #8146
2017-06-22 09:47:01 -07:00
79794bf556 integration: test mvcc db size metric is updated following defrag 2017-06-22 09:46:54 -07:00
db0ca8963f test: run basic functional tests
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-06-20 17:15:22 -07:00
27a3356c74 etcd-tester: add 'exit-on-failure'
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-06-20 17:15:16 -07:00
4526284326 mvcc: restore into tree index with one key index
Clobbering the mvcc kvindex with new keyIndexes for each restore
chunk would cause index corruption by dropping historical information.
2017-06-20 10:58:42 -07:00
0b0b1992b8 mvcc: test restore and deletes with small chunk sizes 2017-06-20 10:58:35 -07:00
ed7ef5be8b mvcc: set db size metric on restore
Fixes #8080
2017-06-20 10:58:16 -07:00
ff5be50ee5 integration: test mvcc db size metric is set on restore 2017-06-20 10:58:10 -07:00
a032b3b914 v3rpc: treat nil txn request op as error
Fixes #7889
2017-06-20 10:57:41 -07:00
9388a27649 dev-guide: add txn json example 2017-06-20 10:57:35 -07:00
af1d732916 e2e: test txn over grpc json 2017-06-20 10:57:27 -07:00
939aa66b48 test: 'FAIL' on release binary download failure
I see CI is failing to download release binaries
but exit code doesn't trigger CI job failure.

We need 'FAIL' string.

Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-06-20 10:55:19 -07:00
3365dd4ff0 Documentation/op-guide: fix failed RPC rate, leader election metrics
This fixes failed RPC rate query, where we do not need
subtraction because we already query by the status code.
Also adds grpc_method to make it more specific. Most of the
time, the failure recovers within 10-second, which is our
Prometheus scrape interval, so 'rate' query might not cover
that time window, showing as 0s, but still shows up in the graph.

Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-06-15 12:00:40 -07:00
959d55ae80 bill-of-materials: regenerate with multi licenses
Fix https://github.com/coreos/etcd/issues/8086.

Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-06-14 08:44:11 -07:00
3e1992140a build-aci: Fix ACI image name
The appc discovery spec states that the architecture specifier in the ACI
image file name will be an ACI architecture value.  Our build scripts were
using GOARCH in the image name, which is incorrect for arm64/aarch64.
See: https://github.com/appc/spec/blob/master/spec/discovery.md

Fixes errors like these on arm64 machines:

  $ rkt --debug --insecure-options=image fetch coreos.com/etcd:v3.2.0-rc.1
  image: remote fetching from URL "https://github.com/coreos/etcd/releases/download/v3.2.0-rc.1/etcd-v3.2.0-rc.1-linux-aarch64.aci"
  fetch: bad HTTP status code: 404

Signed-off-by: Geoff Levand <geoff@infradead.org>
2017-06-14 08:43:58 -07:00
b547b982b9 Documentation/upgrades: link to previous guides
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-06-09 13:04:10 -07:00
56477ca998 version: bump up to 3.2.0+git
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-06-09 13:03:56 -07:00
66722b1ada version: bump up to 3.2.0
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-06-09 10:59:09 -07:00
963339d265 rafthttp: permit very large v2 snapshots
v2 snapshots were hitting the 512MB message decode limit, causing
sending snapshots to new members to fail for being too big.
2017-06-09 10:49:51 -07:00
c87594f27c etcdserver: use same ReadView for read-only txns
A read-only txn isn't serialized by raft, but it uses a fresh
read txn for every mvcc access prior to executing its request ops.
If a write txn modifies the keys matching the read txn's comparisons,
the read txn may return inconsistent results.

To fix, use the same read-only mvcc txn for the duration of the etcd
txn. Probably gets a modest txn speedup as well since there are
fewer read txn allocations.
2017-06-09 09:50:43 -07:00
e72ad5dd2a mvcc: create TxnWrites from TxnRead with NewReadOnlyTxnWrite
Already used internally by mvcc, but needed by etcdserver txns.
2017-06-09 09:50:37 -07:00
3eb5d24cab integration: test txn comparison and concurrent put ordering 2017-06-09 09:50:30 -07:00
8b9041a938 Documentation/op-guide: do not use host network, fix indentation
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-06-09 09:14:21 -07:00
864ffec88c v2http: put back /v2/machines and mark as non-deprecated
This reverts commit 2bb33181b6. python-etcd
seems to depend on /v2/machines and the maintainer vanished. Plus, it is
prefixed with /v2/ so it probably can't be deprecated anyway.
2017-06-08 12:05:59 -07:00
12bc2bba36 etcdserver: add leaseExpired debugging metrics
Fix https://github.com/coreos/etcd/issues/8050.

Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-06-08 11:23:12 -07:00
3a43afce5a Documentation/op-guide: fix 'grpc_code' field in metrics
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-06-08 10:16:07 -07:00
0e56ea37e7 fileutil: return immediately if preallocating 0 bytes
fallocate will return EINVAL, causing zeroing to the end of a
0 byte file to fail.

Fixes #8045
2017-06-07 12:59:35 -07:00
743192aa3b *: clear rarer shellcheck errors on scripts
Clean up the tail of the warnings
2017-06-06 10:44:59 -07:00
e8b156578f travis: add shellcheck 2017-06-06 10:44:53 -07:00
61f3338ce7 test: shellcheck 2017-06-06 10:44:46 -07:00
effffdbdca test, osutil: disable setting SIG_DFL on linux if built with cov tag
Was causing etcd to terminate before finishing writing its
coverage profile.
2017-06-06 09:47:22 -07:00
9bac803bee Documentation/op-guide: fix typo in grafana.json
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-06-06 09:47:15 -07:00
9169ad0d7d *: fix go tool vet -all -shadow errors 2017-06-06 09:47:06 -07:00
482a7839d9 test: speedup and strengthen go vet checking
Was iterating over every file, reloading everything. Instead,
analyze the package directories. On my machine, the time for
vet checking goes from 34s to 3s. Scans more code too.
2017-06-06 09:46:54 -07:00
ba3058ca79 op-guide: document CN certs in security.md 2017-06-06 09:46:47 -07:00
0e90e504f5 scripts, Documentation: fix swagger generation
Changes to the genproto to support splitting out the grpc-gateway broke
swagger generation.
2017-06-02 11:05:21 -07:00
998fa0de76 Documentation, scripts: regen RPC docs
Was missing the new cancel_reason field. Also includes updated protodoc
sha to fix generating documentation for upcoming txn compare range patchset.
2017-06-02 10:27:49 -07:00
c273735729 op-guide: document configuration flags for gateway 2017-06-01 15:59:49 -07:00
c85f736522 mvcc: time restore in restore benchmark
This never worked.
2017-06-01 14:59:31 -07:00
a375ff172e mvcc: chunk reads for restoring
Loading all keys at once would cause etcd to use twice as much
memory as it would need to serve the keys, causing RSS to spike on
boot. Instead, load the keys into the mvcc by chunk. Uses pipelining
for some concurrency.

Fixes #7822
2017-06-01 14:59:27 -07:00
1893af9bbd integration: use unixs:// if client port configured for tls 2017-06-01 09:47:08 -07:00
b4c655677a clientv3: support unixs:// scheme
For using TLS without giving a TLSConfig to the client.
2017-06-01 09:47:03 -07:00
c2160adf1d clientv3/integration: test dialing to TLS without a TLS config times out
etcdctl was getting ctx errors from timing out trying to issue RPCs to
a TLS endpoint but without using TLS for transmission. Client should
immediately bail out with a time out error.
2017-06-01 09:46:57 -07:00
5ada311416 clientv3: use Endpoints[0] to initialize grpc creds
Dialing out without specifying TLS creds but giving https uses some
default behavior that depends on passing an endpoint with https to
Dial(), so it's not enough to completely rely on the balancer to supply
endpoints.

Fixes #8008

Also ctx-izes grpc.Dial
2017-06-01 09:46:48 -07:00
f042cd7d9c vendor: ghodss/yaml v1.0.0 2017-05-30 14:44:30 -07:00
f0a400a3a8 vendor: kr/pty v1.0.0 2017-05-30 14:44:23 -07:00
6066977280 op-guide: update performance.md
It's been a year, time to refresh with 3.2.0 data.
2017-05-30 10:16:19 -07:00
fc88eccc74 vendor: use v0.2.0 of go-semver 2017-05-30 10:15:23 -07:00
5cb28a7d83 Documentation: add 'yaml.NewConfig' change in 3.2
Signed-off-by: Gyu-Ho Lee <gyuhox@gmail.com>
2017-05-30 10:14:55 -07:00
de57e88643 Documentation: add FAQ entry for "database space exceeded" errors
Also moves miscategorized cluster id mismatch entry from "performance"
to "operation".
2017-05-26 09:13:13 -07:00
855 changed files with 59787 additions and 13156 deletions

7
.gitignore vendored
View File

@ -1,15 +1,22 @@
/agent-*
/coverage
/covdir
/gopath
/gopath.proto
/go-bindata
/release
/machine*
/bin
.Dockerfile-test
.vagrant
*.etcd
*.log
/etcd
*.swp
/hack/insta-discovery/.env
*.test
tools/functional-tester/docker/bin
hack/scripts-dev/docker-dns/.Dockerfile
hack/scripts-dev/docker-dns-srv/.Dockerfile
hack/tls-setup/certs
.idea

View File

@ -1,11 +1,12 @@
dist: trusty
language: go
go_import_path: github.com/coreos/etcd
sudo: false
sudo: required
services: docker
go:
- 1.8.3
- tip
- 1.8.7
notifications:
on_success: never
@ -13,68 +14,60 @@ notifications:
env:
matrix:
- TARGET=amd64
- TARGET=darwin-amd64
- TARGET=windows-amd64
- TARGET=arm64
- TARGET=arm
- TARGET=386
- TARGET=ppc64le
- TARGET=linux-amd64-integration
- TARGET=linux-amd64-functional
- TARGET=linux-amd64-unit
- TARGET=all-build
- TARGET=linux-386-unit
matrix:
fast_finish: true
allow_failures:
- go: tip
- go: 1.8.7
env: TARGET=linux-386-unit
exclude:
- go: tip
env: TARGET=darwin-amd64
- go: tip
env: TARGET=windows-amd64
- go: tip
env: TARGET=arm
- go: tip
env: TARGET=arm64
- go: tip
env: TARGET=386
- go: tip
env: TARGET=ppc64le
addons:
apt:
packages:
- libpcap-dev
- libaspell-dev
- libhunspell-dev
env: TARGET=linux-386-unit
before_install:
- go get -v -u github.com/chzchzchz/goword
- go get -v -u github.com/coreos/license-bill-of-materials
- go get -v -u honnef.co/go/tools/cmd/gosimple
- go get -v -u honnef.co/go/tools/cmd/unused
- go get -v -u honnef.co/go/tools/cmd/staticcheck
- ./scripts/install-marker.sh amd64
- if [[ $TRAVIS_GO_VERSION == 1.* ]]; then docker pull gcr.io/etcd-development/etcd-test:go${TRAVIS_GO_VERSION}; fi
# disable godep restore override
install:
- pushd cmd/etcd && go get -t -v ./... && popd
- pushd cmd/etcd && go get -t -v ./... && popd
script:
- echo "TRAVIS_GO_VERSION=${TRAVIS_GO_VERSION}"
- >
case "${TARGET}" in
amd64)
GOARCH=amd64 ./test
linux-amd64-integration)
docker run --rm \
--volume=`pwd`:/go/src/github.com/coreos/etcd gcr.io/etcd-development/etcd-test:go${TRAVIS_GO_VERSION} \
/bin/bash -c "GOARCH=amd64 PASSES='integration' ./test"
;;
darwin-amd64)
GO_BUILD_FLAGS="-a -v" GOPATH="" GOOS=darwin GOARCH=amd64 ./build
linux-amd64-functional)
docker run --rm \
--volume=`pwd`:/go/src/github.com/coreos/etcd gcr.io/etcd-development/etcd-test:go${TRAVIS_GO_VERSION} \
/bin/bash -c "./build && GOARCH=amd64 PASSES='functional' ./test"
;;
windows-amd64)
GO_BUILD_FLAGS="-a -v" GOPATH="" GOOS=windows GOARCH=amd64 ./build
linux-amd64-unit)
docker run --rm \
--volume=`pwd`:/go/src/github.com/coreos/etcd gcr.io/etcd-development/etcd-test:go${TRAVIS_GO_VERSION} \
/bin/bash -c "GOARCH=amd64 PASSES='unit' ./test"
;;
386)
GOARCH=386 PASSES="build unit" ./test
all-build)
docker run --rm \
--volume=`pwd`:/go/src/github.com/coreos/etcd gcr.io/etcd-development/etcd-test:go${TRAVIS_GO_VERSION} \
/bin/bash -c "GOARCH=amd64 PASSES='build' ./test \
&& GOARCH=386 PASSES='build' ./test \
&& GO_BUILD_FLAGS='-v' GOOS=darwin GOARCH=amd64 ./build \
&& GO_BUILD_FLAGS='-v' GOOS=windows GOARCH=amd64 ./build \
&& GO_BUILD_FLAGS='-v' GOARCH=arm ./build \
&& GO_BUILD_FLAGS='-v' GOARCH=arm64 ./build \
&& GO_BUILD_FLAGS='-v' GOARCH=ppc64le ./build"
;;
*)
# test building out of gopath
GO_BUILD_FLAGS="-a -v" GOPATH="" GOARCH="${TARGET}" ./build
linux-386-unit)
docker run --rm \
--volume=`pwd`:/go/src/github.com/coreos/etcd gcr.io/etcd-development/etcd-test:go${TRAVIS_GO_VERSION} \
/bin/bash -c "GOARCH=386 PASSES='unit' ./test"
;;
esac

57
Dockerfile-test Normal file
View File

@ -0,0 +1,57 @@
# Test image: an Ubuntu base with build tools, a Go toolchain, and the
# lint/spell-check/coverage helpers installed by the steps below.
FROM ubuntu:16.10
# Replace /bin/sh with a symlink to bash, so shell-form RUN steps run under bash.
RUN rm /bin/sh && ln -s /bin/bash /bin/sh
# Preselect debconf's Noninteractive frontend before installing packages.
RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections
# Install the C build toolchain, common CLI utilities, the aspell/hunspell
# libraries and English dictionaries, and shellcheck; then upgrade the
# installed packages and clean up with autoremove/autoclean.
RUN apt-get -y update \
&& apt-get -y install \
build-essential \
gcc \
apt-utils \
pkg-config \
software-properties-common \
apt-transport-https \
libssl-dev \
sudo \
bash \
curl \
wget \
tar \
git \
netcat \
libaspell-dev \
libhunspell-dev \
hunspell-en-us \
aspell-en \
shellcheck \
&& apt-get -y update \
&& apt-get -y upgrade \
&& apt-get -y autoremove \
&& apt-get -y autoclean
# Go toolchain location, workspace, and PATH entries for both bin directories.
ENV GOROOT /usr/local/go
ENV GOPATH /go
ENV PATH ${GOPATH}/bin:${GOROOT}/bin:${PATH}
# NOTE(review): REPLACE_ME_GO_VERSION looks like a placeholder that must be
# substituted with a real Go version before building — confirm against the
# tooling that generates the final Dockerfile.
ENV GO_VERSION REPLACE_ME_GO_VERSION
ENV GO_DOWNLOAD_URL https://storage.googleapis.com/golang
# Remove any existing GOROOT, unpack the linux-amd64 Go tarball into
# /usr/local, create the GOPATH src/bin directories, and print the Go version.
RUN rm -rf ${GOROOT} \
&& curl -s ${GO_DOWNLOAD_URL}/go${GO_VERSION}.linux-amd64.tar.gz | tar -v -C /usr/local/ -xz \
&& mkdir -p ${GOPATH}/src ${GOPATH}/bin \
&& go version
# Create the etcd source directory inside GOPATH and make it the working directory.
RUN mkdir -p ${GOPATH}/src/github.com/coreos/etcd
WORKDIR ${GOPATH}/src/github.com/coreos/etcd
# Copy the marker install script into the image; it is run and then deleted below.
ADD ./scripts/install-marker.sh /tmp/install-marker.sh
# Fetch the Go-based helper tools (goword built with the "spell" tag,
# license-bill-of-materials, gosimple, unused, staticcheck, gocovmerge,
# ineffassign), run install-marker.sh for amd64, then download the Codecov
# bash script to /codecov with owner-only rwx permissions.
RUN go get -v -u -tags spell github.com/chzchzchz/goword \
&& go get -v -u github.com/coreos/license-bill-of-materials \
&& go get -v -u honnef.co/go/tools/cmd/gosimple \
&& go get -v -u honnef.co/go/tools/cmd/unused \
&& go get -v -u honnef.co/go/tools/cmd/staticcheck \
&& go get -v -u github.com/wadey/gocovmerge \
&& go get -v -u github.com/gordonklaus/ineffassign \
&& /tmp/install-marker.sh amd64 \
&& rm -f /tmp/install-marker.sh \
&& curl -s https://codecov.io/bash >/codecov \
&& chmod 700 /codecov

View File

@ -24,6 +24,11 @@ curl -L http://localhost:2379/v3alpha/kv/put \
curl -L http://localhost:2379/v3alpha/kv/range \
-X POST -d '{"key": "Zm9v"}'
# {"header":{"cluster_id":"12585971608760269493","member_id":"13847567121247652255","revision":"2","raft_term":"3"},"kvs":[{"key":"Zm9v","create_revision":"2","mod_revision":"2","version":"1","value":"YmFy"}],"count":"1"}
# get all keys prefixed with "foo"
curl -L http://localhost:2379/v3alpha/kv/range \
-X POST -d '{"key": "Zm9v", "range_end": "Zm9w"}'
# {"header":{"cluster_id":"12585971608760269493","member_id":"13847567121247652255","revision":"2","raft_term":"3"},"kvs":[{"key":"Zm9v","create_revision":"2","mod_revision":"2","version":"1","value":"YmFy"}],"count":"1"}
```
Use `curl` to watch a key:
@ -38,6 +43,15 @@ curl -L http://localhost:2379/v3alpha/kv/put \
# {"result":{"header":{"cluster_id":"12585971608760269493","member_id":"13847567121247652255","revision":"2","raft_term":"2"},"events":[{"kv":{"key":"Zm9v","create_revision":"2","mod_revision":"2","version":"1","value":"YmFy"}}]}}
```
Use `curl` to issue a transaction:
```bash
curl -L http://localhost:2379/v3alpha/kv/txn \
-X POST \
-d '{"compare":[{"target":"CREATE","key":"Zm9v","createRevision":"2"}],"success":[{"requestPut":{"key":"Zm9v","value":"YmFy"}}]}'
# {"header":{"cluster_id":"12585971608760269493","member_id":"13847567121247652255","revision":"3","raft_term":"2"},"succeeded":true,"responses":[{"response_put":{"header":{"revision":"3"}}}]}
```
## Swagger
Generated [Swagger][swagger] API definitions can be found at [rpc.swagger.json][swagger-doc].

View File

@ -790,6 +790,7 @@ From google paxosdb paper: Our implementation hinges around a powerful primitive
| created | created is set to true if the response is for a create watch request. The client should record the watch_id and expect to receive events for the created watcher from the same stream. All events sent to the created watcher will attach with the same watch_id. | bool |
| canceled | canceled is set to true if the response is for a cancel watch request. No further events will be sent to the canceled watcher. | bool |
| compact_revision | compact_revision is set to the minimum index if a watcher tries to watch at a compacted index. This happens when creating a watcher at a compacted revision or the watcher cannot catch up with the progress of the key-value store. The client should treat the watcher as canceled and should not try to create any watcher with the same start_revision again. | int64 |
| cancel_reason | cancel_reason indicates the reason for canceling the watcher. | string |
| events | | (slice of) mvccpb.Event |

View File

@ -2179,6 +2179,10 @@
"format": "int64",
"description": "compact_revision is set to the minimum index if a watcher tries to watch\nat a compacted index.\n\nThis happens when creating a watcher at a compacted revision or the watcher cannot\ncatch up with the progress of the key-value store. \n\nThe client should treat the watcher as canceled and should not try to create any\nwatcher with the same start_revision again."
},
"cancel_reason": {
"type": "string",
"description": "cancel_reason indicates the reason for canceling the watcher."
},
"events": {
"type": "array",
"items": {

View File

@ -4,8 +4,4 @@ For the most part, the etcd project is stable, but we are still moving fast! We
## The current experimental API/features are:
- [gateway][gateway]: beta, to be stable in 3.2 release
- [gRPC proxy][grpc-proxy]: alpha, to be stable in 3.2 release
[gateway]: ../op-guide/gateway.md
[grpc-proxy]: ../op-guide/grpc_proxy.md
(none currently)

View File

@ -2,7 +2,7 @@
## System requirements
The etcd performance benchmarks run etcd on 8 vCPU, 16GB RAM, 50GB SSD GCE instances, but any relatively modern machine with low latency storage and a few gigabytes of memory should suffice for most use cases. Applications with large v2 data stores will require more memory than a large v3 data store since data is kept in anonymous memory instead of memory mapped from a file. than For running etcd on a cloud provider, we suggest at least a medium instance on AWS or a standard-1 instance on GCE.
The etcd performance benchmarks run etcd on 8 vCPU, 16GB RAM, 50GB SSD GCE instances, but any relatively modern machine with low latency storage and a few gigabytes of memory should suffice for most use cases. Applications with large v2 data stores will require more memory than a large v3 data store since data is kept in anonymous memory instead of memory mapped from a file. For running etcd on a cloud provider, we suggest at least a medium instance on AWS or a standard-1 instance on GCE.
## Download the pre-built binary

View File

@ -47,12 +47,19 @@ Administrators who need to create reliable and scalable key-value stores for the
- [Amazon Web Services][aws_platform]
- [FreeBSD][freebsd_platform]
### Upgrading and compatibility
### Security
- [Migrate applications from using API v2 to API v3][v2_migration]
- [Upgrading a v2.3 cluster to v3.0][v3_upgrade]
- [Upgrading a v3.0 cluster to v3.1][v31_upgrade]
- [Upgrading a v3.1 cluster to v3.2][v32_upgrade]
- [TLS][security]
- [Role-based access control][authentication]
### Maintenance and troubleshooting
- [Frequently asked questions][common questions]
- [Monitoring][monitoring]
- [Maintenance][maintenance]
- [Failure modes][failures]
- [Disaster recovery][recovery]
- [Upgrading][upgrading]
## Learning
@ -106,8 +113,6 @@ Answers to [common questions] about etcd.
[freebsd_platform]: platforms/freebsd.md
[aws_platform]: platforms/aws.md
[experimental]: dev-guide/experimental_apis.md
[v3_upgrade]: upgrades/upgrade_3_0.md
[v31_upgrade]: upgrades/upgrade_3_1.md
[v32_upgrade]: upgrades/upgrade_3_2.md
[authentication]: op-guide/authentication.md
[auth_design]: learning/auth_design.md
[upgrading]: upgrades/upgrading-etcd.md

View File

@ -8,11 +8,11 @@
### Configuration
#### What is the difference between advertise-urls and listen-urls?
#### What is the difference between listen-<client,peer>-urls, advertise-client-urls or initial-advertise-peer-urls?
`listen-urls` specifies the local addresses etcd server binds to for accepting incoming connections. To listen on a port for all interfaces, specify `0.0.0.0` as the listen IP address.
`listen-client-urls` and `listen-peer-urls` specify the local addresses etcd server binds to for accepting incoming connections. To listen on a port for all interfaces, specify `0.0.0.0` as the listen IP address.
`advertise-urls` specifies the addresses etcd clients or other etcd members should use to contact the etcd server. The advertise addresses must be reachable from the remote machines. Do not advertise addresses like `localhost` or `0.0.0.0` for a production setup since these addresses are unreachable from remote machines.
`advertise-client-urls` and `initial-advertise-peer-urls` specify the addresses etcd clients or other etcd members should use to contact the etcd server. The advertise addresses must be reachable from the remote machines. Do not advertise addresses like `localhost` or `0.0.0.0` for a production setup since these addresses are unreachable from remote machines.
### Deployment
@ -78,10 +78,26 @@ On the other hand, if the downed member is removed from cluster membership first
etcd sets `strict-reconfig-check` in order to reject reconfiguration requests that would cause quorum loss. Abandoning quorum is really risky (especially when the cluster is already unhealthy). Although it may be tempting to disable quorum checking if there's quorum loss to add a new member, this could lead to full fledged cluster inconsistency. For many applications, this will make the problem even worse ("disk geometry corruption" being a candidate for most terrifying).
### Why does etcd lose its leader from disk latency spikes?
#### Why does etcd lose its leader from disk latency spikes?
This is intentional; disk latency is part of leader liveness. Suppose the cluster leader takes a minute to fsync a raft log update to disk, but the etcd cluster has a one second election timeout. Even though the leader can process network messages within the election interval (e.g., send heartbeats), it's effectively unavailable because it can't commit any new proposals; it's waiting on the slow disk. If the cluster frequently loses its leader due to disk latencies, try [tuning][tuning] the disk settings or etcd time parameters.
#### What does the etcd warning "request ignored (cluster ID mismatch)" mean?
Every new etcd cluster generates a new cluster ID based on the initial cluster configuration and a user-provided unique `initial-cluster-token` value. By having unique cluster IDs, etcd is protected from cross-cluster interaction which could corrupt the cluster.
Usually this warning happens after tearing down an old cluster, then reusing some of the peer addresses for the new cluster. If any etcd process from the old cluster is still running it will try to contact the new cluster. The new cluster will recognize a cluster ID mismatch, then ignore the request and emit this warning. This warning is often cleared by ensuring peer addresses among distinct clusters are disjoint.
#### What does "mvcc: database space exceeded" mean and how do I fix it?
The [multi-version concurrency control][api-mvcc] data model in etcd keeps an exact history of the keyspace. Without periodically compacting this history (e.g., by setting `--auto-compaction`), etcd will eventually exhaust its storage space. If etcd runs low on storage space, it raises a space quota alarm to protect the cluster from further writes. So long as the alarm is raised, etcd responds to write requests with the error `mvcc: database space exceeded`.
To recover from the low space quota alarm:
1. [Compact][maintenance-compact] etcd's history.
2. [Defragment][maintenance-defragment] every etcd endpoint.
3. [Disarm][maintenance-disarm] the alarm.
### Performance
#### How should I benchmark etcd?
@ -91,7 +107,7 @@ Try the [benchmark] tool. Current [benchmark results][benchmark-result] are avai
#### What does the etcd warning "apply entries took too long" mean?
After a majority of etcd members agree to commit a request, each etcd server applies the request to its data store and persists the result to disk. Even with a slow mechanical disk or a virtualized network disk, such as Amazon's EBS or Google's PD, applying a request should normally take fewer than 50 milliseconds. If the average apply duration exceeds 100 milliseconds, etcd will warn that entries are taking too long to apply.
Usually this issue is caused by a slow disk. The disk could be experiencing contention among etcd and other applications, or the disk is simply too slow (e.g., a shared virtualized disk). To rule out a slow disk from causing this warning, monitor [backend_commit_duration_seconds][backend_commit_metrics] (p99 duration should be less than 25ms) to confirm the disk is reasonably fast. If the disk is too slow, assigning a dedicated disk to etcd or using a faster disk will typically solve the problem.
The second most common cause is CPU starvation. If monitoring of the machine's CPU usage shows heavy utilization, there may not be enough compute capacity for etcd. Moving etcd to a dedicated machine, increasing process resource isolation with cgroups, or renicing the etcd server process into a higher priority can usually solve the problem.
@ -112,12 +128,6 @@ A slow network can also cause this issue. If network metrics among the etcd mach
If none of the above suggestions clear the warnings, please [open an issue][new_issue] with detailed logging, monitoring, metrics and optionally workload information.
#### What does the etcd warning "request ignored (cluster ID mismatch)" mean?
Every new etcd cluster generates a new cluster ID based on the initial cluster configuration and a user-provided unique `initial-cluster-token` value. By having unique cluster IDs, etcd is protected from cross-cluster interaction which could corrupt the cluster.
Usually this warning happens after tearing down an old cluster, then reusing some of the peer addresses for the new cluster. If any etcd process from the old cluster is still running it will try to contact the new cluster. The new cluster will recognize a cluster ID mismatch, then ignore the request and emit this warning. This warning is often cleared by ensuring peer addresses among distinct clusters are disjoint.
#### What does the etcd warning "snapshotting is taking more than x seconds to finish ..." mean?
etcd sends a snapshot of its complete key-value store to refresh slow followers and for [backups][backup]. Slow snapshot transfer times increase MTTR; if the cluster is ingesting data with high throughput, slow followers may livelock by needing a new snapshot before finishing receiving a snapshot. To catch slow snapshot performance, etcd warns when sending a snapshot takes more than thirty seconds and exceeds the expected transfer time for a 1Gbps connection.
@ -135,3 +145,7 @@ etcd sends a snapshot of its complete key-value store to refresh slow followers
[runtime reconfiguration]: https://github.com/coreos/etcd/blob/master/Documentation/op-guide/runtime-configuration.md
[benchmark]: https://github.com/coreos/etcd/tree/master/tools/benchmark
[benchmark-result]: https://github.com/coreos/etcd/blob/master/Documentation/op-guide/performance.md
[api-mvcc]: learning/api.md#revisions
[maintenance-compact]: op-guide/maintenance.md#history-compaction
[maintenance-defragment]: op-guide/maintenance.md#defragmentation
[maintenance-disarm]: ../etcdctl/README.md#alarm-disarm

View File

@ -449,7 +449,7 @@ message LeaseRevokeRequest {
### Keep alives
Leases are refreshed using a bi-directional stream created with the `LeaseKeepAlive` API call. When the client wishes to refresh a lease, it sends a `LeaseGrantRequest` over the stream:
Leases are refreshed using a bi-directional stream created with the `LeaseKeepAlive` API call. When the client wishes to refresh a lease, it sends a `LeaseKeepAliveRequest` over the stream:
```protobuf
message LeaseKeepAliveRequest {

View File

@ -60,7 +60,7 @@ For avoiding such a situation, the API layer performs *version number validation
After authenticating with `Authenticate()`, a client can create a gRPC connection as it would without auth. In addition to the existing initialization process, the client must associate the token with the newly created connection. `grpc.WithPerRPCCredentials()` provides the functionality for this purpose.
Every authenticated request from the client has a token. The token can be obtained with `grpc.metadata.FromContext()` in the server side. The server can obtain who is issuing the request and when the user was authorized. The information will be filled by the API layer in the header (`etcdserverpb.RequestHeader.Username` and `etcdserverpb.RequestHeader.AuthRevision`) of a raft log entry (`etcdserverpb.InternalRaftRequest`).
Every authenticated request from the client has a token. The token can be obtained with `grpc.metadata.FromIncomingContext()` in the server side. The server can obtain who is issuing the request and when the user was authorized. The information will be filled by the API layer in the header (`etcdserverpb.RequestHeader.Username` and `etcdserverpb.RequestHeader.AuthRevision`) of a raft log entry (`etcdserverpb.InternalRaftRequest`).
### Checking permission in the state machine

View File

@ -1,17 +1,17 @@
# Why etcd
# etcd versus other key-value stores
The name "etcd" originated from two ideas, the unix "/etc" folder and "d"istributed systems. The "/etc" folder is a place to store configuration data for a single system whereas etcd stores configuration information for large scale distributed systems. Hence, a "d"istributed "/etc" is "etcd".
etcd stores metadata in a consistent and fault-tolerant way. Distributed systems use etcd as a consistent key-value store for configuration management, service discovery, and coordinating distributed work. Common distributed patterns using etcd include [leader election][etcd-etcdctl-elect], [distributed locks][etcd-etcdctl-lock], and monitoring machine liveness.
etcd is designed as a general substrate for large scale distributed systems. These are systems that will never tolerate split-brain operation and are willing to sacrifice availability to achieve this end. etcd stores metadata in a consistent and fault-tolerant way. An etcd cluster is meant to provide key-value storage with best of class stability, reliability, scalability and performance.
Distributed systems use etcd as a consistent key-value store for configuration management, service discovery, and coordinating distributed work. Many [organizations][production-users] use etcd to implement production systems such as container schedulers, service discovery services, and distributed data storage. Common distributed patterns using etcd include [leader election][etcd-etcdctl-elect], [distributed locks][etcd-etcdctl-lock], and monitoring machine liveness.
## Use cases
- Container Linux by CoreOS: Application running on [Container Linux][container-linux] gets automatic, zero-downtime Linux kernel updates. Container Linux uses [locksmith] to coordinate updates. locksmith implements a distributed semaphore over etcd to ensure only a subset of a cluster is rebooting at any given time.
- Container Linux by CoreOS: Applications running on [Container Linux][container-linux] get automatic, zero-downtime Linux kernel updates. Container Linux uses [locksmith] to coordinate updates. Locksmith implements a distributed semaphore over etcd to ensure only a subset of a cluster is rebooting at any given time.
- [Kubernetes][kubernetes] stores configuration data into etcd for service discovery and cluster management; etcd's consistency is crucial for correctly scheduling and operating services. The Kubernetes API server persists cluster state into etcd. It uses etcd's watch API to monitor the cluster and roll out critical configuration changes.
## etcd versus other key-value stores
When deciding whether to use etcd as a key-value store, it's worth keeping in mind etcd's main goal. Namely, etcd is designed as a general substrate for large scale distributed systems. These are systems that will never tolerate split-brain operation and are willing to sacrifice availability to achieve this end. An etcd cluster is meant to provide consistent key-value storage with best of class stability, reliability, scalability and performance. The upshot of this focus is many [organizations][production-users] already use etcd to implement production systems such as container schedulers, service discovery services, distributed data storage, and more.
## Comparison chart
Perhaps etcd already seems like a good fit, but as with all technological decisions, proceed with caution. Please note this documentation is written by the etcd team. Although the ideal is a disinterested comparison of technology and features, the authors' expertise and biases obviously favor etcd. Use only as directed.
@ -47,7 +47,7 @@ When considering features, support, and stability, new applications planning to
### Consul
Consul bills itself as an end-to-end service discovery framework. To wit, it includes services such as health checking, failure detection, and DNS. Incidentally, Consul also exposes a key value store with mediocre performance and an intricate API. As it stands in Consul 0.7, the storage system does not scale well; systems requiring millions of keys will suffer from high latencies and memory pressure. The key value API is missing, most notably, multi-version keys, conditional transactions, and reliable streaming watches.
Consul is an end-to-end service discovery framework. It provides built-in health checking, failure detection, and DNS services. In addition, Consul exposes a key value store with RESTful HTTP APIs. [As it stands in Consul 1.0][dbtester-comparison-results], the storage system does not scale as well as other systems like etcd or Zookeeper in key-value operations; systems requiring millions of keys will suffer from high latencies and memory pressure. The key value API is missing, most notably, multi-version keys, conditional transactions, and reliable streaming watches.
etcd and Consul solve different problems. If looking for a distributed consistent key value store, etcd is a better choice over Consul. If looking for end-to-end cluster service discovery, etcd will not have enough features; choose Kubernetes, Consul, or SmartStack.
@ -84,7 +84,7 @@ For distributed coordination, choosing etcd can help prevent operational headach
[tidb]: https://github.com/pingcap/tidb
[etcd-v3lock]: https://godoc.org/github.com/coreos/etcd/etcdserver/api/v3lock/v3lockpb
[etcd-v3election]: https://godoc.org/github.com/coreos/etcd/etcdserver/api/v3election/v3electionpb
[etcd-etcdctl-lock]: ../../etcdctl/README.md#lock-lockname
[etcd-etcdctl-lock]: ../../etcdctl/README.md#lock-lockname-command-arg1-arg2-
[etcd-etcdctl-elect]: ../../etcdctl/README.md#elect-options-election-name-proposal
[etcd-mvcc]: data_model.md
[etcd-recipe]: https://godoc.org/github.com/coreos/etcd/contrib/recipes
@ -113,4 +113,4 @@ For distributed coordination, choosing etcd can help prevent operational headach
[container-linux]: https://coreos.com/why
[locksmith]: https://github.com/coreos/locksmith
[kubernetes]: http://kubernetes.io/docs/whatisk8s
[dbtester-comparison-results]: https://github.com/coreos/dbtester/tree/master/test-results/2018Q1-02-etcd-zookeeper-consul

View File

@ -79,14 +79,16 @@ export NODE1=192.168.1.21
Run the latest version of etcd:
```
docker run --net=host \
--volume=${DATA_DIR}:/etcd-data \
--name etcd quay.io/coreos/etcd:latest \
/usr/local/bin/etcd \
--data-dir=/etcd-data --name node1 \
--initial-advertise-peer-urls http://${NODE1}:2380 --listen-peer-urls http://${NODE1}:2380 \
--advertise-client-urls http://${NODE1}:2379 --listen-client-urls http://${NODE1}:2379 \
--initial-cluster node1=http://${NODE1}:2380
docker run \
-p 2379:2379 \
-p 2380:2380 \
--volume=${DATA_DIR}:/etcd-data \
--name etcd quay.io/coreos/etcd:latest \
/usr/local/bin/etcd \
--data-dir=/etcd-data --name node1 \
--initial-advertise-peer-urls http://${NODE1}:2380 --listen-peer-urls http://${NODE1}:2380 \
--advertise-client-urls http://${NODE1}:2379 --listen-client-urls http://${NODE1}:2379 \
--initial-cluster node1=http://${NODE1}:2380
```
List the cluster member:
@ -114,41 +116,47 @@ DATA_DIR=/var/lib/etcd
# For node 1
THIS_NAME=${NAME_1}
THIS_IP=${HOST_1}
docker run --net=host \
--volume=${DATA_DIR}:/etcd-data \
--name etcd quay.io/coreos/etcd:${ETCD_VERSION} \
/usr/local/bin/etcd \
--data-dir=/etcd-data --name ${THIS_NAME} \
--initial-advertise-peer-urls http://${THIS_IP}:2380 --listen-peer-urls http://${THIS_IP}:2380 \
--advertise-client-urls http://${THIS_IP}:2379 --listen-client-urls http://${THIS_IP}:2379 \
--initial-cluster ${CLUSTER} \
--initial-cluster-state ${CLUSTER_STATE} --initial-cluster-token ${TOKEN}
docker run \
-p 2379:2379 \
-p 2380:2380 \
--volume=${DATA_DIR}:/etcd-data \
--name etcd quay.io/coreos/etcd:${ETCD_VERSION} \
/usr/local/bin/etcd \
--data-dir=/etcd-data --name ${THIS_NAME} \
--initial-advertise-peer-urls http://${THIS_IP}:2380 --listen-peer-urls http://${THIS_IP}:2380 \
--advertise-client-urls http://${THIS_IP}:2379 --listen-client-urls http://${THIS_IP}:2379 \
--initial-cluster ${CLUSTER} \
--initial-cluster-state ${CLUSTER_STATE} --initial-cluster-token ${TOKEN}
# For node 2
THIS_NAME=${NAME_2}
THIS_IP=${HOST_2}
docker run --net=host \
--volume=${DATA_DIR}:/etcd-data \
--name etcd quay.io/coreos/etcd:${ETCD_VERSION} \
/usr/local/bin/etcd \
--data-dir=/etcd-data --name ${THIS_NAME} \
--initial-advertise-peer-urls http://${THIS_IP}:2380 --listen-peer-urls http://${THIS_IP}:2380 \
--advertise-client-urls http://${THIS_IP}:2379 --listen-client-urls http://${THIS_IP}:2379 \
--initial-cluster ${CLUSTER} \
--initial-cluster-state ${CLUSTER_STATE} --initial-cluster-token ${TOKEN}
docker run \
-p 2379:2379 \
-p 2380:2380 \
--volume=${DATA_DIR}:/etcd-data \
--name etcd quay.io/coreos/etcd:${ETCD_VERSION} \
/usr/local/bin/etcd \
--data-dir=/etcd-data --name ${THIS_NAME} \
--initial-advertise-peer-urls http://${THIS_IP}:2380 --listen-peer-urls http://${THIS_IP}:2380 \
--advertise-client-urls http://${THIS_IP}:2379 --listen-client-urls http://${THIS_IP}:2379 \
--initial-cluster ${CLUSTER} \
--initial-cluster-state ${CLUSTER_STATE} --initial-cluster-token ${TOKEN}
# For node 3
THIS_NAME=${NAME_3}
THIS_IP=${HOST_3}
docker run --net=host \
--volume=${DATA_DIR}:/etcd-data \
--name etcd quay.io/coreos/etcd:${ETCD_VERSION} \
/usr/local/bin/etcd \
--data-dir=/etcd-data --name ${THIS_NAME} \
--initial-advertise-peer-urls http://${THIS_IP}:2380 --listen-peer-urls http://${THIS_IP}:2380 \
--advertise-client-urls http://${THIS_IP}:2379 --listen-client-urls http://${THIS_IP}:2379 \
--initial-cluster ${CLUSTER} \
--initial-cluster-state ${CLUSTER_STATE} --initial-cluster-token ${TOKEN}
docker run \
-p 2379:2379 \
-p 2380:2380 \
--volume=${DATA_DIR}:/etcd-data \
--name etcd quay.io/coreos/etcd:${ETCD_VERSION} \
/usr/local/bin/etcd \
--data-dir=/etcd-data --name ${THIS_NAME} \
--initial-advertise-peer-urls http://${THIS_IP}:2380 --listen-peer-urls http://${THIS_IP}:2380 \
--advertise-client-urls http://${THIS_IP}:2379 --listen-client-urls http://${THIS_IP}:2379 \
--initial-cluster ${CLUSTER} \
--initial-cluster-state ${CLUSTER_STATE} --initial-cluster-token ${TOKEN}
```
To run `etcdctl` using API version 3:
@ -170,17 +178,19 @@ rkt run \
--volume etcd-ssl-certs-bundle,kind=host,source=/etc/ssl/certs/ca-certificates.crt \
--mount volume=etcd-ssl-certs-bundle,target=/etc/ssl/certs/ca-certificates.crt \
quay.io/coreos/etcd:latest -- --name my-name \
--initial-advertise-peer-urls http://localhost:2380 --listen-peer-urls http://localhost:2380 \
--advertise-client-urls http://localhost:2379 --listen-client-urls http://localhost:2379 \
--discovery https://discovery.etcd.io/c11fbcdc16972e45253491a24fcf45e1
--initial-advertise-peer-urls http://localhost:2380 --listen-peer-urls http://localhost:2380 \
--advertise-client-urls http://localhost:2379 --listen-client-urls http://localhost:2379 \
--discovery https://discovery.etcd.io/c11fbcdc16972e45253491a24fcf45e1
```
```
docker run \
--volume=/etc/ssl/certs/ca-certificates.crt:/etc/ssl/certs/ca-certificates.crt \
quay.io/coreos/etcd:latest \
/usr/local/bin/etcd --name my-name \
--initial-advertise-peer-urls http://localhost:2380 --listen-peer-urls http://localhost:2380 \
--advertise-client-urls http://localhost:2379 --listen-client-urls http://localhost:2379 \
--discovery https://discovery.etcd.io/86a9ff6c8cb8b4c4544c1a2f88f8b801
-p 2379:2379 \
-p 2380:2380 \
--volume=/etc/ssl/certs/ca-certificates.crt:/etc/ssl/certs/ca-certificates.crt \
quay.io/coreos/etcd:latest \
/usr/local/bin/etcd --name my-name \
--initial-advertise-peer-urls http://localhost:2380 --listen-peer-urls http://localhost:2380 \
--advertise-client-urls http://localhost:2379 --listen-client-urls http://localhost:2379 \
--discovery https://discovery.etcd.io/86a9ff6c8cb8b4c4544c1a2f88f8b801
```

View File

@ -10,8 +10,7 @@ The gateway supports multiple etcd server endpoints and works on a simple round-
Every application that accesses etcd must first have the address of an etcd cluster client endpoint. If multiple applications on the same server access the same etcd cluster, every application still needs to know the advertised client endpoints of the etcd cluster. If the etcd cluster is reconfigured to have different endpoints, every application may also need to update its endpoint list. This wide-scale reconfiguration is both tedious and error prone.
etcd gateway solves this problem by serving as a stable local endpoint. A typical etcd gateway configuration has
each machine running a gateway listening on a local address and every etcd application connecting to its local gateway. The upshot is only the gateway needs to update its endpoints instead of updating each and every application.
etcd gateway solves this problem by serving as a stable local endpoint. A typical etcd gateway configuration has each machine running a gateway listening on a local address and every etcd application connecting to its local gateway. The upshot is only the gateway needs to update its endpoints instead of updating each and every application.
In summary, to automatically propagate cluster endpoint changes, the etcd gateway runs on every machine serving multiple applications accessing the same etcd cluster.
@ -64,3 +63,43 @@ Start the etcd gateway to fetch the endpoints from the DNS SRV entries with the
$ etcd gateway --discovery-srv=example.com
2016-08-16 11:21:18.867350 I | tcpproxy: ready to proxy client requests to [...]
```
## Configuration flags
### etcd cluster
#### --endpoints
* Comma-separated list of etcd server targets for forwarding client connections.
* Default: `127.0.0.1:2379`
* Invalid example: `https://127.0.0.1:2379` (gateway does not terminate TLS)
#### --discovery-srv
* DNS domain used to bootstrap cluster endpoints through SRV records.
* Default: (not set)
### Network
#### --listen-addr
* Interface and port to bind for accepting client requests.
* Default: `127.0.0.1:23790`
#### --retry-delay
* Duration of delay before retrying to connect to failed endpoints.
* Default: 1m0s
* Invalid example: "123" (expects time unit in format)
### Security
#### --insecure-discovery
* Accept SRV records that are insecure or susceptible to man-in-the-middle attacks.
* Default: `false`
#### --trusted-ca-file
* Path to the client TLS CA file for the etcd cluster. Used to authenticate endpoints.
* Default: (not set)

View File

@ -114,18 +114,21 @@
"span": 5,
"stack": false,
"steppedLine": false,
"targets": [{
"expr": "sum(rate({grpc_type=\"unary\",grpc_code!=\"OK\"} [1m]))",
"targets": [
{
"expr": "sum(rate(grpc_server_started_total{grpc_type=\"unary\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} RPC Rate",
"legendFormat": "RPC Rate",
"metric": "grpc_server_started_total",
"refId": "A",
"step": 2
},
{
"expr": "sum(rate(grpc_server_started_total{grpc_type=\"unary\",grpc_code!=\"OK\"} [1m])) - sum(rate(grpc_server_handled_total{grpc_type=\"unary\"} [1m]))",
"expr": "sum(rate(grpc_server_handled_total{grpc_type=\"unary\",grpc_code!=\"OK\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} RPC Failed Rate",
"legendFormat": "RPC Failed Rate",
"metric": "grpc_server_handled_total",
"refId": "B",
"step": 2
@ -197,7 +200,7 @@
"stack": true,
"steppedLine": false,
"targets": [{
"expr": "sum(grpc_server_started_total {grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\",grpc_code!=\"OK\"}) - sum(grpc_server_handled_total {grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\"})",
"expr": "sum(grpc_server_started_total{grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\"}) - sum(grpc_server_handled_total{grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\"})",
"intervalFactor": 2,
"legendFormat": "Watch Streams",
"metric": "grpc_server_handled_total",
@ -205,7 +208,7 @@
"step": 4
},
{
"expr": "sum(grpc_server_started_total {grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"}) - sum(grpc_server_handled_total {grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"})",
"expr": "sum(grpc_server_started_total{grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"}) - sum(grpc_server_handled_total{grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"})",
"intervalFactor": 2,
"legendFormat": "Lease Streams",
"metric": "grpc_server_handled_total",
@ -361,7 +364,7 @@
"stack": false,
"steppedLine": true,
"targets": [{
"expr": "histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket [5m])) by (instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket[5m])) by (instance, le))",
"hide": false,
"intervalFactor": 2,
"legendFormat": "{{instance}} WAL fsync",
@ -370,7 +373,7 @@
"step": 4
},
{
"expr": "histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket [5m])) by (instance, le))",
"expr": "histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket[5m])) by (instance, le))",
"intervalFactor": 2,
"legendFormat": "{{instance}} DB fsync",
"metric": "etcd_disk_backend_commit_duration_seconds_bucket",
@ -522,7 +525,7 @@
"stack": true,
"steppedLine": false,
"targets": [{
"expr": "rate(etcd_network_client_grpc_received_bytes_total [1m])",
"expr": "rate(etcd_network_client_grpc_received_bytes_total[5m])",
"intervalFactor": 2,
"legendFormat": "{{instance}} Client Traffic In",
"metric": "etcd_network_client_grpc_received_bytes_total",
@ -595,7 +598,7 @@
"stack": true,
"steppedLine": false,
"targets": [{
"expr": "rate(etcd_network_client_grpc_sent_bytes_total [1m])",
"expr": "rate(etcd_network_client_grpc_sent_bytes_total[5m])",
"intervalFactor": 2,
"legendFormat": "{{instance}} Client Traffic Out",
"metric": "etcd_network_client_grpc_sent_bytes_total",
@ -668,7 +671,7 @@
"stack": false,
"steppedLine": false,
"targets": [{
"expr": "sum(rate(etcd_network_peer_received_bytes_total [1m])) by (instance)",
"expr": "sum(rate(etcd_network_peer_received_bytes_total[5m])) by (instance)",
"intervalFactor": 2,
"legendFormat": "{{instance}} Peer Traffic In",
"metric": "etcd_network_peer_received_bytes_total",
@ -742,7 +745,7 @@
"stack": false,
"steppedLine": false,
"targets": [{
"expr": "sum(rate(etcd_network_peer_sent_bytes_total [1m])) by (instance)",
"expr": "sum(rate(etcd_network_peer_sent_bytes_total[5m])) by (instance)",
"hide": false,
"interval": "",
"intervalFactor": 2,
@ -822,7 +825,7 @@
"stack": false,
"steppedLine": false,
"targets": [{
"expr": "sum(rate(etcd_server_proposals_failed_total [1m]))",
"expr": "sum(rate(etcd_server_proposals_failed_total[5m]))",
"intervalFactor": 2,
"legendFormat": "Proposal Failure Rate",
"metric": "etcd_server_proposals_failed_total",
@ -838,7 +841,7 @@
"step": 2
},
{
"expr": "sum(rate(etcd_server_proposals_committed_total [1m]))",
"expr": "sum(rate(etcd_server_proposals_committed_total[5m]))",
"intervalFactor": 2,
"legendFormat": "Proposal Commit Rate",
"metric": "etcd_server_proposals_committed_total",
@ -846,7 +849,7 @@
"step": 2
},
{
"expr": "sum(rate(etcd_server_proposals_applied_total [1m]))",
"expr": "sum(rate(etcd_server_proposals_applied_total[5m]))",
"intervalFactor": 2,
"legendFormat": "Proposal Apply Rate",
"refId": "D",
@ -922,9 +925,9 @@
"stack": false,
"steppedLine": false,
"targets": [{
"expr": "etcd_server_leader_changes_seen_total",
"expr": "changes(etcd_server_leader_changes_seen_total[1d])",
"intervalFactor": 2,
"legendFormat": "{{instance}} Leader Change Seen",
"legendFormat": "{{instance}} Total Leader Elections Per Day",
"metric": "etcd_server_leader_changes_seen_total",
"refId": "A",
"step": 2
@ -932,7 +935,7 @@
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Rate Leader Elections",
"title": "Total Leader Elections Per Day",
"tooltip": {
"msResolution": false,
"shared": true,
@ -1009,4 +1012,4 @@
"version": 215,
"links": [],
"gnetId": null
}
}

View File

@ -47,6 +47,10 @@ $ etcdctl defrag
Finished defragmenting etcd member[127.0.0.1:2379]
```
**Note that defragmentation to a live member blocks the system from reading and writing data while rebuilding its states**.
**Note that a defragmentation request is not replicated across the cluster. That is, the request is only applied to the local node. To defragment every member, specify all members in the `--endpoints` flag.**
## Space quota
The space quota in `etcd` ensures the cluster operates in a reliable fashion. Without a space quota, `etcd` may suffer from poor performance if the keyspace grows excessively large, or it may simply run out of storage space, leading to unpredictable cluster behavior. If the keyspace's backend database for any member exceeds the space quota, `etcd` raises a cluster-wide alarm that puts the cluster into a maintenance mode which only accepts key reads and deletes. Only after freeing enough space in the keyspace and defragmenting the backend database, along with clearing the space quota alarm can the cluster resume normal operation.
@ -74,7 +78,7 @@ $ ETCDCTL_API=3 etcdctl --write-out=table endpoint status
+----------------+------------------+-----------+---------+-----------+-----------+------------+
# confirm alarm is raised
$ ETCDCTL_API=3 etcdctl alarm list
memberID:13803658152347727308 alarm:NOSPACE
memberID:13803658152347727308 alarm:NOSPACE
```
Removing excessive keyspace data and defragmenting the backend database will put the cluster back within the quota limits:
@ -90,7 +94,7 @@ $ ETCDCTL_API=3 etcdctl defrag
Finished defragmenting etcd member[127.0.0.1:2379]
# disarm alarm
$ ETCDCTL_API=3 etcdctl alarm disarm
memberID:13803658152347727308 alarm:NOSPACE
memberID:13803658152347727308 alarm:NOSPACE
# test puts are allowed again
$ ETCDCTL_API=3 etcdctl put newkey 123
OK

View File

@ -1,6 +1,45 @@
# Monitoring etcd
Each etcd server exports metrics under the `/metrics` path on its client port.
Each etcd server provides local monitoring information on its client port through http endpoints. The monitoring data is useful for both system health checking and cluster debugging.
## Debug endpoint
If `--debug` is set, the etcd server exports debugging information on its client port under the `/debug` path. Take care when setting `--debug`, since there will be degraded performance and verbose logging.
The `/debug/pprof` endpoint is the standard go runtime profiling endpoint. This can be used to profile CPU, heap, mutex, and goroutine utilization. For example, here `go tool pprof` gets the top 10 functions where etcd spends its time:
```sh
$ go tool pprof http://localhost:2379/debug/pprof/profile
Fetching profile from http://localhost:2379/debug/pprof/profile
Please wait... (30s)
Saved profile in /home/etcd/pprof/pprof.etcd.localhost:2379.samples.cpu.001.pb.gz
Entering interactive mode (type "help" for commands)
(pprof) top10
310ms of 480ms total (64.58%)
Showing top 10 nodes out of 157 (cum >= 10ms)
flat flat% sum% cum cum%
130ms 27.08% 27.08% 130ms 27.08% runtime.futex
70ms 14.58% 41.67% 70ms 14.58% syscall.Syscall
20ms 4.17% 45.83% 20ms 4.17% github.com/coreos/etcd/cmd/vendor/golang.org/x/net/http2/hpack.huffmanDecode
20ms 4.17% 50.00% 30ms 6.25% runtime.pcvalue
20ms 4.17% 54.17% 50ms 10.42% runtime.schedule
10ms 2.08% 56.25% 10ms 2.08% github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/etcdserver.(*EtcdServer).AuthInfoFromCtx
10ms 2.08% 58.33% 10ms 2.08% github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/etcdserver.(*EtcdServer).Lead
10ms 2.08% 60.42% 10ms 2.08% github.com/coreos/etcd/cmd/vendor/github.com/coreos/etcd/pkg/wait.(*timeList).Trigger
10ms 2.08% 62.50% 10ms 2.08% github.com/coreos/etcd/cmd/vendor/github.com/prometheus/client_golang/prometheus.(*MetricVec).hashLabelValues
10ms 2.08% 64.58% 10ms 2.08% github.com/coreos/etcd/cmd/vendor/golang.org/x/net/http2.(*Framer).WriteHeaders
```
The `/debug/requests` endpoint gives gRPC traces and performance statistics through a web browser. For example, here is a `Range` request for the key `abc`:
```
When Elapsed (s)
2017/08/18 17:34:51.999317 0.000244 /etcdserverpb.KV/Range
17:34:51.999382 . 65 ... RPC: from 127.0.0.1:47204 deadline:4.999377747s
17:34:51.999395 . 13 ... recv: key:"abc"
17:34:51.999499 . 104 ... OK
17:34:51.999535 . 36 ... sent: header:<cluster_id:14841639068965178418 member_id:10276657743932975437 revision:15 raft_term:17 > kvs:<key:"abc" create_revision:6 mod_revision:14 version:9 value:"asda" > count:1
```
The metrics can be fetched with `curl`:
@ -75,8 +114,6 @@ Access: proxy
Then import the default [etcd dashboard template][template] and customize. For instance, if Prometheus data source name is `my-etcd`, the `datasource` field values in JSON also need to be `my-etcd`.
See the [demo][demo].
Sample dashboard:
![](./etcd-sample-grafana.png)
@ -85,4 +122,3 @@ Sample dashboard:
[prometheus]: https://prometheus.io/
[grafana]: http://grafana.org/
[template]: ./grafana.json
[demo]: http://dash.etcd.io/dashboard/db/test-etcd

View File

@ -17,58 +17,54 @@ For some baseline performance numbers, we consider a three member etcd cluster w
- Google Cloud Compute Engine
- 3 machines of 8 vCPUs + 16GB Memory + 50GB SSD
- 1 machine(client) of 16 vCPUs + 30GB Memory + 50GB SSD
- Ubuntu 15.10
- etcd v3 master branch (commit SHA d8f325d), Go 1.6.2
- Ubuntu 17.04
- etcd 3.2.0, go 1.8.3
With this configuration, etcd can approximately write:
| Number of keys | Key size in bytes | Value size in bytes | Number of connections | Number of clients | Target etcd server | Average write QPS | Average latency per request | Memory |
|----------------|-------------------|---------------------|-----------------------|-------------------|--------------------|-------------------|-----------------------------|--------|
| 10,000 | 8 | 256 | 1 | 1 | leader only | 525 | 2ms | 35 MB |
| 100,000 | 8 | 256 | 100 | 1000 | leader only | 25,000 | 30ms | 35 MB |
| 100,000 | 8 | 256 | 100 | 1000 | all members | 33,000 | 25ms | 35 MB |
| Number of keys | Key size in bytes | Value size in bytes | Number of connections | Number of clients | Target etcd server | Average write QPS | Average latency per request | Average server RSS |
|---------------:|------------------:|--------------------:|----------------------:|------------------:|--------------------|------------------:|----------------------------:|-------------------:|
| 10,000 | 8 | 256 | 1 | 1 | leader only | 583 | 1.6ms | 48 MB |
| 100,000 | 8 | 256 | 100 | 1000 | leader only | 44,341 | 22ms | 124MB |
| 100,000 | 8 | 256 | 100 | 1000 | all members | 50,104 | 20ms | 126MB |
Sample commands are:
```
# assuming IP_1 is leader, write requests to the leader
benchmark --endpoints={IP_1} --conns=1 --clients=1 \
```sh
# write to leader
benchmark --endpoints=${HOST_1} --target-leader --conns=1 --clients=1 \
put --key-size=8 --sequential-keys --total=10000 --val-size=256
benchmark --endpoints={IP_1} --conns=100 --clients=1000 \
benchmark --endpoints=${HOST_1} --target-leader --conns=100 --clients=1000 \
put --key-size=8 --sequential-keys --total=100000 --val-size=256
# write to all members
benchmark --endpoints={IP_1},{IP_2},{IP_3} --conns=100 --clients=1000 \
benchmark --endpoints=${HOST_1},${HOST_2},${HOST_3} --conns=100 --clients=1000 \
put --key-size=8 --sequential-keys --total=100000 --val-size=256
```
Linearizable read requests go through a quorum of cluster members for consensus to fetch the most recent data. Serializable read requests are cheaper than linearizable reads since they are served by any single etcd member, instead of a quorum of members, in exchange for possibly serving stale data. etcd can read:
| Number of requests | Key size in bytes | Value size in bytes | Number of connections | Number of clients | Consistency | Average latency per request | Average read QPS |
|--------------------|-------------------|---------------------|-----------------------|-------------------|-------------|-----------------------------|------------------|
| 10,000 | 8 | 256 | 1 | 1 | Linearizable | 2ms | 560 |
| 10,000 | 8 | 256 | 1 | 1 | Serializable | 0.4ms | 7,500 |
| 100,000 | 8 | 256 | 100 | 1000 | Linearizable | 15ms | 43,000 |
| 100,000 | 8 | 256 | 100 | 1000 | Serializable | 9ms | 93,000 |
| Number of requests | Key size in bytes | Value size in bytes | Number of connections | Number of clients | Consistency | Average read QPS | Average latency per request |
|-------------------:|------------------:|--------------------:|----------------------:|------------------:|-------------|-----------------:|----------------------------:|
| 10,000 | 8 | 256 | 1 | 1 | Linearizable | 1,353 | 0.7ms |
| 10,000 | 8 | 256 | 1 | 1 | Serializable | 2,909 | 0.3ms |
| 100,000 | 8 | 256 | 100 | 1000 | Linearizable | 141,578 | 5.5ms |
| 100,000 | 8 | 256 | 100 | 1000 | Serializable | 185,758 | 2.2ms |
Sample commands are:
```
# Linearizable read requests
benchmark --endpoints={IP_1},{IP_2},{IP_3} --conns=1 --clients=1 \
```sh
# Single connection read requests
benchmark --endpoints=${HOST_1},${HOST_2},${HOST_3} --conns=1 --clients=1 \
range YOUR_KEY --consistency=l --total=10000
benchmark --endpoints={IP_1},{IP_2},{IP_3} --conns=100 --clients=1000 \
range YOUR_KEY --consistency=l --total=100000
benchmark --endpoints=${HOST_1},${HOST_2},${HOST_3} --conns=1 --clients=1 \
range YOUR_KEY --consistency=s --total=10000
# Serializable read requests for each member and sum up the numbers
for endpoint in {IP_1} {IP_2} {IP_3}; do
benchmark --endpoints=$endpoint --conns=1 --clients=1 \
range YOUR_KEY --consistency=s --total=10000
done
for endpoint in {IP_1} {IP_2} {IP_3}; do
benchmark --endpoints=$endpoint --conns=100 --clients=1000 \
range YOUR_KEY --consistency=s --total=100000
done
# Many concurrent read requests
benchmark --endpoints=${HOST_1},${HOST_2},${HOST_3} --conns=100 --clients=1000 \
range YOUR_KEY --consistency=l --total=100000
benchmark --endpoints=${HOST_1},${HOST_2},${HOST_3} --conns=100 --clients=1000 \
range YOUR_KEY --consistency=s --total=100000
```
We encourage running the benchmark test when setting up an etcd cluster for the first time in a new environment to ensure the cluster achieves adequate performance; cluster latency and throughput can be sensitive to minor environment differences.
We encourage running the benchmark test when setting up an etcd cluster for the first time in a new environment to ensure the cluster achieves adequate performance; cluster latency and throughput can be sensitive to minor environment differences.

View File

@ -16,7 +16,7 @@ etcd takes several certificate related configuration options, either through com
`--key-file=<path>`: Key for the certificate. Must be unencrypted.
`--client-cert-auth`: When this is set etcd will check all incoming HTTPS requests for a client certificate signed by the trusted CA, requests that don't supply a valid client certificate will fail.
`--client-cert-auth`: When this is set etcd will check all incoming HTTPS requests for a client certificate signed by the trusted CA, requests that don't supply a valid client certificate will fail. If [authentication][auth] is enabled, the certificate provides credentials for the user name given by the Common Name field.
`--trusted-ca-file=<path>`: Trusted certificate authority.
@ -222,3 +222,4 @@ The certificate needs to be signed for the member's FQDN in its Subject Name, us
[tls-setup]: ../../hack/tls-setup
[tls-guide]: https://github.com/coreos/docs/blob/master/os/generate-self-signed-certificates.md
[alt-name]: http://wiki.cacert.org/FAQ/subjectAltName
[auth]: authentication.md

View File

@ -6,7 +6,7 @@ This guide assumes operational knowledge of Amazon Web Services (AWS), specifica
As a critical building block for distributed systems it is crucial to perform adequate capacity planning in order to support the intended cluster workload. Because etcd is a highly available and strongly consistent data store, increasing the number of nodes in an etcd cluster will generally affect performance adversely. This makes sense intuitively, as more nodes means more members for the leader to coordinate state across. The most direct way to increase throughput and decrease latency of an etcd cluster is to allocate more disk I/O, network I/O, CPU, and memory to cluster members. In the event it is impossible to temporarily divert incoming requests to the cluster, scaling the EC2 instances which comprise the etcd cluster members one at a time may improve performance. It is, however, best to avoid bottlenecks through capacity planning.
The etcd team has produced a [hardware recommendation guide]( ../op-guide/hardware.md) which is very useful for “ballparking” how many nodes and what instance type are necessary for a cluster.
The etcd team has produced a [hardware recommendation guide](../op-guide/hardware.md) which is very useful for “ballparking” how many nodes and what instance type are necessary for a cluster.
AWS provides a service for creating groups of EC2 instances which are dynamically sized to match load on the instances. Using an Auto Scaling Group ([ASG](http://docs.aws.amazon.com/autoscaling/latest/userguide/AutoScalingGroup.html)) to dynamically scale an etcd cluster is not recommended for several reasons including:

View File

@ -8,9 +8,11 @@ Before [starting an upgrade](#upgrade-procedure), read through the rest of this
### Upgrade checklists
**NOTE:** When [migrating from v2 with no v3 data](https://github.com/coreos/etcd/issues/9480), etcd server v3.2+ panics when etcd restores from existing snapshots but there is no v3 `ETCD_DATA_DIR/member/snap/db` file. This happens when the server had migrated from v2 with no previous v3 data. This also prevents accidental v3 data loss (e.g. `db` file might have been moved). etcd requires that upgrades after the v3 migration happen only with v3 data present. Do not upgrade to newer v3 versions until v3.0 server contains v3 data.
#### Upgrade requirements
To upgrade an existing etcd deployment to 3.0, the running cluster must be 2.3 or greater. If it's before 2.3, please upgrade to [2.3](https://github.com/coreos/etcd/releases/tag/v2.3.0) before upgrading to 3.0.
To upgrade an existing etcd deployment to 3.0, the running cluster must be 2.3 or greater. If it's before 2.3, please upgrade to [2.3](https://github.com/coreos/etcd/releases/tag/v2.3.8) before upgrading to 3.0.
Also, to ensure a smooth rolling upgrade, the running cluster must be healthy. Check the health of the cluster by using the `etcdctl cluster-health` command before proceeding.
@ -52,7 +54,7 @@ member 8211f1d0f64f3269 is healthy: got healthy result from http://localhost:123
cluster is healthy
$ curl http://localhost:2379/version
{"etcdserver":"2.3.x","etcdcluster":"2.3.0"}
{"etcdserver":"2.3.x","etcdcluster":"2.3.8"}
```
#### 2. Stop the existing etcd process

View File

@ -8,9 +8,20 @@ Before [starting an upgrade](#upgrade-procedure), read through the rest of this
### Upgrade checklists
**NOTE:** When [migrating from v2 with no v3 data](https://github.com/coreos/etcd/issues/9480), etcd server v3.2+ panics when etcd restores from existing snapshots but there is no v3 `ETCD_DATA_DIR/member/snap/db` file. This happens when the server had migrated from v2 with no previous v3 data. This also prevents accidental v3 data loss (e.g. `db` file might have been moved). etcd requires that upgrades after the v3 migration happen only with v3 data present. Do not upgrade to newer v3 versions until v3.0 server contains v3 data.
#### Monitoring
The following metrics from v3.0.x have been deprecated in favor of [go-grpc-prometheus](https://github.com/grpc-ecosystem/go-grpc-prometheus):
- `etcd_grpc_requests_total`
- `etcd_grpc_requests_failed_total`
- `etcd_grpc_active_streams`
- `etcd_grpc_unary_requests_duration_seconds`
#### Upgrade requirements
To upgrade an existing etcd deployment to 3.1, the running cluster must be 3.0 or greater. If it's before 3.0, please upgrade to [3.0](https://github.com/coreos/etcd/releases/tag/v3.0.16) before upgrading to 3.1.
To upgrade an existing etcd deployment to 3.1, the running cluster must be 3.0 or greater. If it's before 3.0, please [upgrade to 3.0](upgrade_3_0.md) before upgrading to 3.1.
Also, to ensure a smooth rolling upgrade, the running cluster must be healthy. Check the health of the cluster by using the `etcdctl endpoint health` command before proceeding.

View File

@ -6,9 +6,167 @@ In the general case, upgrading from etcd 3.1 to 3.2 can be a zero-downtime, roll
Before [starting an upgrade](#upgrade-procedure), read through the rest of this guide to prepare.
### Client upgrade checklists
### Upgrade checklists
3.2 introduces two breaking changes.
**NOTE:** When [migrating from v2 with no v3 data](https://github.com/coreos/etcd/issues/9480), etcd server v3.2+ panics when etcd restores from existing snapshots but there is no v3 `ETCD_DATA_DIR/member/snap/db` file. This happens when the server had migrated from v2 with no previous v3 data. This also prevents accidental v3 data loss (e.g. `db` file might have been moved). etcd requires that upgrades after the v3 migration happen only with v3 data present. Do not upgrade to newer v3 versions until v3.0 server contains v3 data.
Highlighted breaking changes in 3.2.
#### Change in default `snapshot-count` value
The default value of `--snapshot-count` has [changed from 10,000 to 100,000](https://github.com/coreos/etcd/pull/7160). A higher snapshot count means etcd holds Raft entries in memory for longer before discarding old entries. It is a trade-off between less frequent snapshotting and [higher memory usage](https://github.com/kubernetes/kubernetes/issues/60589#issuecomment-371977156). A higher `--snapshot-count` manifests as higher memory usage, while retaining more Raft entries helps with the availability of slow followers: the leader is still able to replicate its logs to followers, rather than forcing followers to rebuild their stores from leader snapshots.
#### Change in gRPC dependency (>=3.2.10)
3.2.10 or later now requires [grpc/grpc-go](https://github.com/grpc/grpc-go/releases) `v1.7.5` (<=3.2.9 requires `v1.2.1`).
##### Deprecate `grpclog.Logger`
`grpclog.Logger` has been deprecated in favor of [`grpclog.LoggerV2`](https://github.com/grpc/grpc-go/blob/master/grpclog/loggerv2.go). `clientv3.Logger` is now `grpclog.LoggerV2`.
Before
```go
import "github.com/coreos/etcd/clientv3"
clientv3.SetLogger(log.New(os.Stderr, "grpc: ", 0))
```
After
```go
import "github.com/coreos/etcd/clientv3"
import "google.golang.org/grpc/grpclog"
clientv3.SetLogger(grpclog.NewLoggerV2(os.Stderr, os.Stderr, os.Stderr))
// log.New above cannot be used (it does not implement the grpclog.LoggerV2 interface)
```
##### Deprecate `grpc.ErrClientConnTimeout`
Previously, the `grpc.ErrClientConnTimeout` error was returned on client dial time-outs. 3.2 instead returns `context.DeadlineExceeded` (see [#8504](https://github.com/coreos/etcd/issues/8504)).
Before
```go
// expect dial time-out on ipv4 blackhole
_, err := clientv3.New(clientv3.Config{
Endpoints: []string{"http://254.0.0.1:12345"},
DialTimeout: 2 * time.Second,
})
if err == grpc.ErrClientConnTimeout {
// handle errors
}
```
After
```go
_, err := clientv3.New(clientv3.Config{
Endpoints: []string{"http://254.0.0.1:12345"},
DialTimeout: 2 * time.Second,
})
if err == context.DeadlineExceeded {
// handle errors
}
```
#### Change in maximum request size limits (>=3.2.10)
3.2.10 and 3.2.11 allow custom request size limits on the server side. 3.2.12 or later allows custom request size limits for both server and **client side**. In previous versions (v3.2.10, v3.2.11), client response size was limited to only 4 MiB.
Server-side request limits can be configured with `--max-request-bytes` flag:
```bash
# limits request size to 1.5 KiB
etcd --max-request-bytes 1536
# client writes exceeding 1.5 KiB will be rejected
etcdctl put foo [LARGE VALUE...]
# etcdserver: request is too large
```
Or configure `embed.Config.MaxRequestBytes` field:
```go
import "github.com/coreos/etcd/embed"
import "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
// limit requests to 5 MiB
cfg := embed.NewConfig()
cfg.MaxRequestBytes = 5 * 1024 * 1024
// client writes exceeding 5 MiB will be rejected
_, err := cli.Put(ctx, "foo", [LARGE VALUE...])
err == rpctypes.ErrRequestTooLarge
```
**If not specified, server-side limit defaults to 1.5 MiB**.
Client-side request limits must be configured based on server-side limits.
```bash
# limits request size to 1 MiB
etcd --max-request-bytes 1048576
```
```go
import "github.com/coreos/etcd/clientv3"
cli, _ := clientv3.New(clientv3.Config{
Endpoints: []string{"127.0.0.1:2379"},
MaxCallSendMsgSize: 2 * 1024 * 1024,
MaxCallRecvMsgSize: 3 * 1024 * 1024,
})
// client writes exceeding "--max-request-bytes" will be rejected from etcd server
_, err := cli.Put(ctx, "foo", strings.Repeat("a", 1*1024*1024+5))
err == rpctypes.ErrRequestTooLarge
// client writes exceeding "MaxCallSendMsgSize" will be rejected from client-side
_, err = cli.Put(ctx, "foo", strings.Repeat("a", 5*1024*1024))
err.Error() == "rpc error: code = ResourceExhausted desc = grpc: trying to send message larger than max (5242890 vs. 2097152)"
// some writes under limits
for i := range []int{0,1,2,3,4} {
_, err = cli.Put(ctx, fmt.Sprintf("foo%d", i), strings.Repeat("a", 1*1024*1024-500))
if err != nil {
panic(err)
}
}
// client reads exceeding "MaxCallRecvMsgSize" will be rejected from client-side
_, err = cli.Get(ctx, "foo", clientv3.WithPrefix())
err.Error() == "rpc error: code = ResourceExhausted desc = grpc: received message larger than max (5240509 vs. 3145728)"
```
**If not specified, client-side send limit defaults to 2 MiB (1.5 MiB + gRPC overhead bytes) and receive limit to `math.MaxInt32`**. Please see [clientv3 godoc](https://godoc.org/github.com/coreos/etcd/clientv3#Config) for more detail.
#### Change in raw gRPC client wrappers
3.2.12 or later changes the function signatures of `clientv3` gRPC client wrapper. This change was needed to support [custom `grpc.CallOption` on message size limits](https://github.com/coreos/etcd/pull/9047).
Before and after
```diff
-func NewKVFromKVClient(remote pb.KVClient) KV {
+func NewKVFromKVClient(remote pb.KVClient, c *Client) KV {
-func NewClusterFromClusterClient(remote pb.ClusterClient) Cluster {
+func NewClusterFromClusterClient(remote pb.ClusterClient, c *Client) Cluster {
-func NewLeaseFromLeaseClient(remote pb.LeaseClient, keepAliveTimeout time.Duration) Lease {
+func NewLeaseFromLeaseClient(remote pb.LeaseClient, c *Client, keepAliveTimeout time.Duration) Lease {
-func NewMaintenanceFromMaintenanceClient(remote pb.MaintenanceClient) Maintenance {
+func NewMaintenanceFromMaintenanceClient(remote pb.MaintenanceClient, c *Client) Maintenance {
-func NewWatchFromWatchClient(wc pb.WatchClient) Watcher {
+func NewWatchFromWatchClient(wc pb.WatchClient, c *Client) Watcher {
```
#### Change in `clientv3.Lease.TimeToLive` API
Previously, `clientv3.Lease.TimeToLive` API returned `lease.ErrLeaseNotFound` on non-existent lease ID. 3.2 instead returns TTL=-1 in its response and no error (see [#7305](https://github.com/coreos/etcd/pull/7305)).
@ -30,11 +188,35 @@ resp.TTL == -1
err == nil
```
#### Change in `clientv3.NewFromConfigFile`
`clientv3.NewFromConfigFile` is moved to `yaml.NewConfig`.
Before
```go
import "github.com/coreos/etcd/clientv3"
clientv3.NewFromConfigFile
```
After
```go
import clientv3yaml "github.com/coreos/etcd/clientv3/yaml"
clientv3yaml.NewConfig
```
#### Change in `--listen-peer-urls` and `--listen-client-urls`
3.2 now rejects domain names for `--listen-peer-urls` and `--listen-client-urls` (3.1 only prints out warnings), since a domain name is invalid for network interface binding. Make sure that those URLs are properly formatted as `scheme://IP:port`.
See [issue #6336](https://github.com/coreos/etcd/issues/6336) for more context.
### Server upgrade checklists
#### Upgrade requirements
To upgrade an existing etcd deployment to 3.2, the running cluster must be 3.1 or greater. If it's before 3.1, please upgrade to [3.1](https://github.com/coreos/etcd/releases/tag/v3.1.7) before upgrading to 3.2.
To upgrade an existing etcd deployment to 3.2, the running cluster must be 3.1 or greater. If it's before 3.1, please [upgrade to 3.1](upgrade_3_1.md) before upgrading to 3.2.
Also, to ensure a smooth rolling upgrade, the running cluster must be healthy. Check the health of the cluster by using the `etcdctl endpoint health` command before proceeding.

View File

@ -0,0 +1,476 @@
## Upgrade etcd from 3.2 to 3.3
In the general case, upgrading from etcd 3.2 to 3.3 can be a zero-downtime, rolling upgrade:
- one by one, stop the etcd v3.2 processes and replace them with etcd v3.3 processes
- after running all v3.3 processes, new features in v3.3 are available to the cluster
Before [starting an upgrade](#upgrade-procedure), read through the rest of this guide to prepare.
### Upgrade checklists
**NOTE:** When [migrating from v2 with no v3 data](https://github.com/coreos/etcd/issues/9480), etcd server v3.2+ panics when etcd restores from existing snapshots but there is no v3 `ETCD_DATA_DIR/member/snap/db` file. This happens when the server had migrated from v2 with no previous v3 data. This also prevents accidental v3 data loss (e.g. `db` file might have been moved). etcd requires that upgrades after the v3 migration happen only with v3 data present. Do not upgrade to newer v3 versions until v3.0 server contains v3 data.
Highlighted breaking changes in 3.3.
#### Change in `etcdserver.EtcdServer` struct
`etcdserver.EtcdServer` has changed the type of its member field `*etcdserver.ServerConfig` to `etcdserver.ServerConfig`. And `etcdserver.NewServer` now takes `etcdserver.ServerConfig`, instead of `*etcdserver.ServerConfig`.
Before and after (e.g. [k8s.io/kubernetes/test/e2e_node/services/etcd.go](https://github.com/kubernetes/kubernetes/blob/release-1.8/test/e2e_node/services/etcd.go#L50-L55))
```diff
import "github.com/coreos/etcd/etcdserver"
type EtcdServer struct {
*etcdserver.EtcdServer
- config *etcdserver.ServerConfig
+ config etcdserver.ServerConfig
}
func NewEtcd(dataDir string) *EtcdServer {
- config := &etcdserver.ServerConfig{
+ config := etcdserver.ServerConfig{
DataDir: dataDir,
...
}
return &EtcdServer{config: config}
}
func (e *EtcdServer) Start() error {
var err error
e.EtcdServer, err = etcdserver.NewServer(e.config)
...
```
#### Change in `embed.EtcdServer` struct
Field `LogOutput` is added to `embed.Config`:
```diff
package embed
type Config struct {
Debug bool `json:"debug"`
LogPkgLevels string `json:"log-package-levels"`
+ LogOutput string `json:"log-output"`
...
```
Previously, gRPC server warnings were logged in etcdserver.
```
WARNING: 2017/11/02 11:35:51 grpc: addrConn.resetTransport failed to create client transport: connection error: desc = "transport: Error while dialing dial tcp: operation was canceled"; Reconnecting to {localhost:2379 <nil>}
WARNING: 2017/11/02 11:35:51 grpc: addrConn.resetTransport failed to create client transport: connection error: desc = "transport: Error while dialing dial tcp: operation was canceled"; Reconnecting to {localhost:2379 <nil>}
```
From v3.3, gRPC server logs are disabled by default.
```go
import "github.com/coreos/etcd/embed"
cfg := &embed.Config{Debug: false}
cfg.SetupLogging()
```
Set `embed.Config.Debug` field to `true` to enable gRPC server logs.
#### Change in `/health` endpoint response
Previously, `[endpoint]:[client-port]/health` returned manually marshaled JSON value. 3.3 now defines [`etcdhttp.Health`](https://godoc.org/github.com/coreos/etcd/etcdserver/api/etcdhttp#Health) struct.
Note that in v3.3.0-rc.0, v3.3.0-rc.1, and v3.3.0-rc.2, `etcdhttp.Health` had boolean type `"health"` and `"errors"` fields. For backward compatibility, we reverted the `"health"` field to `string` type and removed the `"errors"` field. Further health information will be provided in separate APIs.
```bash
$ curl http://localhost:2379/health
{"health":"true"}
```
#### Change in gRPC gateway HTTP endpoints (replaced `/v3alpha` with `/v3beta`)
Before
```bash
curl -L http://localhost:2379/v3alpha/kv/put \
-X POST -d '{"key": "Zm9v", "value": "YmFy"}'
```
After
```bash
curl -L http://localhost:2379/v3beta/kv/put \
-X POST -d '{"key": "Zm9v", "value": "YmFy"}'
```
Requests to `/v3alpha` endpoints will redirect to `/v3beta`, and `/v3alpha` will be removed in 3.4 release.
#### Change in maximum request size limits
3.3 now allows custom request size limits for both server and **client side**. In previous versions (v3.2.10, v3.2.11), client response size was limited to only 4 MiB.
Server-side request limits can be configured with `--max-request-bytes` flag:
```bash
# limits request size to 1.5 KiB
etcd --max-request-bytes 1536
# client writes exceeding 1.5 KiB will be rejected
etcdctl put foo [LARGE VALUE...]
# etcdserver: request is too large
```
Or configure `embed.Config.MaxRequestBytes` field:
```go
import "github.com/coreos/etcd/embed"
import "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
// limit requests to 5 MiB
cfg := embed.NewConfig()
cfg.MaxRequestBytes = 5 * 1024 * 1024
// client writes exceeding 5 MiB will be rejected
_, err := cli.Put(ctx, "foo", [LARGE VALUE...])
err == rpctypes.ErrRequestTooLarge
```
**If not specified, server-side limit defaults to 1.5 MiB**.
Client-side request limits must be configured based on server-side limits.
```bash
# limits request size to 1 MiB
etcd --max-request-bytes 1048576
```
```go
import "github.com/coreos/etcd/clientv3"
cli, _ := clientv3.New(clientv3.Config{
Endpoints: []string{"127.0.0.1:2379"},
MaxCallSendMsgSize: 2 * 1024 * 1024,
MaxCallRecvMsgSize: 3 * 1024 * 1024,
})
// client writes exceeding "--max-request-bytes" will be rejected from etcd server
_, err := cli.Put(ctx, "foo", strings.Repeat("a", 1*1024*1024+5))
err == rpctypes.ErrRequestTooLarge
// client writes exceeding "MaxCallSendMsgSize" will be rejected from client-side
_, err = cli.Put(ctx, "foo", strings.Repeat("a", 5*1024*1024))
err.Error() == "rpc error: code = ResourceExhausted desc = grpc: trying to send message larger than max (5242890 vs. 2097152)"
// some writes under limits
for i := range []int{0,1,2,3,4} {
_, err = cli.Put(ctx, fmt.Sprintf("foo%d", i), strings.Repeat("a", 1*1024*1024-500))
if err != nil {
panic(err)
}
}
// client reads exceeding "MaxCallRecvMsgSize" will be rejected from client-side
_, err = cli.Get(ctx, "foo", clientv3.WithPrefix())
err.Error() == "rpc error: code = ResourceExhausted desc = grpc: received message larger than max (5240509 vs. 3145728)"
```
**If not specified, client-side send limit defaults to 2 MiB (1.5 MiB + gRPC overhead bytes) and receive limit to `math.MaxInt32`**. Please see [clientv3 godoc](https://godoc.org/github.com/coreos/etcd/clientv3#Config) for more detail.
#### Change in raw gRPC client wrappers
3.3 changes the function signatures of `clientv3` gRPC client wrapper. This change was needed to support [custom `grpc.CallOption` on message size limits](https://github.com/coreos/etcd/pull/9047).
Before and after
```diff
-func NewKVFromKVClient(remote pb.KVClient) KV {
+func NewKVFromKVClient(remote pb.KVClient, c *Client) KV {
-func NewClusterFromClusterClient(remote pb.ClusterClient) Cluster {
+func NewClusterFromClusterClient(remote pb.ClusterClient, c *Client) Cluster {
-func NewLeaseFromLeaseClient(remote pb.LeaseClient, keepAliveTimeout time.Duration) Lease {
+func NewLeaseFromLeaseClient(remote pb.LeaseClient, c *Client, keepAliveTimeout time.Duration) Lease {
-func NewMaintenanceFromMaintenanceClient(remote pb.MaintenanceClient) Maintenance {
+func NewMaintenanceFromMaintenanceClient(remote pb.MaintenanceClient, c *Client) Maintenance {
-func NewWatchFromWatchClient(wc pb.WatchClient) Watcher {
+func NewWatchFromWatchClient(wc pb.WatchClient, c *Client) Watcher {
```
#### Change in clientv3 `Snapshot` API error type
Previously, clientv3 `Snapshot` API returned raw `grpc/*status.statusError` type errors. v3.3 now translates those errors to corresponding public error types, to be consistent with other APIs.
Before
```go
import "context"
// reading snapshot with canceled context should error out
ctx, cancel := context.WithCancel(context.Background())
rc, _ := cli.Snapshot(ctx)
cancel()
_, err := io.Copy(f, rc)
err.Error() == "rpc error: code = Canceled desc = context canceled"
// reading snapshot with deadline exceeded should error out
ctx, cancel = context.WithTimeout(context.Background(), time.Second)
defer cancel()
rc, _ = cli.Snapshot(ctx)
time.Sleep(2 * time.Second)
_, err = io.Copy(f, rc)
err.Error() == "rpc error: code = DeadlineExceeded desc = context deadline exceeded"
```
After
```go
import "context"
// reading snapshot with canceled context should error out
ctx, cancel := context.WithCancel(context.Background())
rc, _ := cli.Snapshot(ctx)
cancel()
_, err := io.Copy(f, rc)
err == context.Canceled
// reading snapshot with deadline exceeded should error out
ctx, cancel = context.WithTimeout(context.Background(), time.Second)
defer cancel()
rc, _ = cli.Snapshot(ctx)
time.Sleep(2 * time.Second)
_, err = io.Copy(f, rc)
err == context.DeadlineExceeded
```
#### Change in `etcdctl lease timetolive` command output
Previously, the `lease timetolive LEASE_ID` command on an expired lease printed `-1s` for remaining seconds. 3.3 now outputs clearer messages.
Before
```bash
lease 2d8257079fa1bc0c granted with TTL(0s), remaining(-1s)
```
After
```bash
lease 2d8257079fa1bc0c already expired
```
#### Change in `golang.org/x/net/context` imports
`clientv3` has deprecated `golang.org/x/net/context`. If a project vendors `golang.org/x/net/context` in other code (e.g. etcd generated protocol buffer code) and imports `github.com/coreos/etcd/clientv3`, it requires Go 1.9+ to compile.
Before
```go
import "golang.org/x/net/context"
cli.Put(context.Background(), "f", "v")
```
After
```go
import "context"
cli.Put(context.Background(), "f", "v")
```
#### Change in gRPC dependency
3.3 now requires [grpc/grpc-go](https://github.com/grpc/grpc-go/releases) `v1.7.5`.
##### Deprecate `grpclog.Logger`
`grpclog.Logger` has been deprecated in favor of [`grpclog.LoggerV2`](https://github.com/grpc/grpc-go/blob/master/grpclog/loggerv2.go). `clientv3.Logger` is now `grpclog.LoggerV2`.
Before
```go
import "github.com/coreos/etcd/clientv3"
clientv3.SetLogger(log.New(os.Stderr, "grpc: ", 0))
```
After
```go
import "github.com/coreos/etcd/clientv3"
import "google.golang.org/grpc/grpclog"
clientv3.SetLogger(grpclog.NewLoggerV2(os.Stderr, os.Stderr, os.Stderr))
// log.New above cannot be used (it does not implement the grpclog.LoggerV2 interface)
```
##### Deprecate `grpc.ErrClientConnTimeout`
Previously, the `grpc.ErrClientConnTimeout` error was returned on client dial time-outs. 3.3 instead returns `context.DeadlineExceeded` (see [#8504](https://github.com/coreos/etcd/issues/8504)).
Before
```go
// expect dial time-out on ipv4 blackhole
_, err := clientv3.New(clientv3.Config{
Endpoints: []string{"http://254.0.0.1:12345"},
DialTimeout: 2 * time.Second,
})
if err == grpc.ErrClientConnTimeout {
// handle errors
}
```
After
```go
_, err := clientv3.New(clientv3.Config{
Endpoints: []string{"http://254.0.0.1:12345"},
DialTimeout: 2 * time.Second,
})
if err == context.DeadlineExceeded {
// handle errors
}
```
#### Change in official container registry
etcd now uses [`gcr.io/etcd-development/etcd`](https://gcr.io/etcd-development/etcd) as a primary container registry, and [`quay.io/coreos/etcd`](https://quay.io/coreos/etcd) as secondary.
Before
```bash
docker pull quay.io/coreos/etcd:v3.2.5
```
After
```bash
docker pull gcr.io/etcd-development/etcd:v3.3.0
```
### Server upgrade checklists
#### Upgrade requirements
To upgrade an existing etcd deployment to 3.3, the running cluster must be 3.2 or greater. If it's before 3.2, please [upgrade to 3.2](upgrade_3_2.md) before upgrading to 3.3.
Also, to ensure a smooth rolling upgrade, the running cluster must be healthy. Check the health of the cluster by using the `etcdctl endpoint health` command before proceeding.
#### Preparation
Before upgrading etcd, always test the services relying on etcd in a staging environment before deploying the upgrade to the production environment.
Before beginning, [backup the etcd data](../op-guide/maintenance.md#snapshot-backup). Should something go wrong with the upgrade, it is possible to use this backup to [downgrade](#downgrade) back to existing etcd version. Please note that the `snapshot` command only backs up the v3 data. For v2 data, see [backing up v2 datastore](../v2/admin_guide.md#backing-up-the-datastore).
#### Mixed versions
While upgrading, an etcd cluster supports mixed versions of etcd members, and operates with the protocol of the lowest common version. The cluster is only considered upgraded once all of its members are upgraded to version 3.3. Internally, etcd members negotiate with each other to determine the overall cluster version, which controls the reported version and the supported features.
#### Limitations
Note: If the cluster only has v3 data and no v2 data, it is not subject to this limitation.
If the cluster is serving a v2 data set larger than 50MB, each newly upgraded member may take up to two minutes to catch up with the existing cluster. Check the size of a recent snapshot to estimate the total data size. In other words, it is safest to wait for 2 minutes between upgrading each member.
For a much larger total data size, 100MB or more, this one-time process might take even more time. Administrators of very large etcd clusters of this magnitude can feel free to contact the [etcd team][etcd-contact] before upgrading, and we'll be happy to provide advice on the procedure.
#### Downgrade
If all members have been upgraded to v3.3, the cluster will be upgraded to v3.3, and downgrade from this completed state is **not possible**. If any single member is still v3.2, however, the cluster and its operations remain "v3.2", and it is possible from this mixed cluster state to return to using a v3.2 etcd binary on all members.
Please [backup the data directory](../op-guide/maintenance.md#snapshot-backup) of all etcd members to make downgrading the cluster possible even after it has been completely upgraded.
### Upgrade procedure
This example shows how to upgrade a 3-member v3.2 etcd cluster running on a local machine.
#### 1. Check upgrade requirements
Is the cluster healthy and running v3.2.x?
```
$ ETCDCTL_API=3 etcdctl endpoint health --endpoints=localhost:2379,localhost:22379,localhost:32379
localhost:2379 is healthy: successfully committed proposal: took = 6.600684ms
localhost:22379 is healthy: successfully committed proposal: took = 8.540064ms
localhost:32379 is healthy: successfully committed proposal: took = 8.763432ms
$ curl http://localhost:2379/version
{"etcdserver":"3.2.7","etcdcluster":"3.2.0"}
```
#### 2. Stop the existing etcd process
When each etcd process is stopped, expected errors will be logged by other cluster members. This is normal since a cluster member connection has been (temporarily) broken:
```
14:13:31.491746 I | raft: c89feb932daef420 [term 3] received MsgTimeoutNow from 6d4f535bae3ab960 and starts an election to get leadership.
14:13:31.491769 I | raft: c89feb932daef420 became candidate at term 4
14:13:31.491788 I | raft: c89feb932daef420 received MsgVoteResp from c89feb932daef420 at term 4
14:13:31.491797 I | raft: c89feb932daef420 [logterm: 3, index: 9] sent MsgVote request to 6d4f535bae3ab960 at term 4
14:13:31.491805 I | raft: c89feb932daef420 [logterm: 3, index: 9] sent MsgVote request to 9eda174c7df8a033 at term 4
14:13:31.491815 I | raft: raft.node: c89feb932daef420 lost leader 6d4f535bae3ab960 at term 4
14:13:31.524084 I | raft: c89feb932daef420 received MsgVoteResp from 6d4f535bae3ab960 at term 4
14:13:31.524108 I | raft: c89feb932daef420 [quorum:2] has received 2 MsgVoteResp votes and 0 vote rejections
14:13:31.524123 I | raft: c89feb932daef420 became leader at term 4
14:13:31.524136 I | raft: raft.node: c89feb932daef420 elected leader c89feb932daef420 at term 4
14:13:31.592650 W | rafthttp: lost the TCP streaming connection with peer 6d4f535bae3ab960 (stream MsgApp v2 reader)
14:13:31.592825 W | rafthttp: lost the TCP streaming connection with peer 6d4f535bae3ab960 (stream Message reader)
14:13:31.693275 E | rafthttp: failed to dial 6d4f535bae3ab960 on stream Message (dial tcp [::1]:2380: getsockopt: connection refused)
14:13:31.693289 I | rafthttp: peer 6d4f535bae3ab960 became inactive
14:13:31.936678 W | rafthttp: lost the TCP streaming connection with peer 6d4f535bae3ab960 (stream Message writer)
```
It's a good idea at this point to [backup the etcd data](../op-guide/maintenance.md#snapshot-backup) to provide a downgrade path should any problems occur:
```
$ etcdctl snapshot save backup.db
```
#### 3. Drop-in etcd v3.3 binary and start the new etcd process
The new v3.3 etcd will publish its information to the cluster:
```
14:14:25.363225 I | etcdserver: published {Name:s1 ClientURLs:[http://localhost:2379]} to cluster a9ededbffcb1b1f1
```
Verify that each member, and then the entire cluster, becomes healthy with the new v3.3 etcd binary:
```
$ ETCDCTL_API=3 etcdctl endpoint health --endpoints=localhost:2379,localhost:22379,localhost:32379
localhost:22379 is healthy: successfully committed proposal: took = 5.540129ms
localhost:32379 is healthy: successfully committed proposal: took = 7.321771ms
localhost:2379 is healthy: successfully committed proposal: took = 10.629901ms
```
Upgraded members will log warnings like the following until the entire cluster is upgraded. This is expected and will cease after all etcd cluster members are upgraded to v3.3:
```
14:15:17.071804 W | etcdserver: member c89feb932daef420 has a higher version 3.3.0
14:15:21.073110 W | etcdserver: the local etcd version 3.2.7 is not up-to-date
14:15:21.073142 W | etcdserver: member 6d4f535bae3ab960 has a higher version 3.3.0
14:15:21.073157 W | etcdserver: the local etcd version 3.2.7 is not up-to-date
14:15:21.073164 W | etcdserver: member c89feb932daef420 has a higher version 3.3.0
```
#### 4. Repeat step 2 to step 3 for all other members
#### 5. Finish
When all members are upgraded, the cluster will report upgrading to 3.3 successfully:
```
14:15:54.536901 N | etcdserver/membership: updated the cluster version from 3.2 to 3.3
14:15:54.537035 I | etcdserver/api: enabled capabilities for version 3.3
```
```
$ ETCDCTL_API=3 etcdctl endpoint health --endpoints=localhost:2379,localhost:22379,localhost:32379
localhost:2379 is healthy: successfully committed proposal: took = 2.312897ms
localhost:22379 is healthy: successfully committed proposal: took = 2.553476ms
localhost:32379 is healthy: successfully committed proposal: took = 2.517902ms
```
[etcd-contact]: https://groups.google.com/forum/#!forum/etcd-dev

View File

@ -0,0 +1,171 @@
## Upgrade etcd from 3.3 to 3.4
In the general case, upgrading from etcd 3.3 to 3.4 can be a zero-downtime, rolling upgrade:
- one by one, stop the etcd v3.3 processes and replace them with etcd v3.4 processes
- after running all v3.4 processes, new features in v3.4 are available to the cluster
Before [starting an upgrade](#upgrade-procedure), read through the rest of this guide to prepare.
### Upgrade checklists
**NOTE:** When [migrating from v2 with no v3 data](https://github.com/coreos/etcd/issues/9480), etcd server v3.2+ panics when etcd restores from existing snapshots but there is no v3 `ETCD_DATA_DIR/member/snap/db` file. This happens when the server has migrated from v2 with no previous v3 data. This also prevents accidental v3 data loss (e.g. the `db` file might have been moved). etcd requires that post v3 migration can only happen with v3 data. Do not upgrade to newer v3 versions until the v3.0 server contains v3 data.
Highlighted breaking changes in 3.4.
#### Change in `etcd` flags
`--ca-file` and `--peer-ca-file` flags are deprecated; they have been deprecated since v2.1.
```diff
-etcd --ca-file ca-client.crt
+etcd --trusted-ca-file ca-client.crt
```
```diff
-etcd --peer-ca-file ca-peer.crt
+etcd --peer-trusted-ca-file ca-peer.crt
```
#### Change in `pkg/transport`
Deprecated `pkg/transport.TLSInfo.CAFile` field.
```diff
import "github.com/coreos/etcd/pkg/transport"
tlsInfo := transport.TLSInfo{
CertFile: "/tmp/test-certs/test.pem",
KeyFile: "/tmp/test-certs/test-key.pem",
- CAFile: "/tmp/test-certs/trusted-ca.pem",
+ TrustedCAFile: "/tmp/test-certs/trusted-ca.pem",
}
tlsConfig, err := tlsInfo.ClientConfig()
if err != nil {
panic(err)
}
```
### Server upgrade checklists
#### Upgrade requirements
To upgrade an existing etcd deployment to 3.4, the running cluster must be 3.3 or greater. If it's before 3.3, please [upgrade to 3.3](upgrade_3_3.md) before upgrading to 3.4.
Also, to ensure a smooth rolling upgrade, the running cluster must be healthy. Check the health of the cluster by using the `etcdctl endpoint health` command before proceeding.
#### Preparation
Before upgrading etcd, always test the services relying on etcd in a staging environment before deploying the upgrade to the production environment.
Before beginning, [backup the etcd data](../op-guide/maintenance.md#snapshot-backup). Should something go wrong with the upgrade, it is possible to use this backup to [downgrade](#downgrade) back to existing etcd version. Please note that the `snapshot` command only backs up the v3 data. For v2 data, see [backing up v2 datastore](../v2/admin_guide.md#backing-up-the-datastore).
#### Mixed versions
While upgrading, an etcd cluster supports mixed versions of etcd members, and operates with the protocol of the lowest common version. The cluster is only considered upgraded once all of its members are upgraded to version 3.4. Internally, etcd members negotiate with each other to determine the overall cluster version, which controls the reported version and the supported features.
#### Limitations
Note: If the cluster only has v3 data and no v2 data, it is not subject to this limitation.
If the cluster is serving a v2 data set larger than 50MB, each newly upgraded member may take up to two minutes to catch up with the existing cluster. Check the size of a recent snapshot to estimate the total data size. In other words, it is safest to wait for 2 minutes between upgrading each member.
For a much larger total data size, 100MB or more, this one-time process might take even more time. Administrators of very large etcd clusters of this magnitude can feel free to contact the [etcd team][etcd-contact] before upgrading, and we'll be happy to provide advice on the procedure.
#### Downgrade
If all members have been upgraded to v3.4, the cluster will be upgraded to v3.4, and downgrade from this completed state is **not possible**. If any single member is still v3.3, however, the cluster and its operations remain "v3.3", and it is possible from this mixed cluster state to return to using a v3.3 etcd binary on all members.
Please [backup the data directory](../op-guide/maintenance.md#snapshot-backup) of all etcd members to make downgrading the cluster possible even after it has been completely upgraded.
### Upgrade procedure
This example shows how to upgrade a 3-member v3.3 etcd cluster running on a local machine.
#### 1. Check upgrade requirements
Is the cluster healthy and running v3.3.x?
```
$ ETCDCTL_API=3 etcdctl endpoint health --endpoints=localhost:2379,localhost:22379,localhost:32379
localhost:2379 is healthy: successfully committed proposal: took = 6.600684ms
localhost:22379 is healthy: successfully committed proposal: took = 8.540064ms
localhost:32379 is healthy: successfully committed proposal: took = 8.763432ms
$ curl http://localhost:2379/version
{"etcdserver":"3.3.0","etcdcluster":"3.3.0"}
```
#### 2. Stop the existing etcd process
When each etcd process is stopped, expected errors will be logged by other cluster members. This is normal since a cluster member connection has been (temporarily) broken:
```
14:13:31.491746 I | raft: c89feb932daef420 [term 3] received MsgTimeoutNow from 6d4f535bae3ab960 and starts an election to get leadership.
14:13:31.491769 I | raft: c89feb932daef420 became candidate at term 4
14:13:31.491788 I | raft: c89feb932daef420 received MsgVoteResp from c89feb932daef420 at term 4
14:13:31.491797 I | raft: c89feb932daef420 [logterm: 3, index: 9] sent MsgVote request to 6d4f535bae3ab960 at term 4
14:13:31.491805 I | raft: c89feb932daef420 [logterm: 3, index: 9] sent MsgVote request to 9eda174c7df8a033 at term 4
14:13:31.491815 I | raft: raft.node: c89feb932daef420 lost leader 6d4f535bae3ab960 at term 4
14:13:31.524084 I | raft: c89feb932daef420 received MsgVoteResp from 6d4f535bae3ab960 at term 4
14:13:31.524108 I | raft: c89feb932daef420 [quorum:2] has received 2 MsgVoteResp votes and 0 vote rejections
14:13:31.524123 I | raft: c89feb932daef420 became leader at term 4
14:13:31.524136 I | raft: raft.node: c89feb932daef420 elected leader c89feb932daef420 at term 4
14:13:31.592650 W | rafthttp: lost the TCP streaming connection with peer 6d4f535bae3ab960 (stream MsgApp v2 reader)
14:13:31.592825 W | rafthttp: lost the TCP streaming connection with peer 6d4f535bae3ab960 (stream Message reader)
14:13:31.693275 E | rafthttp: failed to dial 6d4f535bae3ab960 on stream Message (dial tcp [::1]:2380: getsockopt: connection refused)
14:13:31.693289 I | rafthttp: peer 6d4f535bae3ab960 became inactive
14:13:31.936678 W | rafthttp: lost the TCP streaming connection with peer 6d4f535bae3ab960 (stream Message writer)
```
It's a good idea at this point to [backup the etcd data](../op-guide/maintenance.md#snapshot-backup) to provide a downgrade path should any problems occur:
```
$ etcdctl snapshot save backup.db
```
#### 3. Drop-in etcd v3.4 binary and start the new etcd process
The new v3.4 etcd will publish its information to the cluster:
```
14:14:25.363225 I | etcdserver: published {Name:s1 ClientURLs:[http://localhost:2379]} to cluster a9ededbffcb1b1f1
```
Verify that each member, and then the entire cluster, becomes healthy with the new v3.4 etcd binary:
```
$ ETCDCTL_API=3 etcdctl endpoint health --endpoints=localhost:2379,localhost:22379,localhost:32379
localhost:22379 is healthy: successfully committed proposal: took = 5.540129ms
localhost:32379 is healthy: successfully committed proposal: took = 7.321771ms
localhost:2379 is healthy: successfully committed proposal: took = 10.629901ms
```
Upgraded members will log warnings like the following until the entire cluster is upgraded. This is expected and will cease after all etcd cluster members are upgraded to v3.4:
```
14:15:17.071804 W | etcdserver: member c89feb932daef420 has a higher version 3.4.0
14:15:21.073110 W | etcdserver: the local etcd version 3.3.0 is not up-to-date
14:15:21.073142 W | etcdserver: member 6d4f535bae3ab960 has a higher version 3.4.0
14:15:21.073157 W | etcdserver: the local etcd version 3.3.0 is not up-to-date
14:15:21.073164 W | etcdserver: member c89feb932daef420 has a higher version 3.4.0
```
#### 4. Repeat step 2 to step 3 for all other members
#### 5. Finish
When all members are upgraded, the cluster will report upgrading to 3.4 successfully:
```
14:15:54.536901 N | etcdserver/membership: updated the cluster version from 3.3 to 3.4
14:15:54.537035 I | etcdserver/api: enabled capabilities for version 3.4
```
```
$ ETCDCTL_API=3 etcdctl endpoint health --endpoints=localhost:2379,localhost:22379,localhost:32379
localhost:2379 is healthy: successfully committed proposal: took = 2.312897ms
localhost:22379 is healthy: successfully committed proposal: took = 2.553476ms
localhost:32379 is healthy: successfully committed proposal: took = 2.517902ms
```
[etcd-contact]: https://groups.google.com/forum/#!forum/etcd-dev

View File

@ -0,0 +1,19 @@
# Upgrading etcd clusters and applications
This section contains documents specific to upgrading etcd clusters and applications.
## Moving from etcd API v2 to API v3
* [Migrate applications from using API v2 to API v3][migrate-apps]
## Upgrading an etcd v3.x cluster
* [Upgrade etcd from 3.0 to 3.1][upgrade-3-1]
* [Upgrade etcd from 3.1 to 3.2][upgrade-3-2]
## Upgrading from etcd v2.3
* [Upgrade a v2.3 cluster to v3.0][upgrade-cluster]
[migrate-apps]: ../op-guide/v2-migration.md
[upgrade-cluster]: upgrade_3_0.md
[upgrade-3-1]: upgrade_3_1.md
[upgrade-3-2]: upgrade_3_2.md

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# Snapshot Migration
You can migrate a snapshot of your data from a v0.4.9+ cluster into a new etcd 2.2 cluster using a snapshot migration. After snapshot migration, the etcd indexes of your data will change. Many etcd applications rely on these indexes to behave correctly. This operation should only be done while all etcd applications are stopped.

View File

@ -1,165 +1,85 @@
# etcd2
# Documentation
[![Go Report Card](https://goreportcard.com/badge/github.com/coreos/etcd)](https://goreportcard.com/report/github.com/coreos/etcd)
[![Build Status](https://travis-ci.org/coreos/etcd.svg?branch=master)](https://travis-ci.org/coreos/etcd)
[![Build Status](https://semaphoreci.com/api/v1/coreos/etcd/branches/master/shields_badge.svg)](https://semaphoreci.com/coreos/etcd)
[![Docker Repository on Quay.io](https://quay.io/repository/coreos/etcd-git/status "Docker Repository on Quay.io")](https://quay.io/repository/coreos/etcd-git)
etcd is a distributed key-value store designed to reliably and quickly preserve and provide access to critical data. It enables reliable distributed coordination through distributed locking, leader elections, and write barriers. An etcd cluster is intended for high availability and permanent data storage and retrieval.
**Note**: The `master` branch may be in an *unstable or even broken state* during development. Please use [releases][github-release] instead of the `master` branch in order to get stable binaries.
This is the etcd v2 documentation set. For more recent versions, please see the [etcd v3 guides][etcd-v3].
![etcd Logo](../../logos/etcd-horizontal-color.png)
## Communicating with etcd v2
etcd is a distributed, consistent key-value store for shared configuration and service discovery, with a focus on being:
Reading and writing into the etcd keyspace is done via a simple, RESTful HTTP API, or using language-specific libraries that wrap the HTTP API with higher level primitives.
* *Simple*: curl'able user-facing API (HTTP+JSON)
* *Secure*: optional SSL client cert authentication
* *Fast*: benchmarked 1000s of writes/s per instance
* *Reliable*: properly distributed using Raft
### Reading and Writing
etcd is written in Go and uses the [Raft][raft] consensus algorithm to manage a highly-available replicated log.
- [Client API Documentation][api]
- [Libraries, Tools, and Language Bindings][libraries]
- [Admin API Documentation][admin-api]
- [Members API][members-api]
etcd is used [in production by many companies](./production-users.md), and the development team stands behind it in critical deployment scenarios, where etcd is frequently teamed with applications such as [Kubernetes][k8s], [fleet][fleet], [locksmith][locksmith], [vulcand][vulcand], and many others.
### Security, Auth, Access control
See [etcdctl][etcdctl] for a simple command line client.
Or feel free to just use `curl`, as in the examples below.
- [Security Model][security]
- [Auth and Security][auth_api]
- [Authentication Guide][authentication]
[raft]: https://raft.github.io/
[k8s]: http://kubernetes.io/
[fleet]: https://github.com/coreos/fleet
[locksmith]: https://github.com/coreos/locksmith
[vulcand]: https://github.com/vulcand/vulcand
[etcdctl]: https://github.com/coreos/etcd/tree/master/etcdctl
## etcd v2 Cluster Administration
## Getting Started
Configuration values are distributed within the cluster for your applications to read. Values can be changed programmatically and smart applications can reconfigure automatically. You'll never again have to run a configuration management tool on every machine in order to change a single config value.
### Getting etcd
### General Info
The easiest way to get etcd is to use one of the pre-built release binaries which are available for OSX, Linux, Windows, AppC (ACI), and Docker. Instructions for using these binaries are on the [GitHub releases page][github-release].
- [etcd Proxies][proxy]
- [Production Users][production-users]
- [Admin Guide][admin_guide]
- [Configuration Flags][configuration]
- [Frequently Asked Questions][faq]
For those wanting to try the very latest version, you can build the latest version of etcd from the `master` branch.
You will first need [*Go*](https://golang.org/) installed on your machine (version 1.5+ is required).
All development occurs on `master`, including new features and bug fixes.
Bug fixes are first targeted at `master` and subsequently ported to release branches, as described in the [branch management][branch-management] guide.
### Initial Setup
[github-release]: https://github.com/coreos/etcd/releases/
[branch-management]: branch_management.md
- [Tuning etcd Clusters][tuning]
- [Discovery Service Protocol][discovery_protocol]
- [Running etcd under Docker][docker_guide]
### Running etcd
### Live Reconfiguration
First start a single-member cluster of etcd:
- [Runtime Configuration][runtime-configuration]
```sh
./bin/etcd
```
### Debugging etcd
This will bring up etcd listening on port 2379 for client communication and on port 2380 for server-to-server communication.
- [Metrics Collection][metrics]
- [Error Code][errorcode]
- [Reporting Bugs][reporting_bugs]
Next, let's set a single key, and then retrieve it:
### Migration
```
curl -L http://127.0.0.1:2379/v2/keys/mykey -XPUT -d value="this is awesome"
curl -L http://127.0.0.1:2379/v2/keys/mykey
```
- [Upgrade etcd to 2.3][upgrade_2_3]
- [Upgrade etcd to 2.2][upgrade_2_2]
- [Upgrade to etcd 2.1][upgrade_2_1]
- [Snapshot Migration (0.4.x to 2.x)][04_to_2_snapshot_migration]
- [Backward Compatibility][backward_compatibility]
You have successfully started an etcd and written a key to the store.
### etcd TCP ports
The [official etcd ports][iana-ports] are 2379 for client requests, and 2380 for peer communication. To maintain compatibility, some etcd configuration and documentation continues to refer to the legacy ports 4001 and 7001, but all new etcd use and discussion should adopt the IANA-assigned ports. The legacy ports 4001 and 7001 will be fully deprecated, and support for their use removed, in future etcd releases.
[iana-ports]: http://www.iana.org/assignments/service-names-port-numbers/service-names-port-numbers.txt
### Running local etcd cluster
First install [goreman](https://github.com/mattn/goreman), which manages Procfile-based applications.
Our [Procfile script](../../V2Procfile) will set up a local example cluster. You can start it with:
```sh
goreman start
```
This will bring up 3 etcd members `infra1`, `infra2` and `infra3` and etcd proxy `proxy`, which runs locally and composes a cluster.
You can write a key to the cluster and retrieve the value back from any member or proxy.
### Next Steps
Now it's time to dig into the full etcd API and other guides.
- Explore the full [API][api].
- Set up a [multi-machine cluster][clustering].
- Learn the [config format, env variables and flags][configuration].
- Find [language bindings and tools][libraries-and-tools].
- Use TLS to [secure an etcd cluster][security].
- [Tune etcd][tuning].
- [Upgrade from 0.4.9+ to 2.2.0][upgrade].
[api]: ./api.md
[clustering]: ./clustering.md
[configuration]: ./configuration.md
[libraries-and-tools]: ./libraries-and-tools.md
[security]: ./security.md
[tuning]: ./tuning.md
[upgrade]: ./04_to_2_snapshot_migration.md
## Contact
- Mailing list: [etcd-dev](https://groups.google.com/forum/?hl=en#!forum/etcd-dev)
- IRC: #[etcd](irc://irc.freenode.org:6667/#etcd) on freenode.org
- Planning/Roadmap: [milestones](https://github.com/coreos/etcd/milestones), [roadmap](../../ROADMAP.md)
- Bugs: [issues](https://github.com/coreos/etcd/issues)
## Contributing
See [CONTRIBUTING](../../CONTRIBUTING.md) for details on submitting patches and the contribution workflow.
## Reporting bugs
See [reporting bugs](reporting_bugs.md) for details about reporting any issue you may encounter.
## Known bugs
[GH518](https://github.com/coreos/etcd/issues/518) is a known bug. Issue is that:
```
curl http://127.0.0.1:2379/v2/keys/foo -XPUT -d value=bar
curl http://127.0.0.1:2379/v2/keys/foo -XPUT -d dir=true -d prevExist=true
```
If the previous node is a key and client tries to overwrite it with `dir=true`, it does not give warnings such as `Not a directory`. Instead, the key is set to empty value.
## Project Details
### Versioning
#### Service Versioning
etcd uses [semantic versioning](http://semver.org)
New minor versions may add additional features to the API.
You can get the version of etcd by issuing a request to /version:
```sh
curl -L http://127.0.0.1:2379/version
```
#### API Versioning
The `v2` API responses should not change after the 2.0.0 release but new features will be added over time.
#### 32-bit and other unsupported systems
etcd has known issues on 32-bit systems due to a bug in the Go runtime. See #[358][358] for more information.
To avoid inadvertently running a possibly unstable etcd server, `etcd` on unsupported architectures will print
a warning message and immediately exit if the environment variable `ETCD_UNSUPPORTED_ARCH` is not set to
the target architecture.
Currently only the amd64 architecture is officially supported by `etcd`.
[358]: https://github.com/coreos/etcd/issues/358
### License
etcd is under the Apache 2.0 license. See the [LICENSE](../../LICENSE) file for details.
[etcd-v3]: ../docs.md
[api]: api.md
[libraries]: libraries-and-tools.md
[admin-api]: other_apis.md
[members-api]: members_api.md
[security]: security.md
[auth_api]: auth_api.md
[authentication]: authentication.md
[proxy]: proxy.md
[production-users]: production-users.md
[admin_guide]: admin_guide.md
[configuration]: configuration.md
[faq]: faq.md
[tuning]: tuning.md
[discovery_protocol]: discovery_protocol.md
[docker_guide]: docker_guide.md
[runtime-configuration]: runtime-configuration.md
[metrics]: metrics.md
[errorcode]: errorcode.md
[reporting_bugs]: reporting_bugs.md
[upgrade_2_3]: upgrade_2_3.md
[upgrade_2_2]: upgrade_2_2.md
[upgrade_2_1]: upgrade_2_1.md
[04_to_2_snapshot_migration]: 04_to_2_snapshot_migration.md
[backward_compatibility]: backward_compatibility.md

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# Administration
## Data Directory
@ -8,7 +13,7 @@ When first started, etcd stores its configuration into a data directory specifie
Configuration is stored in the write ahead log and includes: the local member ID, cluster ID, and initial cluster configuration.
The write ahead log and snapshot files are used during member operation and to recover after a restart.
Having a dedicated disk to store wal files can improve the throughput and stabilize the cluster.
Having a dedicated disk to store wal files can improve the throughput and stabilize the cluster.
It is highly recommended to dedicate a wal disk and set `--wal-dir` to point to a directory on that device for a production cluster deployment.
If a member's data directory is ever lost or corrupted then the user should [remove][remove-a-member] the etcd member from the cluster using the `etcdctl` tool.
@ -51,7 +56,7 @@ $ curl -L http://127.0.0.1:2379/health
You can also use etcdctl to check the cluster-wide health information. It will contact all the members of the cluster and collect the health information for you.
```
$./etcdctl cluster-health
$./etcdctl cluster-health
member 8211f1d0f64f3269 is healthy: got healthy result from http://127.0.0.1:12379
member 91bc3c398fb3c146 is healthy: got healthy result from http://127.0.0.1:22379
member fd422379fda50e48 is healthy: got healthy result from http://127.0.0.1:32379

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# etcd API
## Running a Single Machine Cluster
@ -318,7 +323,7 @@ The first terminal should get the notification and return with the same response
However, the watch command can do more than this.
Using the index, we can watch for commands that have happened in the past.
This is useful for ensuring you don't miss events between watch commands.
This is useful for ensuring you don't miss events between watch commands.
Typically, we watch again from the `modifiedIndex` + 1 of the node we got.
Let's try to watch for the set command of index 7 again:
@ -338,13 +343,13 @@ curl 'http://127.0.0.1:2379/v2/keys/foo?wait=true&waitIndex=8'
Then even if etcd is on index 9 or 800, the first event to occur to the `/foo`
key between 8 and the current index will be returned.
**Note**: etcd only keeps the responses of the most recent 1000 events across all etcd keys.
**Note**: etcd only keeps the responses of the most recent 1000 events across all etcd keys.
It is recommended to send the response to another thread to process immediately
instead of blocking the watch while processing the result.
instead of blocking the watch while processing the result.
#### Watch from cleared event index
If we miss all the 1000 events, we need to recover the current state of the
If we miss all the 1000 events, we need to recover the current state of the
watching key space through a get and then start to watch from the
`X-Etcd-Index` + 1.
@ -366,7 +371,7 @@ To start watch, first we need to fetch the current state of key `/foo`:
curl 'http://127.0.0.1:2379/v2/keys/foo' -vv
```
```
```
< HTTP/1.1 200 OK
< Content-Type: application/json
< X-Etcd-Cluster-Id: 7e27652122e8b2ae
@ -375,7 +380,7 @@ curl 'http://127.0.0.1:2379/v2/keys/foo' -vv
< X-Raft-Term: 2
< Date: Mon, 05 Jan 2015 18:54:43 GMT
< Transfer-Encoding: chunked
<
<
{"action":"get","node":{"key":"/foo","value":"bar","modifiedIndex":7,"createdIndex":7}}
```

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# etcd3 API
TODO: API doc

View File

@ -1,13 +1,18 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# v2 Auth and Security
## etcd Resources
## etcd Resources
There are three types of resources in etcd
1. permission resources: users and roles in the user store
2. key-value resources: key-value pairs in the key-value store
3. settings resources: security settings, auth settings, and dynamic etcd cluster settings (election/heartbeat)
### Permission Resources
### Permission Resources
#### Users
A user is an identity to be authenticated. Each user can have multiple roles. The user has a capability (such as reading or writing) on the resource if one of the roles has that capability.
@ -15,7 +20,7 @@ A user is an identity to be authenticated. Each user can have multiple roles. Th
A user named `root` is required before authentication can be enabled, and it always has the ROOT role. The ROOT role can be granted to multiple users, but `root` is required for recovery purposes.
#### Roles
Each role has exactly one associated Permission List. A permission list exists for each permission on key-value resources.
Each role has exactly one associated Permission List. A permission list exists for each permission on key-value resources.
The special static ROOT (named `root`) role has full permissions on all key-value resources, as well as the permission to manage user resources and settings resources. Only the ROOT role has the permission to manage user resources and modify settings resources. The ROOT role is built-in and does not need to be created.
@ -30,8 +35,8 @@ A Permission List is a list of allowed patterns for that particular permission (
### Key-Value Resources
A key-value resource is a key-value pair in the store. Given a list of matching patterns, permission for any given key in a request is granted if any of the patterns in the list match.
Only prefixes or exact keys are supported. A prefix permission string ends in `*`.
A permission on `/foo` is for that exact key or directory, not its children or recursively. `/foo*` is a prefix that matches `/foo` recursively, and all keys thereunder, and keys with that prefix (e.g. `/foobar`; contrast with the prefix `/foo/*`). `*` alone is permission on the full keyspace.
Only prefixes or exact keys are supported. A prefix permission string ends in `*`.
A permission on `/foo` is for that exact key or directory, not its children or recursively. `/foo*` is a prefix that matches `/foo` recursively, and all keys thereunder, and keys with that prefix (e.g. `/foobar`; contrast with the prefix `/foo/*`). `*` alone is permission on the full keyspace.
### Settings Resources
@ -66,7 +71,7 @@ An Error JSON corresponds to:
}
#### Enable and Disable Authentication
**Get auth status**
GET /v2/auth/enable
@ -215,8 +220,8 @@ PUT /v2/auth/users/charlie
Sent Headers:
Authorization: Basic <BasicAuthString>
Put Body:
JSON struct, above, matching the appropriate name
* Starting password and roles when creating.
JSON struct, above, matching the appropriate name
* Starting password and roles when creating.
* Grant/Revoke/Password filled in when updating (to grant roles, revoke roles, or change the password).
Possible Status Codes:
200 OK
@ -345,7 +350,7 @@ PUT /v2/auth/roles/rkt
401 Unauthorized
404 Not Found (update non-existent roles)
409 Conflict (when granting duplicated permission or revoking non-existent permission)
200 Body:
200 Body:
JSON state of the role
**Remove A Role**

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# Authentication Guide
## Overview
@ -14,7 +19,7 @@ There is one special user, `root`, and there are two special roles, `root` and `
### User `root`
User `root` must be created before security can be activated. It has the `root` role and allows for the changing of anything inside etcd. The idea behind the `root` user is for recovery purposes -- a password is generated and stored somewhere -- and the root role is granted to the administrator accounts on the system. In the future, for troubleshooting and recovery, we will need to assume some access to the system, and future documentation will assume this root user (though anyone with the role will suffice).
User `root` must be created before security can be activated. It has the `root` role and allows for the changing of anything inside etcd. The idea behind the `root` user is for recovery purposes -- a password is generated and stored somewhere -- and the root role is granted to the administrator accounts on the system. In the future, for troubleshooting and recovery, we will need to assume some access to the system, and future documentation will assume this root user (though anyone with the role will suffice).
### Role `root`
@ -104,7 +109,7 @@ $ etcdctl role grant myrolename -path '/foo/bar' -write
$ etcdctl role grant myrolename -path '/pub/*' -readwrite
```
Beware that
Beware that
```
# Give full access to keys under /pub??
@ -133,12 +138,12 @@ $ etcdctl role remove myrolename
## Enabling authentication
The minimal steps to enabling auth are as follows. The administrator can set up users and roles before or after enabling authentication, as a matter of preference.
The minimal steps to enabling auth are as follows. The administrator can set up users and roles before or after enabling authentication, as a matter of preference.
Make sure the root user is created:
```
$ etcdctl user add root
$ etcdctl user add root
New password:
```

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# Backward Compatibility
The main goal of etcd 2.0 release is to improve cluster safety around bootstrapping and dynamic reconfiguration. To do this, we deprecated the old error-prone APIs and provide a new set of APIs.

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../../docs.md#documentation
# Benchmarks
etcd benchmarks will be published regularly and tracked for each release below:

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../../docs.md#documentation
## Physical machines
GCE n1-highcpu-2 machine type

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../../docs.md#documentation
# Benchmarking etcd v2.2.0
## Physical Machines

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../../docs.md#documentation
## Physical machines
GCE n1-highcpu-2 machine type

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../../docs.md#documentation
## Physical machine
GCE n1-standard-2 machine type

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../../docs.md#documentation
## Physical machines
GCE n1-highcpu-2 machine type

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../../docs.md#documentation
# Watch Memory Usage Benchmark
*NOTE*: The watch features are under active development, and their memory usage may change as that development progresses. We do not expect it to significantly increase beyond the figures stated below.
@ -5,10 +10,10 @@
A primary goal of etcd is supporting a very large number of watchers doing a massively large amount of watching. etcd aims to support O(10k) clients, O(100K) watch streams (O(10) streams per client) and O(10M) total watchings (O(100) watching per stream). The memory consumed by each individual watching accounts for the largest portion of etcd's overall usage, and is therefore the focus of current and future optimizations.
Three related components of etcd watch consume physical memory: each `grpc.Conn`, each watch stream, and each instance of the watching activity. `grpc.Conn` maintains the actual TCP connection and other gRPC connection state. Each `grpc.Conn` consumes O(10kb) of memory, and might have multiple watch streams attached.
Three related components of etcd watch consume physical memory: each `grpc.Conn`, each watch stream, and each instance of the watching activity. `grpc.Conn` maintains the actual TCP connection and other gRPC connection state. Each `grpc.Conn` consumes O(10kb) of memory, and might have multiple watch streams attached.
Each watch stream is an independent HTTP2 connection which consumes another O(10kb) of memory.
Multiple watchings might share one watch stream.
Each watch stream is an independent HTTP2 connection which consumes another O(10kb) of memory.
Multiple watchings might share one watch stream.
Watching is the actual struct that tracks the changes on the key-value store. Each watching should only consume < O(1kb).

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../../docs.md#documentation
# Storage Memory Usage Benchmark
<!---todo: link storage to storage design doc-->
@ -60,7 +65,7 @@ GCE n1-standard-2 machine type
In this test, we only benchmark the memory usage of the in-memory index. The goal is to find `c1` and `c2` mentioned above and to understand the hard limit of memory consumption of the storage.
We calculate the memory usage consumption via the Go runtime.ReadMemStats. We calculate the total allocated bytes difference before creating the index and after creating the index. It cannot perfectly reflect the memory usage of the in-memory index itself but can show the rough consumption pattern.
We calculate the memory usage consumption via the Go runtime.ReadMemStats. We calculate the total allocated bytes difference before creating the index and after creating the index. It cannot perfectly reflect the memory usage of the in-memory index itself but can show the rough consumption pattern.
| N | versions | key size | memory usage |
|------|----------|----------|--------------|

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# Branch Management
## Guide

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# Clustering Guide
## Overview

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# Configuration Flags
etcd is configurable through command-line flags and environment variables. Options set on the command line take precedence over those from the environment.

View File

@ -1,8 +1,13 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../../docs.md#documentation
# etcd release guide
The guide talks about how to release a new version of etcd.
The procedure includes some manual steps for sanity checking but it can probably be further scripted. Please keep this document up-to-date if you want to make changes to the release process.
The procedure includes some manual steps for sanity checking but it can probably be further scripted. Please keep this document up-to-date if you want to make changes to the release process.
## Prepare Release

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# Discovery Service Protocol
The discovery service protocol helps a new etcd member discover all other members during the cluster bootstrap phase using a shared discovery URL.

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# Running etcd under Docker
The following guide will show you how to run etcd under Docker using the [static bootstrap process](clustering.md#static).

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# Error Code
======

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# FAQ
## 1) Why can an etcd client read an old version of data when a majority of the etcd cluster members are down?

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# Glossary
This document defines the various terms used in etcd documentation, command line and source code.

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# FAQ
## Initial Bootstrapping UX

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# Versioning
Goal: We want to be able to upgrade an individual peer in an etcd cluster to a newer version of etcd.

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# Libraries and Tools
**Tools**

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# Members API
* [List members](#list-members)

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# Metrics
etcd uses [Prometheus][prometheus] for metrics reporting. The metrics can be used for real-time monitoring and debugging. etcd does not persist its metrics; if a member restarts, the metrics will be reset.
@ -14,9 +19,9 @@ The metrics under the `etcd` prefix are for monitoring and alerting. They are st
### http requests
These metrics describe the serving of requests (non-watch events) served by etcd members in non-proxy mode: total
These metrics describe the serving of requests (non-watch events) served by etcd members in non-proxy mode: total
incoming requests, request failures and processing latency (inc. raft rounds for storage). They are useful for tracking
user-generated traffic hitting the etcd cluster.
user-generated traffic hitting the etcd cluster.
All these metrics are prefixed with `etcd_http_`
@ -28,20 +33,20 @@ All these metrics are prefixed with `etcd_http_`
Example Prometheus queries that may be useful from these metrics (across all etcd members):
* `sum(rate(etcd_http_failed_total{job="etcd"}[1m])) by (method) / sum(rate(etcd_http_events_received_total{job="etcd"}[1m])) by (method)`
* `sum(rate(etcd_http_failed_total{job="etcd"}[1m])) by (method) / sum(rate(etcd_http_events_received_total{job="etcd"}[1m])) by (method)`
Shows the fraction of events that failed by HTTP method across all members, across a time window of `1m`.
* `sum(rate(etcd_http_received_total{job="etcd",method="GET"}[1m])) by (method)`
`sum(rate(etcd_http_received_total{job="etcd",method!="GET"}[1m])) by (method)`
Shows the rate of successful readonly/write queries across all servers, across a time window of `1m`.
* `histogram_quantile(0.9, sum(rate(etcd_http_successful_duration_seconds{job="etcd",method="GET"}[5m]) ) by (le))`
`histogram_quantile(0.9, sum(rate(etcd_http_successful_duration_seconds{job="etcd",method!="GET"}[5m]) ) by (le))`
Show the 0.90-tile latency (in seconds) of read/write (respectively) event handling across all members, with a window of `5m`.
Show the 0.90-tile latency (in seconds) of read/write (respectively) event handling across all members, with a window of `5m`.
### proxy
@ -56,21 +61,21 @@ All these metrics are prefixed with `etcd_proxy_`
| requests_total | Total number of requests by this proxy instance. | Counter(method) |
| handled_total | Total number of fully handled requests, with responses from etcd members. | Counter(method) |
| dropped_total | Total number of dropped requests due to forwarding errors to etcd members.  | Counter(method,error) |
| handling_duration_seconds | Bucketed handling times by HTTP method, including round trip to member instances. | Histogram(method) |
| handling_duration_seconds | Bucketed handling times by HTTP method, including round trip to member instances. | Histogram(method) |
Example Prometheus queries that may be useful from these metrics (across all etcd servers):
* `sum(rate(etcd_proxy_handled_total{job="etcd"}[1m])) by (method)`
Rate of requests (by HTTP method) handled by all proxies, across a window of `1m`.
Rate of requests (by HTTP method) handled by all proxies, across a window of `1m`.
* `histogram_quantile(0.9, sum(rate(handling_duration_seconds{job="etcd",method="GET"}[5m])) by (le))`
`histogram_quantile(0.9, sum(rate(handling_duration_seconds{job="etcd",method!="GET"}[5m])) by (le))`
Show the 0.90-tile latency (in seconds) of handling of user requests across all proxy machines, with a window of `5m`.
Show the 0.90-tile latency (in seconds) of handling of user requests across all proxy machines, with a window of `5m`.
* `sum(rate(etcd_proxy_dropped_total{job="etcd"}[1m])) by (proxying_error)`
Number of failed requests on the proxy. This should be 0; spikes here indicate connectivity issues to the etcd cluster.
## etcd_debugging namespace metrics

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# Miscellaneous APIs
* [Getting the etcd version](#getting-the-etcd-version)

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../../docs.md#documentation
# FreeBSD
Starting with version 0.1.2 both etcd and etcdctl have been ported to FreeBSD and can

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# Production Users
This document tracks people and use cases for etcd in production. By creating a list of production use cases we hope to build a community of advisors that we can reach out to with experience using various etcd applications, operation environments, and cluster sizes. The etcd development team may reach out periodically to check-in on your experience and update this list.

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# Proxy
etcd can run as a transparent proxy. Doing so allows for easy discovery of etcd within your infrastructure, since it can run on each machine as a local service. In this mode, etcd acts as a reverse proxy and forwards client requests to an active etcd cluster. The etcd proxy does not participate in the consensus replication of the etcd cluster, thus it neither increases the resilience nor decreases the write performance of the etcd cluster.

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# Reporting Bugs
If you find bugs or documentation mistakes in the etcd project, please let us know by [opening an issue][etcd-issue]. We treat bugs and mistakes very seriously and believe no issue is too small. Before creating a bug report, please check that an issue reporting the same problem does not already exist.

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../../docs.md#documentation
# Overview
The etcd v3 API is designed to give users a more efficient and cleaner abstraction compared to etcd v2. There are a number of semantic and protocol changes in this new API. For an overview [see Xiang Li's video](https://youtu.be/J5AioGtEPeQ?t=211).

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# Runtime Reconfiguration
etcd comes with support for incremental runtime reconfiguration, which allows users to update the membership of the cluster at run time.
@ -61,9 +66,9 @@ A wrongly updated client URL will not affect the health of the etcd cluster.
#### Update advertise peer URLs
If you would like to update the advertise peer URLs of a member, you have to first update
If you would like to update the advertise peer URLs of a member, you have to first update
it explicitly via member command and then restart the member. The additional action is required
since updating peer URLs changes the cluster wide configuration and can affect the health of the etcd cluster.
since updating peer URLs changes the cluster wide configuration and can affect the health of the etcd cluster.
To update the peer URLs, first, we need to find the target member's ID. You can list all members with `etcdctl`:

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# Design of Runtime Reconfiguration
Runtime reconfiguration is one of the hardest and most error prone features in a distributed system, especially in a consensus based system like etcd.
@ -26,21 +31,21 @@ We think runtime reconfiguration should be a low frequent operation. We made the
If a cluster permanently loses a majority of its members, a new cluster will need to be started from an old data directory to recover the previous state.
It is entirely possible to force removing the failed members from the existing cluster to recover. However, we decided not to support this method since it bypasses the normal consensus committing phase, which is unsafe. If the member to remove is not actually dead or you force the removal of different members through different members in the same cluster, you will end up with a diverged cluster with the same clusterID. This is very dangerous and hard to debug/fix afterwards.
It is entirely possible to force removing the failed members from the existing cluster to recover. However, we decided not to support this method since it bypasses the normal consensus committing phase, which is unsafe. If the member to remove is not actually dead or you force the removal of different members through different members in the same cluster, you will end up with a diverged cluster with the same clusterID. This is very dangerous and hard to debug/fix afterwards.
If you have a correct deployment, the possibility of permanent majority loss is very low. But it is a severe enough problem that it is worth special care. We strongly suggest that you read the [disaster recovery documentation][disaster-recovery] and prepare for permanent majority loss before you put etcd into production.
## Do Not Use Public Discovery Service For Runtime Reconfiguration
The public discovery service should only be used for bootstrapping a cluster. To join a member into an existing cluster, you should use the runtime reconfiguration API.
The public discovery service should only be used for bootstrapping a cluster. To join a member into an existing cluster, you should use the runtime reconfiguration API.
Discovery service is designed for bootstrapping an etcd cluster in the cloud environment, when you do not know the IP addresses of all the members beforehand. After you successfully bootstrap a cluster, the IP addresses of all the members are known. Technically, you should not need the discovery service any more.
It seems that using public discovery service is a convenient way to do runtime reconfiguration, after all discovery service already has all the cluster configuration information. However relying on public discovery service brings troubles:
It seems that using public discovery service is a convenient way to do runtime reconfiguration, after all discovery service already has all the cluster configuration information. However relying on public discovery service brings troubles:
1. it introduces external dependencies for the entire life-cycle of your cluster, not just bootstrap time. If there is a network issue between your cluster and public discovery service, your cluster will suffer from it.
2. public discovery service must reflect correct runtime configuration of your cluster during its life-cycle. It has to provide a security mechanism to avoid bad actions, which is hard.
2. public discovery service must reflect correct runtime configuration of your cluster during its life-cycle. It has to provide a security mechanism to avoid bad actions, which is hard.
3. public discovery service has to keep tens of thousands of cluster configurations. Our public discovery service backend is not ready for that workload.

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# Security Model
etcd supports SSL/TLS as well as authentication through client certificates, both for clients to server as well as peer (server to server / cluster) communication.
@ -16,7 +21,7 @@ etcd takes several certificate related configuration options, either through com
`--key-file=<path>`: Key for the certificate. Must be unencrypted.
`--client-cert-auth`: When this is set etcd will check all incoming HTTPS requests for a client certificate signed by the trusted CA, requests that don't supply a valid client certificate will fail.
`--client-cert-auth`: When this is set etcd will check all incoming HTTPS requests for a client certificate signed by the trusted CA, requests that don't supply a valid client certificate will fail. If [authentication][auth] is enabled, the certificate provides credentials for the user name given by the Common Name field.
`--trusted-ca-file=<path>`: Trusted certificate authority.
@ -191,3 +196,4 @@ If you need your certificate to be signed for your member's FQDN in its Subject
[tls-setup]: ../../hack/tls-setup
[tls-guide]: https://github.com/coreos/docs/blob/master/os/generate-self-signed-certificates.md
[alt-name]: http://wiki.cacert.org/FAQ/subjectAltName
[auth]: authentication.md

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# Tuning
The default settings in etcd should work well for installations on a local network where the average network latency is low.

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# Upgrade etcd to 2.1
In the general case, upgrading from etcd 2.0 to 2.1 can be a zero-downtime, rolling upgrade:
@ -12,11 +17,11 @@ Before [starting an upgrade](#upgrade-procedure), read through the rest of this
To upgrade an existing etcd deployment to 2.1, you must be running 2.0. If you're running a version of etcd before 2.0, you must upgrade to [2.0][v2.0] before upgrading to 2.1.
Also, to ensure a smooth rolling upgrade, your running cluster must be healthy. You can check the health of the cluster by using `etcdctl cluster-health` command.
Also, to ensure a smooth rolling upgrade, your running cluster must be healthy. You can check the health of the cluster by using `etcdctl cluster-health` command.
### Preparedness
### Preparedness
Before upgrading etcd, always test the services relying on etcd in a staging environment before deploying the upgrade to the production environment.
Before upgrading etcd, always test the services relying on etcd in a staging environment before deploying the upgrade to the production environment.
You might also want to [backup your data directory][backup-datastore] for a potential [downgrade](#downgrade).
@ -38,7 +43,7 @@ If you have even more data, this might take more time. If you have a data size l
### Downgrade
If all members have been upgraded to v2.1, the cluster will be upgraded to v2.1, and downgrade is **not possible**. If any member is still v2.0, the cluster will remain in v2.0, and you can go back to use v2.0 binary.
If all members have been upgraded to v2.1, the cluster will be upgraded to v2.1, and downgrade is **not possible**. If any member is still v2.0, the cluster will remain in v2.0, and you can go back to use v2.0 binary.
Please [backup your data directory][backup-datastore] of all etcd members if you want to downgrade the cluster, even if it is upgraded.
@ -96,7 +101,7 @@ member 924e2e83e93f2560 is healthy
member a8266ecf031671f3 is healthy
```
#### 4. Repeat step 2 to step 3 for all other members
#### 4. Repeat step 2 to step 3 for all other members
#### 5. Finish

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
# Upgrade etcd from 2.1 to 2.2
In the general case, upgrading from etcd 2.1 to 2.2 can be a zero-downtime, rolling upgrade:
@ -13,11 +18,11 @@ Before [starting an upgrade](#upgrade-procedure), read through the rest of this
To upgrade an existing etcd deployment to 2.2, you must be running 2.1. If you're running a version of etcd before 2.1, you must upgrade to [2.1][v2.1] before upgrading to 2.2.
Also, to ensure a smooth rolling upgrade, your running cluster must be healthy. You can check the health of the cluster by using `etcdctl cluster-health` command.
Also, to ensure a smooth rolling upgrade, your running cluster must be healthy. You can check the health of the cluster by using `etcdctl cluster-health` command.
### Preparedness
### Preparedness
Before upgrading etcd, always test the services relying on etcd in a staging environment before deploying the upgrade to the production environment.
Before upgrading etcd, always test the services relying on etcd in a staging environment before deploying the upgrade to the production environment.
You might also want to [backup the data directory][backup-datastore] for a potential [downgrade].
@ -31,11 +36,11 @@ Internally, etcd members negotiate with each other to determine the overall etcd
If you have a data size larger than 100MB you should contact us before upgrading, so we can make sure the upgrades work smoothly.
Every etcd 2.2 member will do health checking across the cluster periodically. etcd 2.1 member does not support health checking. During the upgrade, etcd 2.2 member will log warning about the unhealthy state of etcd 2.1 member. You can ignore the warning.
Every etcd 2.2 member will do health checking across the cluster periodically. etcd 2.1 member does not support health checking. During the upgrade, etcd 2.2 member will log warning about the unhealthy state of etcd 2.1 member. You can ignore the warning.
### Downgrade
If all members have been upgraded to v2.2, the cluster will be upgraded to v2.2, and downgrade is **not possible**. If any member is still v2.1, the cluster will remain in v2.1, and you can go back to use v2.1 binary.
If all members have been upgraded to v2.2, the cluster will be upgraded to v2.2, and downgrade is **not possible**. If any member is still v2.1, the cluster will remain in v2.1, and you can go back to use v2.1 binary.
Please [backup the data directory][backup-datastore] of all etcd members if you want to downgrade the cluster, even if it is upgraded.
@ -112,7 +117,7 @@ member a8266ecf031671f3 is healthy: got healthy result from http://localhost:123
cluster is healthy
```
#### 4. Repeat step 2 to step 3 for all other members
#### 4. Repeat step 2 to step 3 for all other members
#### 5. Finish

View File

@ -1,3 +1,8 @@
**This is the documentation for etcd2 releases. Read [etcd3 doc][v3-docs] for etcd3 releases.**
[v3-docs]: ../docs.md#documentation
## Upgrade etcd from 2.2 to 2.3
In the general case, upgrading from etcd 2.2 to 2.3 can be a zero-downtime, rolling upgrade:

521
Makefile Normal file
View File

@ -0,0 +1,521 @@
# run from repository root
# Example:
# make build
# make clean
# make docker-clean
# make docker-start
# make docker-kill
# make docker-remove
# build runs the ./build script with verbose go build flags, then invokes the
# resulting etcd and etcdctl binaries' version commands as a quick smoke test.
.PHONY: build
build:
GO_BUILD_FLAGS="-v" ./build
./bin/etcd --version
ETCDCTL_API=3 ./bin/etcdctl version
# clean removes generated artifacts from the working tree: the codecov file,
# agent-* and coverage directories, coverage profiles, logs, *.etcd data
# directories, the generated gopath trees, the release directory, and files
# in snapshot/integration/clientv3 directories whose names start with a
# 127.0.0.1: or localhost: address prefix.
clean:
rm -f ./codecov
rm -rf ./agent-*
rm -rf ./covdir
rm -f ./*.coverprofile
rm -f ./*.log
rm -f ./bin/Dockerfile-release
rm -rf ./bin/*.etcd
rm -rf ./default.etcd
rm -rf ./tests/e2e/default.etcd
rm -rf ./gopath
rm -rf ./gopath.proto
rm -rf ./release
rm -f ./snapshot/localhost:*
rm -f ./integration/127.0.0.1:* ./integration/localhost:*
rm -f ./clientv3/integration/127.0.0.1:* ./clientv3/integration/localhost:*
rm -f ./clientv3/ordering/127.0.0.1:* ./clientv3/ordering/localhost:*
# docker-clean lists the local images and then prunes them without prompting.
docker-clean:
docker images
docker image prune --force
# docker-start restarts the docker service.
docker-start:
service docker restart
# docker-kill kills every running container; "|| true" keeps the target
# successful when the command fails (e.g. no container is running).
docker-kill:
docker kill `docker ps -q` || true
# docker-remove force-removes all containers and then all images; both steps
# are best-effort ("|| true").
docker-remove:
docker rm --force `docker ps -a -q` || true
docker rmi --force `docker images -q` || true
# Go toolchain version used to select the test image tag (go$(GO_VERSION)).
# GO_VERSION ?= 1.10.3
GO_VERSION ?= 1.8.7
# Short git SHA of HEAD, or "GitNotFound" when git cannot resolve it.
ETCD_VERSION ?= $(shell git rev-parse --short HEAD || echo "GitNotFound")
# Suffix for the per-run log file name, derived from the current time.
# This must be simply expanded (:=): with a recursive "=" every reference
# re-runs the shell, so the file name used by "tee" and the one checked by
# "egrep" could differ if the clock ticks in between, and the negated egrep
# would then pass on a missing file and hide test failures.
TEST_SUFFIX := $(shell date +%s | base64 | head -c 15)
# Options passed as environment assignments in front of ./test.
TEST_OPTS ?= PASSES='unit'
# /tmp inside the test container is a tmpfs by default; set HOST_TMP_DIR to
# bind-mount a host directory there instead.
TMP_DIR_MOUNT_FLAG = --mount type=tmpfs,destination=/tmp
ifdef HOST_TMP_DIR
TMP_DIR_MOUNT_FLAG = --mount type=bind,source=$(HOST_TMP_DIR),destination=/tmp
endif
# Example:
# GO_VERSION=1.8.7 make build-docker-test
# GO_VERSION=1.9.7 make build-docker-test
# make build-docker-test
#
# gcloud docker -- login -u _json_key -p "$(cat /etc/gcp-key-etcd-development.json)" https://gcr.io
# GO_VERSION=1.8.7 make push-docker-test
# GO_VERSION=1.9.7 make push-docker-test
# make push-docker-test
#
# gsutil -m acl ch -u allUsers:R -r gs://artifacts.etcd-development.appspot.com
# GO_VERSION=1.9.7 make pull-docker-test
# make pull-docker-test
# build-docker-test builds the test image tagged go$(GO_VERSION). The
# REPLACE_ME_GO_VERSION placeholder in ./tests/Dockerfile is substituted in
# place (keeping a .bak copy) before the build, and the original file is
# moved back afterwards.
# NOTE(review): if "docker build" fails, the recipe stops before the mv, so
# the substituted Dockerfile and its .bak copy are left behind.
build-docker-test:
$(info GO_VERSION: $(GO_VERSION))
@sed -i.bak 's|REPLACE_ME_GO_VERSION|$(GO_VERSION)|g' ./tests/Dockerfile
docker build \
--tag gcr.io/etcd-development/etcd-test:go$(GO_VERSION) \
--file ./tests/Dockerfile .
@mv ./tests/Dockerfile.bak ./tests/Dockerfile
# push-docker-test pushes the test image for the selected GO_VERSION.
push-docker-test:
$(info GO_VERSION: $(GO_VERSION))
gcloud docker -- push gcr.io/etcd-development/etcd-test:go$(GO_VERSION)
# pull-docker-test pulls the test image for the selected GO_VERSION.
pull-docker-test:
$(info GO_VERSION: $(GO_VERSION))
docker pull gcr.io/etcd-development/etcd-test:go$(GO_VERSION)
# Example:
# make build-docker-test
# make compile-with-docker-test
# make compile-setup-gopath-with-docker-test
# compile-with-docker-test builds etcd inside the test image with the working
# tree bind-mounted at its GOPATH location, then prints the etcd version.
compile-with-docker-test:
$(info GO_VERSION: $(GO_VERSION))
docker run \
--rm \
--mount type=bind,source=`pwd`,destination=/go/src/github.com/coreos/etcd \
gcr.io/etcd-development/etcd-test:go$(GO_VERSION) \
/bin/bash -c "GO_BUILD_FLAGS=-v ./build && ./bin/etcd --version"
# compile-setup-gopath-with-docker-test does the same build with the working
# tree mounted at /etcd (outside GOPATH) and ETCD_SETUP_GOPATH=1 set, then
# removes the ./gopath directory.
compile-setup-gopath-with-docker-test:
$(info GO_VERSION: $(GO_VERSION))
docker run \
--rm \
--mount type=bind,source=`pwd`,destination=/etcd \
gcr.io/etcd-development/etcd-test:go$(GO_VERSION) \
/bin/bash -c "cd /etcd && ETCD_SETUP_GOPATH=1 GO_BUILD_FLAGS=-v ./build && ./bin/etcd --version && rm -rf ./gopath"
# Example:
#
# Local machine:
# TEST_OPTS="PASSES='fmt'" make test
# TEST_OPTS="PASSES='fmt bom dep build unit'" make test
# TEST_OPTS="PASSES='build unit release integration_e2e functional'" make test
# TEST_OPTS="PASSES='build grpcproxy'" make test
#
# Example (test with docker):
# make pull-docker-test
# TEST_OPTS="PASSES='fmt'" make docker-test
# TEST_OPTS="VERBOSE=2 PASSES='unit'" make docker-test
#
# Travis CI (test with docker):
# TEST_OPTS="PASSES='fmt bom dep build unit'" make docker-test
#
# Semaphore CI (test with docker):
# TEST_OPTS="PASSES='build unit release integration_e2e functional'" make docker-test
# HOST_TMP_DIR=/tmp TEST_OPTS="PASSES='build unit release integration_e2e functional'" make docker-test
# TEST_OPTS="GOARCH=386 PASSES='build unit integration_e2e'" make docker-test
#
# grpc-proxy tests (test with docker):
# TEST_OPTS="PASSES='build grpcproxy'" make docker-test
# HOST_TMP_DIR=/tmp TEST_OPTS="PASSES='build grpcproxy'" make docker-test
# test runs ./test on the local machine with TEST_OPTS as environment
# assignments, copying all output into test-$(TEST_SUFFIX).log.
# Because the pipeline's status is that of "tee", failures are detected by the
# final negated egrep instead: the target fails when the log contains a
# failure marker ("--- FAIL:", test timeout panic, or leak report), and the
# matching lines are printed with surrounding context.
.PHONY: test
test:
$(info TEST_OPTS: $(TEST_OPTS))
$(info log-file: test-$(TEST_SUFFIX).log)
$(TEST_OPTS) ./test 2>&1 | tee test-$(TEST_SUFFIX).log
! egrep "(--- FAIL:|panic: test timed out|appears to have leaked)" -B50 -A10 test-$(TEST_SUFFIX).log
# docker-test runs ./test inside the test image. The working tree is
# bind-mounted at its GOPATH location, so the log written by "tee" lands in
# the host working tree, where the negated egrep then scans it for failure
# markers. /tmp in the container is mounted per TMP_DIR_MOUNT_FLAG.
docker-test:
$(info GO_VERSION: $(GO_VERSION))
$(info ETCD_VERSION: $(ETCD_VERSION))
$(info TEST_OPTS: $(TEST_OPTS))
$(info log-file: test-$(TEST_SUFFIX).log)
$(info HOST_TMP_DIR: $(HOST_TMP_DIR))
$(info TMP_DIR_MOUNT_FLAG: $(TMP_DIR_MOUNT_FLAG))
docker run \
--rm \
$(TMP_DIR_MOUNT_FLAG) \
--mount type=bind,source=`pwd`,destination=/go/src/github.com/coreos/etcd \
gcr.io/etcd-development/etcd-test:go$(GO_VERSION) \
/bin/bash -c "$(TEST_OPTS) ./test 2>&1 | tee test-$(TEST_SUFFIX).log"
! egrep "(--- FAIL:|panic: test timed out|appears to have leaked)" -B50 -A10 test-$(TEST_SUFFIX).log
# docker-test-coverage runs the coverage passes ('build build_cov cov') inside
# the test image with COVERDIR=covdir, uploads the result with /codecov, and
# then scans the log for failure markers like the other test targets.
# NOTE(review): the codecov upload token is hard-coded in the recipe below;
# consider supplying it through the environment instead.
docker-test-coverage:
$(info GO_VERSION: $(GO_VERSION))
$(info ETCD_VERSION: $(ETCD_VERSION))
$(info log-file: docker-test-coverage-$(TEST_SUFFIX).log)
$(info HOST_TMP_DIR: $(HOST_TMP_DIR))
$(info TMP_DIR_MOUNT_FLAG: $(TMP_DIR_MOUNT_FLAG))
docker run \
--rm \
$(TMP_DIR_MOUNT_FLAG) \
--mount type=bind,source=`pwd`,destination=/go/src/github.com/coreos/etcd \
gcr.io/etcd-development/etcd-test:go$(GO_VERSION) \
/bin/bash -c "COVERDIR=covdir PASSES='build build_cov cov' ./test 2>&1 | tee docker-test-coverage-$(TEST_SUFFIX).log && /codecov -t 6040de41-c073-4d6f-bbf8-d89256ef31e1"
! egrep "(--- FAIL:|panic: test timed out|appears to have leaked)" -B50 -A10 docker-test-coverage-$(TEST_SUFFIX).log
# Example:
# make compile-with-docker-test
# ETCD_VERSION=v3-test make build-docker-release-master
# ETCD_VERSION=v3-test make push-docker-release-master
# gsutil -m acl ch -u allUsers:R -r gs://artifacts.etcd-development.appspot.com
# build-docker-release-master builds the release image tagged
# $(ETCD_VERSION) using ./bin as the build context. Dockerfile-release is
# copied into ./bin for the build and removed afterwards; the built image is
# then smoke-tested by running the etcd and etcdctl version commands in it.
build-docker-release-master:
$(info ETCD_VERSION: $(ETCD_VERSION))
cp ./Dockerfile-release ./bin/Dockerfile-release
docker build \
--tag gcr.io/etcd-development/etcd:$(ETCD_VERSION) \
--file ./bin/Dockerfile-release \
./bin
rm -f ./bin/Dockerfile-release
docker run \
--rm \
gcr.io/etcd-development/etcd:$(ETCD_VERSION) \
/bin/sh -c "/usr/local/bin/etcd --version && ETCDCTL_API=3 /usr/local/bin/etcdctl version"
# push-docker-release-master pushes the release image for $(ETCD_VERSION).
push-docker-release-master:
$(info ETCD_VERSION: $(ETCD_VERSION))
gcloud docker -- push gcr.io/etcd-development/etcd:$(ETCD_VERSION)
# Example:
# make build-docker-test
# make compile-with-docker-test
# make build-docker-static-ip-test
#
# gcloud docker -- login -u _json_key -p "$(cat /etc/gcp-key-etcd-development.json)" https://gcr.io
# make push-docker-static-ip-test
#
# gsutil -m acl ch -u allUsers:R -r gs://artifacts.etcd-development.appspot.com
# make pull-docker-static-ip-test
#
# make docker-static-ip-test-certs-run
# make docker-static-ip-test-certs-metrics-proxy-run
build-docker-static-ip-test:
$(info GO_VERSION: $(GO_VERSION))
@sed -i.bak 's|REPLACE_ME_GO_VERSION|$(GO_VERSION)|g' ./tests/docker-static-ip/Dockerfile
docker build \
--tag gcr.io/etcd-development/etcd-static-ip-test:go$(GO_VERSION) \
--file ./tests/docker-static-ip/Dockerfile \
./tests/docker-static-ip
@mv ./tests/docker-static-ip/Dockerfile.bak ./tests/docker-static-ip/Dockerfile
push-docker-static-ip-test:
$(info GO_VERSION: $(GO_VERSION))
gcloud docker -- push gcr.io/etcd-development/etcd-static-ip-test:go$(GO_VERSION)
pull-docker-static-ip-test:
$(info GO_VERSION: $(GO_VERSION))
docker pull gcr.io/etcd-development/etcd-static-ip-test:go$(GO_VERSION)
docker-static-ip-test-certs-run:
$(info GO_VERSION: $(GO_VERSION))
$(info HOST_TMP_DIR: $(HOST_TMP_DIR))
$(info TMP_DIR_MOUNT_FLAG: $(TMP_DIR_MOUNT_FLAG))
docker run \
--rm \
--tty \
$(TMP_DIR_MOUNT_FLAG) \
--mount type=bind,source=`pwd`/bin,destination=/etcd \
--mount type=bind,source=`pwd`/tests/docker-static-ip/certs,destination=/certs \
gcr.io/etcd-development/etcd-static-ip-test:go$(GO_VERSION) \
/bin/bash -c "cd /etcd && /certs/run.sh && rm -rf m*.etcd"
docker-static-ip-test-certs-metrics-proxy-run:
$(info GO_VERSION: $(GO_VERSION))
$(info HOST_TMP_DIR: $(HOST_TMP_DIR))
$(info TMP_DIR_MOUNT_FLAG: $(TMP_DIR_MOUNT_FLAG))
docker run \
--rm \
--tty \
$(TMP_DIR_MOUNT_FLAG) \
--mount type=bind,source=`pwd`/bin,destination=/etcd \
--mount type=bind,source=`pwd`/tests/docker-static-ip/certs-metrics-proxy,destination=/certs-metrics-proxy \
gcr.io/etcd-development/etcd-static-ip-test:go$(GO_VERSION) \
/bin/bash -c "cd /etcd && /certs-metrics-proxy/run.sh && rm -rf m*.etcd"
# Example:
# make build-docker-test
# make compile-with-docker-test
# make build-docker-dns-test
#
# gcloud docker -- login -u _json_key -p "$(cat /etc/gcp-key-etcd-development.json)" https://gcr.io
# make push-docker-dns-test
#
# gsutil -m acl ch -u allUsers:R -r gs://artifacts.etcd-development.appspot.com
# make pull-docker-dns-test
#
# make docker-dns-test-insecure-run
# make docker-dns-test-certs-run
# make docker-dns-test-certs-gateway-run
# make docker-dns-test-certs-wildcard-run
# make docker-dns-test-certs-common-name-auth-run
# make docker-dns-test-certs-common-name-multi-run
build-docker-dns-test:
$(info GO_VERSION: $(GO_VERSION))
@sed -i.bak 's|REPLACE_ME_GO_VERSION|$(GO_VERSION)|g' ./tests/docker-dns/Dockerfile
docker build \
--tag gcr.io/etcd-development/etcd-dns-test:go$(GO_VERSION) \
--file ./tests/docker-dns/Dockerfile \
./tests/docker-dns
@mv ./tests/docker-dns/Dockerfile.bak ./tests/docker-dns/Dockerfile
docker run \
--rm \
--dns 127.0.0.1 \
gcr.io/etcd-development/etcd-dns-test:go$(GO_VERSION) \
/bin/bash -c "/etc/init.d/bind9 start && cat /dev/null >/etc/hosts && dig etcd.local"
push-docker-dns-test:
$(info GO_VERSION: $(GO_VERSION))
gcloud docker -- push gcr.io/etcd-development/etcd-dns-test:go$(GO_VERSION)
pull-docker-dns-test:
$(info GO_VERSION: $(GO_VERSION))
docker pull gcr.io/etcd-development/etcd-dns-test:go$(GO_VERSION)
docker-dns-test-insecure-run:
$(info GO_VERSION: $(GO_VERSION))
$(info HOST_TMP_DIR: $(HOST_TMP_DIR))
$(info TMP_DIR_MOUNT_FLAG: $(TMP_DIR_MOUNT_FLAG))
docker run \
--rm \
--tty \
--dns 127.0.0.1 \
$(TMP_DIR_MOUNT_FLAG) \
--mount type=bind,source=`pwd`/bin,destination=/etcd \
--mount type=bind,source=`pwd`/tests/docker-dns/insecure,destination=/insecure \
gcr.io/etcd-development/etcd-dns-test:go$(GO_VERSION) \
/bin/bash -c "cd /etcd && /insecure/run.sh && rm -rf m*.etcd"
docker-dns-test-certs-run:
$(info GO_VERSION: $(GO_VERSION))
$(info HOST_TMP_DIR: $(HOST_TMP_DIR))
$(info TMP_DIR_MOUNT_FLAG: $(TMP_DIR_MOUNT_FLAG))
docker run \
--rm \
--tty \
--dns 127.0.0.1 \
$(TMP_DIR_MOUNT_FLAG) \
--mount type=bind,source=`pwd`/bin,destination=/etcd \
--mount type=bind,source=`pwd`/tests/docker-dns/certs,destination=/certs \
gcr.io/etcd-development/etcd-dns-test:go$(GO_VERSION) \
/bin/bash -c "cd /etcd && /certs/run.sh && rm -rf m*.etcd"
docker-dns-test-certs-gateway-run:
$(info GO_VERSION: $(GO_VERSION))
$(info HOST_TMP_DIR: $(HOST_TMP_DIR))
$(info TMP_DIR_MOUNT_FLAG: $(TMP_DIR_MOUNT_FLAG))
docker run \
--rm \
--tty \
--dns 127.0.0.1 \
$(TMP_DIR_MOUNT_FLAG) \
--mount type=bind,source=`pwd`/bin,destination=/etcd \
--mount type=bind,source=`pwd`/tests/docker-dns/certs-gateway,destination=/certs-gateway \
gcr.io/etcd-development/etcd-dns-test:go$(GO_VERSION) \
/bin/bash -c "cd /etcd && /certs-gateway/run.sh && rm -rf m*.etcd"
docker-dns-test-certs-wildcard-run:
$(info GO_VERSION: $(GO_VERSION))
$(info HOST_TMP_DIR: $(HOST_TMP_DIR))
$(info TMP_DIR_MOUNT_FLAG: $(TMP_DIR_MOUNT_FLAG))
docker run \
--rm \
--tty \
--dns 127.0.0.1 \
$(TMP_DIR_MOUNT_FLAG) \
--mount type=bind,source=`pwd`/bin,destination=/etcd \
--mount type=bind,source=`pwd`/tests/docker-dns/certs-wildcard,destination=/certs-wildcard \
gcr.io/etcd-development/etcd-dns-test:go$(GO_VERSION) \
/bin/bash -c "cd /etcd && /certs-wildcard/run.sh && rm -rf m*.etcd"
docker-dns-test-certs-common-name-auth-run:
$(info GO_VERSION: $(GO_VERSION))
$(info HOST_TMP_DIR: $(HOST_TMP_DIR))
$(info TMP_DIR_MOUNT_FLAG: $(TMP_DIR_MOUNT_FLAG))
docker run \
--rm \
--tty \
--dns 127.0.0.1 \
$(TMP_DIR_MOUNT_FLAG) \
--mount type=bind,source=`pwd`/bin,destination=/etcd \
--mount type=bind,source=`pwd`/tests/docker-dns/certs-common-name-auth,destination=/certs-common-name-auth \
gcr.io/etcd-development/etcd-dns-test:go$(GO_VERSION) \
/bin/bash -c "cd /etcd && /certs-common-name-auth/run.sh && rm -rf m*.etcd"
docker-dns-test-certs-common-name-multi-run:
$(info GO_VERSION: $(GO_VERSION))
$(info HOST_TMP_DIR: $(HOST_TMP_DIR))
$(info TMP_DIR_MOUNT_FLAG: $(TMP_DIR_MOUNT_FLAG))
docker run \
--rm \
--tty \
--dns 127.0.0.1 \
$(TMP_DIR_MOUNT_FLAG) \
--mount type=bind,source=`pwd`/bin,destination=/etcd \
--mount type=bind,source=`pwd`/tests/docker-dns/certs-common-name-multi,destination=/certs-common-name-multi \
gcr.io/etcd-development/etcd-dns-test:go$(GO_VERSION) \
/bin/bash -c "cd /etcd && /certs-common-name-multi/run.sh && rm -rf m*.etcd"
# Example:
# make build-docker-test
# make compile-with-docker-test
# make build-docker-dns-srv-test
# gcloud docker -- login -u _json_key -p "$(cat /etc/gcp-key-etcd-development.json)" https://gcr.io
# make push-docker-dns-srv-test
# gsutil -m acl ch -u allUsers:R -r gs://artifacts.etcd-development.appspot.com
# make pull-docker-dns-srv-test
# make docker-dns-srv-test-certs-run
# make docker-dns-srv-test-certs-gateway-run
# make docker-dns-srv-test-certs-wildcard-run
build-docker-dns-srv-test:
$(info GO_VERSION: $(GO_VERSION))
@sed -i.bak 's|REPLACE_ME_GO_VERSION|$(GO_VERSION)|g' ./tests/docker-dns-srv/Dockerfile
docker build \
--tag gcr.io/etcd-development/etcd-dns-srv-test:go$(GO_VERSION) \
--file ./tests/docker-dns-srv/Dockerfile \
./tests/docker-dns-srv
@mv ./tests/docker-dns-srv/Dockerfile.bak ./tests/docker-dns-srv/Dockerfile
docker run \
--rm \
--dns 127.0.0.1 \
gcr.io/etcd-development/etcd-dns-srv-test:go$(GO_VERSION) \
/bin/bash -c "/etc/init.d/bind9 start && cat /dev/null >/etc/hosts && dig +noall +answer SRV _etcd-client-ssl._tcp.etcd.local && dig +noall +answer SRV _etcd-server-ssl._tcp.etcd.local && dig +noall +answer m1.etcd.local m2.etcd.local m3.etcd.local"
push-docker-dns-srv-test:
$(info GO_VERSION: $(GO_VERSION))
gcloud docker -- push gcr.io/etcd-development/etcd-dns-srv-test:go$(GO_VERSION)
pull-docker-dns-srv-test:
$(info GO_VERSION: $(GO_VERSION))
docker pull gcr.io/etcd-development/etcd-dns-srv-test:go$(GO_VERSION)
docker-dns-srv-test-certs-run:
$(info GO_VERSION: $(GO_VERSION))
$(info HOST_TMP_DIR: $(HOST_TMP_DIR))
$(info TMP_DIR_MOUNT_FLAG: $(TMP_DIR_MOUNT_FLAG))
docker run \
--rm \
--tty \
--dns 127.0.0.1 \
$(TMP_DIR_MOUNT_FLAG) \
--mount type=bind,source=`pwd`/bin,destination=/etcd \
--mount type=bind,source=`pwd`/tests/docker-dns-srv/certs,destination=/certs \
gcr.io/etcd-development/etcd-dns-srv-test:go$(GO_VERSION) \
/bin/bash -c "cd /etcd && /certs/run.sh && rm -rf m*.etcd"
docker-dns-srv-test-certs-gateway-run:
$(info GO_VERSION: $(GO_VERSION))
$(info HOST_TMP_DIR: $(HOST_TMP_DIR))
$(info TMP_DIR_MOUNT_FLAG: $(TMP_DIR_MOUNT_FLAG))
docker run \
--rm \
--tty \
--dns 127.0.0.1 \
$(TMP_DIR_MOUNT_FLAG) \
--mount type=bind,source=`pwd`/bin,destination=/etcd \
--mount type=bind,source=`pwd`/tests/docker-dns-srv/certs-gateway,destination=/certs-gateway \
gcr.io/etcd-development/etcd-dns-srv-test:go$(GO_VERSION) \
/bin/bash -c "cd /etcd && /certs-gateway/run.sh && rm -rf m*.etcd"
docker-dns-srv-test-certs-wildcard-run:
$(info GO_VERSION: $(GO_VERSION))
$(info HOST_TMP_DIR: $(HOST_TMP_DIR))
$(info TMP_DIR_MOUNT_FLAG: $(TMP_DIR_MOUNT_FLAG))
docker run \
--rm \
--tty \
--dns 127.0.0.1 \
$(TMP_DIR_MOUNT_FLAG) \
--mount type=bind,source=`pwd`/bin,destination=/etcd \
--mount type=bind,source=`pwd`/tests/docker-dns-srv/certs-wildcard,destination=/certs-wildcard \
gcr.io/etcd-development/etcd-dns-srv-test:go$(GO_VERSION) \
/bin/bash -c "cd /etcd && /certs-wildcard/run.sh && rm -rf m*.etcd"
# Example:
# make build-functional
# make build-docker-functional
# make push-docker-functional
# make pull-docker-functional
# build-functional runs ./functional/build and then invokes each functional
# test binary's help command. Every help invocation is followed by "|| true",
# so a non-zero exit from a help command does not fail the target.
build-functional:
$(info GO_VERSION: $(GO_VERSION))
$(info ETCD_VERSION: $(ETCD_VERSION))
./functional/build
./bin/etcd-agent -help || true && \
./bin/etcd-proxy -help || true && \
./bin/etcd-runner --help || true && \
./bin/etcd-tester -help || true
# build-docker-functional builds the functional-test image tagged
# go$(GO_VERSION) from ./functional/Dockerfile (with the
# REPLACE_ME_GO_VERSION placeholder substituted in place and restored after
# the build), then runs the version/help commands of the bundled binaries
# inside the image.
# NOTE(review): in the shell, && and || have equal precedence and associate
# left to right, so the first "|| true" also absorbs a failure of the
# preceding "--version"/"version" commands; as written the smoke test cannot
# fail the target. Confirm whether that is intended.
build-docker-functional:
$(info GO_VERSION: $(GO_VERSION))
$(info ETCD_VERSION: $(ETCD_VERSION))
@sed -i.bak 's|REPLACE_ME_GO_VERSION|$(GO_VERSION)|g' ./functional/Dockerfile
docker build \
--tag gcr.io/etcd-development/etcd-functional:go$(GO_VERSION) \
--file ./functional/Dockerfile \
.
@mv ./functional/Dockerfile.bak ./functional/Dockerfile
docker run \
--rm \
gcr.io/etcd-development/etcd-functional:go$(GO_VERSION) \
/bin/bash -c "./bin/etcd --version && \
./bin/etcd-failpoints --version && \
ETCDCTL_API=3 ./bin/etcdctl version && \
./bin/etcd-agent -help || true && \
./bin/etcd-proxy -help || true && \
./bin/etcd-runner --help || true && \
./bin/etcd-tester -help || true && \
./bin/benchmark --help || true"
# push-docker-functional pushes the functional-test image for GO_VERSION.
push-docker-functional:
$(info GO_VERSION: $(GO_VERSION))
$(info ETCD_VERSION: $(ETCD_VERSION))
gcloud docker -- push gcr.io/etcd-development/etcd-functional:go$(GO_VERSION)
# pull-docker-functional pulls the functional-test image for GO_VERSION.
pull-docker-functional:
$(info GO_VERSION: $(GO_VERSION))
$(info ETCD_VERSION: $(ETCD_VERSION))
docker pull gcr.io/etcd-development/etcd-functional:go$(GO_VERSION)

View File

@ -118,6 +118,11 @@ func (t *tokenSimple) genTokenPrefix() (string, error) {
func (t *tokenSimple) assignSimpleTokenToUser(username, token string) {
t.simpleTokensMu.Lock()
defer t.simpleTokensMu.Unlock()
if t.simpleTokenKeeper == nil {
return
}
_, ok := t.simpleTokens[token]
if ok {
plog.Panicf("token %s is alredy used", token)
@ -125,7 +130,6 @@ func (t *tokenSimple) assignSimpleTokenToUser(username, token string) {
t.simpleTokens[token] = username
t.simpleTokenKeeper.addSimpleToken(token)
t.simpleTokensMu.Unlock()
}
func (t *tokenSimple) invalidateUser(username string) {

67
auth/simple_token_test.go Normal file
View File

@ -0,0 +1,67 @@
// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package auth
import (
"context"
"testing"
)
// TestSimpleTokenDisabled ensures that TokenProviderSimple behaves correctly when
// disabled: assign must not return an error, the assigned token must not be
// resolvable through info, and invalidateUser must be a harmless no-op.
// Both a provider that was never enabled and one that was enabled and then
// disabled are exercised.
func TestSimpleTokenDisabled(t *testing.T) {
	initialState := newTokenProviderSimple(dummyIndexWaiter)

	explicitlyDisabled := newTokenProviderSimple(dummyIndexWaiter)
	explicitlyDisabled.enable()
	explicitlyDisabled.disable()

	for _, tp := range []*tokenSimple{initialState, explicitlyDisabled} {
		ctx := context.WithValue(context.WithValue(context.TODO(), "index", uint64(1)), "simpleToken", "dummy")
		token, err := tp.assign(ctx, "user1", 0)
		if err != nil {
			t.Fatal(err)
		}
		authInfo, ok := tp.info(ctx, token, 0)
		if ok {
			// Reaching this branch means the lookup unexpectedly succeeded,
			// so the message states the expectation that it should not have.
			t.Errorf("expected ok == false for a disabled provider, got (%t, %s)", ok, authInfo.Username)
		}

		tp.invalidateUser("user1") // should be no-op
	}
}
// TestSimpleTokenAssign ensures that TokenProviderSimple can correctly assign a
// token, look it up with info, and invalidate it by user.
func TestSimpleTokenAssign(t *testing.T) {
	tp := newTokenProviderSimple(dummyIndexWaiter)
	tp.enable()
	ctx := context.WithValue(context.WithValue(context.TODO(), "index", uint64(1)), "simpleToken", "dummy")
	token, err := tp.assign(ctx, "user1", 0)
	if err != nil {
		t.Fatal(err)
	}

	authInfo, ok := tp.info(ctx, token, 0)
	// The two failure modes are reported separately so that authInfo is only
	// dereferenced after the lookup has reported success.
	switch {
	case !ok:
		t.Errorf("expected (true, \"user1\") got ok == false")
	case authInfo.Username != "user1":
		t.Errorf("expected (true, \"user1\") got (%t, %s)", ok, authInfo.Username)
	}

	tp.invalidateUser("user1")

	_, ok = tp.info(context.TODO(), token, 0)
	if ok {
		t.Errorf("expected ok == false after user is invalidated")
	}
}

View File

@ -992,7 +992,7 @@ func (as *authStore) AuthInfoFromTLS(ctx context.Context) *AuthInfo {
}
func (as *authStore) AuthInfoFromCtx(ctx context.Context) (*AuthInfo, error) {
md, ok := metadata.FromContext(ctx)
md, ok := metadata.FromIncomingContext(ctx)
if !ok {
return nil, nil
}

View File

@ -453,7 +453,8 @@ func TestAuthInfoFromCtx(t *testing.T) {
t.Errorf("expected (nil, nil), got (%v, %v)", ai, err)
}
ctx = metadata.NewContext(context.Background(), metadata.New(map[string]string{"tokens": "dummy"}))
// as if it came from RPC
ctx = metadata.NewIncomingContext(context.Background(), metadata.New(map[string]string{"tokens": "dummy"}))
ai, err = as.AuthInfoFromCtx(ctx)
if err != nil && ai != nil {
t.Errorf("expected (nil, nil), got (%v, %v)", ai, err)
@ -465,19 +466,19 @@ func TestAuthInfoFromCtx(t *testing.T) {
t.Error(err)
}
ctx = metadata.NewContext(context.Background(), metadata.New(map[string]string{"token": "Invalid Token"}))
ctx = metadata.NewIncomingContext(context.Background(), metadata.New(map[string]string{"token": "Invalid Token"}))
_, err = as.AuthInfoFromCtx(ctx)
if err != ErrInvalidAuthToken {
t.Errorf("expected %v, got %v", ErrInvalidAuthToken, err)
}
ctx = metadata.NewContext(context.Background(), metadata.New(map[string]string{"token": "Invalid.Token"}))
ctx = metadata.NewIncomingContext(context.Background(), metadata.New(map[string]string{"token": "Invalid.Token"}))
_, err = as.AuthInfoFromCtx(ctx)
if err != ErrInvalidAuthToken {
t.Errorf("expected %v, got %v", ErrInvalidAuthToken, err)
}
ctx = metadata.NewContext(context.Background(), metadata.New(map[string]string{"token": resp.Token}))
ctx = metadata.NewIncomingContext(context.Background(), metadata.New(map[string]string{"token": resp.Token}))
ai, err = as.AuthInfoFromCtx(ctx)
if err != nil {
t.Error(err)
@ -521,7 +522,7 @@ func TestAuthInfoFromCtxRace(t *testing.T) {
donec := make(chan struct{})
go func() {
defer close(donec)
ctx := metadata.NewContext(context.Background(), metadata.New(map[string]string{"token": "test"}))
ctx := metadata.NewIncomingContext(context.Background(), metadata.New(map[string]string{"token": "test"}))
as.AuthInfoFromCtx(ctx)
}()
as.UserAdd(&pb.AuthUserAddRequest{Name: "test"})

View File

@ -1,212 +1,388 @@
[
{
"project": "bitbucket.org/ww/goautoneg",
"license": "BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 1
"licenses": [
{
"type": "BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 1
}
]
},
{
"project": "github.com/beorn7/perks/quantile",
"license": "MIT License",
"confidence": 0.989
"licenses": [
{
"type": "MIT License",
"confidence": 0.9891304347826086
}
]
},
{
"project": "github.com/bgentry/speakeasy",
"license": "MIT License",
"confidence": 0.944
},
{
"project": "github.com/boltdb/bolt",
"license": "MIT License",
"confidence": 1
"licenses": [
{
"type": "MIT License",
"confidence": 0.9441624365482234
}
]
},
{
"project": "github.com/cockroachdb/cmux",
"license": "Apache License 2.0",
"confidence": 1
"licenses": [
{
"type": "Apache License 2.0",
"confidence": 1
}
]
},
{
"project": "github.com/coreos/bbolt",
"licenses": [
{
"type": "MIT License",
"confidence": 1
}
]
},
{
"project": "github.com/coreos/etcd",
"license": "Apache License 2.0",
"confidence": 1
"licenses": [
{
"type": "Apache License 2.0",
"confidence": 1
}
]
},
{
"project": "github.com/coreos/go-semver/semver",
"license": "Apache License 2.0",
"confidence": 1
"licenses": [
{
"type": "Apache License 2.0",
"confidence": 1
}
]
},
{
"project": "github.com/coreos/go-systemd",
"license": "Apache License 2.0",
"confidence": 0.997
"licenses": [
{
"type": "Apache License 2.0",
"confidence": 0.9966703662597114
}
]
},
{
"project": "github.com/coreos/pkg",
"license": "Apache License 2.0",
"confidence": 1
"licenses": [
{
"type": "Apache License 2.0",
"confidence": 1
}
]
},
{
"project": "github.com/cpuguy83/go-md2man/md2man",
"license": "MIT License",
"confidence": 1
"licenses": [
{
"type": "MIT License",
"confidence": 1
}
]
},
{
"project": "github.com/dgrijalva/jwt-go",
"license": "MIT License",
"confidence": 0.989
"licenses": [
{
"type": "MIT License",
"confidence": 0.9891304347826086
}
]
},
{
"project": "github.com/dustin/go-humanize",
"license": "MIT License",
"confidence": 0.969
"licenses": [
{
"type": "MIT License",
"confidence": 0.96875
}
]
},
{
"project": "github.com/ghodss/yaml",
"license": "MIT License and BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 1
"licenses": [
{
"type": "MIT License and BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 1
}
]
},
{
"project": "github.com/gogo/protobuf/proto",
"license": "BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 0.909
"licenses": [
{
"type": "BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 0.9090909090909091
}
]
},
{
"project": "github.com/golang/groupcache/lru",
"license": "Apache License 2.0",
"confidence": 0.997
"licenses": [
{
"type": "Apache License 2.0",
"confidence": 0.9966703662597114
}
]
},
{
"project": "github.com/golang/protobuf",
"license": "BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 0.92
"licenses": [
{
"type": "BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 0.92
}
]
},
{
"project": "github.com/google/btree",
"license": "Apache License 2.0",
"confidence": 1
"licenses": [
{
"type": "Apache License 2.0",
"confidence": 1
}
]
},
{
"project": "github.com/grpc-ecosystem/go-grpc-prometheus",
"license": "Apache License 2.0",
"confidence": 1
"licenses": [
{
"type": "Apache License 2.0",
"confidence": 1
}
]
},
{
"project": "github.com/grpc-ecosystem/grpc-gateway",
"license": "BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 0.979
"licenses": [
{
"type": "BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 0.979253112033195
}
]
},
{
"project": "github.com/inconshreveable/mousetrap",
"license": "Apache License 2.0",
"confidence": 1
"licenses": [
{
"type": "MIT License and BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 1
},
{
"type": "Apache License 2.0",
"confidence": 1
}
]
},
{
"project": "github.com/jonboulle/clockwork",
"license": "Apache License 2.0",
"confidence": 1
"licenses": [
{
"type": "Apache License 2.0",
"confidence": 1
}
]
},
{
"project": "github.com/mattn/go-runewidth",
"license": "MIT License",
"confidence": 1
"licenses": [
{
"type": "MIT License",
"confidence": 1
}
]
},
{
"project": "github.com/matttproud/golang_protobuf_extensions/pbutil",
"license": "Apache License 2.0",
"confidence": 1
"licenses": [
{
"type": "Apache License 2.0",
"confidence": 1
}
]
},
{
"project": "github.com/olekukonko/tablewriter",
"license": "MIT License",
"confidence": 0.989
"licenses": [
{
"type": "MIT License",
"confidence": 0.9891304347826086
}
]
},
{
"project": "github.com/prometheus/client_golang/prometheus",
"license": "Apache License 2.0",
"confidence": 1
"licenses": [
{
"type": "Apache License 2.0",
"confidence": 1
}
]
},
{
"project": "github.com/prometheus/client_model/go",
"license": "Apache License 2.0",
"confidence": 1
"licenses": [
{
"type": "Apache License 2.0",
"confidence": 1
}
]
},
{
"project": "github.com/prometheus/common",
"license": "Apache License 2.0",
"confidence": 1
"licenses": [
{
"type": "Apache License 2.0",
"confidence": 1
}
]
},
{
"project": "github.com/prometheus/procfs",
"license": "Apache License 2.0",
"confidence": 1
"licenses": [
{
"type": "Apache License 2.0",
"confidence": 1
}
]
},
{
"project": "github.com/russross/blackfriday",
"license": "BSD 2-clause \"Simplified\" License",
"confidence": 0.963
},
{
"project": "github.com/shurcooL/sanitized_anchor_name",
"license": "MIT License",
"confidence": 1
"licenses": [
{
"type": "BSD 2-clause \"Simplified\" License",
"confidence": 0.9626168224299065
}
]
},
{
"project": "github.com/spf13/cobra",
"license": "Apache License 2.0",
"confidence": 0.957
"licenses": [
{
"type": "Apache License 2.0",
"confidence": 0.9573241061130334
}
]
},
{
"project": "github.com/spf13/pflag",
"license": "BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 0.966
"licenses": [
{
"type": "BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 0.9663865546218487
}
]
},
{
"project": "github.com/ugorji/go/codec",
"license": "MIT License",
"confidence": 0.995
"licenses": [
{
"type": "MIT License",
"confidence": 0.9946524064171123
}
]
},
{
"project": "github.com/urfave/cli",
"license": "MIT License",
"confidence": 1
"licenses": [
{
"type": "MIT License",
"confidence": 1
}
]
},
{
"project": "github.com/xiang90/probing",
"license": "MIT License",
"confidence": 1
"licenses": [
{
"type": "MIT License",
"confidence": 1
}
]
},
{
"project": "golang.org/x/crypto",
"license": "BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 0.966
"licenses": [
{
"type": "BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 0.9663865546218487
}
]
},
{
"project": "golang.org/x/net",
"license": "BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 0.966
"licenses": [
{
"type": "BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 0.9663865546218487
}
]
},
{
"project": "golang.org/x/text",
"license": "BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 0.966
"licenses": [
{
"type": "BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 0.9663865546218487
}
]
},
{
"project": "golang.org/x/time/rate",
"license": "BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 0.966
"licenses": [
{
"type": "BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 0.9663865546218487
}
]
},
{
"project": "google.golang.org/genproto/googleapis",
"licenses": [
{
"type": "Apache License 2.0",
"confidence": 1
}
]
},
{
"project": "google.golang.org/grpc",
"license": "BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 0.979
"licenses": [
{
"type": "Apache License 2.0",
"confidence": 1
}
]
},
{
"project": "gopkg.in/cheggaaa/pb.v1",
"license": "BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 0.992
"licenses": [
{
"type": "BSD 3-clause \"New\" or \"Revised\" License",
"confidence": 0.9916666666666667
}
]
},
{
"project": "gopkg.in/yaml.v2",
"license": "Apache License 2.0 and MIT License",
"confidence": 1
"licenses": [
{
"type": "The Unlicense",
"confidence": 0.35294117647058826
},
{
"type": "MIT License",
"confidence": 0.8975609756097561
}
]
}
]

View File

@ -1,18 +1,26 @@
[
{
"project": "bitbucket.org/ww/goautoneg",
"license": "BSD 3-clause \"New\" or \"Revised\" License"
"licenses": [
{
"type": "BSD 3-clause \"New\" or \"Revised\" License"
}
]
},
{
"project": "github.com/ghodss/yaml",
"license": "MIT License and BSD 3-clause \"New\" or \"Revised\" License"
"licenses": [
{
"type": "MIT License and BSD 3-clause \"New\" or \"Revised\" License"
}
]
},
{
"project": "github.com/inconshreveable/mousetrap",
"license": "Apache License 2.0"
},
{
"project": "gopkg.in/yaml.v2",
"license": "Apache License 2.0 and MIT License"
"licenses": [
{
"type": "Apache License 2.0"
}
]
}
]

23
build
View File

@ -3,9 +3,7 @@
# set some environment variables
ORG_PATH="github.com/coreos"
REPO_PATH="${ORG_PATH}/etcd"
export GO15VENDOREXPERIMENT="1"
eval $(go env)
GIT_SHA=`git rev-parse --short HEAD || echo "GitNotFound"`
if [ ! -z "$FAILPOINTS" ]; then
GIT_SHA="$GIT_SHA"-FAILPOINTS
@ -17,11 +15,7 @@ GO_LDFLAGS="$GO_LDFLAGS -X ${REPO_PATH}/cmd/vendor/${REPO_PATH}/version.GitSHA=$
# enable/disable failpoints
toggle_failpoints() {
FAILPKGS="etcdserver/ mvcc/backend/"
mode="disable"
if [ ! -z "$FAILPOINTS" ]; then mode="enable"; fi
if [ ! -z "$1" ]; then mode="$1"; fi
mode="$1"
if which gofail >/dev/null 2>&1; then
gofail "$mode" $FAILPKGS
elif [ "$mode" != "disable" ]; then
@ -30,19 +24,26 @@ toggle_failpoints() {
fi
}
# toggle_failpoints_default picks the failpoint mode from the environment:
# "enable" when FAILPOINTS is non-empty, "disable" otherwise, and hands it to
# toggle_failpoints.
toggle_failpoints_default() {
	if [ -n "$FAILPOINTS" ]; then
		mode="enable"
	else
		mode="disable"
	fi
	toggle_failpoints "$mode"
}
etcd_build() {
out="bin"
if [ -n "${BINDIR}" ]; then out="${BINDIR}"; fi
toggle_failpoints
toggle_failpoints_default
# Static compilation is useful when etcd is run in a container
CGO_ENABLED=0 go build $GO_BUILD_FLAGS -installsuffix cgo -ldflags "$GO_LDFLAGS" -o ${out}/etcd ${REPO_PATH}/cmd/etcd || return
CGO_ENABLED=0 go build $GO_BUILD_FLAGS -installsuffix cgo -ldflags "$GO_LDFLAGS" -o ${out}/etcdctl ${REPO_PATH}/cmd/etcdctl || return
}
etcd_setup_gopath() {
CDIR=$(cd `dirname "$0"` && pwd)
d=$(dirname "$0")
CDIR=$(cd "$d" && pwd)
cd "$CDIR"
etcdGOPATH=${CDIR}/gopath
etcdGOPATH="${CDIR}/gopath"
# preserve old gopath to support building with unvendored tooling deps (e.g., gofail)
if [ -n "$GOPATH" ]; then
GOPATH=":$GOPATH"
@ -53,7 +54,7 @@ etcd_setup_gopath() {
ln -s ${CDIR}/cmd/vendor ${etcdGOPATH}/src
}
toggle_failpoints
toggle_failpoints_default
# only build when called directly, not sourced
if echo "$0" | grep "build$" >/dev/null; then

View File

@ -372,12 +372,7 @@ func (c *httpClusterClient) Do(ctx context.Context, act httpAction) (*http.Respo
if err == context.Canceled || err == context.DeadlineExceeded {
return nil, nil, err
}
if isOneShot {
return nil, nil, err
}
continue
}
if resp.StatusCode/100 == 5 {
} else if resp.StatusCode/100 == 5 {
switch resp.StatusCode {
case http.StatusInternalServerError, http.StatusServiceUnavailable:
// TODO: make sure this is a no leader response
@ -385,10 +380,16 @@ func (c *httpClusterClient) Do(ctx context.Context, act httpAction) (*http.Respo
default:
cerr.Errors = append(cerr.Errors, fmt.Errorf("client: etcd member %s returns server error [%s]", eps[k].String(), http.StatusText(resp.StatusCode)))
}
if isOneShot {
return nil, nil, cerr.Errors[0]
err = cerr.Errors[0]
}
if err != nil {
if !isOneShot {
continue
}
continue
c.Lock()
c.pinned = (k + 1) % leps
c.Unlock()
return nil, nil, err
}
if k != pinned {
c.Lock()

View File

@ -16,6 +16,7 @@ package client
import (
"errors"
"fmt"
"io"
"io/ioutil"
"math/rand"
@ -304,7 +305,9 @@ func TestHTTPClusterClientDo(t *testing.T) {
fakeErr := errors.New("fake!")
fakeURL := url.URL{}
tests := []struct {
client *httpClusterClient
client *httpClusterClient
ctx context.Context
wantCode int
wantErr error
wantPinned int
@ -395,10 +398,30 @@ func TestHTTPClusterClientDo(t *testing.T) {
wantCode: http.StatusTeapot,
wantPinned: 1,
},
// 500-level errors cause one shot Do to fallthrough to next endpoint
{
client: &httpClusterClient{
endpoints: []url.URL{fakeURL, fakeURL},
clientFactory: newStaticHTTPClientFactory(
[]staticHTTPResponse{
{resp: http.Response{StatusCode: http.StatusBadGateway}},
{resp: http.Response{StatusCode: http.StatusTeapot}},
},
),
rand: rand.New(rand.NewSource(0)),
},
ctx: context.WithValue(context.Background(), &oneShotCtxValue, &oneShotCtxValue),
wantErr: fmt.Errorf("client: etcd member returns server error [Bad Gateway]"),
wantPinned: 1,
},
}
for i, tt := range tests {
resp, _, err := tt.client.Do(context.Background(), nil)
if tt.ctx == nil {
tt.ctx = context.Background()
}
resp, _, err := tt.client.Do(tt.ctx, nil)
if !reflect.DeepEqual(tt.wantErr, err) {
t.Errorf("#%d: got err=%v, want=%v", i, err, tt.wantErr)
continue
@ -407,11 +430,9 @@ func TestHTTPClusterClientDo(t *testing.T) {
if resp == nil {
if tt.wantCode != 0 {
t.Errorf("#%d: resp is nil, want=%d", i, tt.wantCode)
continue
}
continue
}
if resp.StatusCode != tt.wantCode {
} else if resp.StatusCode != tt.wantCode {
t.Errorf("#%d: resp code=%d, want=%d", i, resp.StatusCode, tt.wantCode)
continue
}

View File

@ -1,6 +1,6 @@
# etcd/clientv3
[![Godoc](http://img.shields.io/badge/go-documentation-blue.svg?style=flat-square)](https://godoc.org/github.com/coreos/etcd/clientv3)
[![Godoc](https://img.shields.io/badge/go-documentation-blue.svg?style=flat-square)](https://godoc.org/github.com/coreos/etcd/clientv3)
`etcd/clientv3` is the official Go etcd client for v3.

View File

@ -20,6 +20,7 @@ import (
"github.com/coreos/etcd/auth/authpb"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"golang.org/x/net/context"
"google.golang.org/grpc"
)
@ -100,60 +101,65 @@ type Auth interface {
}
type auth struct {
remote pb.AuthClient
remote pb.AuthClient
callOpts []grpc.CallOption
}
func NewAuth(c *Client) Auth {
return &auth{remote: pb.NewAuthClient(c.ActiveConnection())}
api := &auth{remote: RetryAuthClient(c)}
if c != nil {
api.callOpts = c.callOpts
}
return api
}
func (auth *auth) AuthEnable(ctx context.Context) (*AuthEnableResponse, error) {
resp, err := auth.remote.AuthEnable(ctx, &pb.AuthEnableRequest{}, grpc.FailFast(false))
resp, err := auth.remote.AuthEnable(ctx, &pb.AuthEnableRequest{}, auth.callOpts...)
return (*AuthEnableResponse)(resp), toErr(ctx, err)
}
func (auth *auth) AuthDisable(ctx context.Context) (*AuthDisableResponse, error) {
resp, err := auth.remote.AuthDisable(ctx, &pb.AuthDisableRequest{}, grpc.FailFast(false))
resp, err := auth.remote.AuthDisable(ctx, &pb.AuthDisableRequest{}, auth.callOpts...)
return (*AuthDisableResponse)(resp), toErr(ctx, err)
}
func (auth *auth) UserAdd(ctx context.Context, name string, password string) (*AuthUserAddResponse, error) {
resp, err := auth.remote.UserAdd(ctx, &pb.AuthUserAddRequest{Name: name, Password: password})
resp, err := auth.remote.UserAdd(ctx, &pb.AuthUserAddRequest{Name: name, Password: password}, auth.callOpts...)
return (*AuthUserAddResponse)(resp), toErr(ctx, err)
}
func (auth *auth) UserDelete(ctx context.Context, name string) (*AuthUserDeleteResponse, error) {
resp, err := auth.remote.UserDelete(ctx, &pb.AuthUserDeleteRequest{Name: name})
resp, err := auth.remote.UserDelete(ctx, &pb.AuthUserDeleteRequest{Name: name}, auth.callOpts...)
return (*AuthUserDeleteResponse)(resp), toErr(ctx, err)
}
func (auth *auth) UserChangePassword(ctx context.Context, name string, password string) (*AuthUserChangePasswordResponse, error) {
resp, err := auth.remote.UserChangePassword(ctx, &pb.AuthUserChangePasswordRequest{Name: name, Password: password})
resp, err := auth.remote.UserChangePassword(ctx, &pb.AuthUserChangePasswordRequest{Name: name, Password: password}, auth.callOpts...)
return (*AuthUserChangePasswordResponse)(resp), toErr(ctx, err)
}
func (auth *auth) UserGrantRole(ctx context.Context, user string, role string) (*AuthUserGrantRoleResponse, error) {
resp, err := auth.remote.UserGrantRole(ctx, &pb.AuthUserGrantRoleRequest{User: user, Role: role})
resp, err := auth.remote.UserGrantRole(ctx, &pb.AuthUserGrantRoleRequest{User: user, Role: role}, auth.callOpts...)
return (*AuthUserGrantRoleResponse)(resp), toErr(ctx, err)
}
func (auth *auth) UserGet(ctx context.Context, name string) (*AuthUserGetResponse, error) {
resp, err := auth.remote.UserGet(ctx, &pb.AuthUserGetRequest{Name: name}, grpc.FailFast(false))
resp, err := auth.remote.UserGet(ctx, &pb.AuthUserGetRequest{Name: name}, auth.callOpts...)
return (*AuthUserGetResponse)(resp), toErr(ctx, err)
}
func (auth *auth) UserList(ctx context.Context) (*AuthUserListResponse, error) {
resp, err := auth.remote.UserList(ctx, &pb.AuthUserListRequest{}, grpc.FailFast(false))
resp, err := auth.remote.UserList(ctx, &pb.AuthUserListRequest{}, auth.callOpts...)
return (*AuthUserListResponse)(resp), toErr(ctx, err)
}
func (auth *auth) UserRevokeRole(ctx context.Context, name string, role string) (*AuthUserRevokeRoleResponse, error) {
resp, err := auth.remote.UserRevokeRole(ctx, &pb.AuthUserRevokeRoleRequest{Name: name, Role: role})
resp, err := auth.remote.UserRevokeRole(ctx, &pb.AuthUserRevokeRoleRequest{Name: name, Role: role}, auth.callOpts...)
return (*AuthUserRevokeRoleResponse)(resp), toErr(ctx, err)
}
func (auth *auth) RoleAdd(ctx context.Context, name string) (*AuthRoleAddResponse, error) {
resp, err := auth.remote.RoleAdd(ctx, &pb.AuthRoleAddRequest{Name: name})
resp, err := auth.remote.RoleAdd(ctx, &pb.AuthRoleAddRequest{Name: name}, auth.callOpts...)
return (*AuthRoleAddResponse)(resp), toErr(ctx, err)
}
@ -163,27 +169,27 @@ func (auth *auth) RoleGrantPermission(ctx context.Context, name string, key, ran
RangeEnd: []byte(rangeEnd),
PermType: authpb.Permission_Type(permType),
}
resp, err := auth.remote.RoleGrantPermission(ctx, &pb.AuthRoleGrantPermissionRequest{Name: name, Perm: perm})
resp, err := auth.remote.RoleGrantPermission(ctx, &pb.AuthRoleGrantPermissionRequest{Name: name, Perm: perm}, auth.callOpts...)
return (*AuthRoleGrantPermissionResponse)(resp), toErr(ctx, err)
}
func (auth *auth) RoleGet(ctx context.Context, role string) (*AuthRoleGetResponse, error) {
resp, err := auth.remote.RoleGet(ctx, &pb.AuthRoleGetRequest{Role: role}, grpc.FailFast(false))
resp, err := auth.remote.RoleGet(ctx, &pb.AuthRoleGetRequest{Role: role}, auth.callOpts...)
return (*AuthRoleGetResponse)(resp), toErr(ctx, err)
}
func (auth *auth) RoleList(ctx context.Context) (*AuthRoleListResponse, error) {
resp, err := auth.remote.RoleList(ctx, &pb.AuthRoleListRequest{}, grpc.FailFast(false))
resp, err := auth.remote.RoleList(ctx, &pb.AuthRoleListRequest{}, auth.callOpts...)
return (*AuthRoleListResponse)(resp), toErr(ctx, err)
}
func (auth *auth) RoleRevokePermission(ctx context.Context, role string, key, rangeEnd string) (*AuthRoleRevokePermissionResponse, error) {
resp, err := auth.remote.RoleRevokePermission(ctx, &pb.AuthRoleRevokePermissionRequest{Role: role, Key: key, RangeEnd: rangeEnd})
resp, err := auth.remote.RoleRevokePermission(ctx, &pb.AuthRoleRevokePermissionRequest{Role: role, Key: key, RangeEnd: rangeEnd}, auth.callOpts...)
return (*AuthRoleRevokePermissionResponse)(resp), toErr(ctx, err)
}
func (auth *auth) RoleDelete(ctx context.Context, role string) (*AuthRoleDeleteResponse, error) {
resp, err := auth.remote.RoleDelete(ctx, &pb.AuthRoleDeleteRequest{Role: role})
resp, err := auth.remote.RoleDelete(ctx, &pb.AuthRoleDeleteRequest{Role: role}, auth.callOpts...)
return (*AuthRoleDeleteResponse)(resp), toErr(ctx, err)
}
@ -196,12 +202,13 @@ func StrToPermissionType(s string) (PermissionType, error) {
}
type authenticator struct {
conn *grpc.ClientConn // conn in-use
remote pb.AuthClient
conn *grpc.ClientConn // conn in-use
remote pb.AuthClient
callOpts []grpc.CallOption
}
func (auth *authenticator) authenticate(ctx context.Context, name string, password string) (*AuthenticateResponse, error) {
resp, err := auth.remote.Authenticate(ctx, &pb.AuthenticateRequest{Name: name, Password: password}, grpc.FailFast(false))
resp, err := auth.remote.Authenticate(ctx, &pb.AuthenticateRequest{Name: name, Password: password}, auth.callOpts...)
return (*AuthenticateResponse)(resp), toErr(ctx, err)
}
@ -209,14 +216,18 @@ func (auth *authenticator) close() {
auth.conn.Close()
}
func newAuthenticator(endpoint string, opts []grpc.DialOption) (*authenticator, error) {
func newAuthenticator(endpoint string, opts []grpc.DialOption, c *Client) (*authenticator, error) {
conn, err := grpc.Dial(endpoint, opts...)
if err != nil {
return nil, err
}
return &authenticator{
api := &authenticator{
conn: conn,
remote: pb.NewAuthClient(conn),
}, nil
}
if c != nil {
api.callOpts = c.callOpts
}
return api, nil
}

View File

@ -1,356 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package clientv3
import (
"net/url"
"strings"
"sync"
"golang.org/x/net/context"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
)
// ErrNoAddrAvilable is returned by Get() when the balancer does not have
// any active connection to endpoints at the time.
// This error is returned only when opts.BlockingWait is false (fail-fast
// RPCs); a blocking Get() waits for an address to be pinned instead.
var ErrNoAddrAvilable = grpc.Errorf(codes.Unavailable, "there is no address available")
// simpleBalancer does the bare minimum to expose multiple eps
// to the grpc reconnection code path
type simpleBalancer struct {
// addrs are the client's endpoints for grpc
addrs []grpc.Address
// notifyCh notifies grpc of the set of addresses for connecting
notifyCh chan []grpc.Address
// readyc closes once the first connection is up
readyc chan struct{}
// readyOnce ensures readyc is closed at most once
readyOnce sync.Once
// mu protects addrs, host2ep, pinAddr, upc, downc, and closed
mu sync.RWMutex
// upc closes when an address gets pinned by Up() or the balancer closes.
// It is replaced with a fresh channel when the pinned connection goes down.
upc chan struct{}
// downc closes when grpc calls down() on pinAddr.
// It starts out closed because no address is pinned at construction.
downc chan struct{}
// stopc is closed to signal updateNotifyLoop should stop.
stopc chan struct{}
// donec closes when all goroutines are exited
donec chan struct{}
// updateAddrsC notifies updateNotifyLoop to update addrs.
updateAddrsC chan struct{}
// grpc issues TLS cert checks using the string passed into dial so
// that string must be the host. To recover the full scheme://host URL,
// have a map from hosts to the original endpoint.
host2ep map[string]string
// pinAddr is the currently pinned address; set to the empty string on
// initialization, on shutdown, and when the pinned connection goes down.
pinAddr string
// closed is set to true by Close(); once set, Up() and Get() bail out.
closed bool
}
// newSimpleBalancer builds a balancer over the given endpoints and starts
// its updateNotifyLoop goroutine. Each endpoint is reduced to its host form
// for grpc, while host2ep keeps the mapping back to the original endpoint.
func newSimpleBalancer(eps []string) *simpleBalancer {
	hosts := make([]grpc.Address, len(eps))
	for i, ep := range eps {
		hosts[i].Addr = getHost(ep)
	}

	// nothing is pinned yet, so the "down" channel starts out closed
	downc := make(chan struct{})
	close(downc)

	sb := &simpleBalancer{
		addrs:        hosts,
		notifyCh:     make(chan []grpc.Address, 1),
		readyc:       make(chan struct{}),
		upc:          make(chan struct{}),
		stopc:        make(chan struct{}),
		downc:        downc,
		donec:        make(chan struct{}),
		updateAddrsC: make(chan struct{}, 1),
		host2ep:      getHost2ep(eps),
	}
	go sb.updateNotifyLoop()
	return sb
}
// Start is a no-op for simpleBalancer: all setup happens in
// newSimpleBalancer, so it ignores its arguments and always returns nil.
func (b *simpleBalancer) Start(target string, config grpc.BalancerConfig) error {
	return nil
}
// ConnectNotify returns the current upc channel, which is closed once an
// address is pinned (or the balancer is closed).
//
// Only a read of b.upc is needed, so a read lock is taken, matching the
// other read-only accessors of the balancer state.
func (b *simpleBalancer) ConnectNotify() <-chan struct{} {
	b.mu.RLock()
	defer b.mu.RUnlock()
	return b.upc
}
// getEndpoint maps a dialed host back to the original endpoint string it
// was derived from. It returns the empty string when the host is unknown.
//
// The lookup does not mutate the map, so a read lock is sufficient.
func (b *simpleBalancer) getEndpoint(host string) string {
	b.mu.RLock()
	defer b.mu.RUnlock()
	return b.host2ep[host]
}
// getHost2ep indexes the endpoints by the host that parseEndpoint extracts
// from each of them. When two endpoints share a host the later one wins.
func getHost2ep(eps []string) map[string]string {
	byHost := make(map[string]string, len(eps))
	for _, ep := range eps {
		_, host, _ := parseEndpoint(ep)
		byHost[host] = ep
	}
	return byHost
}
// updateAddrs replaces the balancer's endpoint set with eps. It returns
// early when the host-to-endpoint mapping is unchanged. Otherwise it swaps
// in the new mapping and address list and, if the pinned address is not in
// the new list (which includes the case where nothing is pinned), signals
// updateNotifyLoop to push the new addresses to grpc.
func (b *simpleBalancer) updateAddrs(eps []string) {
	next := getHost2ep(eps)

	b.mu.Lock()

	unchanged := len(next) == len(b.host2ep)
	if unchanged {
		for host, ep := range next {
			if b.host2ep[host] != ep {
				unchanged = false
				break
			}
		}
	}
	if unchanged {
		// same endpoints, so no need to update address
		b.mu.Unlock()
		return
	}

	b.host2ep = next

	fresh := make([]grpc.Address, 0, len(eps))
	for _, ep := range eps {
		fresh = append(fresh, grpc.Address{Addr: getHost(ep)})
	}
	b.addrs = fresh

	// updating notifyCh can trigger new connections,
	// only update addrs if all connections are down
	// or addrs does not include pinAddr.
	pinnedMissing := !hasAddr(fresh, b.pinAddr)
	b.mu.Unlock()

	if !pinnedMissing {
		return
	}
	select {
	case b.updateAddrsC <- struct{}{}:
	case <-b.stopc:
	}
}
// hasAddr reports whether any entry of addrs has Addr equal to targetAddr.
func hasAddr(addrs []grpc.Address, targetAddr string) bool {
	for i := range addrs {
		if addrs[i].Addr == targetAddr {
			return true
		}
	}
	return false
}
// updateNotifyLoop runs in its own goroutine (started by newSimpleBalancer)
// and feeds notifyCh with the address set grpc should connect to. While no
// address is pinned it publishes the full address list; once an address is
// pinned it publishes only that address. It exits when stopc is closed and
// closes donec on the way out.
func (b *simpleBalancer) updateNotifyLoop() {
defer close(b.donec)
for {
// snapshot the state under the read lock; the channels are then
// inspected without holding the lock
b.mu.RLock()
upc, downc, addr := b.upc, b.downc, b.pinAddr
b.mu.RUnlock()
// downc or upc should be closed
// a closed channel is marked by setting the local copy to nil
select {
case <-downc:
downc = nil
default:
}
select {
case <-upc:
upc = nil
default:
}
switch {
case downc == nil && upc == nil:
// stale
// both snapshots are closed; re-read the state on the next
// iteration unless the balancer is stopping
select {
case <-b.stopc:
return
default:
}
case downc == nil:
// no pinned connection: advertise every address, then wait for a
// connection to come up, an endpoint update, or shutdown
b.notifyAddrs()
select {
case <-upc:
case <-b.updateAddrsC:
b.notifyAddrs()
case <-b.stopc:
return
}
case upc == nil:
// an address is pinned: advertise only that address
select {
// close connections that are not the pinned address
case b.notifyCh <- []grpc.Address{{Addr: addr}}:
case <-downc:
case <-b.stopc:
return
}
// wait until the pinned connection goes down or the endpoints
// change, then advertise the full address list again
select {
case <-downc:
case <-b.updateAddrsC:
case <-b.stopc:
return
}
b.notifyAddrs()
}
}
}
// notifyAddrs publishes the balancer's current address list on notifyCh.
// The send is abandoned if the balancer is stopped first.
func (b *simpleBalancer) notifyAddrs() {
	b.mu.RLock()
	current := b.addrs
	b.mu.RUnlock()

	select {
	case <-b.stopc:
	case b.notifyCh <- current:
	}
}
// Up is called when a connection to addr is established. If the balancer
// is open, addr is one of its current addresses, and nothing is pinned yet,
// addr becomes the pinned address and the returned function undoes the pin
// when the connection goes down. In every other case Up changes nothing and
// returns a no-op function.
func (b *simpleBalancer) Up(addr grpc.Address) func(error) {
b.mu.Lock()
defer b.mu.Unlock()
// gRPC might call Up after it called Close. We add this check
// to "fix" it up at application layer. Or our simpleBalancer
// might panic since b.upc is closed.
if b.closed {
return func(err error) {}
}
// gRPC might call Up on a stale address.
// Prevent updating pinAddr with a stale address.
if !hasAddr(b.addrs, addr.Addr) {
return func(err error) {}
}
// another address is already pinned; keep it
if b.pinAddr != "" {
return func(err error) {}
}
// notify waiting Get()s and pin first connected address
close(b.upc)
b.downc = make(chan struct{})
b.pinAddr = addr.Addr
// notify client that a connection is up
b.readyOnce.Do(func() { close(b.readyc) })
// the returned function unpins the address: it installs a fresh upc so
// that blocking Get()s wait again, and closes downc to signal the loss
return func(err error) {
b.mu.Lock()
b.upc = make(chan struct{})
close(b.downc)
b.pinAddr = ""
b.mu.Unlock()
}
}
// Get returns the pinned address together with a no-op "put" function.
//
// With opts.BlockingWait false it answers immediately: ErrClientConnClosing
// if the balancer is closed, ErrNoAddrAvilable if nothing is pinned.
// With opts.BlockingWait true it waits until an address is pinned, and
// returns ErrClientConnClosing once the balancer is closed or ctx.Err()
// once ctx is done.
func (b *simpleBalancer) Get(ctx context.Context, opts grpc.BalancerGetOptions) (grpc.Address, func(), error) {
	// If opts.BlockingWait is false (for fail-fast RPCs), it should return
	// an address it has notified via Notify immediately instead of blocking.
	if !opts.BlockingWait {
		b.mu.RLock()
		isClosed, pinned := b.closed, b.pinAddr
		b.mu.RUnlock()

		switch {
		case isClosed:
			return grpc.Address{Addr: ""}, nil, grpc.ErrClientConnClosing
		case pinned == "":
			return grpc.Address{Addr: ""}, nil, ErrNoAddrAvilable
		}
		return grpc.Address{Addr: pinned}, func() {}, nil
	}

	for {
		b.mu.RLock()
		upc := b.upc
		b.mu.RUnlock()

		select {
		case <-upc:
		case <-b.donec:
			return grpc.Address{Addr: ""}, nil, grpc.ErrClientConnClosing
		case <-ctx.Done():
			return grpc.Address{Addr: ""}, nil, ctx.Err()
		}

		b.mu.RLock()
		isClosed, pinned := b.closed, b.pinAddr
		b.mu.RUnlock()

		// Close() which sets b.closed = true can be called before Get(), Get() must exit if balancer is closed.
		if isClosed {
			return grpc.Address{Addr: ""}, nil, grpc.ErrClientConnClosing
		}
		if pinned != "" {
			return grpc.Address{Addr: pinned}, func() {}, nil
		}
		// the pin was dropped between the wake-up and the re-read; wait again
	}
}
// Notify exposes the receive side of notifyCh, the channel on which the
// balancer publishes the address sets to connect to.
func (b *simpleBalancer) Notify() <-chan []grpc.Address {
	return b.notifyCh
}
// Close shuts the balancer down: it marks it closed, stops
// updateNotifyLoop, clears the pinned address, wakes any Get() blocked on
// upc, waits for the loop goroutine to exit, and finally closes notifyCh.
// A second call only waits for the shutdown to finish. It always returns nil.
func (b *simpleBalancer) Close() error {
b.mu.Lock()
// In case gRPC calls close twice. TODO: remove the checking
// when we are sure that gRPC won't call close twice.
if b.closed {
b.mu.Unlock()
<-b.donec
return nil
}
b.closed = true
close(b.stopc)
b.pinAddr = ""
// In the case of following scenario:
// 1. upc is not closed; no pinned address
// 2. client issues an rpc, calling invoke(), which calls Get(), enters for loop, blocks
// 3. clientconn.Close() calls balancer.Close(); closed = true
// 4. for loop in Get() never exits since ctx is the context passed in by the client and may not be canceled
// we must close upc so Get() exits from blocking on upc
select {
case <-b.upc:
default:
// terminate all waiting Get()s
close(b.upc)
}
b.mu.Unlock()
// wait for updateNotifyLoop to finish
<-b.donec
// closed only after the loop has exited, so nothing sends on it anymore
close(b.notifyCh)
return nil
}
// getHost returns the host portion of an endpoint URL. Endpoints without a
// "://" separator, or that fail to parse as a URL, are returned unchanged.
func getHost(ep string) string {
	if !strings.Contains(ep, "://") {
		return ep
	}
	parsed, err := url.Parse(ep)
	if err != nil {
		return ep
	}
	return parsed.Host
}

View File

@ -1,239 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package clientv3
import (
"errors"
"net"
"sync"
"testing"
"time"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/pkg/testutil"
"golang.org/x/net/context"
"google.golang.org/grpc"
)
var (
// endpoints is the fixed set of addresses the balancer tests hand to
// newSimpleBalancer; nothing listens on them in these tests.
endpoints = []string{"localhost:2379", "localhost:22379", "localhost:32379"}
)
// TestBalancerGetUnblocking drives the balancer through up/down transitions
// by calling Up() directly and checks that a non-blocking Get() returns
// ErrNoAddrAvilable while nothing is pinned and the first pinned address
// while one is.
func TestBalancerGetUnblocking(t *testing.T) {
sb := newSimpleBalancer(endpoints)
defer sb.Close()
// the balancer advertises the full endpoint list on start
if addrs := <-sb.Notify(); len(addrs) != len(endpoints) {
t.Errorf("Initialize newSimpleBalancer should have triggered Notify() chan, but it didn't")
}
unblockingOpts := grpc.BalancerGetOptions{BlockingWait: false}
_, _, err := sb.Get(context.Background(), unblockingOpts)
if err != ErrNoAddrAvilable {
t.Errorf("Get() with no up endpoints should return ErrNoAddrAvailable, got: %v", err)
}
down1 := sb.Up(grpc.Address{Addr: endpoints[1]})
if addrs := <-sb.Notify(); len(addrs) != 1 {
t.Errorf("first Up() should have triggered balancer to send the first connected address via Notify chan so that other connections can be closed")
}
// a second Up() must not replace the already pinned address
down2 := sb.Up(grpc.Address{Addr: endpoints[2]})
addrFirst, putFun, err := sb.Get(context.Background(), unblockingOpts)
if err != nil {
t.Errorf("Get() with up endpoints should success, got %v", err)
}
if addrFirst.Addr != endpoints[1] {
t.Errorf("Get() didn't return expected address, got %v", addrFirst)
}
if putFun == nil {
t.Errorf("Get() returned unexpected nil put function")
}
addrSecond, _, _ := sb.Get(context.Background(), unblockingOpts)
if addrFirst.Addr != addrSecond.Addr {
t.Errorf("Get() didn't return the same address as previous call, got %v and %v", addrFirst, addrSecond)
}
// taking the pinned connection down makes the balancer re-advertise all endpoints
down1(errors.New("error"))
if addrs := <-sb.Notify(); len(addrs) != len(endpoints) {
t.Errorf("closing the only connection should triggered balancer to send the all endpoints via Notify chan so that we can establish a connection")
}
down2(errors.New("error"))
_, _, err = sb.Get(context.Background(), unblockingOpts)
if err != ErrNoAddrAvilable {
t.Errorf("Get() with no up endpoints should return ErrNoAddrAvailable, got: %v", err)
}
}
// TestBalancerGetBlocking checks the blocking flavor of Get(): it must time
// out with the context while nothing is pinned, be released by a later
// Up(), keep returning the first pinned address, and block again once the
// pinned connection has gone down.
func TestBalancerGetBlocking(t *testing.T) {
	sb := newSimpleBalancer(endpoints)
	defer sb.Close()
	if addrs := <-sb.Notify(); len(addrs) != len(endpoints) {
		t.Errorf("Initialize newSimpleBalancer should have triggered Notify() chan, but it didn't")
	}
	blockingOpts := grpc.BalancerGetOptions{BlockingWait: true}

	// keep the cancel func so the timeout's resources are released promptly
	ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100)
	_, _, err := sb.Get(ctx, blockingOpts)
	cancel()
	if err != context.DeadlineExceeded {
		t.Errorf("Get() with no up endpoints should timeout, got %v", err)
	}

	downC := make(chan func(error), 1)

	go func() {
		// ensure sb.Up() will be called after sb.Get() to see if Up() releases blocking Get()
		time.Sleep(time.Millisecond * 100)
		f := sb.Up(grpc.Address{Addr: endpoints[1]})
		if addrs := <-sb.Notify(); len(addrs) != 1 {
			t.Errorf("first Up() should have triggered balancer to send the first connected address via Notify chan so that other connections can be closed")
		}
		downC <- f
	}()
	addrFirst, putFun, err := sb.Get(context.Background(), blockingOpts)
	if err != nil {
		t.Errorf("Get() with up endpoints should success, got %v", err)
	}
	if addrFirst.Addr != endpoints[1] {
		t.Errorf("Get() didn't return expected address, got %v", addrFirst)
	}
	if putFun == nil {
		t.Errorf("Get() returned unexpected nil put function")
	}
	down1 := <-downC

	// a second Up() must not replace the already pinned address
	down2 := sb.Up(grpc.Address{Addr: endpoints[2]})
	addrSecond, _, _ := sb.Get(context.Background(), blockingOpts)
	if addrFirst.Addr != addrSecond.Addr {
		t.Errorf("Get() didn't return the same address as previous call, got %v and %v", addrFirst, addrSecond)
	}

	down1(errors.New("error"))
	if addrs := <-sb.Notify(); len(addrs) != len(endpoints) {
		t.Errorf("closing the only connection should triggered balancer to send the all endpoints via Notify chan so that we can establish a connection")
	}
	down2(errors.New("error"))

	ctx, cancel = context.WithTimeout(context.Background(), time.Millisecond*100)
	defer cancel()
	_, _, err = sb.Get(ctx, blockingOpts)
	if err != context.DeadlineExceeded {
		t.Errorf("Get() with no up endpoints should timeout, got %v", err)
	}
}
// TestBalancerDoNotBlockOnClose ensures that balancer and grpc don't deadlock each other
// due to rapid open/close conn. The deadlock causes balancer.Close() to block forever.
// See issue: https://github.com/coreos/etcd/issues/7283 for more detail.
func TestBalancerDoNotBlockOnClose(t *testing.T) {
defer testutil.AfterTest(t)
// listeners that drop every accepted connection, forcing constant reconnects
kcl := newKillConnListener(t, 3)
defer kcl.close()
for i := 0; i < 5; i++ {
sb := newSimpleBalancer(kcl.endpoints())
conn, err := grpc.Dial("", grpc.WithInsecure(), grpc.WithBalancer(sb))
if err != nil {
t.Fatal(err)
}
kvc := pb.NewKVClient(conn)
// wait for the balancer to report its first connection
<-sb.readyc
var wg sync.WaitGroup
wg.Add(100)
cctx, cancel := context.WithCancel(context.TODO())
// keep 100 non-fail-fast RPCs in flight while the balancer and conn are closed
for j := 0; j < 100; j++ {
go func() {
defer wg.Done()
kvc.Range(cctx, &pb.RangeRequest{}, grpc.FailFast(false))
}()
}
// balancer.Close() might block
// if balancer and grpc deadlock each other.
bclosec, cclosec := make(chan struct{}), make(chan struct{})
go func() {
defer close(bclosec)
sb.Close()
}()
go func() {
defer close(cclosec)
conn.Close()
}()
// each close must finish within 3 seconds
select {
case <-bclosec:
case <-time.After(3 * time.Second):
testutil.FatalStack(t, "balancer close timeout")
}
select {
case <-cclosec:
case <-time.After(3 * time.Second):
t.Fatal("grpc conn close timeout")
}
// release the in-flight RPCs and wait for their goroutines
cancel()
wg.Wait()
}
}
// killConnListener listens incoming conn and kills it immediately.
type killConnListener struct {
// wg tracks the accept goroutines; close() waits on it
wg sync.WaitGroup
// eps holds the address of every listener that was opened
eps []string
// stopc is closed by close() to stop all listeners
stopc chan struct{}
// t is the test used to report listen/accept failures
t *testing.T
}
// newKillConnListener opens size TCP listeners on OS-assigned ports,
// records their addresses, and starts one listen goroutine per listener.
// The test is failed immediately if a listener cannot be opened.
func newKillConnListener(t *testing.T, size int) *killConnListener {
	kcl := &killConnListener{t: t, stopc: make(chan struct{})}
	for remaining := size; remaining > 0; remaining-- {
		ln, err := net.Listen("tcp", ":0")
		if err != nil {
			t.Fatal(err)
		}
		addr := ln.Addr().String()
		kcl.eps = append(kcl.eps, addr)
		kcl.wg.Add(1)
		go kcl.listen(ln)
	}
	return kcl
}
// endpoints returns the addresses of all listeners opened by
// newKillConnListener.
func (kcl *killConnListener) endpoints() []string {
	return kcl.eps
}
// listen accepts connections on l in a background goroutine and closes each
// one about a millisecond after accepting it. It blocks until stopc is
// closed, then closes the listener, which unblocks the accept goroutine.
//
// Accept failures are reported with t.Error rather than t.Fatal: FailNow
// (and therefore Fatal) must only be called from the goroutine running the
// test, and this code runs on a helper goroutine.
func (kcl *killConnListener) listen(l net.Listener) {
	go func() {
		defer kcl.wg.Done()
		for {
			conn, err := l.Accept()
			select {
			case <-kcl.stopc:
				// shutting down; don't leak a connection that was
				// accepted just before the listener got closed
				if err == nil {
					conn.Close()
				}
				return
			default:
			}
			if err != nil {
				kcl.t.Error(err)
				return
			}
			time.Sleep(1 * time.Millisecond)
			conn.Close()
		}
	}()
	<-kcl.stopc
	l.Close()
}
// close signals every listener to stop by closing stopc and then waits for
// all accept goroutines to exit.
func (kcl *killConnListener) close() {
	close(kcl.stopc)
	kcl.wg.Wait()
}

View File

@ -31,7 +31,9 @@ import (
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/credentials"
"google.golang.org/grpc/keepalive"
"google.golang.org/grpc/metadata"
"google.golang.org/grpc/status"
)
var (
@ -51,21 +53,22 @@ type Client struct {
conn *grpc.ClientConn
dialerrc chan error
cfg Config
creds *credentials.TransportCredentials
balancer *simpleBalancer
retryWrapper retryRpcFunc
retryAuthWrapper retryRpcFunc
cfg Config
creds *credentials.TransportCredentials
balancer *healthBalancer
mu *sync.Mutex
ctx context.Context
cancel context.CancelFunc
// Username is a username for authentication
// Username is a user name for authentication.
Username string
// Password is a password for authentication
// Password is a password for authentication.
Password string
// tokenCred is an instance of WithPerRPCCredentials()'s argument
tokenCred *authTokenCredential
callOpts []grpc.CallOption
}
// New creates a new etcdv3 client from a given configuration.
@ -116,8 +119,23 @@ func (c *Client) Endpoints() (eps []string) {
// SetEndpoints updates client's endpoints.
func (c *Client) SetEndpoints(eps ...string) {
c.mu.Lock()
c.cfg.Endpoints = eps
c.balancer.updateAddrs(eps)
c.mu.Unlock()
c.balancer.updateAddrs(eps...)
// updating notifyCh can trigger new connections,
// need update addrs if all connections are down
// or addrs does not include pinAddr.
c.balancer.mu.RLock()
update := !hasAddr(c.balancer.addrs, c.balancer.pinAddr)
c.balancer.mu.RUnlock()
if update {
select {
case c.balancer.updateAddrsC <- notifyNext:
case <-c.balancer.stopc:
}
}
}
// Sync synchronizes client's endpoints with the known endpoints from the etcd membership.
@ -144,8 +162,10 @@ func (c *Client) autoSync() {
case <-c.ctx.Done():
return
case <-time.After(c.cfg.AutoSyncInterval):
ctx, _ := context.WithTimeout(c.ctx, 5*time.Second)
if err := c.Sync(ctx); err != nil && err != c.ctx.Err() {
ctx, cancel := context.WithTimeout(c.ctx, 5*time.Second)
err := c.Sync(ctx)
cancel()
if err != nil && err != c.ctx.Err() {
logger.Println("Auto sync endpoints failed:", err)
}
}
@ -174,7 +194,7 @@ func parseEndpoint(endpoint string) (proto string, host string, scheme string) {
host = endpoint
url, uerr := url.Parse(endpoint)
if uerr != nil || !strings.Contains(endpoint, "://") {
return
return proto, host, scheme
}
scheme = url.Scheme
@ -182,13 +202,13 @@ func parseEndpoint(endpoint string) (proto string, host string, scheme string) {
host = url.Host
switch url.Scheme {
case "http", "https":
case "unix":
case "unix", "unixs":
proto = "unix"
host = url.Host + url.Path
default:
proto, host = "", ""
}
return
return proto, host, scheme
}
func (c *Client) processCreds(scheme string) (creds *credentials.TransportCredentials) {
@ -197,7 +217,7 @@ func (c *Client) processCreds(scheme string) (creds *credentials.TransportCreden
case "unix":
case "http":
creds = nil
case "https":
case "https", "unixs":
if creds != nil {
break
}
@ -207,7 +227,7 @@ func (c *Client) processCreds(scheme string) (creds *credentials.TransportCreden
default:
creds = nil
}
return
return creds
}
// dialSetupOpts gives the dial opts prior to any authentication
@ -215,10 +235,17 @@ func (c *Client) dialSetupOpts(endpoint string, dopts ...grpc.DialOption) (opts
if c.cfg.DialTimeout > 0 {
opts = []grpc.DialOption{grpc.WithTimeout(c.cfg.DialTimeout)}
}
if c.cfg.DialKeepAliveTime > 0 {
params := keepalive.ClientParameters{
Time: c.cfg.DialKeepAliveTime,
Timeout: c.cfg.DialKeepAliveTimeout,
}
opts = append(opts, grpc.WithKeepaliveParams(params))
}
opts = append(opts, dopts...)
f := func(host string, t time.Duration) (net.Conn, error) {
proto, host, _ := parseEndpoint(c.balancer.getEndpoint(host))
proto, host, _ := parseEndpoint(c.balancer.endpoint(host))
if host == "" && endpoint != "" {
// dialing an endpoint not in the balancer; use
// endpoint passed into dial
@ -270,7 +297,7 @@ func (c *Client) getToken(ctx context.Context) error {
endpoint := c.cfg.Endpoints[i]
host := getHost(endpoint)
// use dial options without dopts to avoid reusing the client balancer
auth, err = newAuthenticator(host, c.dialSetupOpts(endpoint))
auth, err = newAuthenticator(host, c.dialSetupOpts(endpoint), c)
if err != nil {
continue
}
@ -311,7 +338,7 @@ func (c *Client) dial(endpoint string, dopts ...grpc.DialOption) (*grpc.ClientCo
if err != nil {
if toErr(ctx, err) != rpctypes.ErrAuthNotEnabled {
if err == ctx.Err() && ctx.Err() != c.ctx.Err() {
err = grpc.ErrClientConnTimeout
err = context.DeadlineExceeded
}
return nil, err
}
@ -322,7 +349,7 @@ func (c *Client) dial(endpoint string, dopts ...grpc.DialOption) (*grpc.ClientCo
opts = append(opts, c.cfg.DialOptions...)
conn, err := grpc.Dial(host, opts...)
conn, err := grpc.DialContext(c.ctx, host, opts...)
if err != nil {
return nil, err
}
@ -333,7 +360,7 @@ func (c *Client) dial(endpoint string, dopts ...grpc.DialOption) (*grpc.ClientCo
// when the cluster has a leader.
func WithRequireLeader(ctx context.Context) context.Context {
md := metadata.Pairs(rpctypes.MetadataRequireLeaderKey, rpctypes.MetadataHasLeader)
return metadata.NewContext(ctx, md)
return metadata.NewOutgoingContext(ctx, md)
}
func newClient(cfg *Config) (*Client, error) {
@ -360,35 +387,57 @@ func newClient(cfg *Config) (*Client, error) {
creds: creds,
ctx: ctx,
cancel: cancel,
mu: new(sync.Mutex),
callOpts: defaultCallOpts,
}
if cfg.Username != "" && cfg.Password != "" {
client.Username = cfg.Username
client.Password = cfg.Password
}
if cfg.MaxCallSendMsgSize > 0 || cfg.MaxCallRecvMsgSize > 0 {
if cfg.MaxCallRecvMsgSize > 0 && cfg.MaxCallSendMsgSize > cfg.MaxCallRecvMsgSize {
return nil, fmt.Errorf("gRPC message recv limit (%d bytes) must be greater than send limit (%d bytes)", cfg.MaxCallRecvMsgSize, cfg.MaxCallSendMsgSize)
}
callOpts := []grpc.CallOption{
defaultFailFast,
defaultMaxCallSendMsgSize,
defaultMaxCallRecvMsgSize,
}
if cfg.MaxCallSendMsgSize > 0 {
callOpts[1] = grpc.MaxCallSendMsgSize(cfg.MaxCallSendMsgSize)
}
if cfg.MaxCallRecvMsgSize > 0 {
callOpts[2] = grpc.MaxCallRecvMsgSize(cfg.MaxCallRecvMsgSize)
}
client.callOpts = callOpts
}
client.balancer = newSimpleBalancer(cfg.Endpoints)
conn, err := client.dial("", grpc.WithBalancer(client.balancer))
client.balancer = newHealthBalancer(cfg.Endpoints, cfg.DialTimeout, func(ep string) (bool, error) {
return grpcHealthCheck(client, ep)
})
// use Endpoints[0] so that for https:// without any tls config given, then
// grpc will assume the certificate server name is the endpoint host.
conn, err := client.dial(cfg.Endpoints[0], grpc.WithBalancer(client.balancer))
if err != nil {
client.cancel()
client.balancer.Close()
return nil, err
}
client.conn = conn
client.retryWrapper = client.newRetryWrapper()
client.retryAuthWrapper = client.newAuthRetryWrapper()
// wait for a connection
if cfg.DialTimeout > 0 {
hasConn := false
waitc := time.After(cfg.DialTimeout)
select {
case <-client.balancer.readyc:
case <-client.balancer.ready():
hasConn = true
case <-ctx.Done():
case <-waitc:
}
if !hasConn {
err := grpc.ErrClientConnTimeout
err := context.DeadlineExceeded
select {
case err = <-client.dialerrc:
default:
@ -423,7 +472,7 @@ func (c *Client) checkVersion() (err error) {
errc := make(chan error, len(c.cfg.Endpoints))
ctx, cancel := context.WithCancel(c.ctx)
if c.cfg.DialTimeout > 0 {
ctx, _ = context.WithTimeout(ctx, c.cfg.DialTimeout)
ctx, cancel = context.WithTimeout(ctx, c.cfg.DialTimeout)
}
wg.Add(len(c.cfg.Endpoints))
for _, ep := range c.cfg.Endpoints {
@ -438,7 +487,7 @@ func (c *Client) checkVersion() (err error) {
vs := strings.Split(resp.Version, ".")
maj, min := 0, 0
if len(vs) >= 2 {
maj, rerr = strconv.Atoi(vs[0])
maj, _ = strconv.Atoi(vs[0])
min, rerr = strconv.Atoi(vs[1])
}
if maj < 3 || (maj == 3 && min < 2) {
@ -470,14 +519,28 @@ func isHaltErr(ctx context.Context, err error) bool {
if err == nil {
return false
}
code := grpc.Code(err)
ev, _ := status.FromError(err)
// Unavailable codes mean the system will be right back.
// (e.g., can't connect, lost leader)
// Treat Internal codes as if something failed, leaving the
// system in an inconsistent state, but retrying could make progress.
// (e.g., failed in middle of send, corrupted frame)
// TODO: are permanent Internal errors possible from grpc?
return code != codes.Unavailable && code != codes.Internal
return ev.Code() != codes.Unavailable && ev.Code() != codes.Internal
}
// isUnavailableErr reports whether err carries the gRPC "Unavailable" status
// code. It always reports false when ctx is non-nil and already done, or when
// err is nil.
func isUnavailableErr(ctx context.Context, err error) bool {
	switch {
	case ctx != nil && ctx.Err() != nil:
		// the caller's context has ended; do not classify the error
		return false
	case err == nil:
		return false
	}
	// Only the status code is consulted; the conversion's ok flag is discarded.
	st, _ := status.FromError(err)
	// Unavailable codes mean the system will be right back.
	// (e.g., can't connect, lost leader)
	return st.Code() == codes.Unavailable
}
func toErr(ctx context.Context, err error) error {
@ -488,7 +551,8 @@ func toErr(ctx context.Context, err error) error {
if _, ok := err.(rpctypes.EtcdError); ok {
return err
}
code := grpc.Code(err)
ev, _ := status.FromError(err)
code := ev.Code()
switch code {
case codes.DeadlineExceeded:
fallthrough
@ -497,7 +561,6 @@ func toErr(ctx context.Context, err error) error {
err = ctx.Err()
}
case codes.Unavailable:
err = ErrNoAvailableEndpoints
case codes.FailedPrecondition:
err = grpc.ErrClientConnClosing
}

View File

@ -22,8 +22,8 @@ import (
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
"github.com/coreos/etcd/pkg/testutil"
"golang.org/x/net/context"
"google.golang.org/grpc"
)
func TestDialCancel(t *testing.T) {
@ -45,7 +45,7 @@ func TestDialCancel(t *testing.T) {
t.Fatal(err)
}
// connect to ipv4 blackhole so dial blocks
// connect to ipv4 black hole so dial blocks
c.SetEndpoints("http://254.0.0.1:12345")
// issue Get to force redial attempts
@ -79,6 +79,8 @@ func TestDialCancel(t *testing.T) {
}
func TestDialTimeout(t *testing.T) {
t.Skip()
defer testutil.AfterTest(t)
testCfgs := []Config{
@ -97,7 +99,7 @@ func TestDialTimeout(t *testing.T) {
for i, cfg := range testCfgs {
donec := make(chan error)
go func() {
// without timeout, dial continues forever on ipv4 blackhole
// without timeout, dial continues forever on ipv4 black hole
c, err := New(cfg)
if c != nil || err == nil {
t.Errorf("#%d: new client should fail", i)
@ -117,8 +119,8 @@ func TestDialTimeout(t *testing.T) {
case <-time.After(5 * time.Second):
t.Errorf("#%d: failed to timeout dial on time", i)
case err := <-donec:
if err != grpc.ErrClientConnTimeout {
t.Errorf("#%d: unexpected error %v, want %v", i, err, grpc.ErrClientConnTimeout)
if err != context.DeadlineExceeded {
t.Errorf("#%d: unexpected error %v, want %v", i, err, context.DeadlineExceeded)
}
}
}

View File

@ -15,11 +15,12 @@
package clientv3util_test
import (
"context"
"log"
"github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/clientv3/clientv3util"
"golang.org/x/net/context"
)
func ExampleKeyExists_put() {
@ -33,7 +34,7 @@ func ExampleKeyExists_put() {
kvc := clientv3.NewKV(cli)
// perform a put only if key is missing
// It is useful to do the check (transactionally) to avoid overwriting
// It is useful to do the check atomically to avoid overwriting
// the existing key which would generate potentially unwanted events,
// unless of course you wanted to do an overwrite no matter what.
_, err = kvc.Txn(context.Background()).

View File

@ -16,6 +16,8 @@ package clientv3
import (
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/pkg/types"
"golang.org/x/net/context"
"google.golang.org/grpc"
)
@ -43,20 +45,34 @@ type Cluster interface {
}
type cluster struct {
remote pb.ClusterClient
remote pb.ClusterClient
callOpts []grpc.CallOption
}
func NewCluster(c *Client) Cluster {
return &cluster{remote: RetryClusterClient(c)}
api := &cluster{remote: RetryClusterClient(c)}
if c != nil {
api.callOpts = c.callOpts
}
return api
}
func NewClusterFromClusterClient(remote pb.ClusterClient) Cluster {
return &cluster{remote: remote}
func NewClusterFromClusterClient(remote pb.ClusterClient, c *Client) Cluster {
api := &cluster{remote: remote}
if c != nil {
api.callOpts = c.callOpts
}
return api
}
func (c *cluster) MemberAdd(ctx context.Context, peerAddrs []string) (*MemberAddResponse, error) {
// fail-fast before panic in rafthttp
if _, err := types.NewURLs(peerAddrs); err != nil {
return nil, err
}
r := &pb.MemberAddRequest{PeerURLs: peerAddrs}
resp, err := c.remote.MemberAdd(ctx, r)
resp, err := c.remote.MemberAdd(ctx, r, c.callOpts...)
if err != nil {
return nil, toErr(ctx, err)
}
@ -65,7 +81,7 @@ func (c *cluster) MemberAdd(ctx context.Context, peerAddrs []string) (*MemberAdd
func (c *cluster) MemberRemove(ctx context.Context, id uint64) (*MemberRemoveResponse, error) {
r := &pb.MemberRemoveRequest{ID: id}
resp, err := c.remote.MemberRemove(ctx, r)
resp, err := c.remote.MemberRemove(ctx, r, c.callOpts...)
if err != nil {
return nil, toErr(ctx, err)
}
@ -73,28 +89,25 @@ func (c *cluster) MemberRemove(ctx context.Context, id uint64) (*MemberRemoveRes
}
func (c *cluster) MemberUpdate(ctx context.Context, id uint64, peerAddrs []string) (*MemberUpdateResponse, error) {
// it is safe to retry on update.
for {
r := &pb.MemberUpdateRequest{ID: id, PeerURLs: peerAddrs}
resp, err := c.remote.MemberUpdate(ctx, r, grpc.FailFast(false))
if err == nil {
return (*MemberUpdateResponse)(resp), nil
}
if isHaltErr(ctx, err) {
return nil, toErr(ctx, err)
}
// fail-fast before panic in rafthttp
if _, err := types.NewURLs(peerAddrs); err != nil {
return nil, err
}
// it is safe to retry on update.
r := &pb.MemberUpdateRequest{ID: id, PeerURLs: peerAddrs}
resp, err := c.remote.MemberUpdate(ctx, r, c.callOpts...)
if err == nil {
return (*MemberUpdateResponse)(resp), nil
}
return nil, toErr(ctx, err)
}
func (c *cluster) MemberList(ctx context.Context) (*MemberListResponse, error) {
// it is safe to retry on list.
for {
resp, err := c.remote.MemberList(ctx, &pb.MemberListRequest{}, grpc.FailFast(false))
if err == nil {
return (*MemberListResponse)(resp), nil
}
if isHaltErr(ctx, err) {
return nil, toErr(ctx, err)
}
resp, err := c.remote.MemberList(ctx, &pb.MemberListRequest{}, c.callOpts...)
if err == nil {
return (*MemberListResponse)(resp), nil
}
return nil, toErr(ctx, err)
}

View File

@ -44,10 +44,8 @@ func (op CompactOp) toRequest() *pb.CompactionRequest {
return &pb.CompactionRequest{Revision: op.revision, Physical: op.physical}
}
// WithCompactPhysical makes compact RPC call wait until
// the compaction is physically applied to the local database
// such that compacted entries are totally removed from the
// backend database.
// WithCompactPhysical makes Compact wait until all compacted entries are
// removed from the etcd server's storage.
func WithCompactPhysical() CompactOption {
	return func(op *CompactOp) {
		// flag the operation so the request is built with Physical set
		op.physical = true
	}
}

View File

@ -99,6 +99,7 @@ func (cmp *Cmp) ValueBytes() []byte {
// WithValueBytes replaces the value held by the comparison's target with v.
// The target union must hold a *pb.Compare_Value; any other target type makes
// the type assertion panic.
func (cmp *Cmp) WithValueBytes(v []byte) {
	target := cmp.TargetUnion.(*pb.Compare_Value)
	target.Value = v
}
// mustInt64 panics if val isn't an int or int64. It returns an int64 otherwise.
func mustInt64(val interface{}) int64 {
if v, ok := val.(int64); ok {
return v
@ -108,3 +109,12 @@ func mustInt64(val interface{}) int64 {
}
panic("bad value")
}
// mustInt64orLeaseID converts val to an int64. A LeaseID is converted
// directly; every other value is handed to mustInt64, which decides whether
// to convert or panic.
func mustInt64orLeaseID(val interface{}) int64 {
	switch id := val.(type) {
	case LeaseID:
		return int64(id)
	default:
		return mustInt64(val)
	}
}

View File

@ -21,6 +21,7 @@ import (
v3 "github.com/coreos/etcd/clientv3"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/mvcc/mvccpb"
"golang.org/x/net/context"
)
@ -185,12 +186,12 @@ func (e *Election) observe(ctx context.Context, ch chan<- v3.GetResponse) {
cancel()
return
}
// only accept PUTs; a DELETE will make observe() spin
// only accept puts; a delete will make observe() spin
for _, ev := range wr.Events {
if ev.Type == mvccpb.PUT {
hdr, kv = &wr.Header, ev.Kv
// may have multiple revs; hdr.rev = the last rev
// set to kv's rev in case batch has multiple PUTs
// set to kv's rev in case batch has multiple Puts
hdr.Revision = kv.ModRevision
break
}
@ -213,6 +214,7 @@ func (e *Election) observe(ctx context.Context, ch chan<- v3.GetResponse) {
for !keyDeleted {
wr, ok := <-wch
if !ok {
cancel()
return
}
for _, ev := range wr.Events {
@ -225,6 +227,7 @@ func (e *Election) observe(ctx context.Context, ch chan<- v3.GetResponse) {
select {
case ch <- *resp:
case <-cctx.Done():
cancel()
return
}
}
@ -240,4 +243,4 @@ func (e *Election) Key() string { return e.leaderKey }
func (e *Election) Rev() int64 { return e.leaderRev }
// Header is the response header from the last successful election proposal.
func (m *Election) Header() *pb.ResponseHeader { return m.hdr }
func (e *Election) Header() *pb.ResponseHeader { return e.hdr }

View File

@ -20,6 +20,7 @@ import (
v3 "github.com/coreos/etcd/clientv3"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/mvcc/mvccpb"
"golang.org/x/net/context"
)

View File

@ -20,6 +20,7 @@ import (
v3 "github.com/coreos/etcd/clientv3"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"golang.org/x/net/context"
)
@ -49,7 +50,9 @@ func (m *Mutex) Lock(ctx context.Context) error {
put := v3.OpPut(m.myKey, "", v3.WithLease(s.Lease()))
// reuse key in case this session already holds the lock
get := v3.OpGet(m.myKey)
resp, err := client.Txn(ctx).If(cmp).Then(put).Else(get).Commit()
// fetch current holder to complete uncontended path with only one RPC
getOwner := v3.OpGet(m.pfx, v3.WithFirstCreate()...)
resp, err := client.Txn(ctx).If(cmp).Then(put, getOwner).Else(get, getOwner).Commit()
if err != nil {
return err
}
@ -57,6 +60,12 @@ func (m *Mutex) Lock(ctx context.Context) error {
if !resp.Succeeded {
m.myRev = resp.Responses[0].GetResponseRange().Kvs[0].CreateRevision
}
// if the prefix holds no key, or the key with the lowest create revision is ours, the lock is already held
ownerKey := resp.Responses[1].GetResponseRange().Kvs
if len(ownerKey) == 0 || ownerKey[0].CreateRevision == m.myRev {
m.hdr = resp.Header
return nil
}
// wait for deletion revisions prior to myKey
hdr, werr := waitDeletes(ctx, client, m.pfx, m.myRev-1)

View File

@ -18,6 +18,7 @@ import (
"time"
v3 "github.com/coreos/etcd/clientv3"
"golang.org/x/net/context"
)
@ -53,6 +54,7 @@ func NewSession(client *v3.Client, opts ...SessionOption) (*Session, error) {
ctx, cancel := context.WithCancel(ops.ctx)
keepAlive, err := client.KeepAlive(ctx, id)
if err != nil || keepAlive == nil {
cancel()
return nil, err
}

View File

@ -18,6 +18,7 @@ import (
"math"
v3 "github.com/coreos/etcd/clientv3"
"golang.org/x/net/context"
)
@ -46,7 +47,7 @@ const (
// SerializableSnapshot provides serializable isolation and also checks
// for write conflicts.
SerializableSnapshot Isolation = iota
// Serializable reads within the same transactiona attempt return data
// Serializable reads within the same transaction attempt return data
// from the revision of the first read.
Serializable
// RepeatableReads reads within the same transaction attempt always
@ -85,7 +86,7 @@ func WithPrefetch(keys ...string) stmOption {
return func(so *stmOptions) { so.prefetch = append(so.prefetch, keys...) }
}
// NewSTM initiates a new STM instance, using snapshot isolation by default.
// NewSTM initiates a new STM instance, using serializable snapshot isolation by default.
func NewSTM(c *v3.Client, apply func(STM) error, so ...stmOption) (*v3.TxnResponse, error) {
opts := &stmOptions{ctx: c.Ctx()}
for _, f := range so {
@ -193,11 +194,12 @@ func (rs readSet) add(keys []string, txnresp *v3.TxnResponse) {
}
}
// first returns the store revision from the first fetch
func (rs readSet) first() int64 {
ret := int64(math.MaxInt64 - 1)
for _, resp := range rs {
if len(resp.Kvs) > 0 && resp.Kvs[0].ModRevision < ret {
ret = resp.Kvs[0].ModRevision
if rev := resp.Header.Revision; rev < ret {
ret = rev
}
}
return ret

View File

@ -33,10 +33,31 @@ type Config struct {
// DialTimeout is the timeout for failing to establish a connection.
DialTimeout time.Duration `json:"dial-timeout"`
// DialKeepAliveTime is the time after which the client pings the server to see if
// the transport is alive.
DialKeepAliveTime time.Duration `json:"dial-keep-alive-time"`
// DialKeepAliveTimeout is the time that the client waits for a response to the
// keep-alive probe. If the response is not received in this time, the connection is closed.
DialKeepAliveTimeout time.Duration `json:"dial-keep-alive-timeout"`
// MaxCallSendMsgSize is the client-side request send limit in bytes.
// If 0, it defaults to 2.0 MiB (2 * 1024 * 1024).
// Make sure that "MaxCallSendMsgSize" < server-side default send/recv limit.
// ("--max-request-bytes" flag to etcd or "embed.Config.MaxRequestBytes").
MaxCallSendMsgSize int
// MaxCallRecvMsgSize is the client-side response receive limit.
// If 0, it defaults to "math.MaxInt32", because range response can
// easily exceed request send limits.
// Make sure that "MaxCallRecvMsgSize" >= server-side default send/recv limit.
// ("--max-request-bytes" flag to etcd or "embed.Config.MaxRequestBytes").
MaxCallRecvMsgSize int
// TLS holds the client secure credentials, if any.
TLS *tls.Config
// Username is a username for authentication.
// Username is a user name for authentication.
Username string `json:"username"`
// Password is a password for authentication.

View File

@ -28,7 +28,7 @@
// Make sure to close the client after using it. If the client is not closed, the
// connection will have leaky goroutines.
//
// To specify client request timeout, pass context.WithTimeout to APIs:
// To specify a client request timeout, wrap the context with context.WithTimeout:
//
// ctx, cancel := context.WithTimeout(context.Background(), timeout)
// resp, err := kvc.Put(ctx, "sample_key", "sample_value")

View File

@ -1,113 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package clientv3_test
import (
"fmt"
"log"
"github.com/coreos/etcd/clientv3"
"golang.org/x/net/context"
)
// ExampleAuth creates a "root" user and a restricted user "u" with role "r",
// enables authentication, and then prints the error returned when "u" runs a
// transaction on key "zoo1", followed by the key range granted to role "r".
// Any setup failure aborts the example through log.Fatal.
func ExampleAuth() {
cli, err := clientv3.New(clientv3.Config{
Endpoints: endpoints,
DialTimeout: dialTimeout,
})
if err != nil {
log.Fatal(err)
}
defer cli.Close()
// create the "root" role and the "root" user, then grant that role to the user
if _, err = cli.RoleAdd(context.TODO(), "root"); err != nil {
log.Fatal(err)
}
if _, err = cli.UserAdd(context.TODO(), "root", "123"); err != nil {
log.Fatal(err)
}
if _, err = cli.UserGrantRole(context.TODO(), "root", "root"); err != nil {
log.Fatal(err)
}
// create role "r" and give it read-write permission on the range "foo".."zoo"
if _, err = cli.RoleAdd(context.TODO(), "r"); err != nil {
log.Fatal(err)
}
if _, err = cli.RoleGrantPermission(
context.TODO(),
"r", // role name
"foo", // key
"zoo", // range end
clientv3.PermissionType(clientv3.PermReadWrite),
); err != nil {
log.Fatal(err)
}
// create user "u" and grant it role "r"
if _, err = cli.UserAdd(context.TODO(), "u", "123"); err != nil {
log.Fatal(err)
}
if _, err = cli.UserGrantRole(context.TODO(), "u", "r"); err != nil {
log.Fatal(err)
}
// turn authentication on; the clients created below supply a user name and password
if _, err = cli.AuthEnable(context.TODO()); err != nil {
log.Fatal(err)
}
// connect as the restricted user "u"
cliAuth, err := clientv3.New(clientv3.Config{
Endpoints: endpoints,
DialTimeout: dialTimeout,
Username: "u",
Password: "123",
})
if err != nil {
log.Fatal(err)
}
defer cliAuth.Close()
// this put on "foo1" must succeed for the example to continue; a failure is fatal
if _, err = cliAuth.Put(context.TODO(), "foo1", "bar"); err != nil {
log.Fatal(err)
}
// the error from this txn on "zoo1" is printed instead of being treated as fatal;
// the expected output shows it is a permission-denied error
_, err = cliAuth.Txn(context.TODO()).
If(clientv3.Compare(clientv3.Value("zoo1"), ">", "abc")).
Then(clientv3.OpPut("zoo1", "XYZ")).
Else(clientv3.OpPut("zoo1", "ABC")).
Commit()
fmt.Println(err)
// now check the permission with the root account
rootCli, err := clientv3.New(clientv3.Config{
Endpoints: endpoints,
DialTimeout: dialTimeout,
Username: "root",
Password: "123",
})
if err != nil {
log.Fatal(err)
}
defer rootCli.Close()
// fetch role "r" and print the key and range end of its first permission entry
resp, err := rootCli.RoleGet(context.TODO(), "r")
if err != nil {
log.Fatal(err)
}
fmt.Printf("user u permission: key %q, range end %q\n", resp.Perm[0].Key, resp.Perm[0].RangeEnd)
if _, err = rootCli.AuthDisable(context.TODO()); err != nil {
log.Fatal(err)
}
// Output: etcdserver: permission denied
// user u permission: key "foo", range end "zoo"
}

View File

@ -19,6 +19,7 @@ import (
"log"
"github.com/coreos/etcd/clientv3"
"golang.org/x/net/context"
)

View File

@ -20,6 +20,7 @@ import (
"github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
"golang.org/x/net/context"
)
@ -236,8 +237,11 @@ func ExampleKV_txn() {
ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
_, err = kvc.Txn(ctx).
If(clientv3.Compare(clientv3.Value("key"), ">", "abc")). // txn value comparisons are lexical
Then(clientv3.OpPut("key", "XYZ")). // this runs, since 'xyz' > 'abc'
// txn value comparisons are lexical
If(clientv3.Compare(clientv3.Value("key"), ">", "abc")).
// the "Then" runs, since "xyz" > "abc"
Then(clientv3.OpPut("key", "XYZ")).
// the "Else" does not run
Else(clientv3.OpPut("key", "ABC")).
Commit()
cancel()

Some files were not shown because too many files have changed in this diff Show More