version: bump to v3.0.7

wal: lowercase segmentSizeBytes
clientv3/concurrency: allow election on prefixes of keys.
2016-08-31 09:49:24 -07:00 · 2016-08-31 09:48:30 -07:00 · 2016-08-31 09:46:05 -07:00 · 2016-08-31 09:39:57 -07:00 · 2016-08-30 21:06:36 -07:00 · 2016-08-30 21:06:33 -07:00
784 changed files with 33669 additions and 8637 deletions
--- a/.gitignore
+++ b/.gitignore
@ -10,3 +10,4 @@
 /hack/insta-discovery/.env
 *.test
 tools/functional-tester/docker/bin
+hack/tls-setup/certs
--- a/.header
+++ b/.header
@ -1,4 +1,4 @@
-// Copyright 2016 CoreOS, Inc.
+// Copyright 2016 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
--- a/.travis.yml
+++ b/.travis.yml
@ -4,7 +4,6 @@ go_import_path: github.com/coreos/etcd
 sudo: false

 go:
-  - 1.5
  - 1.6
  - tip

@ -14,10 +13,22 @@ env:
  matrix:
   - TARGET=amd64
   - TARGET=arm64
+   - TARGET=arm
+   - TARGET=ppc64le

 matrix:
+  fast_finish: true
  allow_failures:
    - go: tip
+  exclude:
+  - go: 1.6
+    env: TARGET=arm64
+  - go: tip
+    env: TARGET=arm
+  - go: tip
+    env: TARGET=arm64
+  - go: tip
+    env: TARGET=ppc64le

 addons:
  apt:
@ -37,8 +48,8 @@ install:

 script:
 - >
-        if [ "${TARGET}" == "amd64" ]; then
-                 GOARCH="${TARGET}" ./test;
-        elif [ "${TARGET}" == "arm64" ]; then
-                GOARCH="${TARGET}" ./build;
-        fi
+    if [ "${TARGET}" == "amd64" ]; then
+      GOARCH="${TARGET}" ./test;
+    else
+      GOARCH="${TARGET}" ./build;
+    fi
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -12,7 +12,7 @@ etcd is Apache 2.0 licensed and accepts contributions via GitHub pull requests.
 - Fork the repository on GitHub
 - Read the README.md for build instructions

-## Reporting Bugs and Creating Issues
+## Reporting bugs and creating issues

 Reporting bugs is one of the best ways to contribute. However, a good bug report
 has some very specific qualities, so please read over our short document on
@ -39,7 +39,7 @@ The coding style suggested by the Golang community is used in etcd. See the [sty

 Please follow this style to make etcd easy to review, maintain and develop.

-### Format of the Commit Message
+### Format of the commit message

 We follow a rough convention for commit messages that is designed to answer two
 questions: what changed and why. The subject line should feature the what and
--- a/8
+++ b/8
@ -1,2 +1,6 @@
-FROM golang:onbuild
-EXPOSE 4001 7001 2379 2380
+FROM golang
+ADD . /go/src/github.com/coreos/etcd
+ADD cmd/vendor /go/src/github.com/coreos/etcd/vendor
+RUN go install github.com/coreos/etcd
+EXPOSE 2379 2380
+ENTRYPOINT ["etcd"]
--- a/11
+++ b/11
@ -0,0 +1,11 @@
+FROM alpine:latest
+
+ADD etcd /usr/local/bin/
+ADD etcdctl /usr/local/bin/
+RUN mkdir -p /var/etcd/
+RUN mkdir -p /var/lib/etcd/
+
+EXPOSE 2379 2380
+
+# Define default command.
+CMD ["/usr/local/bin/etcd"]
--- a/Documentation/api_v3.md
+++ b/Documentation/api_v3.md
@ -1,146 +0,0 @@
-# etcd3 API
-
-TODO: finish API doc
-
-## Response Header
-
-All Responses from etcd API have a [response header][response_header] attached. The response header includes the metadata of the response.
-
-```proto
-message ResponseHeader {
-  uint64 cluster_id = 1;
-  uint64 member_id = 2;
-  int64 revision = 3;
-  uint64 raft_term = 4;
-}
-```
-
-* Cluster_ID - the ID of the cluster that generates the response
-* Member_ID - the ID of the member that generates the response
-* Revision - the revision of the key-value store when the response is generated
-* Raft_Term - the Raft term of the member when the response is generated
-
-An application may read the Cluster_ID (Member_ID) field to ensure it is communicating with the intended cluster (member).
-
-Applications can use the `Revision` to know the latest revision of the key-value store. This is especially useful when applications specify a historical revision to make time `travel query` and wishes to know the latest revision at the time of the request.
-
-Applications can use `Raft_Term` to detect when the cluster completes a new leader election.
-
-## Key-Value API
-
-Key-Value API is used to manipulate key-value pairs stored inside etcd. The key-value API is defined as a [gRPC service][kv-service]. The Key-Value pair is defined as structured data in [protobuf format][kv-proto].
-
-### Key-Value Pair
-
-A key-value pair is the smallest unit that the key-value API can manipulate. Each key-value pair has a number of fields:
-
-```protobuf
-message KeyValue {
-  bytes key = 1;
-  int64 create_revision = 2;
-  int64 mod_revision = 3;
-  int64 version = 4;
-  bytes value = 5;
-  int64 lease = 6;
-}
-```
-
-* Key - key in bytes. An empty key is not allowed.
-* Value - value in bytes.
-* Version - version is the version of the key. A deletion resets the version to zero and any modification of the key increases its version.
-* Create_Revision - revision of the last creation on the key.
-* Mod_Revision - revision of the last modification on the key.
-* Lease - the ID of the lease attached to the key. If lease is 0, then no lease is attached to the key.
-
-
-## Data Model
-
-etcd is designed to reliably store infrequently updated data and provide reliable watch queries. etcd exposes previous versions of key-value pairs to support inexpensive snapshots and watch history events (“time travel queries”). A persistent, multi-version, concurrency-control data model is a good fit for these use cases.
-
-etcd stores data in a multiversion [persistent][persistent-ds] key-value store. The persistent key-value store preserves the previous version of a key-value pair when its value is superseded with new data. The key-value store is effectively immutable; its operations do not update the structure in-place, but instead always generates a new updated structure. All past versions of keys are still accessible and watchable after modification. To prevent the data store from growing indefinitely over time from maintaining old versions, the store may be compacted to shed the oldest versions of superseded data.
-
-### Logical View
-
-The store’s logical view is a flat binary key space. The key space has a lexically sorted index on byte string keys so range queries are inexpensive.
-
-The key space maintains multiple revisions. Each atomic mutative operation (e.g., a transaction operation may contain multiple operations) creates a new revision on the key space. All data held by previous revisions remains unchanged. Old versions of key can still be accessed through previous revisions. Likewise, revisions are indexed as well; ranging over revisions with watchers is efficient. If the store is compacted to recover space, revisions before the compact revision will be removed.
-
-A key’s lifetime spans a generation. Each key may have one or multiple generations. Creating a key increments the generation of that key, starting at 1 if the key never existed. Deleting a key generates a key tombstone, concluding the key’s current generation. Each modification of a key creates a new version of the key. Once a compaction happens, any generation ended before the given revision will be removed and values set before the compaction revision except the latest one will be removed.
-
-### Physical View
-
-etcd stores the physical data as key-value pairs in a persistent [b+tree][b+tree]. Each revision of the store’s state only contains the delta from its previous revision to be efficient. A single revision may correspond to multiple keys in the tree. 
-
-The key of key-value pair is a 3-tuple (major, sub, type). Major is the store revision holding the key. Sub differentiates among  keys within the same revision. Type is an optional suffix for special value (e.g., `t` if the value contains a tombstone). The value of the key-value pair contains the modification from previous revision, thus one delta from previous revision. The b+tree is ordered by key in lexical byte-order. Ranged lookups over revision deltas are fast; this enables quickly finding modifications from one specific revision to another. Compaction removes out-of-date keys-value pairs.
-
-etcd also keeps a secondary in-memory [btree][btree] index to speed up range queries over keys. The keys in the btree index are the keys of the store exposed to user. The value is a pointer to the modification of the persistent b+tree. Compaction removes dead pointers.
-
-## KV API Guarantees
-
-etcd is a consistent and durable key value store with mini-transaction(TODO: link to txn doc when we have it) support. The key value store is exposed through the KV APIs. etcd tries to ensure the strongest consistency and durability guarantees for a distributed system. This specification enumerates the KV API guarantees made by etcd.
-
-### APIs to consider
-
-* Read APIs
-    * range
-    * watch
-* Write APIs
-    * put
-    * delete
-* Combination (read-modify-write) APIs
-    * txn
-
-### etcd Specific Definitions
-
-#### operation completed
-
-An etcd operation is considered complete when it is committed through consensus, and therefore “executed” -- permanently stored -- by the etcd storage engine. The client knows an operation is completed when it receives a response from the etcd server. Note that the client may be uncertain about the status of an operation if it times out, or there is a network disruption between the client and the etcd member. etcd may also abort operations when there is a leader election. etcd does not send `abort` responses to  clients’ outstanding requests in this event.
-
-#### revision
-
-An etcd operation that modifies the key value store is assigned with a single increasing revision. A transaction operation might modifies the key value store multiple times, but only one revision is assigned. The revision attribute of a key value pair that modified by the operation has the same value as the revision of the operation. The revision can be used as a logical clock for key value store. A key value pair that has a larger revision is modified after a key value pair with a smaller revision. Two key value pairs that have the same revision are modified by an operation "concurrently".
-
-### Guarantees Provided
-
-#### Atomicity
-
-All API requests are atomic; an operation either completes entirely or not at all. For watch requests, all events generated by one operation will be in one watch response. Watch never observes partial events for a single operation.
-
-#### Consistency
-
-All API calls ensure [sequential consistency][seq_consistency], the strongest consistency guarantee available from distributed systems. No matter which etcd member server a client makes requests to, a client reads the same events in the same order. If two members complete the same number of operations, the state of the two members is consistent.
-
-For watch operations, etcd guarantees to return the same value for the same key across all members for the same revision. For range operations, etcd has a similar guarantee for [linearized][Linearizability] access; serialized access may be behind the quorum state, so that the later revision is not yet available.
-
-As with all distributed systems, it is impossible for etcd to ensure [strict consistency][strict_consistency]. etcd does not guarantee that it will return to a read the “most recent” value (as measured by a wall clock when a request is completed) available on any cluster member.
-
-#### Isolation
-
-etcd ensures [serializable isolation][serializable_isolation], which is the highest isolation level available in distributed systems. Read operations will never observe any intermediate data.
-
-#### Durability
-
-Any completed operations are durable. All accessible data is also durable data. A read will never return data that has not been made durable.
-
-#### Linearizability
-
-Linearizability (also known as Atomic Consistency or External Consistency) is a consistency level between strict consistency and sequential consistency. 
-
-For linearizability, suppose each operation receives a timestamp from a loosely synchronized global clock. Operations are linearized if and only if they always complete as though they were executed in a sequential order and each operation appears to complete in the order specified by the program. Likewise, if an operation’s timestamp precedes another, that operation must also precede the other operation in the sequence.
-
-For example, consider a client completing a write at time point 1 (*t1*). A client issuing a read at *t2* (for *t2* > *t1*) should receive a value at least as recent as the previous write, completed at *t1*. However, the read might actually complete only by *t3*, and the returned value, current at *t2* when the read began, might be "stale" by *t3*.
-
-etcd does not ensure linearizability for watch operations. Users are expected to verify the revision of watch responses to ensure correct ordering.
-
-etcd ensures linearizability for all other operations by default. Linearizability comes with a cost, however, because linearized requests must go through the Raft consensus process. To obtain lower latencies and higher throughput for read requests, clients can configure a request’s consistency mode to `serializable`, which may access stale data with respect to quorum, but removes the performance penalty of linearized accesses' reliance on live consensus.
-
-[persistent-ds]: https://en.wikipedia.org/wiki/Persistent_data_structure
-[btree]: https://en.wikipedia.org/wiki/B-tree
-[b+tree]: https://en.wikipedia.org/wiki/B%2B_tree
-[seq_consistency]: https://en.wikipedia.org/wiki/Consistency_model#Sequential_consistency
-[strict_consistency]: https://en.wikipedia.org/wiki/Consistency_model#Strict_consistency
-[serializable_isolation]: https://en.wikipedia.org/wiki/Isolation_(database_systems)#Serializable
-[Linearizability]: #Linearizability
-[kv-proto]: https://github.com/coreos/etcd/blob/master/mvcc/mvccpb/kv.proto
-[kv-service]: https://github.com/coreos/etcd/blob/master/etcdserver/etcdserverpb/rpc.proto
-[response_header]: https://github.com/coreos/etcd/blob/master/etcdserver/etcdserverpb/rpc.proto
--- a/Documentation/branch_management.md
+++ b/Documentation/branch_management.md
@ -1,4 +1,4 @@
-# Branch Management
+# Branch management

 ## Guide

@ -13,7 +13,7 @@ The etcd team has adopted a *rolling release model* and supports one stable vers

 The `master` branch is our development branch. All new features land here first.

-If you want to try new features, pull `master` and play with it. Note that `master` may not be stable because new features may introduce bugs.
+To try new and experimental features, pull `master` and play with it. Note that `master` may not be stable because new features may introduce bugs.

 Before the release of the next stable version, feature PRs will be frozen. We will focus on the testing, bug-fix and documentation for one to two weeks.

--- a/Documentation/demo.md
+++ b/Documentation/demo.md
@ -0,0 +1,454 @@
+# Demo
+
+This series of examples shows the basic procedures for working with an etcd cluster.
+
+## Set up a cluster
+
+<img src="https://storage.googleapis.com/etcd/demo/01_etcd_clustering_2016051001.gif" alt="01_etcd_clustering_2016050601"/>
+
+On each etcd node, specify the cluster members:
+
+```
+TOKEN=token-01
+CLUSTER_STATE=new
+NAME_1=machine-1
+NAME_2=machine-2
+NAME_3=machine-3
+HOST_1=10.240.0.17
+HOST_2=10.240.0.18
+HOST_3=10.240.0.19
+CLUSTER=${NAME_1}=http://${HOST_1}:2380,${NAME_2}=http://${HOST_2}:2380,${NAME_3}=http://${HOST_3}:2380
+```
+
+Run this on each machine:
+
+```
+# For machine 1
+THIS_NAME=${NAME_1}
+THIS_IP=${HOST_1}
+etcd --data-dir=data.etcd --name ${THIS_NAME} \
+	--initial-advertise-peer-urls http://${THIS_IP}:2380 --listen-peer-urls http://${THIS_IP}:2380 \
+	--advertise-client-urls http://${THIS_IP}:2379 --listen-client-urls http://${THIS_IP}:2379 \
+	--initial-cluster ${CLUSTER} \
+	--initial-cluster-state ${CLUSTER_STATE} --initial-cluster-token ${TOKEN}
+
+# For machine 2
+THIS_NAME=${NAME_2}
+THIS_IP=${HOST_2}
+etcd --data-dir=data.etcd --name ${THIS_NAME} \
+	--initial-advertise-peer-urls http://${THIS_IP}:2380 --listen-peer-urls http://${THIS_IP}:2380 \
+	--advertise-client-urls http://${THIS_IP}:2379 --listen-client-urls http://${THIS_IP}:2379 \
+	--initial-cluster ${CLUSTER} \
+	--initial-cluster-state ${CLUSTER_STATE} --initial-cluster-token ${TOKEN}
+
+# For machine 3
+THIS_NAME=${NAME_3}
+THIS_IP=${HOST_3}
+etcd --data-dir=data.etcd --name ${THIS_NAME} \
+	--initial-advertise-peer-urls http://${THIS_IP}:2380 --listen-peer-urls http://${THIS_IP}:2380 \
+	--advertise-client-urls http://${THIS_IP}:2379 --listen-client-urls http://${THIS_IP}:2379 \
+	--initial-cluster ${CLUSTER} \
+	--initial-cluster-state ${CLUSTER_STATE} --initial-cluster-token ${TOKEN}
+```
+
+Or use our public discovery service:
+
+```
+curl https://discovery.etcd.io/new?size=3
+https://discovery.etcd.io/a81b5818e67a6ea83e9d4daea5ecbc92
+
+# grab this token
+TOKEN=token-01
+CLUSTER_STATE=new
+NAME_1=machine-1
+NAME_2=machine-2
+NAME_3=machine-3
+HOST_1=10.240.0.17
+HOST_2=10.240.0.18
+HOST_3=10.240.0.19
+DISCOVERY=https://discovery.etcd.io/a81b5818e67a6ea83e9d4daea5ecbc92
+
+THIS_NAME=${NAME_1}
+THIS_IP=${HOST_1}
+etcd --data-dir=data.etcd --name ${THIS_NAME} \
+	--initial-advertise-peer-urls http://${THIS_IP}:2380 --listen-peer-urls http://${THIS_IP}:2380 \
+	--advertise-client-urls http://${THIS_IP}:2379 --listen-client-urls http://${THIS_IP}:2379 \
+	--discovery ${DISCOVERY} \
+	--initial-cluster-state ${CLUSTER_STATE} --initial-cluster-token ${TOKEN}
+
+THIS_NAME=${NAME_2}
+THIS_IP=${HOST_2}
+etcd --data-dir=data.etcd --name ${THIS_NAME} \
+	--initial-advertise-peer-urls http://${THIS_IP}:2380 --listen-peer-urls http://${THIS_IP}:2380 \
+	--advertise-client-urls http://${THIS_IP}:2379 --listen-client-urls http://${THIS_IP}:2379 \
+	--discovery ${DISCOVERY} \
+	--initial-cluster-state ${CLUSTER_STATE} --initial-cluster-token ${TOKEN}
+
+THIS_NAME=${NAME_3}
+THIS_IP=${HOST_3}
+etcd --data-dir=data.etcd --name ${THIS_NAME} \
+	--initial-advertise-peer-urls http://${THIS_IP}:2380 --listen-peer-urls http://${THIS_IP}:2380 \
+	--advertise-client-urls http://${THIS_IP}:2379 --listen-client-urls http://${THIS_IP}:2379 \
+	--discovery ${DISCOVERY} \
+	--initial-cluster-state ${CLUSTER_STATE} --initial-cluster-token ${TOKEN}
+```
+
+Now etcd is ready! To connect to etcd with etcdctl:
+
+```
+export ETCDCTL_API=3
+HOST_1=10.240.0.17
+HOST_2=10.240.0.18
+HOST_3=10.240.0.19
+ENDPOINTS=$HOST_1:2379,$HOST_2:2379,$HOST_3:2379
+
+etcdctl --endpoints=$ENDPOINTS member list
+```
+
+
+## Access etcd
+
+<img src="https://storage.googleapis.com/etcd/demo/02_etcdctl_access_etcd_2016051001.gif" alt="02_etcdctl_access_etcd_2016051001"/>
+
+`put` command to write:
+
+```
+etcdctl --endpoints=$ENDPOINTS put foo "Hello World!"
+```
+
+`get` to read from etcd:
+
+```
+etcdctl --endpoints=$ENDPOINTS get foo
+etcdctl --endpoints=$ENDPOINTS --write-out="json" get foo
+```
+
+
+## Get by prefix
+
+<img src="https://storage.googleapis.com/etcd/demo/03_etcdctl_get_by_prefix_2016050501.gif" alt="03_etcdctl_get_by_prefix_2016050501"/>
+
+```
+etcdctl --endpoints=$ENDPOINTS put web1 value1
+etcdctl --endpoints=$ENDPOINTS put web2 value2
+etcdctl --endpoints=$ENDPOINTS put web3 value3
+
+etcdctl --endpoints=$ENDPOINTS get web --prefix
+```
+
+
+## Delete
+
+<img src="https://storage.googleapis.com/etcd/demo/04_etcdctl_delete_2016050601.gif" alt="04_etcdctl_delete_2016050601"/>
+
+```
+etcdctl --endpoints=$ENDPOINTS put key myvalue
+etcdctl --endpoints=$ENDPOINTS del key
+
+etcdctl --endpoints=$ENDPOINTS put k1 value1
+etcdctl --endpoints=$ENDPOINTS put k2 value2
+etcdctl --endpoints=$ENDPOINTS del k --prefix
+```
+
+
+## Transactional write
+
+`txn` to wrap multiple requests into one transaction:
+
+<img src="https://storage.googleapis.com/etcd/demo/05_etcdctl_transaction_2016050501.gif" alt="05_etcdctl_transaction_2016050501"/>
+
+```
+etcdctl --endpoints=$ENDPOINTS put user1 bad
+etcdctl --endpoints=$ENDPOINTS txn --interactive
+
+compares:
+value("user1") = "bad"      
+
+success requests (get, put, delete):
+del user1  
+
+failure requests (get, put, delete):
+put user1 good
+```
+
+
+## Watch
+
+`watch` to get notified of future changes:
+
+<img src="https://storage.googleapis.com/etcd/demo/06_etcdctl_watch_2016050501.gif" alt="06_etcdctl_watch_2016050501"/>
+
+```
+etcdctl --endpoints=$ENDPOINTS watch stock1
+etcdctl --endpoints=$ENDPOINTS put stock1 1000
+
+etcdctl --endpoints=$ENDPOINTS watch stock --prefix
+etcdctl --endpoints=$ENDPOINTS put stock1 10
+etcdctl --endpoints=$ENDPOINTS put stock2 20
+```
+
+
+## Lease
+
+`lease` to write with TTL:
+
+<img src="https://storage.googleapis.com/etcd/demo/07_etcdctl_lease_2016050501.gif" alt="07_etcdctl_lease_2016050501"/>
+
+```
+etcdctl --endpoints=$ENDPOINTS lease grant 300
+# lease 2be7547fbc6a5afa granted with TTL(300s)
+
+etcdctl --endpoints=$ENDPOINTS put sample value --lease=2be7547fbc6a5afa
+etcdctl --endpoints=$ENDPOINTS get sample
+
+etcdctl --endpoints=$ENDPOINTS lease keep-alive 2be7547fbc6a5afa
+etcdctl --endpoints=$ENDPOINTS lease revoke 2be7547fbc6a5afa
+# or after 300 seconds
+etcdctl --endpoints=$ENDPOINTS get sample
+```
+
+
+## Distributed locks
+
+`lock` for distributed lock:
+
+<img src="https://storage.googleapis.com/etcd/demo/08_etcdctl_lock_2016050501.gif" alt="08_etcdctl_lock_2016050501"/>
+
+```
+etcdctl --endpoints=$ENDPOINTS lock mutex1
+
+# another client with the same name blocks
+etcdctl --endpoints=$ENDPOINTS lock mutex1
+```
+
+
+## Elections
+
+`elect` for leader election:
+
+<img src="https://storage.googleapis.com/etcd/demo/09_etcdctl_elect_2016050501.gif" alt="09_etcdctl_elect_2016050501"/>
+
+```
+etcdctl --endpoints=$ENDPOINTS elect one p1
+
+# another client with the same name blocks
+etcdctl --endpoints=$ENDPOINTS elect one p2
+```
+
+
+## Cluster status
+
+`endpoint status` and `endpoint health` to check the status and health of each cluster member:
+
+<img src="https://storage.googleapis.com/etcd/demo/10_etcdctl_endpoint_2016050501.gif" alt="10_etcdctl_endpoint_2016050501"/>
+
+```
+etcdctl --write-out=table --endpoints=$ENDPOINTS endpoint status
+
++------------------+------------------+---------+---------+-----------+-----------+------------+
+|     ENDPOINT     |        ID        | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX |
++------------------+------------------+---------+---------+-----------+-----------+------------+
+| 10.240.0.17:2379 | 4917a7ab173fabe7 | 3.0.0   | 45 kB   | true      |         4 |      16726 |
+| 10.240.0.18:2379 | 59796ba9cd1bcd72 | 3.0.0   | 45 kB   | false     |         4 |      16726 |
+| 10.240.0.19:2379 | 94df724b66343e6c | 3.0.0   | 45 kB   | false     |         4 |      16726 |
++------------------+------------------+---------+---------+-----------+-----------+------------+
+```
+
+```
+etcdctl --endpoints=$ENDPOINTS endpoint health
+
+10.240.0.17:2379 is healthy: successfully committed proposal: took = 3.345431ms
+10.240.0.19:2379 is healthy: successfully committed proposal: took = 3.767967ms
+10.240.0.18:2379 is healthy: successfully committed proposal: took = 4.025451ms
+```
+
+
+## Snapshot
+
+`snapshot` to save point-in-time snapshot of etcd database:
+
+<img src="https://storage.googleapis.com/etcd/demo/11_etcdctl_snapshot_2016051001.gif" alt="11_etcdctl_snapshot_2016051001"/>
+
+```
+etcdctl --endpoints=$ENDPOINTS snapshot save my.db
+
+Snapshot saved at my.db
+```
+
+```
+etcdctl --write-out=table --endpoints=$ENDPOINTS snapshot status my.db
+
++---------+----------+------------+------------+
+|  HASH   | REVISION | TOTAL KEYS | TOTAL SIZE |
++---------+----------+------------+------------+
+| c55e8b8 |        9 |         13 | 25 kB      |
++---------+----------+------------+------------+
+```
+
+
+## Migrate
+
+`migrate` to transform etcd v2 to v3 data:
+
+<img src="https://storage.googleapis.com/etcd/demo/12_etcdctl_migrate_2016061602.gif" alt="12_etcdctl_migrate_2016061602"/>
+
+```
+# write key in etcd version 2 store
+export ETCDCTL_API=2
+etcdctl --endpoints=http://$ENDPOINT set foo bar
+
+# read key in etcd v2
+etcdctl --endpoints=$ENDPOINTS --output="json" get foo
+
+# stop etcd node to migrate, one by one
+
+# migrate v2 data
+export ETCDCTL_API=3
+etcdctl --endpoints=$ENDPOINT migrate --data-dir="default.etcd" --wal-dir="default.etcd/member/wal"
+
+# restart etcd node after migrate, one by one
+
+# confirm that the key got migrated
+etcdctl --endpoints=$ENDPOINTS get /foo
+```
+
+
+## Member
+
+`member` to add, remove, and update membership:
+
+<img src="https://storage.googleapis.com/etcd/demo/13_etcdctl_member_2016062301.gif" alt="13_etcdctl_member_2016062301"/>
+
+```
+# For each machine
+TOKEN=my-etcd-token-1
+CLUSTER_STATE=new
+NAME_1=etcd-node-1
+NAME_2=etcd-node-2
+NAME_3=etcd-node-3
+HOST_1=10.240.0.13
+HOST_2=10.240.0.14
+HOST_3=10.240.0.15
+CLUSTER=${NAME_1}=http://${HOST_1}:2380,${NAME_2}=http://${HOST_2}:2380,${NAME_3}=http://${HOST_3}:2380
+
+# For node 1
+THIS_NAME=${NAME_1}
+THIS_IP=${HOST_1}
+etcd --data-dir=data.etcd --name ${THIS_NAME} \
+	--initial-advertise-peer-urls http://${THIS_IP}:2380 \
+	--listen-peer-urls http://${THIS_IP}:2380 \
+	--advertise-client-urls http://${THIS_IP}:2379 \
+	--listen-client-urls http://${THIS_IP}:2379 \
+	--initial-cluster ${CLUSTER} \
+	--initial-cluster-state ${CLUSTER_STATE} \
+	--initial-cluster-token ${TOKEN}
+
+# For node 2
+THIS_NAME=${NAME_2}
+THIS_IP=${HOST_2}
+etcd --data-dir=data.etcd --name ${THIS_NAME} \
+	--initial-advertise-peer-urls http://${THIS_IP}:2380 \
+	--listen-peer-urls http://${THIS_IP}:2380 \
+	--advertise-client-urls http://${THIS_IP}:2379 \
+	--listen-client-urls http://${THIS_IP}:2379 \
+	--initial-cluster ${CLUSTER} \
+	--initial-cluster-state ${CLUSTER_STATE} \
+	--initial-cluster-token ${TOKEN}
+
+# For node 3
+THIS_NAME=${NAME_3}
+THIS_IP=${HOST_3}
+etcd --data-dir=data.etcd --name ${THIS_NAME} \
+	--initial-advertise-peer-urls http://${THIS_IP}:2380 \
+	--listen-peer-urls http://${THIS_IP}:2380 \
+	--advertise-client-urls http://${THIS_IP}:2379 \
+	--listen-client-urls http://${THIS_IP}:2379 \
+	--initial-cluster ${CLUSTER} \
+	--initial-cluster-state ${CLUSTER_STATE} \
+	--initial-cluster-token ${TOKEN}
+```
+
+Then replace a member with `member remove` and `member add` commands:
+
+```
+# get member ID
+export ETCDCTL_API=3
+HOST_1=10.240.0.13
+HOST_2=10.240.0.14
+HOST_3=10.240.0.15
+etcdctl --endpoints=${HOST_1}:2379,${HOST_2}:2379,${HOST_3}:2379 member list
+
+# remove the member
+MEMBER_ID=278c654c9a6dfd3b
+etcdctl --endpoints=${HOST_1}:2379,${HOST_2}:2379,${HOST_3}:2379 \
+	member remove ${MEMBER_ID}
+
+# add a new member (node 4)
+export ETCDCTL_API=3
+NAME_1=etcd-node-1
+NAME_2=etcd-node-2
+NAME_4=etcd-node-4
+HOST_1=10.240.0.13
+HOST_2=10.240.0.14
+HOST_4=10.240.0.16 # new member
+etcdctl --endpoints=${HOST_1}:2379,${HOST_2}:2379 \
+	member add ${NAME_4} \
+	--peer-urls=http://${HOST_4}:2380
+```
+
+Next, start the new member with `--initial-cluster-state existing` flag:
+
+```
+# [WARNING] If the new member starts from the same disk space,
+# make sure to remove the data directory of the old member
+#
+# restart with 'existing' flag
+TOKEN=my-etcd-token-1
+CLUSTER_STATE=existing
+NAME_1=etcd-node-1
+NAME_2=etcd-node-2
+NAME_4=etcd-node-4
+HOST_1=10.240.0.13
+HOST_2=10.240.0.14
+HOST_4=10.240.0.16 # new member
+CLUSTER=${NAME_1}=http://${HOST_1}:2380,${NAME_2}=http://${HOST_2}:2380,${NAME_4}=http://${HOST_4}:2380
+
+THIS_NAME=${NAME_4}
+THIS_IP=${HOST_4}
+etcd --data-dir=data.etcd --name ${THIS_NAME} \
+	--initial-advertise-peer-urls http://${THIS_IP}:2380 \
+	--listen-peer-urls http://${THIS_IP}:2380 \
+	--advertise-client-urls http://${THIS_IP}:2379 \
+	--listen-client-urls http://${THIS_IP}:2379 \
+	--initial-cluster ${CLUSTER} \
+	--initial-cluster-state ${CLUSTER_STATE} \
+	--initial-cluster-token ${TOKEN}
+```
+
+
+## Auth
+
+`auth`, `user`, `role` for authentication:
+
+<img src="https://storage.googleapis.com/etcd/demo/14_etcdctl_auth_2016062301.gif" alt="14_etcdctl_auth_2016062301"/>
+
+```
+export ETCDCTL_API=3
+ENDPOINTS=localhost:2379
+
+etcdctl --endpoints=${ENDPOINTS} role add root
+etcdctl --endpoints=${ENDPOINTS} role grant-permission root readwrite foo
+etcdctl --endpoints=${ENDPOINTS} role get root
+
+etcdctl --endpoints=${ENDPOINTS} user add root
+etcdctl --endpoints=${ENDPOINTS} user grant-role root root
+etcdctl --endpoints=${ENDPOINTS} user get root
+
+etcdctl --endpoints=${ENDPOINTS} auth enable
+# now all client requests go through auth
+
+etcdctl --endpoints=${ENDPOINTS} --user=root:123 put foo bar
+etcdctl --endpoints=${ENDPOINTS} get foo
+etcdctl --endpoints=${ENDPOINTS} --user=root:123 get foo
+etcdctl --endpoints=${ENDPOINTS} --user=root:123 get foo1
+```
--- a/Documentation/dev-guide/api_grpc_gateway.md
+++ b/Documentation/dev-guide/api_grpc_gateway.md
@ -0,0 +1,38 @@
+
+## Why grpc-gateway
+
+etcd v3 uses [gRPC][grpc] for its messaging protocol. The etcd project includes a gRPC-based [Go client][go-client] and a command line utility, [etcdctl][etcdctl], for communicating with an etcd cluster through gRPC. For languages with no gRPC support, etcd provides a JSON [grpc-gateway][grpc-gateway]. This gateway serves a RESTful proxy that translates HTTP/JSON requests into gRPC messages.
+
+
+## Using grpc-gateway
+
+The gateway accepts a [JSON mapping][json-mapping] for etcd's [protocol buffer][api-ref] message definitions. Note that `key` and `value` fields are defined as byte arrays and therefore must be base64 encoded in JSON.
+
+```bash
+<<COMMENT
+https://www.base64encode.org/
+foo is 'Zm9v' in Base64
+bar is 'YmFy'
+COMMENT
+
+curl -L http://localhost:2379/v3alpha/kv/put \
+	-X POST -d '{"key": "Zm9v", "value": "YmFy"}'
+
+curl -L http://localhost:2379/v3alpha/kv/range \
+	-X POST -d '{"key": "Zm9v"}'
+```
+
+
+## Swagger
+
+Generated [Swagger][swagger] API definitions can be found at [rpc.swagger.json][swagger-doc].
+
+[api-ref]: ./api_reference_v3.md
+[go-client]: https://github.com/coreos/etcd/tree/master/clientv3
+[etcdctl]: https://github.com/coreos/etcd/tree/master/etcdctl
+[grpc]: http://www.grpc.io/
+[grpc-gateway]: https://github.com/grpc-ecosystem/grpc-gateway
+[json-mapping]: https://developers.google.com/protocol-buffers/docs/proto3#json
+[swagger]: http://swagger.io/
+[swagger-doc]: apispec/swagger/rpc.swagger.json
+
--- a/Documentation/dev-guide/api_reference_v3.md
+++ b/Documentation/dev-guide/api_reference_v3.md
@ -1,6 +1,9 @@
 ### etcd API Reference


+This documentation is generated. Please read the proto files for more information.
+
+
 ##### service `Auth` (etcdserver/etcdserverpb/rpc.proto)

 | Method | Request Type | Response Type | Description |
@ -9,16 +12,18 @@
 | AuthDisable | AuthDisableRequest | AuthDisableResponse | AuthDisable disables authentication. |
 | Authenticate | AuthenticateRequest | AuthenticateResponse | Authenticate processes an authenticate request. |
 | UserAdd | AuthUserAddRequest | AuthUserAddResponse | UserAdd adds a new user. |
-| UserGet | AuthUserGetRequest | AuthUserGetResponse | UserGet gets detailed user information or lists all users. |
+| UserGet | AuthUserGetRequest | AuthUserGetResponse | UserGet gets detailed user information. |
+| UserList | AuthUserListRequest | AuthUserListResponse | UserList gets a list of all users. |
 | UserDelete | AuthUserDeleteRequest | AuthUserDeleteResponse | UserDelete deletes a specified user. |
 | UserChangePassword | AuthUserChangePasswordRequest | AuthUserChangePasswordResponse | UserChangePassword changes the password of a specified user. |
-| UserGrant | AuthUserGrantRequest | AuthUserGrantResponse | UserGrant grants a role to a specified user. |
-| UserRevoke | AuthUserRevokeRequest | AuthUserRevokeResponse | UserRevoke revokes a role of specified user. |
+| UserGrantRole | AuthUserGrantRoleRequest | AuthUserGrantRoleResponse | UserGrantRole grants a role to a specified user. |
+| UserRevokeRole | AuthUserRevokeRoleRequest | AuthUserRevokeRoleResponse | UserRevokeRole revokes a role of specified user. |
 | RoleAdd | AuthRoleAddRequest | AuthRoleAddResponse | RoleAdd adds a new role. |
-| RoleGet | AuthRoleGetRequest | AuthRoleGetResponse | RoleGet gets detailed role information or lists all roles. |
+| RoleGet | AuthRoleGetRequest | AuthRoleGetResponse | RoleGet gets detailed role information. |
+| RoleList | AuthRoleListRequest | AuthRoleListResponse | RoleList gets a list of all roles. |
 | RoleDelete | AuthRoleDeleteRequest | AuthRoleDeleteResponse | RoleDelete deletes a specified role. |
-| RoleGrant | AuthRoleGrantRequest | AuthRoleGrantResponse | RoleGrant grants a permission of a specified key or range to a specified role. |
-| RoleRevoke | AuthRoleRevokeRequest | AuthRoleRevokeResponse | RoleRevoke revokes a key or range permission of a specified role. |
+| RoleGrantPermission | AuthRoleGrantPermissionRequest | AuthRoleGrantPermissionResponse | RoleGrantPermission grants a permission of a specified key or range to a specified role. |
+| RoleRevokePermission | AuthRoleRevokePermissionRequest | AuthRoleRevokePermissionResponse | RoleRevokePermission revokes a key or range permission of a specified role. |



@ -35,11 +40,13 @@

 ##### service `KV` (etcdserver/etcdserverpb/rpc.proto)

+for grpc-gateway
+
 | Method | Request Type | Response Type | Description |
 | ------ | ------------ | ------------- | ----------- |
 | Range | RangeRequest | RangeResponse | Range gets the keys in the range from the key-value store. |
 | Put | PutRequest | PutResponse | Put puts the given key into the key-value store. A put request increments the revision of the key-value store and generates one event in the event history. |
-| DeleteRange | DeleteRangeRequest | DeleteRangeResponse | Delete deletes the given range from the key-value store. A delete request increments the revision of the key-value store and generates a delete event in the event history for every deleted key. |
+| DeleteRange | DeleteRangeRequest | DeleteRangeResponse | DeleteRange deletes the given range from the key-value store. A delete request increments the revision of the key-value store and generates a delete event in the event history for every deleted key. |
 | Txn | TxnRequest | TxnResponse | Txn processes multiple requests in a single transaction. A txn request increments the revision of the key-value store and generates events with the same revision for every completed request. It is not allowed to modify the same key several times within one txn. |
 | Compact | CompactionRequest | CompactionResponse | Compact compacts the event history in the etcd key-value store. The key-value store should be periodically compacted or the event history will continue to grow indefinitely. |

@ -86,6 +93,8 @@

 ##### message `AlarmRequest` (etcdserver/etcdserverpb/rpc.proto)

+default, used to query if any alarm is active space quota is exhausted
+
 | Field | Description | Type |
 | ----- | ----------- | ---- |
 | action | action is the kind of alarm request to issue. The action may GET alarm statuses, ACTIVATE an alarm, or DEACTIVATE a raised alarm. | AlarmAction |
@ -149,7 +158,9 @@ Empty field.

 ##### message `AuthRoleDeleteRequest` (etcdserver/etcdserverpb/rpc.proto)

-Empty field.
+| Field | Description | Type |
+| ----- | ----------- | ---- |
+| role |  | string |



@ -163,7 +174,9 @@ Empty field.

 ##### message `AuthRoleGetRequest` (etcdserver/etcdserverpb/rpc.proto)

-Empty field.
+| Field | Description | Type |
+| ----- | ----------- | ---- |
+| role |  | string |



@ -172,10 +185,11 @@ Empty field.
 | Field | Description | Type |
 | ----- | ----------- | ---- |
 | header |  | ResponseHeader |
+| perm |  | (slice of) authpb.Permission |



-##### message `AuthRoleGrantRequest` (etcdserver/etcdserverpb/rpc.proto)
+##### message `AuthRoleGrantPermissionRequest` (etcdserver/etcdserverpb/rpc.proto)

 | Field | Description | Type |
 | ----- | ----------- | ---- |
@ -184,7 +198,7 @@ Empty field.



-##### message `AuthRoleGrantResponse` (etcdserver/etcdserverpb/rpc.proto)
+##### message `AuthRoleGrantPermissionResponse` (etcdserver/etcdserverpb/rpc.proto)

 | Field | Description | Type |
 | ----- | ----------- | ---- |
@ -192,13 +206,32 @@ Empty field.



-##### message `AuthRoleRevokeRequest` (etcdserver/etcdserverpb/rpc.proto)
+##### message `AuthRoleListRequest` (etcdserver/etcdserverpb/rpc.proto)

 Empty field.



-##### message `AuthRoleRevokeResponse` (etcdserver/etcdserverpb/rpc.proto)
+##### message `AuthRoleListResponse` (etcdserver/etcdserverpb/rpc.proto)
+
+| Field | Description | Type |
+| ----- | ----------- | ---- |
+| header |  | ResponseHeader |
+| roles |  | (slice of) string |
+
+
+
+##### message `AuthRoleRevokePermissionRequest` (etcdserver/etcdserverpb/rpc.proto)
+
+| Field | Description | Type |
+| ----- | ----------- | ---- |
+| role |  | string |
+| key |  | string |
+| range_end |  | string |
+
+
+
+##### message `AuthRoleRevokePermissionResponse` (etcdserver/etcdserverpb/rpc.proto)

 | Field | Description | Type |
 | ----- | ----------- | ---- |
@ -258,7 +291,9 @@ Empty field.

 ##### message `AuthUserGetRequest` (etcdserver/etcdserverpb/rpc.proto)

-Empty field.
+| Field | Description | Type |
+| ----- | ----------- | ---- |
+| name |  | string |



@ -267,10 +302,11 @@ Empty field.
 | Field | Description | Type |
 | ----- | ----------- | ---- |
 | header |  | ResponseHeader |
+| roles |  | (slice of) string |



-##### message `AuthUserGrantRequest` (etcdserver/etcdserverpb/rpc.proto)
+##### message `AuthUserGrantRoleRequest` (etcdserver/etcdserverpb/rpc.proto)

 | Field | Description | Type |
 | ----- | ----------- | ---- |
@ -279,7 +315,7 @@ Empty field.



-##### message `AuthUserGrantResponse` (etcdserver/etcdserverpb/rpc.proto)
+##### message `AuthUserGrantRoleResponse` (etcdserver/etcdserverpb/rpc.proto)

 | Field | Description | Type |
 | ----- | ----------- | ---- |
@ -287,13 +323,31 @@ Empty field.



-##### message `AuthUserRevokeRequest` (etcdserver/etcdserverpb/rpc.proto)
+##### message `AuthUserListRequest` (etcdserver/etcdserverpb/rpc.proto)

 Empty field.



-##### message `AuthUserRevokeResponse` (etcdserver/etcdserverpb/rpc.proto)
+##### message `AuthUserListResponse` (etcdserver/etcdserverpb/rpc.proto)
+
+| Field | Description | Type |
+| ----- | ----------- | ---- |
+| header |  | ResponseHeader |
+| users |  | (slice of) string |
+
+
+
+##### message `AuthUserRevokeRoleRequest` (etcdserver/etcdserverpb/rpc.proto)
+
+| Field | Description | Type |
+| ----- | ----------- | ---- |
+| name |  | string |
+| role |  | string |
+
+
+
+##### message `AuthUserRevokeRoleResponse` (etcdserver/etcdserverpb/rpc.proto)

 | Field | Description | Type |
 | ----- | ----------- | ---- |
@ -381,7 +435,7 @@ Empty field.
 | Field | Description | Type |
 | ----- | ----------- | ---- |
 | header |  | ResponseHeader |
-| deleted | Deleted is the number of keys deleted by the delete range request. | int64 |
+| deleted | deleted is the number of keys deleted by the delete range request. | int64 |



@ -552,13 +606,15 @@ Empty field.

 | Field | Description | Type |
 | ----- | ----------- | ---- |
-| key | key is the first key for the range. If range_end is not given, the request only looks up key. | bytes |
-| range_end | range_end is the upper bound on the requested range [key, range_end). If range_end is '\0', the range is all keys >= key. | bytes |
+| key | key is the first key for the range. If range_end is not given, the request only looks up key. | bytes |
+| range_end | range_end is the upper bound on the requested range [key, range_end). If range_end is '\0', the range is all keys >= key. If range_end is one bit larger than the given key, then the range request gets all keys with the prefix (the given key). If both key and range_end are '\0', then the range request returns all keys. | bytes |
 | limit | limit is a limit on the number of keys returned for the request. | int64 |
-| revision | revision is the point-in-time of the key-value store to use for the range. If revision is less or equal to zero, the range is over the newest key-value store. If the revision has been compacted, ErrCompaction is returned as a response. | int64 |
+| revision | revision is the point-in-time of the key-value store to use for the range. If revision is less than or equal to zero, the range is over the newest key-value store. If the revision has been compacted, ErrCompacted is returned as a response. | int64 |
 | sort_order | sort_order is the order for returned sorted results. | SortOrder |
 | sort_target | sort_target is the key-value field to use for sorting. | SortTarget |
 | serializable | serializable sets the range request to use serializable member-local reads. Range requests are linearizable by default; linearizable requests have higher latency and lower throughput than serializable requests but reflect the current consensus of the cluster. For better performance, in exchange for possible stale reads, a serializable range request is served locally without needing to reach consensus with other nodes in the cluster. | bool |
+| keys_only | keys_only when set returns only the keys and not the values. | bool |
+| count_only | count_only when set returns only the count of the keys in the range. | bool |



@ -567,12 +623,13 @@ Empty field.
 | Field | Description | Type |
 | ----- | ----------- | ---- |
 | header |  | ResponseHeader |
-| kvs | kvs is the list of key-value pairs matched by the range request. | (slice of) mvccpb.KeyValue |
+| kvs | kvs is the list of key-value pairs matched by the range request. kvs is empty when count is requested. | (slice of) mvccpb.KeyValue |
 | more | more indicates if there are more keys to return in the requested range. | bool |
+| count | count is set to the number of keys within the range when requested. | int64 |



-##### message `RequestUnion` (etcdserver/etcdserverpb/rpc.proto)
+##### message `RequestOp` (etcdserver/etcdserverpb/rpc.proto)

 | Field | Description | Type |
 | ----- | ----------- | ---- |
@ -594,7 +651,7 @@ Empty field.



-##### message `ResponseUnion` (etcdserver/etcdserverpb/rpc.proto)
+##### message `ResponseOp` (etcdserver/etcdserverpb/rpc.proto)

 | Field | Description | Type |
 | ----- | ----------- | ---- |
@ -646,9 +703,9 @@ From google paxosdb paper: Our implementation hinges around a powerful primitive

 | Field | Description | Type |
 | ----- | ----------- | ---- |
-| compare | Compare is a list of predicates representing a conjunction of terms. If the comparisons succeed, then the success requests will be processed in order, and the response will contain their respective responses in order. If the comparisons fail, then the failure requests will be processed in order, and the response will contain their respective responses in order. | (slice of) Compare |
-| success | success is a list of requests which will be applied when compare evaluates to true. | (slice of) RequestUnion |
-| failure | failure is a list of requests which will be applied when compare evaluates to false. | (slice of) RequestUnion |
+| compare | compare is a list of predicates representing a conjunction of terms. If the comparisons succeed, then the success requests will be processed in order, and the response will contain their respective responses in order. If the comparisons fail, then the failure requests will be processed in order, and the response will contain their respective responses in order. | (slice of) Compare |
+| success | success is a list of requests which will be applied when compare evaluates to true. | (slice of) RequestOp |
+| failure | failure is a list of requests which will be applied when compare evaluates to false. | (slice of) RequestOp |



@ -658,7 +715,7 @@ From google paxosdb paper: Our implementation hinges around a powerful primitive
 | ----- | ----------- | ---- |
 | header |  | ResponseHeader |
 | succeeded | succeeded is set to true if the compare evaluated to true or false otherwise. | bool |
-| responses | responses is a list of responses corresponding to the results from applying success if succeeded is true or failure if succeeded is false. | (slice of) ResponseUnion |
+| responses | responses is a list of responses corresponding to the results from applying success if succeeded is true or failure if succeeded is false. | (slice of) ResponseOp |



@ -741,8 +798,9 @@ Permission is a single entity

 | Field | Description | Type |
 | ----- | ----------- | ---- |
-| key |  | bytes |
 | permType |  | Type |
+| key |  | bytes |
+| range_end |  | bytes |



--- a/Documentation/dev-guide/apispec/swagger/rpc.swagger.json
+++ b/Documentation/dev-guide/apispec/swagger/rpc.swagger.json
--- a/Documentation/dev-guide/experimental_apis.md
+++ b/Documentation/dev-guide/experimental_apis.md
@ -0,0 +1,8 @@
+# Experimental APIs and features
+
+For the most part, the etcd project is stable, but we are still moving fast! We believe in the release fast philosophy. We want to get early feedback on features still in development and stabilizing. Thus, there are, and will be more, experimental features and APIs. We plan to improve these features based on the early feedback from the community, or abandon them if there is little interest, in the next few releases. If you are running a production system, please do not rely on any experimental features or APIs.
+
+## The current experimental API/features are:
+
+- v3 auth API: expected to be stable in the 3.1 release
+- etcd gateway: expected to be stable in the 3.1 release
--- a/Documentation/dev-guide/local_cluster.md
+++ b/Documentation/dev-guide/local_cluster.md
@ -1,8 +1,8 @@
-# Setup a Local Cluster
+# Setup a local cluster

 For testing and development deployments, the quickest and easiest way is to set up a local cluster. For a production deployment, refer to the [clustering][clustering] section.

-## Local Standalone Cluster
+## Local standalone cluster

 Deploying an etcd cluster as a standalone cluster is straightforward. Start it with just one command:

@ -26,7 +26,7 @@ $ ./etcdctl get foo
 bar
 ```

-## Local Multi-member Cluster
+## Local multi-member cluster

 A Procfile is provided to easily set up a local multi-member cluster. Start a multi-member cluster with a few commands:

@ -45,7 +45,7 @@ To interact with the started cluster by using etcdctl:
 # use API version 3
 $ export ETCDCTL_API=3

-$ etcdctl --endpoints=localhost:12379 member list
+$ etcdctl --write-out=table --endpoints=localhost:12379 member list
 +------------------+---------+--------+------------------------+------------------------+
 |        ID        | STATUS  |  NAME  |       PEER ADDRS       |      CLIENT ADDRS      |
 +------------------+---------+--------+------------------------+------------------------+
@ -86,5 +86,5 @@ hello
 To learn more about interacting with etcd, read [interacting with etcd section][interacting].

 [interacting]: ./interacting_v3.md
-[clustering]: ./clustering.md
+[clustering]: ../op-guide/clustering.md

--- a/Documentation/dev-internal/discovery_protocol.md
+++ b/Documentation/dev-internal/discovery_protocol.md
@ -1,4 +1,4 @@
-# Discovery Service Protocol
+# Discovery service protocol

 Discovery service protocol helps new etcd member to discover all other members in cluster bootstrap phase using a shared discovery URL.

@ -8,7 +8,7 @@ The protocol uses a new discovery token to bootstrap one _unique_ etcd cluster.

 The rest of this article will walk through the discovery process with examples that correspond to a self-hosted discovery cluster. The public discovery service, discovery.etcd.io, functions the same way, but with a layer of polish to abstract away ugly URLs, generate UUIDs automatically, and provide some protections against excessive requests. At its core, the public discovery service still uses an etcd cluster as the data store as described in this document.

-## The Protocol Workflow
+## Protocol workflow

 The idea of discovery protocol is to use an internal etcd cluster to coordinate bootstrap of a new cluster. First, all new members interact with discovery service and help to generate the expected member list. Then each new member bootstraps its server using this list, which performs the same functionality as -initial-cluster flag.

@ -16,7 +16,7 @@ In the following example workflow, we will list each step of protocol in curl fo

 By convention the etcd discovery protocol uses the key prefix `_etcd/registry`. If `http://example.com` hosts an etcd cluster for discovery service, a full URL to discovery keyspace will be `http://example.com/v2/keys/_etcd/registry`. We will use this as the URL prefix in the example.

-### Creating a New Discovery Token
+### Creating a new discovery token

 Generate a unique token that will identify the new cluster. This will be used as a unique prefix in discovery keyspace in the following steps. An easy way to do this is to use `uuidgen`:

@ -24,9 +24,9 @@ Generate a unique token that will identify the new cluster. This will be used as
 UUID=$(uuidgen)
 ```

-### Specifying the Expected Cluster Size
+### Specifying the expected cluster size

-You need to specify the expected cluster size for this discovery token. The size is used by the discovery service to know when it has found all members that will initially form the cluster.
+The expected cluster size must be specified for this discovery token. The size is used by the discovery service to know when it has found all members that will initially form the cluster.

 ```
 curl -X PUT http://example.com/v2/keys/_etcd/registry/${UUID}/_config/size -d value=${cluster_size}
@ -34,9 +34,9 @@ curl -X PUT http://example.com/v2/keys/_etcd/registry/${UUID}/_config/size -d va

 Usually the cluster size is 3, 5 or 7. Check [optimal cluster size][cluster-size] for more details.

-### Bringing up etcd Processes
+### Bringing up etcd processes

-Now that you have your discovery URL, you can use it as `-discovery` flag and bring up etcd processes. Every etcd process will follow this next few steps internally if given a `-discovery` flag.
+Given the discovery URL, use it as the `-discovery` flag and bring up etcd processes. Every etcd process will follow these next few steps internally if given a `-discovery` flag.

 ### Registering itself

@ -46,7 +46,7 @@ The first thing for etcd process is to register itself into the discovery URL as
 curl -X PUT http://example.com/v2/keys/_etcd/registry/${UUID}/${member_id}?prevExist=false -d value="${member_name}=${member_peer_url_1}&${member_name}=${member_peer_url_2}"
 ```

-### Checking the Status
+### Checking the status

 It checks the expected cluster size and registration status in discovery URL, and decides what the next action is.

@ -61,8 +61,7 @@ If the number of registered members is bigger than the expected size N, it treat

 In etcd implementation, the member may check the cluster status even before registering itself. So it could fail quickly if the cluster has been full.

-### Waiting for All Members
-
+### Waiting for all members

 The wait process is described in detail in the [etcd API documentation][api].

@ -72,11 +71,11 @@ curl -X GET http://example.com/v2/keys/_etcd/registry/${UUID}?wait=true&waitInde

 It keeps waiting until finding all members.

-## Public Discovery Service
+## Public discovery service

 CoreOS Inc. hosts a public discovery service at https://discovery.etcd.io/ , which provides some nice features for ease of use.

-### Mask Key Prefix
+### Mask key prefix

 Public discovery service will redirect `https://discovery.etcd.io/${UUID}` to etcd cluster behind for the key at `/v2/keys/_etcd/registry`. It masks register key prefix for short and readable discovery url.

@ -96,17 +95,17 @@ Possible status codes:

 The generation process in the service follows the steps from [Creating a New Discovery Token][new-discovery-token] to [Specifying the Expected Cluster Size][expected-cluster-size].

-### Check Discovery Status
+### Check discovery status

 ```
 GET /${UUID}
 ```

-You can check the status for this discovery token, including the machines that have been registered, by requesting the value of the UUID.
+The status for this discovery token, including the machines that have been registered, can be checked by requesting the value of the UUID.

 ### Open-source repository

-The repository is located at https://github.com/coreos/discovery.etcd.io. You could use it to build your own public discovery service.
+The repository is located at https://github.com/coreos/discovery.etcd.io. It could be used to build a custom discovery service.

 [api]: ../v2/api.md#waiting-for-a-change
 [cluster-size]: ../v2/admin_guide.md#optimal-cluster-size
--- a/Documentation/dev-internal/logging.md
+++ b/Documentation/dev-internal/logging.md
@ -1,4 +1,4 @@
-# Logging Conventions
+# Logging conventions

 etcd uses the [capnslog][capnslog] library for logging application output categorized into *levels*. A log message's level is determined according to these conventions:

@ -26,4 +26,4 @@ etcd uses the [capnslog][capnslog] library for logging application output catego
    * Send a normal message to a remote peer
    * Write a log entry to disk

-[capnslog]: [https://github.com/coreos/pkg/tree/master/capnslog]
+[capnslog]: https://github.com/coreos/pkg/tree/master/capnslog
--- a/Documentation/dev-internal/release.md
+++ b/Documentation/dev-internal/release.md
@ -2,9 +2,9 @@

 The guide talks about how to release a new version of etcd.

-The procedure includes some manual steps for sanity checking but it can probably be further scripted. Please keep this document up-to-date if you want to make changes to the release process. 
+The procedure includes some manual steps for sanity checking but it can probably be further scripted. Please keep this document up-to-date if making changes to the release process. 

-## Prepare Release
+## Prepare release

 Set desired version as environment variable for following steps. Here is an example to release 2.3.0:

@ -15,7 +15,7 @@ export PREV_VERSION=v2.2.5

 All releases version numbers follow the format of [semantic versioning 2.0.0](http://semver.org/).

-### Major, Minor Version Release, or its Pre-release
+### Major, minor version release, or its pre-release

 - Ensure the relevant milestone on GitHub is complete. All referenced issues should be closed, or moved elsewhere.
 - Remove this release from [roadmap](https://github.com/coreos/etcd/blob/master/ROADMAP.md), if necessary.
@ -23,18 +23,18 @@ All releases version numbers follow the format of [semantic versioning 2.0.0](ht
 - Bump [hardcoded MinClusterVerion in the repository](https://github.com/coreos/etcd/blob/master/version/version.go#L29), if necessary.
 - Add feature capability maps for the new version, if necessary.

-### Patch Version Release
+### Patch version release

 - Discuss about commits that are backported to the patch release. The commits should not include merge commits.
 - Cherry-pick these commits starting from the oldest one into stable branch.

-## Write Release Note
+## Write release note

 - Write introduction for the new release. For example, what major bug we fix, what new features we introduce or what performance improvement we make.
 - Write changelog for the last release. ChangeLog should be straightforward and easy to understand for the end-user.
 - Put `[GH XXXX]` at the head of change line to reference Pull Request that introduces the change. Moreover, add a link on it to jump to the Pull Request.

-## Tag Version
+## Tag version

 - Bump [hardcoded Version in the repository](https://github.com/coreos/etcd/blob/master/version/version.go#L30) to the latest version `${VERSION}`.
 - Ensure all tests on CI system are passed.
@ -45,7 +45,7 @@ All releases version numbers follow the format of [semantic versioning 2.0.0](ht
 - Sanity check tag correctness through `git show tags/$VERSION`.
 - Push the tag to GitHub through `git push origin tags/$VERSION`. This assumes `origin` corresponds to "https://github.com/coreos/etcd".

-## Build Release Binaries and Images
+## Build release binaries and images

 - Ensure `actool` is available, or installing it through `go get github.com/appc/spec/actool`.
 - Ensure `docker` is available.
@ -58,7 +58,7 @@ Run release script in root directory:

 It generates all release binaries and images under directory ./release.

-## Sign Binaries and Images
+## Sign binaries and images

 etcd project key must be used to sign the generated binaries and images.`$SUBKEYID` is the key ID of etcd project Yubikey. Connect the key and run `gpg2 --card-status` to get the ID.

@ -66,14 +66,14 @@ The following commands are used for public release sign:

 ```
 cd release
-for i in etcd-*{.zip,.tar.gz}; do gpg2 --default-key $SUBKEYID --output ${i}.asc --detach-sign ${i}; done
+for i in etcd-*{.zip,.tar.gz}; do gpg2 --default-key $SUBKEYID --armor --output ${i}.asc --detach-sign ${i}; done
 for i in etcd-*{.zip,.tar.gz}; do gpg2 --verify ${i}.asc ${i}; done
 ```

 The public key for GPG signing can be found at [CoreOS Application Signing Key](https://coreos.com/security/app-signing-key)


-## Publish Release Page in GitHub
+## Publish release page in GitHub

 - Set release title as the version name.
 - Follow the format of previous release pages.
@ -81,7 +81,7 @@ The public key for GPG signing can be found at [CoreOS Application Signing Key](
 - Select whether it is a pre-release.
 - Publish the release!

-## Publish Docker Image in Quay.io
+## Publish docker image in Quay.io

 - Push docker image:

@ -92,7 +92,7 @@ docker push quay.io/coreos/etcd:${VERSION}

 - Add `latest` tag to the new image on [quay.io](https://quay.io/repository/coreos/etcd?tag=latest&tab=tags) if this is a stable release.

-## Announce to etcd-dev Googlegroup
+## Announce to the etcd-dev Googlegroup

 - Follow the format of [previous release emails](https://groups.google.com/forum/#!forum/etcd-dev).
 - Make sure to include a list of authors that contributed since the previous release - something like the following might be handy:
@ -103,7 +103,7 @@ git log ...${PREV_VERSION} --pretty=format:"%an" | sort | uniq | tr '\n' ',' | s

 - Send email to etcd-dev@googlegroups.com

-## Post Release
+## Post release

 - Create new stable branch through `git push origin ${VERSION_MAJOR}.${VERSION_MINOR}` if this is a major stable release. This assumes `origin` corresponds to "https://github.com/coreos/etcd".
 - Bump [hardcoded Version in the repository](https://github.com/coreos/etcd/blob/master/version/version.go#L30) to the version `${VERSION}+git`.
--- a/Documentation/dl_build.md
+++ b/Documentation/dl_build.md
@ -1,17 +1,17 @@
-# Download and Build
+# Download and build

-## System Requirements
+## System requirements

-TODO
+The etcd performance benchmarks run etcd on 8 vCPU, 16GB RAM, 50GB SSD GCE instances, but any relatively modern machine with low latency storage and a few gigabytes of memory should suffice for most use cases. Applications with large v2 data stores will require more memory than a large v3 data store since data is kept in anonymous memory instead of memory mapped from a file. For running etcd on a cloud provider, we suggest at least a medium instance on AWS or a standard-1 instance on GCE.

-## Download the Pre-built Binary
+## Download the pre-built binary

 The easiest way to get etcd is to use one of the pre-built release binaries which are available for OSX, Linux, Windows, appc, and Docker. Instructions for using these binaries are on the [GitHub releases page][github-release].

-## Build the Latest Version
+## Build the latest version

-For those wanting to try the very latest version, you can build the latest version of etcd from the `master` branch.
-[Go](https://golang.org/) version 1.5+ is required to build the latest version of etcd.
+For those wanting to try the very latest version, build etcd from the `master` branch.
+[Go](https://golang.org/) version 1.6+ (with HTTP2 support) is required to build the latest version of etcd.

 Here are the commands to build an etcd binary from the `master` branch:

@ -33,7 +33,7 @@ $ ./bin/etcd
 ...
 ```

-## Test your Installation
+## Test the installation

 Check the etcd binary is built correctly by starting etcd and setting a key.

@ -53,4 +53,4 @@ OK
 If OK is printed, then etcd is working!

 [github-release]: https://github.com/coreos/etcd/releases/
-[go]: https://golang.org/doc/install
+[go]: https://golang.org/doc/install
--- a/Documentation/docs.md
+++ b/Documentation/docs.md
@ -4,45 +4,59 @@ etcd is a distributed key-value store designed to reliably and quickly preserve

 ## Getting started

-New etcd users and developers should get started by [downloading and building][download_build] etcd.
+New etcd users and developers should get started by [downloading and building][download_build] etcd. After getting etcd, follow this [quick demo][demo] to see the basics of creating and working with an etcd cluster.

 ## Developing with etcd

-The easiest way to get started using etcd as a distributed key-value store for your applications is to [set up a local cluster][local_cluster].
+The easiest way to get started using etcd as a distributed key-value store is to [set up a local cluster][local_cluster].

 - [Setting up local clusters][local_cluster]
 - [Interacting with etcd][interacting]
 - [API references][api_ref]
+ - [gRPC gateway][api_grpc_gateway]
+ - [Experimental features and APIs][experimental]

 ## Operating etcd clusters

 Administrators who need to create reliable and scalable key-value stores for the developers they support should begin with a [cluster on multiple machines][clustering].

 - [Setting up clusters][clustering]
+ - [Run etcd clusters inside containers][container]
 - [Configuration][conf]
 - [Security][security]
 - Monitoring
 - [Maintenance][maintenance]
+ - [Understand failures][failures]
 - [Disaster recovery][recovery]
 - [Performance][performance]
+ - [Versioning][versioning]
+ - [Supported platform][supported_platform]

 ## Learning

 To learn more about the concepts and internals behind etcd, read the following pages:

- - Why etcd
- - Concepts
- - Internals
+ - Why etcd (TODO)
+ - [Understand data model][data_model]
+ - [Understand APIs][understand_apis]
 - [Glossary][glossary]
+ - Internals (TODO)

 ## Upgrading and compatibility

+ - [Migrate applications from using API v2 to API v3][v2_migration]
+ - [Updating v2.3 to v3.0][v3_upgrade]
+
 ## Troubleshooting

 [api_ref]: dev-guide/api_reference_v3.md
+[api_grpc_gateway]: dev-guide/api_grpc_gateway.md
 [clustering]: op-guide/clustering.md
 [conf]: op-guide/configuration.md
+[data_model]: learning/data_model.md
+[demo]: demo.md
 [download_build]: dl_build.md
+[failures]: op-guide/failures.md
 [glossary]: learning/glossary.md
 [interacting]: dev-guide/interacting_v3.md
 [local_cluster]: dev-guide/local_cluster.md
@ -50,3 +64,10 @@ To learn more about the concepts and internals behind etcd, read the following p
 [recovery]: op-guide/recovery.md
 [maintenance]: op-guide/maintenance.md
 [security]: op-guide/security.md
+[v2_migration]: op-guide/v2-migration.md
+[container]: op-guide/container.md
+[understand_apis]: learning/api.md
+[versioning]: op-guide/versioning.md
+[supported_platform]: op-guide/supported-platform.md
+[experimental]: dev-guide/experimental_apis.md
+[v3_upgrade]: upgrades/upgrade_3_0.md
--- a/Documentation/learning/api.md
+++ b/Documentation/learning/api.md
@ -0,0 +1,57 @@
+# etcd3 API
+
+NOTE: this doc is not finished!
+
+## Response header
+
+All Responses from etcd API have a [response header][response_header] attached. The response header includes the metadata of the response.
+
+```proto
+message ResponseHeader {
+  uint64 cluster_id = 1;
+  uint64 member_id = 2;
+  int64 revision = 3;
+  uint64 raft_term = 4;
+}
+```
+
+* Cluster_ID - the ID of the cluster that generates the response
+* Member_ID - the ID of the member that generates the response
+* Revision - the revision of the key-value store when the response is generated
+* Raft_Term - the Raft term of the member when the response is generated
+
+An application may read the Cluster_ID (Member_ID) field to ensure it is communicating with the intended cluster (member).
+
+Applications can use the `Revision` to know the latest revision of the key-value store. This is especially useful when an application specifies a historical revision to make a `time travel query` and wishes to know the latest revision at the time of the request.
+
+Applications can use `Raft_Term` to detect when the cluster completes a new leader election.
+
+## Key-Value API
+
+Key-Value API is used to manipulate key-value pairs stored inside etcd. The key-value API is defined as a [gRPC service][kv-service]. The Key-Value pair is defined as structured data in [protobuf format][kv-proto].
+
+### Key-Value pair
+
+A key-value pair is the smallest unit that the key-value API can manipulate. Each key-value pair has a number of fields:
+
+```protobuf
+message KeyValue {
+  bytes key = 1;
+  int64 create_revision = 2;
+  int64 mod_revision = 3;
+  int64 version = 4;
+  bytes value = 5;
+  int64 lease = 6;
+}
+```
+
+* Key - key in bytes. An empty key is not allowed.
+* Value - value in bytes.
+* Version - version is the version of the key. A deletion resets the version to zero and any modification of the key increases its version.
+* Create_Revision - revision of the last creation on the key.
+* Mod_Revision - revision of the last modification on the key.
+* Lease - the ID of the lease attached to the key. If lease is 0, then no lease is attached to the key.
+
+[kv-proto]: https://github.com/coreos/etcd/blob/master/mvcc/mvccpb/kv.proto
+[kv-service]: https://github.com/coreos/etcd/blob/master/etcdserver/etcdserverpb/rpc.proto
+[response_header]: https://github.com/coreos/etcd/blob/master/etcdserver/etcdserverpb/rpc.proto
--- a/Documentation/learning/api_guarantees.md
+++ b/Documentation/learning/api_guarantees.md
@ -0,0 +1,63 @@
+# KV API guarantees
+
+etcd is a consistent and durable key value store with mini-transaction(TODO: link to txn doc when we have it) support. The key value store is exposed through the KV APIs. etcd tries to ensure the strongest consistency and durability guarantees for a distributed system. This specification enumerates the KV API guarantees made by etcd.
+
+### APIs to consider
+
+* Read APIs
+    * range
+    * watch
+* Write APIs
+    * put
+    * delete
+* Combination (read-modify-write) APIs
+    * txn
+
+### etcd specific definitions
+
+#### Operation completed
+
+An etcd operation is considered complete when it is committed through consensus, and therefore “executed” -- permanently stored -- by the etcd storage engine. The client knows an operation is completed when it receives a response from the etcd server. Note that the client may be uncertain about the status of an operation if it times out, or there is a network disruption between the client and the etcd member. etcd may also abort operations when there is a leader election. etcd does not send `abort` responses to  clients’ outstanding requests in this event.
+
+#### Revision
+
+An etcd operation that modifies the key value store is assigned a single increasing revision. A transaction operation might modify the key value store multiple times, but only one revision is assigned. The revision attribute of a key value pair that is modified by the operation has the same value as the revision of the operation. The revision can be used as a logical clock for the key value store. A key value pair that has a larger revision is modified after a key value pair with a smaller revision. Two key value pairs that have the same revision are modified by an operation "concurrently".
+
+### Guarantees provided
+
+#### Atomicity
+
+All API requests are atomic; an operation either completes entirely or not at all. For watch requests, all events generated by one operation will be in one watch response. Watch never observes partial events for a single operation.
+
+#### Consistency
+
+All API calls ensure [sequential consistency][seq_consistency], the strongest consistency guarantee available from distributed systems. No matter which etcd member server a client makes requests to, a client reads the same events in the same order. If two members complete the same number of operations, the state of the two members is consistent.
+
+For watch operations, etcd guarantees to return the same value for the same key across all members for the same revision. For range operations, etcd has a similar guarantee for [linearized][Linearizability] access; serialized access may be behind the quorum state, so that the later revision is not yet available.
+
+As with all distributed systems, it is impossible for etcd to ensure [strict consistency][strict_consistency]. etcd does not guarantee that it will return to a read the “most recent” value (as measured by a wall clock when a request is completed) available on any cluster member.
+
+#### Isolation
+
+etcd ensures [serializable isolation][serializable_isolation], which is the highest isolation level available in distributed systems. Read operations will never observe any intermediate data.
+
+#### Durability
+
+Any completed operations are durable. All accessible data is also durable data. A read will never return data that has not been made durable.
+
+#### Linearizability
+
+Linearizability (also known as Atomic Consistency or External Consistency) is a consistency level between strict consistency and sequential consistency. 
+
+For linearizability, suppose each operation receives a timestamp from a loosely synchronized global clock. Operations are linearized if and only if they always complete as though they were executed in a sequential order and each operation appears to complete in the order specified by the program. Likewise, if an operation’s timestamp precedes another, that operation must also precede the other operation in the sequence.
+
+For example, consider a client completing a write at time point 1 (*t1*). A client issuing a read at *t2* (for *t2* > *t1*) should receive a value at least as recent as the previous write, completed at *t1*. However, the read might actually complete only by *t3*, and the returned value, current at *t2* when the read began, might be "stale" by *t3*.
+
+etcd does not ensure linearizability for watch operations. Users are expected to verify the revision of watch responses to ensure correct ordering.
+
+etcd ensures linearizability for all other operations by default. Linearizability comes with a cost, however, because linearized requests must go through the Raft consensus process. To obtain lower latencies and higher throughput for read requests, clients can configure a request’s consistency mode to `serializable`, which may access stale data with respect to quorum, but removes the performance penalty of linearized accesses' reliance on live consensus.
+
+[seq_consistency]: https://en.wikipedia.org/wiki/Consistency_model#Sequential_consistency
+[strict_consistency]: https://en.wikipedia.org/wiki/Consistency_model#Strict_consistency
+[serializable_isolation]: https://en.wikipedia.org/wiki/Isolation_(database_systems)#Serializable
+[Linearizability]: #Linearizability
--- a/Documentation/learning/data_model.md
+++ b/Documentation/learning/data_model.md
@ -0,0 +1,25 @@
+# Data model
+
+etcd is designed to reliably store infrequently updated data and provide reliable watch queries. etcd exposes previous versions of key-value pairs to support inexpensive snapshots and watch history events (“time travel queries”). A persistent, multi-version, concurrency-control data model is a good fit for these use cases.
+
+etcd stores data in a multiversion [persistent][persistent-ds] key-value store. The persistent key-value store preserves the previous version of a key-value pair when its value is superseded with new data. The key-value store is effectively immutable; its operations do not update the structure in-place, but instead always generate a new updated structure. All past versions of keys are still accessible and watchable after modification. To prevent the data store from growing indefinitely over time from maintaining old versions, the store may be compacted to shed the oldest versions of superseded data.
+
+### Logical view
+
+The store’s logical view is a flat binary key space. The key space has a lexically sorted index on byte string keys so range queries are inexpensive.
+
+The key space maintains multiple revisions. Each atomic mutative operation (e.g., a transaction operation may contain multiple operations) creates a new revision on the key space. All data held by previous revisions remains unchanged. Old versions of a key can still be accessed through previous revisions. Likewise, revisions are indexed as well; ranging over revisions with watchers is efficient. If the store is compacted to recover space, revisions before the compact revision will be removed.
+
+A key’s lifetime spans a generation. Each key may have one or multiple generations. Creating a key increments the generation of that key, starting at 1 if the key never existed. Deleting a key generates a key tombstone, concluding the key’s current generation. Each modification of a key creates a new version of the key. Once a compaction happens, any generation ended before the given revision will be removed and values set before the compaction revision except the latest one will be removed.
+
+### Physical view
+
+etcd stores the physical data as key-value pairs in a persistent [b+tree][b+tree]. Each revision of the store’s state only contains the delta from its previous revision to be efficient. A single revision may correspond to multiple keys in the tree. 
+
+The key of a key-value pair is a 3-tuple (major, sub, type). Major is the store revision holding the key. Sub differentiates among keys within the same revision. Type is an optional suffix for special values (e.g., `t` if the value contains a tombstone). The value of the key-value pair contains the modification from the previous revision, thus one delta from the previous revision. The b+tree is ordered by key in lexical byte-order. Ranged lookups over revision deltas are fast; this enables quickly finding modifications from one specific revision to another. Compaction removes out-of-date key-value pairs.
+
+etcd also keeps a secondary in-memory [btree][btree] index to speed up range queries over keys. The keys in the btree index are the keys of the store exposed to user. The value is a pointer to the modification of the persistent b+tree. Compaction removes dead pointers.
+
+[persistent-ds]: https://en.wikipedia.org/wiki/Persistent_data_structure
+[btree]: https://en.wikipedia.org/wiki/B-tree
+[b+tree]: https://en.wikipedia.org/wiki/B%2B_tree
--- a/Documentation/libraries-and-tools.md
+++ b/Documentation/libraries-and-tools.md
@ -1,4 +1,4 @@
-# Libraries and Tools
+# Libraries and tools

 **Tools**

@ -17,7 +17,8 @@

 **Go libraries**

- [etcd/client](https://github.com/coreos/etcd/blob/master/client) - the officially maintained Go client
+- [etcd/clientv3](https://github.com/coreos/etcd/blob/master/clientv3) - the officially maintained Go client for v3
+- [etcd/client](https://github.com/coreos/etcd/blob/master/client) - the officially maintained Go client for v2
 - [go-etcd](https://github.com/coreos/go-etcd) - the deprecated official client. May be useful for older (<2.0.0) versions of etcd.

 **Java libraries**
@ -92,6 +93,10 @@

 - [efrecon/etcd-tcl](https://github.com/efrecon/etcd-tcl) - Supports v2, except wait.

+**Gradle Plugins**
+
+- [gradle-etcd-rest-plugin](https://github.com/cdancy/gradle-etcd-rest-plugin) - Supports v2
+
 **Chef Integration**

 - [coderanger/etcd-chef](https://github.com/coderanger/etcd-chef)
@ -127,3 +132,4 @@
 - [spf13/viper](https://github.com/spf13/viper) - Go configuration library, reads values from ENV, pflags, files, and etcd with optional encryption
 - [lytics/metafora](https://github.com/lytics/metafora) - Go distributed task library
 - [ryandoyle/nss-etcd](https://github.com/ryandoyle/nss-etcd) - A GNU libc NSS module for resolving names from etcd.
+- [Gru](https://github.com/dnaeon/gru) - Orchestration made easy with Go
--- a/Documentation/metrics.md
+++ b/Documentation/metrics.md
@ -14,18 +14,70 @@ The metrics under the `etcd` prefix are for monitoring and alerting. They are st

 Metrics that are etcd2 related are documented [v2 metrics guide][v2-http-metrics].

-### server
+### Server

 These metrics describe the status of the etcd server. In order to detect outages or problems for troubleshooting, the server metrics of every production etcd cluster should be closely monitored.

 All these metrics are prefixed with `etcd_server_`

-| Name                      | Description                       | Type    |
-|---------------------------|-----------------------------------|---------|
-| leader_changes_seen_total | The number of leader changes seen | Counter |
+| Name                      | Description                                              | Type    |
+|---------------------------|----------------------------------------------------------|---------|
+| has_leader                | Whether or not a leader exists. 1 is existence, 0 is not.| Gauge   |
+| leader_changes_seen_total | The number of leader changes seen.                       | Counter |
+| proposals_committed_total | The total number of consensus proposals committed.       | Gauge   |
+| proposals_applied_total   | The total number of consensus proposals applied.         | Gauge   |
+| proposals_pending         | The current number of pending proposals.                 | Gauge   |
+| proposals_failed_total    | The total number of failed proposals seen.               | Counter |
+
+`has_leader` indicates whether the member has a leader. If a member does not have a leader, it is
+totally unavailable. If all the members in the cluster do not have any leader, the entire cluster
+is totally unavailable.

 `leader_changes_seen_total` counts the number of leader changes the member has seen since its start. Rapid leadership changes impact the performance of etcd significantly. It also signals that the leader is unstable, perhaps due to network connectivity issues or excessive load hitting the etcd cluster.

+`proposals_committed_total` records the total number of consensus proposals committed. This gauge should increase over time if the cluster is healthy. Several healthy members of an etcd cluster may have different total committed proposals at once. This discrepancy may be due to recovering from peers after starting, lagging behind the leader, or being the leader and therefore having the most commits. It is important to monitor this metric across all the members in the cluster; a consistently large lag between a single member and its leader indicates that member is slow or unhealthy.
+
+`proposals_applied_total` records the total number of consensus proposals applied. The etcd server applies every committed proposal asynchronously. The difference between `proposals_committed_total` and `proposals_applied_total` should usually be small (within a few thousands even under high load). If the difference between them continues to rise, it indicates that the etcd server is overloaded. This might happen when applying expensive queries like heavy range queries or large txn operations.
+
+`proposals_pending` indicates how many proposals are queued to commit. Rising pending proposals suggests there is a high client load or the member cannot commit proposals.
+
+`proposals_failed_total` are normally related to two issues: temporary failures related to a leader election or longer downtime caused by a loss of quorum in the cluster.
+
+### Disk
+
+These metrics describe the status of the disk operations.
+
+All these metrics are prefixed with `etcd_disk_`.
+
+| Name                               | Description                                           | Type      |
+|------------------------------------|-------------------------------------------------------|-----------|
+| wal_fsync_duration_seconds         | The latency distributions of fsync called by wal      | Histogram |
+| backend_commit_duration_seconds    | The latency distributions of commit called by backend.| Histogram |
+
+A `wal_fsync` is called when etcd persists its log entries to disk before applying them.
+
+A `backend_commit` is called when etcd commits an incremental snapshot of its most recent changes to disk.
+
+High disk operation latencies (`wal_fsync_duration_seconds` or `backend_commit_duration_seconds`) often indicate disk issues. They may cause high request latency or make the cluster unstable.
+
+### Network
+
+These metrics describe the status of the network.
+
+All these metrics are prefixed with `etcd_network_`
+
+| Name                      | Description                                                        | Type          |
+|---------------------------|--------------------------------------------------------------------|---------------|
+| peer_sent_bytes_total           | The total number of bytes sent to the peer with ID `To`.         | Counter(To)   |
+| peer_received_bytes_total       | The total number of bytes received from the peer with ID `From`. | Counter(From) |
+| peer_round_trip_time_seconds    | Round-Trip-Time histogram between peers.                         | Histogram(To) |
+| client_grpc_sent_bytes_total    | The total number of bytes sent to grpc clients.                  | Counter   |
+| client_grpc_received_bytes_total| The total number of bytes received from grpc clients.            | Counter   |
+
+`peer_sent_bytes_total` counts the total number of bytes sent to a specific peer. Usually the leader member sends more data than other members since it is responsible for transmitting replicated data.
+
+`peer_received_bytes_total` counts the total number of bytes received from a specific peer. Usually follower members receive data only from the leader member.
+
 ### gRPC requests

 These metrics describe the requests served by a specific etcd member: total received requests, total failed requests, and processing latency. They are useful for tracking user-generated traffic hitting the etcd cluster.
@ -57,30 +109,8 @@ Example Prometheus queries that may be useful from these metrics (across all etc

 The metrics under the `etcd_debugging` prefix are for debugging. They are very implementation dependent and volatile. They might be changed or removed without any warning in new etcd releases. Some of the metrics might be moved to the `etcd` prefix when they become more stable.

-### etcdserver

-| Name                                    | Description                                      | Type      |
-|-----------------------------------------|--------------------------------------------------|-----------|
-| proposal_duration_seconds              | The latency distributions of committing proposal | Histogram |
-| proposals_pending                       | The current number of pending proposals          | Gauge     |
-| proposals_failed_total                   | The total number of failed proposals             | Counter   |
-
-[Proposal][glossary-proposal] duration (`proposal_duration_seconds`) provides a proposal commit latency histogram. The reported latency reflects network and disk IO delays in etcd.
-
-Proposals pending (`proposals_pending`) indicates how many proposals are queued for commit. Rising pending proposals suggests there is a high client load or the cluster is unstable.
-
-Failed proposals (`proposals_failed_total`) are normally related to two issues: temporary failures related to a leader election or longer duration downtime caused by a loss of quorum in the cluster.
-
-### wal
-
-| Name                               | Description                                      | Type      |
-|------------------------------------|--------------------------------------------------|-----------|
-| fsync_duration_seconds            | The latency distributions of fsync called by wal | Histogram |
-| last_index_saved                   | The index of the last entry saved by wal         | Gauge     |
-
-Abnormally high fsync duration (`fsync_duration_seconds`) indicates disk issues and might cause the cluster to be unstable.
-
-### snapshot
+### Snapshot

 | Name                                       | Description                                                | Type      |
 |--------------------------------------------|------------------------------------------------------------|-----------|
@ -88,24 +118,6 @@ Abnormally high fsync duration (`fsync_duration_seconds`) indicates disk issues

 Abnormally high snapshot duration (`snapshot_save_total_duration_seconds`) indicates disk issues and might cause the cluster to be unstable.

-### rafthttp
-
-| Name                              | Description                                | Type         | Labels                         |
-|-----------------------------------|--------------------------------------------|--------------|--------------------------------|
-| message_sent_latency_seconds      | The latency distributions of messages sent | HistogramVec | sendingType, msgType, remoteID |
-| message_sent_failed_total         | The total number of failed messages sent   | Summary      | sendingType, msgType, remoteID |
-
-
-Abnormally high message duration (`message_sent_latency_seconds`) indicates network issues and might cause the cluster to be unstable.
-
-An increase in message failures (`message_sent_failed_total`) indicates more severe network issues and might cause the cluster to be unstable.
-
-Label `sendingType` is the connection type to send messages. `message`, `msgapp` and `msgappv2` use HTTP streaming, while `pipeline` does HTTP request for each message.
-
-Label `msgType` is the type of raft message. `MsgApp` is log replication messages; `MsgSnap` is snapshot install messages; `MsgProp` is proposal forward messages; the others maintain internal raft status. Given large snapshots, a lengthy msgSnap transmission latency should be expected. For other types of messages, given enough network bandwidth, latencies comparable to ping latency should be expected.
-
-Label `remoteID` is the member ID of the message destination.
-
 ## Prometheus supplied metrics

 The Prometheus client library provides a number of metrics under the `go` and `process` namespaces. There are a few that are particlarly interesting.
--- a/Documentation/op-guide/clustering.md
+++ b/Documentation/op-guide/clustering.md
@ -2,9 +2,9 @@

 ## Overview

-Starting an etcd cluster statically requires that each member knows another in the cluster. In a number of cases, you might not know the IPs of your cluster members ahead of time. In these cases, you can bootstrap an etcd cluster with the help of a discovery service.
+Starting an etcd cluster statically requires that each member knows another in the cluster. In a number of cases, the IPs of the cluster members may be unknown ahead of time. In these cases, the etcd cluster can be bootstrapped with the help of a discovery service.

-Once an etcd cluster is up and running, adding or removing members is done via [runtime reconfiguration][runtime-conf]. To better understand the design behind runtime reconfiguration, we suggest you read [the runtime configuration design document][runtime-reconf-design].
+Once an etcd cluster is up and running, adding or removing members is done via [runtime reconfiguration][runtime-conf]. To better understand the design behind runtime reconfiguration, we suggest reading [the runtime configuration design document][runtime-reconf-design].

 This guide will cover the following mechanisms for bootstrapping an etcd cluster:

@ -22,7 +22,7 @@ Each of the bootstrapping mechanisms will be used to create a three machine etcd

 ## Static

-As we know the cluster members, their addresses and the size of the cluster before starting, we can use an offline bootstrap configuration by setting the `initial-cluster` flag. Each machine will get either the following command line or environment variables:
+As we know the cluster members, their addresses and the size of the cluster before starting, we can use an offline bootstrap configuration by setting the `initial-cluster` flag. Each machine will get either the following environment variables or command line:

 ```
 ETCD_INITIAL_CLUSTER="infra0=http://10.0.1.10:2380,infra1=http://10.0.1.11:2380,infra2=http://10.0.1.12:2380"
@ -36,11 +36,11 @@ ETCD_INITIAL_CLUSTER_STATE=new

 Note that the URLs specified in `initial-cluster` are the _advertised peer URLs_, i.e. they should match the value of `initial-advertise-peer-urls` on the respective nodes.

-If you are spinning up multiple clusters (or creating and destroying a single cluster) with same configuration for testing purpose, it is highly recommended that you specify a unique `initial-cluster-token` for the different clusters. By doing this, etcd can generate unique cluster IDs and member IDs for the clusters even if they otherwise have the exact same configuration. This can protect you from cross-cluster-interaction, which might corrupt your clusters.
+If spinning up multiple clusters (or creating and destroying a single cluster) with same configuration for testing purpose, it is highly recommended that each cluster is given a unique `initial-cluster-token`. By doing this, etcd can generate unique cluster IDs and member IDs for the clusters even if they otherwise have the exact same configuration. This can protect etcd from cross-cluster-interaction, which might corrupt the clusters.

-etcd listens on [`listen-client-urls`][conf-listen-client] to accept client traffic. etcd member advertises the URLs specified in [`advertise-client-urls`][conf-adv-client] to other members, proxies, clients. Please make sure the `advertise-client-urls` are reachable from intended clients. A common mistake is setting `advertise-client-urls` to localhost or leave it as default when you want the remote clients to reach etcd.
+etcd listens on [`listen-client-urls`][conf-listen-client] to accept client traffic. etcd member advertises the URLs specified in [`advertise-client-urls`][conf-adv-client] to other members, proxies, clients. Please make sure the `advertise-client-urls` are reachable from intended clients. A common mistake is setting `advertise-client-urls` to localhost or leaving it as default if the remote clients should reach etcd.

-On each machine you would start etcd with these flags:
+On each machine, start etcd with these flags:

 ```
 $ etcd --name infra0 --initial-advertise-peer-urls http://10.0.1.10:2380 \
@ -70,9 +70,99 @@ $ etcd --name infra2 --initial-advertise-peer-urls http://10.0.1.12:2380 \
  --initial-cluster-state new
 ```

-The command line parameters starting with `--initial-cluster` will be ignored on subsequent runs of etcd. You are free to remove the environment variables or command line flags after the initial bootstrap process. If you need to make changes to the configuration later (for example, adding or removing members to/from the cluster), see the [runtime configuration][runtime-conf] guide.
+The command line parameters starting with `--initial-cluster` will be ignored on subsequent runs of etcd. Feel free to remove the environment variables or command line flags after the initial bootstrap process. If the configuration needs changes later (for example, adding or removing members to/from the cluster), see the [runtime configuration][runtime-conf] guide.

-### Error Cases
+### TLS
+
+etcd supports encrypted communication through the TLS protocol. TLS channels can be used for encrypted internal cluster communication between peers as well as encrypted client traffic. This section provides examples for setting up a cluster with peer and client TLS. Additional information detailing etcd's TLS support can be found in the [security guide][security-guide].
+
+#### Self-signed certificates
+
+A cluster using self-signed certificates both encrypts traffic and authenticates its connections. To start a cluster with self-signed certificates, each cluster member should have a unique key pair (`member.crt`, `member.key`) signed by a shared cluster CA certificate (`ca.crt`) for both peer connections and client connections. Certificates may be generated by following the etcd [TLS setup][tls-setup] example.
+
+On each machine, etcd would be started with these flags:
+
+```
+$ etcd --name infra0 --initial-advertise-peer-urls https://10.0.1.10:2380 \
+  --listen-peer-urls https://10.0.1.10:2380 \
+  --listen-client-urls https://10.0.1.10:2379,https://127.0.0.1:2379 \
+  --advertise-client-urls https://10.0.1.10:2379 \
+  --initial-cluster-token etcd-cluster-1 \
+  --initial-cluster infra0=https://10.0.1.10:2380,infra1=https://10.0.1.11:2380,infra2=https://10.0.1.12:2380 \
+  --initial-cluster-state new \
+  --client-cert-auth --trusted-ca-file=/path/to/ca-client.crt \
+  --cert-file=/path/to/infra0-client.crt --key-file=/path/to/infra0-client.key \
+  --peer-client-cert-auth --peer-trusted-ca-file=ca-peer.crt \
+  --peer-cert-file=/path/to/infra0-peer.crt --peer-key-file=/path/to/infra0-peer.key
+```
+```
+$ etcd --name infra1 --initial-advertise-peer-urls https://10.0.1.11:2380 \
+  --listen-peer-urls https://10.0.1.11:2380 \
+  --listen-client-urls https://10.0.1.11:2379,https://127.0.0.1:2379 \
+  --advertise-client-urls https://10.0.1.11:2379 \
+  --initial-cluster-token etcd-cluster-1 \
+  --initial-cluster infra0=https://10.0.1.10:2380,infra1=https://10.0.1.11:2380,infra2=https://10.0.1.12:2380 \
+  --initial-cluster-state new \
+  --client-cert-auth --trusted-ca-file=/path/to/ca-client.crt \
+  --cert-file=/path/to/infra1-client.crt --key-file=/path/to/infra1-client.key \
+  --peer-client-cert-auth --peer-trusted-ca-file=ca-peer.crt \
+  --peer-cert-file=/path/to/infra1-peer.crt --peer-key-file=/path/to/infra1-peer.key
+```
+```
+$ etcd --name infra2 --initial-advertise-peer-urls https://10.0.1.12:2380 \
+  --listen-peer-urls https://10.0.1.12:2380 \
+  --listen-client-urls https://10.0.1.12:2379,https://127.0.0.1:2379 \
+  --advertise-client-urls https://10.0.1.12:2379 \
+  --initial-cluster-token etcd-cluster-1 \
+  --initial-cluster infra0=https://10.0.1.10:2380,infra1=https://10.0.1.11:2380,infra2=https://10.0.1.12:2380 \
+  --initial-cluster-state new \
+  --client-cert-auth --trusted-ca-file=/path/to/ca-client.crt \
+  --cert-file=/path/to/infra2-client.crt --key-file=/path/to/infra2-client.key \
+  --peer-client-cert-auth --peer-trusted-ca-file=ca-peer.crt \
+  --peer-cert-file=/path/to/infra2-peer.crt --peer-key-file=/path/to/infra2-peer.key
+```
+
+#### Automatic certificates
+
+If the cluster needs encrypted communication but does not require authenticated connections, etcd can be configured to automatically generate its keys. On initialization, each member creates its own set of keys based on its advertised IP addresses and hosts.
+
+On each machine, etcd would be started with these flags:
+
+```
+$ etcd --name infra0 --initial-advertise-peer-urls https://10.0.1.10:2380 \
+  --listen-peer-urls https://10.0.1.10:2380 \
+  --listen-client-urls https://10.0.1.10:2379,https://127.0.0.1:2379 \
+  --advertise-client-urls https://10.0.1.10:2379 \
+  --initial-cluster-token etcd-cluster-1 \
+  --initial-cluster infra0=https://10.0.1.10:2380,infra1=https://10.0.1.11:2380,infra2=https://10.0.1.12:2380 \
+  --initial-cluster-state new \
+  --auto-tls \
+  --peer-auto-tls
+```
+```
+$ etcd --name infra1 --initial-advertise-peer-urls https://10.0.1.11:2380 \
+  --listen-peer-urls https://10.0.1.11:2380 \
+  --listen-client-urls https://10.0.1.11:2379,https://127.0.0.1:2379 \
+  --advertise-client-urls https://10.0.1.11:2379 \
+  --initial-cluster-token etcd-cluster-1 \
+  --initial-cluster infra0=https://10.0.1.10:2380,infra1=https://10.0.1.11:2380,infra2=https://10.0.1.12:2380 \
+  --initial-cluster-state new \
+  --auto-tls \
+  --peer-auto-tls
+```
+```
+$ etcd --name infra2 --initial-advertise-peer-urls https://10.0.1.12:2380 \
+  --listen-peer-urls https://10.0.1.12:2380 \
+  --listen-client-urls https://10.0.1.12:2379,https://127.0.0.1:2379 \
+  --advertise-client-urls https://10.0.1.12:2379 \
+  --initial-cluster-token etcd-cluster-1 \
+  --initial-cluster infra0=https://10.0.1.10:2380,infra1=https://10.0.1.11:2380,infra2=https://10.0.1.12:2380 \
+  --initial-cluster-state new \
+  --auto-tls \
+  --peer-auto-tls
+```
+
+### Error cases

 In the following example, we have not included our new host in the list of enumerated nodes. If this is a new cluster, the node _must_ be added to the list of initial cluster members.

@ -100,7 +190,7 @@ etcd: error setting up initial cluster: infra0 has different advertised URLs in
 exit 1
 ```

-If you configure a peer with a different set of configuration and attempt to join this cluster you will get a cluster ID mismatch and etcd will exit.
+If a peer is configured with a different set of configuration arguments and attempts to join this cluster, etcd will report a cluster ID mismatch and will exit.

 ```
 $ etcd --name infra3 --initial-advertise-peer-urls http://10.0.1.13:2380 \
@ -115,34 +205,34 @@ exit 1

 ## Discovery

-In a number of cases, you might not know the IPs of your cluster peers ahead of time. This is common when utilizing cloud providers or when your network uses DHCP. In these cases, rather than specifying a static configuration, you can use an existing etcd cluster to bootstrap a new one. We call this process "discovery".
+In a number of cases, the IPs of the cluster peers may not be known ahead of time. This is common when utilizing cloud providers or when the network uses DHCP. In these cases, rather than specifying a static configuration, use an existing etcd cluster to bootstrap a new one. We call this process "discovery".

 There are two methods that can be used for discovery:

 * etcd discovery service
 * DNS SRV records

-### etcd Discovery
+### etcd discovery

-To better understand the design about discovery service protocol, we suggest you read [this][discovery-proto].
+To better understand the design of the discovery service protocol, we suggest reading the discovery service protocol [documentation][discovery-proto].

-#### Lifetime of a Discovery URL
+#### Lifetime of a discovery URL

-A discovery URL identifies a unique etcd cluster. Instead of reusing a discovery URL, you should always create discovery URLs for new clusters.
+A discovery URL identifies a unique etcd cluster. Instead of reusing a discovery URL, always create discovery URLs for new clusters.

 Moreover, discovery URLs should ONLY be used for the initial bootstrapping of a cluster. To change cluster membership after the cluster is already running, see the [runtime reconfiguration][runtime-conf] guide.

-#### Custom etcd Discovery Service
+#### Custom etcd discovery service

-Discovery uses an existing cluster to bootstrap itself. If you are using your own etcd cluster you can create a URL like so:
+Discovery uses an existing cluster to bootstrap itself. If using a private etcd cluster, create a URL like so:

 ```
 $ curl -X PUT https://myetcd.local/v2/keys/discovery/6c007a14875d53d9bf0ef5a6fc0257c817f0fb83/_config/size -d value=3
 ```

-By setting the size key to the URL, you create a discovery URL with an expected cluster size of 3.
+By setting the size key to the URL, a discovery URL is created with an expected cluster size of 3.

-The URL you will use in this case will be `https://myetcd.local/v2/keys/discovery/6c007a14875d53d9bf0ef5a6fc0257c817f0fb83` and the etcd members will use the `https://myetcd.local/v2/keys/discovery/6c007a14875d53d9bf0ef5a6fc0257c817f0fb83` directory for registration as they start.
+The URL to use in this case will be `https://myetcd.local/v2/keys/discovery/6c007a14875d53d9bf0ef5a6fc0257c817f0fb83` and the etcd members will use the `https://myetcd.local/v2/keys/discovery/6c007a14875d53d9bf0ef5a6fc0257c817f0fb83` directory for registration as they start.

 **Each member must have a different name flag specified. `Hostname` or `machine-id` can be a good choice. Otherwise, discovery will fail due to duplicated names.**

@ -172,16 +262,16 @@ $ etcd --name infra2 --initial-advertise-peer-urls http://10.0.1.12:2380 \

 This will cause each member to register itself with the custom etcd discovery service and begin the cluster once all machines have been registered.

-#### Public etcd Discovery Service
+#### Public etcd discovery service

-If you do not have access to an existing cluster, you can use the public discovery service hosted at `discovery.etcd.io`.  You can create a private discovery URL using the "new" endpoint like so:
+If no existing cluster is available, use the public discovery service hosted at `discovery.etcd.io`.  To create a private discovery URL using the "new" endpoint, use the command:

 ```
 $ curl https://discovery.etcd.io/new?size=3
 https://discovery.etcd.io/3e86b59982e49066c5d813af1c2e2579cbf573de
 ```

-This will create the cluster with an initial expected size of 3 members. If you do not specify a size, a default of 3 will be used.
+This will create the cluster with an initial expected size of 3 members. If no size is specified, a default of 3 is used.

 ```
 ETCD_DISCOVERY=https://discovery.etcd.io/3e86b59982e49066c5d813af1c2e2579cbf573de
@ -219,11 +309,11 @@ $ etcd --name infra2 --initial-advertise-peer-urls http://10.0.1.12:2380 \

 This will cause each member to register itself with the discovery service and begin the cluster once all members have been registered.

-You can use the environment variable `ETCD_DISCOVERY_PROXY` to cause etcd to use an HTTP proxy to connect to the discovery service.
+Use the environment variable `ETCD_DISCOVERY_PROXY` to cause etcd to use an HTTP proxy to connect to the discovery service.

-#### Error and Warning Cases
+#### Error and warning cases

-##### Discovery Server Errors
+##### Discovery server errors


 ```
@ -238,8 +328,7 @@ exit 1

 ##### Warnings

-This is a harmless warning notifying you that the discovery URL will be
-ignored on this machine.
+This is a harmless warning indicating the discovery URL will be ignored on this machine.

 ```
 $ etcd --name infra0 --initial-advertise-peer-urls http://10.0.1.10:2380 \
@ -250,7 +339,7 @@ $ etcd --name infra0 --initial-advertise-peer-urls http://10.0.1.10:2380 \
 etcdserver: discovery token ignored since a cluster has already been initialized. Valid log found at /var/lib/etcd
 ```

-### DNS Discovery
+### DNS discovery

 DNS [SRV records][rfc-srv] can be used as a discovery mechanism.
 The `-discovery-srv` flag can be used to set the DNS domain name where the discovery SRV records can be found.
@ -259,14 +348,16 @@ The following DNS SRV records are looked up in the listed order:
 * _etcd-server-ssl._tcp.example.com
 * _etcd-server._tcp.example.com

-If `_etcd-server-ssl._tcp.example.com` is found then etcd will attempt the bootstrapping process over SSL.
+If `_etcd-server-ssl._tcp.example.com` is found then etcd will attempt the bootstrapping process over TLS.

 To help clients discover the etcd cluster, the following DNS SRV records are looked up in the listed order:

 * _etcd-client._tcp.example.com
 * _etcd-client-ssl._tcp.example.com

-If `_etcd-client-ssl._tcp.example.com` is found, clients will attempt to communicate with the etcd cluster over SSL.
+If `_etcd-client-ssl._tcp.example.com` is found, clients will attempt to communicate with the etcd cluster over SSL/TLS.
+
+If etcd is using TLS without a custom certificate authority, the discovery domain (e.g., example.com) must match the SRV record domain (e.g., infra1.example.com). This is to mitigate attacks that forge SRV records to point to a different domain; the domain would have a valid certificate under PKI but be controlled by an unknown third party.

 #### Create DNS SRV records

@ -290,6 +381,7 @@ infra0.example.com.  300  IN  A  10.0.1.10
 infra1.example.com.  300  IN  A  10.0.1.11
 infra2.example.com.  300  IN  A  10.0.1.12
 ```
+
 #### Bootstrap the etcd cluster using DNS

 etcd cluster members can listen on domain names or IP addresses; the bootstrap process will resolve DNS A records.
@ -329,7 +421,7 @@ $ etcd --name infra2 \
 --listen-peer-urls http://infra2.example.com:2380
 ```

-You can also bootstrap the cluster using IP addresses instead of domain names:
+The cluster can also bootstrap using IP addresses instead of domain names:

 ```
 $ etcd --name infra0 \
@ -370,11 +462,13 @@ When the `--proxy` flag is set, etcd runs in [proxy mode][proxy]. This proxy mod

 To set up an etcd cluster with proxies of v2 API, please read the [clustering doc in etcd 2.3 release][clustering_etcd2].

-[conf-adv-client]: configuration.md#-advertise-client-urls
-[conf-listen-client]: configuration.md#-listen-client-urls
-[discovery-proto]: dev-internal/discovery_protocol.md
+[conf-adv-client]: configuration.md#--advertise-client-urls
+[conf-listen-client]: configuration.md#--listen-client-urls
+[discovery-proto]: ../dev-internal/discovery_protocol.md
 [rfc-srv]: http://www.ietf.org/rfc/rfc2052.txt
 [runtime-conf]: runtime-configuration.md
 [runtime-reconf-design]: runtime-reconf-design.md
 [proxy]: https://github.com/coreos/etcd/blob/release-2.3/Documentation/proxy.md
 [clustering_etcd2]: https://github.com/coreos/etcd/blob/release-2.3/Documentation/clustering.md
+[security-guide]: security.md
+[tls-setup]: /hack/tls-setup
--- a/Documentation/op-guide/configuration.md
+++ b/Documentation/op-guide/configuration.md
@ -1,22 +1,20 @@
-# Configuration Flags
+# Configuration flags

 etcd is configurable through command-line flags and environment variables. Options set on the command line take precedence over those from the environment.

-The format of environment variable for flag `--my-flag` is `ETCD_MY_FLAG`. It applies to all  flags.
+The format of environment variable for flag `--my-flag` is `ETCD_MY_FLAG`. It applies to all flags.

-The [official etcd ports][iana-ports] are 2379 for client requests, and 2380 for peer communication. Some legacy code and documentation still references ports 4001 and 7001, but all new etcd use and discussion should adopt the assigned ports.
+The [official etcd ports][iana-ports] are 2379 for client requests and 2380 for peer communication. The etcd ports can be set to accept TLS traffic, non-TLS traffic, or both TLS and non-TLS traffic.

 To start etcd automatically using custom settings at startup in Linux, using a [systemd][systemd-intro] unit is highly recommended.

-[systemd-intro]: http://freedesktop.org/wiki/Software/systemd/
-
-## Member Flags
+## Member flags

 ### --name
 + Human-readable name for this member.
 + default: "default"
 + env variable: ETCD_NAME
-+ This value is referenced as this node's own entries listed in the `--initial-cluster` flag (Ex: `default=http://localhost:2380` or `default=http://localhost:2380,default=http://localhost:7001`). This needs to match the key used in the flag if you're using [static bootstrapping][build-cluster]. When using discovery, each member must have a unique name. `Hostname` or `machine-id` can be a good choice.
+ This value is referenced as this node's own entries listed in the `--initial-cluster` flag (e.g., `default=http://localhost:2380`). This needs to match the key used in the flag if using [static bootstrapping][build-cluster]. When using discovery, each member must have a unique name. `Hostname` or `machine-id` can be a good choice.

 ### --data-dir
 + Path to the data directory.
@ -39,20 +37,20 @@ To start etcd automatically using custom settings at startup in Linux, using a [
 + env variable: ETCD_HEARTBEAT_INTERVAL

 ### --election-timeout
-+ Time (in milliseconds) for an election to timeout. See [Documentation/tuning.md](tuning.md#time-parameters) for details.
+ Time (in milliseconds) for an election to timeout. See [Documentation/tuning.md][tuning] for details.
 + default: "1000"
 + env variable: ETCD_ELECTION_TIMEOUT

 ### --listen-peer-urls
 + List of URLs to listen on for peer traffic. This flag tells the etcd to accept incoming requests from its peers on the specified scheme://IP:port combinations. Scheme can be either http or https. If 0.0.0.0 is specified as the IP, etcd listens to the given port on all interfaces. If an IP address is given as well as a port, etcd will listen on the given port and interface. Multiple URLs may be used to specify a number of addresses and ports to listen on. The etcd will respond to requests from any of the listed addresses and ports.
-+ default: "http://localhost:2380,http://localhost:7001"
+ default: "http://localhost:2380"
 + env variable: ETCD_LISTEN_PEER_URLS
 + example: "http://10.0.0.1:2380"
 + invalid example: "http://example.com:2380" (domain name is invalid for binding)

 ### --listen-client-urls
 + List of URLs to listen on for client traffic. This flag tells the etcd to accept incoming requests from the clients on the specified scheme://IP:port combinations. Scheme can be either http or https. If 0.0.0.0 is specified as the IP, etcd listens to the given port on all interfaces. If an IP address is given as well as a port, etcd will listen on the given port and interface. Multiple URLs may be used to specify a number of addresses and ports to listen on. The etcd will respond to requests from any of the listed addresses and ports.
-+ default: "http://localhost:2379,http://localhost:4001"
+ default: "http://localhost:2379"
 + env variable: ETCD_LISTEN_CLIENT_URLS
 + example: "http://10.0.0.1:2379"
 + invalid example: "http://example.com:2379" (domain name is invalid for binding)
@ -61,20 +59,20 @@ To start etcd automatically using custom settings at startup in Linux, using a [
 + Maximum number of snapshot files to retain (0 is unlimited)
 + default: 5
 + env variable: ETCD_MAX_SNAPSHOTS
-+ The default for users on Windows is unlimited, and manual purging down to 5 (or your preference for safety) is recommended.
+ The default for users on Windows is unlimited, and manual purging down to 5 (or some preference for safety) is recommended.

 ### --max-wals
 + Maximum number of wal files to retain (0 is unlimited)
 + default: 5
 + env variable: ETCD_MAX_WALS
-+ The default for users on Windows is unlimited, and manual purging down to 5 (or your preference for safety) is recommended.
+ The default for users on Windows is unlimited, and manual purging down to 5 (or some preference for safety) is recommended.

 ### --cors
 + Comma-separated white list of origins for CORS (cross-origin resource sharing).
 + default: none
 + env variable: ETCD_CORS

-## Clustering Flags
+## Clustering flags

 `--initial` prefix flags are used in bootstrapping ([static bootstrap][build-cluster], [discovery-service bootstrap][discovery] or [runtime reconfiguration][reconfig]) a new member, and ignored when restarting an existing member.

@ -83,13 +81,13 @@ To start etcd automatically using custom settings at startup in Linux, using a [
 ### --initial-advertise-peer-urls

 + List of this member's peer URLs to advertise to the rest of the cluster. These addresses are used for communicating etcd data around the cluster. At least one must be routable to all cluster members. These URLs can contain domain names.
-+ default: "http://localhost:2380,http://localhost:7001"
+ default: "http://localhost:2380"
 + env variable: ETCD_INITIAL_ADVERTISE_PEER_URLS
 + example: "http://example.com:2380, http://10.0.0.1:2380"

 ### --initial-cluster
 + Initial cluster configuration for bootstrapping.
-+ default: "default=http://localhost:2380,default=http://localhost:7001"
+ default: "default=http://localhost:2380"
 + env variable: ETCD_INITIAL_CLUSTER
 + The key is the value of the `--name` flag for each node provided. The default uses `default` for the key because this is the default for the `--name` flag.

@ -107,10 +105,10 @@ To start etcd automatically using custom settings at startup in Linux, using a [

 ### --advertise-client-urls
 + List of this member's client URLs to advertise to the rest of the cluster. These URLs can contain domain names.
-+ default: "http://localhost:2379,http://localhost:4001"
+ default: "http://localhost:2379"
 + env variable: ETCD_ADVERTISE_CLIENT_URLS
 + example: "http://example.com:2379, http://10.0.0.1:2379"
-+ Be careful if you are advertising URLs such as http://localhost:2379 from a cluster member and are using the proxy feature of etcd. This will cause loops, because the proxy will be forwarding requests to itself until its resources (memory, file descriptors) are eventually depleted.
+ Be careful when advertising URLs such as http://localhost:2379 from a cluster member while using the proxy feature of etcd. This will cause loops, because the proxy will be forwarding requests to itself until its resources (memory, file descriptors) are eventually depleted.

 ### --discovery
 + Discovery URL used to bootstrap the cluster.
@ -123,7 +121,7 @@ To start etcd automatically using custom settings at startup in Linux, using a [
 + env variable: ETCD_DISCOVERY_SRV

 ### --discovery-fallback
-+ Expected behavior ("exit" or "proxy") when discovery services fails.
+ Expected behavior ("exit" or "proxy") when the discovery service fails. "proxy" supports v2 API only.
 + default: "proxy"
 + env variable: ETCD_DISCOVERY_FALLBACK

@ -137,9 +135,14 @@ To start etcd automatically using custom settings at startup in Linux, using a [
 + default: false
 + env variable: ETCD_STRICT_RECONFIG_CHECK

-## Proxy Flags
+### --auto-compaction-retention
+ Auto compaction retention for mvcc key value store in hours. 0 means disable auto compaction.
+ default: 0
+ env variable: ETCD_AUTO_COMPACTION_RETENTION

-`--proxy` prefix flags configures etcd to run in [proxy mode][proxy].
+## Proxy flags
+
+`--proxy` prefix flags configure etcd to run in [proxy mode][proxy]. "proxy" supports v2 API only.

 ### --proxy
 + Proxy mode setting ("off", "readonly" or "on").
@ -168,11 +171,11 @@ To start etcd automatically using custom settings at startup in Linux, using a [

 ### --proxy-read-timeout
 + Time (in milliseconds) for a read to timeout or 0 to disable the timeout.
-+ Don't change this value if you use watches because they are using long polling requests.
+ Don't change this value if using watches because watches use long polling requests.
 + default: 0
 + env variable: ETCD_PROXY_READ_TIMEOUT

-## Security Flags
+## Security flags

 The security flags help to [build a secure etcd cluster][security].

@ -236,7 +239,7 @@ The security flags help to [build a secure etcd cluster][security].
 + default: false
 + env variable: ETCD_PEER_AUTO_TLS

-## Logging Flags
+## Logging flags

 ### --debug
 + Drop the default log level to DEBUG for all subpackages.
@ -249,7 +252,7 @@ The security flags help to [build a secure etcd cluster][security].
 + env variable: ETCD_LOG_PACKAGE_LEVELS


-## Unsafe Flags
+## Unsafe flags

 Please be CAUTIOUS when using unsafe flags because they will break the guarantees given by the consensus protocol.
 For example, it may panic if other members in the cluster are still alive.
@ -260,12 +263,16 @@ Follow the instructions when using these flags.
 + default: false
 + env variable: ETCD_FORCE_NEW_CLUSTER

-## Miscellaneous Flags
+## Miscellaneous flags

 ### --version
 + Print the version and exit.
 + default: false

+### --config-file
+ Load server configuration from a file.
+ default: none
+
 ## Profiling flags

 ### --enable-pprof
@ -277,9 +284,7 @@ Follow the instructions when using these flags.
 [discovery]: clustering.md#discovery
 [iana-ports]: https://www.iana.org/assignments/service-names-port-numbers/service-names-port-numbers.xhtml?search=etcd
 [proxy]: ../v2/proxy.md
-[reconfig]: runtime-configuration.md
 [restore]: ../v2/admin_guide.md#restoring-a-backup
-[rfc-v3]: rfc/v3api.md
 [security]: security.md
 [systemd-intro]: http://freedesktop.org/wiki/Software/systemd/
-[tuning]: tuning.md#time-parameters
+[tuning]: ../tuning.md#time-parameters
--- a/Documentation/op-guide/container.md
+++ b/Documentation/op-guide/container.md
@ -0,0 +1,61 @@
+# Run etcd clusters inside containers
+
+The following guide shows how to run etcd with rkt and Docker using the [static bootstrap process](clustering.md#static).
+
+## Docker
+
+In order to expose the etcd API to clients outside of the Docker host, use the host IP address of the container. Please see [`docker inspect`](https://docs.docker.com/engine/reference/commandline/inspect) for more detail on how to get the IP address. Alternatively, specify `--net=host` flag to `docker run` command to skip placing the container inside of a separate network stack.
+
+```
+# For each machine
+ETCD_VERSION=v3.0.0
+TOKEN=my-etcd-token
+CLUSTER_STATE=new
+NAME_1=etcd-node-0
+NAME_2=etcd-node-1
+NAME_3=etcd-node-2
+HOST_1=10.20.30.1
+HOST_2=10.20.30.2
+HOST_3=10.20.30.3
+CLUSTER=${NAME_1}=http://${HOST_1}:2380,${NAME_2}=http://${HOST_2}:2380,${NAME_3}=http://${HOST_3}:2380
+
+# For node 1
+THIS_NAME=${NAME_1}
+THIS_IP=${HOST_1}
+sudo docker run --net=host --name etcd quay.io/coreos/etcd:${ETCD_VERSION} \
+	/usr/local/bin/etcd \
+    --data-dir=data.etcd --name ${THIS_NAME} \
+	--initial-advertise-peer-urls http://${THIS_IP}:2380 --listen-peer-urls http://${THIS_IP}:2380 \
+	--advertise-client-urls http://${THIS_IP}:2379 --listen-client-urls http://${THIS_IP}:2379 \
+	--initial-cluster ${CLUSTER} \
+	--initial-cluster-state ${CLUSTER_STATE} --initial-cluster-token ${TOKEN}
+
+# For node 2
+THIS_NAME=${NAME_2}
+THIS_IP=${HOST_2}
+sudo docker run --net=host --name etcd quay.io/coreos/etcd:${ETCD_VERSION} \
+	/usr/local/bin/etcd \
+    --data-dir=data.etcd --name ${THIS_NAME} \
+	--initial-advertise-peer-urls http://${THIS_IP}:2380 --listen-peer-urls http://${THIS_IP}:2380 \
+	--advertise-client-urls http://${THIS_IP}:2379 --listen-client-urls http://${THIS_IP}:2379 \
+	--initial-cluster ${CLUSTER} \
+	--initial-cluster-state ${CLUSTER_STATE} --initial-cluster-token ${TOKEN}
+
+# For node 3
+THIS_NAME=${NAME_3}
+THIS_IP=${HOST_3}
+sudo docker run --net=host --name etcd quay.io/coreos/etcd:${ETCD_VERSION} \
+	/usr/local/bin/etcd \
+    --data-dir=data.etcd --name ${THIS_NAME} \
+	--initial-advertise-peer-urls http://${THIS_IP}:2380 --listen-peer-urls http://${THIS_IP}:2380 \
+	--advertise-client-urls http://${THIS_IP}:2379 --listen-client-urls http://${THIS_IP}:2379 \
+	--initial-cluster ${CLUSTER} \
+	--initial-cluster-state ${CLUSTER_STATE} --initial-cluster-token ${TOKEN}
+```
+
+To run `etcdctl` using API version 3:
+
+```
+docker exec etcd /bin/sh -c "export ETCDCTL_API=3 && /usr/local/bin/etcdctl put foo bar"
+```
+
--- a/Documentation/op-guide/failures.md
+++ b/Documentation/op-guide/failures.md
@ -0,0 +1,44 @@
+# Understand failures
+
+Failures are common in a large deployment of machines. A machine fails when its hardware or software malfunctions. Multiple machines fail together when there are power failures or network issues. Multiple kinds of failures can also happen at once; it is almost impossible to enumerate all possible failure cases. 
+
+In this section, we catalog kinds of failures and discuss how etcd is designed to tolerate these failures. Most users, if not all, can map a particular failure into one kind of failure. To prepare for rare or [unrecoverable failures][unrecoverable], always [back up][backup] the etcd cluster.
+
+## Minor followers failure
+
+When fewer than half of the followers fail, the etcd cluster can still accept requests and make progress without any major disruption. For example, two follower failures will not affect a five member etcd cluster’s operation. However, clients will lose connectivity to the failed members. Client libraries should hide these interruptions from users for read requests by automatically reconnecting to other members. Operators should expect the system load on the other members to increase due to the reconnections.
+
+## Leader failure
+
+When a leader fails, the etcd cluster automatically elects a new leader. The election does not happen instantly once the leader fails. It takes about an election timeout to elect a new leader since the failure detection model is timeout based. 
+
+During the leader election the cluster cannot process any writes. Write requests sent during the election are queued for processing until a new leader is elected.
+
+Writes already sent to the old leader but not yet committed may be lost. The new leader has the power to rewrite any uncommitted entries from the previous leader. From the user perspective, some write requests might time out after a new leader election. However, no committed writes are ever lost.
+
+The new leader extends timeouts automatically for all leases. This mechanism ensures a lease will not expire before the granted TTL even if it was granted by the old leader. 
+
+## Majority failure
+
+When a majority of the cluster's members fail, the etcd cluster fails and cannot accept more writes.
+
+The etcd cluster can only recover from a majority failure once the majority of members become available. If a majority of members cannot come back online, then the operator must start [disaster recovery][unrecoverable] to recover the cluster.
+
+Once a majority of members works, the etcd cluster elects a new leader automatically and returns to a healthy state. The new leader extends timeouts automatically for all leases. This mechanism ensures no lease expires due to server side unavailability.
+
+## Network partition
+
+A network partition is similar to a minor followers failure or a leader failure. A network partition divides the etcd cluster into two parts; one with a member majority and the other with a member minority. The majority side becomes the available cluster and the minority side is unavailable; there is no “split-brain” in etcd. 
+
+If the leader is on the majority side, then from the majority point of view the failure is a minority follower failure. If the leader is on the minority side, then it is a leader failure. The leader on the minority side steps down and the majority side elects a new leader.
+
+Once the network partition clears, the minority side automatically recognizes the leader from the majority side and recovers its state. 
+
+## Failure during bootstrapping
+
+A cluster bootstrap is only successful if all required members successfully start. If any failure happens during bootstrapping, remove the data directories on all members and re-bootstrap the cluster with a new cluster-token or new discovery token.
+
+Of course, it is possible to recover a failed bootstrapped cluster like recovering a running cluster. However, it almost always takes more time and resources to recover that cluster than bootstrapping a new one, since there is no data to recover.
+
+[backup]: maintenance.md#snapshot-backup
+[unrecoverable]: recovery.md#disaster-recovery
--- a/Documentation/op-guide/maintenance.md
+++ b/Documentation/op-guide/maintenance.md
@ -12,11 +12,11 @@ Since etcd keeps an exact history of its keyspace, this history should be period

 The keyspace can be compacted automatically with `etcd`'s time windowed history retention policy, or manually with `etcdctl`. The `etcdctl` method provides fine-grained control over the compacting process whereas automatic compacting fits applications that only need key history for some length of time.

-`etcd` can be set to automatically compact the keyspace with the `--experimental-auto-compaction` option with a period of hours:
+`etcd` can be set to automatically compact the keyspace with the `--auto-compaction-retention` option with a period of hours:

 ```sh
 # keep one hour of history
-$ etcd --experimental-auto-compaction-retention=1
+$ etcd --auto-compaction-retention=1
 ```

 An `etcdctl` initiated compaction works as follows:
@ -66,7 +66,7 @@ $ while [ 1 ]; do dd if=/dev/urandom bs=1024 count=1024  | etcdctl put key  || b
 ...
 Error:  rpc error: code = 8 desc = etcdserver: mvcc: database space exceeded
 # confirm quota space is exceeded
-$ etcdctl endpoint status
+$ etcdctl --write-out=table endpoint status
 +----------------+------------------+-----------+---------+-----------+-----------+------------+
 |    ENDPOINT    |        ID        |  VERSION  | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX |
 +----------------+------------------+-----------+---------+-----------+-----------+------------+
@ -105,7 +105,7 @@ A snapshot is taken with `etcdctl`:

 ```sh
 $ etcdctl snapshot save backup.db
-$ etcdctl snapshot status backup.db
+$ etcdctl --write-out=table snapshot status backup.db
 +----------+----------+------------+------------+
 |   HASH   | REVISION | TOTAL KEYS | TOTAL SIZE |
 +----------+----------+------------+------------+
--- a/Documentation/op-guide/performance.md
+++ b/Documentation/op-guide/performance.md
@ -10,4 +10,65 @@ There are other sub-systems which impact the overall performance of etcd. Each s

 ## Benchmarks

-TODO
+Benchmarking etcd performance can be done with the [benchmark](https://github.com/coreos/etcd/tree/master/tools/benchmark) CLI tool included with etcd.
+
+For some baseline performance numbers, we consider a three member etcd cluster with the following hardware configuration:
+
+- Google Cloud Compute Engine
+- 3 machines of 8 vCPUs + 16GB Memory + 50GB SSD
+- 1 machine(client) of 16 vCPUs + 30GB Memory + 50GB SSD
+- Ubuntu 15.10
+- etcd v3 master branch (commit SHA d8f325d), Go 1.6.2
+
+With this configuration, etcd can approximately write:
+
+| Number of keys | Key size in bytes | Value size in bytes | Number of connections | Number of clients | Target etcd server | Average write QPS | Average latency per request | Memory |
+|----------------|-------------------|---------------------|-----------------------|-------------------|--------------------|-------------------|-----------------------------|--------|
+| 10,000 | 8 | 256 | 1 | 1 | leader only | 525 | 2ms | 35 MB |
+| 100,000 | 8 | 256 | 100 | 1000 | leader only | 25,000 | 30ms | 35 MB |
+| 100,000 | 8 | 256 | 100 | 1000 | all members | 33,000 | 25ms | 35 MB |
+
+Sample commands are:
+
+```
+# assuming IP_1 is leader, write requests to the leader
+benchmark --endpoints={IP_1} --conns=1 --clients=1 \
+    put --key-size=8 --sequential-keys --total=10000 --val-size=256
+benchmark --endpoints={IP_1} --conns=100 --clients=1000 \
+    put --key-size=8 --sequential-keys --total=100000 --val-size=256
+
+# write to all members
+benchmark --endpoints={IP_1},{IP_2},{IP_3} --conns=100 --clients=1000 \
+    put --key-size=8 --sequential-keys --total=100000 --val-size=256
+```
+
+Linearizable read requests go through a quorum of cluster members for consensus to fetch the most recent data. Serializable read requests are cheaper than linearizable reads since they are served by any single etcd member, instead of a quorum of members, in exchange for possibly serving stale data. etcd can read: 
+
+| Number of requests | Key size in bytes | Value size in bytes | Number of connections | Number of clients | Consistency | Average latency per request | Average read QPS |
+|--------------------|-------------------|---------------------|-----------------------|-------------------|-------------|-----------------------------|------------------|
+| 10,000 | 8 | 256 | 1 | 1 | Linearizable | 2ms | 560 |
+| 10,000 | 8 | 256 | 1 | 1 | Serializable | 0.4ms | 7,500 |
+| 100,000 | 8 | 256 | 100 | 1000 | Linearizable | 15ms | 43,000 |
+| 100,000 | 8 | 256 | 100 | 1000 | Serializable | 9ms | 93,000 |
+
+Sample commands are:
+
+```
+# Linearizable read requests
+benchmark --endpoints={IP_1},{IP_2},{IP_3} --conns=1 --clients=1 \
+    range YOUR_KEY --consistency=l --total=10000
+benchmark --endpoints={IP_1},{IP_2},{IP_3} --conns=100 --clients=1000 \
+    range YOUR_KEY --consistency=l --total=100000
+
+# Serializable read requests for each member and sum up the numbers
+for endpoint in {IP_1} {IP_2} {IP_3}; do
+    benchmark --endpoints=$endpoint --conns=1 --clients=1 \
+        range YOUR_KEY --consistency=s --total=10000
+done
+for endpoint in {IP_1} {IP_2} {IP_3}; do
+    benchmark --endpoints=$endpoint --conns=100 --clients=1000 \
+        range YOUR_KEY --consistency=s --total=100000
+done
+```
+
+We encourage running the benchmark test when setting up an etcd cluster for the first time in a new environment to ensure the cluster achieves adequate performance; cluster latency and throughput can be sensitive to minor environment differences.
--- a/Documentation/op-guide/recovery.md
+++ b/Documentation/op-guide/recovery.md
@ -2,9 +2,9 @@

 etcd is designed to withstand machine failures. An etcd cluster automatically recovers from temporary failures (e.g., machine reboots) and tolerates up to *(N-1)/2* permanent failures for a cluster of N members. When a member permanently fails, whether due to hardware failure or disk corruption, it loses access to the cluster. If the cluster permanently loses more than *(N-1)/2* members then it disastrously fails, irrevocably losing quorum. Once quorum is lost, the cluster cannot reach consensus and therefore cannot continue accepting updates.

-To recover from disastrous failure, etcd provides snapshot and restore facilities to recreate the cluster without data loss.
+To recover from disastrous failure, etcd v3 provides snapshot and restore facilities to recreate the cluster without v3 key data loss. To recover v2 keys, refer to the [v2 admin guide][v2_recover].

-TODO(xiangli): add note to clarify this only recovers for the kv store of etcd3.
+[v2_recover]: ../v2/admin_guide.md#disaster-recovery

 ### Snapshotting the keyspace

@ -18,6 +18,8 @@ $ etcdctl --endpoints $ENDPOINT snapshot save snapshot.db

 To restore a cluster, all that is needed is a single snapshot "db" file. A cluster restore with `etcdctl snapshot restore` creates new etcd data directories; all members should restore using the same snapshot. Restoring overwrites some snapshot metadata (specifically, the member ID and cluster ID); the member loses its former identity. This metadata overwrite prevents the new member from inadvertently joining an existing cluster. Therefore in order to start a cluster from a snapshot, the restore must start a new logical cluster.

+Snapshot integrity may be optionally verified at restore time. If the snapshot is taken with `etcdctl snapshot save`, it will have an integrity hash that is checked by `etcdctl snapshot restore`. If the snapshot is copied from the data directory, there is no integrity hash and it will only restore by using `--skip-hash-check`.
+
 A restore initializes a new member of a new cluster, with a fresh cluster configuration using `etcd`'s cluster configuration flags, but preserves the contents of the etcd keyspace. Continuing from the previous example, the following creates new etcd data directories (`m1.etcd`, `m2.etcd`, `m3.etcd`) for a three member cluster:

 ```sh
--- a/Documentation/op-guide/runtime-configuration.md
+++ b/Documentation/op-guide/runtime-configuration.md
@ -1,39 +1,39 @@
-# Runtime Reconfiguration
+# Runtime reconfiguration

 etcd comes with support for incremental runtime reconfiguration, which allows users to update the membership of the cluster at run time.

 Reconfiguration requests can only be processed when the majority of the cluster members are functioning. It is **highly recommended** to always have a cluster size greater than two in production. It is unsafe to remove a member from a two member cluster. The majority of a two member cluster is also two. If there is a failure during the removal process, the cluster might not able to make progress and need to [restart from majority failure][majority failure].

-To better understand the design behind runtime reconfiguration, we suggest you read [the runtime reconfiguration document][runtime-reconf].
+To better understand the design behind runtime reconfiguration, we suggest reading [the runtime reconfiguration document][runtime-reconf].

-## Reconfiguration Use Cases
+## Reconfiguration use cases

 Let's walk through some common reasons for reconfiguring a cluster. Most of these just involve combinations of adding or removing a member, which are explained below under [Cluster Reconfiguration Operations][cluster-reconf].

-### Cycle or Upgrade Multiple Machines
+### Cycle or upgrade multiple machines

-If you need to move multiple members of your cluster due to planned maintenance (hardware upgrades, network downtime, etc.), it is recommended to modify members one at a time.
+If multiple cluster members need to move due to planned maintenance (hardware upgrades, network downtime, etc.), it is recommended to modify members one at a time.

-It is safe to remove the leader, however there is a brief period of downtime while the election process takes place. If your cluster holds more than 50MB, it is recommended to [migrate the member's data directory][member migration].
+It is safe to remove the leader, however there is a brief period of downtime while the election process takes place. If the cluster holds more than 50MB, it is recommended to [migrate the member's data directory][member migration].

-### Change the Cluster Size
+### Change the cluster size

 Increasing the cluster size can enhance [failure tolerance][fault tolerance table] and provide better read performance. Since clients can read from any member, increasing the number of members increases the overall read throughput.

 Decreasing the cluster size can improve the write performance of a cluster, with a trade-off of decreased resilience. Writes into the cluster are replicated to a majority of members of the cluster before considered committed. Decreasing the cluster size lowers the majority, and each write is committed more quickly.

-### Replace A Failed Machine
+### Replace a failed machine

-If a machine fails due to hardware failure, data directory corruption, or some other fatal situation, it should be replaced as soon as possible. Machines that have failed but haven't been removed adversely affect your quorum and reduce the tolerance for an additional failure.
+If a machine fails due to hardware failure, data directory corruption, or some other fatal situation, it should be replaced as soon as possible. Machines that have failed but haven't been removed adversely affect the quorum and reduce the tolerance for an additional failure.

-To replace the machine, follow the instructions for [removing the member][remove member] from the cluster, and then [add a new member][add member] in its place. If your cluster holds more than 50MB, it is recommended to [migrate the failed member's data directory][member migration] if you can still access it.
+To replace the machine, follow the instructions for [removing the member][remove member] from the cluster, and then [add a new member][add member] in its place. If the cluster holds more than 50MB, it is recommended to [migrate the failed member's data directory][member migration] if it is still accessible.

-### Restart Cluster from Majority Failure
+### Restart cluster from majority failure

-If the majority of your cluster is lost or all of your nodes have changed IP addresses, then you need to take manual action in order to recover safely.
+If the majority of the cluster is lost or all of the nodes have changed IP addresses, then manual action is necessary to recover safely.
 The basic steps in the recovery process include [creating a new cluster using the old data][disaster recovery], forcing a single member to act as the leader, and finally using runtime configuration to [add new members][add member] to this new cluster one at a time.

-## Cluster Reconfiguration Operations
+## Cluster reconfiguration operations

 Now that we have the use cases in mind, let us lay out the operations involved in each.

@ -42,32 +42,30 @@ This is essentially the same requirement as for any other write to etcd.

 All changes to the cluster are done one at a time:

-* To update a single member peerURLs you will make an update operation
-* To replace a single member you will make an add then a remove operation
-* To increase from 3 to 5 members you will make two add operations
-* To decrease from 5 to 3 you will make two remove operations
+* To update a single member's peerURLs, make an update operation
+* To replace a single member, make an add then a remove operation
+* To increase from 3 to 5 members, make two add operations
+* To decrease from 5 to 3, make two remove operations

 All of these examples will use the `etcdctl` command line tool that ships with etcd.
-If you want to use the members API directly you can find the documentation [here][member-api].
+To change membership without `etcdctl`, use the [v2 HTTP members API][member-api] or the [v3 gRPC members API][member-api-grpc].

-TODO: v3 member API documentation
-
-### Update a Member
+### Update a member

 #### Update advertise client URLs

-If you would like to update the advertise client URLs of a member, you can simply restart
+To update the advertise client URLs of a member, simply restart
 that member with updated client urls flag (`--advertise-client-urls`) or environment variable
 (`ETCD_ADVERTISE_CLIENT_URLS`). The restarted member will self publish the updated URLs.
 A wrongly updated client URL will not affect the health of the etcd cluster.

 #### Update advertise peer URLs

-If you would like to update the advertise peer URLs of a member, you have to first update 
+To update the advertise peer URLs of a member, first update 
 it explicitly via member command and then restart the member. The additional action is required
 since updating peer URLs changes the cluster wide configuration and can affect the health of the etcd cluster. 

-To update the peer URLs, first, we need to find the target member's ID. You can list all members with `etcdctl`:
+To update the peer URLs, first, we need to find the target member's ID. To list all members with `etcdctl`:

 ```sh
 $ etcdctl member list
@ -83,7 +81,7 @@ $ etcdctl member update a8266ecf031671f3 http://10.0.1.10:2380
 Updated member with ID a8266ecf031671f3 in cluster
 ```

-### Remove a Member
+### Remove a member

 Let us say the member ID we want to remove is a8266ecf031671f3.
 We then use the `remove` command to perform the removal:
@ -101,11 +99,11 @@ etcd: this member has been permanently removed from the cluster. Exiting.

 It is safe to remove the leader, however the cluster will be inactive while a new leader is elected. This duration is normally the period of election timeout plus the voting process.

-### Add a New Member
+### Add a new member

 Adding a member is a two step process:

- * Add the new member to the cluster via the [members API][member-api] or the `etcdctl member add` command.
+ * Add the new member to the cluster via the [HTTP members API][member-api], the [gRPC members API][member-api-grpc], or the `etcdctl member add` command.
 * Start the new member with the new cluster configuration, including a list of the updated members (existing members + the new member).

 Using `etcdctl` let's add the new member to the cluster by specifying its [name][conf-name] and [advertised peer URLs][conf-adv-peer]:
@ -131,10 +129,10 @@ $ etcd --listen-client-urls http://10.0.1.13:2379 --advertise-client-urls http:/

 The new member will run as a part of the cluster and immediately begin catching up with the rest of the cluster.

-If you are adding multiple members the best practice is to configure a single member at a time and verify it starts correctly before adding more new members.
-If you add a new member to a 1-node cluster, the cluster cannot make progress before the new member starts because it needs two members as majority to agree on the consensus. You will only see this behavior between the time `etcdctl member add` informs the cluster about the new member and the new member successfully establishing a connection to the existing one.
+If adding multiple members, the best practice is to configure a single member at a time and verify it starts correctly before adding more new members.
+If adding a new member to a 1-node cluster, the cluster cannot make progress before the new member starts because it needs two members as majority to agree on the consensus. This behavior only happens between the time `etcdctl member add` informs the cluster about the new member and the new member successfully establishing a connection to the existing one.

-#### Error Cases When Adding Members
+#### Error cases when adding members

 In the following case we have not included our new host in the list of enumerated nodes.
 If this is a new cluster, the node must be added to the list of initial cluster members.
@ -165,7 +163,7 @@ etcd: this member has been permanently removed from the cluster. Exiting.
 exit 1
 ```

-### Strict Reconfiguration Check Mode (`-strict-reconfig-check`)
+### Strict reconfiguration check mode (`-strict-reconfig-check`)

 As described in the above, the best practice of adding new members is to configure a single member at a time and verify it starts correctly before adding more new members. This step by step approach is very important because if newly added members is not configured correctly (for example the peer URLs are incorrect), the cluster can lose quorum. The quorum loss happens since the newly added member are counted in the quorum even if that member is not reachable from other existing members. Also quorum loss might happen if there is a connectivity issue or there are operational issues.

@ -181,6 +179,7 @@ It is recommended to enable this option. However, it is disabled by default beca
 [fault tolerance table]: ../v2/admin_guide.md#fault-tolerance-table
 [majority failure]: #restart-cluster-from-majority-failure
 [member-api]: ../v2/members_api.md
+[member-api-grpc]: ../dev-guide/api_reference_v3.md#service-cluster-etcdserveretcdserverpbrpcproto
 [member migration]: ../v2/admin_guide.md#member-migration
 [remove member]: #remove-a-member
 [runtime-reconf]: runtime-reconf-design.md
--- a/Documentation/op-guide/runtime-reconf-design.md
+++ b/Documentation/op-guide/runtime-reconf-design.md
@ -1,50 +1,50 @@
-# Design of Runtime Reconfiguration
+# Design of runtime reconfiguration

 Runtime reconfiguration is one of the hardest and most error prone features in a distributed system, especially in a consensus based system like etcd.

 Read on to learn about the design of etcd's runtime reconfiguration commands and how we tackled these problems.

-## Two Phase Config Changes Keep you Safe
+## Two phase config changes keep the cluster safe

-In etcd, every runtime reconfiguration has to go through [two phases][add-member] for safety reasons. For example, to add a member you need to first inform cluster of new configuration and then start the new member.
+In etcd, every runtime reconfiguration has to go through [two phases][add-member] for safety reasons. For example, to add a member, first inform the cluster of the new configuration and then start the new member.

 Phase 1 - Inform cluster of new configuration

-To add a member into etcd cluster, you need to make an API call to request a new member to be added to the cluster. And this is only way that you can add a new member into an existing cluster. The API call returns when the cluster agrees on the configuration change.
+To add a member into an etcd cluster, make an API call to request a new member to be added to the cluster. This is the only way to add a new member into an existing cluster. The API call returns when the cluster agrees on the configuration change.

 Phase 2 - Start new member

-To join the etcd member into the existing cluster, you need to specify the correct `initial-cluster` and set `initial-cluster-state` to `existing`. When the member starts, it will contact the existing cluster first and verify the current cluster configuration matches the expected one specified in `initial-cluster`. When the new member successfully starts, you know your cluster reached the expected configuration.
+To join the etcd member into the existing cluster, specify the correct `initial-cluster` and set `initial-cluster-state` to `existing`. When the member starts, it will contact the existing cluster first and verify the current cluster configuration matches the expected one specified in `initial-cluster`. When the new member successfully starts, the cluster has reached the expected configuration.

 By splitting the process into two discrete phases users are forced to be explicit regarding cluster membership changes. This actually gives users more flexibility and makes things easier to reason about. For example, if there is an attempt to add a new member with the same ID as an existing member in an etcd cluster, the action will fail immediately during phase one without impacting the running cluster. Similar protection is provided to prevent adding new members by mistake. If a new etcd member attempts to join the cluster before the cluster has accepted the configuration change,, it will not be accepted by the cluster.

 Without the explicit workflow around cluster membership etcd would be vulnerable to unexpected cluster membership changes. For example, if etcd is running under an init system such as systemd, etcd would be restarted after being removed via the membership API, and attempt to rejoin the cluster on startup. This cycle would continue every time a member is removed via the API and systemd is set to restart etcd after failing, which is unexpected.

-We think runtime reconfiguration should be a low frequent operation. We made the decision to keep it explicit and user-driven to ensure configuration safety and keep your cluster always running smoothly under your control.
+We expect runtime reconfiguration to be an infrequent operation. We decided to keep it explicit and user-driven to ensure configuration safety and keep the cluster always running smoothly under explicit control.

-## Permanent Loss of Quorum Requires New Cluster
+## Permanent loss of quorum requires new cluster

 If a cluster permanently loses a majority of its members, a new cluster will need to be started from an old data directory to recover the previous state.

-It is entirely possible to force removing the failed members from the existing cluster to recover. However, we decided not to support this method since it bypasses the normal consensus committing phase, which is unsafe. If the member to remove is not actually dead or you force to remove different members through different members in the same cluster, you will end up with diverged cluster with same clusterID. This is very dangerous and hard to debug/fix afterwards. 
+It is entirely possible to force removing the failed members from the existing cluster to recover. However, we decided not to support this method since it bypasses the normal consensus committing phase, which is unsafe. If the member to remove is not actually dead, or if members are force removed through different members in the same cluster, etcd will end up with a diverged cluster with the same clusterID. This is very dangerous and hard to debug/fix afterwards.

-If you have a correct deployment, the possibility of permanent majority lose is very low. But it is a severe enough problem that worth special care. We strongly suggest you to read the [disaster recovery documentation][disaster-recovery] and prepare for permanent majority lose before you put etcd into production.
+With a correct deployment, the possibility of permanent majority loss is very low. But it is a severe enough problem that it is worth special care. We strongly suggest reading the [disaster recovery documentation][disaster-recovery] and preparing for permanent majority loss before putting etcd into production.

-## Do Not Use Public Discovery Service For Runtime Reconfiguration
+## Do not use public discovery service for runtime reconfiguration

-The public discovery service should only be used for bootstrapping a cluster. To join member into an existing cluster, you should use runtime reconfiguration API. 
+The public discovery service should only be used for bootstrapping a cluster. To join a member into an existing cluster, use the runtime reconfiguration API.

-Discovery service is designed for bootstrapping an etcd cluster in the cloud environment, when you do not know the IP addresses of all the members beforehand. After you successfully bootstrap a cluster, the IP addresses of all the members are known. Technically, you should not need the discovery service any more.
+Discovery service is designed for bootstrapping an etcd cluster in the cloud environment, when the IP addresses of all the members are not known beforehand. After successfully bootstrapping a cluster, the IP addresses of all the members are known. Technically, the discovery service should no longer be needed.

 It seems that using public discovery service is a convenient way to do runtime reconfiguration, after all discovery service already has all the cluster configuration information. However relying on public discovery service brings troubles: 

-1. it introduces external dependencies for the entire life-cycle of your cluster, not just bootstrap time. If there is a network issue between your cluster and public discovery service, your cluster will suffer from it.
+1. it introduces external dependencies for the entire life-cycle of the cluster, not just bootstrap time. If there is a network issue between the cluster and public discovery service, the cluster will suffer from it.
 
-2. public discovery service must reflect correct runtime configuration of your cluster during it life-cycle. It has to provide security mechanism to avoid bad actions, and it is hard. 
+2. public discovery service must reflect correct runtime configuration of the cluster during its life-cycle. It has to provide a security mechanism to avoid bad actions, which is hard.

 3. public discovery service has to keep tens of thousands of cluster configurations. Our public discovery service backend is not ready for that workload.

-If you want to have a discovery service that supports runtime reconfiguration, the best choice is to build your private one.
+To have a discovery service that supports runtime reconfiguration, the best choice is to build a private one.

 [add-member]: runtime-configuration.md#add-a-new-member
 [disaster-recovery]: recovery.md
--- a/Documentation/op-guide/security.md
+++ b/Documentation/op-guide/security.md
@ -1,10 +1,10 @@
-# Security Model
+# Security model

-etcd supports SSL/TLS as well as authentication through client certificates, both for clients to server as well as peer (server to server / cluster) communication.
+etcd supports automatic TLS as well as authentication through client certificates for both client to server and peer (server to server / cluster) communication.

-To get up and running you first need to have a CA certificate and a signed key pair for one member. It is recommended to create and sign a new key pair for every member in a cluster.
+To get up and running, first have a CA certificate and a signed key pair for one member. It is recommended to create and sign a new key pair for every member in a cluster.

-For convenience, the [cfssl] tool provides an easy interface to certificate generation, and we provide an example using the tool [here][tls-setup]. You can also examine this [alternative guide to generating self-signed key pairs][tls-guide].
+For convenience, the [cfssl] tool provides an easy interface to certificate generation, and we provide an example using the tool [here][tls-setup]. Alternatively, try this [guide to generating self-signed key pairs][tls-guide].

 ## Basic setup

@ -12,7 +12,7 @@ etcd takes several certificate related configuration options, either through com

 **Client-to-server communication:**

-`--cert-file=<path>`: Certificate used for SSL/TLS connections **to** etcd. When this option is set, you can set advertise-client-urls using HTTPS schema.
+`--cert-file=<path>`: Certificate used for SSL/TLS connections **to** etcd. When this option is set, advertise-client-urls can use the HTTPS schema.

 `--key-file=<path>`: Key for the certificate. Must be unencrypted.

@ -20,6 +20,8 @@ etcd takes several certificate related configuration options, either through com

 `--trusted-ca-file=<path>`: Trusted certificate authority.

+`--auto-tls`: Use automatically generated self-signed certificates for TLS connections with clients.
+
 **Peer (server-to-server / cluster) communication:**

 The peer options work the same way as the client-to-server options:
@ -32,11 +34,13 @@ The peer options work the same way as the client-to-server options:

 `--peer-trusted-ca-file=<path>`: Trusted certificate authority.

+`--peer-auto-tls`: Use automatically generated self-signed certificates for TLS connections between peers.
+
 If either a client-to-server or peer certificate is supplied the key must also be set. All of these configuration options are also available through the environment variables, `ETCD_CA_FILE`, `ETCD_PEER_CA_FILE` and so on.

 ## Example 1: Client-to-server transport security with HTTPS

-For this you need your CA certificate (`ca.crt`) and signed key pair (`server.crt`, `server.key`) ready.
+For this, have a CA certificate (`ca.crt`) and signed key pair (`server.crt`, `server.key`) ready.

 Let us configure etcd to provide simple HTTPS transport security step by step:

@ -46,19 +50,19 @@ $ etcd --name infra0 --data-dir infra0 \
  --advertise-client-urls=https://127.0.0.1:2379 --listen-client-urls=https://127.0.0.1:2379
 ```

-This should start up fine and you can now test the configuration by speaking HTTPS to etcd:
+This should start up fine and it will be possible to test the configuration by speaking HTTPS to etcd:

 ```sh
 $ curl --cacert /path/to/ca.crt https://127.0.0.1:2379/v2/keys/foo -XPUT -d value=bar -v
 ```

-You should be able to see the handshake succeed. Because we use self-signed certificates with our own certificate authorities you need to provide the CA to curl using the `--cacert` option. Another possibility would be to add your CA certificate to the trusted certificates on your system (usually in `/etc/ssl/certs`).
+The command should show that the handshake succeeded. Since we use self-signed certificates with our own certificate authority, the CA must be passed to curl using the `--cacert` option. Another possibility would be to add the CA certificate to the system's trusted certificates directory (usually in `/etc/pki/tls/certs` or `/etc/ssl/certs`).

 **OSX 10.9+ Users**: curl 7.30.0 on OSX 10.9+ doesn't understand certificates passed in on the command line.
-Instead you must import the dummy ca.crt directly into the keychain or add the `-k` flag to curl to ignore errors.
-If you want to test without the `-k` flag run `open ./fixtures/ca/ca.crt` and follow the prompts.
-Please remove this certificate after you are done testing!
-If you know of a workaround let us know.
+Instead, import the dummy ca.crt directly into the keychain or add the `-k` flag to curl to ignore errors.
+To test without the `-k` flag, run `open ./fixtures/ca/ca.crt` and follow the prompts.
+Please remove this certificate after testing!
+If there is a workaround, let us know.

 ## Example 2: Client-to-server authentication with HTTPS client certificates

@ -66,7 +70,7 @@ For now we've given the etcd client the ability to verify the server identity an

 The clients will provide their certificates to the server and the server will check whether the cert is signed by the supplied CA and decide whether to serve the request.

-You need the same files mentioned in the first example for this, as well as a key pair for the client (`client.crt`, `client.key`) signed by the same certificate authority.
+The same files mentioned in the first example are needed for this, as well as a key pair for the client (`client.crt`, `client.key`) signed by the same certificate authority.

 ```sh
 $ etcd --name infra0 --data-dir infra0 \
@ -95,7 +99,7 @@ $ curl --cacert /path/to/ca.crt --cert /path/to/client.crt --key /path/to/client
  -L https://127.0.0.1:2379/v2/keys/foo -XPUT -d value=bar -v
 ```

-You should be able to see:
+The output should include:

 ```
 ...
@ -141,9 +145,43 @@ $ etcd --name infra2 --data-dir infra2 \
  --discovery ${DISCOVERY_URL}
 ```

-The etcd members will form a cluster and all communication between members in the cluster will be encrypted and authenticated using the client certificates. You will see in the output of etcd that the addresses it connects to use HTTPS.
+The etcd members will form a cluster and all communication between members in the cluster will be encrypted and authenticated using the client certificates. The output of etcd will show that the addresses it connects to use HTTPS.

-## Notes For etcd Proxy
+## Example 4: Automatic self-signed transport security
+
+For cases where communication encryption, but not authentication, is needed, etcd supports encrypting its messages with automatically generated self-signed certificates. This simplifies deployment because there is no need for managing certificates and keys outside of etcd.
+
+Configure etcd to use self-signed certificates for client and peer connections with the flags `--auto-tls` and `--peer-auto-tls`:
+
+```sh
+DISCOVERY_URL=... # from https://discovery.etcd.io/new
+
+# member1
+$ etcd --name infra1 --data-dir infra1 \
+  --auto-tls --peer-auto-tls \
+  --initial-advertise-peer-urls=https://10.0.1.10:2380 --listen-peer-urls=https://10.0.1.10:2380 \
+  --discovery ${DISCOVERY_URL}
+
+# member2
+$ etcd --name infra2 --data-dir infra2 \
+  --auto-tls --peer-auto-tls \
+  --initial-advertise-peer-urls=https://10.0.1.11:2380 --listen-peer-urls=https://10.0.1.11:2380 \
+  --discovery ${DISCOVERY_URL}
+```
+
+Self-signed certificates do not authenticate identity so curl will return an error:
+
+```sh
+curl: (60) SSL certificate problem: Invalid certificate chain
+```
+
+To disable certificate chain checking, invoke curl with the `-k` flag:
+
+```sh
+$ curl -k https://127.0.0.1:2379/v2/keys/foo -XPUT -d value=bar -v
+```
+
+## Notes for etcd proxy

 etcd proxy terminates the TLS from its client if the connection is secure, and uses proxy's own key/cert specified in `--peer-key-file` and `--peer-cert-file` to communicate with etcd members.

@ -151,23 +189,16 @@ The proxy communicates with etcd members through both the `--advertise-client-ur

 When client authentication is enabled for an etcd member, the administrator must ensure that the peer certificate specified in the proxy's `--peer-cert-file` option is valid for that authentication. The proxy's peer certificate must also be valid for peer authentication if peer authentication is enabled.

-## Frequently Asked Questions
+## Frequently asked questions

-### My cluster is not working with peer tls configuration?
-
-The internal protocol of etcd v2.0.x uses a lot of short-lived HTTP connections.
-So, when enabling TLS you may need to increase the heartbeat interval and election timeouts to reduce internal cluster connection churn.
-A reasonable place to start are these values: ` --heartbeat-interval 500 --election-timeout 2500`.
-These issues are resolved in the etcd v2.1.x series of releases which uses fewer connections.
-
-### I'm seeing a SSLv3 alert handshake failure when using SSL client authentication?
+### I'm seeing a SSLv3 alert handshake failure when using TLS client authentication?

 The `crypto/tls` package of `golang` checks the key usage of the certificate public key before using it.
 To use the certificate public key to do client auth, we need to add `clientAuth` to `Extended Key Usage` when creating the certificate public key.

 Here is how to do it:

-Add the following section to your openssl.cnf:
+Add the following section to openssl.cnf:

 ```
 [ ssl_client ]
@ -183,9 +214,9 @@ $ openssl ca -config openssl.cnf -policy policy_anything -extensions ssl_client
 ```

 ### With peer certificate authentication I receive "certificate is valid for 127.0.0.1, not $MY_IP"
-Make sure that you sign your certificates with a Subject Name your member's public IP address. The `etcd-ca` tool for example provides an `--ip=` option for its `new-cert` command.
+Make sure to sign the certificates with a Subject Name set to the member's public IP address. The `etcd-ca` tool for example provides an `--ip=` option for its `new-cert` command.

-If you need your certificate to be signed for your member's FQDN in its Subject Name then you could use Subject Alternative Names (short IP SANs) to add your IP address. The `etcd-ca` tool provides `--domain=` option for its `new-cert` command, and openssl can make [it][alt-name] too.
+If the certificate needs to be signed for the member's FQDN in its Subject Name, use Subject Alternative Names (short IP SANs) to add the IP address. The `etcd-ca` tool provides `--domain=` option for its `new-cert` command, and openssl can make [it][alt-name] too.

 [cfssl]: https://github.com/cloudflare/cfssl
 [tls-setup]: /hack/tls-setup
--- a/Documentation/op-guide/supported-platform.md
+++ b/Documentation/op-guide/supported-platform.md
@ -0,0 +1,14 @@
+## Supported platform
+
+### 32-bit and other unsupported systems
+
+etcd has known issues on 32-bit systems due to a bug in the Go runtime. See #[358][358] for more information.
+
+To avoid inadvertently running a possibly unstable etcd server, `etcd` on unsupported architectures will print
+a warning message and immediately exit if the environment variable `ETCD_UNSUPPORTED_ARCH` is not set to
+the target architecture.
+
+Currently only the amd64 architecture is officially supported by `etcd`.
+
+[358]: https://github.com/coreos/etcd/issues/358
+
--- a/Documentation/op-guide/v2-migration.md
+++ b/Documentation/op-guide/v2-migration.md
@ -0,0 +1,47 @@
+# Migrate applications from using API v2 to API v3
+
+The data store v2 is still accessible from the API v2 after upgrading to etcd3. Thus, it will work as before and require no application changes. With etcd 3, applications use the new grpc API v3 to access the mvcc store, which provides more features and improved performance. The mvcc store and the old store v2 are separate and isolated; writes to the store v2 will not affect the mvcc store and, similarly, writes to the mvcc store will not affect the store v2.
+
+Migrating an application from the API v2 to the API v3 involves two steps: 1) migrate the client library and, 2) migrate the data. If the application can rebuild the data, then migrating the data is unnecessary.
+
+## Migrate client library
+
+API v3 is different from API v2, thus application developers need to use a new client library to send requests to etcd API v3. The documentation of the client v3 is available at https://godoc.org/github.com/coreos/etcd/clientv3. 
+
+There are some notable differences between API v2 and API v3:
+
+- Transaction: In v3, etcd provides multi-key conditional transactions. Applications should use transactions in place of `Compare-And-Swap` operations.
+
+- Flat key space: There are no directories in API v3, only keys. For example, "/a/b/c/" is a key. Range queries support getting all keys matching a given prefix.
+
+- Compacted responses: Operations like `Delete` no longer return previous values. To get the deleted value, a transaction can be used to atomically get the key and then delete its value.
+
+- Leases: A replacement for v2 TTLs; the TTL is bound to a lease and keys attach to the lease. When the TTL expires, the lease is revoked and all attached keys are removed.
+
+## Migrate data
+
+Application data can be migrated either offline or online. Offline migration is much simpler than online migration and is recommended.
+
+### Offline migration
+
+Offline migration is very simple but requires etcd downtime. If an etcd downtime window spanning from seconds to minutes is acceptable, offline migration is a good choice and is easy to automate.
+
+First, all members in the etcd cluster must converge to the same state. This can be achieved by stopping all applications that write keys to etcd. Alternatively, if the applications must remain running, configure etcd to listen on a different client URL and restart all etcd members. To check if the states converged, within a few seconds, use the `ETCDCTL_API=3 etcdctl endpoint status` command to confirm that the `raft index` of all members match (or differ by at most 1 due to an internal sync raft command).
+
+Second, migrate the v2 keys into v3 with the [migrate][migrate_command] (`ETCDCTL_API=3 etcdctl migrate`) command. The migrate command writes keys in the v2 store to a user-provided transformer program and reads back transformed keys. It then writes transformed keys into the mvcc store. This usually takes at most tens of seconds.
+
+Restart the etcd members and everything should just work.
+
+### Online migration
+
+If the application cannot tolerate any downtime, then it must migrate online. The implementation of online migration will vary from application to application but the overall idea is the same.
+
+First, write application code using the v3 API. The application must support two modes: a migration mode and a normal mode. The application starts in migration mode. When running in migration mode, the application reads keys using the v3 API first, and, if it cannot find the key, it retries with the API v2. In normal mode, the application only reads keys using the v3 API. The application writes keys over the API v3 in both modes. To acknowledge a switch from migration mode to normal mode, the application watches on a switch mode key. When the switch mode key’s value turns to `true`, the application switches over from migration mode to normal mode.
+
+Second, start a background job to migrate data from the store v2 to the mvcc store by reading keys from the API v2 and writing keys to the API v3. 
+
+After finishing data migration, the background job writes `true` into the switch mode key to notify the application that it may switch modes.
+
+Online migration can be difficult when the application logic depends on store v2 indexes. Applications will need additional logic to convert mvcc store revisions to store v2 indexes.
+
+[migrate_command]: ../../etcdctl/README.md#migrate-options
--- a/Documentation/op-guide/versioning.md
+++ b/Documentation/op-guide/versioning.md
@ -0,0 +1,17 @@
+## Versioning
+
+### Service versioning
+
+etcd uses [semantic versioning](http://semver.org).
+New minor versions may add additional features to the API.
+
+Get the running etcd cluster version with `etcdctl`:
+
+```sh
+ETCDCTL_API=3 etcdctl --endpoints=127.0.0.1:2379 endpoint status
+```
+
+### API versioning
+
+The `v3` API responses should not change after the 3.0.0 release but new features will be added over time.
+
--- a/Documentation/production-users.md
+++ b/Documentation/production-users.md
@ -1,6 +1,6 @@
-# Production Users
+# Production users

-This document tracks people and use cases for etcd in production. By creating a list of production use cases we hope to build a community of advisors that we can reach out to with experience using various etcd applications, operation environments, and cluster sizes. The etcd development team may reach out periodically to check-in on your experience and update this list.
+This document tracks people and use cases for etcd in production. By creating a list of production use cases we hope to build a community of advisors that we can reach out to with experience using various etcd applications, operation environments, and cluster sizes. The etcd development team may reach out periodically to check in on how etcd is working in the field and update this list.

 ## discovery.etcd.io

--- a/Documentation/reporting_bugs.md
+++ b/Documentation/reporting_bugs.md
@ -1,24 +1,24 @@
-# Reporting Bugs
+# Reporting bugs

-If you find bugs or documentation mistakes in the etcd project, please let us know by [opening an issue][issue]. We treat bugs and mistakes very seriously and believe no issue is too small. Before creating a bug report, please check that an issue reporting the same problem does not already exist.
+If any part of the etcd project has bugs or documentation mistakes, please let us know by [opening an issue][issue]. We treat bugs and mistakes very seriously and believe no issue is too small. Before creating a bug report, please check that an issue reporting the same problem does not already exist.

-To make your bug report accurate and easy to understand, please try to create bug reports that are:
+To make the bug report accurate and easy to understand, please try to create bug reports that are:

- Specific. Include as much details as possible: which version, what environment, what configuration, etc. You can also attach etcd log (the starting log with etcd configuration is especially important).
+- Specific. Include as much detail as possible: which version, what environment, what configuration, etc. If the bug is related to running the etcd server, please attach the etcd log (the starting log with etcd configuration is especially important).

- Reproducible. Include the steps to reproduce the problem. We understand some issues might be hard to reproduce, please includes the steps that might lead to the problem. You can also attach the affected etcd data dir and stack strace to the bug report.
+- Reproducible. Include the steps to reproduce the problem. We understand some issues might be hard to reproduce; in that case, please include the steps that might lead to the problem. If possible, please attach the affected etcd data dir and stack trace to the bug report.

- Isolated. Please try to isolate and reproduce the bug with minimum dependencies. It would significantly slow down the speed to fix a bug if too many dependencies are involved in a bug report. Debugging external systems that rely on etcd is out of scope, but we are happy to point you in the right direction or help you interact with etcd in the correct manner.
+- Isolated. Please try to isolate and reproduce the bug with minimum dependencies. It would significantly slow down the speed to fix a bug if too many dependencies are involved in a bug report. Debugging external systems that rely on etcd is out of scope, but we are happy to provide guidance in the right direction or help with using etcd itself.

 - Unique. Do not duplicate existing bug report.

 - Scoped. One bug per report. Do not follow up with another bug inside one report.

-You might also want to read [Elika Etemad’s article on filing good bug reports][filing-good-bugs] before creating a bug report.
+It may be worthwhile to read [Elika Etemad’s article on filing good bug reports][filing-good-bugs] before creating a bug report.

-We might ask you for further information to locate a bug. A duplicated bug report will be closed.
+We might ask for further information to locate a bug. A duplicated bug report will be closed.

-## Frequently Asked Questions
+## Frequently asked questions

 ### How to get a stack trace

@ -39,7 +39,7 @@ $ sudo systemctl cat etcd2
 $ sudo journalctl -u etcd2
 ```

-Due to an upstream systemd bug, journald may miss the last few log lines when its process exit. If journalctl tells you that etcd stops without fatal or panic message, you could try `sudo journalctl -f -t etcd2` to get full log.
+Due to an upstream systemd bug, journald may miss the last few log lines when its processes exit. If journalctl says etcd stopped without a fatal or panic message, try `sudo journalctl -f -t etcd2` to get the full log.

 [etcd-issue]: https://github.com/coreos/etcd/issues/new
 [filing-good-bugs]: http://fantasai.inkedblade.net/style/talks/filing-good-bugs/
--- a/Documentation/tuning.md
+++ b/Documentation/tuning.md
@ -1,11 +1,10 @@
 # Tuning

-The default settings in etcd should work well for installations on a local network where the average network latency is low.
-However, when using etcd across multiple data centers or over networks with high latency you may need to tweak the heartbeat interval and election timeout settings.
+The default settings in etcd should work well for installations on a local network where the average network latency is low. However, when using etcd across multiple data centers or over networks with high latency, the heartbeat interval and election timeout settings may need tuning.

 The network isn't the only source of latency. Each request and response may be impacted by slow disks on both the leader and follower. Each of these timeouts represents the total time from request to successful response from the other machine.

-## Time Parameters
+## Time parameters

 The underlying distributed consensus protocol relies on two separate time parameters to ensure that nodes can handoff leadership if one stalls or goes offline.
 The first parameter is called the *Heartbeat Interval*.
@ -24,20 +23,20 @@ On the other side, a too high heartbeat interval leads to high election timeout.
 The easiest way to measure round-trip time (RTT) is to use [PING utility][ping].

 The election timeout should be set based on the heartbeat interval and average round-trip time between members.
-Election timeouts must be at least 10 times the round-trip time so it can account for variance in your network.
-For example, if the round-trip time between your members is 10ms then you should have at least a 100ms election timeout.
+Election timeouts must be at least 10 times the round-trip time so it can account for variance in the network.
+For example, if the round-trip time between members is 10ms then the election timeout should be at least 100ms.

-You should also set your election timeout to at least 5 to 10 times your heartbeat interval to account for variance in leader replication.
-For a heartbeat interval of 50ms you should set your election timeout to at least 250ms - 500ms.
+The election timeout should be set to at least 5 to 10 times the heartbeat interval to account for variance in leader replication.
+For a heartbeat interval of 50ms, set the election timeout to at least 250ms - 500ms.

 The upper limit of election timeout is 50000ms (50s), which should only be used when deploying a globally-distributed etcd cluster.
 A reasonable round-trip time for the continental United States is 130ms, and the time between US and Japan is around 350-400ms.
-If your network has uneven performance or regular packet delays/loss then it is possible that a couple of retries may be necessary to successfully send a packet. So 5s is a safe upper limit of global round-trip time.
+If the network has uneven performance or regular packet delays/loss then it is possible that a couple of retries may be necessary to successfully send a packet. So 5s is a safe upper limit of global round-trip time.
 As the election timeout should be an order of magnitude bigger than broadcast time, in the case of ~5s for a globally distributed cluster, then 50 seconds becomes a reasonable maximum.

 The heartbeat interval and election timeout value should be the same for all members in one cluster. Setting different values for etcd members may disrupt cluster stability.

-You can override the default values on the command line:
+The default values can be overridden on the command line:

 ```sh
 # Command line arguments:
@ -58,11 +57,11 @@ A complete history works well for lightly used clusters but clusters that are he
 To avoid having a huge log etcd makes periodic snapshots.
 These snapshots provide a way for etcd to compact the log by saving the current state of the system and removing old logs.

-### Snapshot Tuning
+### Snapshot tuning

 Creating snapshots can be expensive so they're only created after a given number of changes to etcd.
 By default, snapshots will be made after every 10,000 changes.
-If etcd's memory usage and disk usage are too high, you can lower the snapshot threshold by setting the following on the command line:
+If etcd's memory usage and disk usage are too high, try lowering the snapshot threshold by setting the following on the command line:

 ```sh
 # Command line arguments:
--- a/Documentation/upgrades/upgrade_3_0.md
+++ b/Documentation/upgrades/upgrade_3_0.md
@ -0,0 +1,119 @@
+## Upgrade etcd from 2.3 to 3.0
+
+In the general case, upgrading from etcd 2.3 to 3.0 can be a zero-downtime, rolling upgrade:
+ - one by one, stop the etcd v2.3 processes and replace them with etcd v3.0 processes
+ - after running all v3.0 processes, new features in v3.0 are available to the cluster
+
+Before [starting an upgrade](#upgrade-procedure), read through the rest of this guide to prepare.
+
+### Upgrade Checklists
+
+#### Upgrade Requirements
+
+To upgrade an existing etcd deployment to 3.0, the running cluster must be 2.3 or greater. If it's before 2.3, please upgrade to [2.3](https://github.com/coreos/etcd/releases/tag/v2.3.0) before upgrading to 3.0.
+
+Also, to ensure a smooth rolling upgrade, the running cluster must be healthy. You can check the health of the cluster by using the `etcdctl cluster-health` command.
+
+#### Preparation
+
+Before upgrading etcd, always test the services relying on etcd in a staging environment before deploying the upgrade to the production environment.
+
+Before beginning, [backup the etcd data directory](../v2/admin_guide.md#backing-up-the-datastore). Should something go wrong with the upgrade, it is possible to use this backup to [downgrade](#downgrade) back to the existing etcd version.
+
+#### Mixed Versions
+
+While upgrading, an etcd cluster supports mixed versions of etcd members, and operates with the protocol of the lowest common version. The cluster is only considered upgraded once all of its members are upgraded to version 3.0. Internally, etcd members negotiate with each other to determine the overall cluster version, which controls the reported version and the supported features.
+
+#### Limitations
+
+It might take up to 2 minutes for the newly upgraded member to catch up with the existing cluster when the total data size is larger than 50MB. Check the size of a recent  snapshot to estimate  the total data size. In other words, it is safest to wait for 2 minutes between upgrading each member.
+
+For a much larger total data size, 100MB or more, this one-time process might take even more time. Administrators of very large etcd clusters of this magnitude can feel free to contact the [etcd team][etcd-contact] before upgrading, and we’ll be happy to provide advice on the procedure.
+
+#### Downgrade
+
+If all members have been upgraded to v3.0, the cluster will be upgraded to v3.0, and downgrade from this completed state is **not possible**. If any single member is still v2.3, however, the cluster and its operations remain “v2.3”, and it is possible from this mixed cluster state to return to using a v2.3 etcd binary on all members.
+
+Please [backup the data directory](../v2/admin_guide.md#backing-up-the-datastore) of all etcd members to make downgrading the cluster possible even after it has been completely upgraded.
+
+### Upgrade Procedure
+
+This example details the upgrade of a three-member v2.3 etcd cluster running on a local machine.
+
+#### 1. Check upgrade requirements.
+
+Is the cluster healthy and running v2.3.x?
+
+```
+$ etcdctl cluster-health
+member 6e3bd23ae5f1eae0 is healthy: got healthy result from http://localhost:22379
+member 924e2e83e93f2560 is healthy: got healthy result from http://localhost:32379
+member 8211f1d0f64f3269 is healthy: got healthy result from http://localhost:12379
+cluster is healthy
+
+$ curl http://localhost:2379/version
+{"etcdserver":"2.3.x","etcdcluster":"2.3.0"}
+```
+
+#### 2. Stop the existing etcd process
+
+When each etcd process is stopped, expected errors will be logged by other cluster members. This is normal since a cluster member connection has been (temporarily) broken:
+
+```
+2016-06-27 15:21:48.624124 E | rafthttp: failed to dial 8211f1d0f64f3269 on stream Message (dial tcp 127.0.0.1:12380: getsockopt: connection refused)
+2016-06-27 15:21:48.624175 I | rafthttp: the connection with 8211f1d0f64f3269 became inactive
+```
+
+It’s a good idea at this point to  [backup the etcd data directory](../v2/admin_guide.md#backing-up-the-datastore) to provide a downgrade path should any problems occur:
+
+```
+$ etcdctl backup \
+      --data-dir /var/lib/etcd \
+      --backup-dir /tmp/etcd_backup
+```
+
+#### 3. Drop-in etcd v3.0 binary and start the new etcd process
+
+The new v3.0 etcd will publish its information to the cluster:
+
+```
+09:58:25.938673 I | etcdserver: published {Name:infra1 ClientURLs:[http://localhost:12379]} to cluster 524400597fb1d5f6
+```
+
+Verify that each member, and then the entire cluster, becomes healthy with the new v3.0 etcd binary:
+
+```
+$ etcdctl cluster-health
+member 6e3bd23ae5f1eae0 is healthy: got healthy result from http://localhost:22379
+member 924e2e83e93f2560 is healthy: got healthy result from http://localhost:32379
+member 8211f1d0f64f3269 is healthy: got healthy result from http://localhost:12379
+cluster is healthy
+```
+
+
+Upgraded members will log warnings like the following until the entire cluster is upgraded. This is expected and will cease after all etcd cluster members are upgraded to v3.0:
+
+```
+2016-06-27 15:22:05.679644 W | etcdserver: the local etcd version 2.3.7 is not up-to-date
+2016-06-27 15:22:05.679660 W | etcdserver: member 8211f1d0f64f3269 has a higher version 3.0.0
+```
+
+#### 4. Repeat step 2 to step 3 for all other members
+
+#### 5. Finish
+
+When all members are upgraded, the cluster will report upgrading to 3.0 successfully:
+
+```
+2016-06-27 15:22:19.873751 N | membership: updated the cluster version from 2.3 to 3.0
+2016-06-27 15:22:19.914574 I | api: enabled capabilities for version 3.0.0
+```
+
+```
+$ ETCDCTL_API=3 etcdctl endpoint health
+127.0.0.1:12379 is healthy: successfully committed proposal: took = 18.440155ms
+127.0.0.1:32379 is healthy: successfully committed proposal: took = 13.651368ms
+127.0.0.1:22379 is healthy: successfully committed proposal: took = 18.513301ms
+```
+
+[etcd-contact]: https://groups.google.com/forum/#!forum/etcd-dev
--- a/Documentation/v2/README.md
+++ b/Documentation/v2/README.md
@ -118,6 +118,17 @@ See [CONTRIBUTING](../../CONTRIBUTING.md) for details on submitting patches and

 See [reporting bugs](reporting_bugs.md) for details about reporting any issue you may encounter.

+## Known bugs
+
+[GH518](https://github.com/coreos/etcd/issues/518) is a known bug. Issue is that:
+
+```
+curl http://127.0.0.1:2379/v2/keys/foo -XPUT -d value=bar
+curl http://127.0.0.1:2379/v2/keys/foo -XPUT -d dir=true -d prevExist=true
+```
+
+If the previous node is a key and a client tries to overwrite it with `dir=true`, it does not give warnings such as `Not a directory`. Instead, the key is set to an empty value.
+
 ## Project Details

 ### Versioning
@ -141,7 +152,7 @@ The `v2` API responses should not change after the 2.0.0 release but new feature

 etcd has known issues on 32-bit systems due to a bug in the Go runtime. See #[358][358] for more information.

-To avoid inadvertantly running a possibly unstable etcd server, `etcd` on unsupported architectures will print
+To avoid inadvertently running a possibly unstable etcd server, `etcd` on unsupported architectures will print
 a warning message and immediately exit if the environment variable `ETCD_UNSUPPORTED_ARCH` is not set to
 the target architecture.

--- a/Documentation/v2/admin_guide.md
+++ b/Documentation/v2/admin_guide.md
@ -113,7 +113,8 @@ It is recommended to have an odd number of members in a cluster. Having an odd c
 | Cluster Size | Majority   | Failure Tolerance |
 |--------------|------------|-------------------|
 | 1 | 1 | 0 |
-| 3 | 2 | 1 |
+| 2 | 2 | 0 |
+| 3 | 2 | **1** |
 | 4 | 3 | 1 |
 | 5 | 3 | **2** |
 | 6 | 4 | 2 |
@ -135,7 +136,7 @@ The data directory contains all the data to recover a member to its point-in-tim

 * Stop the member process.
 * Copy the data directory of the now-idle member to the new machine.
-* Update the peer URLs for the replaced member to reflect the new machine according to the [runtime reconfiguration instructions][update-member].
+* Update the peer URLs for the replaced member to reflect the new machine according to the [runtime reconfiguration instructions][update-a-member].
 * Start etcd on the new machine, using the same configuration and the copy of the data directory.

 This example will walk you through the process of migrating the infra1 member to a new machine:
@ -217,12 +218,14 @@ To recover from such scenarios, etcd provides functionality to backup and restor

 **NB:** Windows users must stop etcd before running the backup command.

-The first step of the recovery is to backup the data directory on a functioning etcd node. To do this, use the `etcdctl backup` command, passing in the original data directory used by etcd. For example:
+The first step of the recovery is to backup the data directory and wal directory, if stored separately, on a functioning etcd node. To do this, use the `etcdctl backup` command, passing in the original data (and wal) directory used by etcd. For example:

 ```sh
    etcdctl backup \
      --data-dir %data_dir% \
+      [--wal-dir %wal_dir%] \
       --backup-dir %backup_data_dir% \
+      [--backup-wal-dir %backup_wal_dir%]
 ```

 This command will rewrite some of the metadata contained in the backup (specifically, the node ID and cluster ID), which means that the node will lose its former identity. In order to recreate a cluster from the backup, you will need to start a new, single-node cluster. The metadata is rewritten to prevent the new node from inadvertently being joined onto an existing cluster.
@ -234,26 +237,30 @@ To restore a backup using the procedure created above, start etcd with the `-for
 ```sh
    etcd \
      -data-dir=%backup_data_dir% \
+      [-wal-dir=%backup_wal_dir%] \
      -force-new-cluster \
      ...
 ```

 Now etcd should be available on this node and serving the original datastore.

-Once you have verified that etcd has started successfully, shut it down and move the data back to the previous location (you may wish to make another copy as well to be safe):
+Once you have verified that etcd has started successfully, shut it down and move the data and wal, if stored separately, back to the previous location (you may wish to make another copy as well to be safe):

 ```sh
    pkill etcd
    rm -fr %data_dir%
+    rm -fr %wal_dir%
    mv %backup_data_dir% %data_dir%
+    mv %backup_wal_dir% %wal_dir%
    etcd \
      -data-dir=%data_dir% \
+      [-wal-dir=%wal_dir%] \
      ...
 ```

 #### Restoring the cluster

-Now that the node is running successfully, [change its advertised peer URLs][update-member], as the `--force-new-cluster` option has set the peer URL to the default listening on localhost.
+Now that the node is running successfully, [change its advertised peer URLs][update-a-member], as the `--force-new-cluster` option has set the peer URL to the default listening on localhost.

 You can then add more nodes to the cluster and restore resiliency. See the [add a new member][add-a-member] guide for more details. **NB:** If you are trying to restore your cluster using old failed etcd nodes, please make sure you have stopped old etcd instances and removed their old data directories specified by the data-dir configuration parameter.

--- a/Documentation/v2/api.md
+++ b/Documentation/v2/api.md
@ -233,10 +233,11 @@ curl http://127.0.0.1:2379/v2/keys/foo -XPUT -d value=bar -d ttl= -d prevExist=t

 ### Refreshing key TTL

-Keys in etcd can be refreshed without notifying watchers
-this can be achieved by setting the refresh to true when updating a TTL
+Keys in etcd can be refreshed without notifying current watchers.

-You cannot update the value of a key when refreshing it
+This can be achieved by setting the refresh to true when updating a TTL.
+
+You cannot update the value of a key when refreshing it.

 ```sh
 curl http://127.0.0.1:2379/v2/keys/foo -XPUT -d value=bar -d ttl=5
--- a/Documentation/v2/auth_api.md
+++ b/Documentation/v2/auth_api.md
@ -145,8 +145,8 @@ GET/HEAD  /v2/auth/users
                  "role": "root",
                  "permissions": {
                    "kv": {
-                      "read": ["*"],
-                      "write": ["*"]
+                      "read": ["/*"],
+                      "write": ["/*"]
                    }
                  }
                }
@ -159,8 +159,8 @@ GET/HEAD  /v2/auth/users
                  "role": "guest",
                  "permissions": {
                    "kv": {
-                      "read": ["*"],
-                      "write": ["*"]
+                      "read": ["/*"],
+                      "write": ["/*"]
                    }
                  }
                }
@ -198,8 +198,8 @@ GET/HEAD  /v2/auth/users/alice
              "role": "etcd",
              "permissions" : {
                "kv" : {
-                  "read": [ "*" ],
-                  "write": [ "*" ]
+                  "read": [ "/*" ],
+                  "write": [ "/*" ]
                }
              }
            }
@ -311,8 +311,8 @@ GET/HEAD  /v2/auth/roles
              "role": "etcd",
              "permissions": {
                "kv": {
-                  "read": ["*"],
-                  "write": ["*"]
+                  "read": ["/*"],
+                  "write": ["/*"]
                }
              }
            },
@ -320,8 +320,8 @@ GET/HEAD  /v2/auth/roles
              "role": "quay",
              "permissions": {
                "kv": {
-                  "read": ["*"],
-                  "write": ["*"]
+                  "read": ["/*"],
+                  "write": ["/*"]
                }
              }
            }
@ -393,7 +393,7 @@ PUT  /v2/auth/roles/guest
          "revoke" : {
            "kv" : {
              "write": [
-                "*"
+                "/*"
              ]
            }
          }
--- a/Documentation/v2/clustering.md
+++ b/Documentation/v2/clustering.md
@ -309,6 +309,7 @@ infra0.example.com.  300  IN  A  10.0.1.10
 infra1.example.com.  300  IN  A  10.0.1.11
 infra2.example.com.  300  IN  A  10.0.1.12
 ```
+
 #### Bootstrap the etcd cluster using DNS

 etcd cluster members can listen on domain names or IP address, the bootstrap process will resolve DNS A records.
--- a/Documentation/v2/proxy.md
+++ b/Documentation/v2/proxy.md
@ -49,7 +49,7 @@ To start a proxy that will connect to a statically defined etcd cluster, specify

 ```
 etcd --proxy on \
--listen-client-urls http://127.0.0.1:8080 \
+--listen-client-urls http://127.0.0.1:2379 \
 --initial-cluster infra0=http://10.0.1.10:2380,infra1=http://10.0.1.11:2380,infra2=http://10.0.1.12:2380
 ```

@ -60,7 +60,7 @@ To start a proxy using the discovery service, specify the `discovery` flag. The

 ```
 etcd --proxy on \
--listen-client-urls http://127.0.0.1:8080 \
+--listen-client-urls http://127.0.0.1:2379 \
 --discovery https://discovery.etcd.io/3e86b59982e49066c5d813af1c2e2579cbf573de \
 ```

--- a/3
+++ b/3
@ -1,6 +1,7 @@
-Anthony Romano <anthony.romano@coreos.com > (@heyitsanthony) pkg:*
+Anthony Romano <anthony.romano@coreos.com> (@heyitsanthony) pkg:*
 Brandon Philips <brandon.philips@coreos.com> (@philips) pkg:*
 Gyu-Ho Lee <gyu_ho.lee@coreos.com> (@gyuho) pkg:*
 Xiang Li <xiang.li@coreos.com> (@xiang90) pkg:*

 Ben Darnell <ben@cockroachlabs.com> (@bdarnell) pkg:github.com/coreos/etcd/raft
+Hitoshi Mitake <mitake.hitoshi@lab.ntt.co.jp> (@mitake) pkg:github.com/coreos/etcd/auth
--- a/5
+++ b/5
@ -1,5 +1,6 @@
 # Use goreman to run `go get github.com/mattn/goreman`
-etcd1: bin/etcd --name infra1 --listen-client-urls http://127.0.0.1:12379 --advertise-client-urls http://127.0.0.1:12379 --listen-peer-urls http://127.0.0.1:12380 --initial-advertise-peer-urls http://127.0.0.1:12380 --initial-cluster-token etcd-cluster-1 --initial-cluster 'infra1=http://127.0.0.1:12380,infra2=http://127.0.0.1:22380,infra3=http://127.0.0.1:32380' --initial-cluster-state new --enable-pprof
+etcd1: bin/etcd --name infra1 --listen-client-urls http://127.0.0.1:2379 --advertise-client-urls http://127.0.0.1:2379 --listen-peer-urls http://127.0.0.1:12380 --initial-advertise-peer-urls http://127.0.0.1:12380 --initial-cluster-token etcd-cluster-1 --initial-cluster 'infra1=http://127.0.0.1:12380,infra2=http://127.0.0.1:22380,infra3=http://127.0.0.1:32380' --initial-cluster-state new --enable-pprof
 etcd2: bin/etcd --name infra2 --listen-client-urls http://127.0.0.1:22379 --advertise-client-urls http://127.0.0.1:22379 --listen-peer-urls http://127.0.0.1:22380 --initial-advertise-peer-urls http://127.0.0.1:22380 --initial-cluster-token etcd-cluster-1 --initial-cluster 'infra1=http://127.0.0.1:12380,infra2=http://127.0.0.1:22380,infra3=http://127.0.0.1:32380' --initial-cluster-state new --enable-pprof
 etcd3: bin/etcd --name infra3 --listen-client-urls http://127.0.0.1:32379 --advertise-client-urls http://127.0.0.1:32379 --listen-peer-urls http://127.0.0.1:32380 --initial-advertise-peer-urls http://127.0.0.1:32380 --initial-cluster-token etcd-cluster-1 --initial-cluster 'infra1=http://127.0.0.1:12380,infra2=http://127.0.0.1:22380,infra3=http://127.0.0.1:32380' --initial-cluster-state new --enable-pprof
-proxy: bin/etcd --name infra-proxy1 --proxy=on --listen-client-urls http://127.0.0.1:2379 --initial-cluster 'infra1=http://127.0.0.1:12380,infra2=http://127.0.0.1:22380,infra3=http://127.0.0.1:32380' --enable-pprof
+# in future, use proxy to listen on 2379
+#proxy: bin/etcd --name infra-proxy1 --proxy=on --listen-client-urls http://127.0.0.1:2378 --initial-cluster 'infra1=http://127.0.0.1:12380,infra2=http://127.0.0.1:22380,infra3=http://127.0.0.1:32380' --enable-pprof
--- a/README.md
+++ b/README.md
@ -14,22 +14,24 @@
 etcd is a distributed, consistent key-value store for shared configuration and service discovery, with a focus on being:

 * *Simple*: well-defined, user-facing API (gRPC)
-* *Secure*: optional SSL client cert authentication
-* *Fast*: benchmarked 1000s of writes/s per instance
+* *Secure*: automatic TLS with optional client cert authentication
+* *Fast*: benchmarked 10,000 writes/sec
 * *Reliable*: properly distributed using Raft

 etcd is written in Go and uses the [Raft][raft] consensus algorithm to manage a highly-available replicated log.

-etcd is used [in production by many companies](./Documentation/production-users.md), and the development team stands behind it in critical deployment scenarios, where etcd is frequently teamed with applications such as [Kubernetes][k8s], [fleet][fleet], [locksmith][locksmith], [vulcand][vulcand], and many others.
+etcd is used [in production by many companies](./Documentation/production-users.md), and the development team stands behind it in critical deployment scenarios, where etcd is frequently teamed with applications such as [Kubernetes][k8s], [fleet][fleet], [locksmith][locksmith], [vulcand][vulcand], [Doorman][doorman], and many others. Reliability is further ensured by rigorous [testing][etcd-tests].

 See [etcdctl][etcdctl] for a simple command line client.

 [raft]: https://raft.github.io/
 [k8s]: http://kubernetes.io/
+[doorman]: https://github.com/youtube/doorman
 [fleet]: https://github.com/coreos/fleet
 [locksmith]: https://github.com/coreos/locksmith
 [vulcand]: https://github.com/vulcand/vulcand
 [etcdctl]: https://github.com/coreos/etcd/tree/master/etcdctl
+[etcd-tests]: http://dash.etcd.io

 ## Getting started

@ -38,7 +40,7 @@ See [etcdctl][etcdctl] for a simple command line client.
 The easiest way to get etcd is to use one of the pre-built release binaries which are available for OSX, Linux, Windows, AppC (ACI), and Docker. Instructions for using these binaries are on the [GitHub releases page][github-release].

 For those wanting to try the very latest version, you can build the latest version of etcd from the `master` branch.
-You will first need [*Go*](https://golang.org/) installed on your machine (version 1.5+ is required).
+You will first need [*Go*](https://golang.org/) installed on your machine (version 1.6+ is required).
 All development occurs on `master`, including new features and bug fixes.
 Bug fixes are first targeted at `master` and subsequently ported to release branches, as described in the [branch management][branch-management] guide.

@ -62,7 +64,13 @@ ETCDCTL_API=3 etcdctl put mykey "this is awesome"
 ETCDCTL_API=3 etcdctl get mykey
 ```

-That's it-- etcd is running and serving keys.
+That's it! etcd is now running and serving client requests. For more, see:
+
+- [Animated quick demo][demo-gif]
+- [Interactive etcd playground][etcd-play]
+
+[demo-gif]: ./Documentation/demo.md
+[etcd-play]: http://play.etcd.io/

 ### etcd TCP ports

@ -99,7 +107,7 @@ Now it's time to dig into the full etcd API and other guides.
 [fulldoc]: ./Documentation/docs.md
 [api]: ./Documentation/dev-guide/api_reference_v3.md
 [clustering]: ./Documentation/op-guide/clustering.md
-[configuration]: op-guide/configuration.md
+[configuration]: ./Documentation/op-guide/configuration.md
 [libraries-and-tools]: ./Documentation/libraries-and-tools.md
 [security]: ./Documentation/op-guide/security.md
 [tuning]: ./Documentation/tuning.md
@ -119,37 +127,7 @@ See [CONTRIBUTING](CONTRIBUTING.md) for details on submitting patches and the co

 See [reporting bugs](Documentation/reporting_bugs.md) for details about reporting any issue you may encounter.

-## Project details
-
-### Versioning
-
-#### Service versioning
-
-etcd uses [semantic versioning](http://semver.org)
-New minor versions may add additional features to the API.
-
-Get the running etcd cluster version with `etcdctl`:
-
-```sh
-ETCDCTL_API=3 etcdctl --endpoints=127.0.0.1:2379 endpoint status
-```
-
-#### API versioning
-
-The `v3` API responses should not change after the 3.0.0 release but new features will be added over time.
-
-#### 32-bit and other unsupported systems
-
-etcd has known issues on 32-bit systems due to a bug in the Go runtime. See #[358][358] for more information.
-
-To avoid inadvertantly running a possibly unstable etcd server, `etcd` on unsupported architectures will print
-a warning message and immediately exit if the environment variable `ETCD_UNSUPPORTED_ARCH` is not set to
-the target architecture.
-
-Currently only the amd64 architecture is officially supported by `etcd`.
-
-[358]: https://github.com/coreos/etcd/issues/358
-
 ### License

 etcd is under the Apache 2.0 license. See the [LICENSE](LICENSE) file for details.
+
--- a/5
+++ b/5
@ -1,6 +1,5 @@
 # Use goreman to run `go get github.com/mattn/goreman`
-etcd1: bin/etcd --name infra1 --listen-client-urls http://127.0.0.1:2379 --advertise-client-urls http://127.0.0.1:2379 --listen-peer-urls http://127.0.0.1:12380 --initial-advertise-peer-urls http://127.0.0.1:12380 --initial-cluster-token etcd-cluster-1 --initial-cluster 'infra1=http://127.0.0.1:12380,infra2=http://127.0.0.1:22380,infra3=http://127.0.0.1:32380' --initial-cluster-state new --enable-pprof
+etcd1: bin/etcd --name infra1 --listen-client-urls http://127.0.0.1:12379 --advertise-client-urls http://127.0.0.1:12379 --listen-peer-urls http://127.0.0.1:12380 --initial-advertise-peer-urls http://127.0.0.1:12380 --initial-cluster-token etcd-cluster-1 --initial-cluster 'infra1=http://127.0.0.1:12380,infra2=http://127.0.0.1:22380,infra3=http://127.0.0.1:32380' --initial-cluster-state new --enable-pprof
 etcd2: bin/etcd --name infra2 --listen-client-urls http://127.0.0.1:22379 --advertise-client-urls http://127.0.0.1:22379 --listen-peer-urls http://127.0.0.1:22380 --initial-advertise-peer-urls http://127.0.0.1:22380 --initial-cluster-token etcd-cluster-1 --initial-cluster 'infra1=http://127.0.0.1:12380,infra2=http://127.0.0.1:22380,infra3=http://127.0.0.1:32380' --initial-cluster-state new --enable-pprof
 etcd3: bin/etcd --name infra3 --listen-client-urls http://127.0.0.1:32379 --advertise-client-urls http://127.0.0.1:32379 --listen-peer-urls http://127.0.0.1:32380 --initial-advertise-peer-urls http://127.0.0.1:32380 --initial-cluster-token etcd-cluster-1 --initial-cluster 'infra1=http://127.0.0.1:12380,infra2=http://127.0.0.1:22380,infra3=http://127.0.0.1:32380' --initial-cluster-state new --enable-pprof
-# in future, use proxy to listen on 2379
-#proxy: bin/etcd --name infra-proxy1 --proxy=on --listen-client-urls http://127.0.0.1:2378 --initial-cluster 'infra1=http://127.0.0.1:12380,infra2=http://127.0.0.1:22380,infra3=http://127.0.0.1:32380' --enable-pprof
+proxy: bin/etcd --name infra-proxy1 --proxy=on --listen-client-urls http://127.0.0.1:2379 --initial-cluster 'infra1=http://127.0.0.1:12380,infra2=http://127.0.0.1:22380,infra3=http://127.0.0.1:32380' --enable-pprof
--- a/alarm/alarms.go
+++ b/alarm/alarms.go
@ -1,4 +1,4 @@
-// Copyright 2016 CoreOS, Inc.
+// Copyright 2016 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
--- a/auth/authpb/auth.pb.go
+++ b/auth/authpb/auth.pb.go
@ -18,7 +18,7 @@ package authpb
 import (
 	"fmt"

-	proto "github.com/gogo/protobuf/proto"
+	proto "github.com/golang/protobuf/proto"

 	math "math"
 )
@ -32,7 +32,7 @@ var _ = math.Inf

 // This is a compile-time assertion to ensure that this generated file
 // is compatible with the proto package it is being compiled against.
-const _ = proto.GoGoProtoPackageIsVersion1
+const _ = proto.ProtoPackageIsVersion1

 type Permission_Type int32

@ -72,8 +72,9 @@ func (*User) Descriptor() ([]byte, []int) { return fileDescriptorAuth, []int{0}

 // Permission is a single entity
 type Permission struct {
-	Key      []byte          `protobuf:"bytes,1,opt,name=key,proto3" json:"key,omitempty"`
-	PermType Permission_Type `protobuf:"varint,2,opt,name=permType,proto3,enum=authpb.Permission_Type" json:"permType,omitempty"`
+	PermType Permission_Type `protobuf:"varint,1,opt,name=permType,proto3,enum=authpb.Permission_Type" json:"permType,omitempty"`
+	Key      []byte          `protobuf:"bytes,2,opt,name=key,proto3" json:"key,omitempty"`
+	RangeEnd []byte          `protobuf:"bytes,3,opt,name=range_end,json=rangeEnd,proto3" json:"range_end,omitempty"`
 }

 func (m *Permission) Reset()                    { *m = Permission{} }
@ -158,16 +159,22 @@ func (m *Permission) MarshalTo(data []byte) (int, error) {
 	_ = i
 	var l int
 	_ = l
+	if m.PermType != 0 {
+		data[i] = 0x8
+		i++
+		i = encodeVarintAuth(data, i, uint64(m.PermType))
+	}
 	if len(m.Key) > 0 {
-		data[i] = 0xa
+		data[i] = 0x12
 		i++
 		i = encodeVarintAuth(data, i, uint64(len(m.Key)))
 		i += copy(data[i:], m.Key)
 	}
-	if m.PermType != 0 {
-		data[i] = 0x10
+	if len(m.RangeEnd) > 0 {
+		data[i] = 0x1a
 		i++
-		i = encodeVarintAuth(data, i, uint64(m.PermType))
+		i = encodeVarintAuth(data, i, uint64(len(m.RangeEnd)))
+		i += copy(data[i:], m.RangeEnd)
 	}
 	return i, nil
 }
@ -258,12 +265,16 @@ func (m *User) Size() (n int) {
 func (m *Permission) Size() (n int) {
 	var l int
 	_ = l
+	if m.PermType != 0 {
+		n += 1 + sovAuth(uint64(m.PermType))
+	}
 	l = len(m.Key)
 	if l > 0 {
 		n += 1 + l + sovAuth(uint64(l))
 	}
-	if m.PermType != 0 {
-		n += 1 + sovAuth(uint64(m.PermType))
+	l = len(m.RangeEnd)
+	if l > 0 {
+		n += 1 + l + sovAuth(uint64(l))
 	}
 	return n
 }
@ -468,6 +479,25 @@ func (m *Permission) Unmarshal(data []byte) error {
 		}
 		switch fieldNum {
 		case 1:
+			if wireType != 0 {
+				return fmt.Errorf("proto: wrong wireType = %d for field PermType", wireType)
+			}
+			m.PermType = 0
+			for shift := uint(0); ; shift += 7 {
+				if shift >= 64 {
+					return ErrIntOverflowAuth
+				}
+				if iNdEx >= l {
+					return io.ErrUnexpectedEOF
+				}
+				b := data[iNdEx]
+				iNdEx++
+				m.PermType |= (Permission_Type(b) & 0x7F) << shift
+				if b < 0x80 {
+					break
+				}
+			}
+		case 2:
 			if wireType != 2 {
 				return fmt.Errorf("proto: wrong wireType = %d for field Key", wireType)
 			}
@ -498,11 +528,11 @@ func (m *Permission) Unmarshal(data []byte) error {
 				m.Key = []byte{}
 			}
 			iNdEx = postIndex
-		case 2:
-			if wireType != 0 {
-				return fmt.Errorf("proto: wrong wireType = %d for field PermType", wireType)
+		case 3:
+			if wireType != 2 {
+				return fmt.Errorf("proto: wrong wireType = %d for field RangeEnd", wireType)
 			}
-			m.PermType = 0
+			var byteLen int
 			for shift := uint(0); ; shift += 7 {
 				if shift >= 64 {
 					return ErrIntOverflowAuth
@ -512,11 +542,23 @@ func (m *Permission) Unmarshal(data []byte) error {
 				}
 				b := data[iNdEx]
 				iNdEx++
-				m.PermType |= (Permission_Type(b) & 0x7F) << shift
+				byteLen |= (int(b) & 0x7F) << shift
 				if b < 0x80 {
 					break
 				}
 			}
+			if byteLen < 0 {
+				return ErrInvalidLengthAuth
+			}
+			postIndex := iNdEx + byteLen
+			if postIndex > l {
+				return io.ErrUnexpectedEOF
+			}
+			m.RangeEnd = append(m.RangeEnd[:0], data[iNdEx:postIndex]...)
+			if m.RangeEnd == nil {
+				m.RangeEnd = []byte{}
+			}
+			iNdEx = postIndex
 		default:
 			iNdEx = preIndex
 			skippy, err := skipAuth(data[iNdEx:])
@ -756,21 +798,23 @@ var (
 )

 var fileDescriptorAuth = []byte{
-	// 254 bytes of a gzipped FileDescriptorProto
-	0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0xe2, 0xe2, 0x4a, 0x2c, 0x2d, 0xc9,
-	0xd0, 0x2b, 0x28, 0xca, 0x2f, 0xc9, 0x17, 0x62, 0x03, 0xb1, 0x0b, 0x92, 0xa4, 0x44, 0xd2, 0xf3,
-	0xd3, 0xf3, 0xc1, 0x42, 0xfa, 0x20, 0x16, 0x44, 0x56, 0xc9, 0x87, 0x8b, 0x25, 0xb4, 0x38, 0xb5,
-	0x48, 0x48, 0x88, 0x8b, 0x25, 0x2f, 0x31, 0x37, 0x55, 0x82, 0x51, 0x81, 0x51, 0x83, 0x27, 0x08,
-	0xcc, 0x16, 0x92, 0xe2, 0xe2, 0x28, 0x48, 0x2c, 0x2e, 0x2e, 0xcf, 0x2f, 0x4a, 0x91, 0x60, 0x02,
-	0x8b, 0xc3, 0xf9, 0x42, 0x22, 0x5c, 0xac, 0x45, 0xf9, 0x39, 0xa9, 0xc5, 0x12, 0xcc, 0x0a, 0xcc,
-	0x1a, 0x9c, 0x41, 0x10, 0x8e, 0x52, 0x3d, 0x17, 0x57, 0x40, 0x6a, 0x51, 0x6e, 0x66, 0x71, 0x71,
-	0x66, 0x7e, 0x9e, 0x90, 0x00, 0x17, 0x73, 0x76, 0x6a, 0x25, 0xd4, 0x48, 0x10, 0x53, 0xc8, 0x18,
-	0x68, 0x22, 0x50, 0x3e, 0xa4, 0xb2, 0x20, 0x15, 0x6c, 0x22, 0x9f, 0x91, 0xb8, 0x1e, 0xc4, 0x79,
-	0x7a, 0x08, 0x7d, 0x7a, 0x20, 0xe9, 0x20, 0xb8, 0x42, 0x25, 0x2d, 0x2e, 0x16, 0x10, 0x2d, 0xc4,
-	0xc1, 0xc5, 0x12, 0xe4, 0xea, 0xe8, 0x22, 0xc0, 0x20, 0xc4, 0xc9, 0xc5, 0x1a, 0x1e, 0xe4, 0x19,
-	0xe2, 0x2a, 0xc0, 0x28, 0xc4, 0xcb, 0xc5, 0x09, 0x12, 0x84, 0x70, 0x99, 0x94, 0x42, 0x80, 0x6a,
-	0x80, 0x2e, 0xc1, 0xea, 0x1d, 0x0b, 0x2e, 0x5e, 0xa0, 0x1b, 0x10, 0xf6, 0x00, 0x5d, 0xc0, 0xac,
-	0xc1, 0x6d, 0x24, 0x84, 0xe9, 0x82, 0x20, 0x54, 0x85, 0x4e, 0x22, 0x27, 0x1e, 0xca, 0x31, 0x5c,
-	0x00, 0xe2, 0x13, 0x8f, 0xe4, 0x18, 0x2f, 0x00, 0xf1, 0x03, 0x20, 0x4e, 0x62, 0x03, 0x87, 0xa0,
-	0x31, 0x20, 0x00, 0x00, 0xff, 0xff, 0x92, 0x06, 0xa1, 0xed, 0x6d, 0x01, 0x00, 0x00,
+	// 288 bytes of a gzipped FileDescriptorProto
+	0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0x6c, 0x90, 0xc1, 0x4a, 0xc3, 0x30,
+	0x1c, 0xc6, 0x9b, 0xb6, 0x1b, 0xed, 0x5f, 0x27, 0x25, 0x0c, 0x0c, 0x13, 0x42, 0xe9, 0xa9, 0x78,
+	0xa8, 0xb0, 0x5d, 0xbc, 0x2a, 0xf6, 0x20, 0x78, 0x90, 0x50, 0xf1, 0x28, 0x1d, 0x0d, 0x75, 0x6c,
+	0x6d, 0x4a, 0x32, 0x91, 0xbe, 0x89, 0x07, 0x1f, 0x68, 0xc7, 0x3d, 0x82, 0xab, 0x2f, 0x22, 0x4d,
+	0x64, 0x43, 0xdc, 0xed, 0xfb, 0xbe, 0xff, 0x97, 0xe4, 0x97, 0x3f, 0x40, 0xfe, 0xb6, 0x7e, 0x4d,
+	0x1a, 0x29, 0xd6, 0x02, 0x0f, 0x7b, 0xdd, 0xcc, 0x27, 0xe3, 0x52, 0x94, 0x42, 0x47, 0x57, 0xbd,
+	0x32, 0xd3, 0xe8, 0x01, 0xdc, 0x27, 0xc5, 0x25, 0xc6, 0xe0, 0xd6, 0x79, 0xc5, 0x09, 0x0a, 0x51,
+	0x7c, 0xca, 0xb4, 0xc6, 0x13, 0xf0, 0x9a, 0x5c, 0xa9, 0x77, 0x21, 0x0b, 0x62, 0xeb, 0x7c, 0xef,
+	0xf1, 0x18, 0x06, 0x52, 0xac, 0xb8, 0x22, 0x4e, 0xe8, 0xc4, 0x3e, 0x33, 0x26, 0xfa, 0x44, 0x00,
+	0x8f, 0x5c, 0x56, 0x0b, 0xa5, 0x16, 0xa2, 0xc6, 0x33, 0xf0, 0x1a, 0x2e, 0xab, 0xac, 0x6d, 0xcc,
+	0xc5, 0x67, 0xd3, 0xf3, 0xc4, 0xd0, 0x24, 0x87, 0x56, 0xd2, 0x8f, 0xd9, 0xbe, 0x88, 0x03, 0x70,
+	0x96, 0xbc, 0xfd, 0x7d, 0xb0, 0x97, 0xf8, 0x02, 0x7c, 0x99, 0xd7, 0x25, 0x7f, 0xe1, 0x75, 0x41,
+	0x1c, 0x03, 0xa2, 0x83, 0xb4, 0x2e, 0xa2, 0x4b, 0x70, 0xf5, 0x31, 0x0f, 0x5c, 0x96, 0xde, 0xdc,
+	0x05, 0x16, 0xf6, 0x61, 0xf0, 0xcc, 0xee, 0xb3, 0x34, 0x40, 0x78, 0x04, 0x7e, 0x1f, 0x1a, 0x6b,
+	0x47, 0x19, 0xb8, 0x4c, 0xac, 0xf8, 0xd1, 0xcf, 0x5e, 0xc3, 0x68, 0xc9, 0xdb, 0x03, 0x16, 0xb1,
+	0x43, 0x27, 0x3e, 0x99, 0xe2, 0xff, 0xc0, 0xec, 0x6f, 0xf1, 0x96, 0x6c, 0x76, 0xd4, 0xda, 0xee,
+	0xa8, 0xb5, 0xe9, 0x28, 0xda, 0x76, 0x14, 0x7d, 0x75, 0x14, 0x7d, 0x7c, 0x53, 0x6b, 0x3e, 0xd4,
+	0x3b, 0x9e, 0xfd, 0x04, 0x00, 0x00, 0xff, 0xff, 0xcc, 0x76, 0x8d, 0x4f, 0x8f, 0x01, 0x00, 0x00,
 }
--- a/auth/authpb/auth.proto
+++ b/auth/authpb/auth.proto
@ -18,14 +18,15 @@ message User {

 // Permission is a single entity
 message Permission {
-  bytes key = 1;
-
  enum Type {
    READ = 0;
    WRITE = 1;
    READWRITE = 2;
  }
-  Type permType = 2;
+  Type permType = 1;
+
+  bytes key = 2;
+  bytes range_end = 3;
 }

 // Role is a single entry in the bucket authRoles
--- a/auth/doc.go
+++ b/auth/doc.go
@ -0,0 +1,16 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package auth provides client role authentication for accessing keys in etcd.
+package auth
--- a/auth/range_perm_cache.go
+++ b/auth/range_perm_cache.go
@ -0,0 +1,219 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package auth
+
+import (
+	"bytes"
+	"sort"
+
+	"github.com/coreos/etcd/auth/authpb"
+	"github.com/coreos/etcd/mvcc/backend"
+)
+
+// isSubset returns true if a is a subset of b. A rangePerm whose end is empty is treated as a single key (begin); otherwise it is treated as a range running from begin to end.
+func isSubset(a, b *rangePerm) bool {
+	switch {
+	case len(a.end) == 0 && len(b.end) == 0:
+		// a, b are both keys
+		return bytes.Equal(a.begin, b.begin)
+	case len(b.end) == 0:
+		// b is a key, a is a range
+		return false
+	case len(a.end) == 0: // a is a key, b is a range
+		return 0 <= bytes.Compare(a.begin, b.begin) && bytes.Compare(a.begin, b.end) <= 0 // NOTE(review): a key equal to b.end is accepted; if range ends are meant to be exclusive this admits one key too many -- confirm
+	default: // a, b are both ranges: a must begin at or after b.begin and end at or before b.end
+		return 0 <= bytes.Compare(a.begin, b.begin) && bytes.Compare(a.end, b.end) <= 0
+	}
+}
+
+func isRangeEqual(a, b *rangePerm) bool { // reports whether a and b have byte-identical begin and end
+	return bytes.Equal(a.begin, b.begin) && bytes.Equal(a.end, b.end)
+}
+
+// removeSubsetRangePerms removes any rangePerms that are subsets of other rangePerms.
+// If there are equal ranges, removeSubsetRangePerms only keeps one of them.
+func removeSubsetRangePerms(perms []*rangePerm) []*rangePerm { // returns a new slice; perms is only read, and surviving entries keep their input order
+	// TODO(mitake): currently it is O(n^2), we need a better algorithm
+	newp := make([]*rangePerm, 0)
+
+	for i := range perms {
+		skip := false // set when perms[i] is covered by some other entry
+
+		for j := range perms {
+			if i == j {
+				continue
+			}
+
+			if isRangeEqual(perms[i], perms[j]) {
+				// if ranges are equal, we only keep the first range.
+				if i > j {
+					skip = true
+					break
+				}
+			} else if isSubset(perms[i], perms[j]) {
+				// if a range is a strict subset of the other one, we skip the subset.
+				skip = true
+				break
+			}
+		}
+
+		if skip {
+			continue
+		}
+
+		newp = append(newp, perms[i])
+	}
+
+	return newp
+}
+
+// mergeRangePerms merges adjacent rangePerms. It first drops entries that are subsets or duplicates of others, sorts the rest by begin, and then collapses each run of touching or overlapping entries into one newly allocated rangePerm.
+func mergeRangePerms(perms []*rangePerm) []*rangePerm {
+	merged := make([]*rangePerm, 0)
+	perms = removeSubsetRangePerms(perms) // yields a fresh slice, so the sort below does not reorder the caller's slice
+	sort.Sort(RangePermSliceByBegin(perms))
+
+	i := 0
+	for i < len(perms) {
+		begin, next := i, i
+		for next+1 < len(perms) && bytes.Compare(perms[next].end, perms[next+1].begin) != -1 { // extend the run while the current end is >= the next begin
+			next++
+		}
+
+		merged = append(merged, &rangePerm{begin: perms[begin].begin, end: perms[next].end}) // a run of one entry yields a copy of that entry
+
+		i = next + 1
+	}
+
+	return merged
+}
+
+func getMergedPerms(tx backend.BatchTx, userName string) *unifiedRangePermissions { // builds the merged read and write permission lists for userName from the key permissions of the user's roles; returns nil when getUser yields no user
+	user := getUser(tx, userName)
+	if user == nil {
+		plog.Errorf("invalid user name %s", userName)
+		return nil
+	}
+
+	var readPerms, writePerms []*rangePerm
+
+	for _, roleName := range user.Roles {
+		role := getRole(tx, roleName)
+		if role == nil { // role names for which getRole returns nil are skipped silently
+			continue
+		}
+
+		for _, perm := range role.KeyPermission {
+			rp := &rangePerm{begin: perm.Key, end: perm.RangeEnd}
+
+			switch perm.PermType {
+			case authpb.READWRITE: // counts for both lists; the same rangePerm pointer is appended to each
+				readPerms = append(readPerms, rp)
+				writePerms = append(writePerms, rp)
+
+			case authpb.READ:
+				readPerms = append(readPerms, rp)
+
+			case authpb.WRITE:
+				writePerms = append(writePerms, rp)
+			}
+		}
+	}
+
+	return &unifiedRangePermissions{
+		readPerms:  mergeRangePerms(readPerms),
+		writePerms: mergeRangePerms(writePerms),
+	}
+}
+
+func checkKeyPerm(cachedPerms *unifiedRangePermissions, key, rangeEnd []byte, permtyp authpb.Permission_Type) bool { // reports whether the request described by key and rangeEnd is a subset of a single cached permission of type permtyp
+	var tocheck []*rangePerm
+
+	switch permtyp {
+	case authpb.READ:
+		tocheck = cachedPerms.readPerms
+	case authpb.WRITE:
+		tocheck = cachedPerms.writePerms
+	default: // only READ and WRITE are accepted as the requested type
+		plog.Panicf("unknown auth type: %v", permtyp)
+	}
+
+	requiredPerm := &rangePerm{begin: key, end: rangeEnd}
+
+	for _, perm := range tocheck {
+		if isSubset(requiredPerm, perm) {
+			return true
+		}
+	}
+
+	return false
+}
+
+func (as *authStore) isRangeOpPermitted(tx backend.BatchTx, userName string, key, rangeEnd []byte, permtyp authpb.Permission_Type) bool { // reports whether userName holds permtyp permission for key/rangeEnd, building and caching the user's merged permissions on first use
+	// assumption: tx is Lock()ed
+	_, ok := as.rangePermCache[userName]
+	if !ok {
+		perms := getMergedPerms(tx, userName)
+		if perms == nil { // nothing is cached in this case, so the next call retries the lookup
+			plog.Errorf("failed to create a unified permission of user %s", userName)
+			return false
+		}
+		as.rangePermCache[userName] = perms
+	}
+
+	return checkKeyPerm(as.rangePermCache[userName], key, rangeEnd, permtyp)
+}
+
+func (as *authStore) clearCachedPerm() { // drops every cached entry by replacing the map with a new empty one
+	as.rangePermCache = make(map[string]*unifiedRangePermissions)
+}
+
+func (as *authStore) invalidateCachedPerm(userName string) { // removes the cached entry for userName, if there is one
+	delete(as.rangePermCache, userName)
+}
+
+type unifiedRangePermissions struct { // a user's merged permissions, split by access type; built by getMergedPerms
+	// readPerms[i] and readPerms[j] (i != j) don't overlap
+	readPerms []*rangePerm
+	// writePerms[i] and writePerms[j] (i != j) don't overlap, too
+	writePerms []*rangePerm
+}
+
+type rangePerm struct { // a permission target: either a single key or a key range
+	begin, end []byte // isSubset treats an empty end as "begin is a single key"
+}
+
+type RangePermSliceByBegin []*rangePerm // RangePermSliceByBegin implements sort.Interface, ordering by begin and then by end.
+
+func (slice RangePermSliceByBegin) Len() int { // Len returns the number of entries (sort.Interface).
+	return len(slice)
+}
+
+func (slice RangePermSliceByBegin) Less(i, j int) bool { // Less orders entries by begin, breaking ties by end (sort.Interface).
+	switch bytes.Compare(slice[i].begin, slice[j].begin) {
+	case 0: // begin(i) == begin(j)
+		return bytes.Compare(slice[i].end, slice[j].end) == -1 // an empty end therefore sorts before any non-empty end with the same begin
+
+	case -1: // begin(i) < begin(j)
+		return true
+
+	default: // begin(i) > begin(j)
+		return false
+	}
+}
+
+func (slice RangePermSliceByBegin) Swap(i, j int) { // Swap exchanges the entries at i and j (sort.Interface).
+	slice[i], slice[j] = slice[j], slice[i]
+}
--- a/auth/range_perm_cache_test.go
+++ b/auth/range_perm_cache_test.go
@ -0,0 +1,129 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package auth
+
+import (
+	"bytes"
+	"testing"
+)
+
+func isPermsEqual(a, b []*rangePerm) bool { // reports whether a and b hold equal begin/end pairs in the same order
+	if len(a) != len(b) {
+		return false
+	}
+
+	for i := range a {
+		if len(b) <= i { // cannot trigger after the length check above; kept as a guard
+			return false
+		}
+
+		if !bytes.Equal(a[i].begin, b[i].begin) || !bytes.Equal(a[i].end, b[i].end) {
+			return false
+		}
+	}
+
+	return true
+}
+
+func TestGetMergedPerms(t *testing.T) { // table-driven test of mergeRangePerms; despite the name, getMergedPerms itself is not called
+	tests := []struct {
+		params []*rangePerm
+		want   []*rangePerm
+	}{
+		{
+			[]*rangePerm{{[]byte("a"), []byte("b")}},
+			[]*rangePerm{{[]byte("a"), []byte("b")}},
+		},
+		{
+			[]*rangePerm{{[]byte("a"), []byte("b")}, {[]byte("b"), []byte("c")}},
+			[]*rangePerm{{[]byte("a"), []byte("c")}},
+		},
+		{
+			[]*rangePerm{{[]byte("a"), []byte("c")}, {[]byte("b"), []byte("d")}},
+			[]*rangePerm{{[]byte("a"), []byte("d")}},
+		},
+		{
+			[]*rangePerm{{[]byte("a"), []byte("b")}, {[]byte("b"), []byte("c")}, {[]byte("d"), []byte("e")}},
+			[]*rangePerm{{[]byte("a"), []byte("c")}, {[]byte("d"), []byte("e")}},
+		},
+		{
+			[]*rangePerm{{[]byte("a"), []byte("b")}, {[]byte("c"), []byte("d")}, {[]byte("e"), []byte("f")}},
+			[]*rangePerm{{[]byte("a"), []byte("b")}, {[]byte("c"), []byte("d")}, {[]byte("e"), []byte("f")}},
+		},
+		{
+			[]*rangePerm{{[]byte("e"), []byte("f")}, {[]byte("c"), []byte("d")}, {[]byte("a"), []byte("b")}},
+			[]*rangePerm{{[]byte("a"), []byte("b")}, {[]byte("c"), []byte("d")}, {[]byte("e"), []byte("f")}},
+		},
+		{
+			[]*rangePerm{{[]byte("a"), []byte("b")}, {[]byte("c"), []byte("d")}, {[]byte("a"), []byte("z")}},
+			[]*rangePerm{{[]byte("a"), []byte("z")}},
+		},
+		{
+			[]*rangePerm{{[]byte("a"), []byte("b")}, {[]byte("c"), []byte("d")}, {[]byte("a"), []byte("z")}, {[]byte("1"), []byte("9")}},
+			[]*rangePerm{{[]byte("1"), []byte("9")}, {[]byte("a"), []byte("z")}},
+		},
+		{
+			[]*rangePerm{{[]byte("a"), []byte("b")}, {[]byte("c"), []byte("d")}, {[]byte("a"), []byte("z")}, {[]byte("1"), []byte("a")}},
+			[]*rangePerm{{[]byte("1"), []byte("z")}},
+		},
+		{
+			[]*rangePerm{{[]byte("a"), []byte("b")}, {[]byte("a"), []byte("z")}, {[]byte("5"), []byte("6")}, {[]byte("1"), []byte("9")}},
+			[]*rangePerm{{[]byte("1"), []byte("9")}, {[]byte("a"), []byte("z")}},
+		},
+		{
+			[]*rangePerm{{[]byte("a"), []byte("b")}, {[]byte("b"), []byte("c")}, {[]byte("c"), []byte("d")}, {[]byte("d"), []byte("f")}, {[]byte("1"), []byte("9")}},
+			[]*rangePerm{{[]byte("1"), []byte("9")}, {[]byte("a"), []byte("f")}},
+		},
+		// overlapping
+		{
+			[]*rangePerm{{[]byte("a"), []byte("f")}, {[]byte("b"), []byte("g")}},
+			[]*rangePerm{{[]byte("a"), []byte("g")}},
+		},
+		// keys
+		{
+			[]*rangePerm{{[]byte("a"), []byte("")}, {[]byte("b"), []byte("")}},
+			[]*rangePerm{{[]byte("a"), []byte("")}, {[]byte("b"), []byte("")}},
+		},
+		{
+			[]*rangePerm{{[]byte("a"), []byte("")}, {[]byte("a"), []byte("c")}},
+			[]*rangePerm{{[]byte("a"), []byte("c")}},
+		},
+		{
+			[]*rangePerm{{[]byte("a"), []byte("")}, {[]byte("a"), []byte("c")}, {[]byte("b"), []byte("")}},
+			[]*rangePerm{{[]byte("a"), []byte("c")}},
+		},
+		{
+			[]*rangePerm{{[]byte("a"), []byte("")}, {[]byte("b"), []byte("c")}, {[]byte("b"), []byte("")}, {[]byte("c"), []byte("")}, {[]byte("d"), []byte("")}},
+			[]*rangePerm{{[]byte("a"), []byte("")}, {[]byte("b"), []byte("c")}, {[]byte("d"), []byte("")}}, // key "c" is expected to be absorbed by range b..c, i.e. the range end is treated as included
+		},
+		// duplicate ranges
+		{
+			[]*rangePerm{{[]byte("a"), []byte("f")}, {[]byte("a"), []byte("f")}},
+			[]*rangePerm{{[]byte("a"), []byte("f")}},
+		},
+		// duplicate keys
+		{
+			[]*rangePerm{{[]byte("a"), []byte("")}, {[]byte("a"), []byte("")}, {[]byte("a"), []byte("")}},
+			[]*rangePerm{{[]byte("a"), []byte("")}},
+		},
+	}
+
+	for i, tt := range tests {
+		result := mergeRangePerms(tt.params)
+		if !isPermsEqual(result, tt.want) { // compares begin/end pairs position by position
+			t.Errorf("#%d: result=%q, want=%q", i, result, tt.want)
+		}
+	}
+}
--- a/auth/simple_token.go
+++ b/auth/simple_token.go
@ -1,4 +1,4 @@
-// Copyright 2016 Nippon Telegraph and Telephone Corporation.
+// Copyright 2016 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -27,15 +27,7 @@ const (
 	defaultSimpleTokenLength = 16
 )

-var (
-	simpleTokens map[string]string // token -> user ID
-)
-
-func init() {
-	simpleTokens = make(map[string]string)
-}
-
-func genSimpleToken() (string, error) {
+func (as *authStore) GenSimpleToken() (string, error) {
 	ret := make([]byte, defaultSimpleTokenLength)

 	for i := 0; i < defaultSimpleTokenLength; i++ {
@ -50,22 +42,14 @@ func genSimpleToken() (string, error) {
 	return string(ret), nil
 }

-func genSimpleTokenForUser(userID string) (string, error) {
-	var token string
-	var err error
+func (as *authStore) assignSimpleTokenToUser(username, token string) {
+	as.simpleTokensMu.Lock()

-	for {
-		// generating random numbers in RSM would't a good idea
-		token, err = genSimpleToken()
-		if err != nil {
-			return "", err
-		}
-
-		if _, ok := simpleTokens[token]; !ok {
-			break
-		}
+	_, ok := as.simpleTokens[token]
+	if ok {
+		plog.Panicf("token %s is alredy used", token)
 	}

-	simpleTokens[token] = userID
-	return token, nil
+	as.simpleTokens[token] = username
+	as.simpleTokensMu.Unlock()
 }
--- a/auth/store.go
+++ b/auth/store.go
@ -1,4 +1,4 @@
-// Copyright 2016 Nippon Telegraph and Telephone Corporation.
+// Copyright 2016 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -17,18 +17,23 @@ package auth
 import (
 	"bytes"
 	"errors"
+	"fmt"
 	"sort"
 	"strings"
+	"sync"

 	"github.com/coreos/etcd/auth/authpb"
 	pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
 	"github.com/coreos/etcd/mvcc/backend"
 	"github.com/coreos/pkg/capnslog"
 	"golang.org/x/crypto/bcrypt"
+	"golang.org/x/net/context"
 )

 var (
 	enableFlagKey = []byte("authEnabled")
+	authEnabled   = []byte{1}
+	authDisabled  = []byte{0}

 	authBucketName      = []byte("auth")
 	authUsersBucketName = []byte("authUsers")
@ -36,22 +41,32 @@ var (

 	plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "auth")

-	ErrUserAlreadyExist = errors.New("auth: user already exists")
-	ErrUserNotFound     = errors.New("auth: user not found")
-	ErrRoleAlreadyExist = errors.New("auth: role already exists")
-	ErrRoleNotFound     = errors.New("auth: role not found")
-	ErrAuthFailed       = errors.New("auth: authentication failed, invalid user ID or password")
+	ErrRootUserNotExist     = errors.New("auth: root user does not exist")
+	ErrRootRoleNotExist     = errors.New("auth: root user does not have root role")
+	ErrUserAlreadyExist     = errors.New("auth: user already exists")
+	ErrUserNotFound         = errors.New("auth: user not found")
+	ErrRoleAlreadyExist     = errors.New("auth: role already exists")
+	ErrRoleNotFound         = errors.New("auth: role not found")
+	ErrAuthFailed           = errors.New("auth: authentication failed, invalid user ID or password")
+	ErrPermissionDenied     = errors.New("auth: permission denied")
+	ErrRoleNotGranted       = errors.New("auth: role is not granted to the user")
+	ErrPermissionNotGranted = errors.New("auth: permission is not granted to the role")
+)
+
+const (
+	rootUser = "root"
+	rootRole = "root"
 )

 type AuthStore interface {
 	// AuthEnable turns on the authentication feature
-	AuthEnable()
+	AuthEnable() error

-	// Authenticate does authentication based on given user name and password,
-	// and returns a token for successful case.
-	// Note that the generated token is valid only for the member the client
-	// connected to within fixed time duration. Reauth is required after the duration.
-	Authenticate(name string, password string) (*pb.AuthenticateResponse, error)
+	// AuthDisable turns off the authentication feature
+	AuthDisable()
+
+	// Authenticate does authentication based on given user name and password
+	Authenticate(ctx context.Context, username, password string) (*pb.AuthenticateResponse, error)

 	// Recover recovers the state of auth store from the given backend
 	Recover(b backend.Backend)
@ -65,68 +80,154 @@ type AuthStore interface {
 	// UserChangePassword changes a password of a user
 	UserChangePassword(r *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error)

-	// UserGrant grants a role to the user
-	UserGrant(r *pb.AuthUserGrantRequest) (*pb.AuthUserGrantResponse, error)
+	// UserGrantRole grants a role to the user
+	UserGrantRole(r *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error)
+
+	// UserGet gets the detailed information of a user
+	UserGet(r *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error)
+
+	// UserRevokeRole revokes a role of a user
+	UserRevokeRole(r *pb.AuthUserRevokeRoleRequest) (*pb.AuthUserRevokeRoleResponse, error)

 	// RoleAdd adds a new role
 	RoleAdd(r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error)

-	// RoleGrant grants a permission to a role
-	RoleGrant(r *pb.AuthRoleGrantRequest) (*pb.AuthRoleGrantResponse, error)
+	// RoleGrantPermission grants a permission to a role
+	RoleGrantPermission(r *pb.AuthRoleGrantPermissionRequest) (*pb.AuthRoleGrantPermissionResponse, error)
+
+	// RoleGet gets the detailed information of a role
+	RoleGet(r *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error)
+
+	// RoleRevokePermission revokes a key permission from a role
+	RoleRevokePermission(r *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error)
+
+	// RoleDelete deletes a role
+	RoleDelete(r *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDeleteResponse, error)
+
+	// UserList gets a list of all users
+	UserList(r *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error)
+
+	// RoleList gets a list of all roles
+	RoleList(r *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error)
+
+	// UsernameFromToken gets a username from the given Token
+	UsernameFromToken(token string) (string, bool)
+
+	// IsPutPermitted checks put permission of the user
+	IsPutPermitted(username string, key []byte) bool
+
+	// IsRangePermitted checks range permission of the user
+	IsRangePermitted(username string, key, rangeEnd []byte) bool
+
+	// IsDeleteRangePermitted checks delete-range permission of the user
+	IsDeleteRangePermitted(username string, key, rangeEnd []byte) bool
+
+	// IsAdminPermitted checks admin permission of the user
+	IsAdminPermitted(username string) bool
+
+	// GenSimpleToken produces a simple random string
+	GenSimpleToken() (string, error)
 }

 type authStore struct {
-	be backend.Backend
+	be        backend.Backend
+	enabled   bool
+	enabledMu sync.RWMutex
+
+	rangePermCache map[string]*unifiedRangePermissions // username -> unifiedRangePermissions
+
+	simpleTokensMu sync.RWMutex
+	simpleTokens   map[string]string // token -> username
 }

-func (as *authStore) AuthEnable() {
-	value := []byte{1}
-
+func (as *authStore) AuthEnable() error {
 	b := as.be
 	tx := b.BatchTx()
 	tx.Lock()
-	tx.UnsafePut(authBucketName, enableFlagKey, value)
+	defer func() {
+		tx.Unlock()
+		b.ForceCommit()
+	}()
+
+	u := getUser(tx, rootUser)
+	if u == nil {
+		return ErrRootUserNotExist
+	}
+
+	if !hasRootRole(u) {
+		return ErrRootRoleNotExist
+	}
+
+	tx.UnsafePut(authBucketName, enableFlagKey, authEnabled)
+
+	as.enabledMu.Lock()
+	as.enabled = true
+	as.enabledMu.Unlock()
+
+	as.rangePermCache = make(map[string]*unifiedRangePermissions)
+
+	plog.Noticef("Authentication enabled")
+
+	return nil
+}
+
+func (as *authStore) AuthDisable() {
+	b := as.be
+	tx := b.BatchTx()
+	tx.Lock()
+	tx.UnsafePut(authBucketName, enableFlagKey, authDisabled)
 	tx.Unlock()
 	b.ForceCommit()

-	plog.Noticef("Authentication enabled")
+	as.enabledMu.Lock()
+	as.enabled = false
+	as.enabledMu.Unlock()
+
+	plog.Noticef("Authentication disabled")
 }

-func (as *authStore) Authenticate(name string, password string) (*pb.AuthenticateResponse, error) {
+func (as *authStore) Authenticate(ctx context.Context, username, password string) (*pb.AuthenticateResponse, error) {
+	// TODO(mitake): after adding jwt support, branching based on values of ctx is required
+	index := ctx.Value("index").(uint64)
+	simpleToken := ctx.Value("simpleToken").(string)
+
 	tx := as.be.BatchTx()
 	tx.Lock()
 	defer tx.Unlock()

-	_, vs := tx.UnsafeRange(authUsersBucketName, []byte(name), nil, 0)
-	if len(vs) != 1 {
-		plog.Noticef("authentication failed, user %s doesn't exist", name)
-		return &pb.AuthenticateResponse{}, ErrAuthFailed
-	}
-
-	user := &authpb.User{}
-	err := user.Unmarshal(vs[0])
-	if err != nil {
-		return nil, err
+	user := getUser(tx, username)
+	if user == nil {
+		return nil, ErrAuthFailed
 	}

 	if bcrypt.CompareHashAndPassword(user.Password, []byte(password)) != nil {
-		plog.Noticef("authentication failed, invalid password for user %s", name)
+		plog.Noticef("authentication failed, invalid password for user %s", username)
 		return &pb.AuthenticateResponse{}, ErrAuthFailed
 	}

-	token, err := genSimpleTokenForUser(name)
-	if err != nil {
-		plog.Errorf("failed to generate simple token: %s", err)
-		return nil, err
-	}
+	token := fmt.Sprintf("%s.%d", simpleToken, index)
+	as.assignSimpleTokenToUser(username, token)

-	plog.Infof("authorized %s, token is %s", name, token)
+	plog.Infof("authorized %s, token is %s", username, token)
 	return &pb.AuthenticateResponse{Token: token}, nil
 }

 func (as *authStore) Recover(be backend.Backend) {
+	enabled := false
 	as.be = be
-	// TODO(mitake): recovery process
+	tx := be.BatchTx()
+	tx.Lock()
+	_, vs := tx.UnsafeRange(authBucketName, enableFlagKey, nil, 0)
+	if len(vs) == 1 {
+		if bytes.Equal(vs[0], authEnabled) {
+			enabled = true
+		}
+	}
+	tx.Unlock()
+
+	as.enabledMu.Lock()
+	as.enabled = enabled
+	as.enabledMu.Unlock()
 }

 func (as *authStore) UserAdd(r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error) {
@ -140,23 +241,17 @@ func (as *authStore) UserAdd(r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse,
 	tx.Lock()
 	defer tx.Unlock()

-	_, vs := tx.UnsafeRange(authUsersBucketName, []byte(r.Name), nil, 0)
-	if len(vs) != 0 {
-		return &pb.AuthUserAddResponse{}, ErrUserAlreadyExist
+	user := getUser(tx, r.Name)
+	if user != nil {
+		return nil, ErrUserAlreadyExist
 	}

-	newUser := authpb.User{
+	newUser := &authpb.User{
 		Name:     []byte(r.Name),
 		Password: hashed,
 	}

-	marshaledUser, merr := newUser.Marshal()
-	if merr != nil {
-		plog.Errorf("failed to marshal a new user data: %s", merr)
-		return nil, merr
-	}
-
-	tx.UnsafePut(authUsersBucketName, []byte(r.Name), marshaledUser)
+	putUser(tx, newUser)

 	plog.Noticef("added a new user: %s", r.Name)

@ -168,12 +263,12 @@ func (as *authStore) UserDelete(r *pb.AuthUserDeleteRequest) (*pb.AuthUserDelete
 	tx.Lock()
 	defer tx.Unlock()

-	_, vs := tx.UnsafeRange(authUsersBucketName, []byte(r.Name), nil, 0)
-	if len(vs) != 1 {
-		return &pb.AuthUserDeleteResponse{}, ErrUserNotFound
+	user := getUser(tx, r.Name)
+	if user == nil {
+		return nil, ErrUserNotFound
 	}

-	tx.UnsafeDelete(authUsersBucketName, []byte(r.Name))
+	delUser(tx, r.Name)

 	plog.Noticef("deleted a user: %s", r.Name)

@ -193,68 +288,221 @@ func (as *authStore) UserChangePassword(r *pb.AuthUserChangePasswordRequest) (*p
 	tx.Lock()
 	defer tx.Unlock()

-	_, vs := tx.UnsafeRange(authUsersBucketName, []byte(r.Name), nil, 0)
-	if len(vs) != 1 {
-		return &pb.AuthUserChangePasswordResponse{}, ErrUserNotFound
+	user := getUser(tx, r.Name)
+	if user == nil {
+		return nil, ErrUserNotFound
 	}

-	updatedUser := authpb.User{
+	updatedUser := &authpb.User{
 		Name:     []byte(r.Name),
+		Roles:    user.Roles,
 		Password: hashed,
 	}

-	marshaledUser, merr := updatedUser.Marshal()
-	if merr != nil {
-		plog.Errorf("failed to marshal a new user data: %s", merr)
-		return nil, merr
-	}
-
-	tx.UnsafePut(authUsersBucketName, []byte(r.Name), marshaledUser)
+	putUser(tx, updatedUser)

 	plog.Noticef("changed a password of a user: %s", r.Name)

 	return &pb.AuthUserChangePasswordResponse{}, nil
 }

-func (as *authStore) UserGrant(r *pb.AuthUserGrantRequest) (*pb.AuthUserGrantResponse, error) {
+func (as *authStore) UserGrantRole(r *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error) {
 	tx := as.be.BatchTx()
 	tx.Lock()
 	defer tx.Unlock()

-	_, vs := tx.UnsafeRange(authUsersBucketName, []byte(r.User), nil, 0)
-	if len(vs) != 1 {
+	user := getUser(tx, r.User)
+	if user == nil {
 		return nil, ErrUserNotFound
 	}

-	user := &authpb.User{}
-	err := user.Unmarshal(vs[0])
-	if err != nil {
-		return nil, err
-	}
-
-	_, vs = tx.UnsafeRange(authRolesBucketName, []byte(r.Role), nil, 0)
-	if len(vs) != 1 {
-		return nil, ErrRoleNotFound
+	if r.Role != rootRole {
+		role := getRole(tx, r.Role)
+		if role == nil {
+			return nil, ErrRoleNotFound
+		}
 	}

 	idx := sort.SearchStrings(user.Roles, r.Role)
 	if idx < len(user.Roles) && strings.Compare(user.Roles[idx], r.Role) == 0 {
 		plog.Warningf("user %s is already granted role %s", r.User, r.Role)
-		return &pb.AuthUserGrantResponse{}, nil
+		return &pb.AuthUserGrantRoleResponse{}, nil
 	}

 	user.Roles = append(user.Roles, r.Role)
 	sort.Sort(sort.StringSlice(user.Roles))

-	marshaledUser, merr := user.Marshal()
-	if merr != nil {
-		return nil, merr
-	}
+	putUser(tx, user)

-	tx.UnsafePut(authUsersBucketName, user.Name, marshaledUser)
+	as.invalidateCachedPerm(r.User)

 	plog.Noticef("granted role %s to user %s", r.Role, r.User)
-	return &pb.AuthUserGrantResponse{}, nil
+	return &pb.AuthUserGrantRoleResponse{}, nil
+}
+
+func (as *authStore) UserGet(r *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error) {
+	tx := as.be.BatchTx()
+	tx.Lock()
+	defer tx.Unlock()
+
+	var resp pb.AuthUserGetResponse
+
+	user := getUser(tx, r.Name)
+	if user == nil {
+		return nil, ErrUserNotFound
+	}
+
+	for _, role := range user.Roles {
+		resp.Roles = append(resp.Roles, role)
+	}
+
+	return &resp, nil
+}
+
+func (as *authStore) UserList(r *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error) {
+	tx := as.be.BatchTx()
+	tx.Lock()
+	defer tx.Unlock()
+
+	var resp pb.AuthUserListResponse
+
+	users := getAllUsers(tx)
+
+	for _, u := range users {
+		resp.Users = append(resp.Users, string(u.Name))
+	}
+
+	return &resp, nil
+}
+
+func (as *authStore) UserRevokeRole(r *pb.AuthUserRevokeRoleRequest) (*pb.AuthUserRevokeRoleResponse, error) {
+	tx := as.be.BatchTx()
+	tx.Lock()
+	defer tx.Unlock()
+
+	user := getUser(tx, r.Name)
+	if user == nil {
+		return nil, ErrUserNotFound
+	}
+
+	updatedUser := &authpb.User{
+		Name:     user.Name,
+		Password: user.Password,
+	}
+
+	for _, role := range user.Roles {
+		if strings.Compare(role, r.Role) != 0 {
+			updatedUser.Roles = append(updatedUser.Roles, role)
+		}
+	}
+
+	if len(updatedUser.Roles) == len(user.Roles) {
+		return nil, ErrRoleNotGranted
+	}
+
+	putUser(tx, updatedUser)
+
+	as.invalidateCachedPerm(r.Name)
+
+	plog.Noticef("revoked role %s from user %s", r.Role, r.Name)
+	return &pb.AuthUserRevokeRoleResponse{}, nil
+}
+
+func (as *authStore) RoleGet(r *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error) {
+	tx := as.be.BatchTx()
+	tx.Lock()
+	defer tx.Unlock()
+
+	var resp pb.AuthRoleGetResponse
+
+	role := getRole(tx, r.Role)
+	if role == nil {
+		return nil, ErrRoleNotFound
+	}
+
+	for _, perm := range role.KeyPermission {
+		resp.Perm = append(resp.Perm, perm)
+	}
+
+	return &resp, nil
+}
+
+func (as *authStore) RoleList(r *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error) {
+	tx := as.be.BatchTx()
+	tx.Lock()
+	defer tx.Unlock()
+
+	var resp pb.AuthRoleListResponse
+
+	roles := getAllRoles(tx)
+
+	for _, r := range roles {
+		resp.Roles = append(resp.Roles, string(r.Name))
+	}
+
+	return &resp, nil
+}
+
+func (as *authStore) RoleRevokePermission(r *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error) {
+	tx := as.be.BatchTx()
+	tx.Lock()
+	defer tx.Unlock()
+
+	role := getRole(tx, r.Role)
+	if role == nil {
+		return nil, ErrRoleNotFound
+	}
+
+	updatedRole := &authpb.Role{
+		Name: role.Name,
+	}
+
+	for _, perm := range role.KeyPermission {
+		if !bytes.Equal(perm.Key, []byte(r.Key)) || !bytes.Equal(perm.RangeEnd, []byte(r.RangeEnd)) {
+			updatedRole.KeyPermission = append(updatedRole.KeyPermission, perm)
+		}
+	}
+
+	if len(role.KeyPermission) == len(updatedRole.KeyPermission) {
+		return nil, ErrPermissionNotGranted
+	}
+
+	putRole(tx, updatedRole)
+
+	// TODO(mitake): currently single role update invalidates every cache
+	// It should be optimized.
+	as.clearCachedPerm()
+
+	plog.Noticef("revoked key %s from role %s", r.Key, r.Role)
+	return &pb.AuthRoleRevokePermissionResponse{}, nil
+}
+
+func (as *authStore) RoleDelete(r *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDeleteResponse, error) {
+	// TODO(mitake): current scheme of role deletion allows existing users to have the deleted roles
+	//
+	// Assume a case like below:
+	// create a role r1
+	// create a user u1 and grant r1 to u1
+	// delete r1
+	//
+	// After this sequence, u1 is still granted the role r1. So if an admin creates a new role with the name r1,
+	// the new r1 is automatically granted to u1.
+	// In some cases, it would be confusing. So we need to provide an option for deleting the grant relation
+	// from all users.
+
+	tx := as.be.BatchTx()
+	tx.Lock()
+	defer tx.Unlock()
+
+	role := getRole(tx, r.Role)
+	if role == nil {
+		return nil, ErrRoleNotFound
+	}
+
+	delRole(tx, r.Role)
+
+	plog.Noticef("deleted role %s", r.Role)
+	return &pb.AuthRoleDeleteResponse{}, nil
 }

 func (as *authStore) RoleAdd(r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error) {
@ -262,8 +510,8 @@ func (as *authStore) RoleAdd(r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse,
 	tx.Lock()
 	defer tx.Unlock()

-	_, vs := tx.UnsafeRange(authRolesBucketName, []byte(r.Name), nil, 0)
-	if len(vs) != 0 {
+	role := getRole(tx, r.Name)
+	if role != nil {
 		return nil, ErrRoleAlreadyExist
 	}

@ -271,18 +519,20 @@ func (as *authStore) RoleAdd(r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse,
 		Name: []byte(r.Name),
 	}

-	marshaledRole, err := newRole.Marshal()
-	if err != nil {
-		return nil, err
-	}
-
-	tx.UnsafePut(authRolesBucketName, []byte(r.Name), marshaledRole)
+	putRole(tx, newRole)

 	plog.Noticef("Role %s is created", r.Name)

 	return &pb.AuthRoleAddResponse{}, nil
 }

+func (as *authStore) UsernameFromToken(token string) (string, bool) {
+	as.simpleTokensMu.RLock()
+	defer as.simpleTokensMu.RUnlock()
+	t, ok := as.simpleTokens[token]
+	return t, ok
+}
+
 type permSlice []*authpb.Permission

 func (perms permSlice) Len() int {
@ -297,34 +547,28 @@ func (perms permSlice) Swap(i, j int) {
 	perms[i], perms[j] = perms[j], perms[i]
 }

-func (as *authStore) RoleGrant(r *pb.AuthRoleGrantRequest) (*pb.AuthRoleGrantResponse, error) {
+func (as *authStore) RoleGrantPermission(r *pb.AuthRoleGrantPermissionRequest) (*pb.AuthRoleGrantPermissionResponse, error) {
 	tx := as.be.BatchTx()
 	tx.Lock()
 	defer tx.Unlock()

-	_, vs := tx.UnsafeRange(authRolesBucketName, []byte(r.Name), nil, 0)
-	if len(vs) != 1 {
+	role := getRole(tx, r.Name)
+	if role == nil {
 		return nil, ErrRoleNotFound
 	}

-	role := &authpb.Role{}
-	err := role.Unmarshal(vs[0])
-	if err != nil {
-		plog.Errorf("failed to unmarshal a role %s: %s", r.Name, err)
-		return nil, err
-	}
-
 	idx := sort.Search(len(role.KeyPermission), func(i int) bool {
 		return bytes.Compare(role.KeyPermission[i].Key, []byte(r.Perm.Key)) >= 0
 	})

-	if idx < len(role.KeyPermission) && bytes.Equal(role.KeyPermission[idx].Key, r.Perm.Key) {
+	if idx < len(role.KeyPermission) && bytes.Equal(role.KeyPermission[idx].Key, r.Perm.Key) && bytes.Equal(role.KeyPermission[idx].RangeEnd, r.Perm.RangeEnd) {
 		// update existing permission
 		role.KeyPermission[idx].PermType = r.Perm.PermType
 	} else {
 		// append new permission to the role
 		newPerm := &authpb.Permission{
 			Key:      []byte(r.Perm.Key),
+			RangeEnd: []byte(r.Perm.RangeEnd),
 			PermType: r.Perm.PermType,
 		}

@ -332,17 +576,168 @@ func (as *authStore) RoleGrant(r *pb.AuthRoleGrantRequest) (*pb.AuthRoleGrantRes
 		sort.Sort(permSlice(role.KeyPermission))
 	}

-	marshaledRole, merr := role.Marshal()
-	if merr != nil {
-		plog.Errorf("failed to marshal updated role %s: %s", r.Name, merr)
-		return nil, merr
-	}
+	putRole(tx, role)

-	tx.UnsafePut(authRolesBucketName, []byte(r.Name), marshaledRole)
+	// TODO(mitake): currently single role update invalidates every cache
+	// It should be optimized.
+	as.clearCachedPerm()

 	plog.Noticef("role %s's permission of key %s is updated as %s", r.Name, r.Perm.Key, authpb.Permission_Type_name[int32(r.Perm.PermType)])

-	return &pb.AuthRoleGrantResponse{}, nil
+	return &pb.AuthRoleGrantPermissionResponse{}, nil
+}
+
+func (as *authStore) isOpPermitted(userName string, key, rangeEnd []byte, permTyp authpb.Permission_Type) bool {
+	// TODO(mitake): this function would be costly so we need a caching mechanism
+	if !as.isAuthEnabled() {
+		return true
+	}
+
+	tx := as.be.BatchTx()
+	tx.Lock()
+	defer tx.Unlock()
+
+	user := getUser(tx, userName)
+	if user == nil {
+		plog.Errorf("invalid user name %s for permission checking", userName)
+		return false
+	}
+
+	if as.isRangeOpPermitted(tx, userName, key, rangeEnd, permTyp) {
+		return true
+	}
+
+	return false
+}
+
+func (as *authStore) IsPutPermitted(username string, key []byte) bool {
+	return as.isOpPermitted(username, key, nil, authpb.WRITE)
+}
+
+func (as *authStore) IsRangePermitted(username string, key, rangeEnd []byte) bool {
+	return as.isOpPermitted(username, key, rangeEnd, authpb.READ)
+}
+
+func (as *authStore) IsDeleteRangePermitted(username string, key, rangeEnd []byte) bool {
+	return as.isOpPermitted(username, key, rangeEnd, authpb.WRITE)
+}
+
+func (as *authStore) IsAdminPermitted(username string) bool {
+	if !as.isAuthEnabled() {
+		return true
+	}
+
+	tx := as.be.BatchTx()
+	tx.Lock()
+	defer tx.Unlock()
+
+	u := getUser(tx, username)
+	if u == nil {
+		return false
+	}
+
+	return hasRootRole(u)
+}
+
+func getUser(tx backend.BatchTx, username string) *authpb.User {
+	_, vs := tx.UnsafeRange(authUsersBucketName, []byte(username), nil, 0)
+	if len(vs) == 0 {
+		return nil
+	}
+
+	user := &authpb.User{}
+	err := user.Unmarshal(vs[0])
+	if err != nil {
+		plog.Panicf("failed to unmarshal user struct (name: %s): %s", username, err)
+	}
+	return user
+}
+
+func getAllUsers(tx backend.BatchTx) []*authpb.User {
+	_, vs := tx.UnsafeRange(authUsersBucketName, []byte{0}, []byte{0xff}, -1)
+	if len(vs) == 0 {
+		return nil
+	}
+
+	var users []*authpb.User
+
+	for _, v := range vs {
+		user := &authpb.User{}
+		err := user.Unmarshal(v)
+		if err != nil {
+			plog.Panicf("failed to unmarshal user struct: %s", err)
+		}
+
+		users = append(users, user)
+	}
+
+	return users
+}
+
+func putUser(tx backend.BatchTx, user *authpb.User) {
+	b, err := user.Marshal()
+	if err != nil {
+		plog.Panicf("failed to marshal user struct (name: %s): %s", user.Name, err)
+	}
+	tx.UnsafePut(authUsersBucketName, user.Name, b)
+}
+
+func delUser(tx backend.BatchTx, username string) {
+	tx.UnsafeDelete(authUsersBucketName, []byte(username))
+}
+
+func getRole(tx backend.BatchTx, rolename string) *authpb.Role {
+	_, vs := tx.UnsafeRange(authRolesBucketName, []byte(rolename), nil, 0)
+	if len(vs) == 0 {
+		return nil
+	}
+
+	role := &authpb.Role{}
+	err := role.Unmarshal(vs[0])
+	if err != nil {
+		plog.Panicf("failed to unmarshal role struct (name: %s): %s", rolename, err)
+	}
+	return role
+}
+
+func getAllRoles(tx backend.BatchTx) []*authpb.Role {
+	_, vs := tx.UnsafeRange(authRolesBucketName, []byte{0}, []byte{0xff}, -1)
+	if len(vs) == 0 {
+		return nil
+	}
+
+	var roles []*authpb.Role
+
+	for _, v := range vs {
+		role := &authpb.Role{}
+		err := role.Unmarshal(v)
+		if err != nil {
+			plog.Panicf("failed to unmarshal role struct: %s", err)
+		}
+
+		roles = append(roles, role)
+	}
+
+	return roles
+}
+
+func putRole(tx backend.BatchTx, role *authpb.Role) {
+	b, err := role.Marshal()
+	if err != nil {
+		plog.Panicf("failed to marshal role struct (name: %s): %s", role.Name, err)
+	}
+
+	tx.UnsafePut(authRolesBucketName, []byte(role.Name), b)
+}
+
+func delRole(tx backend.BatchTx, rolename string) {
+	tx.UnsafeDelete(authRolesBucketName, []byte(rolename))
+}
+
+func (as *authStore) isAuthEnabled() bool {
+	as.enabledMu.RLock()
+	defer as.enabledMu.RUnlock()
+	return as.enabled
 }

 func NewAuthStore(be backend.Backend) *authStore {
@ -357,6 +752,16 @@ func NewAuthStore(be backend.Backend) *authStore {
 	be.ForceCommit()

 	return &authStore{
-		be: be,
+		be:           be,
+		simpleTokens: make(map[string]string),
 	}
 }
+
+func hasRootRole(u *authpb.User) bool {
+	for _, r := range u.Roles {
+		if r == rootRole {
+			return true
+		}
+	}
+	return false
+}
--- a/auth/store_test.go
+++ b/auth/store_test.go
@ -1,4 +1,4 @@
-// Copyright 2016 CoreOS, Inc.
+// Copyright 2016 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -20,6 +20,7 @@ import (

 	pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
 	"github.com/coreos/etcd/mvcc/backend"
+	"golang.org/x/net/context"
 )

 func TestUserAdd(t *testing.T) {
@ -60,7 +61,8 @@ func TestAuthenticate(t *testing.T) {
 	}

 	// auth a non-existing user
-	_, err = as.Authenticate("foo-test", "bar")
+	ctx1 := context.WithValue(context.WithValue(context.TODO(), "index", uint64(1)), "simpleToken", "dummy")
+	_, err = as.Authenticate(ctx1, "foo-test", "bar")
 	if err == nil {
 		t.Fatalf("expected %v, got %v", ErrAuthFailed, err)
 	}
@ -69,13 +71,15 @@ func TestAuthenticate(t *testing.T) {
 	}

 	// auth an existing user with correct password
-	_, err = as.Authenticate("foo", "bar")
+	ctx2 := context.WithValue(context.WithValue(context.TODO(), "index", uint64(2)), "simpleToken", "dummy")
+	_, err = as.Authenticate(ctx2, "foo", "bar")
 	if err != nil {
 		t.Fatal(err)
 	}

 	// auth an existing user but with wrong password
-	_, err = as.Authenticate("foo", "")
+	ctx3 := context.WithValue(context.WithValue(context.TODO(), "index", uint64(3)), "simpleToken", "dummy")
+	_, err = as.Authenticate(ctx3, "foo", "")
 	if err == nil {
 		t.Fatalf("expected %v, got %v", ErrAuthFailed, err)
 	}
@ -129,7 +133,9 @@ func TestUserChangePassword(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	_, err = as.Authenticate("foo", "")
+
+	ctx1 := context.WithValue(context.WithValue(context.TODO(), "index", uint64(1)), "simpleToken", "dummy")
+	_, err = as.Authenticate(ctx1, "foo", "")
 	if err != nil {
 		t.Fatal(err)
 	}
@ -138,7 +144,9 @@ func TestUserChangePassword(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	_, err = as.Authenticate("foo", "bar")
+
+	ctx2 := context.WithValue(context.WithValue(context.TODO(), "index", uint64(2)), "simpleToken", "dummy")
+	_, err = as.Authenticate(ctx2, "foo", "bar")
 	if err != nil {
 		t.Fatal(err)
 	}
@ -190,13 +198,13 @@ func TestUserGrant(t *testing.T) {
 	}

 	// grants a role to the user
-	_, err = as.UserGrant(&pb.AuthUserGrantRequest{User: "foo", Role: "role-test"})
+	_, err = as.UserGrantRole(&pb.AuthUserGrantRoleRequest{User: "foo", Role: "role-test"})
 	if err != nil {
 		t.Fatal(err)
 	}

 	// grants a role to a non-existing user
-	_, err = as.UserGrant(&pb.AuthUserGrantRequest{User: "foo-test", Role: "role-test"})
+	_, err = as.UserGrantRole(&pb.AuthUserGrantRoleRequest{User: "foo-test", Role: "role-test"})
 	if err == nil {
 		t.Fatalf("expected %v, got %v", ErrUserNotFound, err)
 	}
--- a/48
+++ b/48
@ -1,22 +1,44 @@
 #!/bin/sh -e

+# set some environment variables
 ORG_PATH="github.com/coreos"
 REPO_PATH="${ORG_PATH}/etcd"
-
 export GO15VENDOREXPERIMENT="1"
-
 eval $(go env)
-
 GIT_SHA=`git rev-parse --short HEAD || echo "GitNotFound"`
-
-LINK_OPERATOR="="
-
-if [ -z "${GOARCH}" ] || [ "${GOARCH}" = "$(go env GOHOSTARCH)" ]; then
-	out="bin"
-else
-	out="bin/${GOARCH}"
+if [ ! -z "$FAILPOINTS" ]; then
+	GIT_SHA="$GIT_SHA"-FAILPOINTS
 fi

-# Static compilation is useful when etcd is run in a container
-CGO_ENABLED=0 go build $GO_BUILD_FLAGS -installsuffix cgo -ldflags "-s -X ${REPO_PATH}/cmd/vendor/${REPO_PATH}/version.GitSHA${LINK_OPERATOR}${GIT_SHA}" -o ${out}/etcd ${REPO_PATH}/cmd
-CGO_ENABLED=0 go build $GO_BUILD_FLAGS -installsuffix cgo -ldflags "-s" -o ${out}/etcdctl ${REPO_PATH}/cmd/etcdctl
+# enable/disable failpoints
+toggle_failpoints() {
+	FAILPKGS="etcdserver/"
+
+	mode="disable"
+	if [ ! -z "$FAILPOINTS" ]; then mode="enable"; fi
+	if [ ! -z "$1" ]; then mode="$1"; fi
+
+	if which gofail >/dev/null 2>&1; then
+		gofail "$mode" $FAILPKGS
+	elif [ "$mode" != "disable" ]; then
+		echo "FAILPOINTS set but gofail not found"
+		exit 1
+	fi
+}
+
+etcd_build() {
+	if [ -z "${GOARCH}" ] || [ "${GOARCH}" = "$(go env GOHOSTARCH)" ]; then
+		out="bin"
+	else
+		out="bin/${GOARCH}"
+	fi
+	toggle_failpoints
+	# Static compilation is useful when etcd is run in a container
+	CGO_ENABLED=0 go build $GO_BUILD_FLAGS -installsuffix cgo -ldflags "-s -X ${REPO_PATH}/cmd/vendor/${REPO_PATH}/version.GitSHA=${GIT_SHA}" -o ${out}/etcd ${REPO_PATH}/cmd
+	CGO_ENABLED=0 go build $GO_BUILD_FLAGS -installsuffix cgo -ldflags "-s" -o ${out}/etcdctl ${REPO_PATH}/cmd/etcdctl
+}
+
+toggle_failpoints
+
+# don't build when sourced
+(echo "$0" | grep "/build$" > /dev/null) && etcd_build || true
--- a/client/auth_role.go
+++ b/client/auth_role.go
@ -1,4 +1,4 @@
-// Copyright 2015 CoreOS, Inc.
+// Copyright 2015 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
--- a/client/auth_user.go
+++ b/client/auth_user.go
@ -1,4 +1,4 @@
-// Copyright 2015 CoreOS, Inc.
+// Copyright 2015 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
--- a/client/client.go
+++ b/client/client.go
@ -1,4 +1,4 @@
-// Copyright 2015 CoreOS, Inc.
+// Copyright 2015 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -37,6 +37,10 @@ var (
 	ErrClusterUnavailable    = errors.New("client: etcd cluster is unavailable or misconfigured")
 	ErrNoLeaderEndpoint      = errors.New("client: no leader endpoint available")
 	errTooManyRedirectChecks = errors.New("client: too many redirect checks")
+
+	// oneShotCtxValue is set on a context using WithValue(&oneShotCtxValue, ...) so
+	// that Do() will not retry a request
+	oneShotCtxValue interface{}
 )

 var DefaultRequestTimeout = 5 * time.Second
@ -335,6 +339,7 @@ func (c *httpClusterClient) Do(ctx context.Context, act httpAction) (*http.Respo
 	var body []byte
 	var err error
 	cerr := &ClusterError{}
+	isOneShot := ctx.Value(&oneShotCtxValue) != nil

 	for i := pinned; i < leps+pinned; i++ {
 		k := i % leps
@ -348,6 +353,9 @@ func (c *httpClusterClient) Do(ctx context.Context, act httpAction) (*http.Respo
 			if err == context.Canceled || err == context.DeadlineExceeded {
 				return nil, nil, err
 			}
+			if isOneShot {
+				return nil, nil, err
+			}
 			continue
 		}
 		if resp.StatusCode/100 == 5 {
@ -358,6 +366,9 @@ func (c *httpClusterClient) Do(ctx context.Context, act httpAction) (*http.Respo
 			default:
 				cerr.Errors = append(cerr.Errors, fmt.Errorf("client: etcd member %s returns server error [%s]", eps[k].String(), http.StatusText(resp.StatusCode)))
 			}
+			if isOneShot {
+				return nil, nil, cerr.Errors[0]
+			}
 			continue
 		}
 		if k != pinned {
--- a/client/client_test.go
+++ b/client/client_test.go
@ -1,4 +1,4 @@
-// Copyright 2015 CoreOS, Inc.
+// Copyright 2015 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
--- a/client/cluster_error.go
+++ b/client/cluster_error.go
@ -1,4 +1,4 @@
-// Copyright 2015 CoreOS, Inc.
+// Copyright 2015 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
--- a/client/curl.go
+++ b/client/curl.go
@ -1,4 +1,4 @@
-// Copyright 2015 CoreOS, Inc.
+// Copyright 2015 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
--- a/client/discover.go
+++ b/client/discover.go
@ -1,4 +1,4 @@
-// Copyright 2015 CoreOS, Inc.
+// Copyright 2015 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
--- a/client/doc.go
+++ b/client/doc.go
@ -1,4 +1,4 @@
-// Copyright 2015 CoreOS, Inc.
+// Copyright 2015 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
--- a/client/fake_transport_test.go
+++ b/client/fake_transport_test.go
@ -1,4 +1,4 @@
-// Copyright 2015 CoreOS, Inc.
+// Copyright 2015 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
--- a/client/integration/client_test.go
+++ b/client/integration/client_test.go
@ -0,0 +1,134 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package integration
+
+import (
+	"fmt"
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"strings"
+	"sync/atomic"
+	"testing"
+
+	"golang.org/x/net/context"
+
+	"github.com/coreos/etcd/client"
+	"github.com/coreos/etcd/integration"
+	"github.com/coreos/etcd/pkg/testutil"
+)
+
+// TestV2NoRetryEOF tests destructive api calls won't retry on a disconnection.
+func TestV2NoRetryEOF(t *testing.T) {
+	defer testutil.AfterTest(t)
+	// generate an EOF response; specify address so it appears first in sorted ep list
+	lEOF := integration.NewListenerWithAddr(t, fmt.Sprintf("eof:123.%d.sock", os.Getpid()))
+	defer lEOF.Close()
+	tries := uint32(0)
+	go func() {
+		for {
+			conn, err := lEOF.Accept()
+			if err != nil {
+				return
+			}
+			atomic.AddUint32(&tries, 1)
+			conn.Close()
+		}
+	}()
+	eofURL := integration.UrlScheme + "://" + lEOF.Addr().String()
+	cli := integration.MustNewHTTPClient(t, []string{eofURL, eofURL}, nil)
+	kapi := client.NewKeysAPI(cli)
+	for i, f := range noRetryList(kapi) {
+		startTries := atomic.LoadUint32(&tries)
+		if err := f(); err == nil {
+			t.Errorf("#%d: expected EOF error, got nil", i)
+		}
+		endTries := atomic.LoadUint32(&tries)
+		if startTries+1 != endTries {
+			t.Errorf("#%d: expected 1 try, got %d", i, endTries-startTries)
+		}
+	}
+}
+
+// TestV2NoRetryNoLeader tests destructive api calls won't retry if given an error code.
+func TestV2NoRetryNoLeader(t *testing.T) {
+	defer testutil.AfterTest(t)
+
+	lHttp := integration.NewListenerWithAddr(t, fmt.Sprintf("errHttp:123.%d.sock", os.Getpid()))
+	eh := &errHandler{errCode: http.StatusServiceUnavailable}
+	srv := httptest.NewUnstartedServer(eh)
+	defer lHttp.Close()
+	defer srv.Close()
+	srv.Listener = lHttp
+	go srv.Start()
+	lHttpURL := integration.UrlScheme + "://" + lHttp.Addr().String()
+
+	cli := integration.MustNewHTTPClient(t, []string{lHttpURL, lHttpURL}, nil)
+	kapi := client.NewKeysAPI(cli)
+	// test error code
+	for i, f := range noRetryList(kapi) {
+		reqs := eh.reqs
+		if err := f(); err == nil || !strings.Contains(err.Error(), "no leader") {
+			t.Errorf("#%d: expected \"no leader\", got %v", i, err)
+		}
+		if eh.reqs != reqs+1 {
+			t.Errorf("#%d: expected 1 request, got %d", i, eh.reqs-reqs)
+		}
+	}
+}
+
+// TestV2RetryRefuse tests destructive api calls will retry if a connection is refused.
+func TestV2RetryRefuse(t *testing.T) {
+	defer testutil.AfterTest(t)
+	cl := integration.NewCluster(t, 1)
+	cl.Launch(t)
+	defer cl.Terminate(t)
+	// test connection refused; expect no error failover
+	cli := integration.MustNewHTTPClient(t, []string{integration.UrlScheme + "://refuseconn:123", cl.URL(0)}, nil)
+	kapi := client.NewKeysAPI(cli)
+	if _, err := kapi.Set(context.Background(), "/delkey", "def", nil); err != nil {
+		t.Fatal(err)
+	}
+	for i, f := range noRetryList(kapi) {
+		if err := f(); err != nil {
+			t.Errorf("#%d: unexpected retry failure (%v)", i, err)
+		}
+	}
+}
+
+type errHandler struct {
+	errCode int
+	reqs    int
+}
+
+func (eh *errHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) {
+	req.Body.Close()
+	eh.reqs++
+	w.WriteHeader(eh.errCode)
+}
+
+func noRetryList(kapi client.KeysAPI) []func() error {
+	return []func() error{
+		func() error {
+			opts := &client.SetOptions{PrevExist: client.PrevNoExist}
+			_, err := kapi.Set(context.Background(), "/setkey", "bar", opts)
+			return err
+		},
+		func() error {
+			_, err := kapi.Delete(context.Background(), "/delkey", nil)
+			return err
+		},
+	}
+}
--- a/client/integration/main_test.go
+++ b/client/integration/main_test.go
@ -0,0 +1,20 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package integration
+
+import (
+	"os"
+	"testing"
+
+	"github.com/coreos/etcd/pkg/testutil"
+)
+
+// TestMain runs the package's tests and exits with their status, except
+// that a passing run is turned into exit code 1 when
+// testutil.CheckLeakedGoroutine returns true.
+func TestMain(m *testing.M) {
+	v := m.Run()
+	// only check for leaks when the tests themselves passed
+	if v == 0 && testutil.CheckLeakedGoroutine() {
+		os.Exit(1)
+	}
+	os.Exit(v)
+}
--- a/client/keys.go
+++ b/client/keys.go
@ -1,4 +1,4 @@
-// Copyright 2015 CoreOS, Inc.
+// Copyright 2015 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -337,7 +337,11 @@ func (k *httpKeysAPI) Set(ctx context.Context, key, val string, opts *SetOptions
 		act.Dir = opts.Dir
 	}

-	resp, body, err := k.client.Do(ctx, act)
+	doCtx := ctx
+	if act.PrevExist == PrevNoExist {
+		doCtx = context.WithValue(doCtx, &oneShotCtxValue, &oneShotCtxValue)
+	}
+	resp, body, err := k.client.Do(doCtx, act)
 	if err != nil {
 		return nil, err
 	}
@ -385,7 +389,8 @@ func (k *httpKeysAPI) Delete(ctx context.Context, key string, opts *DeleteOption
 		act.Recursive = opts.Recursive
 	}

-	resp, body, err := k.client.Do(ctx, act)
+	doCtx := context.WithValue(ctx, &oneShotCtxValue, &oneShotCtxValue)
+	resp, body, err := k.client.Do(doCtx, act)
 	if err != nil {
 		return nil, err
 	}
--- a/client/keys_bench_test.go
+++ b/client/keys_bench_test.go
@ -1,4 +1,4 @@
-// Copyright 2015 CoreOS, Inc.
+// Copyright 2015 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
--- a/client/keys_test.go
+++ b/client/keys_test.go
@ -1,4 +1,4 @@
-// Copyright 2015 CoreOS, Inc.
+// Copyright 2015 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -645,15 +645,14 @@ func assertRequest(got http.Request, wantMethod string, wantURL *url.URL, wantHe
 	} else {
 		if wantBody == nil {
 			return fmt.Errorf("want.Body=%v got.Body=%s", wantBody, got.Body)
-		} else {
-			gotBytes, err := ioutil.ReadAll(got.Body)
-			if err != nil {
-				return err
-			}
+		}
+		gotBytes, err := ioutil.ReadAll(got.Body)
+		if err != nil {
+			return err
+		}

-			if !reflect.DeepEqual(wantBody, gotBytes) {
-				return fmt.Errorf("want.Body=%s got.Body=%s", wantBody, gotBytes)
-			}
+		if !reflect.DeepEqual(wantBody, gotBytes) {
+			return fmt.Errorf("want.Body=%s got.Body=%s", wantBody, gotBytes)
 		}
 	}

--- a/client/members.go
+++ b/client/members.go
@ -1,4 +1,4 @@
-// Copyright 2015 CoreOS, Inc.
+// Copyright 2015 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
--- a/client/members_test.go
+++ b/client/members_test.go
@ -1,4 +1,4 @@
-// Copyright 2015 CoreOS, Inc.
+// Copyright 2015 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
--- a/client/srv.go
+++ b/client/srv.go
@ -1,4 +1,4 @@
-// Copyright 2015 CoreOS, Inc.
+// Copyright 2015 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
--- a/client/srv_test.go
+++ b/client/srv_test.go
@ -1,4 +1,4 @@
-// Copyright 2015 CoreOS, Inc.
+// Copyright 2015 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
--- a/client/util.go
+++ b/client/util.go
@ -1,4 +1,4 @@
-// Copyright 2016 CoreOS, Inc.
+// Copyright 2016 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
--- a/clientv3/auth.go
+++ b/clientv3/auth.go
@ -1,4 +1,4 @@
-// Copyright 2016 Nippon Telegraph and Telephone Corporation.
+// Copyright 2016 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -19,21 +19,28 @@ import (
 	"strings"

 	"github.com/coreos/etcd/auth/authpb"
-	"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
 	pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
 	"golang.org/x/net/context"
 	"google.golang.org/grpc"
 )

 type (
-	AuthEnableResponse             pb.AuthEnableResponse
-	AuthenticateResponse           pb.AuthenticateResponse
-	AuthUserAddResponse            pb.AuthUserAddResponse
-	AuthUserDeleteResponse         pb.AuthUserDeleteResponse
-	AuthUserChangePasswordResponse pb.AuthUserChangePasswordResponse
-	AuthUserGrantResponse          pb.AuthUserGrantResponse
-	AuthRoleAddResponse            pb.AuthRoleAddResponse
-	AuthRoleGrantResponse          pb.AuthRoleGrantResponse
+	AuthEnableResponse               pb.AuthEnableResponse
+	AuthDisableResponse              pb.AuthDisableResponse
+	AuthenticateResponse             pb.AuthenticateResponse
+	AuthUserAddResponse              pb.AuthUserAddResponse
+	AuthUserDeleteResponse           pb.AuthUserDeleteResponse
+	AuthUserChangePasswordResponse   pb.AuthUserChangePasswordResponse
+	AuthUserGrantRoleResponse        pb.AuthUserGrantRoleResponse
+	AuthUserGetResponse              pb.AuthUserGetResponse
+	AuthUserRevokeRoleResponse       pb.AuthUserRevokeRoleResponse
+	AuthRoleAddResponse              pb.AuthRoleAddResponse
+	AuthRoleGrantPermissionResponse  pb.AuthRoleGrantPermissionResponse
+	AuthRoleGetResponse              pb.AuthRoleGetResponse
+	AuthRoleRevokePermissionResponse pb.AuthRoleRevokePermissionResponse
+	AuthRoleDeleteResponse           pb.AuthRoleDeleteResponse
+	AuthUserListResponse             pb.AuthUserListResponse
+	AuthRoleListResponse             pb.AuthRoleListResponse

 	PermissionType authpb.Permission_Type
 )
@ -48,8 +55,8 @@ type Auth interface {
 	// AuthEnable enables auth of an etcd cluster.
 	AuthEnable(ctx context.Context) (*AuthEnableResponse, error)

-	// Authenticate does authenticate with given user name and password.
-	Authenticate(ctx context.Context, name string, password string) (*AuthenticateResponse, error)
+	// AuthDisable disables auth of an etcd cluster.
+	AuthDisable(ctx context.Context) (*AuthDisableResponse, error)

 	// UserAdd adds a new user to an etcd cluster.
 	UserAdd(ctx context.Context, name string, password string) (*AuthUserAddResponse, error)
@ -60,14 +67,35 @@ type Auth interface {
 	// UserChangePassword changes a password of a user.
 	UserChangePassword(ctx context.Context, name string, password string) (*AuthUserChangePasswordResponse, error)

-	// UserGrant grants a role to a user.
-	UserGrant(ctx context.Context, user string, role string) (*AuthUserGrantResponse, error)
+	// UserGrantRole grants a role to a user.
+	UserGrantRole(ctx context.Context, user string, role string) (*AuthUserGrantRoleResponse, error)
+
+	// UserGet gets a detailed information of a user.
+	UserGet(ctx context.Context, name string) (*AuthUserGetResponse, error)
+
+	// UserList gets a list of all users.
+	UserList(ctx context.Context) (*AuthUserListResponse, error)
+
+	// UserRevokeRole revokes a role of a user.
+	UserRevokeRole(ctx context.Context, name string, role string) (*AuthUserRevokeRoleResponse, error)

 	// RoleAdd adds a new role to an etcd cluster.
 	RoleAdd(ctx context.Context, name string) (*AuthRoleAddResponse, error)

-	// RoleGrant grants a permission to a role.
-	RoleGrant(ctx context.Context, name string, key string, permType PermissionType) (*AuthRoleGrantResponse, error)
+	// RoleGrantPermission grants a permission to a role.
+	RoleGrantPermission(ctx context.Context, name string, key, rangeEnd string, permType PermissionType) (*AuthRoleGrantPermissionResponse, error)
+
+	// RoleGet gets a detailed information of a role.
+	RoleGet(ctx context.Context, role string) (*AuthRoleGetResponse, error)
+
+	// RoleList gets a list of all roles.
+	RoleList(ctx context.Context) (*AuthRoleListResponse, error)
+
+	// RoleRevokePermission revokes a permission from a role.
+	RoleRevokePermission(ctx context.Context, role string, key, rangeEnd string) (*AuthRoleRevokePermissionResponse, error)
+
+	// RoleDelete deletes a role.
+	RoleDelete(ctx context.Context, role string) (*AuthRoleDeleteResponse, error)
 }

 type auth struct {
@ -88,46 +116,82 @@ func NewAuth(c *Client) Auth {

 func (auth *auth) AuthEnable(ctx context.Context) (*AuthEnableResponse, error) {
 	resp, err := auth.remote.AuthEnable(ctx, &pb.AuthEnableRequest{})
-	return (*AuthEnableResponse)(resp), rpctypes.Error(err)
+	return (*AuthEnableResponse)(resp), toErr(ctx, err)
 }

-func (auth *auth) Authenticate(ctx context.Context, name string, password string) (*AuthenticateResponse, error) {
-	resp, err := auth.remote.Authenticate(ctx, &pb.AuthenticateRequest{Name: name, Password: password})
-	return (*AuthenticateResponse)(resp), rpctypes.Error(err)
+func (auth *auth) AuthDisable(ctx context.Context) (*AuthDisableResponse, error) {
+	resp, err := auth.remote.AuthDisable(ctx, &pb.AuthDisableRequest{})
+	return (*AuthDisableResponse)(resp), toErr(ctx, err)
 }

 func (auth *auth) UserAdd(ctx context.Context, name string, password string) (*AuthUserAddResponse, error) {
 	resp, err := auth.remote.UserAdd(ctx, &pb.AuthUserAddRequest{Name: name, Password: password})
-	return (*AuthUserAddResponse)(resp), rpctypes.Error(err)
+	return (*AuthUserAddResponse)(resp), toErr(ctx, err)
 }

 func (auth *auth) UserDelete(ctx context.Context, name string) (*AuthUserDeleteResponse, error) {
 	resp, err := auth.remote.UserDelete(ctx, &pb.AuthUserDeleteRequest{Name: name})
-	return (*AuthUserDeleteResponse)(resp), rpctypes.Error(err)
+	return (*AuthUserDeleteResponse)(resp), toErr(ctx, err)
 }

 func (auth *auth) UserChangePassword(ctx context.Context, name string, password string) (*AuthUserChangePasswordResponse, error) {
 	resp, err := auth.remote.UserChangePassword(ctx, &pb.AuthUserChangePasswordRequest{Name: name, Password: password})
-	return (*AuthUserChangePasswordResponse)(resp), rpctypes.Error(err)
+	return (*AuthUserChangePasswordResponse)(resp), toErr(ctx, err)
 }

-func (auth *auth) UserGrant(ctx context.Context, user string, role string) (*AuthUserGrantResponse, error) {
-	resp, err := auth.remote.UserGrant(ctx, &pb.AuthUserGrantRequest{User: user, Role: role})
-	return (*AuthUserGrantResponse)(resp), rpctypes.Error(err)
+// UserGrantRole sends an AuthUserGrantRoleRequest for user and role and
+// returns the response; a non-nil RPC error is passed through toErr.
+func (auth *auth) UserGrantRole(ctx context.Context, user string, role string) (*AuthUserGrantRoleResponse, error) {
+	resp, err := auth.remote.UserGrantRole(ctx, &pb.AuthUserGrantRoleRequest{User: user, Role: role})
+	return (*AuthUserGrantRoleResponse)(resp), toErr(ctx, err)
+}
+
+// UserGet sends an AuthUserGetRequest for name with grpc.FailFast(false)
+// and returns the response; a non-nil RPC error is passed through toErr.
+func (auth *auth) UserGet(ctx context.Context, name string) (*AuthUserGetResponse, error) {
+	resp, err := auth.remote.UserGet(ctx, &pb.AuthUserGetRequest{Name: name}, grpc.FailFast(false))
+	return (*AuthUserGetResponse)(resp), toErr(ctx, err)
+}
+
+// UserList sends an empty AuthUserListRequest with grpc.FailFast(false)
+// and returns the response; a non-nil RPC error is passed through toErr.
+func (auth *auth) UserList(ctx context.Context) (*AuthUserListResponse, error) {
+	resp, err := auth.remote.UserList(ctx, &pb.AuthUserListRequest{}, grpc.FailFast(false))
+	return (*AuthUserListResponse)(resp), toErr(ctx, err)
+}
+
+func (auth *auth) UserRevokeRole(ctx context.Context, name string, role string) (*AuthUserRevokeRoleResponse, error) {
+	resp, err := auth.remote.UserRevokeRole(ctx, &pb.AuthUserRevokeRoleRequest{Name: name, Role: role})
+	return (*AuthUserRevokeRoleResponse)(resp), toErr(ctx, err)
 }

 func (auth *auth) RoleAdd(ctx context.Context, name string) (*AuthRoleAddResponse, error) {
 	resp, err := auth.remote.RoleAdd(ctx, &pb.AuthRoleAddRequest{Name: name})
-	return (*AuthRoleAddResponse)(resp), rpctypes.Error(err)
+	return (*AuthRoleAddResponse)(resp), toErr(ctx, err)
 }

-func (auth *auth) RoleGrant(ctx context.Context, name string, key string, permType PermissionType) (*AuthRoleGrantResponse, error) {
+func (auth *auth) RoleGrantPermission(ctx context.Context, name string, key, rangeEnd string, permType PermissionType) (*AuthRoleGrantPermissionResponse, error) {
 	perm := &authpb.Permission{
 		Key:      []byte(key),
+		RangeEnd: []byte(rangeEnd),
 		PermType: authpb.Permission_Type(permType),
 	}
-	resp, err := auth.remote.RoleGrant(ctx, &pb.AuthRoleGrantRequest{Name: name, Perm: perm})
-	return (*AuthRoleGrantResponse)(resp), rpctypes.Error(err)
+	resp, err := auth.remote.RoleGrantPermission(ctx, &pb.AuthRoleGrantPermissionRequest{Name: name, Perm: perm})
+	return (*AuthRoleGrantPermissionResponse)(resp), toErr(ctx, err)
+}
+
+// RoleGet sends an AuthRoleGetRequest for role with grpc.FailFast(false)
+// and returns the response; a non-nil RPC error is passed through toErr.
+func (auth *auth) RoleGet(ctx context.Context, role string) (*AuthRoleGetResponse, error) {
+	resp, err := auth.remote.RoleGet(ctx, &pb.AuthRoleGetRequest{Role: role}, grpc.FailFast(false))
+	return (*AuthRoleGetResponse)(resp), toErr(ctx, err)
+}
+
+// RoleList sends an empty AuthRoleListRequest with grpc.FailFast(false)
+// and returns the response; a non-nil RPC error is passed through toErr.
+func (auth *auth) RoleList(ctx context.Context) (*AuthRoleListResponse, error) {
+	resp, err := auth.remote.RoleList(ctx, &pb.AuthRoleListRequest{}, grpc.FailFast(false))
+	return (*AuthRoleListResponse)(resp), toErr(ctx, err)
+}
+
+// RoleRevokePermission sends an AuthRoleRevokePermissionRequest carrying
+// role, key and rangeEnd and returns the response; a non-nil RPC error is
+// passed through toErr.
+func (auth *auth) RoleRevokePermission(ctx context.Context, role string, key, rangeEnd string) (*AuthRoleRevokePermissionResponse, error) {
+	resp, err := auth.remote.RoleRevokePermission(ctx, &pb.AuthRoleRevokePermissionRequest{Role: role, Key: key, RangeEnd: rangeEnd})
+	return (*AuthRoleRevokePermissionResponse)(resp), toErr(ctx, err)
+}
+
+func (auth *auth) RoleDelete(ctx context.Context, role string) (*AuthRoleDeleteResponse, error) {
+	resp, err := auth.remote.RoleDelete(ctx, &pb.AuthRoleDeleteRequest{Role: role})
+	return (*AuthRoleDeleteResponse)(resp), toErr(ctx, err)
 }

 func StrToPermissionType(s string) (PermissionType, error) {
@ -137,3 +201,29 @@ func StrToPermissionType(s string) (PermissionType, error) {
 	}
 	return PermissionType(-1), fmt.Errorf("invalid permission type: %s", s)
 }
+
+// authenticator pairs a grpc connection with the Auth client built on it,
+// so the connection can be closed once authentication is done.
+type authenticator struct {
+	conn   *grpc.ClientConn // conn in-use
+	remote pb.AuthClient
+}
+
+// authenticate issues the Authenticate RPC for name and password with
+// grpc.FailFast(false) and returns the response; a non-nil RPC error is
+// passed through toErr.
+func (auth *authenticator) authenticate(ctx context.Context, name string, password string) (*AuthenticateResponse, error) {
+	resp, err := auth.remote.Authenticate(ctx, &pb.AuthenticateRequest{Name: name, Password: password}, grpc.FailFast(false))
+	return (*AuthenticateResponse)(resp), toErr(ctx, err)
+}
+
+// close closes the authenticator's connection; the error returned by
+// Close is discarded.
+func (auth *authenticator) close() {
+	auth.conn.Close()
+}
+
+// newAuthenticator dials endpoint with opts and returns an authenticator
+// holding the new connection and an Auth client created on it. A dial
+// error is returned as-is with a nil authenticator.
+func newAuthenticator(endpoint string, opts []grpc.DialOption) (*authenticator, error) {
+	conn, err := grpc.Dial(endpoint, opts...)
+	if err != nil {
+		return nil, err
+	}
+
+	return &authenticator{
+		conn:   conn,
+		remote: pb.NewAuthClient(conn),
+	}, nil
+}
--- a/clientv3/balancer.go
+++ b/clientv3/balancer.go
@ -0,0 +1,147 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package clientv3
+
+import (
+	"net/url"
+	"strings"
+	"sync"
+
+	"golang.org/x/net/context"
+	"google.golang.org/grpc"
+)
+
+// simpleBalancer does the bare minimum to expose multiple eps
+// to the grpc reconnection code path
+type simpleBalancer struct {
+	// addrs are the client's endpoints for grpc
+	addrs []grpc.Address
+	// notifyCh notifies grpc of the set of addresses for connecting
+	notifyCh chan []grpc.Address
+
+	// readyc closes once the first connection is up
+	readyc    chan struct{}
+	readyOnce sync.Once
+
+	// mu protects upEps, upc, and pinAddr
+	mu sync.RWMutex
+	// upEps holds the current endpoints that have an active connection
+	upEps map[string]struct{}
+	// upc closes when upEps transitions from empty to non-empty or the balancer closes.
+	upc chan struct{}
+
+	// pinAddr is the currently pinned address; set to the empty string on
+	// initialization and shutdown.
+	pinAddr string
+}
+
+// newSimpleBalancer builds a balancer for eps. Each endpoint is reduced to
+// its host with getHost, and the resulting address list is queued on the
+// buffered notifyCh so it is available to the first receiver of Notify().
+func newSimpleBalancer(eps []string) *simpleBalancer {
+	// capacity 1 lets the initial address list be sent without a receiver
+	notifyCh := make(chan []grpc.Address, 1)
+	addrs := make([]grpc.Address, len(eps))
+	for i := range eps {
+		addrs[i].Addr = getHost(eps[i])
+	}
+	notifyCh <- addrs
+	sb := &simpleBalancer{
+		addrs:    addrs,
+		notifyCh: notifyCh,
+		readyc:   make(chan struct{}),
+		upEps:    make(map[string]struct{}),
+		upc:      make(chan struct{}),
+	}
+	return sb
+}
+
+// Start is a no-op: target is ignored and nil is always returned.
+func (b *simpleBalancer) Start(target string) error { return nil }
+
+// ConnectNotify returns the current upc channel, read under mu. The
+// channel is closed once an endpoint is up; it is replaced with a fresh
+// one when the last endpoint goes down, so callers should re-fetch it.
+func (b *simpleBalancer) ConnectNotify() <-chan struct{} {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	return b.upc
+}
+
+// Up records that addr has an active connection and returns the function
+// to call when that connection goes down.
+//
+// The first endpoint to come up is pinned and upc is closed to release
+// callers blocked in Get. An Up that arrives after Close is a no-op: with
+// no endpoints up, upc is only ever left closed by shutdown, and closing
+// it a second time would panic.
+func (b *simpleBalancer) Up(addr grpc.Address) func(error) {
+	b.mu.Lock()
+	if len(b.upEps) == 0 {
+		select {
+		case <-b.upc:
+			// upc is already closed while nothing is up: the balancer was
+			// closed, so do not pin addr or close upc again.
+			b.mu.Unlock()
+			return func(error) {}
+		default:
+		}
+		// notify waiting Get()s and pin first connected address
+		close(b.upc)
+		b.pinAddr = addr.Addr
+	}
+	b.upEps[addr.Addr] = struct{}{}
+	b.mu.Unlock()
+	// notify client that a connection is up
+	b.readyOnce.Do(func() { close(b.readyc) })
+	return func(err error) {
+		b.mu.Lock()
+		delete(b.upEps, addr.Addr)
+		if len(b.upEps) == 0 && b.pinAddr != "" {
+			// last endpoint went down on a live balancer; make Get() block
+			// until the next Up
+			b.upc = make(chan struct{})
+		} else if b.pinAddr == addr.Addr {
+			// choose new random up endpoint
+			for k := range b.upEps {
+				b.pinAddr = k
+				break
+			}
+		}
+		b.mu.Unlock()
+	}
+}
+
+// Get returns the pinned address for an RPC, blocking until upc is closed
+// (an endpoint is up or the balancer was closed). It returns ctx.Err() if
+// ctx finishes first, and grpc.ErrClientConnClosing if no address is
+// pinned after waking. opts is not consulted and the returned completion
+// callback is a no-op.
+func (b *simpleBalancer) Get(ctx context.Context, opts grpc.BalancerGetOptions) (grpc.Address, func(), error) {
+	var addr string
+	for {
+		// snapshot upc under the lock; it is replaced when all endpoints go down
+		b.mu.RLock()
+		ch := b.upc
+		b.mu.RUnlock()
+		select {
+		case <-ch:
+		case <-ctx.Done():
+			return grpc.Address{Addr: ""}, nil, ctx.Err()
+		}
+		b.mu.RLock()
+		addr = b.pinAddr
+		upEps := len(b.upEps)
+		b.mu.RUnlock()
+		if addr == "" {
+			return grpc.Address{Addr: ""}, nil, grpc.ErrClientConnClosing
+		}
+		if upEps > 0 {
+			break
+		}
+		// every endpoint went down after upc fired; loop and wait again
+	}
+	return grpc.Address{Addr: addr}, func() {}, nil
+}
+
+// Notify returns the channel on which the endpoint address list is published.
+func (b *simpleBalancer) Notify() <-chan []grpc.Address { return b.notifyCh }
+
+// Close shuts the balancer down: it closes notifyCh, clears pinAddr so
+// that Get reports grpc.ErrClientConnClosing, and closes upc when no
+// endpoint is up so that blocked Get calls wake. It always returns nil.
+// NOTE(review): a second Close would close notifyCh again and panic;
+// confirm callers invoke it at most once.
+func (b *simpleBalancer) Close() error {
+	b.mu.Lock()
+	close(b.notifyCh)
+	// terminate all waiting Get()s
+	b.pinAddr = ""
+	if len(b.upEps) == 0 {
+		close(b.upc)
+	}
+	b.mu.Unlock()
+	return nil
+}
+
+// getHost returns the host part of ep when ep parses as a URL and contains
+// "://"; otherwise ep is returned unchanged.
+func getHost(ep string) string {
+	url, uerr := url.Parse(ep)
+	if uerr != nil || !strings.Contains(ep, "://") {
+		return ep
+	}
+	return url.Host
+}
--- a/clientv3/client.go
+++ b/clientv3/client.go
@ -1,4 +1,4 @@
-// Copyright 2016 CoreOS, Inc.
+// Copyright 2016 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -15,18 +15,22 @@
 package clientv3

 import (
+	"crypto/tls"
 	"errors"
+	"fmt"
 	"io/ioutil"
 	"log"
 	"net"
 	"net/url"
 	"strings"
-	"sync"
 	"time"

+	"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
+
 	"golang.org/x/net/context"
 	"google.golang.org/grpc"
 	"google.golang.org/grpc/credentials"
+	"google.golang.org/grpc/metadata"
 )

 var (
@ -42,29 +46,23 @@ type Client struct {
 	Auth
 	Maintenance

-	conn   *grpc.ClientConn
-	cfg    Config
-	creds  *credentials.TransportAuthenticator
-	mu     sync.RWMutex // protects connection selection and error list
-	errors []error      // errors passed to retryConnection
+	conn         *grpc.ClientConn
+	cfg          Config
+	creds        *credentials.TransportCredentials
+	balancer     *simpleBalancer
+	retryWrapper retryRpcFunc

 	ctx    context.Context
 	cancel context.CancelFunc

-	// fields below are managed by connMonitor
-
-	// reconnc accepts writes which signal the client should reconnect
-	reconnc chan error
-	// newconnc is closed on successful connect and set to a fresh channel
-	newconnc    chan struct{}
-	lastConnErr error
+	// Username is a username for authentication
+	Username string
+	// Password is a password for authentication
+	Password string
 }

 // New creates a new etcdv3 client from a given configuration.
 func New(cfg Config) (*Client, error) {
-	if cfg.RetryDialer == nil {
-		cfg.RetryDialer = dialEndpointList
-	}
 	if len(cfg.Endpoints) == 0 {
 		return nil, ErrNoAvailableEndpoints
 	}
@ -88,20 +86,8 @@ func NewFromConfigFile(path string) (*Client, error) {

 // Close shuts down the client's etcd connections.
 func (c *Client) Close() error {
-	c.mu.Lock()
-	if c.cancel == nil {
-		c.mu.Unlock()
-		return nil
-	}
 	c.cancel()
-	c.cancel = nil
-	err := c.conn.Close()
-	connc := c.newconnc
-	c.mu.Unlock()
-	c.Watcher.Close()
-	c.Lease.Close()
-	<-connc
-	return err
+	return toErr(c.ctx, c.conn.Close())
 }

 // Ctx is a context for "out of band" messages (e.g., for sending
@ -112,76 +98,173 @@ func (c *Client) Ctx() context.Context { return c.ctx }
 // Endpoints lists the registered endpoints for the client.
 func (c *Client) Endpoints() []string { return c.cfg.Endpoints }

-// Errors returns all errors that have been observed since called last.
-func (c *Client) Errors() (errs []error) {
-	c.mu.Lock()
-	defer c.mu.Unlock()
-	errs = c.errors
-	c.errors = nil
-	return errs
+type authTokenCredential struct {
+	token string
 }

-// Dial establishes a connection for a given endpoint using the client's config
-func (c *Client) Dial(endpoint string) (*grpc.ClientConn, error) {
-	opts := []grpc.DialOption{
-		grpc.WithBlock(),
-		grpc.WithTimeout(c.cfg.DialTimeout),
+// RequireTransportSecurity always reports false, i.e. the token credential
+// does not demand a secured transport.
+func (cred authTokenCredential) RequireTransportSecurity() bool {
+	return false
+}
+
+// GetRequestMetadata returns the request metadata carrying the auth token
+// under the "token" key. ctx and s are not used and the error is always nil.
+func (cred authTokenCredential) GetRequestMetadata(ctx context.Context, s ...string) (map[string]string, error) {
+	return map[string]string{
+		"token": cred.token,
+	}, nil
+}
+
+func (c *Client) dialTarget(endpoint string) (proto string, host string, creds *credentials.TransportCredentials) {
+	proto = "tcp"
+	host = endpoint
+	creds = c.creds
+	url, uerr := url.Parse(endpoint)
+	if uerr != nil || !strings.Contains(endpoint, "://") {
+		return
 	}
-	if c.creds != nil {
-		opts = append(opts, grpc.WithTransportCredentials(*c.creds))
-	} else {
-		opts = append(opts, grpc.WithInsecure())
+	// strip scheme:// prefix since grpc dials by host
+	host = url.Host
+	switch url.Scheme {
+	case "unix":
+		proto = "unix"
+	case "http":
+		creds = nil
+	case "https":
+		if creds != nil {
+			break
+		}
+		tlsconfig := &tls.Config{}
+		emptyCreds := credentials.NewTLS(tlsconfig)
+		creds = &emptyCreds
+	default:
+		return "", "", nil
+	}
+	return
+}
+
+// dialSetupOpts gives the dial opts prior to any authentication
+func (c *Client) dialSetupOpts(endpoint string, dopts ...grpc.DialOption) (opts []grpc.DialOption) {
+	if c.cfg.DialTimeout > 0 {
+		opts = []grpc.DialOption{grpc.WithTimeout(c.cfg.DialTimeout)}
+	}
+	opts = append(opts, dopts...)
+
+	// grpc issues TLS cert checks using the string passed into dial so
+	// that string must be the host. To recover the full scheme://host URL,
+	// have a map from hosts to the original endpoint.
+	host2ep := make(map[string]string)
+	for i := range c.cfg.Endpoints {
+		_, host, _ := c.dialTarget(c.cfg.Endpoints[i])
+		host2ep[host] = c.cfg.Endpoints[i]
 	}

-	proto := "tcp"
-	if url, uerr := url.Parse(endpoint); uerr == nil && url.Scheme == "unix" {
-		proto = "unix"
-		// strip unix:// prefix so certs work
-		endpoint = url.Host
-	}
-	f := func(a string, t time.Duration) (net.Conn, error) {
+	f := func(host string, t time.Duration) (net.Conn, error) {
+		proto, host, _ := c.dialTarget(host2ep[host])
+		if proto == "" {
+			return nil, fmt.Errorf("unknown scheme for %q", host)
+		}
 		select {
 		case <-c.ctx.Done():
 			return nil, c.ctx.Err()
 		default:
 		}
-		return net.DialTimeout(proto, a, t)
+		return net.DialTimeout(proto, host, t)
 	}
 	opts = append(opts, grpc.WithDialer(f))

-	conn, err := grpc.Dial(endpoint, opts...)
+	_, _, creds := c.dialTarget(endpoint)
+	if creds != nil {
+		opts = append(opts, grpc.WithTransportCredentials(*creds))
+	} else {
+		opts = append(opts, grpc.WithInsecure())
+	}
+
+	return opts
+}
+
+// Dial connects to a single endpoint using the client's config.
+// It delegates to dial without passing any additional dial options.
+func (c *Client) Dial(endpoint string) (*grpc.ClientConn, error) {
+	return c.dial(endpoint)
+}
+
+func (c *Client) dial(endpoint string, dopts ...grpc.DialOption) (*grpc.ClientConn, error) {
+	opts := c.dialSetupOpts(endpoint, dopts...)
+	host := getHost(endpoint)
+	if c.Username != "" && c.Password != "" {
+		// use dial options without dopts to avoid reusing the client balancer
+		auth, err := newAuthenticator(host, c.dialSetupOpts(endpoint))
+		if err != nil {
+			return nil, err
+		}
+		defer auth.close()
+
+		resp, err := auth.authenticate(c.ctx, c.Username, c.Password)
+		if err != nil {
+			return nil, err
+		}
+		opts = append(opts, grpc.WithPerRPCCredentials(authTokenCredential{token: resp.Token}))
+	}
+
+	conn, err := grpc.Dial(host, opts...)
 	if err != nil {
 		return nil, err
 	}
 	return conn, nil
 }

+// WithRequireLeader requires client requests to only succeed
+// when the cluster has a leader.
+// It returns a context derived from ctx that carries the
+// rpctypes.MetadataRequireLeaderKey / rpctypes.MetadataHasLeader metadata pair.
+func WithRequireLeader(ctx context.Context) context.Context {
+	md := metadata.Pairs(rpctypes.MetadataRequireLeaderKey, rpctypes.MetadataHasLeader)
+	return metadata.NewContext(ctx, md)
+}
+
 func newClient(cfg *Config) (*Client, error) {
 	if cfg == nil {
-		cfg = &Config{RetryDialer: dialEndpointList}
+		cfg = &Config{}
 	}
-	var creds *credentials.TransportAuthenticator
+	var creds *credentials.TransportCredentials
 	if cfg.TLS != nil {
 		c := credentials.NewTLS(cfg.TLS)
 		creds = &c
 	}
+
 	// use a temporary skeleton client to bootstrap first connection
 	ctx, cancel := context.WithCancel(context.TODO())
-	conn, err := cfg.RetryDialer(&Client{cfg: *cfg, creds: creds, ctx: ctx})
+	client := &Client{
+		conn:   nil,
+		cfg:    *cfg,
+		creds:  creds,
+		ctx:    ctx,
+		cancel: cancel,
+	}
+	if cfg.Username != "" && cfg.Password != "" {
+		client.Username = cfg.Username
+		client.Password = cfg.Password
+	}
+
+	client.balancer = newSimpleBalancer(cfg.Endpoints)
+	conn, err := client.dial(cfg.Endpoints[0], grpc.WithBalancer(client.balancer))
 	if err != nil {
 		return nil, err
 	}
-	client := &Client{
-		conn:     conn,
-		cfg:      *cfg,
-		creds:    creds,
-		ctx:      ctx,
-		cancel:   cancel,
-		reconnc:  make(chan error),
-		newconnc: make(chan struct{}),
-	}
+	client.conn = conn
+	client.retryWrapper = client.newRetryWrapper()

-	go client.connMonitor()
+	// wait for a connection
+	if cfg.DialTimeout > 0 {
+		hasConn := false
+		waitc := time.After(cfg.DialTimeout)
+		select {
+		case <-client.balancer.readyc:
+			hasConn = true
+		case <-ctx.Done():
+		case <-waitc:
+		}
+		if !hasConn {
+			client.cancel()
+			conn.Close()
+			return nil, grpc.ErrClientConnTimeout
+		}
+	}

 	client.Cluster = NewCluster(client)
 	client.KV = NewKV(client)
@ -200,102 +283,42 @@ func newClient(cfg *Config) (*Client, error) {
 }

 // ActiveConnection returns the current in-use connection
-func (c *Client) ActiveConnection() *grpc.ClientConn {
-	c.mu.RLock()
-	defer c.mu.RUnlock()
-	return c.conn
-}
-
-// retryConnection establishes a new connection
-func (c *Client) retryConnection(err error) (newConn *grpc.ClientConn, dialErr error) {
-	c.mu.Lock()
-	defer c.mu.Unlock()
-	if err != nil {
-		c.errors = append(c.errors, err)
-	}
-	if c.cancel == nil {
-		return nil, c.ctx.Err()
-	}
-	if c.conn != nil {
-		c.conn.Close()
-		if st, _ := c.conn.State(); st != grpc.Shutdown {
-			// wait so grpc doesn't leak sleeping goroutines
-			c.conn.WaitForStateChange(c.ctx, st)
-		}
-	}
-
-	c.conn, dialErr = c.cfg.RetryDialer(c)
-	if dialErr != nil {
-		c.errors = append(c.errors, dialErr)
-	}
-	return c.conn, dialErr
-}
-
-// connStartRetry schedules a reconnect if one is not already running
-func (c *Client) connStartRetry(err error) {
-	select {
-	case c.reconnc <- err:
-	default:
-	}
-}
-
-// connWait waits for a reconnect to be processed
-func (c *Client) connWait(ctx context.Context, err error) (*grpc.ClientConn, error) {
-	c.mu.Lock()
-	ch := c.newconnc
-	c.mu.Unlock()
-	c.connStartRetry(err)
-	select {
-	case <-ctx.Done():
-		return nil, ctx.Err()
-	case <-ch:
-	}
-	c.mu.Lock()
-	defer c.mu.Unlock()
-	return c.conn, c.lastConnErr
-}
-
-// connMonitor monitors the connection and handles retries
-func (c *Client) connMonitor() {
-	var err error
-	for {
-		select {
-		case err = <-c.reconnc:
-		case <-c.ctx.Done():
-			c.mu.Lock()
-			c.lastConnErr = c.ctx.Err()
-			close(c.newconnc)
-			c.mu.Unlock()
-			return
-		}
-		conn, connErr := c.retryConnection(err)
-		c.mu.Lock()
-		c.lastConnErr = connErr
-		c.conn = conn
-		close(c.newconnc)
-		c.newconnc = make(chan struct{})
-		c.mu.Unlock()
-	}
-}
-
-// dialEndpointList attempts to connect to each endpoint in order until a
-// connection is established.
-func dialEndpointList(c *Client) (*grpc.ClientConn, error) {
-	var err error
-	for _, ep := range c.Endpoints() {
-		conn, curErr := c.Dial(ep)
-		if curErr != nil {
-			err = curErr
-		} else {
-			return conn, nil
-		}
-	}
-	return nil, err
-}
+func (c *Client) ActiveConnection() *grpc.ClientConn { return c.conn }

 // isHaltErr returns true if the given error and context indicate no forward
 // progress can be made, even after reconnecting.
 func isHaltErr(ctx context.Context, err error) bool {
-	isRPCError := strings.HasPrefix(grpc.ErrorDesc(err), "etcdserver: ")
-	return isRPCError || ctx.Err() != nil
+	if ctx != nil && ctx.Err() != nil {
+		return true
+	}
+	if err == nil {
+		return false
+	}
+	eErr := rpctypes.Error(err)
+	if _, ok := eErr.(rpctypes.EtcdError); ok {
+		return eErr != rpctypes.ErrStopped && eErr != rpctypes.ErrNoLeader
+	}
+	// treat etcdserver errors not recognized by the client as halting
+	return isConnClosing(err) || strings.Contains(err.Error(), "etcdserver:")
+}
+
+// isConnClosing returns true if the error matches a grpc client closing error.
+// The match is a substring check on err.Error(), so err must be non-nil.
+func isConnClosing(err error) bool {
+	return strings.Contains(err.Error(), grpc.ErrClientConnClosing.Error())
+}
+
+func toErr(ctx context.Context, err error) error {
+	if err == nil {
+		return nil
+	}
+	err = rpctypes.Error(err)
+	switch {
+	case ctx.Err() != nil && strings.Contains(err.Error(), "context"):
+		err = ctx.Err()
+	case strings.Contains(err.Error(), ErrNoAvailableEndpoints.Error()):
+		err = ErrNoAvailableEndpoints
+	case strings.Contains(err.Error(), grpc.ErrClientConnClosing.Error()):
+		err = grpc.ErrClientConnClosing
+	}
+	return err
 }
--- a/clientv3/client_test.go
+++ b/clientv3/client_test.go
@ -1,4 +1,4 @@
-// Copyright 2016 CoreOS, Inc.
+// Copyright 2016 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -19,11 +19,15 @@ import (
 	"testing"
 	"time"

+	"github.com/coreos/etcd/etcdserver"
+	"github.com/coreos/etcd/pkg/testutil"
 	"golang.org/x/net/context"
 	"google.golang.org/grpc"
 )

 func TestDialTimeout(t *testing.T) {
+	defer testutil.AfterTest(t)
+
 	donec := make(chan error)
 	go func() {
 		// without timeout, grpc keeps redialing if connection refused
@ -55,9 +59,24 @@ func TestDialTimeout(t *testing.T) {
 	}
 }

+// TestDialNoTimeout checks that New returns a usable client and no error
+// when the config sets no DialTimeout, using the single endpoint
+// 127.0.0.1:12345.
+func TestDialNoTimeout(t *testing.T) {
+	cfg := Config{Endpoints: []string{"127.0.0.1:12345"}}
+	c, err := New(cfg)
+	if c == nil || err != nil {
+		t.Fatalf("new client with DialNoWait should succeed, got %v", err)
+	}
+	c.Close()
+}
+
 func TestIsHaltErr(t *testing.T) {
 	if !isHaltErr(nil, fmt.Errorf("etcdserver: some etcdserver error")) {
-		t.Errorf(`error prefixed with "etcdserver: " should be Halted`)
+		t.Errorf(`error prefixed with "etcdserver: " should be Halted by default`)
+	}
+	if isHaltErr(nil, etcdserver.ErrStopped) {
+		t.Errorf("error %v should not halt", etcdserver.ErrStopped)
+	}
+	if isHaltErr(nil, etcdserver.ErrNoLeader) {
+		t.Errorf("error %v should not halt", etcdserver.ErrNoLeader)
 	}
 	ctx, cancel := context.WithCancel(context.TODO())
 	if isHaltErr(ctx, nil) {
--- a/clientv3/cluster.go
+++ b/clientv3/cluster.go
@ -1,4 +1,4 @@
-// Copyright 2016 CoreOS, Inc.
+// Copyright 2016 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -15,7 +15,6 @@
 package clientv3

 import (
-	"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
 	pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
 	"golang.org/x/net/context"
 	"google.golang.org/grpc"
@ -44,62 +43,47 @@ type Cluster interface {
 }

+// cluster is the Cluster implementation returned by NewCluster; every call
+// is forwarded to remote.
 type cluster struct {
-	rc     *remoteClient
 	remote pb.ClusterClient
 }

+// NewCluster returns a Cluster that issues its RPCs through the client
+// produced by RetryClusterClient(c).
 func NewCluster(c *Client) Cluster {
-	ret := &cluster{}
-	f := func(conn *grpc.ClientConn) { ret.remote = pb.NewClusterClient(conn) }
-	ret.rc = newRemoteClient(c, f)
-	return ret
+	return &cluster{remote: RetryClusterClient(c)}
 }

+// MemberAdd sends a MemberAddRequest with peerAddrs as its PeerURLs. The
+// call is made once by this method; any error is converted with toErr.
 func (c *cluster) MemberAdd(ctx context.Context, peerAddrs []string) (*MemberAddResponse, error) {
 	r := &pb.MemberAddRequest{PeerURLs: peerAddrs}
-	resp, err := c.getRemote().MemberAdd(ctx, r)
+	resp, err := c.remote.MemberAdd(ctx, r)
 	if err == nil {
 		return (*MemberAddResponse)(resp), nil
 	}
-
+	// NOTE(review): the halt and non-halt paths now return the same value,
+	// so this check no longer affects the result.
 	if isHaltErr(ctx, err) {
-		return nil, rpctypes.Error(err)
+		return nil, toErr(ctx, err)
 	}
-
-	c.rc.reconnect(err)
-	return nil, rpctypes.Error(err)
+	return nil, toErr(ctx, err)
 }

+// MemberRemove sends a MemberRemoveRequest for the member with the given id.
+// The call is made once by this method; any error is converted with toErr.
 func (c *cluster) MemberRemove(ctx context.Context, id uint64) (*MemberRemoveResponse, error) {
 	r := &pb.MemberRemoveRequest{ID: id}
-	resp, err := c.getRemote().MemberRemove(ctx, r)
+	resp, err := c.remote.MemberRemove(ctx, r)
 	if err == nil {
 		return (*MemberRemoveResponse)(resp), nil
 	}
-
+	// NOTE(review): the halt and non-halt paths now return the same value,
+	// so this check no longer affects the result.
 	if isHaltErr(ctx, err) {
-		return nil, rpctypes.Error(err)
+		return nil, toErr(ctx, err)
 	}
-
-	c.rc.reconnect(err)
-	return nil, rpctypes.Error(err)
+	return nil, toErr(ctx, err)
 }

+// MemberUpdate sends a MemberUpdateRequest that sets the PeerURLs of member
+// id to peerAddrs. The request is re-issued until it succeeds or isHaltErr
+// reports that the error is final, in which case the error is converted
+// with toErr and returned.
 func (c *cluster) MemberUpdate(ctx context.Context, id uint64, peerAddrs []string) (*MemberUpdateResponse, error) {
 	// it is safe to retry on update.
 	for {
 		r := &pb.MemberUpdateRequest{ID: id, PeerURLs: peerAddrs}
-		resp, err := c.getRemote().MemberUpdate(ctx, r)
+		resp, err := c.remote.MemberUpdate(ctx, r)
 		if err == nil {
 			return (*MemberUpdateResponse)(resp), nil
 		}
-
+		// NOTE(review): with reconnectWait removed, a non-halt error loops
+		// straight back to the next attempt; no wait is visible in this
+		// method — confirm the remote client paces retries.
 		if isHaltErr(ctx, err) {
-			return nil, rpctypes.Error(err)
-		}
-
-		if err = c.rc.reconnectWait(ctx, err); err != nil {
-			return nil, rpctypes.Error(err)
+			return nil, toErr(ctx, err)
 		}
 	}
 }
@ -107,23 +91,12 @@ func (c *cluster) MemberUpdate(ctx context.Context, id uint64, peerAddrs []strin
+// MemberList sends an empty MemberListRequest with the grpc.FailFast(false)
+// call option. The request is re-issued until it succeeds or isHaltErr
+// reports that the error is final, in which case the error is converted
+// with toErr and returned.
 func (c *cluster) MemberList(ctx context.Context) (*MemberListResponse, error) {
 	// it is safe to retry on list.
 	for {
-		resp, err := c.getRemote().MemberList(ctx, &pb.MemberListRequest{})
+		resp, err := c.remote.MemberList(ctx, &pb.MemberListRequest{}, grpc.FailFast(false))
 		if err == nil {
 			return (*MemberListResponse)(resp), nil
 		}
-
+		// NOTE(review): with reconnectWait removed, a non-halt error loops
+		// straight back to the next attempt; no wait is visible in this
+		// method — confirm the remote client paces retries.
 		if isHaltErr(ctx, err) {
-			return nil, rpctypes.Error(err)
-		}
-
-		if err = c.rc.reconnectWait(ctx, err); err != nil {
-			return nil, rpctypes.Error(err)
+			return nil, toErr(ctx, err)
 		}
 	}
 }
-
-func (c *cluster) getRemote() pb.ClusterClient {
-	c.rc.mu.Lock()
-	defer c.rc.mu.Unlock()
-	return c.remote
-}
--- a/clientv3/compact_op.go
+++ b/clientv3/compact_op.go
@ -0,0 +1,53 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package clientv3
+
+import (
+	pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
+)
+
+// CompactOp represents a compact operation.
+// It is built with OpCompact and turned into a request by toRequest.
+type CompactOp struct {
+	// revision is copied into CompactionRequest.Revision.
+	revision int64
+	// physical is copied into CompactionRequest.Physical; it is set by
+	// WithCompactPhysical.
+	physical bool
+}
+
+// CompactOption configures compact operation.
+// Each option receives the CompactOp under construction and may modify it.
+type CompactOption func(*CompactOp)
+
+// applyCompactOpts runs every option in opts against op, in slice order.
+func (op *CompactOp) applyCompactOpts(opts []CompactOption) {
+	for i := range opts {
+		opts[i](op)
+	}
+}
+
+// OpCompact builds a CompactOp for revision rev and then applies each of
+// the given options to it in order.
+func OpCompact(rev int64, opts ...CompactOption) CompactOp {
+	var op CompactOp
+	op.revision = rev
+	op.applyCompactOpts(opts)
+	return op
+}
+
+// toRequest converts op into a newly allocated CompactionRequest carrying
+// the same revision and physical flag.
+func (op CompactOp) toRequest() *pb.CompactionRequest {
+	req := new(pb.CompactionRequest)
+	req.Revision = op.revision
+	req.Physical = op.physical
+	return req
+}
+
+// WithCompactPhysical returns an option that marks the compaction as
+// physical: the compact RPC call then waits until the compaction has been
+// physically applied to the local database, so that compacted entries are
+// totally removed from the backend database.
+func WithCompactPhysical() CompactOption {
+	markPhysical := func(op *CompactOp) {
+		op.physical = true
+	}
+	return markPhysical
+}
--- a/clientv3/compact_op_test.go
+++ b/clientv3/compact_op_test.go
@ -0,0 +1,30 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package clientv3
+
+import (
+	"reflect"
+	"testing"
+
+	"github.com/coreos/etcd/etcdserver/etcdserverpb"
+)
+
+// TestCompactOp verifies that OpCompact combined with WithCompactPhysical
+// converts to a CompactionRequest with the revision and Physical flag set.
+func TestCompactOp(t *testing.T) {
+	want := &etcdserverpb.CompactionRequest{Revision: 100, Physical: true}
+	got := OpCompact(100, WithCompactPhysical()).toRequest()
+	if reflect.DeepEqual(got, want) {
+		return
+	}
+	t.Fatalf("expected %+v, got %+v", want, got)
+}
--- a/clientv3/compare.go
+++ b/clientv3/compare.go
@ -1,4 +1,4 @@
-// Copyright 2016 CoreOS, Inc.
+// Copyright 2016 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
--- a/clientv3/concurrency/doc.go
+++ b/clientv3/concurrency/doc.go
@ -1,4 +1,4 @@
-// Copyright 2016 CoreOS, Inc.
+// Copyright 2016 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
--- a/clientv3/concurrency/election.go
+++ b/clientv3/concurrency/election.go
@ -1,4 +1,4 @@
-// Copyright 2016 CoreOS, Inc.
+// Copyright 2016 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -16,6 +16,7 @@ package concurrency

 import (
 	"errors"
+	"fmt"

 	v3 "github.com/coreos/etcd/clientv3"
 	"github.com/coreos/etcd/mvcc/mvccpb"
@ -39,7 +40,7 @@ type Election struct {

 // NewElection returns a new election on a given key prefix.
+// The stored prefix is pfx with a "/" appended.
+// NOTE(review): Campaign formats its key as "%s/%x" on top of this prefix,
+// which yields "<pfx>//<lease>", whereas Mutex.Lock uses "%s%x" with its
+// slash-terminated prefix — confirm the extra separator is intended.
 func NewElection(client *v3.Client, pfx string) *Election {
-	return &Election{client: client, keyPrefix: pfx}
+	return &Election{client: client, keyPrefix: pfx + "/"}
 }

 // Campaign puts a value as eligible for the election. It blocks until
@ -50,22 +51,38 @@ func (e *Election) Campaign(ctx context.Context, val string) error {
 		return serr
 	}

-	k, rev, err := NewUniqueKV(ctx, e.client, e.keyPrefix, val, v3.WithLease(s.Lease()))
-	if err == nil {
-		err = waitDeletes(ctx, e.client, e.keyPrefix, v3.WithPrefix(), v3.WithRev(rev-1))
+	k := fmt.Sprintf("%s/%x", e.keyPrefix, s.Lease())
+	txn := e.client.Txn(ctx).If(v3.Compare(v3.CreateRevision(k), "=", 0))
+	txn = txn.Then(v3.OpPut(k, val, v3.WithLease(s.Lease())))
+	txn = txn.Else(v3.OpGet(k))
+	resp, err := txn.Commit()
+	if err != nil {
+		return err
+	}
+	e.leaderKey, e.leaderRev, e.leaderSession = k, resp.Header.Revision, s
+	if !resp.Succeeded {
+		kv := resp.Responses[0].GetResponseRange().Kvs[0]
+		e.leaderRev = kv.CreateRevision
+		if string(kv.Value) != val {
+			if err = e.Proclaim(ctx, val); err != nil {
+				e.Resign(ctx)
+				return err
+			}
+		}
 	}

+	err = waitDeletes(ctx, e.client, e.keyPrefix, v3.WithPrefix(), v3.WithRev(e.leaderRev-1))
 	if err != nil {
 		// clean up in case of context cancel
 		select {
 		case <-ctx.Done():
-			e.client.Delete(e.client.Ctx(), k)
+			e.Resign(e.client.Ctx())
 		default:
+			e.leaderSession = nil
 		}
 		return err
 	}

-	e.leaderKey, e.leaderRev, e.leaderSession = k, rev, s
 	return nil
 }

@ -89,19 +106,19 @@ func (e *Election) Proclaim(ctx context.Context, val string) error {
 }

 // Resign lets a leader start a new election.
+// It returns nil without contacting the server when no leader session is
+// recorded. Otherwise the leader key is deleted using ctx, and the recorded
+// key and session are cleared whether or not the delete succeeded; the
+// delete error, if any, is returned.
-func (e *Election) Resign() (err error) {
+func (e *Election) Resign(ctx context.Context) (err error) {
 	if e.leaderSession == nil {
 		return nil
 	}
-	_, err = e.client.Delete(e.client.Ctx(), e.leaderKey)
+	_, err = e.client.Delete(ctx, e.leaderKey)
 	e.leaderKey = ""
 	e.leaderSession = nil
 	return err
 }

 // Leader returns the leader value for the current election.
-func (e *Election) Leader() (string, error) {
-	resp, err := e.client.Get(e.client.Ctx(), e.keyPrefix, v3.WithFirstCreate()...)
+func (e *Election) Leader(ctx context.Context) (string, error) {
+	resp, err := e.client.Get(ctx, e.keyPrefix, v3.WithFirstCreate()...)
 	if err != nil {
 		return "", err
 	} else if len(resp.Kvs) == 0 {
--- a/clientv3/concurrency/key.go
+++ b/clientv3/concurrency/key.go
@ -1,4 +1,4 @@
-// Copyright 2016 CoreOS, Inc.
+// Copyright 2016 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -17,34 +17,12 @@ package concurrency
 import (
 	"fmt"
 	"math"
-	"time"

 	v3 "github.com/coreos/etcd/clientv3"
 	"github.com/coreos/etcd/mvcc/mvccpb"
 	"golang.org/x/net/context"
 )

-// NewUniqueKey creates a new key from a given prefix.
-func NewUniqueKey(ctx context.Context, kv v3.KV, pfx string, opts ...v3.OpOption) (string, int64, error) {
-	return NewUniqueKV(ctx, kv, pfx, "", opts...)
-}
-
-func NewUniqueKV(ctx context.Context, kv v3.KV, pfx, val string, opts ...v3.OpOption) (string, int64, error) {
-	for {
-		newKey := fmt.Sprintf("%s/%v", pfx, time.Now().UnixNano())
-		put := v3.OpPut(newKey, val, opts...)
-		cmp := v3.Compare(v3.ModRevision(newKey), "=", 0)
-		resp, err := kv.Txn(ctx).If(cmp).Then(put).Commit()
-		if err != nil {
-			return "", 0, err
-		}
-		if !resp.Succeeded {
-			continue
-		}
-		return newKey, resp.Header.Revision, nil
-	}
-}
-
 func waitDelete(ctx context.Context, client *v3.Client, key string, rev int64) error {
 	cctx, cancel := context.WithCancel(ctx)
 	defer cancel()
--- a/clientv3/concurrency/mutex.go
+++ b/clientv3/concurrency/mutex.go
@ -1,4 +1,4 @@
-// Copyright 2016 CoreOS, Inc.
+// Copyright 2016 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -32,7 +32,7 @@ type Mutex struct {
 }

+// NewMutex returns a Mutex bound to client whose key prefix is pfx with a
+// "/" appended; the remaining fields start as "" and -1.
 func NewMutex(client *v3.Client, pfx string) *Mutex {
-	return &Mutex{client, pfx, "", -1}
+	return &Mutex{client, pfx + "/", "", -1}
 }

 // Lock locks the mutex with a cancellable context. If the context is cancelled
@ -43,7 +43,7 @@ func (m *Mutex) Lock(ctx context.Context) error {
 		return serr
 	}

-	m.myKey = fmt.Sprintf("%s/%x", m.pfx, s.Lease())
+	m.myKey = fmt.Sprintf("%s%x", m.pfx, s.Lease())
 	cmp := v3.Compare(v3.CreateRevision(m.myKey), "=", 0)
 	// put self in lock waiters via myKey; oldest waiter holds lock
 	put := v3.OpPut(m.myKey, "", v3.WithLease(s.Lease()))
@ -63,14 +63,14 @@ func (m *Mutex) Lock(ctx context.Context) error {
 	// release lock key if cancelled
 	select {
 	case <-ctx.Done():
-		m.Unlock()
+		m.Unlock(m.client.Ctx())
 	default:
 	}
 	return err
 }

-func (m *Mutex) Unlock() error {
-	if _, err := m.client.Delete(m.client.Ctx(), m.myKey); err != nil {
+func (m *Mutex) Unlock(ctx context.Context) error {
+	if _, err := m.client.Delete(ctx, m.myKey); err != nil {
 		return err
 	}
 	m.myKey = "\x00"
@ -92,7 +92,7 @@ func (lm *lockerMutex) Lock() {
 	}
 }
+// Unlock calls Mutex.Unlock with the client's context and panics if that
+// call returns an error.
 func (lm *lockerMutex) Unlock() {
-	if err := lm.Mutex.Unlock(); err != nil {
+	if err := lm.Mutex.Unlock(lm.client.Ctx()); err != nil {
 		panic(err)
 	}
 }
--- a/clientv3/concurrency/session.go
+++ b/clientv3/concurrency/session.go
@ -1,4 +1,4 @@
-// Copyright 2016 CoreOS, Inc.
+// Copyright 2016 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
--- a/clientv3/concurrency/stm.go
+++ b/clientv3/concurrency/stm.go
@ -1,4 +1,4 @@
-// Copyright 2016 CoreOS, Inc.
+// Copyright 2016 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -56,6 +56,12 @@ func NewSTMSerializable(ctx context.Context, c *v3.Client, apply func(STM) error
 	return runSTM(s, apply)
 }

+// NewSTMReadCommitted runs apply as a read committed transaction: the
+// underlying stm is configured with the WithSerializable get option and is
+// wrapped in stmReadCommitted before being handed to runSTM.
+func NewSTMReadCommitted(ctx context.Context, c *v3.Client, apply func(STM) error) (*v3.TxnResponse, error) {
+	base := stm{client: c, ctx: ctx, getOpts: []v3.OpOption{v3.WithSerializable()}}
+	return runSTM(&stmReadCommitted{base}, apply)
+}
+
 type stmResponse struct {
 	resp *v3.TxnResponse
 	err  error
@ -139,11 +145,12 @@ func (s *stm) commit() *v3.TxnResponse {
 }

 // cmps guards the txn from updates to read set
-func (s *stm) cmps() (cmps []v3.Cmp) {
+// by building one isKeyCurrent comparison per entry of rset.
+func (s *stm) cmps() []v3.Cmp {
+	// sized up front from rset; the result is an empty, non-nil slice when
+	// rset has no entries
+	cmps := make([]v3.Cmp, 0, len(s.rset))
 	for k, rk := range s.rset {
 		cmps = append(cmps, isKeyCurrent(k, rk))
 	}
-	return
+	return cmps
 }

 func (s *stm) fetch(key string) *v3.GetResponse {
@ -159,11 +166,12 @@ func (s *stm) fetch(key string) *v3.GetResponse {
 }

 // puts is the list of ops for all pending writes
-func (s *stm) puts() (puts []v3.Op) {
+// collected from the op of every entry in wset.
+func (s *stm) puts() []v3.Op {
+	// sized up front from wset; the result is an empty, non-nil slice when
+	// wset has no entries
+	puts := make([]v3.Op, 0, len(s.wset))
 	for _, v := range s.wset {
 		puts = append(puts, v.op)
 	}
-	return
+	return puts
 }

 func (s *stm) reset() {
@ -201,12 +209,14 @@ func (s *stmSerializable) Rev(key string) int64 {
 	return s.stm.Rev(key)
 }

-func (s *stmSerializable) gets() (keys []string, ops []v3.Op) {
+// gets returns the keys of rset together with one v3.OpGet per key; the two
+// slices are index-aligned because both are appended in the same iteration.
+func (s *stmSerializable) gets() ([]string, []v3.Op) {
+	keys := make([]string, 0, len(s.rset))
+	ops := make([]v3.Op, 0, len(s.rset))
 	for k := range s.rset {
 		keys = append(keys, k)
 		ops = append(ops, v3.OpGet(k))
 	}
-	return
+	return keys, ops
 }

 func (s *stmSerializable) commit() *v3.TxnResponse {
@ -230,6 +240,14 @@ func (s *stmSerializable) commit() *v3.TxnResponse {
 	return nil
 }

+// stmReadCommitted embeds stm and overrides commit; it is the transaction
+// type created by NewSTMReadCommitted.
+type stmReadCommitted struct{ stm }
+
+// commit always goes through when read committed
+// The read set is dropped before delegating to the embedded stm's commit,
+// presumably so that no read-set comparisons remain to reject the txn.
+func (s *stmReadCommitted) commit() *v3.TxnResponse {
+	s.rset = nil
+	return s.stm.commit()
+}
+
 func isKeyCurrent(k string, r *v3.GetResponse) v3.Cmp {
 	rev := r.Header.Revision + 1
 	if len(r.Kvs) != 0 {
--- a/clientv3/config.go
+++ b/clientv3/config.go
@ -1,4 +1,4 @@
-// Copyright 2016 CoreOS, Inc.
+// Copyright 2016 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -22,19 +22,12 @@ import (

 	"github.com/coreos/etcd/pkg/tlsutil"
 	"github.com/ghodss/yaml"
-	"google.golang.org/grpc"
 )

-// EndpointDialer is a policy for choosing which endpoint to dial next
-type EndpointDialer func(*Client) (*grpc.ClientConn, error)
-
 type Config struct {
 	// Endpoints is a list of URLs
 	Endpoints []string

-	// RetryDialer chooses the next endpoint to use
-	RetryDialer EndpointDialer
-
 	// DialTimeout is the timeout for failing to establish a connection.
 	DialTimeout time.Duration

@ -43,6 +36,12 @@ type Config struct {

 	// Logger is the logger used by client library.
 	Logger Logger
+
+	// Username is a username for authentication
+	Username string
+
+	// Password is a password for authentication
+	Password string
 }

 type yamlConfig struct {
--- a/clientv3/config_test.go
+++ b/clientv3/config_test.go
@ -1,4 +1,4 @@
-// Copyright 2016 CoreOS, Inc.
+// Copyright 2016 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
--- a/clientv3/doc.go
+++ b/clientv3/doc.go
@ -1,4 +1,4 @@
-// Copyright 2016 CoreOS, Inc.
+// Copyright 2016 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
--- a/clientv3/example_auth_test.go
+++ b/clientv3/example_auth_test.go
@ -0,0 +1,99 @@
+// Copyright 2016 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package clientv3_test
+
+import (
+	"fmt"
+	"log"
+
+	"github.com/coreos/etcd/clientv3"
+	"golang.org/x/net/context"
+)
+
+// ExampleAuth sets up a "root" role and user, enables authentication, and
+// then uses a client created with that username and password to perform a
+// put inside the granted key range, a txn on a key outside it (whose error
+// is printed), and a lookup of the role's permission. Authentication is
+// disabled again at the end.
+func ExampleAuth() {
+	cli, err := clientv3.New(clientv3.Config{
+		Endpoints:   endpoints,
+		DialTimeout: dialTimeout,
+	})
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer cli.Close()
+
+	authapi := clientv3.NewAuth(cli)
+
+	if _, err = authapi.RoleAdd(context.TODO(), "root"); err != nil {
+		log.Fatal(err)
+	}
+
+	if _, err = authapi.RoleGrantPermission(
+		context.TODO(),
+		"root", // role name
+		"foo",  // key
+		"zoo",  // range end
+		clientv3.PermissionType(clientv3.PermReadWrite),
+	); err != nil {
+		log.Fatal(err)
+	}
+
+	if _, err = authapi.UserAdd(context.TODO(), "root", "123"); err != nil {
+		log.Fatal(err)
+	}
+
+	if _, err = authapi.UserGrantRole(context.TODO(), "root", "root"); err != nil {
+		log.Fatal(err)
+	}
+
+	if _, err = authapi.AuthEnable(context.TODO()); err != nil {
+		log.Fatal(err)
+	}
+
+	// second client, this time carrying the credentials created above
+	cliAuth, err := clientv3.New(clientv3.Config{
+		Endpoints:   endpoints,
+		DialTimeout: dialTimeout,
+		Username:    "root",
+		Password:    "123",
+	})
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer cliAuth.Close()
+
+	// "foo1" is expected to succeed; any error aborts the example
+	kv := clientv3.NewKV(cliAuth)
+	if _, err = kv.Put(context.TODO(), "foo1", "bar"); err != nil {
+		log.Fatal(err)
+	}
+
+	// the txn touches "zoo1"; its error is printed and forms the first line
+	// of the expected output
+	_, err = kv.Txn(context.TODO()).
+		If(clientv3.Compare(clientv3.Value("zoo1"), ">", "abc")).
+		Then(clientv3.OpPut("zoo1", "XYZ")).
+		Else(clientv3.OpPut("zoo1", "ABC")).
+		Commit()
+	fmt.Println(err)
+
+	// now check the permission
+	authapi2 := clientv3.NewAuth(cliAuth)
+	resp, err := authapi2.RoleGet(context.TODO(), "root")
+	if err != nil {
+		log.Fatal(err)
+	}
+	fmt.Printf("root user permission: key %q, range end %q\n", resp.Perm[0].Key, resp.Perm[0].RangeEnd)
+
+	if _, err = authapi2.AuthDisable(context.TODO()); err != nil {
+		log.Fatal(err)
+	}
+	// Output: etcdserver: permission denied
+	// root user permission: key "foo", range end "zoo"
+}
--- a/clientv3/example_cluster_test.go
+++ b/clientv3/example_cluster_test.go
@ -1,4 +1,4 @@
-// Copyright 2016 CoreOS, Inc.
+// Copyright 2016 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
--- a/clientv3/example_kv_test.go
+++ b/clientv3/example_kv_test.go
@ -1,4 +1,4 @@
-// Copyright 2016 CoreOS, Inc.
+// Copyright 2016 The etcd Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -210,7 +210,7 @@ func ExampleKV_compact() {
 	compRev := resp.Header.Revision // specify compact revision of your choice

 	ctx, cancel = context.WithTimeout(context.Background(), requestTimeout)
-	err = cli.Compact(ctx, compRev)
+	_, err = cli.Compact(ctx, compRev)
 	cancel()
 	if err != nil {
 		log.Fatal(err)
--- a/Show More
+++ b/Show More