Compare commits

...

15 Commits

SHA1 Message Date
a535dc994b version: bump to v2.3.5 2016-05-20 10:36:58 -07:00
46d347812b etcdserver: wait for snapshots before closing raft
Fixes #5374
2016-05-20 10:35:50 -07:00
1d12212e60 etcdserver: stop raft after stopping apply scheduler
Was causing a pipeline leak.
2016-05-20 10:35:28 -07:00
1f17d7204e etcdserver: fix the leaky snapshot routine issue 2016-05-20 10:34:48 -07:00
198664e49c Documentation/v2: fix typo for updating a member
Fix https://github.com/coreos/etcd/issues/5358.
2016-05-20 10:33:49 -07:00
ee872bb7ca Documentation/v2: fix auth_api.md bug
The guest role's read and write permission is "/*", not "*"; the same applies to the other roles.
2016-05-20 10:33:47 -07:00
8c9a3c55bd raft: do not panic when removing all the nodes from cluster 2016-05-20 10:33:45 -07:00
6f1ceee9a3 v2http: allow empty role for GET /users
Fix https://github.com/coreos/etcd/issues/5246.
2016-05-20 10:33:43 -07:00
f47375af89 version: bump to v2.3.4+git 2016-05-13 11:58:24 -07:00
df60227765 version: bump to v2.3.4 2016-05-12 14:33:37 -07:00
4db35c113d README: add known bugs
cherry-picked from https://github.com/coreos/etcd/pull/5320.
2016-05-12 14:22:45 -07:00
cf68c2285e etcdctl: Add --wal-dir and --backup-wal-dir options to backup command.
If the WAL is stored in a separate directory, the backup command needs a
--wal-dir option to locate the WAL directory. The user might also want to
store the backups of the data and the WAL separately, which is what the
--backup-wal-dir option provides.
2016-05-12 14:17:31 -07:00
743f9c9bb0 etcdctl/ctlv2: total-timeout for Sync
Fix https://github.com/coreos/etcd/issues/4897.
2016-05-12 14:15:13 -07:00
f9e09e1b1a httpproxy: fix race on getting close notifier channel
Fixes #5267
2016-05-12 14:14:16 -07:00
d6eb1e7a5f *: bump to v2.3.3+git 2016-04-29 14:18:50 -07:00
13 changed files with 179 additions and 36 deletions

View File

@ -135,7 +135,7 @@ The data directory contains all the data to recover a member to its point-in-tim
* Stop the member process.
* Copy the data directory of the now-idle member to the new machine.
* Update the peer URLs for the replaced member to reflect the new machine according to the [runtime reconfiguration instructions][update-member].
* Update the peer URLs for the replaced member to reflect the new machine according to the [runtime reconfiguration instructions][update-a-member].
* Start etcd on the new machine, using the same configuration and the copy of the data directory.
This example will walk you through the process of migrating the infra1 member to a new machine:
@ -217,12 +217,14 @@ To recover from such scenarios, etcd provides functionality to backup and restor
**NB:** Windows users must stop etcd before running the backup command.
The first step of the recovery is to backup the data directory on a functioning etcd node. To do this, use the `etcdctl backup` command, passing in the original data directory used by etcd. For example:
The first step of the recovery is to backup the data directory and wal directory, if stored separately, on a functioning etcd node. To do this, use the `etcdctl backup` command, passing in the original data (and wal) directory used by etcd. For example:
```sh
etcdctl backup \
--data-dir %data_dir% \
[--wal-dir %wal_dir%] \
--backup-dir %backup_data_dir%
[--backup-wal-dir %backup_wal_dir%]
```
This command will rewrite some of the metadata contained in the backup (specifically, the node ID and cluster ID), which means that the node will lose its former identity. In order to recreate a cluster from the backup, you will need to start a new, single-node cluster. The metadata is rewritten to prevent the new node from inadvertently being joined onto an existing cluster.
@ -234,26 +236,30 @@ To restore a backup using the procedure created above, start etcd with the `-for
```sh
etcd \
-data-dir=%backup_data_dir% \
[-wal-dir=%backup_wal_dir%] \
-force-new-cluster \
...
```
Now etcd should be available on this node and serving the original datastore.
Once you have verified that etcd has started successfully, shut it down and move the data back to the previous location (you may wish to make another copy as well to be safe):
Once you have verified that etcd has started successfully, shut it down and move the data and wal, if stored separately, back to the previous location (you may wish to make another copy as well to be safe):
```sh
pkill etcd
rm -fr %data_dir%
rm -fr %wal_dir%
mv %backup_data_dir% %data_dir%
mv %backup_wal_dir% %wal_dir%
etcd \
-data-dir=%data_dir% \
[-wal-dir=%wal_dir%] \
...
```
#### Restoring the cluster
Now that the node is running successfully, [change its advertised peer URLs][update-member], as the `--force-new-cluster` option has set the peer URL to the default listening on localhost.
Now that the node is running successfully, [change its advertised peer URLs][update-a-member], as the `--force-new-cluster` option has set the peer URL to the default listening on localhost.
You can then add more nodes to the cluster and restore resiliency. See the [add a new member][add-a-member] guide for more details. **NB:** If you are trying to restore your cluster using old failed etcd nodes, please make sure you have stopped old etcd instances and removed their old data directories specified by the data-dir configuration parameter.
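For illustration, a minimal sketch of these two steps with etcdctl (the member ID, member name, and URLs below are placeholders, not values taken from this change):

```sh
# look up the ID of the restored member
etcdctl member list

# replace the localhost default set by --force-new-cluster with the real peer URL
etcdctl member update 272e204152 http://10.0.1.10:2380

# grow the cluster back to its original size, one member at a time
etcdctl member add infra2 http://10.0.1.11:2380
```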

View File

@ -145,8 +145,8 @@ GET/HEAD /v2/auth/users
"role": "root",
"permissions": {
"kv": {
"read": ["*"],
"write": ["*"]
"read": ["/*"],
"write": ["/*"]
}
}
}
@ -159,8 +159,8 @@ GET/HEAD /v2/auth/users
"role": "guest",
"permissions": {
"kv": {
"read": ["*"],
"write": ["*"]
"read": ["/*"],
"write": ["/*"]
}
}
}
@ -198,8 +198,8 @@ GET/HEAD /v2/auth/users/alice
"role": "etcd",
"permissions" : {
"kv" : {
"read": [ "*" ],
"write": [ "*" ]
"read": [ "/*" ],
"write": [ "/*" ]
}
}
}
@ -311,8 +311,8 @@ GET/HEAD /v2/auth/roles
"role": "etcd",
"permissions": {
"kv": {
"read": ["*"],
"write": ["*"]
"read": ["/*"],
"write": ["/*"]
}
}
},
@ -320,8 +320,8 @@ GET/HEAD /v2/auth/roles
"role": "quay",
"permissions": {
"kv": {
"read": ["*"],
"write": ["*"]
"read": ["/*"],
"write": ["/*"]
}
}
}
@ -393,7 +393,7 @@ PUT /v2/auth/roles/guest
"revoke" : {
"kv" : {
"write": [
"*"
"/*"
]
}
}
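As a quick way to confirm the corrected paths (a hedged sketch; the endpoint and credentials are placeholders), the guest role can be inspected through the same API, and its `kv` permissions should list `/*` rather than `*`:

```sh
curl -u root:rootpw http://127.0.0.1:2379/v2/auth/roles/guest
```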

View File

@ -118,6 +118,17 @@ See [CONTRIBUTING](CONTRIBUTING.md) for details on submitting patches and the co
See [reporting bugs](Documentation/reporting_bugs.md) for details about reporting any issue you may encounter.
## Known bugs
[GH518](https://github.com/coreos/etcd/issues/518) is a known bug. The issue is:
```
curl http://127.0.0.1:2379/v2/keys/foo -XPUT -d value=bar
curl http://127.0.0.1:2379/v2/keys/foo -XPUT -d dir=true -d prevExist=true
```
If the existing node is a key and the client tries to overwrite it with `dir=true`, etcd does not return a warning such as `Not a directory`. Instead, the key is silently set to an empty value.
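To see this behavior in practice (a hedged sketch against a local etcd on the default client port), read the key back after the two PUT requests above:

```sh
# rather than a "Not a directory" warning, this returns foo with an empty value
curl http://127.0.0.1:2379/v2/keys/foo
```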
## Project Details
### Versioning

View File

@ -37,7 +37,9 @@ func NewBackupCommand() cli.Command {
ArgsUsage: " ",
Flags: []cli.Flag{
cli.StringFlag{Name: "data-dir", Value: "", Usage: "Path to the etcd data dir"},
cli.StringFlag{Name: "wal-dir", Value: "", Usage: "Path to the etcd wal dir"},
cli.StringFlag{Name: "backup-dir", Value: "", Usage: "Path to the backup dir"},
cli.StringFlag{Name: "backup-wal-dir", Value: "", Usage: "Path to the backup wal dir"},
},
Action: handleBackup,
}
@ -45,10 +47,23 @@ func NewBackupCommand() cli.Command {
// handleBackup handles a request that intends to do a backup.
func handleBackup(c *cli.Context) {
var srcWAL string
var destWAL string
srcSnap := path.Join(c.String("data-dir"), "member", "snap")
destSnap := path.Join(c.String("backup-dir"), "member", "snap")
srcWAL := path.Join(c.String("data-dir"), "member", "wal")
destWAL := path.Join(c.String("backup-dir"), "member", "wal")
if c.String("wal-dir") != "" {
srcWAL = c.String("wal-dir")
} else {
srcWAL = path.Join(c.String("data-dir"), "member", "wal")
}
if c.String("backup-wal-dir") != "" {
destWAL = c.String("backup-wal-dir")
} else {
destWAL = path.Join(c.String("backup-dir"), "member", "wal")
}
if err := os.MkdirAll(destSnap, 0700); err != nil {
log.Fatalf("failed creating backup snapshot dir %v: %v", destSnap, err)
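For reference, a concrete invocation of the extended command might look like the following sketch (all paths are illustrative placeholders):

```sh
etcdctl backup \
  --data-dir /var/lib/etcd \
  --wal-dir /var/lib/etcd-wal \
  --backup-dir /tmp/etcd-backup \
  --backup-wal-dir /tmp/etcd-backup-wal
```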

View File

@ -168,7 +168,13 @@ func getTransport(c *cli.Context) (*http.Transport, error) {
CertFile: certfile,
KeyFile: keyfile,
}
return transport.NewTransport(tls, defaultDialTimeout)
dialTimeout := defaultDialTimeout
totalTimeout := c.GlobalDuration("total-timeout")
if totalTimeout != 0 && totalTimeout < dialTimeout {
dialTimeout = totalTimeout
}
return transport.NewTransport(tls, dialTimeout)
}
func getUsernamePasswordFromFlag(usernameFlag string) (username string, password string, err error) {
@ -215,7 +221,7 @@ func mustNewClient(c *cli.Context) client.Client {
if debug {
fmt.Fprintf(os.Stderr, "start to sync cluster using endpoints(%s)\n", strings.Join(hc.Endpoints(), ","))
}
ctx, cancel := context.WithTimeout(context.Background(), client.DefaultRequestTimeout)
ctx, cancel := contextWithTotalTimeout(c)
err := hc.Sync(ctx)
cancel()
if err != nil {
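In user-facing terms, a hedged sketch of the effect (the timeout value is a placeholder): the global `--total-timeout` flag now also bounds the dial and cluster-sync phase rather than only the request itself:

```sh
# give up quickly if the cluster cannot be reached and synced in time
etcdctl --total-timeout=3s ls /
```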

View File

@ -285,6 +285,10 @@ type userWithRoles struct {
Roles []auth.Role `json:"roles,omitempty"`
}
type usersCollections struct {
Users []userWithRoles `json:"users"`
}
func (sh *authHandler) baseUsers(w http.ResponseWriter, r *http.Request) {
if !allowMethod(w, r.Method, "GET") {
return
@ -311,9 +315,7 @@ func (sh *authHandler) baseUsers(w http.ResponseWriter, r *http.Request) {
return
}
var usersCollections struct {
Users []userWithRoles `json:"users"`
}
ucs := usersCollections{}
for _, userName := range users {
var user auth.User
user, err = sh.sec.GetUser(userName)
@ -327,15 +329,14 @@ func (sh *authHandler) baseUsers(w http.ResponseWriter, r *http.Request) {
var role auth.Role
role, err = sh.sec.GetRole(roleName)
if err != nil {
writeError(w, r, err)
return
continue
}
uwr.Roles = append(uwr.Roles, role)
}
usersCollections.Users = append(usersCollections.Users, uwr)
ucs.Users = append(ucs.Users, uwr)
}
err = json.NewEncoder(w).Encode(usersCollections)
err = json.NewEncoder(w).Encode(ucs)
if err != nil {
plog.Warningf("baseUsers error encoding on %s", r.URL)
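The user-facing effect, as a hedged sketch (endpoint and credentials are placeholders): listing users no longer fails when a user references a role that has since been deleted; the missing role is simply skipped in the response:

```sh
curl -u root:rootpw http://127.0.0.1:2379/v2/auth/users
```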

View File

@ -15,10 +15,14 @@
package etcdhttp
import (
"encoding/json"
"errors"
"fmt"
"net/http"
"net/http/httptest"
"net/url"
"path"
"sort"
"strings"
"testing"
@ -43,7 +47,14 @@ type mockAuthStore struct {
enabled bool
}
func (s *mockAuthStore) AllUsers() ([]string, error) { return []string{"alice", "bob", "root"}, s.err }
func (s *mockAuthStore) AllUsers() ([]string, error) {
var us []string
for u := range s.users {
us = append(us, u)
}
sort.Strings(us)
return us, s.err
}
func (s *mockAuthStore) GetUser(name string) (auth.User, error) {
u, ok := s.users[name]
if !ok {
@ -67,9 +78,15 @@ func (s *mockAuthStore) UpdateUser(user auth.User) (auth.User, error) {
func (s *mockAuthStore) AllRoles() ([]string, error) {
return []string{"awesome", "guest", "root"}, s.err
}
func (s *mockAuthStore) GetRole(name string) (auth.Role, error) { return *s.roles[name], s.err }
func (s *mockAuthStore) CreateRole(role auth.Role) error { return s.err }
func (s *mockAuthStore) DeleteRole(name string) error { return s.err }
func (s *mockAuthStore) GetRole(name string) (auth.Role, error) {
r, ok := s.roles[name]
if ok {
return *r, s.err
}
return auth.Role{}, fmt.Errorf("%q does not exist (%v)", name, s.err)
}
func (s *mockAuthStore) CreateRole(role auth.Role) error { return s.err }
func (s *mockAuthStore) DeleteRole(name string) error { return s.err }
func (s *mockAuthStore) UpdateRole(role auth.Role) (auth.Role, error) {
return *s.roles[role.Role], s.err
}
@ -361,6 +378,61 @@ func TestAuthFlow(t *testing.T) {
}
}
func TestGetUserGrantedWithNonexistingRole(t *testing.T) {
sh := &authHandler{
sec: &mockAuthStore{
users: map[string]*auth.User{
"root": {
User: "root",
Roles: []string{"root", "foo"},
},
},
roles: map[string]*auth.Role{
"root": {
Role: "root",
},
},
},
cluster: &fakeCluster{id: 1},
}
srv := httptest.NewServer(http.HandlerFunc(sh.baseUsers))
defer srv.Close()
req, err := http.NewRequest("GET", "", nil)
if err != nil {
t.Fatal(err)
}
req.URL, err = url.Parse(srv.URL)
if err != nil {
t.Fatal(err)
}
req.Header.Set("Content-Type", "application/json")
cli := http.DefaultClient
resp, err := cli.Do(req)
if err != nil {
t.Fatal(err)
}
defer resp.Body.Close()
var uc usersCollections
if err := json.NewDecoder(resp.Body).Decode(&uc); err != nil {
t.Fatal(err)
}
if len(uc.Users) != 1 {
t.Fatalf("expected 1 user, got %+v", uc.Users)
}
if uc.Users[0].User != "root" {
t.Fatalf("expected 'root', got %q", uc.Users[0].User)
}
if len(uc.Users[0].Roles) != 1 {
t.Fatalf("expected 1 role, got %+v", uc.Users[0].Roles)
}
if uc.Users[0].Roles[0].Role != "root" {
t.Fatalf("expected 'root', got %q", uc.Users[0].Roles[0].Role)
}
}
func mustAuthRequest(method, username, password string) *http.Request {
req, err := http.NewRequest(method, "path", strings.NewReader(""))
if err != nil {

View File

@ -203,6 +203,10 @@ type EtcdServer struct {
// count the number of inflight snapshots.
// MUST use atomic operation to access this field.
inflightSnapshots int64
// wg is used to wait for the goroutines that depend on the server state
// to exit when stopping the server.
wg sync.WaitGroup
}
// NewServer creates a new EtcdServer from the supplied configuration. The
@ -515,9 +519,15 @@ func (s *EtcdServer) run() {
}
defer func() {
s.r.stop()
sched.Stop()
// wait for snapshots before closing raft so wal stays open
s.wg.Wait()
// must stop raft after scheduler-- etcdserver can leak rafthttp pipelines
// by adding a peer after raft stops the transport
s.r.stop()
// kv, lessor and backend can be nil if running without v3 enabled
// or running unit tests.
if s.lessor != nil {
@ -1162,7 +1172,10 @@ func (s *EtcdServer) applyConfChange(cc raftpb.ConfChange, confState *raftpb.Con
func (s *EtcdServer) snapshot(snapi uint64, confState raftpb.ConfState) {
clone := s.store.Clone()
s.wg.Add(1)
go func() {
defer s.wg.Done()
d, err := clone.SaveNoCopy()
// TODO: current store will never fail to do a snapshot
// what should we do if the store might fail?

View File

@ -231,9 +231,14 @@ func TestIssue2681(t *testing.T) {
}
// Ensure we can remove a member after a snapshot then add a new one back.
func TestIssue2746(t *testing.T) {
func TestIssue2746(t *testing.T) { testIssue2746(t, 5) }
// With 3 nodes TestIssue2746 sometimes had a shutdown with an inflight snapshot.
func TestIssue2746WithThree(t *testing.T) { testIssue2746(t, 3) }
func testIssue2746(t *testing.T, members int) {
defer testutil.AfterTest(t)
c := NewCluster(t, 5)
c := NewCluster(t, members)
for _, m := range c.Members {
m.SnapCount = 10
@ -247,7 +252,7 @@ func TestIssue2746(t *testing.T) {
clusterMustProgress(t, c.Members)
}
c.RemoveMember(t, uint64(c.Members[4].s.ID()))
c.RemoveMember(t, uint64(c.Members[members-1].s.ID()))
c.waitLeader(t, c.Members)
c.AddMember(t)

View File

@ -111,9 +111,10 @@ func (p *reverseProxy) ServeHTTP(rw http.ResponseWriter, clientreq *http.Request
closeNotifier, ok := rw.(http.CloseNotifier)
cancel := httputil.RequestCanceler(p.transport, proxyreq)
if ok {
closeCh := closeNotifier.CloseNotify()
go func() {
select {
case <-closeNotifier.CloseNotify():
case <-closeCh:
atomic.StoreInt32(&requestClosed, 1)
log.Printf("proxy: client %v closed request prematurely", clientreq.RemoteAddr)
cancel()

View File

@ -837,6 +837,12 @@ func (r *raft) addNode(id uint64) {
func (r *raft) removeNode(id uint64) {
r.delProgress(id)
r.pendingConf = false
// do not try to commit or abort transferring if there are no nodes in the cluster.
if len(r.prs) == 0 {
return
}
// The quorum size is now smaller, so see if any pending entries can
// be committed.
if r.maybeCommit() {

View File

@ -1780,6 +1780,13 @@ func TestRemoveNode(t *testing.T) {
if g := r.nodes(); !reflect.DeepEqual(g, w) {
t.Errorf("nodes = %v, want %v", g, w)
}
// remove all nodes from cluster
r.removeNode(1)
w = []uint64{}
if g := r.nodes(); !reflect.DeepEqual(g, w) {
t.Errorf("nodes = %v, want %v", g, w)
}
}
func TestPromotable(t *testing.T) {

View File

@ -29,7 +29,7 @@ import (
var (
// MinClusterVersion is the min cluster version this etcd binary is compatible with.
MinClusterVersion = "2.2.0"
Version = "2.3.3"
Version = "2.3.5"
// Git SHA Value will be set during build
GitSHA = "Not provided (use ./build instead of go build)"