1// Copyright 2016 The etcd Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package integration
16
17import (
18	"fmt"
19	"testing"
20	"time"
21
22	"github.com/coreos/etcd/pkg/testutil"
23)
24
25func TestNetworkPartition5MembersLeaderInMinority(t *testing.T) {
26	defer testutil.AfterTest(t)
27
28	clus := NewClusterV3(t, &ClusterConfig{Size: 5})
29	defer clus.Terminate(t)
30
31	leadIndex := clus.WaitLeader(t)
32
33	// minority: leader, follower / majority: follower, follower, follower
34	minority := []int{leadIndex, (leadIndex + 1) % 5}
35	majority := []int{(leadIndex + 2) % 5, (leadIndex + 3) % 5, (leadIndex + 4) % 5}
36
37	minorityMembers := getMembersByIndexSlice(clus.cluster, minority)
38	majorityMembers := getMembersByIndexSlice(clus.cluster, majority)
39
40	// network partition (bi-directional)
41	injectPartition(t, minorityMembers, majorityMembers)
42
43	// minority leader must be lost
44	clus.waitNoLeader(t, minorityMembers)
45
46	// wait extra election timeout
47	time.Sleep(2 * majorityMembers[0].ElectionTimeout())
48
49	// new leader must be from majority
50	clus.waitLeader(t, majorityMembers)
51
52	// recover network partition (bi-directional)
53	recoverPartition(t, minorityMembers, majorityMembers)
54
55	// write to majority first
56	clusterMustProgress(t, append(majorityMembers, minorityMembers...))
57}
58
59func TestNetworkPartition5MembersLeaderInMajority(t *testing.T) {
60	// retry up to 3 times, in case of leader election on majority partition due to slow hardware
61	var err error
62	for i := 0; i < 3; i++ {
63		if err = testNetworkPartition5MembersLeaderInMajority(t); err == nil {
64			break
65		}
66		t.Logf("[%d] got %v", i, err)
67	}
68	if err != nil {
69		t.Fatalf("failed after 3 tries (%v)", err)
70	}
71}
72
73func testNetworkPartition5MembersLeaderInMajority(t *testing.T) error {
74	defer testutil.AfterTest(t)
75
76	clus := NewClusterV3(t, &ClusterConfig{Size: 5})
77	defer clus.Terminate(t)
78
79	leadIndex := clus.WaitLeader(t)
80
81	// majority: leader, follower, follower / minority: follower, follower
82	majority := []int{leadIndex, (leadIndex + 1) % 5, (leadIndex + 2) % 5}
83	minority := []int{(leadIndex + 3) % 5, (leadIndex + 4) % 5}
84
85	majorityMembers := getMembersByIndexSlice(clus.cluster, majority)
86	minorityMembers := getMembersByIndexSlice(clus.cluster, minority)
87
88	// network partition (bi-directional)
89	injectPartition(t, majorityMembers, minorityMembers)
90
91	// minority leader must be lost
92	clus.waitNoLeader(t, minorityMembers)
93
94	// wait extra election timeout
95	time.Sleep(2 * majorityMembers[0].ElectionTimeout())
96
97	// leader must be hold in majority
98	leadIndex2 := clus.waitLeader(t, majorityMembers)
99	leadID, leadID2 := clus.Members[leadIndex].s.ID(), majorityMembers[leadIndex2].s.ID()
100	if leadID != leadID2 {
101		return fmt.Errorf("unexpected leader change from %s, got %s", leadID, leadID2)
102	}
103
104	// recover network partition (bi-directional)
105	recoverPartition(t, majorityMembers, minorityMembers)
106
107	// write to majority first
108	clusterMustProgress(t, append(majorityMembers, minorityMembers...))
109	return nil
110}
111
112func TestNetworkPartition4Members(t *testing.T) {
113	defer testutil.AfterTest(t)
114
115	clus := NewClusterV3(t, &ClusterConfig{Size: 4})
116	defer clus.Terminate(t)
117
118	leadIndex := clus.WaitLeader(t)
119
120	// groupA: leader, follower / groupB: follower, follower
121	groupA := []int{leadIndex, (leadIndex + 1) % 4}
122	groupB := []int{(leadIndex + 2) % 4, (leadIndex + 3) % 4}
123
124	leaderPartition := getMembersByIndexSlice(clus.cluster, groupA)
125	followerPartition := getMembersByIndexSlice(clus.cluster, groupB)
126
127	// network partition (bi-directional)
128	injectPartition(t, leaderPartition, followerPartition)
129
130	// no group has quorum, so leader must be lost in all members
131	clus.WaitNoLeader(t)
132
133	// recover network partition (bi-directional)
134	recoverPartition(t, leaderPartition, followerPartition)
135
136	// need to wait since it recovered with no leader
137	clus.WaitLeader(t)
138
139	clusterMustProgress(t, clus.Members)
140}
141
142func getMembersByIndexSlice(clus *cluster, idxs []int) []*member {
143	ms := make([]*member, len(idxs))
144	for i, idx := range idxs {
145		ms[i] = clus.Members[idx]
146	}
147	return ms
148}
149
150func injectPartition(t *testing.T, src, others []*member) {
151	for _, m := range src {
152		m.InjectPartition(t, others...)
153	}
154}
155
156func recoverPartition(t *testing.T, src, others []*member) {
157	for _, m := range src {
158		m.RecoverPartition(t, others...)
159	}
160}
161