1// Copyright 2016 The etcd Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package main
16
17import (
18	"fmt"
19	"time"
20)
21
// Tunables used by the failure constructors and their inject/recover helpers.
const (
	// snapshotCount is not referenced in this part of the file.
	// NOTE(review): presumably the entry count that triggers a snapshot —
	// confirm against its users.
	snapshotCount = 10000

	// slowNetworkLatency is the latency, in milliseconds, that injectLatency
	// passes to the agent (500 ms).
	slowNetworkLatency = 500

	// randomVariation is passed to the agent together with slowNetworkLatency.
	// NOTE(review): presumably the +/- jitter in milliseconds — confirm
	// against the agent's SetLatency.
	randomVariation = 50

	// waitRecover is how long recoverLatency sleeps after the latency has
	// been removed. The network layer needs extra time to propagate the
	// traffic control (tc command) change; without the wait, hash values
	// differ from the previous revision.
	// For more detail, please see https://github.com/coreos/etcd/issues/5121.
	waitRecover = 5 * time.Second
)
33
34func injectStop(m *member) error { return m.Agent.Stop() }
35func recoverStop(m *member) error {
36	_, err := m.Agent.Restart()
37	return err
38}
39
40func newFailureKillAll() failure {
41	return &failureAll{
42		description:   "kill all members",
43		injectMember:  injectStop,
44		recoverMember: recoverStop,
45	}
46}
47
48func newFailureKillMajority() failure {
49	return &failureMajority{
50		description:   "kill majority of the cluster",
51		injectMember:  injectStop,
52		recoverMember: recoverStop,
53	}
54}
55
56func newFailureKillOne() failure {
57	return &failureOne{
58		description:   "kill one random member",
59		injectMember:  injectStop,
60		recoverMember: recoverStop,
61	}
62}
63
64func newFailureKillLeader() failure {
65	ff := failureByFunc{
66		description:   "kill leader member",
67		injectMember:  injectStop,
68		recoverMember: recoverStop,
69	}
70	return &failureLeader{ff, 0}
71}
72
73func newFailureKillOneForLongTime() failure {
74	return &failureUntilSnapshot{newFailureKillOne()}
75}
76
77func newFailureKillLeaderForLongTime() failure {
78	return &failureUntilSnapshot{newFailureKillLeader()}
79}
80
81func injectDropPort(m *member) error  { return m.Agent.DropPort(m.peerPort()) }
82func recoverDropPort(m *member) error { return m.Agent.RecoverPort(m.peerPort()) }
83
84func newFailureIsolate() failure {
85	return &failureOne{
86		description:   "isolate one member",
87		injectMember:  injectDropPort,
88		recoverMember: recoverDropPort,
89	}
90}
91
92func newFailureIsolateAll() failure {
93	return &failureAll{
94		description:   "isolate all members",
95		injectMember:  injectDropPort,
96		recoverMember: recoverDropPort,
97	}
98}
99
100func injectLatency(m *member) error {
101	if err := m.Agent.SetLatency(slowNetworkLatency, randomVariation); err != nil {
102		m.Agent.RemoveLatency()
103		return err
104	}
105	return nil
106}
107
108func recoverLatency(m *member) error {
109	if err := m.Agent.RemoveLatency(); err != nil {
110		return err
111	}
112	time.Sleep(waitRecover)
113	return nil
114}
115
116func newFailureSlowNetworkOneMember() failure {
117	desc := fmt.Sprintf("slow down one member's network by adding %d ms latency", slowNetworkLatency)
118	return &failureOne{
119		description:   description(desc),
120		injectMember:  injectLatency,
121		recoverMember: recoverLatency,
122	}
123}
124
125func newFailureSlowNetworkLeader() failure {
126	desc := fmt.Sprintf("slow down leader's network by adding %d ms latency", slowNetworkLatency)
127	ff := failureByFunc{
128		description:   description(desc),
129		injectMember:  injectLatency,
130		recoverMember: recoverLatency,
131	}
132	return &failureLeader{ff, 0}
133}
134
135func newFailureSlowNetworkAll() failure {
136	return &failureAll{
137		description:   "slow down all members' network",
138		injectMember:  injectLatency,
139		recoverMember: recoverLatency,
140	}
141}
142
143func newFailureNop() failure {
144	return &failureNop{
145		description: "no failure",
146	}
147}
148
149func newFailureExternal(scriptPath string) failure {
150	return &failureExternal{
151		description: fmt.Sprintf("external fault injector (script: %s)", scriptPath),
152		scriptPath:  scriptPath,
153	}
154}
155