1// Copyright 2016 The etcd Authors 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package main 16 17import ( 18 "fmt" 19 "time" 20) 21 22const ( 23 snapshotCount = 10000 24 slowNetworkLatency = 500 // 500 millisecond 25 randomVariation = 50 26 27 // Wait more when it recovers from slow network, because network layer 28 // needs extra time to propagate traffic control (tc command) change. 29 // Otherwise, we get different hash values from the previous revision. 30 // For more detail, please see https://github.com/coreos/etcd/issues/5121. 31 waitRecover = 5 * time.Second 32) 33 34func injectStop(m *member) error { return m.Agent.Stop() } 35func recoverStop(m *member) error { 36 _, err := m.Agent.Restart() 37 return err 38} 39 40func newFailureKillAll() failure { 41 return &failureAll{ 42 description: "kill all members", 43 injectMember: injectStop, 44 recoverMember: recoverStop, 45 } 46} 47 48func newFailureKillMajority() failure { 49 return &failureMajority{ 50 description: "kill majority of the cluster", 51 injectMember: injectStop, 52 recoverMember: recoverStop, 53 } 54} 55 56func newFailureKillOne() failure { 57 return &failureOne{ 58 description: "kill one random member", 59 injectMember: injectStop, 60 recoverMember: recoverStop, 61 } 62} 63 64func newFailureKillLeader() failure { 65 ff := failureByFunc{ 66 description: "kill leader member", 67 injectMember: injectStop, 68 recoverMember: recoverStop, 69 } 70 return &failureLeader{ff, 0} 71} 72 73func newFailureKillOneForLongTime() failure { 74 return &failureUntilSnapshot{newFailureKillOne()} 75} 76 77func newFailureKillLeaderForLongTime() failure { 78 return &failureUntilSnapshot{newFailureKillLeader()} 79} 80 81func injectDropPort(m *member) error { return m.Agent.DropPort(m.peerPort()) } 82func recoverDropPort(m *member) error { return m.Agent.RecoverPort(m.peerPort()) } 83 84func newFailureIsolate() failure { 85 return &failureOne{ 86 description: "isolate one member", 87 injectMember: injectDropPort, 88 recoverMember: recoverDropPort, 89 } 90} 91 92func newFailureIsolateAll() failure { 93 return &failureAll{ 94 description: "isolate all members", 95 injectMember: injectDropPort, 96 recoverMember: recoverDropPort, 97 } 98} 99 100func injectLatency(m *member) error { 101 if err := m.Agent.SetLatency(slowNetworkLatency, randomVariation); err != nil { 102 m.Agent.RemoveLatency() 103 return err 104 } 105 return nil 106} 107 108func recoverLatency(m *member) error { 109 if err := m.Agent.RemoveLatency(); err != nil { 110 return err 111 } 112 time.Sleep(waitRecover) 113 return nil 114} 115 116func newFailureSlowNetworkOneMember() failure { 117 desc := fmt.Sprintf("slow down one member's network by adding %d ms latency", slowNetworkLatency) 118 return &failureOne{ 119 description: description(desc), 120 injectMember: injectLatency, 121 recoverMember: recoverLatency, 122 } 123} 124 125func newFailureSlowNetworkLeader() failure { 126 desc := fmt.Sprintf("slow down leader's network by adding %d ms latency", slowNetworkLatency) 127 ff := failureByFunc{ 128 description: description(desc), 129 injectMember: injectLatency, 130 recoverMember: recoverLatency, 131 } 132 return &failureLeader{ff, 0} 133} 134 135func newFailureSlowNetworkAll() failure { 136 return &failureAll{ 137 description: "slow down all members' network", 138 injectMember: injectLatency, 139 recoverMember: recoverLatency, 140 } 141} 142 143func newFailureNop() failure { 144 return &failureNop{ 145 description: "no failure", 146 } 147} 148 149func newFailureExternal(scriptPath string) failure { 150 return &failureExternal{ 151 description: fmt.Sprintf("external fault injector (script: %s)", scriptPath), 152 scriptPath: scriptPath, 153 } 154} 155