/**
 * Copyright (C) 2018-present MongoDB, Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the Server Side Public License, version 1,
 * as published by MongoDB, Inc.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * Server Side Public License for more details.
 *
 * You should have received a copy of the Server Side Public License
 * along with this program. If not, see
 * <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 * As a special exception, the copyright holders give permission to link the
 * code of portions of this program with the OpenSSL library under certain
 * conditions as described in each individual source file and distribute
 * linked combinations including the program with the OpenSSL library. You
 * must comply with the Server Side Public License in all respects for
 * all of the code used other than as permitted herein. If you modify file(s)
 * with this exception, you may extend this exception to your version of the
 * file(s), but you are not obligated to do so. If you do not wish to do so,
 * delete this exception statement from your version. If you delete this
 * exception statement from all source files in the program, then also delete
 * it in the license file.
 */

#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kReplication

#include "mongo/platform/basic.h"

#include "mongo/base/disallow_copying.h"
#include "mongo/db/repl/elect_cmd_runner.h"
#include "mongo/db/repl/freshness_checker.h"
#include "mongo/db/repl/replication_coordinator_impl.h"
#include "mongo/db/repl/topology_coordinator.h"
#include "mongo/stdx/mutex.h"
#include "mongo/util/log.h"
#include "mongo/util/scopeguard.h"

namespace mongo {
namespace repl {

namespace {
48 class LoseElectionGuard {
49 MONGO_DISALLOW_COPYING(LoseElectionGuard);
50
51 public:
LoseElectionGuard(TopologyCoordinator * topCoord,executor::TaskExecutor * executor,std::unique_ptr<FreshnessChecker> * freshnessChecker,std::unique_ptr<ElectCmdRunner> * electCmdRunner,executor::TaskExecutor::EventHandle * electionFinishedEvent)52 LoseElectionGuard(TopologyCoordinator* topCoord,
53 executor::TaskExecutor* executor,
54 std::unique_ptr<FreshnessChecker>* freshnessChecker,
55 std::unique_ptr<ElectCmdRunner>* electCmdRunner,
56 executor::TaskExecutor::EventHandle* electionFinishedEvent)
57 : _topCoord(topCoord),
58 _executor(executor),
59 _freshnessChecker(freshnessChecker),
60 _electCmdRunner(electCmdRunner),
61 _electionFinishedEvent(electionFinishedEvent),
62 _dismissed(false) {}
63
~LoseElectionGuard()64 ~LoseElectionGuard() {
65 if (_dismissed) {
66 return;
67 }
68 _topCoord->processLoseElection();
69 _freshnessChecker->reset(NULL);
70 _electCmdRunner->reset(NULL);
71 if (_electionFinishedEvent->isValid()) {
72 _executor->signalEvent(*_electionFinishedEvent);
73 }
74 }
75
dismiss()76 void dismiss() {
77 _dismissed = true;
78 }
79
80 private:
81 TopologyCoordinator* const _topCoord;
82 executor::TaskExecutor* const _executor;
83 std::unique_ptr<FreshnessChecker>* const _freshnessChecker;
84 std::unique_ptr<ElectCmdRunner>* const _electCmdRunner;
85 const executor::TaskExecutor::EventHandle* _electionFinishedEvent;
86 bool _dismissed;
87 };

}  // namespace

_startElectSelf_inlock()91 void ReplicationCoordinatorImpl::_startElectSelf_inlock() {
92 invariant(!_freshnessChecker);
93 invariant(!_electCmdRunner);
94
95 switch (_rsConfigState) {
96 case kConfigSteady:
97 break;
98 case kConfigInitiating:
99 case kConfigReconfiguring:
100 case kConfigHBReconfiguring:
101 LOG(2) << "Not standing for election; processing a configuration change";
102 // Transition out of candidate role.
103 _topCoord->processLoseElection();
104 return;
105 default:
106 severe() << "Entered replica set election code while in illegal config state "
107 << int(_rsConfigState);
108 fassertFailed(18913);
109 }
110
111 log() << "Standing for election";
112 const StatusWith<executor::TaskExecutor::EventHandle> finishEvh = _replExecutor->makeEvent();
113 if (finishEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
114 return;
115 }
116 fassert(18680, finishEvh.getStatus());
117 _electionFinishedEvent = finishEvh.getValue();
118 LoseElectionGuard lossGuard(_topCoord.get(),
119 _replExecutor.get(),
120 &_freshnessChecker,
121 &_electCmdRunner,
122 &_electionFinishedEvent);
123
124
125 invariant(_rsConfig.getMemberAt(_selfIndex).isElectable());
126 OpTime lastOpTimeApplied(_getMyLastAppliedOpTime_inlock());
127
128 if (lastOpTimeApplied.isNull()) {
129 log() << "not trying to elect self, "
130 "do not yet have a complete set of data from any point in time"
131 " -- lastAppliedOpTime is null";
132 return;
133 }
134
135 _freshnessChecker.reset(new FreshnessChecker);
136
137 StatusWith<executor::TaskExecutor::EventHandle> nextPhaseEvh =
138 _freshnessChecker->start(_replExecutor.get(),
139 lastOpTimeApplied.getTimestamp(),
140 _rsConfig,
141 _selfIndex,
142 _topCoord->getMaybeUpHostAndPorts());
143 if (nextPhaseEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
144 return;
145 }
146 fassert(18681, nextPhaseEvh.getStatus());
147 _replExecutor
148 ->onEvent(nextPhaseEvh.getValue(),
149 stdx::bind(&ReplicationCoordinatorImpl::_onFreshnessCheckComplete, this))
150 .status_with_transitional_ignore();
151 lossGuard.dismiss();
152 }
153
_onFreshnessCheckComplete()154 void ReplicationCoordinatorImpl::_onFreshnessCheckComplete() {
155 stdx::lock_guard<stdx::mutex> lk(_mutex);
156 invariant(_freshnessChecker);
157 invariant(!_electCmdRunner);
158 LoseElectionGuard lossGuard(_topCoord.get(),
159 _replExecutor.get(),
160 &_freshnessChecker,
161 &_electCmdRunner,
162 &_electionFinishedEvent);
163
164 if (_freshnessChecker->isCanceled()) {
165 LOG(2) << "Election canceled during freshness check phase";
166 return;
167 }
168
169 const Date_t now(_replExecutor->now());
170 const FreshnessChecker::ElectionAbortReason abortReason =
171 _freshnessChecker->shouldAbortElection();
172
173 // need to not sleep after last time sleeping,
174 switch (abortReason) {
175 case FreshnessChecker::None:
176 break;
177 case FreshnessChecker::FreshnessTie:
178 if ((_selfIndex != 0) && !_sleptLastElection) {
179 const auto ms = Milliseconds(_nextRandomInt64_inlock(1000) + 50);
180 const Date_t nextCandidateTime = now + ms;
181 log() << "possible election tie; sleeping " << ms << " until "
182 << dateToISOStringLocal(nextCandidateTime);
183 _topCoord->setElectionSleepUntil(nextCandidateTime);
184 _scheduleWorkAt(nextCandidateTime,
185 stdx::bind(&ReplicationCoordinatorImpl::_recoverFromElectionTie,
186 this,
187 stdx::placeholders::_1));
188 _sleptLastElection = true;
189 return;
190 }
191 _sleptLastElection = false;
192 break;
193 case FreshnessChecker::FresherNodeFound:
194 log() << "not electing self, we are not freshest";
195 return;
196 case FreshnessChecker::QuorumUnreachable:
197 log() << "not electing self, we could not contact enough voting members";
198 return;
199 default:
200 log() << "not electing self due to election abort message :"
201 << static_cast<int>(abortReason);
202 return;
203 }
204
205 log() << "running for election"
206 << (abortReason == FreshnessChecker::FreshnessTie
207 ? "; slept last election, so running regardless of possible tie"
208 : "");
209
210 // Secure our vote for ourself first
211 if (!_topCoord->voteForMyself(now)) {
212 return;
213 }
214
215 _electCmdRunner.reset(new ElectCmdRunner);
216 StatusWith<executor::TaskExecutor::EventHandle> nextPhaseEvh = _electCmdRunner->start(
217 _replExecutor.get(), _rsConfig, _selfIndex, _topCoord->getMaybeUpHostAndPorts());
218 if (nextPhaseEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
219 return;
220 }
221 fassert(18685, nextPhaseEvh.getStatus());
222
223 _replExecutor
224 ->onEvent(nextPhaseEvh.getValue(),
225 stdx::bind(&ReplicationCoordinatorImpl::_onElectCmdRunnerComplete, this))
226 .status_with_transitional_ignore();
227 lossGuard.dismiss();
228 }
229
_onElectCmdRunnerComplete()230 void ReplicationCoordinatorImpl::_onElectCmdRunnerComplete() {
231 stdx::unique_lock<stdx::mutex> lk(_mutex);
232 LoseElectionGuard lossGuard(_topCoord.get(),
233 _replExecutor.get(),
234 &_freshnessChecker,
235 &_electCmdRunner,
236 &_electionFinishedEvent);
237
238 invariant(_freshnessChecker);
239 invariant(_electCmdRunner);
240 if (_electCmdRunner->isCanceled()) {
241 LOG(2) << "Election canceled during elect self phase";
242 return;
243 }
244
245 const int receivedVotes = _electCmdRunner->getReceivedVotes();
246
247 if (receivedVotes < _rsConfig.getMajorityVoteCount()) {
248 log() << "couldn't elect self, only received " << receivedVotes
249 << " votes, but needed at least " << _rsConfig.getMajorityVoteCount();
250 // Suppress ourselves from standing for election again, giving other nodes a chance
251 // to win their elections.
252 const auto ms = Milliseconds(_nextRandomInt64_inlock(1000) + 50);
253 const Date_t now(_replExecutor->now());
254 const Date_t nextCandidateTime = now + ms;
255 log() << "waiting until " << nextCandidateTime << " before standing for election again";
256 _topCoord->setElectionSleepUntil(nextCandidateTime);
257 _scheduleWorkAt(nextCandidateTime,
258 stdx::bind(&ReplicationCoordinatorImpl::_recoverFromElectionTie,
259 this,
260 stdx::placeholders::_1));
261 return;
262 }
263
264 if (_rsConfig.getConfigVersion() != _freshnessChecker->getOriginalConfigVersion()) {
265 log() << "config version changed during our election, ignoring result";
266 return;
267 }
268
269 log() << "election succeeded, assuming primary role";
270
271 lossGuard.dismiss();
272 _freshnessChecker.reset(NULL);
273 _electCmdRunner.reset(NULL);
274 auto electionFinishedEvent = _electionFinishedEvent;
275 _postWonElectionUpdateMemberState_inlock();
276 _replExecutor->signalEvent(electionFinishedEvent);
277 }
278
_recoverFromElectionTie(const executor::TaskExecutor::CallbackArgs & cbData)279 void ReplicationCoordinatorImpl::_recoverFromElectionTie(
280 const executor::TaskExecutor::CallbackArgs& cbData) {
281 stdx::unique_lock<stdx::mutex> lk(_mutex);
282
283 auto now = _replExecutor->now();
284 const auto status = _topCoord->checkShouldStandForElection(now);
285 if (!status.isOK()) {
286 LOG(2) << "ReplicationCoordinatorImpl::_recoverFromElectionTie -- " << status.reason();
287 } else {
288 fassertStatusOK(28817,
289 _topCoord->becomeCandidateIfElectable(
290 now, TopologyCoordinator::StartElectionReason::kElectionTimeout));
291 _startElectSelf_inlock();
292 }
293 }

}  // namespace repl
}  // namespace mongo