1 
/**
 *    Copyright (C) 2018-present MongoDB, Inc.
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the Server Side Public License, version 1,
 *    as published by MongoDB, Inc.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    Server Side Public License for more details.
 *
 *    You should have received a copy of the Server Side Public License
 *    along with this program. If not, see
 *    <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 *    As a special exception, the copyright holders give permission to link the
 *    code of portions of this program with the OpenSSL library under certain
 *    conditions as described in each individual source file and distribute
 *    linked combinations including the program with the OpenSSL library. You
 *    must comply with the Server Side Public License in all respects for
 *    all of the code used other than as permitted herein. If you modify file(s)
 *    with this exception, you may extend this exception to your version of the
 *    file(s), but you are not obligated to do so. If you do not wish to do so,
 *    delete this exception statement from your version. If you delete this
 *    exception statement from all source files in the program, then also delete
 *    it in the license file.
 */
30 
31 #define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kReplication
32 
33 #include "mongo/platform/basic.h"
34 
35 #include "mongo/base/disallow_copying.h"
36 #include "mongo/db/repl/elect_cmd_runner.h"
37 #include "mongo/db/repl/freshness_checker.h"
38 #include "mongo/db/repl/replication_coordinator_impl.h"
39 #include "mongo/db/repl/topology_coordinator.h"
40 #include "mongo/stdx/mutex.h"
41 #include "mongo/util/log.h"
42 #include "mongo/util/scopeguard.h"
43 
44 namespace mongo {
45 namespace repl {
46 
47 namespace {
48 class LoseElectionGuard {
49     MONGO_DISALLOW_COPYING(LoseElectionGuard);
50 
51 public:
LoseElectionGuard(TopologyCoordinator * topCoord,executor::TaskExecutor * executor,std::unique_ptr<FreshnessChecker> * freshnessChecker,std::unique_ptr<ElectCmdRunner> * electCmdRunner,executor::TaskExecutor::EventHandle * electionFinishedEvent)52     LoseElectionGuard(TopologyCoordinator* topCoord,
53                       executor::TaskExecutor* executor,
54                       std::unique_ptr<FreshnessChecker>* freshnessChecker,
55                       std::unique_ptr<ElectCmdRunner>* electCmdRunner,
56                       executor::TaskExecutor::EventHandle* electionFinishedEvent)
57         : _topCoord(topCoord),
58           _executor(executor),
59           _freshnessChecker(freshnessChecker),
60           _electCmdRunner(electCmdRunner),
61           _electionFinishedEvent(electionFinishedEvent),
62           _dismissed(false) {}
63 
~LoseElectionGuard()64     ~LoseElectionGuard() {
65         if (_dismissed) {
66             return;
67         }
68         _topCoord->processLoseElection();
69         _freshnessChecker->reset(NULL);
70         _electCmdRunner->reset(NULL);
71         if (_electionFinishedEvent->isValid()) {
72             _executor->signalEvent(*_electionFinishedEvent);
73         }
74     }
75 
dismiss()76     void dismiss() {
77         _dismissed = true;
78     }
79 
80 private:
81     TopologyCoordinator* const _topCoord;
82     executor::TaskExecutor* const _executor;
83     std::unique_ptr<FreshnessChecker>* const _freshnessChecker;
84     std::unique_ptr<ElectCmdRunner>* const _electCmdRunner;
85     const executor::TaskExecutor::EventHandle* _electionFinishedEvent;
86     bool _dismissed;
87 };
88 
89 }  // namespace
90 
// Begins an attempt to elect this node, using the freshness-checker /
// elect-command protocol. Must be called with _mutex held (hence the _inlock
// suffix). The election proceeds asynchronously: this method starts the
// freshness-check phase and registers _onFreshnessCheckComplete to continue
// when that phase's event is signaled.
void ReplicationCoordinatorImpl::_startElectSelf_inlock() {
    // No election may already be in flight.
    invariant(!_freshnessChecker);
    invariant(!_electCmdRunner);

    switch (_rsConfigState) {
        case kConfigSteady:
            break;
        case kConfigInitiating:
        case kConfigReconfiguring:
        case kConfigHBReconfiguring:
            LOG(2) << "Not standing for election; processing a configuration change";
            // Transition out of candidate role.
            _topCoord->processLoseElection();
            return;
        default:
            severe() << "Entered replica set election code while in illegal config state "
                     << int(_rsConfigState);
            fassertFailed(18913);
    }

    log() << "Standing for election";
    // Event that is signaled when this election attempt finishes, however it ends.
    const StatusWith<executor::TaskExecutor::EventHandle> finishEvh = _replExecutor->makeEvent();
    if (finishEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
        return;
    }
    fassert(18680, finishEvh.getStatus());
    _electionFinishedEvent = finishEvh.getValue();
    // Until dismissed, any early return below counts as a lost election: the
    // guard resets the checker/runner and signals _electionFinishedEvent.
    LoseElectionGuard lossGuard(_topCoord.get(),
                                _replExecutor.get(),
                                &_freshnessChecker,
                                &_electCmdRunner,
                                &_electionFinishedEvent);


    invariant(_rsConfig.getMemberAt(_selfIndex).isElectable());
    OpTime lastOpTimeApplied(_getMyLastAppliedOpTime_inlock());

    if (lastOpTimeApplied.isNull()) {
        log() << "not trying to elect self, "
                 "do not yet have a complete set of data from any point in time"
                 " -- lastAppliedOpTime is null";
        return;
    }

    _freshnessChecker.reset(new FreshnessChecker);

    // Phase 1: ask the other (possibly-up) members whether our oplog is fresh
    // enough for us to stand for election.
    StatusWith<executor::TaskExecutor::EventHandle> nextPhaseEvh =
        _freshnessChecker->start(_replExecutor.get(),
                                 lastOpTimeApplied.getTimestamp(),
                                 _rsConfig,
                                 _selfIndex,
                                 _topCoord->getMaybeUpHostAndPorts());
    if (nextPhaseEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
        return;
    }
    fassert(18681, nextPhaseEvh.getStatus());
    _replExecutor
        ->onEvent(nextPhaseEvh.getValue(),
                  stdx::bind(&ReplicationCoordinatorImpl::_onFreshnessCheckComplete, this))
        .status_with_transitional_ignore();
    // The next phase is scheduled; its callback now owns the election outcome.
    lossGuard.dismiss();
}
153 
// Callback run when the freshness-check phase completes. Decides whether to
// abort the election, back off and retry later (on a possible freshness tie),
// or proceed to the elect-command phase. Any early return is treated as a
// lost election by lossGuard.
void ReplicationCoordinatorImpl::_onFreshnessCheckComplete() {
    stdx::lock_guard<stdx::mutex> lk(_mutex);
    invariant(_freshnessChecker);
    invariant(!_electCmdRunner);
    LoseElectionGuard lossGuard(_topCoord.get(),
                                _replExecutor.get(),
                                &_freshnessChecker,
                                &_electCmdRunner,
                                &_electionFinishedEvent);

    if (_freshnessChecker->isCanceled()) {
        LOG(2) << "Election canceled during freshness check phase";
        return;
    }

    const Date_t now(_replExecutor->now());
    const FreshnessChecker::ElectionAbortReason abortReason =
        _freshnessChecker->shouldAbortElection();

    // On a freshness tie we sleep a randomized interval before retrying, but
    // only once in a row: if we already slept before this attempt
    // (_sleptLastElection), run anyway so repeated ties cannot stall
    // elections indefinitely.
    switch (abortReason) {
        case FreshnessChecker::None:
            break;
        case FreshnessChecker::FreshnessTie:
            if ((_selfIndex != 0) && !_sleptLastElection) {
                // Randomized backoff before standing again, to break the tie.
                const auto ms = Milliseconds(_nextRandomInt64_inlock(1000) + 50);
                const Date_t nextCandidateTime = now + ms;
                log() << "possible election tie; sleeping " << ms << " until "
                      << dateToISOStringLocal(nextCandidateTime);
                _topCoord->setElectionSleepUntil(nextCandidateTime);
                _scheduleWorkAt(nextCandidateTime,
                                stdx::bind(&ReplicationCoordinatorImpl::_recoverFromElectionTie,
                                           this,
                                           stdx::placeholders::_1));
                _sleptLastElection = true;
                return;
            }
            _sleptLastElection = false;
            break;
        case FreshnessChecker::FresherNodeFound:
            log() << "not electing self, we are not freshest";
            return;
        case FreshnessChecker::QuorumUnreachable:
            log() << "not electing self, we could not contact enough voting members";
            return;
        default:
            log() << "not electing self due to election abort message :"
                  << static_cast<int>(abortReason);
            return;
    }

    log() << "running for election"
          << (abortReason == FreshnessChecker::FreshnessTie
                  ? "; slept last election, so running regardless of possible tie"
                  : "");

    // Secure our vote for ourself first
    if (!_topCoord->voteForMyself(now)) {
        return;
    }

    // Phase 2: solicit votes from the other (possibly-up) members.
    _electCmdRunner.reset(new ElectCmdRunner);
    StatusWith<executor::TaskExecutor::EventHandle> nextPhaseEvh = _electCmdRunner->start(
        _replExecutor.get(), _rsConfig, _selfIndex, _topCoord->getMaybeUpHostAndPorts());
    if (nextPhaseEvh.getStatus() == ErrorCodes::ShutdownInProgress) {
        return;
    }
    fassert(18685, nextPhaseEvh.getStatus());

    _replExecutor
        ->onEvent(nextPhaseEvh.getValue(),
                  stdx::bind(&ReplicationCoordinatorImpl::_onElectCmdRunnerComplete, this))
        .status_with_transitional_ignore();
    // The elect phase is scheduled; its callback now owns the election outcome.
    lossGuard.dismiss();
}
229 
_onElectCmdRunnerComplete()230 void ReplicationCoordinatorImpl::_onElectCmdRunnerComplete() {
231     stdx::unique_lock<stdx::mutex> lk(_mutex);
232     LoseElectionGuard lossGuard(_topCoord.get(),
233                                 _replExecutor.get(),
234                                 &_freshnessChecker,
235                                 &_electCmdRunner,
236                                 &_electionFinishedEvent);
237 
238     invariant(_freshnessChecker);
239     invariant(_electCmdRunner);
240     if (_electCmdRunner->isCanceled()) {
241         LOG(2) << "Election canceled during elect self phase";
242         return;
243     }
244 
245     const int receivedVotes = _electCmdRunner->getReceivedVotes();
246 
247     if (receivedVotes < _rsConfig.getMajorityVoteCount()) {
248         log() << "couldn't elect self, only received " << receivedVotes
249               << " votes, but needed at least " << _rsConfig.getMajorityVoteCount();
250         // Suppress ourselves from standing for election again, giving other nodes a chance
251         // to win their elections.
252         const auto ms = Milliseconds(_nextRandomInt64_inlock(1000) + 50);
253         const Date_t now(_replExecutor->now());
254         const Date_t nextCandidateTime = now + ms;
255         log() << "waiting until " << nextCandidateTime << " before standing for election again";
256         _topCoord->setElectionSleepUntil(nextCandidateTime);
257         _scheduleWorkAt(nextCandidateTime,
258                         stdx::bind(&ReplicationCoordinatorImpl::_recoverFromElectionTie,
259                                    this,
260                                    stdx::placeholders::_1));
261         return;
262     }
263 
264     if (_rsConfig.getConfigVersion() != _freshnessChecker->getOriginalConfigVersion()) {
265         log() << "config version changed during our election, ignoring result";
266         return;
267     }
268 
269     log() << "election succeeded, assuming primary role";
270 
271     lossGuard.dismiss();
272     _freshnessChecker.reset(NULL);
273     _electCmdRunner.reset(NULL);
274     auto electionFinishedEvent = _electionFinishedEvent;
275     _postWonElectionUpdateMemberState_inlock();
276     _replExecutor->signalEvent(electionFinishedEvent);
277 }
278 
_recoverFromElectionTie(const executor::TaskExecutor::CallbackArgs & cbData)279 void ReplicationCoordinatorImpl::_recoverFromElectionTie(
280     const executor::TaskExecutor::CallbackArgs& cbData) {
281     stdx::unique_lock<stdx::mutex> lk(_mutex);
282 
283     auto now = _replExecutor->now();
284     const auto status = _topCoord->checkShouldStandForElection(now);
285     if (!status.isOK()) {
286         LOG(2) << "ReplicationCoordinatorImpl::_recoverFromElectionTie -- " << status.reason();
287     } else {
288         fassertStatusOK(28817,
289                         _topCoord->becomeCandidateIfElectable(
290                             now, TopologyCoordinator::StartElectionReason::kElectionTimeout));
291         _startElectSelf_inlock();
292     }
293 }
294 
295 }  // namespace repl
296 }  // namespace mongo
297