1
2 /**
3 * Copyright (C) 2018-present MongoDB, Inc.
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the Server Side Public License, version 1,
7 * as published by MongoDB, Inc.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * Server Side Public License for more details.
13 *
14 * You should have received a copy of the Server Side Public License
15 * along with this program. If not, see
16 * <http://www.mongodb.com/licensing/server-side-public-license>.
17 *
18 * As a special exception, the copyright holders give permission to link the
19 * code of portions of this program with the OpenSSL library under certain
20 * conditions as described in each individual source file and distribute
21 * linked combinations including the program with the OpenSSL library. You
22 * must comply with the Server Side Public License in all respects for
23 * all of the code used other than as permitted herein. If you modify file(s)
24 * with this exception, you may extend this exception to your version of the
25 * file(s), but you are not obligated to do so. If you do not wish to do so,
26 * delete this exception statement from your version. If you delete this
27 * exception statement from all source files in the program, then also delete
28 * it in the license file.
29 */
30
31 #define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kDefault
32
33 #include "mongo/platform/basic.h"
34
35 #include "mongo/db/operation_context.h"
36
37 #include "mongo/bson/inline_decls.h"
38 #include "mongo/db/client.h"
39 #include "mongo/db/service_context.h"
40 #include "mongo/platform/random.h"
41 #include "mongo/stdx/mutex.h"
42 #include "mongo/util/assert_util.h"
43 #include "mongo/util/clock_source.h"
44 #include "mongo/util/fail_point_service.h"
45 #include "mongo/util/log.h"
46 #include "mongo/util/scopeguard.h"
47 #include "mongo/util/system_tick_source.h"
48
49 namespace mongo {
50
51 namespace {
// Fail points used by the deadline and interrupt checks implemented in this file.
//
52 // Enabling the maxTimeAlwaysTimeOut fail point will cause any query or command run with a
53 // valid non-zero max time to fail immediately. Any getmore operation on a cursor already
54 // created with a valid non-zero max time will also fail immediately.
55 //
56 // This fail point cannot be used with the maxTimeNeverTimeOut fail point. (In this file,
// hasDeadlineExpired() tests maxTimeNeverTimeOut first, so that one wins if both are enabled.)
57 MONGO_FP_DECLARE(maxTimeAlwaysTimeOut);
58
59 // Enabling the maxTimeNeverTimeOut fail point will cause the server to never time out any
60 // query, command, or getmore operation, regardless of whether a max time is set.
61 //
62 // This fail point cannot be used with the maxTimeAlwaysTimeOut fail point.
63 MONGO_FP_DECLARE(maxTimeNeverTimeOut);
64
65 // Enabling the checkForInterruptFail fail point will start a game of random chance on the
66 // connection specified in the fail point data, generating an interrupt with a given fixed
67 // probability. Example invocation:
68 //
69 // {configureFailPoint: "checkForInterruptFail",
70 // mode: "alwaysOn",
71 // data: {conn: 17, chance: .01}}
72 //
73 // Both data fields must be specified. In the above example, every interrupt point on connection
74 // 17 will kill the current operation with probability 0.01, including interrupt points of nested
75 // operations. "chance" must be a double between 0 and 1, inclusive.
76 MONGO_FP_DECLARE(checkForInterruptFail);
77
78 } // namespace
79
OperationContext(Client * client,unsigned int opId)80 OperationContext::OperationContext(Client* client, unsigned int opId)
81 : _client(client),
82 _opId(opId),
83 _elapsedTime(client ? client->getServiceContext()->getTickSource()
84 : SystemTickSource::get()) {}
85
setDeadlineAndMaxTime(Date_t when,Microseconds maxTime)86 void OperationContext::setDeadlineAndMaxTime(Date_t when, Microseconds maxTime) {
87 invariant(!getClient()->isInDirectClient());
88 uassert(40120, "Illegal attempt to change operation deadline", !hasDeadline());
89 _deadline = when;
90 _maxTime = maxTime;
91 }
92
computeMaxTimeFromDeadline(Date_t when)93 Microseconds OperationContext::computeMaxTimeFromDeadline(Date_t when) {
94 Microseconds maxTime;
95 if (when == Date_t::max()) {
96 maxTime = Microseconds::max();
97 } else {
98 maxTime = when - getServiceContext()->getFastClockSource()->now();
99 if (maxTime < Microseconds::zero()) {
100 maxTime = Microseconds::zero();
101 }
102 }
103 return maxTime;
104 }
105
setDeadlineByDate(Date_t when)106 void OperationContext::setDeadlineByDate(Date_t when) {
107 setDeadlineAndMaxTime(when, computeMaxTimeFromDeadline(when));
108 }
109
setDeadlineAfterNowBy(Microseconds maxTime)110 void OperationContext::setDeadlineAfterNowBy(Microseconds maxTime) {
111 Date_t when;
112 if (maxTime < Microseconds::zero()) {
113 maxTime = Microseconds::zero();
114 }
115 if (maxTime == Microseconds::max()) {
116 when = Date_t::max();
117 } else {
118 auto clock = getServiceContext()->getFastClockSource();
119 when = clock->now();
120 if (maxTime > Microseconds::zero()) {
121 when += clock->getPrecision() + maxTime;
122 }
123 }
124 setDeadlineAndMaxTime(when, maxTime);
125 }
126
hasDeadlineExpired() const127 bool OperationContext::hasDeadlineExpired() const {
128 if (!hasDeadline()) {
129 return false;
130 }
131 if (MONGO_FAIL_POINT(maxTimeNeverTimeOut)) {
132 return false;
133 }
134 if (MONGO_FAIL_POINT(maxTimeAlwaysTimeOut)) {
135 return true;
136 }
137
138 // TODO: Remove once all OperationContexts are properly connected to Clients and ServiceContexts
139 // in tests.
140 if (MONGO_unlikely(!getClient() || !getServiceContext())) {
141 return false;
142 }
143
144 const auto now = getServiceContext()->getFastClockSource()->now();
145 return now >= getDeadline();
146 }
147
getRemainingMaxTimeMillis() const148 Milliseconds OperationContext::getRemainingMaxTimeMillis() const {
149 if (!hasDeadline()) {
150 return Milliseconds::max();
151 }
152
153 return std::max(Milliseconds{0},
154 getDeadline() - getServiceContext()->getFastClockSource()->now());
155 }
156
getRemainingMaxTimeMicros() const157 Microseconds OperationContext::getRemainingMaxTimeMicros() const {
158 if (!hasDeadline()) {
159 return Microseconds::max();
160 }
161 return _maxTime - getElapsedTime();
162 }
163
checkForInterrupt()164 void OperationContext::checkForInterrupt() {
165 uassertStatusOK(checkForInterruptNoAssert());
166 }
167
168 namespace {
169
170 // Helper function for checkForInterrupt fail point. Decides whether the operation currently
171 // being run by the given Client meet the (probabilistic) conditions for interruption as
172 // specified in the fail point info.
opShouldFail(const OperationContext * opCtx,const BSONObj & failPointInfo)173 bool opShouldFail(const OperationContext* opCtx, const BSONObj& failPointInfo) {
174 // Only target the client with the specified connection number.
175 if (opCtx->getClient()->getConnectionId() != failPointInfo["conn"].safeNumberLong()) {
176 return false;
177 }
178
179 // Return true with (approx) probability p = "chance". Recall: 0 <= chance <= 1.
180 double next = static_cast<double>(std::abs(opCtx->getClient()->getPrng().nextInt64()));
181 double upperBound =
182 std::numeric_limits<int64_t>::max() * failPointInfo["chance"].numberDouble();
183 if (next > upperBound) {
184 return false;
185 }
186 return true;
187 }
188
189 } // namespace
190
checkForInterruptNoAssert()191 Status OperationContext::checkForInterruptNoAssert() {
192 // TODO: Remove the MONGO_likely(getClient()) once all operation contexts are constructed with
193 // clients.
194 if (MONGO_likely(getClient() && getServiceContext()) &&
195 getServiceContext()->getKillAllOperations()) {
196 return Status(ErrorCodes::InterruptedAtShutdown, "interrupted at shutdown");
197 }
198
199 if (hasDeadlineExpired()) {
200 markKilled(ErrorCodes::ExceededTimeLimit);
201 return Status(ErrorCodes::ExceededTimeLimit, "operation exceeded time limit");
202 }
203
204 MONGO_FAIL_POINT_BLOCK(checkForInterruptFail, scopedFailPoint) {
205 if (opShouldFail(this, scopedFailPoint.getData())) {
206 log() << "set pending kill on op " << getOpID() << ", for checkForInterruptFail";
207 markKilled();
208 }
209 }
210
211 const auto killStatus = getKillStatus();
212 if (killStatus != ErrorCodes::OK) {
213 return Status(killStatus, "operation was interrupted");
214 }
215
216 return Status::OK();
217 }
218
sleepUntil(Date_t deadline)219 void OperationContext::sleepUntil(Date_t deadline) {
220 stdx::mutex m;
221 stdx::condition_variable cv;
222 stdx::unique_lock<stdx::mutex> lk(m);
223 invariant(!waitForConditionOrInterruptUntil(cv, lk, deadline, [] { return false; }));
224 }
225
sleepFor(Milliseconds duration)226 void OperationContext::sleepFor(Milliseconds duration) {
227 stdx::mutex m;
228 stdx::condition_variable cv;
229 stdx::unique_lock<stdx::mutex> lk(m);
230 invariant(!waitForConditionOrInterruptFor(cv, lk, duration, [] { return false; }));
231 }
232
waitForConditionOrInterrupt(stdx::condition_variable & cv,stdx::unique_lock<stdx::mutex> & m)233 void OperationContext::waitForConditionOrInterrupt(stdx::condition_variable& cv,
234 stdx::unique_lock<stdx::mutex>& m) {
235 uassertStatusOK(waitForConditionOrInterruptNoAssert(cv, m));
236 }
237
waitForConditionOrInterruptNoAssert(stdx::condition_variable & cv,stdx::unique_lock<stdx::mutex> & m)238 Status OperationContext::waitForConditionOrInterruptNoAssert(
239 stdx::condition_variable& cv, stdx::unique_lock<stdx::mutex>& m) noexcept {
240 auto status = waitForConditionOrInterruptNoAssertUntil(cv, m, Date_t::max());
241 if (!status.isOK()) {
242 return status.getStatus();
243 }
244 invariant(status.getValue() == stdx::cv_status::no_timeout);
245 return status.getStatus();
246 }
247
waitForConditionOrInterruptUntil(stdx::condition_variable & cv,stdx::unique_lock<stdx::mutex> & m,Date_t deadline)248 stdx::cv_status OperationContext::waitForConditionOrInterruptUntil(
249 stdx::condition_variable& cv, stdx::unique_lock<stdx::mutex>& m, Date_t deadline) {
250
251 return uassertStatusOK(waitForConditionOrInterruptNoAssertUntil(cv, m, deadline));
252 }
253
254 // Theory of operation for waitForConditionOrInterruptNoAssertUntil and markKilled:
255 //
256 // An operation indicates to potential killers that it is waiting on a condition variable by setting
257 // _waitMutex and _waitCV, while holding the lock on its parent Client. It then unlocks its Client,
258 // unblocking any killers, which are required to have locked the Client before calling markKilled.
259 //
260 // When _waitMutex and _waitCV are set, killers must lock _waitMutex before setting the _killCode,
261 // and must signal _waitCV before releasing _waitMutex. Unfortunately, they must lock _waitMutex
262 // without holding a lock on Client to avoid a deadlock with callers of
263 // waitForConditionOrInterruptNoAssertUntil(). So, in the event that _waitMutex is set, the killer
264 // increments _numKillers, drops the Client lock, acquires _waitMutex and then re-acquires the
265 // Client lock. We know that the Client, its OperationContext and _waitMutex will remain valid
266 // during this period because the caller of waitForConditionOrInterruptNoAssertUntil will not return
267 // while _numKillers > 0 and will not return until it has itself reacquired _waitMutex. Instead,
268 // that caller will keep waiting on _waitCV until _numKillers drops to 0.
269 //
270 // In essence, when _waitMutex is set, _killCode is guarded by _waitMutex and _waitCV, but when
271 // _waitMutex is not set, it is guarded by the Client spinlock. Changing _waitMutex is itself
272 // guarded by the Client spinlock and _numKillers.
273 //
274 // When _numKillers does drop to 0, the waiter will null out _waitMutex and _waitCV.
275 //
276 // This implementation adds a minimum of two spinlock acquire-release pairs to every condition
277 // variable wait.
waitForConditionOrInterruptNoAssertUntil(stdx::condition_variable & cv,stdx::unique_lock<stdx::mutex> & m,Date_t deadline)278 StatusWith<stdx::cv_status> OperationContext::waitForConditionOrInterruptNoAssertUntil(
279 stdx::condition_variable& cv, stdx::unique_lock<stdx::mutex>& m, Date_t deadline) noexcept {
280 invariant(getClient());
281 {
282 stdx::lock_guard<Client> clientLock(*getClient());
283 invariant(!_waitMutex);
284 invariant(!_waitCV);
285 invariant(0 == _numKillers);
286
287 // This interrupt check must be done while holding the client lock, so as not to race with a
288 // concurrent caller of markKilled.
289 auto status = checkForInterruptNoAssert();
290 if (!status.isOK()) {
291 return status;
292 }
293 _waitMutex = m.mutex();
294 _waitCV = &cv;
295 }
296
297 // If the maxTimeNeverTimeOut failpoint is set, behave as though the operation's deadline does
298 // not exist. Under normal circumstances, if the op has an existing deadline which is sooner
299 // than the deadline passed into this method, we replace our deadline with the op's. This means
300 // that we expect to time out at the same time as the existing deadline expires. If, when we
301 // time out, we find that the op's deadline has not expired (as will always be the case if
302 // maxTimeNeverTimeOut is set) then we assume that the incongruity is due to a clock mismatch
303 // and return ExceededTimeLimit regardless. To prevent this behaviour, only consider the op's
304 // deadline in the event that the maxTimeNeverTimeOut failpoint is not set.
305 bool opHasDeadline = (hasDeadline() && !MONGO_FAIL_POINT(maxTimeNeverTimeOut));
306
307 if (opHasDeadline) {
308 deadline = std::min(deadline, getDeadline());
309 }
310
311 const auto waitStatus = [&] {
312 if (Date_t::max() == deadline) {
313 cv.wait(m);
314 return stdx::cv_status::no_timeout;
315 }
316 return getServiceContext()->getPreciseClockSource()->waitForConditionUntil(cv, m, deadline);
317 }();
318
319 // Continue waiting on cv until no other thread is attempting to kill this one.
320 cv.wait(m, [this] {
321 stdx::lock_guard<Client> clientLock(*getClient());
322 if (0 == _numKillers) {
323 _waitMutex = nullptr;
324 _waitCV = nullptr;
325 return true;
326 }
327 return false;
328 });
329
330 auto status = checkForInterruptNoAssert();
331 if (!status.isOK()) {
332 return status;
333 }
334 if (opHasDeadline && waitStatus == stdx::cv_status::timeout && deadline == getDeadline()) {
335 // It's possible that the system clock used in stdx::condition_variable::wait_until
336 // is slightly ahead of the FastClock used in checkForInterrupt. In this case,
337 // we treat the operation as though it has exceeded its time limit, just as if the
338 // FastClock and system clock had agreed.
339 markKilled(ErrorCodes::ExceededTimeLimit);
340 return Status(ErrorCodes::ExceededTimeLimit, "operation exceeded time limit");
341 }
342 return waitStatus;
343 }
344
markKilled(ErrorCodes::Error killCode)345 void OperationContext::markKilled(ErrorCodes::Error killCode) {
346 invariant(killCode != ErrorCodes::OK);
347 stdx::unique_lock<stdx::mutex> lkWaitMutex;
348 if (_waitMutex) {
349 invariant(++_numKillers > 0);
350 getClient()->unlock();
351 ON_BLOCK_EXIT([this]() noexcept {
352 getClient()->lock();
353 invariant(--_numKillers >= 0);
354 });
355 lkWaitMutex = stdx::unique_lock<stdx::mutex>{*_waitMutex};
356 }
357 _killCode.compareAndSwap(ErrorCodes::OK, killCode);
358 if (lkWaitMutex && _numKillers == 0) {
359 invariant(_waitCV);
360 _waitCV->notify_all();
361 }
362 }
363
setLogicalSessionId(LogicalSessionId lsid)364 void OperationContext::setLogicalSessionId(LogicalSessionId lsid) {
365 invariant(!_lsid);
366 _lsid = std::move(lsid);
367 }
368
setTxnNumber(TxnNumber txnNumber)369 void OperationContext::setTxnNumber(TxnNumber txnNumber) {
370 invariant(_lsid);
371 invariant(!_txnNumber);
372 _txnNumber = txnNumber;
373 }
374
releaseRecoveryUnit()375 RecoveryUnit* OperationContext::releaseRecoveryUnit() {
376 return _recoveryUnit.release();
377 }
378
setRecoveryUnit(RecoveryUnit * unit,RecoveryUnitState state)379 OperationContext::RecoveryUnitState OperationContext::setRecoveryUnit(RecoveryUnit* unit,
380 RecoveryUnitState state) {
381 _recoveryUnit.reset(unit);
382 RecoveryUnitState oldState = _ruState;
383 _ruState = state;
384 return oldState;
385 }
386
releaseLockState()387 std::unique_ptr<Locker> OperationContext::releaseLockState() {
388 dassert(_locker);
389 return std::move(_locker);
390 }
391
setLockState(std::unique_ptr<Locker> locker)392 void OperationContext::setLockState(std::unique_ptr<Locker> locker) {
393 dassert(!_locker);
394 dassert(locker);
395 _locker = std::move(locker);
396 }
397
getExpirationDateForWaitForValue(Milliseconds waitFor)398 Date_t OperationContext::getExpirationDateForWaitForValue(Milliseconds waitFor) {
399 return getServiceContext()->getPreciseClockSource()->now() + waitFor;
400 }
401
402 } // namespace mongo
403