1 
2 /**
3  *    Copyright (C) 2018-present MongoDB, Inc.
4  *
5  *    This program is free software: you can redistribute it and/or modify
6  *    it under the terms of the Server Side Public License, version 1,
7  *    as published by MongoDB, Inc.
8  *
9  *    This program is distributed in the hope that it will be useful,
10  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *    Server Side Public License for more details.
13  *
14  *    You should have received a copy of the Server Side Public License
15  *    along with this program. If not, see
16  *    <http://www.mongodb.com/licensing/server-side-public-license>.
17  *
18  *    As a special exception, the copyright holders give permission to link the
19  *    code of portions of this program with the OpenSSL library under certain
20  *    conditions as described in each individual source file and distribute
21  *    linked combinations including the program with the OpenSSL library. You
22  *    must comply with the Server Side Public License in all respects for
23  *    all of the code used other than as permitted herein. If you modify file(s)
24  *    with this exception, you may extend this exception to your version of the
25  *    file(s), but you are not obligated to do so. If you do not wish to do so,
26  *    delete this exception statement from your version. If you delete this
27  *    exception statement from all source files in the program, then also delete
28  *    it in the license file.
29  */
30 
31 #define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kDefault
32 
33 #include "mongo/platform/basic.h"
34 
35 #include "mongo/db/operation_context.h"
36 
37 #include "mongo/bson/inline_decls.h"
38 #include "mongo/db/client.h"
39 #include "mongo/db/service_context.h"
40 #include "mongo/platform/random.h"
41 #include "mongo/stdx/mutex.h"
42 #include "mongo/util/assert_util.h"
43 #include "mongo/util/clock_source.h"
44 #include "mongo/util/fail_point_service.h"
45 #include "mongo/util/log.h"
46 #include "mongo/util/scopeguard.h"
47 #include "mongo/util/system_tick_source.h"
48 
49 namespace mongo {
50 
51 namespace {
52 // Enabling the maxTimeAlwaysTimeOut fail point will cause any query or command run with a
53 // valid non-zero max time to fail immediately.  Any getmore operation on a cursor already
54 // created with a valid non-zero max time will also fail immediately.
55 //
56 // This fail point cannot be used with the maxTimeNeverTimeOut fail point.
57 MONGO_FP_DECLARE(maxTimeAlwaysTimeOut);
58 
59 // Enabling the maxTimeNeverTimeOut fail point will cause the server to never time out any
60 // query, command, or getmore operation, regardless of whether a max time is set.
61 //
62 // This fail point cannot be used with the maxTimeAlwaysTimeOut fail point.
63 MONGO_FP_DECLARE(maxTimeNeverTimeOut);
64 
65 // Enabling the checkForInterruptFail fail point will start a game of random chance on the
66 // connection specified in the fail point data, generating an interrupt with a given fixed
67 // probability.  Example invocation:
68 //
69 // {configureFailPoint: "checkForInterruptFail",
70 //  mode: "alwaysOn",
71 //  data: {conn: 17, chance: .01}}
72 //
73 // Both data fields must be specified.  In the above example, all interrupt points on connection 17
74 // will generate a kill on the current operation with probability p(.01), including interrupt points
75 // of nested operations.  "chance" must be a double between 0 and 1, inclusive.
76 MONGO_FP_DECLARE(checkForInterruptFail);
77 
78 }  // namespace
79 
OperationContext(Client * client,unsigned int opId)80 OperationContext::OperationContext(Client* client, unsigned int opId)
81     : _client(client),
82       _opId(opId),
83       _elapsedTime(client ? client->getServiceContext()->getTickSource()
84                           : SystemTickSource::get()) {}
85 
setDeadlineAndMaxTime(Date_t when,Microseconds maxTime)86 void OperationContext::setDeadlineAndMaxTime(Date_t when, Microseconds maxTime) {
87     invariant(!getClient()->isInDirectClient());
88     uassert(40120, "Illegal attempt to change operation deadline", !hasDeadline());
89     _deadline = when;
90     _maxTime = maxTime;
91 }
92 
computeMaxTimeFromDeadline(Date_t when)93 Microseconds OperationContext::computeMaxTimeFromDeadline(Date_t when) {
94     Microseconds maxTime;
95     if (when == Date_t::max()) {
96         maxTime = Microseconds::max();
97     } else {
98         maxTime = when - getServiceContext()->getFastClockSource()->now();
99         if (maxTime < Microseconds::zero()) {
100             maxTime = Microseconds::zero();
101         }
102     }
103     return maxTime;
104 }
105 
setDeadlineByDate(Date_t when)106 void OperationContext::setDeadlineByDate(Date_t when) {
107     setDeadlineAndMaxTime(when, computeMaxTimeFromDeadline(when));
108 }
109 
setDeadlineAfterNowBy(Microseconds maxTime)110 void OperationContext::setDeadlineAfterNowBy(Microseconds maxTime) {
111     Date_t when;
112     if (maxTime < Microseconds::zero()) {
113         maxTime = Microseconds::zero();
114     }
115     if (maxTime == Microseconds::max()) {
116         when = Date_t::max();
117     } else {
118         auto clock = getServiceContext()->getFastClockSource();
119         when = clock->now();
120         if (maxTime > Microseconds::zero()) {
121             when += clock->getPrecision() + maxTime;
122         }
123     }
124     setDeadlineAndMaxTime(when, maxTime);
125 }
126 
hasDeadlineExpired() const127 bool OperationContext::hasDeadlineExpired() const {
128     if (!hasDeadline()) {
129         return false;
130     }
131     if (MONGO_FAIL_POINT(maxTimeNeverTimeOut)) {
132         return false;
133     }
134     if (MONGO_FAIL_POINT(maxTimeAlwaysTimeOut)) {
135         return true;
136     }
137 
138     // TODO: Remove once all OperationContexts are properly connected to Clients and ServiceContexts
139     // in tests.
140     if (MONGO_unlikely(!getClient() || !getServiceContext())) {
141         return false;
142     }
143 
144     const auto now = getServiceContext()->getFastClockSource()->now();
145     return now >= getDeadline();
146 }
147 
getRemainingMaxTimeMillis() const148 Milliseconds OperationContext::getRemainingMaxTimeMillis() const {
149     if (!hasDeadline()) {
150         return Milliseconds::max();
151     }
152 
153     return std::max(Milliseconds{0},
154                     getDeadline() - getServiceContext()->getFastClockSource()->now());
155 }
156 
getRemainingMaxTimeMicros() const157 Microseconds OperationContext::getRemainingMaxTimeMicros() const {
158     if (!hasDeadline()) {
159         return Microseconds::max();
160     }
161     return _maxTime - getElapsedTime();
162 }
163 
checkForInterrupt()164 void OperationContext::checkForInterrupt() {
165     uassertStatusOK(checkForInterruptNoAssert());
166 }
167 
168 namespace {
169 
170 // Helper function for checkForInterrupt fail point.  Decides whether the operation currently
171 // being run by the given Client meet the (probabilistic) conditions for interruption as
172 // specified in the fail point info.
opShouldFail(const OperationContext * opCtx,const BSONObj & failPointInfo)173 bool opShouldFail(const OperationContext* opCtx, const BSONObj& failPointInfo) {
174     // Only target the client with the specified connection number.
175     if (opCtx->getClient()->getConnectionId() != failPointInfo["conn"].safeNumberLong()) {
176         return false;
177     }
178 
179     // Return true with (approx) probability p = "chance".  Recall: 0 <= chance <= 1.
180     double next = static_cast<double>(std::abs(opCtx->getClient()->getPrng().nextInt64()));
181     double upperBound =
182         std::numeric_limits<int64_t>::max() * failPointInfo["chance"].numberDouble();
183     if (next > upperBound) {
184         return false;
185     }
186     return true;
187 }
188 
189 }  // namespace
190 
checkForInterruptNoAssert()191 Status OperationContext::checkForInterruptNoAssert() {
192     // TODO: Remove the MONGO_likely(getClient()) once all operation contexts are constructed with
193     // clients.
194     if (MONGO_likely(getClient() && getServiceContext()) &&
195         getServiceContext()->getKillAllOperations()) {
196         return Status(ErrorCodes::InterruptedAtShutdown, "interrupted at shutdown");
197     }
198 
199     if (hasDeadlineExpired()) {
200         markKilled(ErrorCodes::ExceededTimeLimit);
201         return Status(ErrorCodes::ExceededTimeLimit, "operation exceeded time limit");
202     }
203 
204     MONGO_FAIL_POINT_BLOCK(checkForInterruptFail, scopedFailPoint) {
205         if (opShouldFail(this, scopedFailPoint.getData())) {
206             log() << "set pending kill on op " << getOpID() << ", for checkForInterruptFail";
207             markKilled();
208         }
209     }
210 
211     const auto killStatus = getKillStatus();
212     if (killStatus != ErrorCodes::OK) {
213         return Status(killStatus, "operation was interrupted");
214     }
215 
216     return Status::OK();
217 }
218 
sleepUntil(Date_t deadline)219 void OperationContext::sleepUntil(Date_t deadline) {
220     stdx::mutex m;
221     stdx::condition_variable cv;
222     stdx::unique_lock<stdx::mutex> lk(m);
223     invariant(!waitForConditionOrInterruptUntil(cv, lk, deadline, [] { return false; }));
224 }
225 
sleepFor(Milliseconds duration)226 void OperationContext::sleepFor(Milliseconds duration) {
227     stdx::mutex m;
228     stdx::condition_variable cv;
229     stdx::unique_lock<stdx::mutex> lk(m);
230     invariant(!waitForConditionOrInterruptFor(cv, lk, duration, [] { return false; }));
231 }
232 
waitForConditionOrInterrupt(stdx::condition_variable & cv,stdx::unique_lock<stdx::mutex> & m)233 void OperationContext::waitForConditionOrInterrupt(stdx::condition_variable& cv,
234                                                    stdx::unique_lock<stdx::mutex>& m) {
235     uassertStatusOK(waitForConditionOrInterruptNoAssert(cv, m));
236 }
237 
waitForConditionOrInterruptNoAssert(stdx::condition_variable & cv,stdx::unique_lock<stdx::mutex> & m)238 Status OperationContext::waitForConditionOrInterruptNoAssert(
239     stdx::condition_variable& cv, stdx::unique_lock<stdx::mutex>& m) noexcept {
240     auto status = waitForConditionOrInterruptNoAssertUntil(cv, m, Date_t::max());
241     if (!status.isOK()) {
242         return status.getStatus();
243     }
244     invariant(status.getValue() == stdx::cv_status::no_timeout);
245     return status.getStatus();
246 }
247 
waitForConditionOrInterruptUntil(stdx::condition_variable & cv,stdx::unique_lock<stdx::mutex> & m,Date_t deadline)248 stdx::cv_status OperationContext::waitForConditionOrInterruptUntil(
249     stdx::condition_variable& cv, stdx::unique_lock<stdx::mutex>& m, Date_t deadline) {
250 
251     return uassertStatusOK(waitForConditionOrInterruptNoAssertUntil(cv, m, deadline));
252 }
253 
254 // Theory of operation for waitForConditionOrInterruptNoAssertUntil and markKilled:
255 //
256 // An operation indicates to potential killers that it is waiting on a condition variable by setting
257 // _waitMutex and _waitCV, while holding the lock on its parent Client. It then unlocks its Client,
258 // unblocking any killers, which are required to have locked the Client before calling markKilled.
259 //
260 // When _waitMutex and _waitCV are set, killers must lock _waitMutex before setting the _killCode,
261 // and must signal _waitCV before releasing _waitMutex. Unfortunately, they must lock _waitMutex
262 // without holding a lock on Client to avoid a deadlock with callers of
263 // waitForConditionOrInterruptNoAssertUntil(). So, in the event that _waitMutex is set, the killer
264 // increments _numKillers, drops the Client lock, acquires _waitMutex and then re-acquires the
265 // Client lock. We know that the Client, its OperationContext and _waitMutex will remain valid
266 // during this period because the caller of waitForConditionOrInterruptNoAssertUntil will not return
267 // while _numKillers > 0 and will not return until it has itself reacquired _waitMutex. Instead,
268 // that caller will keep waiting on _waitCV until _numKillers drops to 0.
269 //
270 // In essence, when _waitMutex is set, _killCode is guarded by _waitMutex and _waitCV, but when
271 // _waitMutex is not set, it is guarded by the Client spinlock. Changing _waitMutex is itself
272 // guarded by the Client spinlock and _numKillers.
273 //
274 // When _numKillers does drop to 0, the waiter will null out _waitMutex and _waitCV.
275 //
276 // This implementation adds a minimum of two spinlock acquire-release pairs to every condition
277 // variable wait.
waitForConditionOrInterruptNoAssertUntil(stdx::condition_variable & cv,stdx::unique_lock<stdx::mutex> & m,Date_t deadline)278 StatusWith<stdx::cv_status> OperationContext::waitForConditionOrInterruptNoAssertUntil(
279     stdx::condition_variable& cv, stdx::unique_lock<stdx::mutex>& m, Date_t deadline) noexcept {
280     invariant(getClient());
281     {
282         stdx::lock_guard<Client> clientLock(*getClient());
283         invariant(!_waitMutex);
284         invariant(!_waitCV);
285         invariant(0 == _numKillers);
286 
287         // This interrupt check must be done while holding the client lock, so as not to race with a
288         // concurrent caller of markKilled.
289         auto status = checkForInterruptNoAssert();
290         if (!status.isOK()) {
291             return status;
292         }
293         _waitMutex = m.mutex();
294         _waitCV = &cv;
295     }
296 
297     // If the maxTimeNeverTimeOut failpoint is set, behave as though the operation's deadline does
298     // not exist. Under normal circumstances, if the op has an existing deadline which is sooner
299     // than the deadline passed into this method, we replace our deadline with the op's. This means
300     // that we expect to time out at the same time as the existing deadline expires. If, when we
301     // time out, we find that the op's deadline has not expired (as will always be the case if
302     // maxTimeNeverTimeOut is set) then we assume that the incongruity is due to a clock mismatch
303     // and return ExceededTimeLimit regardless. To prevent this behaviour, only consider the op's
304     // deadline in the event that the maxTimeNeverTimeOut failpoint is not set.
305     bool opHasDeadline = (hasDeadline() && !MONGO_FAIL_POINT(maxTimeNeverTimeOut));
306 
307     if (opHasDeadline) {
308         deadline = std::min(deadline, getDeadline());
309     }
310 
311     const auto waitStatus = [&] {
312         if (Date_t::max() == deadline) {
313             cv.wait(m);
314             return stdx::cv_status::no_timeout;
315         }
316         return getServiceContext()->getPreciseClockSource()->waitForConditionUntil(cv, m, deadline);
317     }();
318 
319     // Continue waiting on cv until no other thread is attempting to kill this one.
320     cv.wait(m, [this] {
321         stdx::lock_guard<Client> clientLock(*getClient());
322         if (0 == _numKillers) {
323             _waitMutex = nullptr;
324             _waitCV = nullptr;
325             return true;
326         }
327         return false;
328     });
329 
330     auto status = checkForInterruptNoAssert();
331     if (!status.isOK()) {
332         return status;
333     }
334     if (opHasDeadline && waitStatus == stdx::cv_status::timeout && deadline == getDeadline()) {
335         // It's possible that the system clock used in stdx::condition_variable::wait_until
336         // is slightly ahead of the FastClock used in checkForInterrupt. In this case,
337         // we treat the operation as though it has exceeded its time limit, just as if the
338         // FastClock and system clock had agreed.
339         markKilled(ErrorCodes::ExceededTimeLimit);
340         return Status(ErrorCodes::ExceededTimeLimit, "operation exceeded time limit");
341     }
342     return waitStatus;
343 }
344 
markKilled(ErrorCodes::Error killCode)345 void OperationContext::markKilled(ErrorCodes::Error killCode) {
346     invariant(killCode != ErrorCodes::OK);
347     stdx::unique_lock<stdx::mutex> lkWaitMutex;
348     if (_waitMutex) {
349         invariant(++_numKillers > 0);
350         getClient()->unlock();
351         ON_BLOCK_EXIT([this]() noexcept {
352             getClient()->lock();
353             invariant(--_numKillers >= 0);
354         });
355         lkWaitMutex = stdx::unique_lock<stdx::mutex>{*_waitMutex};
356     }
357     _killCode.compareAndSwap(ErrorCodes::OK, killCode);
358     if (lkWaitMutex && _numKillers == 0) {
359         invariant(_waitCV);
360         _waitCV->notify_all();
361     }
362 }
363 
setLogicalSessionId(LogicalSessionId lsid)364 void OperationContext::setLogicalSessionId(LogicalSessionId lsid) {
365     invariant(!_lsid);
366     _lsid = std::move(lsid);
367 }
368 
setTxnNumber(TxnNumber txnNumber)369 void OperationContext::setTxnNumber(TxnNumber txnNumber) {
370     invariant(_lsid);
371     invariant(!_txnNumber);
372     _txnNumber = txnNumber;
373 }
374 
releaseRecoveryUnit()375 RecoveryUnit* OperationContext::releaseRecoveryUnit() {
376     return _recoveryUnit.release();
377 }
378 
setRecoveryUnit(RecoveryUnit * unit,RecoveryUnitState state)379 OperationContext::RecoveryUnitState OperationContext::setRecoveryUnit(RecoveryUnit* unit,
380                                                                       RecoveryUnitState state) {
381     _recoveryUnit.reset(unit);
382     RecoveryUnitState oldState = _ruState;
383     _ruState = state;
384     return oldState;
385 }
386 
releaseLockState()387 std::unique_ptr<Locker> OperationContext::releaseLockState() {
388     dassert(_locker);
389     return std::move(_locker);
390 }
391 
setLockState(std::unique_ptr<Locker> locker)392 void OperationContext::setLockState(std::unique_ptr<Locker> locker) {
393     dassert(!_locker);
394     dassert(locker);
395     _locker = std::move(locker);
396 }
397 
getExpirationDateForWaitForValue(Milliseconds waitFor)398 Date_t OperationContext::getExpirationDateForWaitForValue(Milliseconds waitFor) {
399     return getServiceContext()->getPreciseClockSource()->now() + waitFor;
400 }
401 
402 }  // namespace mongo
403