1
2 /**
3 * Copyright (C) 2018-present MongoDB, Inc.
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the Server Side Public License, version 1,
7 * as published by MongoDB, Inc.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * Server Side Public License for more details.
13 *
14 * You should have received a copy of the Server Side Public License
15 * along with this program. If not, see
16 * <http://www.mongodb.com/licensing/server-side-public-license>.
17 *
18 * As a special exception, the copyright holders give permission to link the
19 * code of portions of this program with the OpenSSL library under certain
20 * conditions as described in each individual source file and distribute
21 * linked combinations including the program with the OpenSSL library. You
22 * must comply with the Server Side Public License in all respects for
23 * all of the code used other than as permitted herein. If you modify file(s)
24 * with this exception, you may extend this exception to your version of the
25 * file(s), but you are not obligated to do so. If you do not wish to do so,
26 * delete this exception statement from your version. If you delete this
27 * exception statement from all source files in the program, then also delete
28 * it in the license file.
29 */
30
31 #pragma once
32
33 #include <cstdint>
34 #include <limits>
35 #include <string>
36
37 #include "mongo/base/static_assert.h"
38 #include "mongo/base/string_data.h"
39 #include "mongo/config.h"
40 #include "mongo/platform/hash_namespace.h"
41
42 namespace mongo {
43
44 class Locker;
45
46 struct LockHead;
47 struct PartitionedLockHead;
48
49 /**
50 * Lock modes.
51 *
52 * Compatibility Matrix
53 * Granted mode
54 * ---------------.--------------------------------------------------------.
55 * Requested Mode | MODE_NONE MODE_IS MODE_IX MODE_S MODE_X |
56 * MODE_IS | + + + + - |
57 * MODE_IX | + + + - - |
58 * MODE_S | + + - + - |
59 * MODE_X | + - - - - |
60 */
61 enum LockMode {
62 MODE_NONE = 0,
63 MODE_IS = 1,
64 MODE_IX = 2,
65 MODE_S = 3,
66 MODE_X = 4,
67
68 // Counts the lock modes. Used for array size allocations, etc. Always insert new lock
69 // modes above this entry.
70 LockModesCount
71 };
72
73 /**
74 * Returns a human-readable name for the specified lock mode.
75 */
76 const char* modeName(LockMode mode);
77
78 /**
79 * Legacy lock mode names in parity for 2.6 reports.
80 */
81 const char* legacyModeName(LockMode mode);
82
83 /**
84 * Mode A is covered by mode B if the set of conflicts for mode A is a subset of the set of
85 * conflicts for mode B. For example S is covered by X. IS is covered by S. However, IX is not
86 * covered by S or IS.
87 */
88 bool isModeCovered(LockMode mode, LockMode coveringMode);
89
90 /**
91 * Returns whether the passed in mode is S or IS. Used for validation checks.
92 */
isSharedLockMode(LockMode mode)93 inline bool isSharedLockMode(LockMode mode) {
94 return (mode == MODE_IS || mode == MODE_S);
95 }
96
97
98 /**
99 * Return values for the locking functions of the lock manager.
100 */
101 enum LockResult {
102
103 /**
104 * The lock request was granted and is now on the granted list for the specified resource.
105 */
106 LOCK_OK,
107
108 /**
109 * The lock request was not granted because of conflict. If this value is returned, the
110 * request was placed on the conflict queue of the specified resource and a call to the
111 * LockGrantNotification::notify callback should be expected with the resource whose lock
112 * was requested.
113 */
114 LOCK_WAITING,
115
116 /**
117 * The lock request waited, but timed out before it could be granted. This value is never
118 * returned by the LockManager methods here, but by the Locker class, which offers
119 * capability to block while waiting for locks.
120 */
121 LOCK_TIMEOUT,
122
123 /**
124 * The lock request was not granted because it would result in a deadlock. No changes to
125 * the state of the Locker would be made if this value is returned (i.e., it will not be
126 * killed due to deadlock). It is up to the caller to decide how to recover from this
127 * return value - could be either release some locks and try again, or just bail with an
128 * error and have some upper code handle it.
129 */
130 LOCK_DEADLOCK,
131
132 /**
133 * This is used as an initialiser value. Should never be returned.
134 */
135 LOCK_INVALID
136 };
137
138
139 /**
140 * Hierarchy of resource types. The lock manager knows nothing about this hierarchy, it is
141 * purely logical. Resources of different types will never conflict with each other.
142 *
143 * While the lock manager does not know or care about ordering, the general policy is that
144 * resources are acquired in the order below. For example, one might first acquire a
145 * RESOURCE_GLOBAL and then the desired RESOURCE_DATABASE, both using intent modes, and
146 * finally a RESOURCE_COLLECTION in exclusive mode. When locking multiple resources of the
147 * same type, the canonical order is by resourceId order.
148 *
149 * It is OK to lock resources out of order, but it is the users responsibility to ensure
150 * ordering is consistent so deadlock cannot occur.
151 */
152 enum ResourceType {
153 // Types used for special resources, use with a hash id from ResourceId::SingletonHashIds.
154 RESOURCE_INVALID = 0,
155 RESOURCE_GLOBAL, // Used for mode changes or global exclusive operations
156 RESOURCE_MMAPV1_FLUSH, // Necessary only for the MMAPv1 engine
157
158 // Generic resources, used for multi-granularity locking, together with RESOURCE_GLOBAL
159 RESOURCE_DATABASE,
160 RESOURCE_COLLECTION,
161 RESOURCE_METADATA,
162
163 // Resource type used for locking general resources not related to the storage hierarchy.
164 RESOURCE_MUTEX,
165
166 // Counts the rest. Always insert new resource types above this entry.
167 ResourceTypesCount
168 };
169
170 /**
171 * Returns a human-readable name for the specified resource type.
172 */
173 const char* resourceTypeName(ResourceType resourceType);
174
175 /**
176 * Uniquely identifies a lockable resource.
177 */
178 class ResourceId {
179 // We only use 3 bits for the resource type in the ResourceId hash
180 enum { resourceTypeBits = 3 };
181 MONGO_STATIC_ASSERT(ResourceTypesCount <= (1 << resourceTypeBits));
182
183 public:
184 /**
185 * Assign hash ids for special resources to avoid accidental reuse of ids. For ids used
186 * with the same ResourceType, the order here must be the same as the locking order.
187 */
188 enum SingletonHashIds {
189 SINGLETON_INVALID = 0,
190 SINGLETON_PARALLEL_BATCH_WRITER_MODE,
191 SINGLETON_GLOBAL,
192 SINGLETON_MMAPV1_FLUSH,
193 };
194
ResourceId()195 ResourceId() : _fullHash(0) {}
196 ResourceId(ResourceType type, StringData ns);
197 ResourceId(ResourceType type, const std::string& ns);
198 ResourceId(ResourceType type, uint64_t hashId);
199
isValid()200 bool isValid() const {
201 return getType() != RESOURCE_INVALID;
202 }
203
uint64_t()204 operator uint64_t() const {
205 return _fullHash;
206 }
207
208 // This defines the canonical locking order, first by type and then hash id
209 bool operator<(const ResourceId& rhs) const {
210 return _fullHash < rhs._fullHash;
211 }
212
getType()213 ResourceType getType() const {
214 return static_cast<ResourceType>(_fullHash >> (64 - resourceTypeBits));
215 }
216
getHashId()217 uint64_t getHashId() const {
218 return _fullHash & (std::numeric_limits<uint64_t>::max() >> resourceTypeBits);
219 }
220
221 std::string toString() const;
222
223 private:
224 /**
225 * The top 'resourceTypeBits' bits of '_fullHash' represent the resource type,
226 * while the remaining bits contain the bottom bits of the hashId. This avoids false
227 * conflicts between resources of different types, which is necessary to prevent deadlocks.
228 */
229 uint64_t _fullHash;
230
231 static uint64_t fullHash(ResourceType type, uint64_t hashId);
232
233 #ifdef MONGO_CONFIG_DEBUG_BUILD
234 // Keep the complete namespace name for debugging purposes (TODO: this will be
235 // removed once we are confident in the robustness of the lock manager).
236 std::string _nsCopy;
237 #endif
238 };
239
240 #ifndef MONGO_CONFIG_DEBUG_BUILD
241 // Treat the resource ids as 64-bit integers in release mode in order to ensure we do
242 // not spend too much time doing comparisons for hashing.
243 MONGO_STATIC_ASSERT(sizeof(ResourceId) == sizeof(uint64_t));
244 #endif
245
246
247 // Type to uniquely identify a given locker object
248 typedef uint64_t LockerId;
249
250 // Hardcoded resource id for the oplog collection, which is special-cased both for resource
251 // acquisition purposes and for statistics reporting.
252 extern const ResourceId resourceIdLocalDB;
253 extern const ResourceId resourceIdOplog;
254
255 // Hardcoded resource id for admin db. This is to ensure direct writes to auth collections
256 // are serialized (see SERVER-16092)
257 extern const ResourceId resourceIdAdminDB;
258
259 // Hardcoded resource id for ParallelBatchWriterMode. We use the same resource type
260 // as resourceIdGlobal. This will also ensure the waits are reported as global, which
261 // is appropriate. The lock will never be contended unless the parallel batch writers
262 // must stop all other accesses globally. This resource must be locked before all other
263 // resources (including resourceIdGlobal). Replication applier threads don't take this
264 // lock.
265 // TODO: Merge this with resourceIdGlobal
266 extern const ResourceId resourceIdParallelBatchWriterMode;
267
268 /**
269 * Interface on which granted lock requests will be notified. See the contract for the notify
270 * method for more information and also the LockManager::lock call.
271 *
272 * The default implementation of this method would simply block on an event until notify has
273 * been invoked (see CondVarLockGrantNotification).
274 *
275 * Test implementations could just count the number of notifications and their outcome so that
276 * they can validate locks are granted as desired and drive the test execution.
277 */
278 class LockGrantNotification {
279 public:
~LockGrantNotification()280 virtual ~LockGrantNotification() {}
281
282 /**
283 * This method is invoked at most once for each lock request and indicates the outcome of
284 * the lock acquisition for the specified resource id.
285 *
286 * Cases where it won't be called are if a lock acquisition (be it in waiting or converting
287 * state) is cancelled through a call to unlock.
288 *
289 * IMPORTANT: This callback runs under a spinlock for the lock manager, so the work done
290 * inside must be kept to a minimum and no locks or operations which may block
291 * should be run. Also, no methods which call back into the lock manager should
292 * be invoked from within this methods (LockManager is not reentrant).
293 *
294 * @resId ResourceId for which a lock operation was previously called.
295 * @result Outcome of the lock operation.
296 */
297 virtual void notify(ResourceId resId, LockResult result) = 0;
298 };
299
300
301 /**
302 * There is one of those entries per each request for a lock. They hang on a linked list off
303 * the LockHead or off a PartitionedLockHead and also are in a map for each Locker. This
304 * structure is not thread-safe.
305 *
306 * LockRequest are owned by the Locker class and it controls their lifetime. They should not
307 * be deleted while on the LockManager though (see the contract for the lock/unlock methods).
308 */
309 struct LockRequest {
310 enum Status {
311 STATUS_NEW,
312 STATUS_GRANTED,
313 STATUS_WAITING,
314 STATUS_CONVERTING,
315
316 // Counts the rest. Always insert new status types above this entry.
317 StatusCount
318 };
319
320 /**
321 * Used for initialization of a LockRequest, which might have been retrieved from cache.
322 */
323 void initNew(Locker* locker, LockGrantNotification* notify);
324
325 // This is the Locker, which created this LockRequest. Pointer is not owned, just referenced.
326 // Must outlive the LockRequest.
327 //
328 // Written at construction time by Locker
329 // Read by LockManager on any thread
330 // No synchronization
331 Locker* locker;
332
333 // Notification to be invoked when the lock is granted. Pointer is not owned, just referenced.
334 // If a request is in the WAITING or CONVERTING state, must live at least until
335 // LockManager::unlock is cancelled or the notification has been invoked.
336 //
337 // Written at construction time by Locker
338 // Read by LockManager
339 // No synchronization
340 LockGrantNotification* notify;
341
342 // If the request cannot be granted right away, whether to put it at the front or at the end of
343 // the queue. By default, requests are put at the back. If a request is requested to be put at
344 // the front, this effectively bypasses fairness. Default is FALSE.
345 //
346 // Written at construction time by Locker
347 // Read by LockManager on any thread
348 // No synchronization
349 bool enqueueAtFront;
350
351 // When this request is granted and as long as it is on the granted queue, the particular
352 // resource's policy will be changed to "compatibleFirst". This means that even if there are
353 // pending requests on the conflict queue, if a compatible request comes in it will be granted
354 // immediately. This effectively turns off fairness.
355 //
356 // Written at construction time by Locker
357 // Read by LockManager on any thread
358 // No synchronization
359 bool compatibleFirst;
360
361 // When set, an attempt is made to execute this request using partitioned lockheads. This speeds
362 // up the common case where all requested locking modes are compatible with each other, at the
363 // cost of extra overhead for conflicting modes.
364 //
365 // Written at construction time by LockManager
366 // Read by LockManager on any thread
367 // No synchronization
368 bool partitioned;
369
370 // How many times has LockManager::lock been called for this request. Locks are released when
371 // their recursive count drops to zero.
372 //
373 // Written by LockManager on Locker thread
374 // Read by LockManager on Locker thread
375 // Read by Locker on Locker thread
376 // No synchronization
377 unsigned recursiveCount;
378
379 // Pointer to the lock to which this request belongs, or null if this request has not yet been
380 // assigned to a lock or if it belongs to the PartitionedLockHead for locker (in which case
381 // partitionedLock must be set). The LockHead should be alive as long as there are LockRequests
382 // on it, so it is safe to have this pointer hanging around.
383 //
384 // Written by LockManager on any thread
385 // Read by LockManager on any thread
386 // Protected by LockHead bucket's mutex
387 LockHead* lock;
388
389 // Pointer to the partitioned lock to which this request belongs, or null if it is not
390 // partitioned. Only one of 'lock' and 'partitionedLock' is non-NULL, and a request can only
391 // transition from 'partitionedLock' to 'lock', never the other way around.
392 //
393 // Written by LockManager on any thread
394 // Read by LockManager on any thread
395 // Protected by LockHead bucket's mutex
396 PartitionedLockHead* partitionedLock;
397
398 // The linked list chain on which this request hangs off the owning lock head. The reason
399 // intrusive linked list is used instead of the std::list class is to allow for entries to be
400 // removed from the middle of the list in O(1) time, if they are known instead of having to
401 // search for them and we cannot persist iterators, because the list can be modified while an
402 // iterator is held.
403 //
404 // Written by LockManager on any thread
405 // Read by LockManager on any thread
406 // Protected by LockHead bucket's mutex
407 LockRequest* prev;
408 LockRequest* next;
409
410 // The current status of this request. Always starts at STATUS_NEW.
411 //
412 // Written by LockManager on any thread
413 // Read by LockManager on any thread
414 // Protected by LockHead bucket's mutex
415 Status status;
416
417 // If this request is not granted, the mode which has been requested for this lock. If granted,
418 // the mode in which it is currently granted.
419 //
420 // Written by LockManager on any thread
421 // Read by LockManager on any thread
422 // Protected by LockHead bucket's mutex
423 LockMode mode;
424
425 // This value is different from MODE_NONE only if a conversion is requested for a lock and that
426 // conversion cannot be immediately granted.
427 //
428 // Written by LockManager on any thread
429 // Read by LockManager on any thread
430 // Protected by LockHead bucket's mutex
431 LockMode convertMode;
432 };
433
434 /**
435 * Returns a human readable status name for the specified LockRequest status.
436 */
437 const char* lockRequestStatusName(LockRequest::Status status);
438
439 } // namespace mongo
440
441
442 MONGO_HASH_NAMESPACE_START
443 template <>
444 struct hash<mongo::ResourceId> {
445 size_t operator()(const mongo::ResourceId& resource) const {
446 return resource;
447 }
448 };
449 MONGO_HASH_NAMESPACE_END
450