1 
2 /**
3  *    Copyright (C) 2018-present MongoDB, Inc.
4  *
5  *    This program is free software: you can redistribute it and/or modify
6  *    it under the terms of the Server Side Public License, version 1,
7  *    as published by MongoDB, Inc.
8  *
9  *    This program is distributed in the hope that it will be useful,
10  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *    Server Side Public License for more details.
13  *
14  *    You should have received a copy of the Server Side Public License
15  *    along with this program. If not, see
16  *    <http://www.mongodb.com/licensing/server-side-public-license>.
17  *
18  *    As a special exception, the copyright holders give permission to link the
19  *    code of portions of this program with the OpenSSL library under certain
20  *    conditions as described in each individual source file and distribute
21  *    linked combinations including the program with the OpenSSL library. You
22  *    must comply with the Server Side Public License in all respects for
23  *    all of the code used other than as permitted herein. If you modify file(s)
24  *    with this exception, you may extend this exception to your version of the
25  *    file(s), but you are not obligated to do so. If you do not wish to do so,
26  *    delete this exception statement from your version. If you delete this
27  *    exception statement from all source files in the program, then also delete
28  *    it in the license file.
29  */
30 
31 #pragma once
32 
33 #include <cstdint>
34 #include <limits>
35 #include <string>
36 
37 #include "mongo/base/static_assert.h"
38 #include "mongo/base/string_data.h"
39 #include "mongo/config.h"
40 #include "mongo/platform/hash_namespace.h"
41 
42 namespace mongo {
43 
44 class Locker;
45 
46 struct LockHead;
47 struct PartitionedLockHead;
48 
49 /**
50  * Lock modes.
51  *
52  * Compatibility Matrix
53  *                                          Granted mode
54  *   ---------------.--------------------------------------------------------.
55  *   Requested Mode | MODE_NONE  MODE_IS   MODE_IX  MODE_S   MODE_X  |
56  *     MODE_IS      |      +        +         +        +        -    |
57  *     MODE_IX      |      +        +         +        -        -    |
58  *     MODE_S       |      +        +         -        +        -    |
59  *     MODE_X       |      +        -         -        -        -    |
60  */
61 enum LockMode {
62     MODE_NONE = 0,
63     MODE_IS = 1,
64     MODE_IX = 2,
65     MODE_S = 3,
66     MODE_X = 4,
67 
68     // Counts the lock modes. Used for array size allocations, etc. Always insert new lock
69     // modes above this entry.
70     LockModesCount
71 };
72 
73 /**
74  * Returns a human-readable name for the specified lock mode.
75  */
76 const char* modeName(LockMode mode);
77 
78 /**
79  * Legacy lock mode names in parity for 2.6 reports.
80  */
81 const char* legacyModeName(LockMode mode);
82 
83 /**
84  * Mode A is covered by mode B if the set of conflicts for mode A is a subset of the set of
85  * conflicts for mode B. For example S is covered by X. IS is covered by S. However, IX is not
86  * covered by S or IS.
87  */
88 bool isModeCovered(LockMode mode, LockMode coveringMode);
89 
90 /**
91  * Returns whether the passed in mode is S or IS. Used for validation checks.
92  */
isSharedLockMode(LockMode mode)93 inline bool isSharedLockMode(LockMode mode) {
94     return (mode == MODE_IS || mode == MODE_S);
95 }
96 
97 
98 /**
99  * Return values for the locking functions of the lock manager.
100  */
101 enum LockResult {
102 
103     /**
104      * The lock request was granted and is now on the granted list for the specified resource.
105      */
106     LOCK_OK,
107 
108     /**
109      * The lock request was not granted because of conflict. If this value is returned, the
110      * request was placed on the conflict queue of the specified resource and a call to the
111      * LockGrantNotification::notify callback should be expected with the resource whose lock
112      * was requested.
113      */
114     LOCK_WAITING,
115 
116     /**
117      * The lock request waited, but timed out before it could be granted. This value is never
118      * returned by the LockManager methods here, but by the Locker class, which offers
119      * capability to block while waiting for locks.
120      */
121     LOCK_TIMEOUT,
122 
123     /**
124      * The lock request was not granted because it would result in a deadlock. No changes to
125      * the state of the Locker would be made if this value is returned (i.e., it will not be
126      * killed due to deadlock). It is up to the caller to decide how to recover from this
127      * return value - could be either release some locks and try again, or just bail with an
128      * error and have some upper code handle it.
129      */
130     LOCK_DEADLOCK,
131 
132     /**
133      * This is used as an initialiser value. Should never be returned.
134      */
135     LOCK_INVALID
136 };
137 
138 
139 /**
140  * Hierarchy of resource types. The lock manager knows nothing about this hierarchy, it is
141  * purely logical. Resources of different types will never conflict with each other.
142  *
143  * While the lock manager does not know or care about ordering, the general policy is that
144  * resources are acquired in the order below. For example, one might first acquire a
145  * RESOURCE_GLOBAL and then the desired RESOURCE_DATABASE, both using intent modes, and
146  * finally a RESOURCE_COLLECTION in exclusive mode. When locking multiple resources of the
147  * same type, the canonical order is by resourceId order.
148  *
149  * It is OK to lock resources out of order, but it is the users responsibility to ensure
150  * ordering is consistent so deadlock cannot occur.
151  */
152 enum ResourceType {
153     // Types used for special resources, use with a hash id from ResourceId::SingletonHashIds.
154     RESOURCE_INVALID = 0,
155     RESOURCE_GLOBAL,        // Used for mode changes or global exclusive operations
156     RESOURCE_MMAPV1_FLUSH,  // Necessary only for the MMAPv1 engine
157 
158     // Generic resources, used for multi-granularity locking, together with RESOURCE_GLOBAL
159     RESOURCE_DATABASE,
160     RESOURCE_COLLECTION,
161     RESOURCE_METADATA,
162 
163     // Resource type used for locking general resources not related to the storage hierarchy.
164     RESOURCE_MUTEX,
165 
166     // Counts the rest. Always insert new resource types above this entry.
167     ResourceTypesCount
168 };
169 
170 /**
171  * Returns a human-readable name for the specified resource type.
172  */
173 const char* resourceTypeName(ResourceType resourceType);
174 
175 /**
176  * Uniquely identifies a lockable resource.
177  */
178 class ResourceId {
179     // We only use 3 bits for the resource type in the ResourceId hash
180     enum { resourceTypeBits = 3 };
181     MONGO_STATIC_ASSERT(ResourceTypesCount <= (1 << resourceTypeBits));
182 
183 public:
184     /**
185      * Assign hash ids for special resources to avoid accidental reuse of ids. For ids used
186      * with the same ResourceType, the order here must be the same as the locking order.
187      */
188     enum SingletonHashIds {
189         SINGLETON_INVALID = 0,
190         SINGLETON_PARALLEL_BATCH_WRITER_MODE,
191         SINGLETON_GLOBAL,
192         SINGLETON_MMAPV1_FLUSH,
193     };
194 
ResourceId()195     ResourceId() : _fullHash(0) {}
196     ResourceId(ResourceType type, StringData ns);
197     ResourceId(ResourceType type, const std::string& ns);
198     ResourceId(ResourceType type, uint64_t hashId);
199 
isValid()200     bool isValid() const {
201         return getType() != RESOURCE_INVALID;
202     }
203 
uint64_t()204     operator uint64_t() const {
205         return _fullHash;
206     }
207 
208     // This defines the canonical locking order, first by type and then hash id
209     bool operator<(const ResourceId& rhs) const {
210         return _fullHash < rhs._fullHash;
211     }
212 
getType()213     ResourceType getType() const {
214         return static_cast<ResourceType>(_fullHash >> (64 - resourceTypeBits));
215     }
216 
getHashId()217     uint64_t getHashId() const {
218         return _fullHash & (std::numeric_limits<uint64_t>::max() >> resourceTypeBits);
219     }
220 
221     std::string toString() const;
222 
223 private:
224     /**
225      * The top 'resourceTypeBits' bits of '_fullHash' represent the resource type,
226      * while the remaining bits contain the bottom bits of the hashId. This avoids false
227      * conflicts between resources of different types, which is necessary to prevent deadlocks.
228      */
229     uint64_t _fullHash;
230 
231     static uint64_t fullHash(ResourceType type, uint64_t hashId);
232 
233 #ifdef MONGO_CONFIG_DEBUG_BUILD
234     // Keep the complete namespace name for debugging purposes (TODO: this will be
235     // removed once we are confident in the robustness of the lock manager).
236     std::string _nsCopy;
237 #endif
238 };
239 
240 #ifndef MONGO_CONFIG_DEBUG_BUILD
241 // Treat the resource ids as 64-bit integers in release mode in order to ensure we do
242 // not spend too much time doing comparisons for hashing.
243 MONGO_STATIC_ASSERT(sizeof(ResourceId) == sizeof(uint64_t));
244 #endif
245 
246 
247 // Type to uniquely identify a given locker object
248 typedef uint64_t LockerId;
249 
250 // Hardcoded resource id for the oplog collection, which is special-cased both for resource
251 // acquisition purposes and for statistics reporting.
252 extern const ResourceId resourceIdLocalDB;
253 extern const ResourceId resourceIdOplog;
254 
255 // Hardcoded resource id for admin db. This is to ensure direct writes to auth collections
256 // are serialized (see SERVER-16092)
257 extern const ResourceId resourceIdAdminDB;
258 
259 // Hardcoded resource id for ParallelBatchWriterMode. We use the same resource type
260 // as resourceIdGlobal. This will also ensure the waits are reported as global, which
261 // is appropriate. The lock will never be contended unless the parallel batch writers
262 // must stop all other accesses globally. This resource must be locked before all other
263 // resources (including resourceIdGlobal). Replication applier threads don't take this
264 // lock.
265 // TODO: Merge this with resourceIdGlobal
266 extern const ResourceId resourceIdParallelBatchWriterMode;
267 
268 /**
269  * Interface on which granted lock requests will be notified. See the contract for the notify
270  * method for more information and also the LockManager::lock call.
271  *
272  * The default implementation of this method would simply block on an event until notify has
273  * been invoked (see CondVarLockGrantNotification).
274  *
275  * Test implementations could just count the number of notifications and their outcome so that
276  * they can validate locks are granted as desired and drive the test execution.
277  */
278 class LockGrantNotification {
279 public:
~LockGrantNotification()280     virtual ~LockGrantNotification() {}
281 
282     /**
283      * This method is invoked at most once for each lock request and indicates the outcome of
284      * the lock acquisition for the specified resource id.
285      *
286      * Cases where it won't be called are if a lock acquisition (be it in waiting or converting
287      * state) is cancelled through a call to unlock.
288      *
289      * IMPORTANT: This callback runs under a spinlock for the lock manager, so the work done
290      *            inside must be kept to a minimum and no locks or operations which may block
291      *            should be run. Also, no methods which call back into the lock manager should
292      *            be invoked from within this methods (LockManager is not reentrant).
293      *
294      * @resId ResourceId for which a lock operation was previously called.
295      * @result Outcome of the lock operation.
296      */
297     virtual void notify(ResourceId resId, LockResult result) = 0;
298 };
299 
300 
301 /**
302  * There is one of those entries per each request for a lock. They hang on a linked list off
303  * the LockHead or off a PartitionedLockHead and also are in a map for each Locker. This
304  * structure is not thread-safe.
305  *
306  * LockRequest are owned by the Locker class and it controls their lifetime. They should not
307  * be deleted while on the LockManager though (see the contract for the lock/unlock methods).
308  */
309 struct LockRequest {
310     enum Status {
311         STATUS_NEW,
312         STATUS_GRANTED,
313         STATUS_WAITING,
314         STATUS_CONVERTING,
315 
316         // Counts the rest. Always insert new status types above this entry.
317         StatusCount
318     };
319 
320     /**
321      * Used for initialization of a LockRequest, which might have been retrieved from cache.
322      */
323     void initNew(Locker* locker, LockGrantNotification* notify);
324 
325     // This is the Locker, which created this LockRequest. Pointer is not owned, just referenced.
326     // Must outlive the LockRequest.
327     //
328     // Written at construction time by Locker
329     // Read by LockManager on any thread
330     // No synchronization
331     Locker* locker;
332 
333     // Notification to be invoked when the lock is granted. Pointer is not owned, just referenced.
334     // If a request is in the WAITING or CONVERTING state, must live at least until
335     // LockManager::unlock is cancelled or the notification has been invoked.
336     //
337     // Written at construction time by Locker
338     // Read by LockManager
339     // No synchronization
340     LockGrantNotification* notify;
341 
342     // If the request cannot be granted right away, whether to put it at the front or at the end of
343     // the queue. By default, requests are put at the back. If a request is requested to be put at
344     // the front, this effectively bypasses fairness. Default is FALSE.
345     //
346     // Written at construction time by Locker
347     // Read by LockManager on any thread
348     // No synchronization
349     bool enqueueAtFront;
350 
351     // When this request is granted and as long as it is on the granted queue, the particular
352     // resource's policy will be changed to "compatibleFirst". This means that even if there are
353     // pending requests on the conflict queue, if a compatible request comes in it will be granted
354     // immediately. This effectively turns off fairness.
355     //
356     // Written at construction time by Locker
357     // Read by LockManager on any thread
358     // No synchronization
359     bool compatibleFirst;
360 
361     // When set, an attempt is made to execute this request using partitioned lockheads. This speeds
362     // up the common case where all requested locking modes are compatible with each other, at the
363     // cost of extra overhead for conflicting modes.
364     //
365     // Written at construction time by LockManager
366     // Read by LockManager on any thread
367     // No synchronization
368     bool partitioned;
369 
370     // How many times has LockManager::lock been called for this request. Locks are released when
371     // their recursive count drops to zero.
372     //
373     // Written by LockManager on Locker thread
374     // Read by LockManager on Locker thread
375     // Read by Locker on Locker thread
376     // No synchronization
377     unsigned recursiveCount;
378 
379     // Pointer to the lock to which this request belongs, or null if this request has not yet been
380     // assigned to a lock or if it belongs to the PartitionedLockHead for locker (in which case
381     // partitionedLock must be set). The LockHead should be alive as long as there are LockRequests
382     // on it, so it is safe to have this pointer hanging around.
383     //
384     // Written by LockManager on any thread
385     // Read by LockManager on any thread
386     // Protected by LockHead bucket's mutex
387     LockHead* lock;
388 
389     // Pointer to the partitioned lock to which this request belongs, or null if it is not
390     // partitioned. Only one of 'lock' and 'partitionedLock' is non-NULL, and a request can only
391     // transition from 'partitionedLock' to 'lock', never the other way around.
392     //
393     // Written by LockManager on any thread
394     // Read by LockManager on any thread
395     // Protected by LockHead bucket's mutex
396     PartitionedLockHead* partitionedLock;
397 
398     // The linked list chain on which this request hangs off the owning lock head. The reason
399     // intrusive linked list is used instead of the std::list class is to allow for entries to be
400     // removed from the middle of the list in O(1) time, if they are known instead of having to
401     // search for them and we cannot persist iterators, because the list can be modified while an
402     // iterator is held.
403     //
404     // Written by LockManager on any thread
405     // Read by LockManager on any thread
406     // Protected by LockHead bucket's mutex
407     LockRequest* prev;
408     LockRequest* next;
409 
410     // The current status of this request. Always starts at STATUS_NEW.
411     //
412     // Written by LockManager on any thread
413     // Read by LockManager on any thread
414     // Protected by LockHead bucket's mutex
415     Status status;
416 
417     // If this request is not granted, the mode which has been requested for this lock. If granted,
418     // the mode in which it is currently granted.
419     //
420     // Written by LockManager on any thread
421     // Read by LockManager on any thread
422     // Protected by LockHead bucket's mutex
423     LockMode mode;
424 
425     // This value is different from MODE_NONE only if a conversion is requested for a lock and that
426     // conversion cannot be immediately granted.
427     //
428     // Written by LockManager on any thread
429     // Read by LockManager on any thread
430     // Protected by LockHead bucket's mutex
431     LockMode convertMode;
432 };
433 
434 /**
435  * Returns a human readable status name for the specified LockRequest status.
436  */
437 const char* lockRequestStatusName(LockRequest::Status status);
438 
439 }  // namespace mongo
440 
441 
442 MONGO_HASH_NAMESPACE_START
443 template <>
444 struct hash<mongo::ResourceId> {
445     size_t operator()(const mongo::ResourceId& resource) const {
446         return resource;
447     }
448 };
449 MONGO_HASH_NAMESPACE_END
450