1 /*******************************************************************************
2     Copyright (c) 2015-2022 NVIDIA Corporation
3 
4     Permission is hereby granted, free of charge, to any person obtaining a copy
5     of this software and associated documentation files (the "Software"), to
6     deal in the Software without restriction, including without limitation the
7     rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8     sell copies of the Software, and to permit persons to whom the Software is
9     furnished to do so, subject to the following conditions:
10 
11         The above copyright notice and this permission notice shall be
12         included in all copies or substantial portions of the Software.
13 
14     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17     THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20     DEALINGS IN THE SOFTWARE.
21 
22 *******************************************************************************/
23 
24 #ifndef __UVM_LOCK_H__
25 #define __UVM_LOCK_H__
26 
27 #include "uvm_forward_decl.h"
28 #include "uvm_linux.h"
29 #include "uvm_common.h"
30 
31 // --------------------------- UVM Locking Order ---------------------------- //
32 //
33 // Any locks described here should have their locking order added to
34 // uvm_lock_order_t below.
35 //
36 // - Global power management lock (g_uvm_global.pm.lock)
37 //      Order: UVM_LOCK_ORDER_GLOBAL_PM
//      Reader/writer lock (rw_semaphore)
39 //
40 //      Synchronizes user threads with system power management.
41 //
//      Taken in read mode by most user-facing UVM driver entry points.  Taken
//      in write mode only by uvm_suspend(), and held for the duration of
//      sleep cycles.
45 //
46 //      This lock is special: while it's taken by user-facing entry points,
47 //      and may be taken before or after mmap_lock, this apparent violation of
48 //      lock ordering is permissible because pm_lock may only be taken via
49 //      trylock in read mode by paths which already hold any lower-level
50 //      locks, as well as by paths subject to the kernel's freezer.  Paths
51 //      taking it must be prepared to back off in case of acquisition failures.
52 //
53 //      This, in turn, is acceptable because the lock is taken in write mode
//      infrequently, and only as part of power management.  Starvation is
55 //      not a concern.
56 //
//      The mmap_lock deadlock potential aside, the trylock approach is also
58 //      motivated by the need to prevent user threads making UVM system calls
59 //      from blocking when UVM is suspended: when the kernel suspends the
60 //      system, the freezer employed to stop user tasks requires these tasks
61 //      to be interruptible.
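//
//      Illustrative sketch only (the helpers below are defined later in this
//      file; the error code choice and surrounding logic are hypothetical):
//
//          if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
//              return NV_ERR_BUSY_RETRY;   // hypothetical back-off choice
//
//          // ... user-facing work ...
//
//          uvm_up_read(&g_uvm_global.pm.lock);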
62 //
63 // - Global driver state lock (g_uvm_global.global_lock)
64 //      Order: UVM_LOCK_ORDER_GLOBAL
65 //      Exclusive lock (mutex)
66 //
67 //      This protects state associated with GPUs, such as the P2P table
68 //      and instance pointer mappings.
69 //
70 //      This should be taken whenever global GPU state might need to be modified.
71 //
72 // - GPU ISR lock
73 //      Order: UVM_LOCK_ORDER_ISR
74 //      Exclusive lock (mutex) per gpu
75 //
76 //      Protects:
77 //      - gpu->parent->isr.replayable_faults.service_lock:
78 //        Changes to the state of a GPU as it transitions from top-half to
79 //        bottom-half interrupt handler for replayable faults. This lock is
80 //        acquired for that GPU, in the ISR top-half. Then a bottom-half is
81 //        scheduled (to run in a workqueue). Then the bottom-half releases the
82 //        lock when that GPU's processing appears to be done.
83 //
84 //      - gpu->parent->isr.non_replayable_faults.service_lock:
85 //        Changes to the state of a GPU in the bottom-half for non-replayable
//        faults. Non-replayable faults are handed off from RM instead of
87 //        directly from the GPU hardware. This means that we do not keep
88 //        receiving interrupts after RM pops out the faults from the HW buffer.
89 //        In order not to miss fault notifications, we will always schedule a
90 //        bottom-half for non-replayable faults if there are faults ready to be
91 //        consumed in the buffer, even if there already is some bottom-half
92 //        running or scheduled. This lock serializes all scheduled bottom halves
93 //        per GPU which service non-replayable faults.
94 //
95 //      - gpu->parent->isr.access_counters.service_lock:
96 //        Changes to the state of a GPU as it transitions from top-half to
97 //        bottom-half interrupt handler for access counter notifications. This
98 //        lock is acquired for that GPU, in the ISR top-half. Then a bottom-half
99 //        is scheduled (to run in a workqueue). Then the bottom-half releases
100 //        the lock when that GPU's processing appears to be done.
101 //
102 // - mmap_lock (mmap_sem in kernels < 5.8)
103 //      Order: UVM_LOCK_ORDER_MMAP_LOCK
104 //      Reader/writer lock (rw_semaphore)
105 //
106 //      We're often called with the kernel already holding mmap_lock: mmap,
107 //      munmap, CPU fault, etc. These operations may have to take any number of
108 //      UVM locks, so mmap_lock requires special consideration in the lock
109 //      order, since it's sometimes out of our control.
110 //
111 //      We need to hold mmap_lock when calling vm_insert_page, which means that
112 //      any time an operation (such as an ioctl) might need to install a CPU
113 //      mapping, it must take mmap_lock in read mode very early on.
114 //
115 //      However, current->mm is not necessarily the owning mm of the UVM vma.
//      fork or fd passing via a UNIX domain socket can cause that. Notably, this
117 //      is also the case when handling GPU faults or doing other operations from
118 //      a kernel thread. In some cases we have an mm associated with a VA space,
119 //      and in those cases we lock that mm instead of current->mm. But since we
120 //      don't always have that luxury, each path specifies the mm to use (either
121 //      explicitly or via uvm_va_block_context_t::mm). That mm may be NULL.
122 //      Later on down the stack we look up the UVM vma and compare its mm before
123 //      operating on that vma.
124 //
125 //      With HMM and ATS, the GPU fault handler takes mmap_lock. GPU faults may
126 //      block forward progress of threads holding the RM GPUs lock until those
127 //      faults are serviced, which means that mmap_lock cannot be held when the
128 //      UVM driver calls into RM. In other words, mmap_lock and the RM GPUs lock
129 //      are mutually exclusive.
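//
//      A minimal sketch of an operation that needs CPU mappings (assuming mm
//      has already been selected as described above; real paths add error
//      handling):
//
//          uvm_down_read_mmap_lock(mm);
//          // ... take UVM locks, possibly call vm_insert_page() ...
//          uvm_up_read_mmap_lock(mm);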
130 //
131 // - Global VA spaces list lock
132 //      Order: UVM_LOCK_ORDER_VA_SPACES_LIST
133 //      Mutex which protects g_uvm_global.va_spaces state.
134 //
135 // - VA space writer serialization lock (va_space->serialize_writers_lock)
136 //      Order: UVM_LOCK_ORDER_VA_SPACE_SERIALIZE_WRITERS
137 //      Exclusive lock (mutex) per uvm_va_space (UVM struct file)
138 //
139 //      This lock prevents a deadlock between RM and UVM by only allowing one
140 //      writer to queue up on the VA space lock at a time.
141 //
142 //      GPU faults are serviced by the UVM bottom half with the VA space lock
143 //      held in read mode. Until they're serviced, these faults may block
144 //      forward progress of RM threads.
145 //
146 //      This constraint means that the UVM driver cannot call into RM while
147 //      GPU fault servicing is blocked. We may block GPU fault servicing by:
148 //      - Taking the VA space lock in write mode
149 //      - Holding the VA space lock in read mode with a writer pending, since
150 //        Linux rw_semaphores are fair.
151 //
152 //      Example of the second condition:
153 //      Thread A        Thread B        UVM BH          Thread C
154 //      UVM API call    UVM API call    GPU fault       RM API call
155 //      ------------    ------------    ------------    ------------
156 //      down_read
157 //                      down_write
158 //                      // Blocked on A
159 //                                      down_read
160 //                                      // Blocked on B
161 //                                                      RM GPU lock
162 //                                                      // Blocked on GPU fault
163 //      RM GPU lock
164 //      // Deadlock
165 //
166 //      The writer serialization lock works around this by biasing the VA space
167 //      lock towards readers, without causing starvation of writers. Writers and
168 //      readers which will make RM calls take this lock, which prevents them
169 //      from queueing up on the VA space rw_semaphore and blocking the UVM
170 //      bottom half.
171 //
172 //      TODO: Bug 1799173: A better long-term approach might be to never allow
173 //            RM calls under the VA space lock at all, but that will take a
174 //            larger restructuring.
175 //
176 // - VA space serialization of down_read with up_write of the VA space lock
177 //   (va_space->read_acquire_write_release_lock)
178 //      Order: UVM_LOCK_ORDER_VA_SPACE_READ_ACQUIRE_WRITE_RELEASE_LOCK
179 //      Exclusive lock (mutex) per uvm_va_space (UVM struct file)
180 //
181 //      This lock prevents a deadlock between RM and UVM by preventing any
182 //      interleaving of down_reads on the VA space lock with concurrent
183 //      up_writes/downgrade_writes. The Linux rw_semaphore implementation does
184 //      not guarantee that two readers will always run concurrently, as shown by
185 //      the following interleaving:
186 //
187 //      Thread A                Thread B
188 //      UVM API call            UVM BH
189 //      ------------            ------------
190 //      down_write
191 //                              down_read
192 //                                  // Fails, calls handler
193 //      up_write
194 //      down_read
195 //          // Success
196 //                                  // Handler sees the lock still active
197 //                                  // Handler waits for lock to be released
198 //                                  // Blocked on A
199 //      RM GPU lock
200 //      // Blocked on GPU fault
201 //
202 //      Given the above interleaving, the kernel's implementation of the
203 //      down_read failure handler running in thread B does not distinguish
//      between a reader and a writer holding the lock. From the perspective of
//      all other threads, even those which attempt to take the lock for read
//      while thread A's reader holds it, a writer is active. Therefore no other
//      readers can take the lock, and we end up in the same deadlock described
208 //      in the above comments on the VA space writer serialization lock.
209 //
210 //      This lock prevents any such interleaving:
211 //      - Writers take this lock for the duration of the write lock.
212 //
213 //      - Readers which do not call into RM only take this lock across the
214 //        down_read call. If a writer holds the lock, the reader would be
215 //        blocked on the VA space lock anyway. Concurrent readers will serialize
216 //        the taking of the VA space lock, but they will not be serialized
217 //        across their read sections.
218 //
219 //      - Readers which call into RM do not need to take this lock. Their
220 //        down_read is already serialized with a writer's up_write by the
221 //        serialize_writers_lock.
222 //
223 // - VA space lock (va_space->lock)
224 //      Order: UVM_LOCK_ORDER_VA_SPACE
225 //      Reader/writer lock (rw_semaphore) per uvm_va_space (UVM struct file)
226 //
227 //      This is the UVM equivalent of mmap_lock. It protects all state under
228 //      that va_space, such as the VA range tree.
229 //
230 //      Read mode: Faults (CPU and GPU), mapping creation, prefetches. These
231 //      will be serialized at the VA block level if necessary. RM calls are
232 //      allowed only if the VA space serialize_writers_lock is also taken.
233 //
234 //      Write mode: Modification of the range state such as mmap and changes to
235 //      logical permissions or location preferences. RM calls are never allowed.
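//
//      Illustrative sketch for a reader which may call into RM (the real
//      driver wraps this pattern in dedicated VA space helpers; the field
//      names are taken from the descriptions above):
//
//          uvm_mutex_lock(&va_space->serialize_writers_lock);
//          uvm_down_read(&va_space->lock);
//          // ... work which may call into RM ...
//          uvm_up_read(&va_space->lock);
//          uvm_mutex_unlock(&va_space->serialize_writers_lock);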
236 //
237 // - External Allocation Tree lock
238 //      Order: UVM_LOCK_ORDER_EXT_RANGE_TREE
239 //      Exclusive lock (mutex) per external VA range, per GPU.
240 //
241 //      Protects the per-GPU sub-range tree mappings in each external VA range.
242 //
243 // - GPU semaphore pool lock (semaphore_pool->mutex)
244 //      Order: UVM_LOCK_ORDER_GPU_SEMAPHORE_POOL
245 //      Exclusive lock (mutex) per uvm_gpu_semaphore_pool
246 //
247 //      Protects the state of the semaphore pool.
248 //
249 // - RM API lock
250 //      Order: UVM_LOCK_ORDER_RM_API
251 //      Exclusive lock
252 //
253 //      This is an internal RM lock that's acquired by most if not all UVM-RM
254 //      APIs.
255 //      Notably this lock is also held on PMA eviction.
256 //
257 // - RM GPUs lock
258 //      Order: UVM_LOCK_ORDER_RM_GPUS
259 //      Exclusive lock
260 //
261 //      This is an internal RM lock that's acquired by most if not all UVM-RM
262 //      APIs and disables interrupts for the GPUs.
263 //      Notably this lock is *not* held on PMA eviction.
264 //
265 // - VA block lock (va_block->lock)
266 //      Order: UVM_LOCK_ORDER_VA_BLOCK
267 //      Exclusive lock (mutex)
268 //
269 //      Protects:
270 //      - CPU and GPU page table mappings for all VAs under the block
271 //      - Updates to the GPU work tracker for that block (migrations)
272 //
273 //      Operations allowed while holding the lock:
274 //      - CPU allocation (we don't evict CPU memory)
275 //      - GPU memory allocation which cannot evict
276 //      - CPU page table mapping/unmapping
277 //      - Pushing work (GPU page table mapping/unmapping)
278 //
279 //      Operations not allowed while holding the lock:
280 //      - GPU memory allocation which can evict memory (would require nesting
281 //        block locks)
//
// - Chunk mapping lock (gpu->root_chunk_mappings.bitlocks and
283 //   gpu->sysmem_mappings.bitlock)
284 //      Order: UVM_LOCK_ORDER_CHUNK_MAPPING
//      Exclusive bitlock (mutex) per root chunk or physical sysmem segment.
287 //
288 //      A chunk mapping lock is used to enforce serialization when updating
289 //      kernel mappings of GPU root chunks (vidmem), or CPU chunks (sysmem).
290 //      The VA block lock is usually held during the mapping operation.
291 //
292 //      In the case of vidmem, each lock in the bitlock array serializes the
293 //      mapping and unmapping of a single GPU root chunk. If serialization
294 //      is required to update a root chunk, but no mappings are involved, use
295 //      the PMM root chunk lock (order UVM_LOCK_ORDER_PMM_ROOT_CHUNK) instead.
296 //
297 //      In the case of sysmem, each lock in the array serializes the mapping
298 //      of a large segment of system address space: the locking granularity is
299 //      significantly coarser than the CPU chunk size.
300 //
301 // - Page tree lock
302 //      Order: UVM_LOCK_ORDER_PAGE_TREE
303 //      Exclusive lock per GPU page tree
304 //
305 //      This protects a page tree.  All modifications to the device's page tree
306 //      and the host-side cache of that tree must be done under this lock.
307 //      The host-side cache and device state must be consistent when this lock
//      is released.
//
//      Operations allowed while holding this lock:
//      - Pushing work
//
//      Operations not allowed while holding this lock:
314 //      - GPU memory allocation which can evict
315 //
316 // - Concurrent push semaphore
317 //      Order: UVM_LOCK_ORDER_PUSH
318 //      Semaphore (uvm_semaphore_t)
319 //
//      This semaphore limits the number of concurrent pushes. It is held for
//      the duration of a push (between uvm_push_begin*() and uvm_push_end()).
323 //
324 // - PMM GPU lock (pmm->lock)
325 //      Order: UVM_LOCK_ORDER_PMM
326 //      Exclusive lock (mutex) per uvm_pmm_gpu_t
327 //
328 //      Protects the state of PMM - internal to PMM.
329 //
330 // - PMM GPU PMA lock (pmm->pma_lock)
331 //      Order: UVM_LOCK_ORDER_PMM_PMA
//      Reader/writer lock (rw_semaphore) per uvm_pmm_gpu_t
333 //
334 //      Lock internal to PMM for synchronizing allocations from PMA with
335 //      PMA eviction.
336 //
337 // - PMM root chunk lock (pmm->root_chunks.bitlocks)
338 //      Order: UVM_LOCK_ORDER_PMM_ROOT_CHUNK
//      Exclusive bitlock (mutex) per root chunk, internal to PMM.
340 //
341 // - Channel lock
342 //      Order: UVM_LOCK_ORDER_CHANNEL
343 //      Spinlock (uvm_spinlock_t) or exclusive lock (mutex)
344 //
345 //      Lock protecting the state of all the channels in a channel pool. The
346 //      channel pool lock documentation contains the guidelines about which lock
347 //      type (mutex or spinlock) to use.
348 //
349 // - Tools global VA space list lock (g_tools_va_space_list_lock)
350 //      Order: UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST
//      Reader/writer lock (rw_semaphore)
352 //
353 //      This lock protects the list of VA spaces used when broadcasting
354 //      UVM profiling events.
355 //
356 // - VA space events
357 //      Order: UVM_LOCK_ORDER_VA_SPACE_EVENTS
358 //      Reader/writer lock (rw_semaphore) per uvm_perf_va_space_events_t.
//      Serializes perf callbacks with event register/unregister. It's separate
360 //      from the VA space lock so it can be taken on the eviction path.
361 //
362 // - VA space tools
363 //      Order: UVM_LOCK_ORDER_VA_SPACE_TOOLS
364 //      Reader/writer lock (rw_semaphore) per uvm_va_space_t. Serializes tools
365 //      reporting with tools register/unregister. Since some of the tools
366 //      events come from perf events, both VA_SPACE_EVENTS and VA_SPACE_TOOLS
367 //      must be taken to register/report some tools events.
368 //
369 // - Leaf locks
370 //      Order: UVM_LOCK_ORDER_LEAF
371 //
372 //      All leaf locks.
373 //
374 // -------------------------------------------------------------------------- //
375 
376 // Remember to add any new lock orders to uvm_lock_order_to_string() in
377 // uvm_lock.c
378 typedef enum
379 {
380     UVM_LOCK_ORDER_INVALID = 0,
381     UVM_LOCK_ORDER_GLOBAL_PM,
382     UVM_LOCK_ORDER_GLOBAL,
383     UVM_LOCK_ORDER_ISR,
384     UVM_LOCK_ORDER_MMAP_LOCK,
385     UVM_LOCK_ORDER_VA_SPACES_LIST,
386     UVM_LOCK_ORDER_VA_SPACE_SERIALIZE_WRITERS,
387     UVM_LOCK_ORDER_VA_SPACE_READ_ACQUIRE_WRITE_RELEASE_LOCK,
388     UVM_LOCK_ORDER_VA_SPACE,
389     UVM_LOCK_ORDER_EXT_RANGE_TREE,
390     UVM_LOCK_ORDER_GPU_SEMAPHORE_POOL,
391     UVM_LOCK_ORDER_RM_API,
392     UVM_LOCK_ORDER_RM_GPUS,
393     UVM_LOCK_ORDER_VA_BLOCK,
394     UVM_LOCK_ORDER_CHUNK_MAPPING,
395     UVM_LOCK_ORDER_PAGE_TREE,
396     UVM_LOCK_ORDER_PUSH,
397     UVM_LOCK_ORDER_PMM,
398     UVM_LOCK_ORDER_PMM_PMA,
399     UVM_LOCK_ORDER_PMM_ROOT_CHUNK,
400     UVM_LOCK_ORDER_CHANNEL,
401     UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST,
402     UVM_LOCK_ORDER_VA_SPACE_EVENTS,
403     UVM_LOCK_ORDER_VA_SPACE_TOOLS,
404     UVM_LOCK_ORDER_SEMA_POOL_TRACKER,
405     UVM_LOCK_ORDER_LEAF,
406     UVM_LOCK_ORDER_COUNT,
407 } uvm_lock_order_t;
408 
409 const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order);
410 
411 typedef enum
412 {
413     UVM_LOCK_FLAGS_INVALID          = 0,
414     UVM_LOCK_FLAGS_MODE_EXCLUSIVE   = (1 << 0),
415     UVM_LOCK_FLAGS_MODE_SHARED      = (1 << 1),
416     UVM_LOCK_FLAGS_MODE_ANY         = (UVM_LOCK_FLAGS_MODE_EXCLUSIVE | UVM_LOCK_FLAGS_MODE_SHARED),
417     UVM_LOCK_FLAGS_MODE_MASK        = (UVM_LOCK_FLAGS_MODE_EXCLUSIVE | UVM_LOCK_FLAGS_MODE_SHARED),
418     UVM_LOCK_FLAGS_OUT_OF_ORDER     = (1 << 2),
419     UVM_LOCK_FLAGS_TRYLOCK          = (1 << 3),
420     UVM_LOCK_FLAGS_MASK             = (1 << 4) - 1
421 } uvm_lock_flags_t;
422 
423 // Record locking a lock of given lock_order in exclusive or shared mode,
424 // distinguishing between trylock and normal acquisition attempts.
425 // Returns true if the recorded lock follows all the locking rules and false
426 // otherwise.
427 bool __uvm_record_lock(void *lock, uvm_lock_order_t lock_order, uvm_lock_flags_t flags);
428 
429 // Record unlocking a lock of given lock_order in exclusive or shared mode and
430 // possibly out of order.
431 // Returns true if the unlock follows all the locking rules and false otherwise.
432 bool __uvm_record_unlock(void *lock, uvm_lock_order_t lock_order, uvm_lock_flags_t flags);
433 
434 bool __uvm_record_downgrade(void *lock, uvm_lock_order_t lock_order);
435 
436 // Check whether a lock of given lock_order is held in exclusive, shared, or
437 // either mode by the current thread.
438 bool __uvm_check_locked(void *lock, uvm_lock_order_t lock_order, uvm_lock_flags_t flags);
439 
440 // Check that no locks are held with the given lock order
441 bool __uvm_check_unlocked_order(uvm_lock_order_t lock_order);
442 
443 // Check that a lock of the given order can be locked, i.e. that no locks are
444 // held with the given or deeper lock order.  Allow for out-of-order locking
445 // when checking for a trylock.
446 bool __uvm_check_lockable_order(uvm_lock_order_t lock_order, uvm_lock_flags_t flags);
447 
448 // Check that all locks have been released in a thread context lock
449 bool __uvm_check_all_unlocked(uvm_thread_context_lock_t *context_lock);
450 
451 // Check that all locks have been released in the current thread context lock
452 bool __uvm_thread_check_all_unlocked(void);
453 
454 // Check that the locking infrastructure has been initialized
455 bool __uvm_locking_initialized(void);
456 
457 #if UVM_IS_DEBUG()
  // These macros are intended to be expanded at the call site directly so they
  // print the precise location of the violation, while the __uvm_record*
  // functions print the details of the error.
461   #define uvm_record_lock_raw(lock, lock_order, flags) \
462       UVM_ASSERT_MSG(__uvm_record_lock((lock), (lock_order), (flags)), "Locking violation\n")
463   #define uvm_record_unlock_raw(lock, lock_order, flags) \
464       UVM_ASSERT_MSG(__uvm_record_unlock((lock), (lock_order), (flags)), "Locking violation\n")
465   #define uvm_record_downgrade_raw(lock, lock_order) \
466       UVM_ASSERT_MSG(__uvm_record_downgrade((lock), (lock_order)), "Locking violation\n")
467 
468   // Record UVM lock (a lock that has a lock_order member) operation and assert
469   // that it's correct
470   #define uvm_record_lock(lock, flags) \
471       uvm_record_lock_raw((lock), (lock)->lock_order, (flags))
472   #define uvm_record_unlock(lock, flags) uvm_record_unlock_raw((lock), (lock)->lock_order, (flags))
473   #define uvm_record_unlock_out_of_order(lock, flags) \
474             uvm_record_unlock_raw((lock), (lock)->lock_order, (flags) | UVM_LOCK_FLAGS_OUT_OF_ORDER)
475   #define uvm_record_downgrade(lock) uvm_record_downgrade_raw((lock), (lock)->lock_order)
476 
477   // Check whether a UVM lock (a lock that has a lock_order member) is held in
478   // the given mode.
479   #define uvm_check_locked(lock, flags) __uvm_check_locked((lock), (lock)->lock_order, (flags))
480 
481   // Helpers for recording and asserting mmap_lock
  // (mmap_sem in kernels < 5.8) state
483   #define uvm_record_lock_mmap_lock_read(mm) \
484           uvm_record_lock_raw(nv_mmap_get_lock(mm), UVM_LOCK_ORDER_MMAP_LOCK, UVM_LOCK_FLAGS_MODE_SHARED)
485 
486   #define uvm_record_unlock_mmap_lock_read(mm) \
487           uvm_record_unlock_raw(nv_mmap_get_lock(mm), UVM_LOCK_ORDER_MMAP_LOCK, UVM_LOCK_FLAGS_MODE_SHARED)
488 
489   #define uvm_record_unlock_mmap_lock_read_out_of_order(mm) \
490           uvm_record_unlock_raw(nv_mmap_get_lock(mm), UVM_LOCK_ORDER_MMAP_LOCK, \
491                                 UVM_LOCK_FLAGS_MODE_SHARED | UVM_LOCK_FLAGS_OUT_OF_ORDER)
492 
493   #define uvm_record_lock_mmap_lock_write(mm) \
494           uvm_record_lock_raw(nv_mmap_get_lock(mm), UVM_LOCK_ORDER_MMAP_LOCK, UVM_LOCK_FLAGS_MODE_EXCLUSIVE)
495 
496   #define uvm_record_unlock_mmap_lock_write(mm) \
497           uvm_record_unlock_raw(nv_mmap_get_lock(mm), UVM_LOCK_ORDER_MMAP_LOCK, UVM_LOCK_FLAGS_MODE_EXCLUSIVE)
498 
499   #define uvm_record_unlock_mmap_lock_write_out_of_order(mm) \
500           uvm_record_unlock_raw(nv_mmap_get_lock(mm), UVM_LOCK_ORDER_MMAP_LOCK, \
501                                 UVM_LOCK_FLAGS_MODE_EXCLUSIVE | UVM_LOCK_FLAGS_OUT_OF_ORDER)
502 
503   #define uvm_check_locked_mmap_lock(mm, flags) \
504            __uvm_check_locked(nv_mmap_get_lock(mm), UVM_LOCK_ORDER_MMAP_LOCK, (flags))
505 
506   // Helpers for recording RM API lock usage around UVM-RM interfaces
507   #define uvm_record_lock_rm_api() \
508           uvm_record_lock_raw((void*)UVM_LOCK_ORDER_RM_API, UVM_LOCK_ORDER_RM_API, \
509                               UVM_LOCK_FLAGS_MODE_EXCLUSIVE)
510   #define uvm_record_unlock_rm_api() \
511           uvm_record_unlock_raw((void*)UVM_LOCK_ORDER_RM_API, UVM_LOCK_ORDER_RM_API, \
512                                 UVM_LOCK_FLAGS_MODE_EXCLUSIVE)
513 
514   // Helpers for recording RM GPUS lock usage around UVM-RM interfaces
515   #define uvm_record_lock_rm_gpus() \
516           uvm_record_lock_raw((void*)UVM_LOCK_ORDER_RM_GPUS, UVM_LOCK_ORDER_RM_GPUS, \
517                               UVM_LOCK_FLAGS_MODE_EXCLUSIVE)
518   #define uvm_record_unlock_rm_gpus() \
519           uvm_record_unlock_raw((void*)UVM_LOCK_ORDER_RM_GPUS, UVM_LOCK_ORDER_RM_GPUS, \
520                                 UVM_LOCK_FLAGS_MODE_EXCLUSIVE)
521 
522   // Helpers for recording both RM locks usage around UVM-RM interfaces
523   #define uvm_record_lock_rm_all() ({ uvm_record_lock_rm_api(); uvm_record_lock_rm_gpus(); })
524   #define uvm_record_unlock_rm_all() ({ uvm_record_unlock_rm_gpus(); uvm_record_unlock_rm_api(); })
525 
526 #else
527   #define uvm_record_lock                               UVM_IGNORE_EXPR2
528   #define uvm_record_unlock                             UVM_IGNORE_EXPR2
529   #define uvm_record_unlock_out_of_order                UVM_IGNORE_EXPR2
530   #define uvm_record_downgrade                          UVM_IGNORE_EXPR
531 
532   static bool uvm_check_locked(void *lock, uvm_lock_flags_t flags)
533   {
534       return false;
535   }
536 
537   #define uvm_record_lock_mmap_lock_read                 UVM_IGNORE_EXPR
538   #define uvm_record_unlock_mmap_lock_read               UVM_IGNORE_EXPR
539   #define uvm_record_unlock_mmap_lock_read_out_of_order  UVM_IGNORE_EXPR
540   #define uvm_record_lock_mmap_lock_write                UVM_IGNORE_EXPR
541   #define uvm_record_unlock_mmap_lock_write              UVM_IGNORE_EXPR
542   #define uvm_record_unlock_mmap_lock_write_out_of_order UVM_IGNORE_EXPR
543 
544   #define uvm_check_locked_mmap_lock                     uvm_check_locked
545 
546   #define uvm_record_lock_rm_api()
547   #define uvm_record_unlock_rm_api()
548 
549   #define uvm_record_lock_rm_gpus()
550   #define uvm_record_unlock_rm_gpus()
551 
552   #define uvm_record_lock_rm_all()
553   #define uvm_record_unlock_rm_all()
554 #endif
555 
556 #define uvm_locking_assert_initialized() UVM_ASSERT(__uvm_locking_initialized())
557 #define uvm_thread_assert_all_unlocked() UVM_ASSERT(__uvm_thread_check_all_unlocked())
558 #define uvm_assert_lockable_order(order) UVM_ASSERT(__uvm_check_lockable_order(order, UVM_LOCK_FLAGS_MODE_ANY))
559 #define uvm_assert_unlocked_order(order) UVM_ASSERT(__uvm_check_unlocked_order(order))
560 
561 // Helpers for locking mmap_lock (mmap_sem in kernels < 5.8)
562 // and recording its usage
563 #define uvm_assert_mmap_lock_locked_mode(mm, flags) ({                                      \
564       typeof(mm) _mm = (mm);                                                                \
565       UVM_ASSERT(nv_mm_rwsem_is_locked(_mm) && uvm_check_locked_mmap_lock((_mm), (flags))); \
566   })
567 
568 #define uvm_assert_mmap_lock_locked(mm) \
569         uvm_assert_mmap_lock_locked_mode((mm), UVM_LOCK_FLAGS_MODE_ANY)
570 #define uvm_assert_mmap_lock_locked_read(mm) \
571         uvm_assert_mmap_lock_locked_mode((mm), UVM_LOCK_FLAGS_MODE_SHARED)
572 #define uvm_assert_mmap_lock_locked_write(mm) \
573         uvm_assert_mmap_lock_locked_mode((mm), UVM_LOCK_FLAGS_MODE_EXCLUSIVE)
574 
575 #define uvm_down_read_mmap_lock(mm) ({                  \
576         typeof(mm) _mm = (mm);                          \
577         uvm_record_lock_mmap_lock_read(_mm);            \
578         nv_mmap_read_lock(_mm);                         \
579     })
580 
581 #define uvm_up_read_mmap_lock(mm) ({                    \
582         typeof(mm) _mm = (mm);                          \
583         nv_mmap_read_unlock(_mm);                       \
584         uvm_record_unlock_mmap_lock_read(_mm);          \
585     })
586 
587 #define uvm_up_read_mmap_lock_out_of_order(mm) ({           \
588         typeof(mm) _mm = (mm);                              \
589         nv_mmap_read_unlock(_mm);                           \
590         uvm_record_unlock_mmap_lock_read_out_of_order(_mm); \
591     })
592 
593 #define uvm_down_write_mmap_lock(mm) ({                 \
594         typeof(mm) _mm = (mm);                          \
595         uvm_record_lock_mmap_lock_write(_mm);           \
596         nv_mmap_write_lock(_mm);                        \
597     })
598 
599 #define uvm_up_write_mmap_lock(mm) ({                   \
600         typeof(mm) _mm = (mm);                          \
601         nv_mmap_write_unlock(_mm);                      \
602         uvm_record_unlock_mmap_lock_write(_mm);         \
603     })
604 
605 // Helper for calling a UVM-RM interface function with lock recording
606 #define uvm_rm_locked_call(call) ({                     \
607         typeof(call) ret;                               \
608         uvm_record_lock_rm_all();                       \
609         ret = call;                                     \
610         uvm_record_unlock_rm_all();                     \
611         ret;                                            \
612     })
613 
614 // Helper for calling a UVM-RM interface function that returns void with lock recording
615 #define uvm_rm_locked_call_void(call) ({                \
616         uvm_record_lock_rm_all();                       \
617         call;                                           \
618         uvm_record_unlock_rm_all();                     \
619     })
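// Illustrative usage sketch: the UVM-RM interface function names below are
// hypothetical and are shown only to demonstrate the wrappers.
//
//     NV_STATUS status = uvm_rm_locked_call(nvUvmInterfaceDoSomething(rm_handle));
//     uvm_rm_locked_call_void(nvUvmInterfaceDoSomethingVoid(rm_handle));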
620 
621 typedef struct
622 {
623     struct rw_semaphore sem;
624 #if UVM_IS_DEBUG()
625     uvm_lock_order_t lock_order;
626 #endif
627 } uvm_rw_semaphore_t;
628 
629 //
630 // Note that this is a macro, not an inline or static function so the
// "uvm_sem" argument is substituted as text. If this is invoked with
632 // uvm_assert_rwsem_locked_mode(_sem, flags) then we get code "_sem = _sem"
633 // and _sem is initialized to NULL. Avoid this by using a name unlikely to
634 // be the same as the string passed to "uvm_sem".
635 // See uvm_down_read() and uvm_up_read() below as examples.
636 //
637 #define uvm_assert_rwsem_locked_mode(uvm_sem, flags) ({                               \
638         typeof(uvm_sem) _sem_ = (uvm_sem);                                            \
639         UVM_ASSERT(rwsem_is_locked(&_sem_->sem) && uvm_check_locked(_sem_, (flags))); \
640     })
641 
642 #define uvm_assert_rwsem_locked(uvm_sem) \
643         uvm_assert_rwsem_locked_mode(uvm_sem, UVM_LOCK_FLAGS_MODE_ANY)
644 #define uvm_assert_rwsem_locked_read(uvm_sem) \
645         uvm_assert_rwsem_locked_mode(uvm_sem, UVM_LOCK_FLAGS_MODE_SHARED)
646 #define uvm_assert_rwsem_locked_write(uvm_sem) \
647         uvm_assert_rwsem_locked_mode(uvm_sem, UVM_LOCK_FLAGS_MODE_EXCLUSIVE)
648 
649 #define uvm_assert_rwsem_unlocked(uvm_sem) UVM_ASSERT(!rwsem_is_locked(&(uvm_sem)->sem))
650 
651 static void uvm_init_rwsem(uvm_rw_semaphore_t *uvm_sem, uvm_lock_order_t lock_order)
652 {
653     init_rwsem(&uvm_sem->sem);
654 #if UVM_IS_DEBUG()
655     uvm_locking_assert_initialized();
656     uvm_sem->lock_order = lock_order;
657 #endif
658     uvm_assert_rwsem_unlocked(uvm_sem);
659 }
660 
661 #define uvm_down_read(uvm_sem) ({                          \
662         typeof(uvm_sem) _sem = (uvm_sem);                  \
663         uvm_record_lock(_sem, UVM_LOCK_FLAGS_MODE_SHARED); \
664         down_read(&_sem->sem);                             \
665         uvm_assert_rwsem_locked_read(_sem);                \
666     })
667 
668 #define uvm_up_read(uvm_sem) ({                              \
669         typeof(uvm_sem) _sem = (uvm_sem);                    \
670         uvm_assert_rwsem_locked_read(_sem);                  \
671         up_read(&_sem->sem);                                 \
672         uvm_record_unlock(_sem, UVM_LOCK_FLAGS_MODE_SHARED); \
673     })
674 
675 // Unlock w/o any tracking. This should be extremely rare and *_no_tracking
676 // helpers will be added only as needed.
677 //
678 // TODO: Bug 2594854:
679 // TODO: Bug 2583279: Remove macro when bugs are fixed
680 #define uvm_up_read_no_tracking(uvm_sem) ({                  \
681         typeof(uvm_sem) _sem = (uvm_sem);                    \
682         up_read(&_sem->sem);                                 \
683     })
684 
685 #define uvm_down_write(uvm_sem) ({                            \
        typeof(uvm_sem) _sem = (uvm_sem);                     \
687         uvm_record_lock(_sem, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
688         down_write(&_sem->sem);                               \
689         uvm_assert_rwsem_locked_write(_sem);                  \
690     })
691 
692 // trylock for reading: returns 1 if successful, 0 if not.  Out-of-order lock
693 // acquisition via this function is legal, i.e. the lock order checker will
694 // allow it.  However, if an out-of-order lock acquisition attempt fails, it is
695 // the caller's responsibility to back off at least to the point where the
696 // next held lower-order lock is released.
697 #define uvm_down_read_trylock(uvm_sem) ({                                           \
698         typeof(uvm_sem) _sem = (uvm_sem);                                           \
699         int locked;                                                                 \
700         uvm_record_lock(_sem, UVM_LOCK_FLAGS_MODE_SHARED | UVM_LOCK_FLAGS_TRYLOCK); \
701         locked = down_read_trylock(&_sem->sem);                                     \
702         if (locked == 0)                                                            \
703             uvm_record_unlock(_sem, UVM_LOCK_FLAGS_MODE_SHARED);                    \
704         else                                                                        \
705             uvm_assert_rwsem_locked_read(_sem);                                     \
706         locked;                                                                     \
707     })
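// Illustrative back-off sketch for a failed out-of-order trylock (the lock
// names are placeholders): release the already-held higher-order lock, then
// reacquire both in the proper order.
//
//     if (!uvm_down_read_trylock(&lower_order_sem)) {
//         uvm_mutex_unlock(&higher_order_mutex);
//         uvm_down_read(&lower_order_sem);
//         uvm_mutex_lock(&higher_order_mutex);
//     }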
708 
709 // Lock w/o any tracking. This should be extremely rare and *_no_tracking
710 // helpers will be added only as needed.
711 //
712 // TODO: Bug 2594854:
713 // TODO: Bug 2583279: Remove macro when bugs are fixed
714 #define uvm_down_read_trylock_no_tracking(uvm_sem) ({                               \
715         typeof(uvm_sem) _sem = (uvm_sem);                                           \
716         down_read_trylock(&_sem->sem);                                              \
717     })
718 
719 // trylock for writing: returns 1 if successful, 0 if not.  Out-of-order lock
720 // acquisition via this function is legal, i.e. the lock order checker will
721 // allow it.  However, if an out-of-order lock acquisition attempt fails, it is
722 // the caller's responsibility to back off at least to the point where the
723 // next held lower-order lock is released.
724 #define uvm_down_write_trylock(uvm_sem) ({                                             \
725         typeof(uvm_sem) _sem = (uvm_sem);                                              \
726         int locked;                                                                    \
727         uvm_record_lock(_sem, UVM_LOCK_FLAGS_MODE_EXCLUSIVE | UVM_LOCK_FLAGS_TRYLOCK); \
728         locked = down_write_trylock(&_sem->sem);                                       \
729         if (locked == 0)                                                               \
730             uvm_record_unlock(_sem, UVM_LOCK_FLAGS_MODE_EXCLUSIVE);                    \
731         else                                                                           \
732             uvm_assert_rwsem_locked_write(_sem);                                       \
733         locked;                                                                        \
734     })
735 
736 #define uvm_up_write(uvm_sem) ({                                \
737         typeof(uvm_sem) _sem = (uvm_sem);                       \
738         uvm_assert_rwsem_locked_write(_sem);                    \
739         up_write(&_sem->sem);                                   \
740         uvm_record_unlock(_sem, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
741     })
742 
743 #define uvm_downgrade_write(uvm_sem) ({                 \
744         typeof(uvm_sem) _sem = (uvm_sem);               \
745         uvm_assert_rwsem_locked_write(_sem);            \
746         downgrade_write(&_sem->sem);                    \
747         uvm_record_downgrade(_sem);                     \
748     })
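// Illustrative lifecycle sketch for a uvm_rw_semaphore_t (the embedding
// object "foo" and the lock order are assumptions made for the example):
//
//     uvm_init_rwsem(&foo->sem, UVM_LOCK_ORDER_LEAF);
//
//     uvm_down_write(&foo->sem);
//     // ... modify protected state ...
//     uvm_downgrade_write(&foo->sem);
//     // ... read-only work; other readers may now enter ...
//     uvm_up_read(&foo->sem);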
749 
750 typedef struct
751 {
752     struct mutex m;
753 #if UVM_IS_DEBUG()
754     uvm_lock_order_t lock_order;
755 #endif
756 } uvm_mutex_t;
757 
758 // Note that this is a macro, not an inline or static function so the
// "uvm_mutex" argument is substituted as text. If this is invoked with
// uvm_mutex_is_locked(_mutex) then we get code "_mutex = _mutex" and _mutex is
761 // initialized to NULL. Avoid this by using a name unlikely to be the same as
762 // the string passed to "uvm_mutex".
763 // See uvm_mutex_lock() and uvm_mutex_unlock() below as examples.
764 //
765 #define uvm_mutex_is_locked(uvm_mutex) ({                                                           \
766         typeof(uvm_mutex) _mutex_ = (uvm_mutex);                                                    \
767         (mutex_is_locked(&_mutex_->m) && uvm_check_locked(_mutex_, UVM_LOCK_FLAGS_MODE_EXCLUSIVE)); \
768     })
769 
770 #define uvm_assert_mutex_locked(uvm_mutex) UVM_ASSERT(uvm_mutex_is_locked(uvm_mutex))
771 #define uvm_assert_mutex_unlocked(uvm_mutex) UVM_ASSERT(!mutex_is_locked(&(uvm_mutex)->m))
772 
773 //
774 // Linux kernel mutexes cannot be used with interrupts disabled. Doing so
775 // can lead to deadlocks.
// To warn about mutex usage with interrupts disabled, the following
// macros and inline functions wrap the raw kernel mutex operations and
// assert if interrupts are disabled.
779 //
780 // TODO: Bug 2690258: evaluate whether !irqs_disabled() && !in_interrupt() is
781 //       enough.
782 //
783 #define uvm_assert_mutex_interrupts() ({                                                                        \
784         UVM_ASSERT_MSG(!irqs_disabled() && !in_interrupt(), "Mutexes cannot be used with interrupts disabled"); \
785     })
786 
787 static void uvm_mutex_init(uvm_mutex_t *mutex, uvm_lock_order_t lock_order)
788 {
789     mutex_init(&mutex->m);
790 #if UVM_IS_DEBUG()
791     uvm_locking_assert_initialized();
792     mutex->lock_order = lock_order;
793 #endif
794     uvm_assert_mutex_unlocked(mutex);
795 }
796 
797 #define uvm_mutex_lock(mutex) ({                                \
798         typeof(mutex) _mutex = (mutex);                         \
799         uvm_assert_mutex_interrupts();                          \
800         uvm_record_lock(_mutex, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
801         mutex_lock(&_mutex->m);                                 \
802         uvm_assert_mutex_locked(_mutex);                        \
803     })
804 
805 // Lock w/o any tracking. This should be extremely rare and *_no_tracking
806 // helpers will be added only as needed.
807 #define uvm_mutex_lock_no_tracking(mutex) ({    \
808         uvm_assert_mutex_interrupts();          \
809         mutex_lock(&(mutex)->m);                \
810     })
811 
812 #define uvm_mutex_trylock(mutex) ({                                                      \
813         typeof(mutex) _mutex = (mutex);                                                  \
814         int locked;                                                                      \
815         uvm_record_lock(_mutex, UVM_LOCK_FLAGS_MODE_EXCLUSIVE | UVM_LOCK_FLAGS_TRYLOCK); \
816         locked = mutex_trylock(&_mutex->m);                                              \
817         if (locked == 0)                                                                 \
818             uvm_record_unlock(_mutex, UVM_LOCK_FLAGS_MODE_EXCLUSIVE);                    \
819         else                                                                             \
820             uvm_assert_mutex_locked(_mutex);                                             \
821         locked;                                                                          \
822     })
823 
824 #define uvm_mutex_unlock(mutex) ({                                \
825         typeof(mutex) _mutex = (mutex);                           \
826         uvm_assert_mutex_interrupts();                            \
827         uvm_assert_mutex_locked(_mutex);                          \
828         mutex_unlock(&_mutex->m);                                 \
829         uvm_record_unlock(_mutex, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
830     })
831 #define uvm_mutex_unlock_out_of_order(mutex) ({                                \
832         typeof(mutex) _mutex = (mutex);                                        \
833         uvm_assert_mutex_interrupts();                                         \
834         uvm_assert_mutex_locked(_mutex);                                       \
835         mutex_unlock(&_mutex->m);                                              \
836         uvm_record_unlock_out_of_order(_mutex, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
837     })
838 
839 // Unlock w/o any tracking. This should be extremely rare and *_no_tracking
840 // helpers will be added only as needed.
841 #define uvm_mutex_unlock_no_tracking(mutex) ({  \
842         uvm_assert_mutex_interrupts();          \
843         mutex_unlock(&(mutex)->m);              \
844     })
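// Illustrative sketch: initializing and using a uvm_mutex_t, here for the GPU
// semaphore pool lock described at the top of this file (the variable name is
// an assumption):
//
//     uvm_mutex_init(&semaphore_pool->mutex, UVM_LOCK_ORDER_GPU_SEMAPHORE_POOL);
//
//     uvm_mutex_lock(&semaphore_pool->mutex);
//     // ... modify pool state ...
//     uvm_mutex_unlock(&semaphore_pool->mutex);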
845 
846 typedef struct
847 {
848     struct semaphore sem;
849 #if UVM_IS_DEBUG()
850     uvm_lock_order_t lock_order;
851 #endif
852 } uvm_semaphore_t;
853 
854 static void uvm_sema_init(uvm_semaphore_t *semaphore, int val, uvm_lock_order_t lock_order)
855 {
856     sema_init(&semaphore->sem, val);
857 #if UVM_IS_DEBUG()
858     uvm_locking_assert_initialized();
859     semaphore->lock_order = lock_order;
860 #endif
861 }
862 
863 #define uvm_sem_is_locked(uvm_sem) uvm_check_locked(uvm_sem, UVM_LOCK_FLAGS_MODE_SHARED)
864 
865 #define uvm_down(uvm_sem) ({                               \
866         typeof(uvm_sem) _sem = (uvm_sem);                  \
867         uvm_record_lock(_sem, UVM_LOCK_FLAGS_MODE_SHARED); \
868         down(&_sem->sem);                                  \
869     })
870 
871 #define uvm_up(uvm_sem) ({                                   \
872         typeof(uvm_sem) _sem = (uvm_sem);                    \
873         UVM_ASSERT(uvm_sem_is_locked(_sem));                 \
874         up(&_sem->sem);                                      \
875         uvm_record_unlock(_sem, UVM_LOCK_FLAGS_MODE_SHARED); \
876     })
877 #define uvm_up_out_of_order(uvm_sem) ({                                   \
878         typeof(uvm_sem) _sem = (uvm_sem);                                 \
879         UVM_ASSERT(uvm_sem_is_locked(_sem));                              \
880         up(&_sem->sem);                                                   \
881         uvm_record_unlock_out_of_order(_sem, UVM_LOCK_FLAGS_MODE_SHARED); \
882     })
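// Illustrative sketch: a counting semaphore bounding concurrency, as with the
// concurrent push semaphore described at the top of this file (the variable
// and limit names are assumptions):
//
//     uvm_sema_init(&push_sema, max_concurrent_pushes, UVM_LOCK_ORDER_PUSH);
//
//     uvm_down(&push_sema);    // may block until a slot frees up
//     // ... work bounded by the semaphore ...
//     uvm_up(&push_sema);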
883 
884 
885 // A regular spinlock
886 // Locked/unlocked with uvm_spin_lock()/uvm_spin_unlock()
887 typedef struct
888 {
889     spinlock_t lock;
890 #if UVM_IS_DEBUG()
891     uvm_lock_order_t lock_order;
892 #endif
893 } uvm_spinlock_t;
894 
// A separate spinlock type for spinlocks that need to disable interrupts. For
// guaranteed correctness and convenience, the saved and restored irq state is
// embedded in the lock itself.
// Locked/unlocked with uvm_spin_lock_irqsave()/uvm_spin_unlock_irqrestore()
899 typedef struct
900 {
901     spinlock_t lock;
902     unsigned long irq_flags;
903 #if UVM_IS_DEBUG()
904     uvm_lock_order_t lock_order;
905 #endif
906 } uvm_spinlock_irqsave_t;
907 
908 // Asserts that the spinlock is held. Notably the macros below support both
909 // types of spinlocks.
910 // Note that this is a macro, not an inline or static function so the
// "spinlock" argument is substituted as text. If this is invoked with
912 // uvm_assert_spinlock_locked(_lock) then we get code "_lock = _lock"
913 // and _lock is initialized to NULL. Avoid this by using a name unlikely to
914 // be the same as the string passed to "spinlock".
915 // See uvm_spin_lock() and uvm_spin_unlock() below as examples.
916 //
917 #define uvm_assert_spinlock_locked(spinlock) ({                                                               \
918         typeof(spinlock) _lock_ = (spinlock);                                                                 \
919         UVM_ASSERT(spin_is_locked(&_lock_->lock) && uvm_check_locked(_lock_, UVM_LOCK_FLAGS_MODE_EXCLUSIVE)); \
920     })
921 
922 #define uvm_assert_spinlock_unlocked(spinlock) UVM_ASSERT(!spin_is_locked(&(spinlock)->lock))
923 
924 static void uvm_spin_lock_init(uvm_spinlock_t *spinlock, uvm_lock_order_t lock_order)
925 {
926     spin_lock_init(&spinlock->lock);
927 #if UVM_IS_DEBUG()
928     uvm_locking_assert_initialized();
929     spinlock->lock_order = lock_order;
930 #endif
931     uvm_assert_spinlock_unlocked(spinlock);
932 }
933 
934 #define uvm_spin_lock(uvm_lock) ({                             \
935         typeof(uvm_lock) _lock = (uvm_lock);                   \
936         uvm_record_lock(_lock, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
937         spin_lock(&_lock->lock);                               \
938         uvm_assert_spinlock_locked(_lock);                     \
939     })
940 
941 #define uvm_spin_unlock(uvm_lock) ({                             \
942         typeof(uvm_lock) _lock = (uvm_lock);                     \
943         uvm_assert_spinlock_locked(_lock);                       \
944         spin_unlock(&_lock->lock);                               \
945         uvm_record_unlock(_lock, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
946     })
947 
948 static void uvm_spin_lock_irqsave_init(uvm_spinlock_irqsave_t *spinlock, uvm_lock_order_t lock_order)
949 {
950     spin_lock_init(&spinlock->lock);
951 #if UVM_IS_DEBUG()
952     uvm_locking_assert_initialized();
953     spinlock->lock_order = lock_order;
954 #endif
955     uvm_assert_spinlock_unlocked(spinlock);
956 }
957 
958 // Use a temp to not rely on flags being written after acquiring the lock.
959 #define uvm_spin_lock_irqsave(uvm_lock) ({                     \
960         typeof(uvm_lock) _lock = (uvm_lock);                   \
961         unsigned long irq_flags;                               \
962         uvm_record_lock(_lock, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
963         spin_lock_irqsave(&_lock->lock, irq_flags);            \
964         _lock->irq_flags = irq_flags;                          \
965         uvm_assert_spinlock_locked(_lock);                     \
966     })
967 
968 // Use a temp to not rely on flags being read before releasing the lock.
969 #define uvm_spin_unlock_irqrestore(uvm_lock) ({                  \
970         typeof(uvm_lock) _lock = (uvm_lock);                     \
971         unsigned long irq_flags = _lock->irq_flags;              \
972         uvm_assert_spinlock_locked(_lock);                       \
973         spin_unlock_irqrestore(&_lock->lock, irq_flags);         \
974         uvm_record_unlock(_lock, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
975     })
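// Illustrative sketch: state shared with code that runs with interrupts
// disabled is protected with the irqsave variant (the "queue" object is a
// placeholder):
//
//     uvm_spin_lock_irqsave(&queue->lock);
//     // ... touch state also accessed with interrupts disabled ...
//     uvm_spin_unlock_irqrestore(&queue->lock);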
976 
977 // Wrapper for a reader-writer spinlock that disables and enables interrupts
978 typedef struct
979 {
980     rwlock_t lock;
981 
982     // This flags variable is only used by writers, since concurrent readers may
983     // have different values.
984     unsigned long irq_flags;
985 
986 #if UVM_IS_DEBUG()
987     uvm_lock_order_t lock_order;
988 
989     // The kernel doesn't provide a function to tell if an rwlock_t is locked,
990     // so we create our own.
991     atomic_t lock_count;
992 #endif
993 } uvm_rwlock_irqsave_t;
994 
995 static bool uvm_rwlock_irqsave_is_locked(uvm_rwlock_irqsave_t *rwlock)
996 {
997 #if UVM_IS_DEBUG()
998     return atomic_read(&rwlock->lock_count) > 0;
999 #else
1000     return false;
1001 #endif
1002 }
1003 
1004 static void uvm_rwlock_irqsave_inc(uvm_rwlock_irqsave_t *rwlock)
1005 {
1006 #if UVM_IS_DEBUG()
1007     atomic_inc(&rwlock->lock_count);
1008 #endif
1009 }
1010 
1011 static void uvm_rwlock_irqsave_dec(uvm_rwlock_irqsave_t *rwlock)
1012 {
1013 #if UVM_IS_DEBUG()
1014     atomic_dec(&rwlock->lock_count);
1015 #endif
1016 }
1017 
1018 #define uvm_assert_rwlock_locked(uvm_rwlock) \
1019     UVM_ASSERT(uvm_rwlock_irqsave_is_locked(uvm_rwlock) && uvm_check_locked(uvm_rwlock, UVM_LOCK_FLAGS_MODE_ANY))
1020 #define uvm_assert_rwlock_locked_read(uvm_rwlock) \
1021     UVM_ASSERT(uvm_rwlock_irqsave_is_locked(uvm_rwlock) && uvm_check_locked(uvm_rwlock, UVM_LOCK_FLAGS_MODE_SHARED))
1022 #define uvm_assert_rwlock_locked_write(uvm_rwlock) \
1023     UVM_ASSERT(uvm_rwlock_irqsave_is_locked(uvm_rwlock) && uvm_check_locked(uvm_rwlock, UVM_LOCK_FLAGS_MODE_EXCLUSIVE))
1024 
1025 #if UVM_IS_DEBUG()
1026     #define uvm_assert_rwlock_unlocked(uvm_rwlock) UVM_ASSERT(!uvm_rwlock_irqsave_is_locked(uvm_rwlock))
1027 #else
1028     #define uvm_assert_rwlock_unlocked(uvm_rwlock)
1029 #endif
1030 
1031 static void uvm_rwlock_irqsave_init(uvm_rwlock_irqsave_t *rwlock, uvm_lock_order_t lock_order)
1032 {
1033     rwlock_init(&rwlock->lock);
1034 #if UVM_IS_DEBUG()
1035     uvm_locking_assert_initialized();
1036     rwlock->lock_order = lock_order;
1037     atomic_set(&rwlock->lock_count, 0);
1038 #endif
1039     uvm_assert_rwlock_unlocked(rwlock);
1040 }
1041 
1042 // We can't store the irq_flags within the lock itself for readers, so they must
1043 // pass in their flags.
1044 #define uvm_read_lock_irqsave(uvm_rwlock, irq_flags) ({     \
1045         typeof(uvm_rwlock) _lock = (uvm_rwlock);            \
1046         uvm_record_lock(_lock, UVM_LOCK_FLAGS_MODE_SHARED); \
1047         read_lock_irqsave(&_lock->lock, irq_flags);         \
        uvm_rwlock_irqsave_inc(_lock);                      \
1049         uvm_assert_rwlock_locked_read(_lock);               \
1050     })
1051 
1052 #define uvm_read_unlock_irqrestore(uvm_rwlock, irq_flags) ({    \
1053         typeof(uvm_rwlock) _lock = (uvm_rwlock);                \
1054         uvm_assert_rwlock_locked_read(_lock);                   \
        uvm_rwlock_irqsave_dec(_lock);                          \
1056         read_unlock_irqrestore(&_lock->lock, irq_flags);        \
1057         uvm_record_unlock(_lock, UVM_LOCK_FLAGS_MODE_SHARED);   \
1058     })
1059 
1060 // Use a temp to not rely on flags being written after acquiring the lock.
1061 #define uvm_write_lock_irqsave(uvm_rwlock) ({                   \
1062         typeof(uvm_rwlock) _lock = (uvm_rwlock);                \
1063         unsigned long irq_flags;                                \
1064         uvm_record_lock(_lock, UVM_LOCK_FLAGS_MODE_EXCLUSIVE);  \
1065         write_lock_irqsave(&_lock->lock, irq_flags);            \
        uvm_rwlock_irqsave_inc(_lock);                          \
1067         _lock->irq_flags = irq_flags;                           \
1068         uvm_assert_rwlock_locked_write(_lock);                  \
1069     })
1070 
// Use a temp to not rely on flags being read before releasing the lock.
1072 #define uvm_write_unlock_irqrestore(uvm_rwlock) ({                  \
1073         typeof(uvm_rwlock) _lock = (uvm_rwlock);                    \
1074         unsigned long irq_flags = _lock->irq_flags;                 \
1075         uvm_assert_rwlock_locked_write(_lock);                      \
        uvm_rwlock_irqsave_dec(_lock);                              \
1077         write_unlock_irqrestore(&_lock->lock, irq_flags);           \
1078         uvm_record_unlock(_lock, UVM_LOCK_FLAGS_MODE_EXCLUSIVE);    \
1079     })
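// Illustrative sketch: readers supply their own irq_flags while writers use
// the flags stored in the lock (the "table" object is a placeholder):
//
//     unsigned long irq_flags;
//
//     uvm_read_lock_irqsave(&table->rwlock, irq_flags);
//     // ... read-only lookups ...
//     uvm_read_unlock_irqrestore(&table->rwlock, irq_flags);
//
//     uvm_write_lock_irqsave(&table->rwlock);
//     // ... updates ...
//     uvm_write_unlock_irqrestore(&table->rwlock);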
1080 
1081 // Bit locks are 'compressed' mutexes which take only 1 bit per lock by virtue
1082 // of using shared waitqueues.
1083 typedef struct
1084 {
1085     unsigned long *bits;
1086 
1087 #if UVM_IS_DEBUG()
1088     uvm_lock_order_t lock_order;
1089 #endif
1090 } uvm_bit_locks_t;
1091 
1092 NV_STATUS uvm_bit_locks_init(uvm_bit_locks_t *bit_locks, size_t count, uvm_lock_order_t lock_order);
1093 void uvm_bit_locks_deinit(uvm_bit_locks_t *bit_locks);
1094 
1095 // Asserts that the bit lock is held.
1096 //
1097 // TODO: Bug 1766601:
1098 //  - assert for the right ownership (defining the owner might be tricky in
1099 //    the kernel).
1100 #define uvm_assert_bit_locked(bit_locks, bit) ({                             \
1101     typeof(bit_locks) _bit_locks = (bit_locks);                              \
1102     typeof(bit) _bit = (bit);                                                \
1103     UVM_ASSERT(test_bit(_bit, _bit_locks->bits));                            \
1104     UVM_ASSERT(uvm_check_locked(_bit_locks, UVM_LOCK_FLAGS_MODE_EXCLUSIVE)); \
1105 })
1106 
1107 #define uvm_assert_bit_unlocked(bit_locks, bit) ({                      \
1108     typeof(bit_locks) _bit_locks = (bit_locks);                         \
1109     typeof(bit) _bit = (bit);                                           \
1110     UVM_ASSERT(!test_bit(_bit, _bit_locks->bits));                      \
1111 })
1112 
1113 static void __uvm_bit_lock(uvm_bit_locks_t *bit_locks, unsigned long bit)
1114 {
1115     int res;
1116 
1117     res = UVM_WAIT_ON_BIT_LOCK(bit_locks->bits, bit, TASK_UNINTERRUPTIBLE);
1118     UVM_ASSERT_MSG(res == 0, "Uninterruptible task interrupted: %d\n", res);
1119     uvm_assert_bit_locked(bit_locks, bit);
1120 }
1121 #define uvm_bit_lock(bit_locks, bit) ({                         \
1122     typeof(bit_locks) _bit_locks = (bit_locks);                 \
1123     typeof(bit) _bit = (bit);                                   \
1124     uvm_record_lock(_bit_locks, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
1125     __uvm_bit_lock(_bit_locks, _bit);                           \
1126 })
1127 
1128 static void __uvm_bit_unlock(uvm_bit_locks_t *bit_locks, unsigned long bit)
1129 {
1130     uvm_assert_bit_locked(bit_locks, bit);
1131 
1132     clear_bit_unlock(bit, bit_locks->bits);
1133     // Make sure we don't reorder release with wakeup as it would cause
1134     // deadlocks (other thread checking lock and adding itself to queue
1135     // in reversed order). clear_bit_unlock has only release semantics.
1136     smp_mb__after_atomic();
1137     wake_up_bit(bit_locks->bits, bit);
1138 }
1139 #define uvm_bit_unlock(bit_locks, bit) ({                         \
1140     typeof(bit_locks) _bit_locks = (bit_locks);                   \
1141     typeof(bit) _bit = (bit);                                     \
1142     __uvm_bit_unlock(_bit_locks, _bit);                           \
1143     uvm_record_unlock(_bit_locks, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
1144 })
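// Illustrative sketch: one bit lock per GPU root chunk mapping, as described
// for the chunk mapping locks at the top of this file (the surrounding
// variables are assumptions):
//
//     status = uvm_bit_locks_init(&gpu->root_chunk_mappings.bitlocks,
//                                 root_chunk_count,
//                                 UVM_LOCK_ORDER_CHUNK_MAPPING);
//
//     uvm_bit_lock(&gpu->root_chunk_mappings.bitlocks, root_chunk_index);
//     // ... update the kernel mapping of that root chunk ...
//     uvm_bit_unlock(&gpu->root_chunk_mappings.bitlocks, root_chunk_index);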
1145 
1146 #endif // __UVM_LOCK_H__
1147