/*******************************************************************************
    Copyright (c) 2015-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#ifndef __UVM_LOCK_H__
#define __UVM_LOCK_H__

#include "uvm_forward_decl.h"
#include "uvm_linux.h"
#include "uvm_common.h"

// --------------------------- UVM Locking Order ---------------------------- //
//
// Any locks described here should have their locking order added to
// uvm_lock_order_t below.
//
// - Global power management lock (g_uvm_global.pm.lock)
//      Order: UVM_LOCK_ORDER_GLOBAL_PM
//      Reader/writer lock (rw_semaphore)
//
//      Synchronizes user threads with system power management.
//
//      Taken in read mode by most user-facing UVM driver entry points.  Taken
//      in write mode by uvm_suspend(), only, and held for the duration of
//      sleep cycles.
//
//      This lock is special: while it's taken by user-facing entry points,
//      and may be taken before or after mmap_lock, this apparent violation of
//      lock ordering is permissible because pm_lock may only be taken via
//      trylock in read mode by paths which already hold any lower-level
//      locks, as well as by paths subject to the kernel's freezer.  Paths
//      taking it must be prepared to back off in case of acquisition failures.
//
//      This, in turn, is acceptable because the lock is taken in write mode
//      infrequently, and only as part of power management.  Starvation is
//      not a concern.
//
//      The mmap_lock deadlock potential aside, the trylock approach is also
//      motivated by the need to prevent user threads making UVM system calls
//      from blocking when UVM is suspended: when the kernel suspends the
//      system, the freezer employed to stop user tasks requires these tasks
//      to be interruptible.
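//
//      A minimal sketch (hypothetical caller, not part of this header) of the
//      trylock-and-back-off pattern described above, assuming pm.lock is
//      wrapped in the uvm_rw_semaphore_t type defined later in this file:
//
//          if (!uvm_down_read_trylock(&g_uvm_global.pm.lock)) {
//              // Back off: drop any lower-level locks already held and retry,
//              // or return an error to the caller (the exact error code is
//              // illustrative only).
//              return NV_ERR_BUSY_RETRY;
//          }
//
//          // ... work that must not race with uvm_suspend() ...
//
//          uvm_up_read(&g_uvm_global.pm.lock);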
//
// - Global driver state lock (g_uvm_global.global_lock)
//      Order: UVM_LOCK_ORDER_GLOBAL
//      Exclusive lock (mutex)
//
//      This protects state associated with GPUs, such as the P2P table
//      and instance pointer mappings.
//
//      This should be taken whenever global GPU state might need to be modified.
//
// - GPU ISR lock
//      Order: UVM_LOCK_ORDER_ISR
//      Exclusive lock (mutex) per gpu
//
//      Protects:
//      - gpu->parent->isr.replayable_faults.service_lock:
//        Changes to the state of a GPU as it transitions from top-half to
//        bottom-half interrupt handler for replayable faults. This lock is
//        acquired for that GPU, in the ISR top-half. Then a bottom-half is
//        scheduled (to run in a workqueue). Then the bottom-half releases the
//        lock when that GPU's processing appears to be done.
//
//      - gpu->parent->isr.non_replayable_faults.service_lock:
//        Changes to the state of a GPU in the bottom-half for non-replayable
//        faults. Non-replayable faults are handed-off from RM instead of
//        directly from the GPU hardware. This means that we do not keep
//        receiving interrupts after RM pops out the faults from the HW buffer.
//        In order not to miss fault notifications, we will always schedule a
//        bottom-half for non-replayable faults if there are faults ready to be
//        consumed in the buffer, even if there already is some bottom-half
//        running or scheduled. This lock serializes all scheduled bottom halves
//        per GPU which service non-replayable faults.
//
//      - gpu->parent->isr.access_counters.service_lock:
//        Changes to the state of a GPU as it transitions from top-half to
//        bottom-half interrupt handler for access counter notifications. This
//        lock is acquired for that GPU, in the ISR top-half. Then a bottom-half
//        is scheduled (to run in a workqueue). Then the bottom-half releases
//        the lock when that GPU's processing appears to be done.
//
// - mmap_lock (mmap_sem in kernels < 5.8)
//      Order: UVM_LOCK_ORDER_MMAP_LOCK
//      Reader/writer lock (rw_semaphore)
//
//      We're often called with the kernel already holding mmap_lock: mmap,
//      munmap, CPU fault, etc. These operations may have to take any number of
//      UVM locks, so mmap_lock requires special consideration in the lock
//      order, since it's sometimes out of our control.
//
//      We need to hold mmap_lock when calling vm_insert_page, which means that
//      any time an operation (such as an ioctl) might need to install a CPU
//      mapping, it must take mmap_lock in read mode very early on.
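//
//      A minimal sketch (hypothetical ioctl path, not part of this header)
//      using the mmap_lock helpers defined later in this file:
//
//          uvm_down_read_mmap_lock(mm);
//          // ... look up and validate the UVM vma, then install the CPU
//          // mapping, e.g. via vm_insert_page() ...
//          uvm_up_read_mmap_lock(mm);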
//
//      However, current->mm is not necessarily the owning mm of the UVM vma.
//      fork or fd passing via a UNIX domain socket can cause that. Notably, this
//      is also the case when handling GPU faults or doing other operations from
//      a kernel thread. In some cases we have an mm associated with a VA space,
//      and in those cases we lock that mm instead of current->mm. But since we
//      don't always have that luxury, each path specifies the mm to use (either
//      explicitly or via uvm_va_block_context_t::mm). That mm may be NULL.
//      Later on down the stack we look up the UVM vma and compare its mm before
//      operating on that vma.
//
//      With HMM and ATS, the GPU fault handler takes mmap_lock. GPU faults may
//      block forward progress of threads holding the RM GPUs lock until those
//      faults are serviced, which means that mmap_lock cannot be held when the
//      UVM driver calls into RM. In other words, mmap_lock and the RM GPUs lock
//      are mutually exclusive.
//
// - Global VA spaces list lock
//      Order: UVM_LOCK_ORDER_VA_SPACES_LIST
//      Mutex which protects g_uvm_global.va_spaces state.
//
// - VA space writer serialization lock (va_space->serialize_writers_lock)
//      Order: UVM_LOCK_ORDER_VA_SPACE_SERIALIZE_WRITERS
//      Exclusive lock (mutex) per uvm_va_space (UVM struct file)
//
//      This lock prevents a deadlock between RM and UVM by only allowing one
//      writer to queue up on the VA space lock at a time.
//
//      GPU faults are serviced by the UVM bottom half with the VA space lock
//      held in read mode. Until they're serviced, these faults may block
//      forward progress of RM threads.
//
//      This constraint means that the UVM driver cannot call into RM while
//      GPU fault servicing is blocked. We may block GPU fault servicing by:
//      - Taking the VA space lock in write mode
//      - Holding the VA space lock in read mode with a writer pending, since
//        Linux rw_semaphores are fair.
//
//      Example of the second condition:
//      Thread A        Thread B        UVM BH          Thread C
//      UVM API call    UVM API call    GPU fault       RM API call
//      ------------    ------------    ------------    ------------
//      down_read
//                      down_write
//                      // Blocked on A
//                                      down_read
//                                      // Blocked on B
//                                                      RM GPU lock
//                                                      // Blocked on GPU fault
//      RM GPU lock
//      // Deadlock
//
//      The writer serialization lock works around this by biasing the VA space
//      lock towards readers, without causing starvation of writers. Writers and
//      readers which will make RM calls take this lock, which prevents them
//      from queueing up on the VA space rw_semaphore and blocking the UVM
//      bottom half.
//
//      TODO: Bug 1799173: A better long-term approach might be to never allow
//            RM calls under the VA space lock at all, but that will take a
//            larger restructuring.
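//
//      A simplified sketch (hypothetical, not part of this header) of the
//      ordering a writer that may call into RM is expected to follow, assuming
//      the helper types defined later in this file (the read/write
//      interleaving lock described next is omitted for brevity):
//
//          uvm_mutex_lock(&va_space->serialize_writers_lock);
//          uvm_down_write(&va_space->lock);
//          // ... modify VA space state; RM calls are permitted here ...
//          uvm_up_write(&va_space->lock);
//          uvm_mutex_unlock(&va_space->serialize_writers_lock);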
//
// - VA space serialization of down_read with up_write of the VA space lock
//   (va_space->read_acquire_write_release_lock)
//      Order: UVM_LOCK_ORDER_VA_SPACE_READ_ACQUIRE_WRITE_RELEASE_LOCK
//      Exclusive lock (mutex) per uvm_va_space (UVM struct file)
//
//      This lock prevents a deadlock between RM and UVM by preventing any
//      interleaving of down_reads on the VA space lock with concurrent
//      up_writes/downgrade_writes. The Linux rw_semaphore implementation does
//      not guarantee that two readers will always run concurrently, as shown by
//      the following interleaving:
//
//      Thread A                Thread B
//      UVM API call            UVM BH
//      ------------            ------------
//      down_write
//                              down_read
//                                  // Fails, calls handler
//      up_write
//      down_read
//          // Success
//                                  // Handler sees the lock still active
//                                  // Handler waits for lock to be released
//                                  // Blocked on A
//      RM GPU lock
//      // Blocked on GPU fault
//
//      Given the above interleaving, the kernel's implementation of the
//      down_read failure handler running in thread B does not distinguish
//      between a reader vs writer holding the lock. From the perspective of all
//      other threads, even those which attempt to take the lock for read while
//      thread A's reader holds it, a writer is active. Therefore no other
//      readers can take the lock, and we end up in the same deadlock described
//      in the above comments on the VA space writer serialization lock.
//
//      This lock prevents any such interleaving:
//      - Writers take this lock for the duration of the write lock.
//
//      - Readers which do not call into RM only take this lock across the
//        down_read call. If a writer holds the lock, the reader would be
//        blocked on the VA space lock anyway. Concurrent readers will serialize
//        the taking of the VA space lock, but they will not be serialized
//        across their read sections.
//
//      - Readers which call into RM do not need to take this lock. Their
//        down_read is already serialized with a writer's up_write by the
//        serialize_writers_lock.
//
// - VA space lock (va_space->lock)
//      Order: UVM_LOCK_ORDER_VA_SPACE
//      Reader/writer lock (rw_semaphore) per uvm_va_space (UVM struct file)
//
//      This is the UVM equivalent of mmap_lock. It protects all state under
//      that va_space, such as the VA range tree.
//
//      Read mode: Faults (CPU and GPU), mapping creation, prefetches. These
//      will be serialized at the VA block level if necessary. RM calls are
//      allowed only if the VA space serialize_writers_lock is also taken.
//
//      Write mode: Modification of the range state such as mmap and changes to
//      logical permissions or location preferences. RM calls are never allowed.
//
// - External Allocation Tree lock
//      Order: UVM_LOCK_ORDER_EXT_RANGE_TREE
//      Exclusive lock (mutex) per external VA range, per GPU.
//
//      Protects the per-GPU sub-range tree mappings in each external VA range.
//
// - GPU semaphore pool lock (semaphore_pool->mutex)
//      Order: UVM_LOCK_ORDER_GPU_SEMAPHORE_POOL
//      Exclusive lock (mutex) per uvm_gpu_semaphore_pool
//
//      Protects the state of the semaphore pool.
//
// - RM API lock
//      Order: UVM_LOCK_ORDER_RM_API
//      Exclusive lock
//
//      This is an internal RM lock that's acquired by most if not all UVM-RM
//      APIs.
//      Notably this lock is also held on PMA eviction.
//
// - RM GPUs lock
//      Order: UVM_LOCK_ORDER_RM_GPUS
//      Exclusive lock
//
//      This is an internal RM lock that's acquired by most if not all UVM-RM
//      APIs and disables interrupts for the GPUs.
//      Notably this lock is *not* held on PMA eviction.
//
// - VA block lock (va_block->lock)
//      Order: UVM_LOCK_ORDER_VA_BLOCK
//      Exclusive lock (mutex)
//
//      Protects:
//      - CPU and GPU page table mappings for all VAs under the block
//      - Updates to the GPU work tracker for that block (migrations)
//
//      Operations allowed while holding the lock:
//      - CPU allocation (we don't evict CPU memory)
//      - GPU memory allocation which cannot evict
//      - CPU page table mapping/unmapping
//      - Pushing work (GPU page table mapping/unmapping)
//
//      Operations not allowed while holding the lock:
//      - GPU memory allocation which can evict memory (would require nesting
//        block locks)
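//
//      A minimal sketch (hypothetical, not part of this header), assuming the
//      block lock is the uvm_mutex_t type defined later in this file:
//
//          uvm_mutex_lock(&va_block->lock);
//          // ... update page tables, push and track GPU work; no evicting
//          // GPU allocations while the lock is held ...
//          uvm_mutex_unlock(&va_block->lock);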
//
// - GPU DMA Allocation pool lock (gpu->conf_computing.dma_buffer_pool.lock)
//      Order: UVM_LOCK_ORDER_CONF_COMPUTING_DMA_BUFFER_POOL
//      Condition: The Confidential Computing feature is enabled
//      Exclusive lock (mutex)
//
//      Protects:
//      - The state of the uvm_conf_computing_dma_buffer_pool_t
//
// - Chunk mapping lock (gpu->root_chunk_mappings.bitlocks and
//   gpu->sysmem_mappings.bitlock)
//      Order: UVM_LOCK_ORDER_CHUNK_MAPPING
//      Exclusive bitlock (mutex) per each root chunk, or physical sysmem
//      segment.
//
//      A chunk mapping lock is used to enforce serialization when updating
//      kernel mappings of GPU root chunks (vidmem), or CPU chunks (sysmem).
//      The VA block lock is usually held during the mapping operation.
//
//      In the case of vidmem, each lock in the bitlock array serializes the
//      mapping and unmapping of a single GPU root chunk. If serialization
//      is required to update a root chunk, but no mappings are involved, use
//      the PMM root chunk lock (order UVM_LOCK_ORDER_PMM_ROOT_CHUNK) instead.
//
//      In the case of sysmem, each lock in the array serializes the mapping
//      of a large segment of system address space: the locking granularity is
//      significantly coarser than the CPU chunk size.
//
// - Page tree lock
//      Order: UVM_LOCK_ORDER_PAGE_TREE
//      Exclusive lock per GPU page tree
//
//      This protects a page tree.  All modifications to the device's page tree
//      and the host-side cache of that tree must be done under this lock.
//      The host-side cache and device state must be consistent when this lock
//      is released
//
//      Operations allowed while holding this lock
//      - Pushing work
//
//      Operations not allowed while holding this lock
//      - GPU memory allocation which can evict
//
// - Channel pool key rotation lock
//      Order: UVM_LOCK_ORDER_KEY_ROTATION
//      Condition: Confidential Computing is enabled
//      Mutex per channel pool
//
//      The lock ensures mutual exclusion during key rotation affecting all the
//      channels in the associated pool. Key rotation in WLC pools is handled
//      using a separate lock order, see UVM_LOCK_ORDER_KEY_ROTATION_WLC below.
//
// - CE channel CSL channel pool semaphore
//      Order: UVM_LOCK_ORDER_CSL_PUSH
//      Condition: The Confidential Computing feature is enabled
//      Semaphore per CE channel pool
//
//      The semaphore controls concurrent pushes to CE channels that are not WLC
//      channels. Secure work submission depends on channel availability in
//      GPFIFO entries (as in any other channel type) but also on channel
//      locking. Each channel has a lock to enforce ordering of pushes. The
//      channel's CSL lock is taken on channel reservation until uvm_push_end.
//      When the Confidential Computing feature is enabled, channels are
//      stateful, and the CSL lock protects their CSL state/context.
//
//      Operations allowed while holding this lock
//      - Pushing work to CE channels (except for WLC channels)
//
// - WLC channel pool key rotation lock
//      Order: UVM_LOCK_ORDER_KEY_ROTATION_WLC
//      Condition: Confidential Computing is enabled
//      Mutex of WLC channel pool
//
//      The lock has the same purpose as the regular channel pool key rotation
//      lock. Using a different order lock for WLC channels allows key rotation
//      on those channels during indirect work submission.
//
// - WLC CSL channel pool semaphore
//      Order: UVM_LOCK_ORDER_CSL_WLC_PUSH
//      Condition: The Confidential Computing feature is enabled
//      Semaphore per WLC channel pool
//
//      The semaphore controls concurrent pushes to WLC channels. WLC work
//      submission depends on channel availability in GPFIFO entries (as in any
//      other channel type) but also on channel locking. Each WLC channel has a
//      lock to enforce ordering of pushes. The channel's CSL lock is taken on
//      channel reservation until uvm_push_end. WLC channels are stateful
//      channels and the CSL lock protects their CSL state/context.
//
//      This lock ORDER is different and sits below the generic channel CSL
//      lock and above the SEC2 CSL lock. This reflects the dual nature of WLC
//      channels; they use SEC2 indirect work launch during initialization,
//      and after their schedule is initialized they provide indirect launch
//      functionality to other CE channels.
//
//      Operations allowed while holding this lock
//      - Pushing work to WLC channels
//
// - SEC2 CSL channel pool semaphore
//      Order: UVM_LOCK_ORDER_CSL_SEC2_PUSH
//      Condition: The Confidential Computing feature is enabled
//      Semaphore per SEC2 channel pool
//
//      The semaphore controls concurrent pushes to SEC2 channels. SEC2 work
//      submission depends on channel availability in GPFIFO entries (as in any
//      other channel type) but also on channel locking. Each SEC2 channel has a
//      lock to enforce ordering of pushes. The channel's CSL lock is taken on
//      channel reservation until uvm_push_end. SEC2 channels are stateful
//      channels and the CSL lock protects their CSL state/context.
//
//      This lock ORDER is different and lower than UVM_LOCK_ORDER_CSL_PUSH
//      to allow secure work submission to use a SEC2 channel to submit
//      work before releasing the CSL lock of the originating channel.
//
//      Operations allowed while holding this lock
//      - Pushing work to SEC2 channels
//
// - Concurrent push semaphore
//      Order: UVM_LOCK_ORDER_PUSH
//      Semaphore (uvm_semaphore_t)
//
//      This is a semaphore limiting the number of concurrent pushes; it is
//      held for the duration of a push (between uvm_push_begin*() and
//      uvm_push_end()).
//
// - PMM GPU lock (pmm->lock)
//      Order: UVM_LOCK_ORDER_PMM
//      Exclusive lock (mutex) per uvm_pmm_gpu_t
//
//      Protects the state of PMM - internal to PMM.
//
// - PMM GPU PMA lock (pmm->pma_lock)
//      Order: UVM_LOCK_ORDER_PMM_PMA
//      Reader/writer lock (rw_semaphore) per uvm_pmm_gpu_t
//
//      Lock internal to PMM for synchronizing allocations from PMA with
//      PMA eviction.
//
// - PMM root chunk lock (pmm->root_chunks.bitlocks)
//      Order: UVM_LOCK_ORDER_PMM_ROOT_CHUNK
//      Exclusive bitlock (mutex) per each root chunk internal to PMM.
//
// - Channel lock
//      Order: UVM_LOCK_ORDER_CHANNEL
//      Spinlock (uvm_spinlock_t) or exclusive lock (mutex)
//
//      Lock protecting the state of all the channels in a channel pool. The
//      channel pool lock documentation contains the guidelines about which lock
//      type (mutex or spinlock) to use.
//
// - WLC Channel lock
//      Order: UVM_LOCK_ORDER_WLC_CHANNEL
//      Condition: The Confidential Computing feature is enabled
//      Spinlock (uvm_spinlock_t)
//
//      Lock protecting the state of WLC channels in a channel pool. This lock
//      is separate from the generic channel lock (UVM_LOCK_ORDER_CHANNEL)
//      to allow for indirect worklaunch pushes while holding the main channel
//      lock (WLC pushes don't need any of the pushbuffer locks described
//      above)
//
// - Tools global VA space list lock (g_tools_va_space_list_lock)
//      Order: UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST
//      Reader/writer lock (rw_semaphore)
//
//      This lock protects the list of VA spaces used when broadcasting
//      UVM profiling events.
//
// - VA space events
//      Order: UVM_LOCK_ORDER_VA_SPACE_EVENTS
//      Reader/writer lock (rw_semaphore) per uvm_perf_va_space_events_t.
//      serializes perf callbacks with event register/unregister. It's separate
//      from the VA space lock so it can be taken on the eviction path.
//
// - VA space tools
//      Order: UVM_LOCK_ORDER_VA_SPACE_TOOLS
//      Reader/writer lock (rw_semaphore) per uvm_va_space_t. Serializes tools
//      reporting with tools register/unregister. Since some of the tools
//      events come from perf events, both VA_SPACE_EVENTS and VA_SPACE_TOOLS
//      must be taken to register/report some tools events.
//
// - Tracking semaphores
//      Order: UVM_LOCK_ORDER_SECURE_SEMAPHORE
//      Condition: The Confidential Computing feature is enabled
//
//      CE semaphore payloads are encrypted, and require taking the CSL lock
//      (UVM_LOCK_ORDER_LEAF) to decrypt the payload.
//
// - CSL Context
//      Order: UVM_LOCK_ORDER_CSL_CTX
//      When the Confidential Computing feature is enabled, encrypt/decrypt
//      operations to communicate with GPU are handled by the CSL context.
//      This lock protects RM calls that use this context.
//
// - Leaf locks
//      Order: UVM_LOCK_ORDER_LEAF
//
//      All leaf locks.
//
// -------------------------------------------------------------------------- //

// Remember to add any new lock orders to uvm_lock_order_to_string() in
// uvm_lock.c
typedef enum
{
    UVM_LOCK_ORDER_INVALID = 0,
    UVM_LOCK_ORDER_GLOBAL_PM,
    UVM_LOCK_ORDER_GLOBAL,
    UVM_LOCK_ORDER_ISR,
    UVM_LOCK_ORDER_MMAP_LOCK,
    UVM_LOCK_ORDER_VA_SPACES_LIST,
    UVM_LOCK_ORDER_VA_SPACE_SERIALIZE_WRITERS,
    UVM_LOCK_ORDER_VA_SPACE_READ_ACQUIRE_WRITE_RELEASE_LOCK,
    UVM_LOCK_ORDER_VA_SPACE,
    UVM_LOCK_ORDER_EXT_RANGE_TREE,
    UVM_LOCK_ORDER_GPU_SEMAPHORE_POOL,
    UVM_LOCK_ORDER_RM_API,
    UVM_LOCK_ORDER_RM_GPUS,
    UVM_LOCK_ORDER_VA_BLOCK_MIGRATE,
    UVM_LOCK_ORDER_VA_BLOCK,
    UVM_LOCK_ORDER_CONF_COMPUTING_DMA_BUFFER_POOL,
    UVM_LOCK_ORDER_CHUNK_MAPPING,
    UVM_LOCK_ORDER_PAGE_TREE,
    UVM_LOCK_ORDER_KEY_ROTATION,
    UVM_LOCK_ORDER_CSL_PUSH,
    UVM_LOCK_ORDER_KEY_ROTATION_WLC,
    UVM_LOCK_ORDER_CSL_WLC_PUSH,
    UVM_LOCK_ORDER_CSL_SEC2_PUSH,
    UVM_LOCK_ORDER_PUSH,
    UVM_LOCK_ORDER_PMM,
    UVM_LOCK_ORDER_PMM_PMA,
    UVM_LOCK_ORDER_PMM_ROOT_CHUNK,
    UVM_LOCK_ORDER_CHANNEL,
    UVM_LOCK_ORDER_WLC_CHANNEL,
    UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST,
    UVM_LOCK_ORDER_VA_SPACE_EVENTS,
    UVM_LOCK_ORDER_VA_SPACE_TOOLS,
    UVM_LOCK_ORDER_SEMA_POOL_TRACKER,
    UVM_LOCK_ORDER_SECURE_SEMAPHORE,

    // TODO: Bug 4184836: [uvm][hcc] Remove UVM_LOCK_ORDER_CSL_CTX
    // This lock order can be removed after RM no longer relies on RPC event
    // notifications.
    UVM_LOCK_ORDER_CSL_CTX,
    UVM_LOCK_ORDER_LEAF,
    UVM_LOCK_ORDER_COUNT,
} uvm_lock_order_t;

const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order);

typedef enum
{
    UVM_LOCK_FLAGS_INVALID          = 0,
    UVM_LOCK_FLAGS_MODE_EXCLUSIVE   = (1 << 0),
    UVM_LOCK_FLAGS_MODE_SHARED      = (1 << 1),
    UVM_LOCK_FLAGS_MODE_ANY         = (UVM_LOCK_FLAGS_MODE_EXCLUSIVE | UVM_LOCK_FLAGS_MODE_SHARED),
    UVM_LOCK_FLAGS_MODE_MASK        = (UVM_LOCK_FLAGS_MODE_EXCLUSIVE | UVM_LOCK_FLAGS_MODE_SHARED),
    UVM_LOCK_FLAGS_OUT_OF_ORDER     = (1 << 2),
    UVM_LOCK_FLAGS_TRYLOCK          = (1 << 3),
    UVM_LOCK_FLAGS_MASK             = (1 << 4) - 1
} uvm_lock_flags_t;

// Record locking a lock of given lock_order in exclusive or shared mode,
// distinguishing between trylock and normal acquisition attempts.
// Returns true if the recorded lock follows all the locking rules and false
// otherwise.
bool __uvm_record_lock(void *lock, uvm_lock_order_t lock_order, uvm_lock_flags_t flags);

// Record unlocking a lock of given lock_order in exclusive or shared mode and
// possibly out of order.
// Returns true if the unlock follows all the locking rules and false otherwise.
bool __uvm_record_unlock(void *lock, uvm_lock_order_t lock_order, uvm_lock_flags_t flags);

bool __uvm_record_downgrade(void *lock, uvm_lock_order_t lock_order);

// Check whether a lock of given lock_order is held in exclusive, shared, or
// either mode by the current thread.
bool __uvm_check_locked(void *lock, uvm_lock_order_t lock_order, uvm_lock_flags_t flags);

// Check that no locks are held with the given lock order
bool __uvm_check_unlocked_order(uvm_lock_order_t lock_order);

// Check that a lock of the given order can be locked, i.e. that no locks are
// held with the given or deeper lock order.  Allow for out-of-order locking
// when checking for a trylock.
bool __uvm_check_lockable_order(uvm_lock_order_t lock_order, uvm_lock_flags_t flags);

// Check that all locks have been released in a thread context lock
bool __uvm_check_all_unlocked(uvm_thread_context_lock_t *context_lock);

// Check that all locks have been released in the current thread context lock
bool __uvm_thread_check_all_unlocked(void);

// Check that the locking infrastructure has been initialized
bool __uvm_locking_initialized(void);

#if UVM_IS_DEBUG()
  // These macros are intended to be expanded on the call site directly and will
  // print the precise location of the violation, while the __uvm_record*
  // functions print the details of the error.
  #define uvm_record_lock_raw(lock, lock_order, flags) \
      UVM_ASSERT_MSG(__uvm_record_lock((lock), (lock_order), (flags)), "Locking violation\n")
  #define uvm_record_unlock_raw(lock, lock_order, flags) \
      UVM_ASSERT_MSG(__uvm_record_unlock((lock), (lock_order), (flags)), "Locking violation\n")
  #define uvm_record_downgrade_raw(lock, lock_order) \
      UVM_ASSERT_MSG(__uvm_record_downgrade((lock), (lock_order)), "Locking violation\n")

  // Record UVM lock (a lock that has a lock_order member) operation and assert
  // that it's correct
  #define uvm_record_lock(lock, flags) \
      uvm_record_lock_raw((lock), (lock)->lock_order, (flags))
  #define uvm_record_unlock(lock, flags) uvm_record_unlock_raw((lock), (lock)->lock_order, (flags))
  #define uvm_record_unlock_out_of_order(lock, flags) \
            uvm_record_unlock_raw((lock), (lock)->lock_order, (flags) | UVM_LOCK_FLAGS_OUT_OF_ORDER)
  #define uvm_record_downgrade(lock) uvm_record_downgrade_raw((lock), (lock)->lock_order)

  // Check whether a UVM lock (a lock that has a lock_order member) is held in
  // the given mode.
  #define uvm_check_locked(lock, flags) __uvm_check_locked((lock), (lock)->lock_order, (flags))

  // Helpers for recording and asserting mmap_lock
  // (mmap_sem in kernels < 5.8) state
  #define uvm_record_lock_mmap_lock_read(mm) \
          uvm_record_lock_raw(nv_mmap_get_lock(mm), UVM_LOCK_ORDER_MMAP_LOCK, UVM_LOCK_FLAGS_MODE_SHARED)

  #define uvm_record_unlock_mmap_lock_read(mm) \
          uvm_record_unlock_raw(nv_mmap_get_lock(mm), UVM_LOCK_ORDER_MMAP_LOCK, UVM_LOCK_FLAGS_MODE_SHARED)

  #define uvm_record_unlock_mmap_lock_read_out_of_order(mm) \
          uvm_record_unlock_raw(nv_mmap_get_lock(mm), UVM_LOCK_ORDER_MMAP_LOCK, \
                                UVM_LOCK_FLAGS_MODE_SHARED | UVM_LOCK_FLAGS_OUT_OF_ORDER)

  #define uvm_record_lock_mmap_lock_write(mm) \
          uvm_record_lock_raw(nv_mmap_get_lock(mm), UVM_LOCK_ORDER_MMAP_LOCK, UVM_LOCK_FLAGS_MODE_EXCLUSIVE)

  #define uvm_record_unlock_mmap_lock_write(mm) \
          uvm_record_unlock_raw(nv_mmap_get_lock(mm), UVM_LOCK_ORDER_MMAP_LOCK, UVM_LOCK_FLAGS_MODE_EXCLUSIVE)

  #define uvm_record_unlock_mmap_lock_write_out_of_order(mm) \
          uvm_record_unlock_raw(nv_mmap_get_lock(mm), UVM_LOCK_ORDER_MMAP_LOCK, \
                                UVM_LOCK_FLAGS_MODE_EXCLUSIVE | UVM_LOCK_FLAGS_OUT_OF_ORDER)

  #define uvm_check_locked_mmap_lock(mm, flags) \
           __uvm_check_locked(nv_mmap_get_lock(mm), UVM_LOCK_ORDER_MMAP_LOCK, (flags))

  // Helpers for recording RM API lock usage around UVM-RM interfaces
  #define uvm_record_lock_rm_api() \
          uvm_record_lock_raw((void*)UVM_LOCK_ORDER_RM_API, UVM_LOCK_ORDER_RM_API, \
                              UVM_LOCK_FLAGS_MODE_EXCLUSIVE)
  #define uvm_record_unlock_rm_api() \
          uvm_record_unlock_raw((void*)UVM_LOCK_ORDER_RM_API, UVM_LOCK_ORDER_RM_API, \
                                UVM_LOCK_FLAGS_MODE_EXCLUSIVE)

  // Helpers for recording RM GPUS lock usage around UVM-RM interfaces
  #define uvm_record_lock_rm_gpus() \
          uvm_record_lock_raw((void*)UVM_LOCK_ORDER_RM_GPUS, UVM_LOCK_ORDER_RM_GPUS, \
                              UVM_LOCK_FLAGS_MODE_EXCLUSIVE)
  #define uvm_record_unlock_rm_gpus() \
          uvm_record_unlock_raw((void*)UVM_LOCK_ORDER_RM_GPUS, UVM_LOCK_ORDER_RM_GPUS, \
                                UVM_LOCK_FLAGS_MODE_EXCLUSIVE)

  // Helpers for recording both RM locks usage around UVM-RM interfaces
  #define uvm_record_lock_rm_all() ({ uvm_record_lock_rm_api(); uvm_record_lock_rm_gpus(); })
  #define uvm_record_unlock_rm_all() ({ uvm_record_unlock_rm_gpus(); uvm_record_unlock_rm_api(); })

#else
  #define uvm_record_lock                               UVM_IGNORE_EXPR2
  #define uvm_record_unlock                             UVM_IGNORE_EXPR2
  #define uvm_record_unlock_out_of_order                UVM_IGNORE_EXPR2
  #define uvm_record_downgrade                          UVM_IGNORE_EXPR

  static bool uvm_check_locked(void *lock, uvm_lock_flags_t flags)
  {
      return false;
  }

  #define uvm_record_lock_mmap_lock_read                 UVM_IGNORE_EXPR
  #define uvm_record_unlock_mmap_lock_read               UVM_IGNORE_EXPR
  #define uvm_record_unlock_mmap_lock_read_out_of_order  UVM_IGNORE_EXPR
  #define uvm_record_lock_mmap_lock_write                UVM_IGNORE_EXPR
  #define uvm_record_unlock_mmap_lock_write              UVM_IGNORE_EXPR
  #define uvm_record_unlock_mmap_lock_write_out_of_order UVM_IGNORE_EXPR

  #define uvm_check_locked_mmap_lock                     uvm_check_locked

  #define uvm_record_lock_rm_api()
  #define uvm_record_unlock_rm_api()

  #define uvm_record_lock_rm_gpus()
  #define uvm_record_unlock_rm_gpus()

  #define uvm_record_lock_rm_all()
  #define uvm_record_unlock_rm_all()
#endif

#define uvm_locking_assert_initialized() UVM_ASSERT(__uvm_locking_initialized())
#define uvm_thread_assert_all_unlocked() UVM_ASSERT(__uvm_thread_check_all_unlocked())
#define uvm_assert_lockable_order(order) UVM_ASSERT(__uvm_check_lockable_order(order, UVM_LOCK_FLAGS_MODE_ANY))
#define uvm_assert_unlocked_order(order) UVM_ASSERT(__uvm_check_unlocked_order(order))

#if UVM_IS_DEBUG()
#define uvm_lock_debug_init(lock, order) ({        \
        uvm_locking_assert_initialized();          \
        (lock)->lock_order = (order);              \
    })
#else
#define uvm_lock_debug_init(lock, order) ((void) order)
#endif

// Helpers for locking mmap_lock (mmap_sem in kernels < 5.8)
// and recording its usage
#define uvm_assert_mmap_lock_locked_mode(mm, flags) ({                                      \
      typeof(mm) _mm = (mm);                                                                \
      UVM_ASSERT(nv_mm_rwsem_is_locked(_mm) && uvm_check_locked_mmap_lock((_mm), (flags))); \
  })

#define uvm_assert_mmap_lock_locked(mm) \
        uvm_assert_mmap_lock_locked_mode((mm), UVM_LOCK_FLAGS_MODE_ANY)
#define uvm_assert_mmap_lock_locked_read(mm) \
        uvm_assert_mmap_lock_locked_mode((mm), UVM_LOCK_FLAGS_MODE_SHARED)
#define uvm_assert_mmap_lock_locked_write(mm) \
        uvm_assert_mmap_lock_locked_mode((mm), UVM_LOCK_FLAGS_MODE_EXCLUSIVE)

#define uvm_down_read_mmap_lock(mm) ({                  \
        typeof(mm) _mm = (mm);                          \
        uvm_record_lock_mmap_lock_read(_mm);            \
        nv_mmap_read_lock(_mm);                         \
    })

#define uvm_up_read_mmap_lock(mm) ({                    \
        typeof(mm) _mm = (mm);                          \
        nv_mmap_read_unlock(_mm);                       \
        uvm_record_unlock_mmap_lock_read(_mm);          \
    })

#define uvm_up_read_mmap_lock_out_of_order(mm) ({           \
        typeof(mm) _mm = (mm);                              \
        nv_mmap_read_unlock(_mm);                           \
        uvm_record_unlock_mmap_lock_read_out_of_order(_mm); \
    })

#define uvm_down_write_mmap_lock(mm) ({                 \
        typeof(mm) _mm = (mm);                          \
        uvm_record_lock_mmap_lock_write(_mm);           \
        nv_mmap_write_lock(_mm);                        \
    })

#define uvm_up_write_mmap_lock(mm) ({                   \
        typeof(mm) _mm = (mm);                          \
        nv_mmap_write_unlock(_mm);                      \
        uvm_record_unlock_mmap_lock_write(_mm);         \
    })

// Helper for calling a UVM-RM interface function with lock recording
#define uvm_rm_locked_call(call) ({                     \
        typeof(call) ret;                               \
        uvm_record_lock_rm_all();                       \
        ret = call;                                     \
        uvm_record_unlock_rm_all();                     \
        ret;                                            \
    })

// Helper for calling a UVM-RM interface function that returns void with lock recording
#define uvm_rm_locked_call_void(call) ({                \
        uvm_record_lock_rm_all();                       \
        call;                                           \
        uvm_record_unlock_rm_all();                     \
    })
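
// Example usage (illustrative; uvm_rm_api_foo() and uvm_rm_api_bar() are
// hypothetical UVM-RM interface functions):
//
//     NV_STATUS status = uvm_rm_locked_call(uvm_rm_api_foo(arg));
//     uvm_rm_locked_call_void(uvm_rm_api_bar(arg));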

typedef struct
{
    struct rw_semaphore sem;
#if UVM_IS_DEBUG()
    uvm_lock_order_t lock_order;
#endif
} uvm_rw_semaphore_t;

//
// Note that this is a macro, not an inline or static function so the
// "uvm_sem" argument is substituted as text. If this is invoked with
// uvm_assert_rwsem_locked_mode(_sem, flags) then we get code "_sem = _sem"
// and _sem is initialized to NULL. Avoid this by using a name unlikely to
// be the same as the string passed to "uvm_sem".
// See uvm_down_read() and uvm_up_read() below as examples.
//
#define uvm_assert_rwsem_locked_mode(uvm_sem, flags) ({                               \
        typeof(uvm_sem) _sem_ = (uvm_sem);                                            \
        UVM_ASSERT(rwsem_is_locked(&_sem_->sem) && uvm_check_locked(_sem_, (flags))); \
    })

#define uvm_assert_rwsem_locked(uvm_sem) \
        uvm_assert_rwsem_locked_mode(uvm_sem, UVM_LOCK_FLAGS_MODE_ANY)
#define uvm_assert_rwsem_locked_read(uvm_sem) \
        uvm_assert_rwsem_locked_mode(uvm_sem, UVM_LOCK_FLAGS_MODE_SHARED)
#define uvm_assert_rwsem_locked_write(uvm_sem) \
        uvm_assert_rwsem_locked_mode(uvm_sem, UVM_LOCK_FLAGS_MODE_EXCLUSIVE)

#define uvm_assert_rwsem_unlocked(uvm_sem) UVM_ASSERT(!rwsem_is_locked(&(uvm_sem)->sem))

#define uvm_init_rwsem(uvm_sem, order) ({                   \
        uvm_rw_semaphore_t *uvm_sem_ ## order = (uvm_sem);  \
        init_rwsem(&uvm_sem_ ## order->sem);                \
        uvm_lock_debug_init(uvm_sem, order);                \
        uvm_assert_rwsem_unlocked(uvm_sem);                 \
    })

#define uvm_down_read(uvm_sem) ({                          \
        typeof(uvm_sem) _sem = (uvm_sem);                  \
        uvm_record_lock(_sem, UVM_LOCK_FLAGS_MODE_SHARED); \
        down_read(&_sem->sem);                             \
        uvm_assert_rwsem_locked_read(_sem);                \
    })

#define uvm_up_read(uvm_sem) ({                              \
        typeof(uvm_sem) _sem = (uvm_sem);                    \
        uvm_assert_rwsem_locked_read(_sem);                  \
        up_read(&_sem->sem);                                 \
        uvm_record_unlock(_sem, UVM_LOCK_FLAGS_MODE_SHARED); \
    })
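
// Example usage (illustrative sketch; the object and its choice of lock order
// are hypothetical):
//
//     typedef struct { uvm_rw_semaphore_t lock; } my_object_t;
//
//     uvm_init_rwsem(&obj->lock, UVM_LOCK_ORDER_LEAF);
//
//     uvm_down_read(&obj->lock);
//     // ... read-side critical section ...
//     uvm_up_read(&obj->lock);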

// Unlock w/o any tracking. This should be extremely rare and *_no_tracking
// helpers will be added only as needed.
//
// TODO: Bug 2594854:
// TODO: Bug 2583279: Remove macro when bugs are fixed
#define uvm_up_read_no_tracking(uvm_sem) ({                  \
        typeof(uvm_sem) _sem = (uvm_sem);                    \
        up_read(&_sem->sem);                                 \
    })

#define uvm_down_write(uvm_sem) ({                            \
        typeof (uvm_sem) _sem = (uvm_sem);                    \
        uvm_record_lock(_sem, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
        down_write(&_sem->sem);                               \
        uvm_assert_rwsem_locked_write(_sem);                  \
    })

// trylock for reading: returns 1 if successful, 0 if not.  Out-of-order lock
// acquisition via this function is legal, i.e. the lock order checker will
// allow it.  However, if an out-of-order lock acquisition attempt fails, it is
// the caller's responsibility to back off at least to the point where the
// next held lower-order lock is released.
#define uvm_down_read_trylock(uvm_sem) ({                                           \
        typeof(uvm_sem) _sem = (uvm_sem);                                           \
        int locked;                                                                 \
        uvm_record_lock(_sem, UVM_LOCK_FLAGS_MODE_SHARED | UVM_LOCK_FLAGS_TRYLOCK); \
        locked = down_read_trylock(&_sem->sem);                                     \
        if (locked == 0)                                                            \
            uvm_record_unlock(_sem, UVM_LOCK_FLAGS_MODE_SHARED);                    \
        else                                                                        \
            uvm_assert_rwsem_locked_read(_sem);                                     \
        locked;                                                                     \
    })
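
// Example of the back-off contract described above (illustrative sketch;
// earlier_sem and deeper_mutex are hypothetical locks, where earlier_sem has
// the earlier lock order but deeper_mutex is already held):
//
//     if (!uvm_down_read_trylock(&earlier_sem)) {
//         // The out-of-order acquisition failed: back off by releasing the
//         // already-held deeper lock, then take both locks in order.
//         uvm_mutex_unlock(&deeper_mutex);
//         uvm_down_read(&earlier_sem);
//         uvm_mutex_lock(&deeper_mutex);
//     }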

// Lock w/o any tracking. This should be extremely rare and *_no_tracking
// helpers will be added only as needed.
//
// TODO: Bug 2594854:
// TODO: Bug 2583279: Remove macro when bugs are fixed
#define uvm_down_read_trylock_no_tracking(uvm_sem) ({                               \
        typeof(uvm_sem) _sem = (uvm_sem);                                           \
        down_read_trylock(&_sem->sem);                                              \
    })

// trylock for writing: returns 1 if successful, 0 if not.  Out-of-order lock
// acquisition via this function is legal, i.e. the lock order checker will
// allow it.  However, if an out-of-order lock acquisition attempt fails, it is
// the caller's responsibility to back off at least to the point where the
// next held lower-order lock is released.
#define uvm_down_write_trylock(uvm_sem) ({                                             \
        typeof(uvm_sem) _sem = (uvm_sem);                                              \
        int locked;                                                                    \
        uvm_record_lock(_sem, UVM_LOCK_FLAGS_MODE_EXCLUSIVE | UVM_LOCK_FLAGS_TRYLOCK); \
        locked = down_write_trylock(&_sem->sem);                                       \
        if (locked == 0)                                                               \
            uvm_record_unlock(_sem, UVM_LOCK_FLAGS_MODE_EXCLUSIVE);                    \
        else                                                                           \
            uvm_assert_rwsem_locked_write(_sem);                                       \
        locked;                                                                        \
    })

#define uvm_up_write(uvm_sem) ({                                \
        typeof(uvm_sem) _sem = (uvm_sem);                       \
        uvm_assert_rwsem_locked_write(_sem);                    \
        up_write(&_sem->sem);                                   \
        uvm_record_unlock(_sem, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
    })

#define uvm_downgrade_write(uvm_sem) ({                 \
        typeof(uvm_sem) _sem = (uvm_sem);               \
        uvm_assert_rwsem_locked_write(_sem);            \
        downgrade_write(&_sem->sem);                    \
        uvm_record_downgrade(_sem);                     \
    })

typedef struct
{
    struct mutex m;
#if UVM_IS_DEBUG()
    uvm_lock_order_t lock_order;
#endif
} uvm_mutex_t;

// Note that this is a macro, not an inline or static function so the
// "uvm_mutex" argument is substituted as text. If this is invoked with
// uvm_mutex_is_locked(_mutex) then we get code "_mutex = _mutex" and _mutex is
// initialized to NULL. Avoid this by using a name unlikely to be the same as
// the string passed to "uvm_mutex".
// See uvm_mutex_lock() and uvm_mutex_unlock() below as examples.
//
#define uvm_mutex_is_locked(uvm_mutex) ({                                                           \
        typeof(uvm_mutex) _mutex_ = (uvm_mutex);                                                    \
        (mutex_is_locked(&_mutex_->m) && uvm_check_locked(_mutex_, UVM_LOCK_FLAGS_MODE_EXCLUSIVE)); \
    })

#define uvm_assert_mutex_locked(uvm_mutex) UVM_ASSERT(uvm_mutex_is_locked(uvm_mutex))
#define uvm_assert_mutex_unlocked(uvm_mutex) UVM_ASSERT(!mutex_is_locked(&(uvm_mutex)->m))

//
// Linux kernel mutexes cannot be used with interrupts disabled. Doing so
// can lead to deadlocks.
// To warn about mutex usages with interrupts disabled, the following
// macros and inline functions wrap around the raw kernel mutex operations
// in order to check if the interrupts have been disabled and assert if so.
//
// TODO: Bug 2690258: evaluate whether !irqs_disabled() && !in_interrupt() is
//       enough.
//
#define uvm_assert_mutex_interrupts() ({                                                                        \
        UVM_ASSERT_MSG(!irqs_disabled() && !in_interrupt(), "Mutexes cannot be used with interrupts disabled"); \
    })

#define uvm_mutex_init(mutex, order) ({                \
        uvm_mutex_t *mutex_ ## order = (mutex);        \
        mutex_init(&mutex_ ## order->m);               \
        uvm_lock_debug_init(mutex, order);             \
        uvm_assert_mutex_unlocked(mutex);              \
    })

#define uvm_mutex_lock(mutex) ({                                \
        typeof(mutex) _mutex = (mutex);                         \
        uvm_assert_mutex_interrupts();                          \
        uvm_record_lock(_mutex, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
        mutex_lock(&_mutex->m);                                 \
        uvm_assert_mutex_locked(_mutex);                        \
    })

// Lock while already holding a lock of the same order taken with
// uvm_mutex_lock() variant. Note this shouldn't be used if the held lock was
// taken with uvm_mutex_lock_nested() because we only support a single level of
// nesting. This should be extremely rare and *_nested helpers will only be
// added as needed.
#define uvm_mutex_lock_nested(mutex) ({         \
        uvm_assert_mutex_interrupts();          \
        mutex_lock_nested(&(mutex)->m, 1);      \
    })

#define uvm_mutex_trylock(mutex) ({                                                      \
        typeof(mutex) _mutex = (mutex);                                                  \
        int locked;                                                                      \
        uvm_record_lock(_mutex, UVM_LOCK_FLAGS_MODE_EXCLUSIVE | UVM_LOCK_FLAGS_TRYLOCK); \
        locked = mutex_trylock(&_mutex->m);                                              \
        if (locked == 0)                                                                 \
            uvm_record_unlock(_mutex, UVM_LOCK_FLAGS_MODE_EXCLUSIVE);                    \
        else                                                                             \
            uvm_assert_mutex_locked(_mutex);                                             \
        locked;                                                                          \
    })

#define uvm_mutex_unlock(mutex) ({                                \
        typeof(mutex) _mutex = (mutex);                           \
        uvm_assert_mutex_interrupts();                            \
        uvm_assert_mutex_locked(_mutex);                          \
        mutex_unlock(&_mutex->m);                                 \
        uvm_record_unlock(_mutex, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
    })
#define uvm_mutex_unlock_out_of_order(mutex) ({                                \
        typeof(mutex) _mutex = (mutex);                                        \
        uvm_assert_mutex_interrupts();                                         \
        uvm_assert_mutex_locked(_mutex);                                       \
        mutex_unlock(&_mutex->m);                                              \
        uvm_record_unlock_out_of_order(_mutex, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
    })

// Unlock w/o any tracking.
#define uvm_mutex_unlock_nested(mutex) ({       \
        uvm_assert_mutex_interrupts();          \
        mutex_unlock(&(mutex)->m);              \
    })
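
// Example usage (illustrative sketch; my_mutex and its choice of a leaf lock
// order are hypothetical):
//
//     uvm_mutex_t my_mutex;
//
//     uvm_mutex_init(&my_mutex, UVM_LOCK_ORDER_LEAF);
//
//     uvm_mutex_lock(&my_mutex);
//     // ... exclusive critical section; never with interrupts disabled ...
//     uvm_mutex_unlock(&my_mutex);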

typedef struct
{
    struct semaphore sem;
#if UVM_IS_DEBUG()
    uvm_lock_order_t lock_order;
#endif
} uvm_semaphore_t;

#define uvm_sema_init(semaphore, val, order) ({         \
        uvm_semaphore_t *sem_ ## order = (semaphore);   \
        sema_init(&sem_ ## order->sem, (val));          \
        uvm_lock_debug_init(semaphore, order);          \
    })

#define uvm_sem_is_locked(uvm_sem) uvm_check_locked(uvm_sem, UVM_LOCK_FLAGS_MODE_SHARED)

#define uvm_down(uvm_sem) ({                               \
        typeof(uvm_sem) _sem = (uvm_sem);                  \
        uvm_record_lock(_sem, UVM_LOCK_FLAGS_MODE_SHARED); \
        down(&_sem->sem);                                  \
    })

#define uvm_up(uvm_sem) ({                                   \
        typeof(uvm_sem) _sem = (uvm_sem);                    \
        UVM_ASSERT(uvm_sem_is_locked(_sem));                 \
        up(&_sem->sem);                                      \
        uvm_record_unlock(_sem, UVM_LOCK_FLAGS_MODE_SHARED); \
    })
#define uvm_up_out_of_order(uvm_sem) ({                                   \
        typeof(uvm_sem) _sem = (uvm_sem);                                 \
        UVM_ASSERT(uvm_sem_is_locked(_sem));                              \
        up(&_sem->sem);                                                   \
        uvm_record_unlock_out_of_order(_sem, UVM_LOCK_FLAGS_MODE_SHARED); \
    })
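
// Example usage (illustrative sketch, modeled on the concurrent push semaphore
// described in the locking order comment; the value 4 is arbitrary):
//
//     uvm_semaphore_t push_sema;
//
//     uvm_sema_init(&push_sema, 4, UVM_LOCK_ORDER_PUSH);
//
//     uvm_down(&push_sema);
//     // ... at most four threads are in this section at once ...
//     uvm_up(&push_sema);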


// A regular spinlock
// Locked/unlocked with uvm_spin_lock()/uvm_spin_unlock()
typedef struct
{
    spinlock_t lock;
#if UVM_IS_DEBUG()
    uvm_lock_order_t lock_order;
#endif
} uvm_spinlock_t;

// A separate spinlock type for spinlocks that need to disable interrupts. For
// guaranteed correctness and convenience embed the saved and restored irq state
// in the lock itself.
// Locked/unlocked with uvm_spin_lock_irqsave()/uvm_spin_unlock_irqrestore()
typedef struct
{
    spinlock_t lock;
    unsigned long irq_flags;
#if UVM_IS_DEBUG()
    uvm_lock_order_t lock_order;
#endif
} uvm_spinlock_irqsave_t;

// Asserts that the spinlock is held. Notably the macros below support both
// types of spinlocks.
// Note that this is a macro, not an inline or static function so the
// "spinlock" argument is substituted as text. If this is invoked with
// uvm_assert_spinlock_locked(_lock) then we get code "_lock = _lock"
// and _lock is initialized to NULL. Avoid this by using a name unlikely to
// be the same as the string passed to "spinlock".
// See uvm_spin_lock() and uvm_spin_unlock() below as examples.
//
#define uvm_assert_spinlock_locked(spinlock) ({                              \
        typeof(spinlock) _lock_ = (spinlock);                                \
        UVM_ASSERT(spin_is_locked(&_lock_->lock));                           \
        UVM_ASSERT(uvm_check_locked(_lock_, UVM_LOCK_FLAGS_MODE_EXCLUSIVE)); \
    })

#define uvm_assert_spinlock_unlocked(spinlock) UVM_ASSERT(!spin_is_locked(&(spinlock)->lock))

#define uvm_spin_lock_init(spinlock, order) ({                  \
            uvm_spinlock_t *spinlock_ ## order = (spinlock);    \
            spin_lock_init(&spinlock_ ## order->lock);          \
            uvm_lock_debug_init(spinlock, order);               \
            uvm_assert_spinlock_unlocked(spinlock);             \
    })

#define uvm_spin_lock(uvm_lock) ({                             \
        typeof(uvm_lock) _lock = (uvm_lock);                   \
        uvm_record_lock(_lock, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
        spin_lock(&_lock->lock);                               \
        uvm_assert_spinlock_locked(_lock);                     \
    })

#define uvm_spin_unlock(uvm_lock) ({                             \
        typeof(uvm_lock) _lock = (uvm_lock);                     \
        uvm_assert_spinlock_locked(_lock);                       \
        spin_unlock(&_lock->lock);                               \
        uvm_record_unlock(_lock, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
    })

#define uvm_spin_lock_irqsave_init(spinlock, order) ({                  \
            uvm_spinlock_irqsave_t *spinlock_ ## order = (spinlock);    \
            spin_lock_init(&spinlock_ ## order->lock);                  \
            uvm_lock_debug_init(spinlock, order);                       \
            uvm_assert_spinlock_unlocked(spinlock);                     \
    })

// Use a temp to not rely on flags being written after acquiring the lock.
#define uvm_spin_lock_irqsave(uvm_lock) ({                     \
        typeof(uvm_lock) _lock = (uvm_lock);                   \
        unsigned long irq_flags;                               \
        uvm_record_lock(_lock, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
        spin_lock_irqsave(&_lock->lock, irq_flags);            \
        _lock->irq_flags = irq_flags;                          \
        uvm_assert_spinlock_locked(_lock);                     \
    })

// Use a temp to not rely on flags being read before releasing the lock.
#define uvm_spin_unlock_irqrestore(uvm_lock) ({                  \
        typeof(uvm_lock) _lock = (uvm_lock);                     \
        unsigned long irq_flags = _lock->irq_flags;              \
        uvm_assert_spinlock_locked(_lock);                       \
        spin_unlock_irqrestore(&_lock->lock, irq_flags);         \
        uvm_record_unlock(_lock, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
    })
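
// Example usage (illustrative sketch; my_lock and its choice of a leaf lock
// order are hypothetical):
//
//     uvm_spinlock_irqsave_t my_lock;
//
//     uvm_spin_lock_irqsave_init(&my_lock, UVM_LOCK_ORDER_LEAF);
//
//     uvm_spin_lock_irqsave(&my_lock);
//     // ... short critical section, runs with interrupts disabled ...
//     uvm_spin_unlock_irqrestore(&my_lock);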

// Wrapper for a reader-writer spinlock that disables and enables interrupts
typedef struct
{
    rwlock_t lock;

    // This flags variable is only used by writers, since concurrent readers may
    // have different values.
    unsigned long irq_flags;

#if UVM_IS_DEBUG()
    uvm_lock_order_t lock_order;

    // The kernel doesn't provide a function to tell if an rwlock_t is locked,
    // so we create our own.
    atomic_t lock_count;
#endif
} uvm_rwlock_irqsave_t;

static bool uvm_rwlock_irqsave_is_locked(uvm_rwlock_irqsave_t *rwlock)
{
#if UVM_IS_DEBUG()
    return atomic_read(&rwlock->lock_count) > 0;
#else
    return false;
#endif
}

static void uvm_rwlock_irqsave_inc(uvm_rwlock_irqsave_t *rwlock)
{
#if UVM_IS_DEBUG()
    atomic_inc(&rwlock->lock_count);
#endif
}

static void uvm_rwlock_irqsave_dec(uvm_rwlock_irqsave_t *rwlock)
{
#if UVM_IS_DEBUG()
    atomic_dec(&rwlock->lock_count);
#endif
}

#define uvm_assert_rwlock_locked(uvm_rwlock) \
    UVM_ASSERT(uvm_rwlock_irqsave_is_locked(uvm_rwlock) && uvm_check_locked(uvm_rwlock, UVM_LOCK_FLAGS_MODE_ANY))
#define uvm_assert_rwlock_locked_read(uvm_rwlock) \
    UVM_ASSERT(uvm_rwlock_irqsave_is_locked(uvm_rwlock) && uvm_check_locked(uvm_rwlock, UVM_LOCK_FLAGS_MODE_SHARED))
#define uvm_assert_rwlock_locked_write(uvm_rwlock) \
    UVM_ASSERT(uvm_rwlock_irqsave_is_locked(uvm_rwlock) && uvm_check_locked(uvm_rwlock, UVM_LOCK_FLAGS_MODE_EXCLUSIVE))

#if UVM_IS_DEBUG()
    #define uvm_assert_rwlock_unlocked(uvm_rwlock) UVM_ASSERT(!uvm_rwlock_irqsave_is_locked(uvm_rwlock))
#else
    #define uvm_assert_rwlock_unlocked(uvm_rwlock)
#endif

#define uvm_rwlock_irqsave_init(rwlock, order) ({               \
            uvm_rwlock_irqsave_t *rwlock_ ## order = rwlock;    \
            rwlock_init(&rwlock_ ## order->lock);               \
            uvm_lock_debug_init(rwlock, order);                 \
            uvm_assert_rwlock_unlocked(rwlock);                 \
        })

// We can't store the irq_flags within the lock itself for readers, so they must
// pass in their flags.
#define uvm_read_lock_irqsave(uvm_rwlock, irq_flags) ({     \
        typeof(uvm_rwlock) _lock = (uvm_rwlock);            \
        uvm_record_lock(_lock, UVM_LOCK_FLAGS_MODE_SHARED); \
        read_lock_irqsave(&_lock->lock, irq_flags);         \
        uvm_rwlock_irqsave_inc(uvm_rwlock);                 \
        uvm_assert_rwlock_locked_read(_lock);               \
    })

#define uvm_read_unlock_irqrestore(uvm_rwlock, irq_flags) ({    \
        typeof(uvm_rwlock) _lock = (uvm_rwlock);                \
        uvm_assert_rwlock_locked_read(_lock);                   \
        uvm_rwlock_irqsave_dec(uvm_rwlock);                     \
        read_unlock_irqrestore(&_lock->lock, irq_flags);        \
        uvm_record_unlock(_lock, UVM_LOCK_FLAGS_MODE_SHARED);   \
    })

// Use a temp to not rely on flags being written after acquiring the lock.
#define uvm_write_lock_irqsave(uvm_rwlock) ({                   \
        typeof(uvm_rwlock) _lock = (uvm_rwlock);                \
        unsigned long irq_flags;                                \
        uvm_record_lock(_lock, UVM_LOCK_FLAGS_MODE_EXCLUSIVE);  \
        write_lock_irqsave(&_lock->lock, irq_flags);            \
        uvm_rwlock_irqsave_inc(uvm_rwlock);                     \
        _lock->irq_flags = irq_flags;                           \
        uvm_assert_rwlock_locked_write(_lock);                  \
    })

// Use a temp to not rely on flags being read before releasing the lock.
#define uvm_write_unlock_irqrestore(uvm_rwlock) ({                  \
        typeof(uvm_rwlock) _lock = (uvm_rwlock);                    \
        unsigned long irq_flags = _lock->irq_flags;                 \
        uvm_assert_rwlock_locked_write(_lock);                      \
        uvm_rwlock_irqsave_dec(uvm_rwlock);                         \
        write_unlock_irqrestore(&_lock->lock, irq_flags);           \
        uvm_record_unlock(_lock, UVM_LOCK_FLAGS_MODE_EXCLUSIVE);    \
    })
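
// Example usage (illustrative sketch; my_rwlock is a hypothetical
// uvm_rwlock_irqsave_t): readers keep irq_flags on their own stack, while
// writers rely on the flags stored in the lock.
//
//     unsigned long irq_flags;
//
//     uvm_read_lock_irqsave(&my_rwlock, irq_flags);
//     // ... read-side critical section ...
//     uvm_read_unlock_irqrestore(&my_rwlock, irq_flags);
//
//     uvm_write_lock_irqsave(&my_rwlock);
//     // ... write-side critical section ...
//     uvm_write_unlock_irqrestore(&my_rwlock);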

// Bit locks are 'compressed' mutexes which take only 1 bit per lock by virtue
// of using shared waitqueues.
typedef struct
{
    unsigned long *bits;

#if UVM_IS_DEBUG()
    uvm_lock_order_t lock_order;
#endif
} uvm_bit_locks_t;

NV_STATUS uvm_bit_locks_init(uvm_bit_locks_t *bit_locks, size_t count, uvm_lock_order_t lock_order);
void uvm_bit_locks_deinit(uvm_bit_locks_t *bit_locks);

// Asserts that the bit lock is held.
//
// TODO: Bug 1766601:
//  - assert for the right ownership (defining the owner might be tricky in
//    the kernel).
#define uvm_assert_bit_locked(bit_locks, bit) ({                             \
    typeof(bit_locks) _bit_locks = (bit_locks);                              \
    typeof(bit) _bit = (bit);                                                \
    UVM_ASSERT(test_bit(_bit, _bit_locks->bits));                            \
    UVM_ASSERT(uvm_check_locked(_bit_locks, UVM_LOCK_FLAGS_MODE_EXCLUSIVE)); \
})

#define uvm_assert_bit_unlocked(bit_locks, bit) ({                      \
    typeof(bit_locks) _bit_locks = (bit_locks);                         \
    typeof(bit) _bit = (bit);                                           \
    UVM_ASSERT(!test_bit(_bit, _bit_locks->bits));                      \
})

static void __uvm_bit_lock(uvm_bit_locks_t *bit_locks, unsigned long bit)
{
    int res;

    res = UVM_WAIT_ON_BIT_LOCK(bit_locks->bits, bit, TASK_UNINTERRUPTIBLE);
    UVM_ASSERT_MSG(res == 0, "Uninterruptible task interrupted: %d\n", res);
    uvm_assert_bit_locked(bit_locks, bit);
}
#define uvm_bit_lock(bit_locks, bit) ({                         \
    typeof(bit_locks) _bit_locks = (bit_locks);                 \
    typeof(bit) _bit = (bit);                                   \
    uvm_record_lock(_bit_locks, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
    __uvm_bit_lock(_bit_locks, _bit);                           \
})

static void __uvm_bit_unlock(uvm_bit_locks_t *bit_locks, unsigned long bit)
{
    uvm_assert_bit_locked(bit_locks, bit);

    clear_bit_unlock(bit, bit_locks->bits);
    // Make sure we don't reorder release with wakeup as it would cause
    // deadlocks (other thread checking lock and adding itself to queue
    // in reversed order). clear_bit_unlock has only release semantics.
    smp_mb__after_atomic();
    wake_up_bit(bit_locks->bits, bit);
}
#define uvm_bit_unlock(bit_locks, bit) ({                         \
    typeof(bit_locks) _bit_locks = (bit_locks);                   \
    typeof(bit) _bit = (bit);                                     \
    __uvm_bit_unlock(_bit_locks, _bit);                           \
    uvm_record_unlock(_bit_locks, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
})
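
// Example usage (illustrative sketch; chunk_locks and chunk_index are
// hypothetical, with chunk_locks previously set up via uvm_bit_locks_init()):
//
//     uvm_bit_lock(&chunk_locks, chunk_index);
//     // ... serialize work on the chunk identified by chunk_index ...
//     uvm_bit_unlock(&chunk_locks, chunk_index);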

#endif // __UVM_LOCK_H__