1 /*******************************************************************************
2 Copyright (c) 2015-2022 NVIDIA Corporation
3
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to
6 deal in the Software without restriction, including without limitation the
7 rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8 sell copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10
11 The above copyright notice and this permission notice shall be
12 included in all copies or substantial portions of the Software.
13
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 DEALINGS IN THE SOFTWARE.
21
22 *******************************************************************************/
23
24 #ifndef __UVM_LOCK_H__
25 #define __UVM_LOCK_H__
26
27 #include "uvm_forward_decl.h"
28 #include "uvm_linux.h"
29 #include "uvm_common.h"
30
31 // --------------------------- UVM Locking Order ---------------------------- //
32 //
33 // Any locks described here should have their locking order added to
34 // uvm_lock_order_t below.
35 //
36 // - Global power management lock (g_uvm_global.pm.lock)
37 // Order: UVM_LOCK_ORDER_GLOBAL_PM
38 // Reader/write lock (rw_semaphore)
39 //
40 // Synchronizes user threads with system power management.
41 //
42 // Taken in read mode by most user-facing UVM driver entry points. Taken
43 // in write mode by uvm_suspend(), only, and held for the duration of
44 // sleep cycles.
45 //
46 // This lock is special: while it's taken by user-facing entry points,
47 // and may be taken before or after mmap_lock, this apparent violation of
48 // lock ordering is permissible because pm_lock may only be taken via
49 // trylock in read mode by paths which already hold any lower-level
50 // locks, as well as by paths subject to the kernel's freezer. Paths
51 // taking it must be prepared to back off in case of acquisition failures.
52 //
53 // This, in turn, is acceptable because the lock is taken in write mode
// infrequently, and only as part of power management. Starvation is
55 // not a concern.
56 //
// The mmap_lock deadlock potential aside, the trylock approach is also
58 // motivated by the need to prevent user threads making UVM system calls
59 // from blocking when UVM is suspended: when the kernel suspends the
60 // system, the freezer employed to stop user tasks requires these tasks
61 // to be interruptible.
62 //
63 // - Global driver state lock (g_uvm_global.global_lock)
64 // Order: UVM_LOCK_ORDER_GLOBAL
65 // Exclusive lock (mutex)
66 //
67 // This protects state associated with GPUs, such as the P2P table
68 // and instance pointer mappings.
69 //
70 // This should be taken whenever global GPU state might need to be modified.
71 //
72 // - GPU ISR lock
73 // Order: UVM_LOCK_ORDER_ISR
74 // Exclusive lock (mutex) per gpu
75 //
76 // Protects:
77 // - gpu->parent->isr.replayable_faults.service_lock:
78 // Changes to the state of a GPU as it transitions from top-half to
79 // bottom-half interrupt handler for replayable faults. This lock is
80 // acquired for that GPU, in the ISR top-half. Then a bottom-half is
81 // scheduled (to run in a workqueue). Then the bottom-half releases the
82 // lock when that GPU's processing appears to be done.
83 //
84 // - gpu->parent->isr.non_replayable_faults.service_lock:
85 // Changes to the state of a GPU in the bottom-half for non-replayable
86 // faults. Non-replayable faults are handed-off from RM instead of
87 // directly from the GPU hardware. This means that we do not keep
88 // receiving interrupts after RM pops out the faults from the HW buffer.
89 // In order not to miss fault notifications, we will always schedule a
90 // bottom-half for non-replayable faults if there are faults ready to be
91 // consumed in the buffer, even if there already is some bottom-half
92 // running or scheduled. This lock serializes all scheduled bottom halves
93 // per GPU which service non-replayable faults.
94 //
95 // - gpu->parent->isr.access_counters.service_lock:
96 // Changes to the state of a GPU as it transitions from top-half to
97 // bottom-half interrupt handler for access counter notifications. This
98 // lock is acquired for that GPU, in the ISR top-half. Then a bottom-half
99 // is scheduled (to run in a workqueue). Then the bottom-half releases
100 // the lock when that GPU's processing appears to be done.
101 //
102 // - mmap_lock (mmap_sem in kernels < 5.8)
103 // Order: UVM_LOCK_ORDER_MMAP_LOCK
104 // Reader/writer lock (rw_semaphore)
105 //
106 // We're often called with the kernel already holding mmap_lock: mmap,
107 // munmap, CPU fault, etc. These operations may have to take any number of
108 // UVM locks, so mmap_lock requires special consideration in the lock
109 // order, since it's sometimes out of our control.
110 //
111 // We need to hold mmap_lock when calling vm_insert_page, which means that
112 // any time an operation (such as an ioctl) might need to install a CPU
113 // mapping, it must take mmap_lock in read mode very early on.
114 //
115 // However, current->mm is not necessarily the owning mm of the UVM vma.
// fork or fd passing via a UNIX domain socket can cause that. Notably, this
117 // is also the case when handling GPU faults or doing other operations from
118 // a kernel thread. In some cases we have an mm associated with a VA space,
119 // and in those cases we lock that mm instead of current->mm. But since we
120 // don't always have that luxury, each path specifies the mm to use (either
121 // explicitly or via uvm_va_block_context_t::mm). That mm may be NULL.
122 // Later on down the stack we look up the UVM vma and compare its mm before
123 // operating on that vma.
124 //
125 // With HMM and ATS, the GPU fault handler takes mmap_lock. GPU faults may
126 // block forward progress of threads holding the RM GPUs lock until those
127 // faults are serviced, which means that mmap_lock cannot be held when the
128 // UVM driver calls into RM. In other words, mmap_lock and the RM GPUs lock
129 // are mutually exclusive.
130 //
131 // - Global VA spaces list lock
132 // Order: UVM_LOCK_ORDER_VA_SPACES_LIST
133 // Mutex which protects g_uvm_global.va_spaces state.
134 //
135 // - VA space writer serialization lock (va_space->serialize_writers_lock)
136 // Order: UVM_LOCK_ORDER_VA_SPACE_SERIALIZE_WRITERS
137 // Exclusive lock (mutex) per uvm_va_space (UVM struct file)
138 //
139 // This lock prevents a deadlock between RM and UVM by only allowing one
140 // writer to queue up on the VA space lock at a time.
141 //
142 // GPU faults are serviced by the UVM bottom half with the VA space lock
143 // held in read mode. Until they're serviced, these faults may block
144 // forward progress of RM threads.
145 //
146 // This constraint means that the UVM driver cannot call into RM while
147 // GPU fault servicing is blocked. We may block GPU fault servicing by:
148 // - Taking the VA space lock in write mode
149 // - Holding the VA space lock in read mode with a writer pending, since
150 // Linux rw_semaphores are fair.
151 //
152 // Example of the second condition:
153 // Thread A Thread B UVM BH Thread C
154 // UVM API call UVM API call GPU fault RM API call
155 // ------------ ------------ ------------ ------------
156 // down_read
157 // down_write
158 // // Blocked on A
159 // down_read
160 // // Blocked on B
161 // RM GPU lock
162 // // Blocked on GPU fault
163 // RM GPU lock
164 // // Deadlock
165 //
166 // The writer serialization lock works around this by biasing the VA space
167 // lock towards readers, without causing starvation of writers. Writers and
168 // readers which will make RM calls take this lock, which prevents them
169 // from queueing up on the VA space rw_semaphore and blocking the UVM
170 // bottom half.
171 //
172 // TODO: Bug 1799173: A better long-term approach might be to never allow
173 // RM calls under the VA space lock at all, but that will take a
174 // larger restructuring.
175 //
176 // - VA space serialization of down_read with up_write of the VA space lock
177 // (va_space->read_acquire_write_release_lock)
178 // Order: UVM_LOCK_ORDER_VA_SPACE_READ_ACQUIRE_WRITE_RELEASE_LOCK
179 // Exclusive lock (mutex) per uvm_va_space (UVM struct file)
180 //
181 // This lock prevents a deadlock between RM and UVM by preventing any
182 // interleaving of down_reads on the VA space lock with concurrent
183 // up_writes/downgrade_writes. The Linux rw_semaphore implementation does
184 // not guarantee that two readers will always run concurrently, as shown by
185 // the following interleaving:
186 //
187 // Thread A Thread B
188 // UVM API call UVM BH
189 // ------------ ------------
190 // down_write
191 // down_read
192 // // Fails, calls handler
193 // up_write
194 // down_read
195 // // Success
196 // // Handler sees the lock still active
197 // // Handler waits for lock to be released
198 // // Blocked on A
199 // RM GPU lock
200 // // Blocked on GPU fault
201 //
202 // Given the above interleaving, the kernel's implementation of the
203 // down_read failure handler running in thread B does not distinguish
204 // between a reader vs writer holding the lock. From the perspective of all
205 // other threads, even those which attempt to take the lock for read while
206 // thread A's reader holds it, a writer is active. Therefore no other
207 // readers can take the lock, and we result in the same deadlock described
208 // in the above comments on the VA space writer serialization lock.
209 //
210 // This lock prevents any such interleaving:
211 // - Writers take this lock for the duration of the write lock.
212 //
213 // - Readers which do not call into RM only take this lock across the
214 // down_read call. If a writer holds the lock, the reader would be
215 // blocked on the VA space lock anyway. Concurrent readers will serialize
216 // the taking of the VA space lock, but they will not be serialized
217 // across their read sections.
218 //
219 // - Readers which call into RM do not need to take this lock. Their
220 // down_read is already serialized with a writer's up_write by the
221 // serialize_writers_lock.
222 //
223 // - VA space lock (va_space->lock)
224 // Order: UVM_LOCK_ORDER_VA_SPACE
225 // Reader/writer lock (rw_semaphore) per uvm_va_space (UVM struct file)
226 //
227 // This is the UVM equivalent of mmap_lock. It protects all state under
228 // that va_space, such as the VA range tree.
229 //
230 // Read mode: Faults (CPU and GPU), mapping creation, prefetches. These
231 // will be serialized at the VA block level if necessary. RM calls are
232 // allowed only if the VA space serialize_writers_lock is also taken.
233 //
234 // Write mode: Modification of the range state such as mmap and changes to
235 // logical permissions or location preferences. RM calls are never allowed.
236 //
237 // - External Allocation Tree lock
238 // Order: UVM_LOCK_ORDER_EXT_RANGE_TREE
239 // Exclusive lock (mutex) per external VA range, per GPU.
240 //
241 // Protects the per-GPU sub-range tree mappings in each external VA range.
242 //
243 // - GPU semaphore pool lock (semaphore_pool->mutex)
244 // Order: UVM_LOCK_ORDER_GPU_SEMAPHORE_POOL
245 // Exclusive lock (mutex) per uvm_gpu_semaphore_pool
246 //
247 // Protects the state of the semaphore pool.
248 //
249 // - RM API lock
250 // Order: UVM_LOCK_ORDER_RM_API
251 // Exclusive lock
252 //
253 // This is an internal RM lock that's acquired by most if not all UVM-RM
254 // APIs.
255 // Notably this lock is also held on PMA eviction.
256 //
257 // - RM GPUs lock
258 // Order: UVM_LOCK_ORDER_RM_GPUS
259 // Exclusive lock
260 //
261 // This is an internal RM lock that's acquired by most if not all UVM-RM
262 // APIs and disables interrupts for the GPUs.
263 // Notably this lock is *not* held on PMA eviction.
264 //
265 // - VA block lock (va_block->lock)
266 // Order: UVM_LOCK_ORDER_VA_BLOCK
267 // Exclusive lock (mutex)
268 //
269 // Protects:
270 // - CPU and GPU page table mappings for all VAs under the block
271 // - Updates to the GPU work tracker for that block (migrations)
272 //
273 // Operations allowed while holding the lock:
274 // - CPU allocation (we don't evict CPU memory)
275 // - GPU memory allocation which cannot evict
276 // - CPU page table mapping/unmapping
277 // - Pushing work (GPU page table mapping/unmapping)
278 //
279 // Operations not allowed while holding the lock:
280 // - GPU memory allocation which can evict memory (would require nesting
281 // block locks)
282 //
283 // - GPU DMA Allocation pool lock (gpu->conf_computing.dma_buffer_pool.lock)
284 // Order: UVM_LOCK_ORDER_CONF_COMPUTING_DMA_BUFFER_POOL
285 // Condition: The Confidential Computing feature is enabled
286 // Exclusive lock (mutex)
287 //
288 // Protects:
289 // - Protect the state of the uvm_conf_computing_dma_buffer_pool_t
290 //
291 // - Chunk mapping lock (gpu->root_chunk_mappings.bitlocks and
292 // gpu->sysmem_mappings.bitlock)
293 // Order: UVM_LOCK_ORDER_CHUNK_MAPPING
294 // Exclusive bitlock (mutex) per each root chunk, or physical sysmem
295 // segment.
296 //
297 // A chunk mapping lock is used to enforce serialization when updating
298 // kernel mappings of GPU root chunks (vidmem), or CPU chunks (sysmem).
299 // The VA block lock is usually held during the mapping operation.
300 //
301 // In the case of vidmem, each lock in the bitlock array serializes the
302 // mapping and unmapping of a single GPU root chunk. If serialization
303 // is required to update a root chunk, but no mappings are involved, use
304 // the PMM root chunk lock (order UVM_LOCK_ORDER_PMM_ROOT_CHUNK) instead.
305 //
306 // In the case of sysmem, each lock in the array serializes the mapping
307 // of a large segment of system address space: the locking granularity is
308 // significantly coarser than the CPU chunk size.
309 //
310 // - Page tree lock
311 // Order: UVM_LOCK_ORDER_PAGE_TREE
312 // Exclusive lock per GPU page tree
313 //
314 // This protects a page tree. All modifications to the device's page tree
315 // and the host-side cache of that tree must be done under this lock.
316 // The host-side cache and device state must be consistent when this lock
317 // is released
318 //
319 // Operations allowed while holding this lock
320 // - Pushing work
321 //
322 // Operations not allowed while holding this lock
323 // - GPU memory allocation which can evict
324 //
325 // - Channel pool key rotation lock
326 // Order: UVM_LOCK_ORDER_KEY_ROTATION
327 // Condition: Confidential Computing is enabled
328 // Mutex per channel pool
329 //
330 // The lock ensures mutual exclusion during key rotation affecting all the
331 // channels in the associated pool. Key rotation in WLC pools is handled
332 // using a separate lock order, see UVM_LOCK_ORDER_KEY_ROTATION_WLC below.
333 //
334 // - CE channel CSL channel pool semaphore
335 // Order: UVM_LOCK_ORDER_CSL_PUSH
336 // Condition: The Confidential Computing feature is enabled
337 // Semaphore per CE channel pool
338 //
// The semaphore controls concurrent pushes to CE channels that are not WLC
340 // channels. Secure work submission depends on channel availability in
341 // GPFIFO entries (as in any other channel type) but also on channel
342 // locking. Each channel has a lock to enforce ordering of pushes. The
343 // channel's CSL lock is taken on channel reservation until uvm_push_end.
344 // When the Confidential Computing feature is enabled, channels are
345 // stateful, and the CSL lock protects their CSL state/context.
346 //
347 // Operations allowed while holding this lock
348 // - Pushing work to CE channels (except for WLC channels)
349 //
350 // - WLC channel pool key rotation lock
351 // Order: UVM_LOCK_ORDER_KEY_ROTATION_WLC
352 // Condition: Confidential Computing is enabled
353 // Mutex of WLC channel pool
354 //
355 // The lock has the same purpose as the regular channel pool key rotation
356 // lock. Using a different order lock for WLC channels allows key rotation
357 // on those channels during indirect work submission.
358 //
359 // - WLC CSL channel pool semaphore
360 // Order: UVM_LOCK_ORDER_CSL_WLC_PUSH
361 // Condition: The Confidential Computing feature is enabled
362 // Semaphore per WLC channel pool
363 //
364 // The semaphore controls concurrent pushes to WLC channels. WLC work
365 // submission depends on channel availability in GPFIFO entries (as in any
366 // other channel type) but also on channel locking. Each WLC channel has a
367 // lock to enforce ordering of pushes. The channel's CSL lock is taken on
// channel reservation until uvm_push_end. WLC channels are stateful
// channels and the CSL lock protects their CSL state/context.
370 //
371 // This lock ORDER is different and sits below the generic channel CSL
372 // lock and above the SEC2 CSL lock. This reflects the dual nature of WLC
373 // channels; they use SEC2 indirect work launch during initialization,
374 // and after their schedule is initialized they provide indirect launch
375 // functionality to other CE channels.
376 //
377 // Operations allowed while holding this lock
378 // - Pushing work to WLC channels
379 //
380 // - SEC2 CSL channel pool semaphore
// Order: UVM_LOCK_ORDER_CSL_SEC2_PUSH
382 // Condition: The Confidential Computing feature is enabled
383 // Semaphore per SEC2 channel pool
384 //
385 // The semaphore controls concurrent pushes to SEC2 channels. SEC2 work
386 // submission depends on channel availability in GPFIFO entries (as in any
387 // other channel type) but also on channel locking. Each SEC2 channel has a
388 // lock to enforce ordering of pushes. The channel's CSL lock is taken on
389 // channel reservation until uvm_push_end. SEC2 channels are stateful
390 // channels and the CSL lock protects their CSL state/context.
391 //
392 // This lock ORDER is different and lower than UVM_LOCK_ORDER_CSL_PUSH
393 // to allow secure work submission to use a SEC2 channel to submit
394 // work before releasing the CSL lock of the originating channel.
395 //
396 // Operations allowed while holding this lock
397 // - Pushing work to SEC2 channels
398 //
399 // - Concurrent push semaphore
400 // Order: UVM_LOCK_ORDER_PUSH
401 // Semaphore (uvm_semaphore_t)
402 //
403 // This is a semaphore limiting the amount of concurrent pushes that is
404 // held for the duration of a push (between uvm_push_begin*() and
405 // uvm_push_end()).
406 //
407 // - PMM GPU lock (pmm->lock)
408 // Order: UVM_LOCK_ORDER_PMM
409 // Exclusive lock (mutex) per uvm_pmm_gpu_t
410 //
411 // Protects the state of PMM - internal to PMM.
412 //
413 // - PMM GPU PMA lock (pmm->pma_lock)
414 // Order: UVM_LOCK_ORDER_PMM_PMA
// Reader/writer lock (rw_semaphore) per uvm_pmm_gpu_t
416 //
417 // Lock internal to PMM for synchronizing allocations from PMA with
418 // PMA eviction.
419 //
420 // - PMM root chunk lock (pmm->root_chunks.bitlocks)
421 // Order: UVM_LOCK_ORDER_PMM_ROOT_CHUNK
422 // Exclusive bitlock (mutex) per each root chunk internal to PMM.
423 //
424 // - Channel lock
425 // Order: UVM_LOCK_ORDER_CHANNEL
426 // Spinlock (uvm_spinlock_t) or exclusive lock (mutex)
427 //
428 // Lock protecting the state of all the channels in a channel pool. The
429 // channel pool lock documentation contains the guidelines about which lock
430 // type (mutex or spinlock) to use.
431 //
432 // - WLC Channel lock
433 // Order: UVM_LOCK_ORDER_WLC_CHANNEL
434 // Condition: The Confidential Computing feature is enabled
435 // Spinlock (uvm_spinlock_t)
436 //
437 // Lock protecting the state of WLC channels in a channel pool. This lock
438 // is separate from the generic channel lock (UVM_LOCK_ORDER_CHANNEL)
439 // to allow for indirect worklaunch pushes while holding the main channel
440 // lock (WLC pushes don't need any of the pushbuffer locks described
441 // above)
442 //
443 // - Tools global VA space list lock (g_tools_va_space_list_lock)
444 // Order: UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST
445 // Reader/writer lock (rw_semaphore)
446 //
447 // This lock protects the list of VA spaces used when broadcasting
448 // UVM profiling events.
449 //
450 // - VA space events
451 // Order: UVM_LOCK_ORDER_VA_SPACE_EVENTS
452 // Reader/writer lock (rw_semaphore) per uvm_perf_va_space_events_t.
453 // serializes perf callbacks with event register/unregister. It's separate
454 // from the VA space lock so it can be taken on the eviction path.
455 //
456 // - VA space tools
457 // Order: UVM_LOCK_ORDER_VA_SPACE_TOOLS
458 // Reader/writer lock (rw_semaphore) per uvm_va_space_t. Serializes tools
459 // reporting with tools register/unregister. Since some of the tools
460 // events come from perf events, both VA_SPACE_EVENTS and VA_SPACE_TOOLS
461 // must be taken to register/report some tools events.
462 //
463 // - Tracking semaphores
464 // Order: UVM_LOCK_ORDER_SECURE_SEMAPHORE
465 // Condition: The Confidential Computing feature is enabled
466 //
// CE semaphore payloads are encrypted, and require taking the CSL lock
// (UVM_LOCK_ORDER_LEAF) to decrypt the payload.
//
// - CSL Context
471 // Order: UVM_LOCK_ORDER_CSL_CTX
472 // When the Confidential Computing feature is enabled, encrypt/decrypt
473 // operations to communicate with GPU are handled by the CSL context.
474 // This lock protects RM calls that use this context.
475 //
476 // - Leaf locks
477 // Order: UVM_LOCK_ORDER_LEAF
478 //
479 // All leaf locks.
480 //
481 // -------------------------------------------------------------------------- //
482
// Remember to add any new lock orders to uvm_lock_order_to_string() in
// uvm_lock.c
//
// Orders are listed outermost first: per the locking rules documented above,
// a lock may only be taken while all locks already held by the thread have a
// strictly lower order.
typedef enum
{
    UVM_LOCK_ORDER_INVALID = 0,
    UVM_LOCK_ORDER_GLOBAL_PM,
    UVM_LOCK_ORDER_GLOBAL,
    UVM_LOCK_ORDER_ISR,
    UVM_LOCK_ORDER_MMAP_LOCK,
    UVM_LOCK_ORDER_VA_SPACES_LIST,
    UVM_LOCK_ORDER_VA_SPACE_SERIALIZE_WRITERS,
    UVM_LOCK_ORDER_VA_SPACE_READ_ACQUIRE_WRITE_RELEASE_LOCK,
    UVM_LOCK_ORDER_VA_SPACE,
    UVM_LOCK_ORDER_EXT_RANGE_TREE,
    UVM_LOCK_ORDER_GPU_SEMAPHORE_POOL,
    UVM_LOCK_ORDER_RM_API,
    UVM_LOCK_ORDER_RM_GPUS,

    // NOTE(review): not described in the lock-order documentation above --
    // presumably a VA block migration lock; confirm and document.
    UVM_LOCK_ORDER_VA_BLOCK_MIGRATE,

    UVM_LOCK_ORDER_VA_BLOCK,

    // Only used when the Confidential Computing feature is enabled (see the
    // "Condition:" notes in the documentation above); the same applies to the
    // KEY_ROTATION, CSL_*_PUSH, SECURE_SEMAPHORE and CSL_CTX orders below.
    UVM_LOCK_ORDER_CONF_COMPUTING_DMA_BUFFER_POOL,

    UVM_LOCK_ORDER_CHUNK_MAPPING,
    UVM_LOCK_ORDER_PAGE_TREE,
    UVM_LOCK_ORDER_KEY_ROTATION,
    UVM_LOCK_ORDER_CSL_PUSH,
    UVM_LOCK_ORDER_KEY_ROTATION_WLC,
    UVM_LOCK_ORDER_CSL_WLC_PUSH,
    UVM_LOCK_ORDER_CSL_SEC2_PUSH,
    UVM_LOCK_ORDER_PUSH,
    UVM_LOCK_ORDER_PMM,
    UVM_LOCK_ORDER_PMM_PMA,
    UVM_LOCK_ORDER_PMM_ROOT_CHUNK,
    UVM_LOCK_ORDER_CHANNEL,
    UVM_LOCK_ORDER_WLC_CHANNEL,
    UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST,
    UVM_LOCK_ORDER_VA_SPACE_EVENTS,
    UVM_LOCK_ORDER_VA_SPACE_TOOLS,

    // NOTE(review): not described in the lock-order documentation above --
    // confirm and document.
    UVM_LOCK_ORDER_SEMA_POOL_TRACKER,

    UVM_LOCK_ORDER_SECURE_SEMAPHORE,

    // TODO: Bug 4184836: [uvm][hcc] Remove UVM_LOCK_ORDER_CSL_CTX
    // This lock order can be removed after RM no longer relies on RPC event
    // notifications.
    UVM_LOCK_ORDER_CSL_CTX,

    // All leaf locks
    UVM_LOCK_ORDER_LEAF,

    // Number of lock orders; keep last
    UVM_LOCK_ORDER_COUNT,
} uvm_lock_order_t;
529
// Return a human-readable name for lock_order, e.g. for locking violation
// error prints.
const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order);
531
// Flags qualifying a recorded lock operation or a lock state query.
typedef enum
{
    UVM_LOCK_FLAGS_INVALID = 0,

    // The lock is (or is expected to be) held in exclusive (write) mode
    UVM_LOCK_FLAGS_MODE_EXCLUSIVE = (1 << 0),

    // The lock is (or is expected to be) held in shared (read) mode
    UVM_LOCK_FLAGS_MODE_SHARED = (1 << 1),

    // Either mode is acceptable when checking lock state
    UVM_LOCK_FLAGS_MODE_ANY = (UVM_LOCK_FLAGS_MODE_EXCLUSIVE | UVM_LOCK_FLAGS_MODE_SHARED),

    // Mask covering all mode bits
    UVM_LOCK_FLAGS_MODE_MASK = (UVM_LOCK_FLAGS_MODE_EXCLUSIVE | UVM_LOCK_FLAGS_MODE_SHARED),

    // The unlock may happen out of (reverse-acquisition) order
    UVM_LOCK_FLAGS_OUT_OF_ORDER = (1 << 2),

    // The acquisition is a trylock; out-of-order locking is allowed when
    // checking lockable order (see __uvm_check_lockable_order())
    UVM_LOCK_FLAGS_TRYLOCK = (1 << 3),

    // Mask covering all valid flag bits
    UVM_LOCK_FLAGS_MASK = (1 << 4) - 1
} uvm_lock_flags_t;
543
// Record locking a lock of given lock_order in exclusive or shared mode,
// distinguishing between trylock and normal acquisition attempts.
// Returns true if the recorded lock follows all the locking rules and false
// otherwise.
bool __uvm_record_lock(void *lock, uvm_lock_order_t lock_order, uvm_lock_flags_t flags);

// Record unlocking a lock of given lock_order in exclusive or shared mode and
// possibly out of order.
// Returns true if the unlock follows all the locking rules and false otherwise.
bool __uvm_record_unlock(void *lock, uvm_lock_order_t lock_order, uvm_lock_flags_t flags);

// Record downgrading a lock of given lock_order from exclusive to shared mode.
// Returns true if the downgrade follows all the locking rules and false
// otherwise.
bool __uvm_record_downgrade(void *lock, uvm_lock_order_t lock_order);

// Check whether a lock of given lock_order is held in exclusive, shared, or
// either mode by the current thread.
bool __uvm_check_locked(void *lock, uvm_lock_order_t lock_order, uvm_lock_flags_t flags);

// Check that no locks are held with the given lock order
bool __uvm_check_unlocked_order(uvm_lock_order_t lock_order);

// Check that a lock of the given order can be locked, i.e. that no locks are
// held with the given or deeper lock order. Allow for out-of-order locking
// when checking for a trylock.
bool __uvm_check_lockable_order(uvm_lock_order_t lock_order, uvm_lock_flags_t flags);

// Check that all locks have been released in the given thread context lock
bool __uvm_check_all_unlocked(uvm_thread_context_lock_t *context_lock);

// Check that all locks have been released in the current thread context lock
bool __uvm_thread_check_all_unlocked(void);

// Check that the locking infrastructure has been initialized
bool __uvm_locking_initialized(void);
577
#if UVM_IS_DEBUG()
// These macros are intended to be expanded on the call site directly and will
// print the precise location of the violation while the __uvm_record*
// functions will error print the details.
#define uvm_record_lock_raw(lock, lock_order, flags) \
    UVM_ASSERT_MSG(__uvm_record_lock((lock), (lock_order), (flags)), "Locking violation\n")
#define uvm_record_unlock_raw(lock, lock_order, flags) \
    UVM_ASSERT_MSG(__uvm_record_unlock((lock), (lock_order), (flags)), "Locking violation\n")
#define uvm_record_downgrade_raw(lock, lock_order) \
    UVM_ASSERT_MSG(__uvm_record_downgrade((lock), (lock_order)), "Locking violation\n")

// Record UVM lock (a lock that has a lock_order member) operation and assert
// that it's correct
#define uvm_record_lock(lock, flags) \
    uvm_record_lock_raw((lock), (lock)->lock_order, (flags))
#define uvm_record_unlock(lock, flags) uvm_record_unlock_raw((lock), (lock)->lock_order, (flags))
#define uvm_record_unlock_out_of_order(lock, flags) \
    uvm_record_unlock_raw((lock), (lock)->lock_order, (flags) | UVM_LOCK_FLAGS_OUT_OF_ORDER)
#define uvm_record_downgrade(lock) uvm_record_downgrade_raw((lock), (lock)->lock_order)

// Check whether a UVM lock (a lock that has a lock_order member) is held in
// the given mode.
#define uvm_check_locked(lock, flags) __uvm_check_locked((lock), (lock)->lock_order, (flags))

// Helpers for recording and asserting mmap_lock
// (mmap_sem in kernels < 5.8 ) state
#define uvm_record_lock_mmap_lock_read(mm) \
    uvm_record_lock_raw(nv_mmap_get_lock(mm), UVM_LOCK_ORDER_MMAP_LOCK, UVM_LOCK_FLAGS_MODE_SHARED)

#define uvm_record_unlock_mmap_lock_read(mm) \
    uvm_record_unlock_raw(nv_mmap_get_lock(mm), UVM_LOCK_ORDER_MMAP_LOCK, UVM_LOCK_FLAGS_MODE_SHARED)

#define uvm_record_unlock_mmap_lock_read_out_of_order(mm) \
    uvm_record_unlock_raw(nv_mmap_get_lock(mm), UVM_LOCK_ORDER_MMAP_LOCK, \
                          UVM_LOCK_FLAGS_MODE_SHARED | UVM_LOCK_FLAGS_OUT_OF_ORDER)

#define uvm_record_lock_mmap_lock_write(mm) \
    uvm_record_lock_raw(nv_mmap_get_lock(mm), UVM_LOCK_ORDER_MMAP_LOCK, UVM_LOCK_FLAGS_MODE_EXCLUSIVE)

#define uvm_record_unlock_mmap_lock_write(mm) \
    uvm_record_unlock_raw(nv_mmap_get_lock(mm), UVM_LOCK_ORDER_MMAP_LOCK, UVM_LOCK_FLAGS_MODE_EXCLUSIVE)

#define uvm_record_unlock_mmap_lock_write_out_of_order(mm) \
    uvm_record_unlock_raw(nv_mmap_get_lock(mm), UVM_LOCK_ORDER_MMAP_LOCK, \
                          UVM_LOCK_FLAGS_MODE_EXCLUSIVE | UVM_LOCK_FLAGS_OUT_OF_ORDER)

#define uvm_check_locked_mmap_lock(mm, flags) \
    __uvm_check_locked(nv_mmap_get_lock(mm), UVM_LOCK_ORDER_MMAP_LOCK, (flags))

// Helpers for recording RM API lock usage around UVM-RM interfaces.
// The RM API lock is internal to RM so there is no lock object to point at;
// the lock order value itself is cast to a pointer and used as a unique tag.
#define uvm_record_lock_rm_api() \
    uvm_record_lock_raw((void*)UVM_LOCK_ORDER_RM_API, UVM_LOCK_ORDER_RM_API, \
                        UVM_LOCK_FLAGS_MODE_EXCLUSIVE)
#define uvm_record_unlock_rm_api() \
    uvm_record_unlock_raw((void*)UVM_LOCK_ORDER_RM_API, UVM_LOCK_ORDER_RM_API, \
                          UVM_LOCK_FLAGS_MODE_EXCLUSIVE)

// Helpers for recording RM GPUS lock usage around UVM-RM interfaces
#define uvm_record_lock_rm_gpus() \
    uvm_record_lock_raw((void*)UVM_LOCK_ORDER_RM_GPUS, UVM_LOCK_ORDER_RM_GPUS, \
                        UVM_LOCK_FLAGS_MODE_EXCLUSIVE)
#define uvm_record_unlock_rm_gpus() \
    uvm_record_unlock_raw((void*)UVM_LOCK_ORDER_RM_GPUS, UVM_LOCK_ORDER_RM_GPUS, \
                          UVM_LOCK_FLAGS_MODE_EXCLUSIVE)

// Helpers for recording both RM locks usage around UVM-RM interfaces.
// Note the unlock helper releases in reverse order: GPUs lock, then API lock.
#define uvm_record_lock_rm_all() ({ uvm_record_lock_rm_api(); uvm_record_lock_rm_gpus(); })
#define uvm_record_unlock_rm_all() ({ uvm_record_unlock_rm_gpus(); uvm_record_unlock_rm_api(); })

#else
// Release builds: lock recording compiles away entirely
#define uvm_record_lock UVM_IGNORE_EXPR2
#define uvm_record_unlock UVM_IGNORE_EXPR2
#define uvm_record_unlock_out_of_order UVM_IGNORE_EXPR2
#define uvm_record_downgrade UVM_IGNORE_EXPR

// Without lock tracking no lock can ever be reported as held
static bool uvm_check_locked(void *lock, uvm_lock_flags_t flags)
{
    return false;
}

#define uvm_record_lock_mmap_lock_read UVM_IGNORE_EXPR
#define uvm_record_unlock_mmap_lock_read UVM_IGNORE_EXPR
#define uvm_record_unlock_mmap_lock_read_out_of_order UVM_IGNORE_EXPR
#define uvm_record_lock_mmap_lock_write UVM_IGNORE_EXPR
#define uvm_record_unlock_mmap_lock_write UVM_IGNORE_EXPR
#define uvm_record_unlock_mmap_lock_write_out_of_order UVM_IGNORE_EXPR

#define uvm_check_locked_mmap_lock uvm_check_locked

#define uvm_record_lock_rm_api()
#define uvm_record_unlock_rm_api()

#define uvm_record_lock_rm_gpus()
#define uvm_record_unlock_rm_gpus()

#define uvm_record_lock_rm_all()
#define uvm_record_unlock_rm_all()
#endif
676
// Assert that the locking infrastructure has been initialized
#define uvm_locking_assert_initialized() UVM_ASSERT(__uvm_locking_initialized())

// Assert that the current thread holds no recorded locks
#define uvm_thread_assert_all_unlocked() UVM_ASSERT(__uvm_thread_check_all_unlocked())

// Assert that a lock of the given order could be taken (in either mode) by
// the current thread
#define uvm_assert_lockable_order(order) UVM_ASSERT(__uvm_check_lockable_order(order, UVM_LOCK_FLAGS_MODE_ANY))

// Assert that no lock of the given order is held by the current thread
#define uvm_assert_unlocked_order(order) UVM_ASSERT(__uvm_check_unlocked_order(order))

// Initialize the lock-tracking state of a lock (its lock_order member).
// In release builds this compiles down to just evaluating order.
#if UVM_IS_DEBUG()
#define uvm_lock_debug_init(lock, order) ({ \
        uvm_locking_assert_initialized(); \
        (lock)->lock_order = (order); \
    })
#else
#define uvm_lock_debug_init(lock, order) ((void) order)
#endif
690
// Helpers for locking mmap_lock (mmap_sem in kernels < 5.8)
// and recording its usage

// Assert that mm's mmap_lock is held in the given mode: checks both the
// kernel's lock state and this driver's own lock tracking.
#define uvm_assert_mmap_lock_locked_mode(mm, flags) ({ \
    typeof(mm) _mm = (mm); \
    UVM_ASSERT(nv_mm_rwsem_is_locked(_mm) && uvm_check_locked_mmap_lock((_mm), (flags))); \
    })

// Mode-specific convenience asserts.
#define uvm_assert_mmap_lock_locked(mm) \
    uvm_assert_mmap_lock_locked_mode((mm), UVM_LOCK_FLAGS_MODE_ANY)
#define uvm_assert_mmap_lock_locked_read(mm) \
    uvm_assert_mmap_lock_locked_mode((mm), UVM_LOCK_FLAGS_MODE_SHARED)
#define uvm_assert_mmap_lock_locked_write(mm) \
    uvm_assert_mmap_lock_locked_mode((mm), UVM_LOCK_FLAGS_MODE_EXCLUSIVE)

// Take mmap_lock for reading. The acquisition is recorded *before* blocking
// on the lock so order violations are reported even if we would deadlock.
#define uvm_down_read_mmap_lock(mm) ({ \
    typeof(mm) _mm = (mm); \
    uvm_record_lock_mmap_lock_read(_mm); \
    nv_mmap_read_lock(_mm); \
    })

// Release mmap_lock held for reading; unlock first, then record.
#define uvm_up_read_mmap_lock(mm) ({ \
    typeof(mm) _mm = (mm); \
    nv_mmap_read_unlock(_mm); \
    uvm_record_unlock_mmap_lock_read(_mm); \
    })

// Same as uvm_up_read_mmap_lock(), but tells the tracker the release is
// intentionally out of lock order.
#define uvm_up_read_mmap_lock_out_of_order(mm) ({ \
    typeof(mm) _mm = (mm); \
    nv_mmap_read_unlock(_mm); \
    uvm_record_unlock_mmap_lock_read_out_of_order(_mm); \
    })

// Take mmap_lock for writing (record first, as above).
#define uvm_down_write_mmap_lock(mm) ({ \
    typeof(mm) _mm = (mm); \
    uvm_record_lock_mmap_lock_write(_mm); \
    nv_mmap_write_lock(_mm); \
    })

// Release mmap_lock held for writing.
#define uvm_up_write_mmap_lock(mm) ({ \
    typeof(mm) _mm = (mm); \
    nv_mmap_write_unlock(_mm); \
    uvm_record_unlock_mmap_lock_write(_mm); \
    })
734
// Helper for calling a UVM-RM interface function with lock recording.
// Records all RM locks as taken around the call and yields the call's result
// as the value of the statement expression.
#define uvm_rm_locked_call(call) ({ \
    typeof(call) ret; \
    uvm_record_lock_rm_all(); \
    ret = call; \
    uvm_record_unlock_rm_all(); \
    ret; \
    })

// Helper for calling a UVM-RM interface function that returns void with lock recording
#define uvm_rm_locked_call_void(call) ({ \
    uvm_record_lock_rm_all(); \
    call; \
    uvm_record_unlock_rm_all(); \
    })
750
// Kernel rw_semaphore wrapper carrying the lock order used by the debug
// lock tracker.
typedef struct
{
    struct rw_semaphore sem;
#if UVM_IS_DEBUG()
    uvm_lock_order_t lock_order;
#endif
} uvm_rw_semaphore_t;

//
// Note that this is a macro, not an inline or static function so the
// "uvm_sem" argument is substituted as text. If this is invoked with
// uvm_assert_rwsem_locked_mode(_sem, flags) then we get code "_sem = _sem"
// and _sem is initialized to NULL. Avoid this by using a name unlikely to
// be the same as the string passed to "uvm_sem".
// See uvm_down_read() and uvm_up_read() below as examples.
//
#define uvm_assert_rwsem_locked_mode(uvm_sem, flags) ({ \
    typeof(uvm_sem) _sem_ = (uvm_sem); \
    UVM_ASSERT(rwsem_is_locked(&_sem_->sem) && uvm_check_locked(_sem_, (flags))); \
    })

// Mode-specific convenience asserts.
#define uvm_assert_rwsem_locked(uvm_sem) \
    uvm_assert_rwsem_locked_mode(uvm_sem, UVM_LOCK_FLAGS_MODE_ANY)
#define uvm_assert_rwsem_locked_read(uvm_sem) \
    uvm_assert_rwsem_locked_mode(uvm_sem, UVM_LOCK_FLAGS_MODE_SHARED)
#define uvm_assert_rwsem_locked_write(uvm_sem) \
    uvm_assert_rwsem_locked_mode(uvm_sem, UVM_LOCK_FLAGS_MODE_EXCLUSIVE)

#define uvm_assert_rwsem_unlocked(uvm_sem) UVM_ASSERT(!rwsem_is_locked(&(uvm_sem)->sem))

// Initialize the semaphore and stamp it with its lock order (debug builds).
// The temp's name is suffixed with the order token, presumably to make a
// collision with the caller's own identifiers unlikely -- same concern as
// the uvm_assert_rwsem_locked_mode() note above.
#define uvm_init_rwsem(uvm_sem, order) ({ \
    uvm_rw_semaphore_t *uvm_sem_ ## order = (uvm_sem); \
    init_rwsem(&uvm_sem_ ## order->sem); \
    uvm_lock_debug_init(uvm_sem, order); \
    uvm_assert_rwsem_unlocked(uvm_sem); \
    })
787
// Acquire for reading: record first (so order violations surface even if we
// would block forever), then lock, then assert the held state.
#define uvm_down_read(uvm_sem) ({ \
    typeof(uvm_sem) _sem = (uvm_sem); \
    uvm_record_lock(_sem, UVM_LOCK_FLAGS_MODE_SHARED); \
    down_read(&_sem->sem); \
    uvm_assert_rwsem_locked_read(_sem); \
    })

// Release a read hold; unlock before telling the tracker.
#define uvm_up_read(uvm_sem) ({ \
    typeof(uvm_sem) _sem = (uvm_sem); \
    uvm_assert_rwsem_locked_read(_sem); \
    up_read(&_sem->sem); \
    uvm_record_unlock(_sem, UVM_LOCK_FLAGS_MODE_SHARED); \
    })

// Unlock w/o any tracking. This should be extremely rare and *_no_tracking
// helpers will be added only as needed.
//
// TODO: Bug 2594854:
// TODO: Bug 2583279: Remove macro when bugs are fixed
#define uvm_up_read_no_tracking(uvm_sem) ({ \
    typeof(uvm_sem) _sem = (uvm_sem); \
    up_read(&_sem->sem); \
    })

// Acquire for writing (exclusive); same record-lock-assert sequence as
// uvm_down_read().
#define uvm_down_write(uvm_sem) ({ \
    typeof (uvm_sem) _sem = (uvm_sem); \
    uvm_record_lock(_sem, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
    down_write(&_sem->sem); \
    uvm_assert_rwsem_locked_write(_sem); \
    })

// trylock for reading: returns 1 if successful, 0 if not. Out-of-order lock
// acquisition via this function is legal, i.e. the lock order checker will
// allow it. However, if an out-of-order lock acquisition attempt fails, it is
// the caller's responsibility to back off at least to the point where the
// next held lower-order lock is released.
// Note the record is taken up front and rolled back if the trylock fails.
#define uvm_down_read_trylock(uvm_sem) ({ \
    typeof(uvm_sem) _sem = (uvm_sem); \
    int locked; \
    uvm_record_lock(_sem, UVM_LOCK_FLAGS_MODE_SHARED | UVM_LOCK_FLAGS_TRYLOCK); \
    locked = down_read_trylock(&_sem->sem); \
    if (locked == 0) \
        uvm_record_unlock(_sem, UVM_LOCK_FLAGS_MODE_SHARED); \
    else \
        uvm_assert_rwsem_locked_read(_sem); \
    locked; \
    })

// Lock w/o any tracking. This should be extremely rare and *_no_tracking
// helpers will be added only as needed.
//
// TODO: Bug 2594854:
// TODO: Bug 2583279: Remove macro when bugs are fixed
#define uvm_down_read_trylock_no_tracking(uvm_sem) ({ \
    typeof(uvm_sem) _sem = (uvm_sem); \
    down_read_trylock(&_sem->sem); \
    })

// trylock for writing: returns 1 if successful, 0 if not. Out-of-order lock
// acquisition via this function is legal, i.e. the lock order checker will
// allow it. However, if an out-of-order lock acquisition attempt fails, it is
// the caller's responsibility to back off at least to the point where the
// next held lower-order lock is released.
#define uvm_down_write_trylock(uvm_sem) ({ \
    typeof(uvm_sem) _sem = (uvm_sem); \
    int locked; \
    uvm_record_lock(_sem, UVM_LOCK_FLAGS_MODE_EXCLUSIVE | UVM_LOCK_FLAGS_TRYLOCK); \
    locked = down_write_trylock(&_sem->sem); \
    if (locked == 0) \
        uvm_record_unlock(_sem, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
    else \
        uvm_assert_rwsem_locked_write(_sem); \
    locked; \
    })

// Release a write hold.
#define uvm_up_write(uvm_sem) ({ \
    typeof(uvm_sem) _sem = (uvm_sem); \
    uvm_assert_rwsem_locked_write(_sem); \
    up_write(&_sem->sem); \
    uvm_record_unlock(_sem, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
    })

// Atomically convert a held write lock into a read lock, updating the
// tracker to match.
#define uvm_downgrade_write(uvm_sem) ({ \
    typeof(uvm_sem) _sem = (uvm_sem); \
    uvm_assert_rwsem_locked_write(_sem); \
    downgrade_write(&_sem->sem); \
    uvm_record_downgrade(_sem); \
    })
876
// Kernel mutex wrapper carrying the lock order used by the debug lock tracker.
typedef struct
{
    struct mutex m;
#if UVM_IS_DEBUG()
    uvm_lock_order_t lock_order;
#endif
} uvm_mutex_t;

// Note that this is a macro, not an inline or static function so the
// "uvm_mutex" argument is substituted as text. If this is invoked with
// uvm_mutex_is_locked(_mutex) then we get code "_mutex = _mutex" and _mutex is
// initialized to NULL. Avoid this by using a name unlikely to be the same as
// the string passed to "uvm_mutex".
// See uvm_mutex_lock() and uvm_mutex_unlock() below as examples.
//
#define uvm_mutex_is_locked(uvm_mutex) ({ \
    typeof(uvm_mutex) _mutex_ = (uvm_mutex); \
    (mutex_is_locked(&_mutex_->m) && uvm_check_locked(_mutex_, UVM_LOCK_FLAGS_MODE_EXCLUSIVE)); \
    })

#define uvm_assert_mutex_locked(uvm_mutex) UVM_ASSERT(uvm_mutex_is_locked(uvm_mutex))
#define uvm_assert_mutex_unlocked(uvm_mutex) UVM_ASSERT(!mutex_is_locked(&(uvm_mutex)->m))

//
// Linux kernel mutexes cannot be used with interrupts disabled. Doing so
// can lead to deadlocks.
// To warn about mutex usages with interrupts disabled, the following
// macros and inline functions wrap around the raw kernel mutex operations
// in order to check if the interrupts have been disabled and assert if so.
//
// TODO: Bug 2690258: evaluate whether !irqs_disabled() && !in_interrupt() is
// enough.
//
#define uvm_assert_mutex_interrupts() ({ \
    UVM_ASSERT_MSG(!irqs_disabled() && !in_interrupt(), "Mutexes cannot be used with interrupts disabled"); \
    })

// Initialize the mutex and stamp it with its lock order (debug builds).
#define uvm_mutex_init(mutex, order) ({ \
    uvm_mutex_t *mutex_ ## order = (mutex); \
    mutex_init(&mutex_ ## order->m); \
    uvm_lock_debug_init(mutex, order); \
    uvm_assert_mutex_unlocked(mutex); \
    })
920
// Acquire: verify interrupt context is legal, record first so lock-order
// violations surface even if we would block, then lock.
#define uvm_mutex_lock(mutex) ({ \
    typeof(mutex) _mutex = (mutex); \
    uvm_assert_mutex_interrupts(); \
    uvm_record_lock(_mutex, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
    mutex_lock(&_mutex->m); \
    uvm_assert_mutex_locked(_mutex); \
    })

// Lock while already holding a lock of the same order taken with
// uvm_mutex_lock() variant. Note this shouldn't be used if the held lock was
// taken with uvm_mutex_lock_nested() because we only support a single level of
// nesting. This should be extremely rare and *_nested helpers will only be
// added as needed.
#define uvm_mutex_lock_nested(mutex) ({ \
    uvm_assert_mutex_interrupts(); \
    mutex_lock_nested(&(mutex)->m, 1); \
    })

// trylock: returns 1 if the mutex was taken, 0 if not. The record is taken
// up front and rolled back on failure.
#define uvm_mutex_trylock(mutex) ({ \
    typeof(mutex) _mutex = (mutex); \
    int locked; \
    uvm_record_lock(_mutex, UVM_LOCK_FLAGS_MODE_EXCLUSIVE | UVM_LOCK_FLAGS_TRYLOCK); \
    locked = mutex_trylock(&_mutex->m); \
    if (locked == 0) \
        uvm_record_unlock(_mutex, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
    else \
        uvm_assert_mutex_locked(_mutex); \
    locked; \
    })

// Release a held mutex; unlock before telling the tracker.
#define uvm_mutex_unlock(mutex) ({ \
    typeof(mutex) _mutex = (mutex); \
    uvm_assert_mutex_interrupts(); \
    uvm_assert_mutex_locked(_mutex); \
    mutex_unlock(&_mutex->m); \
    uvm_record_unlock(_mutex, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
    })
// Same as uvm_mutex_unlock(), but tells the tracker the release is
// intentionally out of lock order.
#define uvm_mutex_unlock_out_of_order(mutex) ({ \
    typeof(mutex) _mutex = (mutex); \
    uvm_assert_mutex_interrupts(); \
    uvm_assert_mutex_locked(_mutex); \
    mutex_unlock(&_mutex->m); \
    uvm_record_unlock_out_of_order(_mutex, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
    })

// Unlock w/o any tracking (counterpart of uvm_mutex_lock_nested()).
#define uvm_mutex_unlock_nested(mutex) ({ \
    uvm_assert_mutex_interrupts(); \
    mutex_unlock(&(mutex)->m); \
    })
971
// Kernel counting-semaphore wrapper carrying the lock order used by the debug
// lock tracker.
typedef struct
{
    struct semaphore sem;
#if UVM_IS_DEBUG()
    uvm_lock_order_t lock_order;
#endif
} uvm_semaphore_t;

// Initialize the semaphore with the given count and stamp its lock order.
#define uvm_sema_init(semaphore, val, order) ({ \
    uvm_semaphore_t *sem_ ## order = (semaphore); \
    sema_init(&sem_ ## order->sem, (val)); \
    uvm_lock_debug_init(semaphore, order); \
    })

// Only the tracker can answer this: the kernel provides no query for
// struct semaphore. Semaphore holds are recorded in shared mode, since
// multiple threads can hold one concurrently.
#define uvm_sem_is_locked(uvm_sem) uvm_check_locked(uvm_sem, UVM_LOCK_FLAGS_MODE_SHARED)

// Acquire (possibly blocking); recorded as a shared hold.
#define uvm_down(uvm_sem) ({ \
    typeof(uvm_sem) _sem = (uvm_sem); \
    uvm_record_lock(_sem, UVM_LOCK_FLAGS_MODE_SHARED); \
    down(&_sem->sem); \
    })

// Release a hold; signal before telling the tracker.
#define uvm_up(uvm_sem) ({ \
    typeof(uvm_sem) _sem = (uvm_sem); \
    UVM_ASSERT(uvm_sem_is_locked(_sem)); \
    up(&_sem->sem); \
    uvm_record_unlock(_sem, UVM_LOCK_FLAGS_MODE_SHARED); \
    })
// Same as uvm_up(), but tells the tracker the release is intentionally out
// of lock order.
#define uvm_up_out_of_order(uvm_sem) ({ \
    typeof(uvm_sem) _sem = (uvm_sem); \
    UVM_ASSERT(uvm_sem_is_locked(_sem)); \
    up(&_sem->sem); \
    uvm_record_unlock_out_of_order(_sem, UVM_LOCK_FLAGS_MODE_SHARED); \
    })
1006
1007
// A regular spinlock
// Locked/unlocked with uvm_spin_lock()/uvm_spin_unlock()
typedef struct
{
    spinlock_t lock;
#if UVM_IS_DEBUG()
    uvm_lock_order_t lock_order;
#endif
} uvm_spinlock_t;

// A separate spinlock type for spinlocks that need to disable interrupts. For
// guaranteed correctness and convenience embed the saved and restored irq state
// in the lock itself.
// Locked/unlocked with uvm_spin_lock_irqsave()/uvm_spin_unlock_irqrestore()
typedef struct
{
    spinlock_t lock;
    unsigned long irq_flags;
#if UVM_IS_DEBUG()
    uvm_lock_order_t lock_order;
#endif
} uvm_spinlock_irqsave_t;

// Asserts that the spinlock is held. Notably the macros below support both
// types of spinlocks.
// Note that this is a macro, not an inline or static function so the
// "spinlock" argument is substituted as text. If this is invoked with
// uvm_assert_spinlock_locked(_lock) then we get code "_lock = _lock"
// and _lock is initialized to NULL. Avoid this by using a name unlikely to
// be the same as the string passed to "spinlock".
// See uvm_spin_lock() and uvm_spin_unlock() below as examples.
//
#define uvm_assert_spinlock_locked(spinlock) ({ \
    typeof(spinlock) _lock_ = (spinlock); \
    UVM_ASSERT(spin_is_locked(&_lock_->lock)); \
    UVM_ASSERT(uvm_check_locked(_lock_, UVM_LOCK_FLAGS_MODE_EXCLUSIVE)); \
    })

#define uvm_assert_spinlock_unlocked(spinlock) UVM_ASSERT(!spin_is_locked(&(spinlock)->lock))

// Initialize a uvm_spinlock_t and stamp it with its lock order (debug builds).
#define uvm_spin_lock_init(spinlock, order) ({ \
    uvm_spinlock_t *spinlock_ ## order = (spinlock); \
    spin_lock_init(&spinlock_ ## order->lock); \
    uvm_lock_debug_init(spinlock, order); \
    uvm_assert_spinlock_unlocked(spinlock); \
    })
1054
// Acquire a uvm_spinlock_t: record first, then spin.
#define uvm_spin_lock(uvm_lock) ({ \
    typeof(uvm_lock) _lock = (uvm_lock); \
    uvm_record_lock(_lock, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
    spin_lock(&_lock->lock); \
    uvm_assert_spinlock_locked(_lock); \
    })

// Release a uvm_spinlock_t; unlock before telling the tracker.
#define uvm_spin_unlock(uvm_lock) ({ \
    typeof(uvm_lock) _lock = (uvm_lock); \
    uvm_assert_spinlock_locked(_lock); \
    spin_unlock(&_lock->lock); \
    uvm_record_unlock(_lock, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
    })

// Initialize a uvm_spinlock_irqsave_t and stamp it with its lock order.
#define uvm_spin_lock_irqsave_init(spinlock, order) ({ \
    uvm_spinlock_irqsave_t *spinlock_ ## order = (spinlock); \
    spin_lock_init(&spinlock_ ## order->lock); \
    uvm_lock_debug_init(spinlock, order); \
    uvm_assert_spinlock_unlocked(spinlock); \
    })

// Use a temp to not rely on flags being written after acquiring the lock.
// The saved irq state lives in the lock, so the holder need not carry it.
#define uvm_spin_lock_irqsave(uvm_lock) ({ \
    typeof(uvm_lock) _lock = (uvm_lock); \
    unsigned long irq_flags; \
    uvm_record_lock(_lock, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
    spin_lock_irqsave(&_lock->lock, irq_flags); \
    _lock->irq_flags = irq_flags; \
    uvm_assert_spinlock_locked(_lock); \
    })

// Use a temp to not rely on flags being read before releasing the lock.
#define uvm_spin_unlock_irqrestore(uvm_lock) ({ \
    typeof(uvm_lock) _lock = (uvm_lock); \
    unsigned long irq_flags = _lock->irq_flags; \
    uvm_assert_spinlock_locked(_lock); \
    spin_unlock_irqrestore(&_lock->lock, irq_flags); \
    uvm_record_unlock(_lock, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
    })
1094
// Wrapper for a reader-writer spinlock that disables and enables interrupts
typedef struct
{
    rwlock_t lock;

    // This flags variable is only used by writers, since concurrent readers may
    // have different values.
    unsigned long irq_flags;

#if UVM_IS_DEBUG()
    uvm_lock_order_t lock_order;

    // The kernel doesn't provide a function to tell if an rwlock_t is locked,
    // so we create our own.
    atomic_t lock_count;
#endif
} uvm_rwlock_irqsave_t;
1112
uvm_rwlock_irqsave_is_locked(uvm_rwlock_irqsave_t * rwlock)1113 static bool uvm_rwlock_irqsave_is_locked(uvm_rwlock_irqsave_t *rwlock)
1114 {
1115 #if UVM_IS_DEBUG()
1116 return atomic_read(&rwlock->lock_count) > 0;
1117 #else
1118 return false;
1119 #endif
1120 }
1121
uvm_rwlock_irqsave_inc(uvm_rwlock_irqsave_t * rwlock)1122 static void uvm_rwlock_irqsave_inc(uvm_rwlock_irqsave_t *rwlock)
1123 {
1124 #if UVM_IS_DEBUG()
1125 atomic_inc(&rwlock->lock_count);
1126 #endif
1127 }
1128
uvm_rwlock_irqsave_dec(uvm_rwlock_irqsave_t * rwlock)1129 static void uvm_rwlock_irqsave_dec(uvm_rwlock_irqsave_t *rwlock)
1130 {
1131 #if UVM_IS_DEBUG()
1132 atomic_dec(&rwlock->lock_count);
1133 #endif
1134 }
1135
// Held-state asserts: combine the driver-maintained hold count with the lock
// tracker's mode check.
#define uvm_assert_rwlock_locked(uvm_rwlock) \
    UVM_ASSERT(uvm_rwlock_irqsave_is_locked(uvm_rwlock) && uvm_check_locked(uvm_rwlock, UVM_LOCK_FLAGS_MODE_ANY))
#define uvm_assert_rwlock_locked_read(uvm_rwlock) \
    UVM_ASSERT(uvm_rwlock_irqsave_is_locked(uvm_rwlock) && uvm_check_locked(uvm_rwlock, UVM_LOCK_FLAGS_MODE_SHARED))
#define uvm_assert_rwlock_locked_write(uvm_rwlock) \
    UVM_ASSERT(uvm_rwlock_irqsave_is_locked(uvm_rwlock) && uvm_check_locked(uvm_rwlock, UVM_LOCK_FLAGS_MODE_EXCLUSIVE))

// The unlocked assert only exists in debug builds: without the debug hold
// count there is no way to query an rwlock_t's state at all.
#if UVM_IS_DEBUG()
    #define uvm_assert_rwlock_unlocked(uvm_rwlock) UVM_ASSERT(!uvm_rwlock_irqsave_is_locked(uvm_rwlock))
#else
    #define uvm_assert_rwlock_unlocked(uvm_rwlock)
#endif

// Initialize the rwlock and stamp it with its lock order (debug builds).
#define uvm_rwlock_irqsave_init(rwlock, order) ({ \
    uvm_rwlock_irqsave_t *rwlock_ ## order = rwlock; \
    rwlock_init(&rwlock_ ## order->lock); \
    uvm_lock_debug_init(rwlock, order); \
    uvm_assert_rwlock_unlocked(rwlock); \
    })
1155
// We can't store the irq_flags within the lock itself for readers, so they must
// pass in their flags.
//
// Each macro evaluates its lock argument exactly once into "_lock"; the
// inc/dec hold-count helpers are called with _lock (calling them with the raw
// macro argument would re-evaluate a possibly side-effecting expression,
// defeating the temp).
#define uvm_read_lock_irqsave(uvm_rwlock, irq_flags) ({ \
    typeof(uvm_rwlock) _lock = (uvm_rwlock); \
    uvm_record_lock(_lock, UVM_LOCK_FLAGS_MODE_SHARED); \
    read_lock_irqsave(&_lock->lock, irq_flags); \
    uvm_rwlock_irqsave_inc(_lock); \
    uvm_assert_rwlock_locked_read(_lock); \
    })

// Release a read hold, restoring the caller-saved irq flags.
#define uvm_read_unlock_irqrestore(uvm_rwlock, irq_flags) ({ \
    typeof(uvm_rwlock) _lock = (uvm_rwlock); \
    uvm_assert_rwlock_locked_read(_lock); \
    uvm_rwlock_irqsave_dec(_lock); \
    read_unlock_irqrestore(&_lock->lock, irq_flags); \
    uvm_record_unlock(_lock, UVM_LOCK_FLAGS_MODE_SHARED); \
    })

// Use a temp to not rely on flags being written after acquiring the lock.
// Writers are exclusive, so the saved irq state can live in the lock itself.
#define uvm_write_lock_irqsave(uvm_rwlock) ({ \
    typeof(uvm_rwlock) _lock = (uvm_rwlock); \
    unsigned long irq_flags; \
    uvm_record_lock(_lock, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
    write_lock_irqsave(&_lock->lock, irq_flags); \
    uvm_rwlock_irqsave_inc(_lock); \
    _lock->irq_flags = irq_flags; \
    uvm_assert_rwlock_locked_write(_lock); \
    })

// Use a temp to not rely on flags being read before releasing the lock.
#define uvm_write_unlock_irqrestore(uvm_rwlock) ({ \
    typeof(uvm_rwlock) _lock = (uvm_rwlock); \
    unsigned long irq_flags = _lock->irq_flags; \
    uvm_assert_rwlock_locked_write(_lock); \
    uvm_rwlock_irqsave_dec(_lock); \
    write_unlock_irqrestore(&_lock->lock, irq_flags); \
    uvm_record_unlock(_lock, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
    })
1194
// Bit locks are 'compressed' mutexes which take only 1 bit per lock by virtue
// of using shared waitqueues.
typedef struct
{
    // Bitmap backing the locks; allocated by uvm_bit_locks_init().
    unsigned long *bits;

#if UVM_IS_DEBUG()
    uvm_lock_order_t lock_order;
#endif
} uvm_bit_locks_t;

// Allocate and initialize "count" bit locks sharing the given lock order.
NV_STATUS uvm_bit_locks_init(uvm_bit_locks_t *bit_locks, size_t count, uvm_lock_order_t lock_order);
// Free the storage allocated by uvm_bit_locks_init().
void uvm_bit_locks_deinit(uvm_bit_locks_t *bit_locks);

// Asserts that the bit lock is held.
//
// TODO: Bug 1766601:
//  - assert for the right ownership (defining the owner might be tricky in
//    the kernel).
#define uvm_assert_bit_locked(bit_locks, bit) ({ \
    typeof(bit_locks) _bit_locks = (bit_locks); \
    typeof(bit) _bit = (bit); \
    UVM_ASSERT(test_bit(_bit, _bit_locks->bits)); \
    UVM_ASSERT(uvm_check_locked(_bit_locks, UVM_LOCK_FLAGS_MODE_EXCLUSIVE)); \
    })

// Asserts that the bit lock is not held (bit clear in the bitmap).
#define uvm_assert_bit_unlocked(bit_locks, bit) ({ \
    typeof(bit_locks) _bit_locks = (bit_locks); \
    typeof(bit) _bit = (bit); \
    UVM_ASSERT(!test_bit(_bit, _bit_locks->bits)); \
    })
1226
// Acquire bit lock "bit", sleeping uninterruptibly on the shared waitqueue
// until the bit can be set.
//
// "inline" avoids -Wunused-function warnings in TUs that include this header
// without taking bit locks.
static inline void __uvm_bit_lock(uvm_bit_locks_t *bit_locks, unsigned long bit)
{
    int res;

    res = UVM_WAIT_ON_BIT_LOCK(bit_locks->bits, bit, TASK_UNINTERRUPTIBLE);

    // An uninterruptible wait should never report failure.
    UVM_ASSERT_MSG(res == 0, "Uninterruptible task interrupted: %d\n", res);
    uvm_assert_bit_locked(bit_locks, bit);
}

// Lock-order-tracked wrapper: bit locks are always exclusive.
#define uvm_bit_lock(bit_locks, bit) ({ \
    typeof(bit_locks) _bit_locks = (bit_locks); \
    typeof(bit) _bit = (bit); \
    uvm_record_lock(_bit_locks, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
    __uvm_bit_lock(_bit_locks, _bit); \
    })
1241
// Release bit lock "bit" and wake any waiters queued on it.
//
// "inline" avoids -Wunused-function warnings in TUs that include this header
// without taking bit locks.
static inline void __uvm_bit_unlock(uvm_bit_locks_t *bit_locks, unsigned long bit)
{
    uvm_assert_bit_locked(bit_locks, bit);

    clear_bit_unlock(bit, bit_locks->bits);
    // Make sure we don't reorder release with wakeup as it would cause
    // deadlocks (other thread checking lock and adding itself to queue
    // in reversed order). clear_bit_unlock has only release semantics.
    smp_mb__after_atomic();
    wake_up_bit(bit_locks->bits, bit);
}

// Lock-order-tracked wrapper; unlock before telling the tracker.
#define uvm_bit_unlock(bit_locks, bit) ({ \
    typeof(bit_locks) _bit_locks = (bit_locks); \
    typeof(bit) _bit = (bit); \
    __uvm_bit_unlock(_bit_locks, _bit); \
    uvm_record_unlock(_bit_locks, UVM_LOCK_FLAGS_MODE_EXCLUSIVE); \
    })
1259
1260 #endif // __UVM_LOCK_H__
1261