/*******************************************************************************
    Copyright (c) 2015 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#ifndef __UVM_GPU_SEMAPHORE_H__
#define __UVM_GPU_SEMAPHORE_H__

#include "uvm_forward_decl.h"
#include "uvm_lock.h"
#include "uvm_rm_mem.h"
#include "uvm_linux.h"

// A GPU semaphore is a memory location, accessible by the GPUs and the CPU,
// that is used for synchronization among them.
// The GPU has primitives to acquire (wait for) and release (set) 4-byte memory
// locations. The same memory can be accessed by multiple GPUs and the CPU,
// allowing for different synchronization schemes.
//
// The UVM driver maintains a per-GPU semaphore pool that grows on demand as
// semaphores are allocated out of it.
//
// TODO: Bug 200194638: Add support for timestamps (the GPU also supports
//       releasing 16-byte semaphores that include an 8-byte timestamp).
struct uvm_gpu_semaphore_struct
{
    // The semaphore pool page the semaphore came from
    uvm_gpu_semaphore_pool_page_t *page;

    // Pointer to the memory location holding the semaphore's 32-bit payload
    NvU32 *payload;

    // State used only when the Confidential Computing feature is enabled
    struct {
        // Index of the semaphore within its pool page
        NvU16 index;

        // Last payload value decrypted by the CPU
        NvU32 cached_payload;

        // Encrypted copy of the payload, written by the GPU
        uvm_rm_mem_t *encrypted_payload;

        // Notifier used to detect new encrypted payload writes
        uvm_rm_mem_t *notifier;

        // Authentication tag for the encrypted payload
        uvm_rm_mem_t *auth_tag;

        // Initialization vectors (IVs) used to decrypt the encrypted payload
        UvmCslIv *ivs;

        // Tracking of the last notifier value pushed to, and the last one
        // observed from, the GPU
        NvU32 last_pushed_notifier;
        NvU32 last_observed_notifier;
    } conf_computing;
};
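
// As an illustration only, a minimal CPU-side lifecycle of a semaphore using
// the APIs declared below ("gpu" is assumed to be a valid uvm_gpu_t pointer;
// error handling omitted):
//
//     uvm_gpu_semaphore_pool_t *pool;
//     uvm_gpu_semaphore_t sem;
//
//     uvm_gpu_semaphore_pool_create(gpu, &pool);
//     uvm_gpu_semaphore_alloc(pool, &sem);
//
//     uvm_gpu_semaphore_set_payload(&sem, 1);     // release from the CPU
//     (void)uvm_gpu_semaphore_get_payload(&sem);  // poll from the CPU
//
//     uvm_gpu_semaphore_free(&sem);
//     uvm_gpu_semaphore_pool_destroy(pool);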

// A primitive used for tracking progress of the GPU
// Whenever a stream of GPU operations needs to be synchronized, it increments
// the semaphore's payload as its last step so that other processors
// can acquire (wait for) it.
// The primitive maintains a 64-bit counter on top of the 32-bit GPU semaphore
// to support 2^64 synchronization points instead of just 2^32. The logic relies
// on being able to notice every time the 32-bit counter wraps around (see
// update_completed_value()).
struct uvm_gpu_tracking_semaphore_struct
{
    uvm_gpu_semaphore_t semaphore;

    // Last completed value
    // The bottom 32 bits always match the latest semaphore payload seen in
    // update_completed_value_locked().
    atomic64_t completed_value;

    // Lock protecting updates to the completed_value
    union {
        uvm_spinlock_t s_lock;
        uvm_mutex_t m_lock;
    };

    // Last queued value
    // All accesses to the queued value should be handled by the user of the GPU
    // tracking semaphore.
    NvU64 queued_value;
};
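
// As a minimal sketch (not the driver's exact implementation) of the
// wrap-around handling described above: assuming the payload never advances
// by 2^31 or more between two consecutive updates, the 64-bit completed
// value can be extended from a newly read 32-bit payload like this:
//
//     static NvU64 example_extend_completed_value(NvU64 old_value, NvU32 new_payload)
//     {
//         NvU64 new_value = (old_value & ~(NvU64)0xffffffff) | new_payload;
//
//         // The payload appearing to move backwards means the 32-bit
//         // counter wrapped around, so carry into the upper 32 bits.
//         if (new_payload < (NvU32)old_value)
//             new_value += 1ULL << 32;
//
//         return new_value;
//     }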

// Create a semaphore pool for a GPU.
NV_STATUS uvm_gpu_semaphore_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out);

// When the Confidential Computing feature is enabled, semaphore pools
// associated with CE channels are allocated in the CPR (Compute Protected
// Region) of vidmem, and as such have all the associated access restrictions.
// Because of this, they're called secure pools, and secure semaphores are
// allocated out of said secure pools.
NV_STATUS uvm_gpu_semaphore_secure_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out);

// Destroy a semaphore pool
// Locking:
//  - Global lock needs to be held in read mode (for unmapping from all GPUs)
//  - Internally acquires:
//    - GPU semaphore pool lock
//    - RM API lock
//    - RM GPUs lock
void uvm_gpu_semaphore_pool_destroy(uvm_gpu_semaphore_pool_t *pool);

// Allocate a semaphore from the pool.
// The semaphore will be mapped on all GPUs currently registered with the UVM
// driver, and on all new GPUs which will be registered in the future, unless
// the Confidential Computing feature is enabled and the pool is a secure
// pool. In that case, the semaphore is only mapped on the GPU that holds the
// allocation.
// The mappings are added to UVM's internal address space, and (in SR-IOV heavy)
// to the proxy address space.
//
// The semaphore's payload will be initially set to 0.
//
// Locking:
//  - Global lock needs to be held in read mode (for mapping on all GPUs)
//  - Internally synchronized and hence safe to be called from multiple threads
//  - Internally acquires:
//    - GPU semaphore pool lock
//    - RM API lock
//    - RM GPUs lock
NV_STATUS uvm_gpu_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_semaphore_t *semaphore);

// Free a semaphore
// Locking:
//  - Internally synchronized and hence safe to be called from multiple threads
void uvm_gpu_semaphore_free(uvm_gpu_semaphore_t *semaphore);

// Map all the semaphores from the pool on a GPU
//
// The mappings are added to UVM's internal address space, and (in SR-IOV heavy)
// to the proxy address space.
NV_STATUS uvm_gpu_semaphore_pool_map_gpu(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_t *gpu);

// Unmap all the semaphores from the pool from a GPU
//
// The unmapping affects all the VA spaces where the semaphores are currently
// mapped.
void uvm_gpu_semaphore_pool_unmap_gpu(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_t *gpu);

// Get the GPU VA of a semaphore in UVM's internal address space.
NvU64 uvm_gpu_semaphore_get_gpu_uvm_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu);

// Get the GPU VA of a semaphore in the proxy address space.
NvU64 uvm_gpu_semaphore_get_gpu_proxy_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu);

// Get the GPU VA of a semaphore in either UVM's internal address space or the
// proxy address space, depending on is_proxy_va_space.
NvU64 uvm_gpu_semaphore_get_gpu_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu, bool is_proxy_va_space);

// Read the 32-bit payload of the semaphore
// Note that this provides no memory ordering guarantees, so it needs to be
// used with care. For an example of what needs to be considered, see
// uvm_gpu_tracking_semaphore_update_completed_value().
NvU32 uvm_gpu_semaphore_get_payload(uvm_gpu_semaphore_t *semaphore);

// Set the 32-bit payload of the semaphore
// Guarantees that all memory accesses preceding the payload update won't be
// reordered past it.
void uvm_gpu_semaphore_set_payload(uvm_gpu_semaphore_t *semaphore, NvU32 payload);
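
// As an illustrative, hypothetical CPU-to-CPU example of the ordering
// contract above ("results", "compute()" and "consume()" are placeholders,
// not UVM APIs): a producer can publish data via
// uvm_gpu_semaphore_set_payload() because the payload update is ordered
// after the preceding stores, but a consumer polling via
// uvm_gpu_semaphore_get_payload() must supply its own read barrier:
//
//     // Producer
//     results[7] = compute();                  // data is published first
//     uvm_gpu_semaphore_set_payload(&sem, 8);  // payload update comes after
//
//     // Consumer
//     if (uvm_gpu_semaphore_get_payload(&sem) >= 8) {
//         smp_rmb();  // order the payload read before the data read
//         consume(results[7]);
//     }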

// Allocate a GPU tracking semaphore from the pool
// Locking same as uvm_gpu_semaphore_alloc()
NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_tracking_semaphore_t *tracking_sem);

// Free a GPU tracking semaphore
// Locking same as uvm_gpu_semaphore_free()
void uvm_gpu_tracking_semaphore_free(uvm_gpu_tracking_semaphore_t *tracking_sem);

// Check whether a specific value has been completed
//
// If true is returned, guarantees that all operations ordered prior to a
// processor (commonly a GPU) completing the specific value will be visible to
// the caller.
//
// If a GPU is supposed to complete a value, care needs to be taken that all
// GPU operations are ordered correctly with the semaphore release that sets
// the value. If the CPU completes the value, uvm_gpu_semaphore_set_payload()
// should be used, as it provides the necessary ordering guarantees.
//
// Locking: this operation is internally synchronized and hence safe to be
// called from multiple threads.
bool uvm_gpu_tracking_semaphore_is_value_completed(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU64 value);
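
// For example, a caller could busy-wait for the GPU to reach a previously
// queued sync point with a loop like the following (an illustrative sketch
// only, not a recommended waiting strategy):
//
//     NvU64 sync_point = tracking_sem->queued_value;
//     while (!uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, sync_point))
//         cpu_relax();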

// Update and return the completed value
//
// Provides the same guarantees as if uvm_gpu_tracking_semaphore_is_value_completed()
// returned true for the returned completed value.
//
// Locking: this operation is internally synchronized and hence safe to be
// called from multiple threads.
NvU64 uvm_gpu_tracking_semaphore_update_completed_value(uvm_gpu_tracking_semaphore_t *tracking_sem);

// See the comments for uvm_gpu_tracking_semaphore_is_value_completed()
static bool uvm_gpu_tracking_semaphore_is_completed(uvm_gpu_tracking_semaphore_t *tracking_sem)
{
    return uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, tracking_sem->queued_value);
}

#endif // __UVM_GPU_SEMAPHORE_H__