/*******************************************************************************
    Copyright (c) 2015-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#ifndef __UVM_HAL_H__
#define __UVM_HAL_H__

#include "uvm_types.h"
#include "uvm_common.h"
#include "uvm_forward_decl.h"
#include "uvm_hal_types.h"
#include "uvm_push.h"
#include "uvm_gpu.h"
#include "uvm_test_ioctl.h"

// A dummy method validation that always returns true; it can be used to skip
// CE/Host/SW method validations for a given architecture
bool uvm_hal_method_is_valid_stub(uvm_push_t *push, NvU32 method_address, NvU32 method_data);

typedef void (*uvm_hal_init_t)(uvm_push_t *push);
void uvm_hal_maxwell_ce_init(uvm_push_t *push);
void uvm_hal_maxwell_host_init_noop(uvm_push_t *push);
void uvm_hal_pascal_host_init(uvm_push_t *push);
void uvm_hal_maxwell_sec2_init_noop(uvm_push_t *push);
void uvm_hal_hopper_sec2_init(uvm_push_t *push);

// Host method validation
typedef bool (*uvm_hal_host_method_is_valid)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
bool uvm_hal_ampere_host_method_is_valid(uvm_push_t *push, NvU32 method_address, NvU32 method_data);

// SW method validation
typedef bool (*uvm_hal_host_sw_method_is_valid)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
bool uvm_hal_ampere_host_sw_method_is_valid(uvm_push_t *push, NvU32 method_address, NvU32 method_data);

// Wait for idle
typedef void (*uvm_hal_wait_for_idle_t)(uvm_push_t *push);
void uvm_hal_maxwell_host_wait_for_idle(uvm_push_t *push);

// Membar SYS
typedef void (*uvm_hal_membar_sys_t)(uvm_push_t *push);
void uvm_hal_maxwell_host_membar_sys(uvm_push_t *push);
void uvm_hal_pascal_host_membar_sys(uvm_push_t *push);

// Membar GPU
typedef void (*uvm_hal_membar_gpu_t)(uvm_push_t *push);
void uvm_hal_pascal_host_membar_gpu(uvm_push_t *push);

// Put a noop in the pushbuffer of the given size in bytes.
// The size needs to be a multiple of 4.
typedef void (*uvm_hal_noop_t)(uvm_push_t *push, NvU32 size);
void uvm_hal_maxwell_host_noop(uvm_push_t *push, NvU32 size);

// Host-generated interrupt method. This will generate a call to
// uvm_isr_top_half_entry.
//
// This is a non-stalling interrupt, which means that it's fire-and-forget. Host
// will not stall method processing nor stop channel switching, which means that
// we cannot directly identify in software which channel generated the
// interrupt.
//
// We must set up software state before pushing the interrupt, and check any
// possible interrupt condition on receiving an interrupt callback.
typedef void (*uvm_hal_interrupt_t)(uvm_push_t *push);
void uvm_hal_maxwell_host_interrupt(uvm_push_t *push);
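
// Illustrative sequence (a sketch, not part of this header): because the
// interrupt cannot be attributed to a channel, record the pending condition in
// software first and have the callback poll every possible condition.
//
//     // Hypothetical driver-side state; the names below are placeholders.
//     atomic_set(&isr_state->work_pending, 1);
//     gpu->parent->host_hal->interrupt(push);
//     // The interrupt callback later checks isr_state->work_pending (and any
//     // other condition) instead of a per-channel source.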

// Issue a TLB invalidate applying to all VAs in a PDB.
//
// The PTE caches (TLBs) are always invalidated. The PDE caches for all VAs in
// the PDB are invalidated from the specified depth down to the PTEs. This
// allows for optimizations if the caller isn't writing all levels of the PDEs.
// Depth follows the MMU code convention where depth 0 is the top level and here
// means to invalidate everything. See uvm_pascal_mmu.c for an example of depth
// mapping to HW PDE levels. Notably 2M PTEs are considered PDEs as far as the
// TLBs are concerned and hence on Pascal the depth needs to be at most 3 for
// them to be included in the invalidation.
//
// If the membar parameter is not UVM_MEMBAR_NONE, the specified membar is
// performed logically after the TLB invalidate such that all physical memory
// accesses using the old translations are ordered to the scope of the membar.
typedef void (*uvm_hal_host_tlb_invalidate_all_t)(uvm_push_t *push,
                                                  uvm_gpu_phys_address_t pdb,
                                                  NvU32 depth,
                                                  uvm_membar_t membar);
void uvm_hal_maxwell_host_tlb_invalidate_all_a16f(uvm_push_t *push,
                                                  uvm_gpu_phys_address_t pdb,
                                                  NvU32 depth,
                                                  uvm_membar_t membar);
void uvm_hal_maxwell_host_tlb_invalidate_all_b06f(uvm_push_t *push,
                                                  uvm_gpu_phys_address_t pdb,
                                                  NvU32 depth,
                                                  uvm_membar_t membar);
void uvm_hal_pascal_host_tlb_invalidate_all(uvm_push_t *push,
                                            uvm_gpu_phys_address_t pdb,
                                            NvU32 depth,
                                            uvm_membar_t membar);
void uvm_hal_ampere_host_tlb_invalidate_all(uvm_push_t *push,
                                            uvm_gpu_phys_address_t pdb,
                                            NvU32 depth,
                                            uvm_membar_t membar);
void uvm_hal_hopper_host_tlb_invalidate_all(uvm_push_t *push,
                                            uvm_gpu_phys_address_t pdb,
                                            NvU32 depth,
                                            uvm_membar_t membar);
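
// Usage sketch (illustrative only, assuming push and pdb were set up by the
// caller): invalidate every VA and all PDE levels in a PDB through the per-GPU
// host HAL table, with no ordering membar requested.
//
//     gpu->parent->host_hal->tlb_invalidate_all(push, pdb, 0, UVM_MEMBAR_NONE);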

// Issue a TLB invalidate applying to the specified VA range in a PDB.
//
// The PTE caches (TLBs) for each page size aligned VA within the VA range
// are always invalidated. The PDE caches covering the specified VA
// range in the PDB are invalidated from the specified depth down to the PTEs.
// Specifying the depth allows for optimizations if the caller isn't writing all
// levels of the PDEs. Specifying the page size allows for optimizations if
// the caller can guarantee caches for smaller page sizes don't need to be
// invalidated.
//
// Depth follows the MMU code convention where depth 0 is the top level and here
// means to invalidate all levels. See uvm_pascal_mmu.c for an example of depth
// mapping to HW PDE levels. Notably 2M PTEs are considered PDEs as far as the
// TLBs are concerned and hence on Pascal the depth needs to be at most 3 for
// them to be included in the invalidation.
//
// If the membar parameter is not UVM_MEMBAR_NONE, the specified membar is
// performed logically after the TLB invalidate such that all physical memory
// accesses using the old translations are ordered to the scope of the membar.
//
// Note that this can end up pushing a lot of methods for large ranges, so it's
// better not to use it directly. Instead, use the uvm_tlb_batch* APIs, which
// automatically switch between targeted VA invalidates and a full invalidate.
typedef void (*uvm_hal_host_tlb_invalidate_va_t)(uvm_push_t *push,
                                                 uvm_gpu_phys_address_t pdb,
                                                 NvU32 depth,
                                                 NvU64 base,
                                                 NvU64 size,
                                                 NvU32 page_size,
                                                 uvm_membar_t membar);
void uvm_hal_maxwell_host_tlb_invalidate_va(uvm_push_t *push,
                                            uvm_gpu_phys_address_t pdb,
                                            NvU32 depth,
                                            NvU64 base,
                                            NvU64 size,
                                            NvU32 page_size,
                                            uvm_membar_t membar);
void uvm_hal_pascal_host_tlb_invalidate_va(uvm_push_t *push,
                                           uvm_gpu_phys_address_t pdb,
                                           NvU32 depth,
                                           NvU64 base,
                                           NvU64 size,
                                           NvU32 page_size,
                                           uvm_membar_t membar);
void uvm_hal_volta_host_tlb_invalidate_va(uvm_push_t *push,
                                          uvm_gpu_phys_address_t pdb,
                                          NvU32 depth,
                                          NvU64 base,
                                          NvU64 size,
                                          NvU32 page_size,
                                          uvm_membar_t membar);
void uvm_hal_ampere_host_tlb_invalidate_va(uvm_push_t *push,
                                           uvm_gpu_phys_address_t pdb,
                                           NvU32 depth,
                                           NvU64 base,
                                           NvU64 size,
                                           NvU32 page_size,
                                           uvm_membar_t membar);
void uvm_hal_hopper_host_tlb_invalidate_va(uvm_push_t *push,
                                           uvm_gpu_phys_address_t pdb,
                                           NvU32 depth,
                                           NvU64 base,
                                           NvU64 size,
                                           NvU32 page_size,
                                           uvm_membar_t membar);

typedef void (*uvm_hal_host_tlb_invalidate_test_t)(uvm_push_t *push,
                                                   uvm_gpu_phys_address_t pdb,
                                                   UVM_TEST_INVALIDATE_TLB_PARAMS *params);
void uvm_hal_maxwell_host_tlb_invalidate_test(uvm_push_t *push,
                                              uvm_gpu_phys_address_t pdb,
                                              UVM_TEST_INVALIDATE_TLB_PARAMS *params);
void uvm_hal_pascal_host_tlb_invalidate_test(uvm_push_t *push,
                                             uvm_gpu_phys_address_t pdb,
                                             UVM_TEST_INVALIDATE_TLB_PARAMS *params);
void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
                                             uvm_gpu_phys_address_t pdb,
                                             UVM_TEST_INVALIDATE_TLB_PARAMS *params);
void uvm_hal_hopper_host_tlb_invalidate_test(uvm_push_t *push,
                                             uvm_gpu_phys_address_t pdb,
                                             UVM_TEST_INVALIDATE_TLB_PARAMS *params);

// By default all semaphore release operations include a membar sys before the
// operation. This can be affected by using UVM_PUSH_FLAG_NEXT_* flags with
// uvm_push_set_flag().
typedef void (*uvm_hal_semaphore_release_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_maxwell_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_maxwell_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_maxwell_sec2_semaphore_release_unsupported(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_pascal_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_volta_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_turing_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_hopper_sec2_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_hopper_ce_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_hopper_host_semaphore_release(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
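
// Usage sketch (illustrative only): releasing a tracking semaphore after some
// pushed work. The ce_hal table field is assumed to parallel the host_hal
// field used later in this header, and the flag name is assumed to come from
// uvm_push.h; both are shown only to illustrate how the default membar sys can
// be skipped.
//
//     uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);  // assumed flag name
//     gpu->parent->ce_hal->semaphore_release(push, semaphore_gpu_va, new_payload);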

// Release a semaphore including a timestamp at the specified GPU VA.
//
// This operation writes 16 bytes of memory and the VA needs to be 16-byte
// aligned. The value of the released payload is unspecified and shouldn't be
// relied on; only the timestamp is meaningful.
typedef void (*uvm_hal_semaphore_timestamp_t)(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_maxwell_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_pascal_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_volta_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_hopper_ce_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);

void uvm_hal_maxwell_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_volta_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_hopper_host_semaphore_timestamp(uvm_push_t *push, NvU64 gpu_va);

void uvm_hal_maxwell_sec2_semaphore_timestamp_unsupported(uvm_push_t *push, NvU64 gpu_va);
void uvm_hal_hopper_sec2_semaphore_timestamp_unsupported(uvm_push_t *push, NvU64 gpu_va);

typedef void (*uvm_hal_semaphore_acquire_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_maxwell_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_turing_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_hopper_host_semaphore_acquire(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);

typedef void (*uvm_hal_host_set_gpfifo_entry_t)(NvU64 *fifo_entry,
                                                NvU64 pushbuffer_va,
                                                NvU32 pushbuffer_length,
                                                uvm_gpfifo_sync_t sync_flag);
void uvm_hal_maxwell_host_set_gpfifo_entry(NvU64 *fifo_entry,
                                           NvU64 pushbuffer_va,
                                           NvU32 pushbuffer_length,
                                           uvm_gpfifo_sync_t sync_flag);
void uvm_hal_turing_host_set_gpfifo_entry(NvU64 *fifo_entry,
                                          NvU64 pushbuffer_va,
                                          NvU32 pushbuffer_length,
                                          uvm_gpfifo_sync_t sync_flag);
void uvm_hal_hopper_host_set_gpfifo_entry(NvU64 *fifo_entry,
                                          NvU64 pushbuffer_va,
                                          NvU32 pushbuffer_length,
                                          uvm_gpfifo_sync_t sync_flag);

typedef void (*uvm_hal_host_set_gpfifo_noop_t)(NvU64 *fifo_entry);
void uvm_hal_maxwell_host_set_gpfifo_noop(NvU64 *fifo_entry);

typedef void (*uvm_hal_host_set_gpfifo_pushbuffer_segment_base_t)(NvU64 *fifo_entry, NvU64 pushbuffer_va);
void uvm_hal_maxwell_host_set_gpfifo_pushbuffer_segment_base_unsupported(NvU64 *fifo_entry, NvU64 pushbuffer_va);
void uvm_hal_hopper_host_set_gpfifo_pushbuffer_segment_base(NvU64 *fifo_entry, NvU64 pushbuffer_va);

typedef void (*uvm_hal_host_write_gpu_put_t)(uvm_channel_t *channel, NvU32 gpu_put);
void uvm_hal_maxwell_host_write_gpu_put(uvm_channel_t *channel, NvU32 gpu_put);
void uvm_hal_volta_host_write_gpu_put(uvm_channel_t *channel, NvU32 gpu_put);

// Return the current GPU time in nanoseconds
typedef NvU64 (*uvm_hal_get_time_t)(uvm_gpu_t *gpu);
NvU64 uvm_hal_maxwell_get_time(uvm_gpu_t *gpu);

// Internal helpers used by the CE hal
// Used to handle the offset encoding differences between architectures
typedef void (*uvm_hal_ce_offset_out_t)(uvm_push_t *push, NvU64 offset);
void uvm_hal_maxwell_ce_offset_out(uvm_push_t *push, NvU64 offset);
void uvm_hal_pascal_ce_offset_out(uvm_push_t *push, NvU64 offset);
void uvm_hal_hopper_ce_offset_out(uvm_push_t *push, NvU64 offset);

typedef void (*uvm_hal_ce_offset_in_out_t)(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
void uvm_hal_maxwell_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
void uvm_hal_pascal_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);
void uvm_hal_hopper_ce_offset_in_out(uvm_push_t *push, NvU64 offset_in, NvU64 offset_out);

typedef NvU32 (*uvm_hal_ce_phys_mode_t)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
NvU32 uvm_hal_maxwell_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
NvU32 uvm_hal_ampere_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);

typedef NvU32 (*uvm_hal_ce_plc_mode_t)(void);
NvU32 uvm_hal_maxwell_ce_plc_mode(void);
NvU32 uvm_hal_ampere_ce_plc_mode_c7b5(void);

typedef NvU32 (*uvm_hal_ce_memcopy_type_t)(uvm_gpu_address_t dst, uvm_gpu_address_t src);
NvU32 uvm_hal_maxwell_ce_memcopy_copy_type(uvm_gpu_address_t dst, uvm_gpu_address_t src);
NvU32 uvm_hal_hopper_ce_memcopy_copy_type(uvm_gpu_address_t dst, uvm_gpu_address_t src);

// CE method validation
typedef bool (*uvm_hal_ce_method_is_valid)(uvm_push_t *push, NvU32 method_address, NvU32 method_data);
bool uvm_hal_ampere_ce_method_is_valid_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data);

// Memcopy validation.
// The validation happens at the start of the memcopy (uvm_hal_memcopy_t)
// execution. Use uvm_hal_ce_memcopy_is_valid_stub to skip the validation for
// a given architecture.
typedef bool (*uvm_hal_ce_memcopy_is_valid)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
bool uvm_hal_ce_memcopy_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);
bool uvm_hal_hopper_ce_memcopy_is_valid(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src);

// Patching of the memcopy source; if not needed for a given architecture use
// the (empty) uvm_hal_ce_memcopy_patch_src_stub implementation
typedef void (*uvm_hal_ce_memcopy_patch_src)(uvm_push_t *push, uvm_gpu_address_t *src);
void uvm_hal_ce_memcopy_patch_src_stub(uvm_push_t *push, uvm_gpu_address_t *src);
void uvm_hal_ampere_ce_memcopy_patch_src_c6b5(uvm_push_t *push, uvm_gpu_address_t *src);

// Memcopy size bytes from src to dst.
//
// By default all CE transfer operations include a membar sys after the
// operation and are not pipelined. This can be affected by using
// UVM_PUSH_FLAG_NEXT_CE_* flags with uvm_push_set_flag().
typedef void (*uvm_hal_memcopy_t)(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src, size_t size);
void uvm_hal_maxwell_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src, size_t size);
void uvm_hal_volta_ce_memcopy(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src, size_t size);

// Simple wrapper for uvm_hal_memcopy_t with both addresses being virtual
typedef void (*uvm_hal_memcopy_v_to_v_t)(uvm_push_t *push, NvU64 dst, NvU64 src, size_t size);
void uvm_hal_maxwell_ce_memcopy_v_to_v(uvm_push_t *push, NvU64 dst, NvU64 src, size_t size);
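
// Usage sketch (illustrative only): copying size bytes between two virtual
// addresses. uvm_gpu_address_virtual() is assumed to be the helper from
// uvm_hal_types.h that builds a virtual uvm_gpu_address_t.
//
//     gpu->parent->ce_hal->memcopy(push,
//                                  uvm_gpu_address_virtual(dst_va),
//                                  uvm_gpu_address_virtual(src_va),
//                                  size);
//     // Or, for the virtual-to-virtual case, the thin wrapper:
//     gpu->parent->ce_hal->memcopy_v_to_v(push, dst_va, src_va, size);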

// Memset validation.
// The validation happens at the start of the memset (uvm_hal_memset_*_t)
// execution. Use uvm_hal_ce_memset_is_valid_stub to skip the validation for
// a given architecture.
typedef bool (*uvm_hal_ce_memset_is_valid)(uvm_push_t *push,
                                           uvm_gpu_address_t dst,
                                           size_t num_elements,
                                           size_t element_size);
bool uvm_hal_ce_memset_is_valid_stub(uvm_push_t *push, uvm_gpu_address_t dst, size_t num_elements, size_t element_size);
bool uvm_hal_ampere_ce_memset_is_valid_c6b5(uvm_push_t *push,
                                            uvm_gpu_address_t dst,
                                            size_t num_elements,
                                            size_t element_size);
bool uvm_hal_hopper_ce_memset_is_valid(uvm_push_t *push,
                                       uvm_gpu_address_t dst,
                                       size_t num_elements,
                                       size_t element_size);

// Memset size bytes at dst to a given N-byte input value.
//
// Size has to be a multiple of the element size. For example, the size passed
// to uvm_hal_memset_4_t must be a multiple of 4 bytes.
//
// By default all CE transfer operations include a membar sys after the
// operation and are not pipelined. This can be affected by using
// UVM_PUSH_FLAG_NEXT_CE_* flags with uvm_push_set_flag().
typedef void (*uvm_hal_memset_1_t)(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size);
typedef void (*uvm_hal_memset_4_t)(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size);
typedef void (*uvm_hal_memset_8_t)(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);

// Simple wrapper for uvm_hal_memset_4_t with the address being virtual.
typedef void (*uvm_hal_memset_v_4_t)(uvm_push_t *push, NvU64 dst_va, NvU32 value, size_t size);

void uvm_hal_maxwell_ce_memset_1(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size);
void uvm_hal_maxwell_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size);
void uvm_hal_maxwell_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);
void uvm_hal_maxwell_ce_memset_v_4(uvm_push_t *push, NvU64 dst_va, NvU32 value, size_t size);

void uvm_hal_volta_ce_memset_1(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size);
void uvm_hal_volta_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size);
void uvm_hal_volta_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);

void uvm_hal_hopper_ce_memset_1(uvm_push_t *push, uvm_gpu_address_t dst, NvU8 value, size_t size);
void uvm_hal_hopper_ce_memset_4(uvm_push_t *push, uvm_gpu_address_t dst, NvU32 value, size_t size);
void uvm_hal_hopper_ce_memset_8(uvm_push_t *push, uvm_gpu_address_t dst, NvU64 value, size_t size);
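
// Usage sketch (illustrative only): zeroing a 4KB buffer with 4-byte elements;
// the size must be a multiple of the element size (4 bytes here).
// uvm_gpu_address_virtual() is assumed to come from uvm_hal_types.h.
//
//     gpu->parent->ce_hal->memset_4(push, uvm_gpu_address_virtual(dst_va), 0, 4096);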

// Encrypts the contents of the source buffer into the destination buffer, up to
// the given size. The authentication tag of the encrypted contents is written
// to auth_tag, so it can be verified later on by a decrypt operation.
//
// The addressing modes of the destination and authentication tag addresses
// should match. If the addressing mode is physical, then the address apertures
// should also match.
typedef void (*uvm_hal_ce_encrypt_t)(uvm_push_t *push,
                                     uvm_gpu_address_t dst,
                                     uvm_gpu_address_t src,
                                     NvU32 size,
                                     uvm_gpu_address_t auth_tag);

// Decrypts the contents of the source buffer into the destination buffer, up to
// the given size. The method also verifies the integrity of the encrypted
// buffer by calculating its authentication tag, and comparing it with the one
// provided as argument.
//
// The addressing modes of the source and authentication tag addresses should
// match. If the addressing mode is physical, then the address apertures should
// also match.
typedef void (*uvm_hal_ce_decrypt_t)(uvm_push_t *push,
                                     uvm_gpu_address_t dst,
                                     uvm_gpu_address_t src,
                                     NvU32 size,
                                     uvm_gpu_address_t auth_tag);

void uvm_hal_maxwell_ce_encrypt_unsupported(uvm_push_t *push,
                                            uvm_gpu_address_t dst,
                                            uvm_gpu_address_t src,
                                            NvU32 size,
                                            uvm_gpu_address_t auth_tag);
void uvm_hal_maxwell_ce_decrypt_unsupported(uvm_push_t *push,
                                            uvm_gpu_address_t dst,
                                            uvm_gpu_address_t src,
                                            NvU32 size,
                                            uvm_gpu_address_t auth_tag);
void uvm_hal_hopper_ce_encrypt(uvm_push_t *push,
                               uvm_gpu_address_t dst,
                               uvm_gpu_address_t src,
                               NvU32 size,
                               uvm_gpu_address_t auth_tag);
void uvm_hal_hopper_ce_decrypt(uvm_push_t *push,
                               uvm_gpu_address_t dst,
                               uvm_gpu_address_t src,
                               NvU32 size,
                               uvm_gpu_address_t auth_tag);

// Increments the semaphore by 1, or resets to 0 if the incremented value would
// exceed the payload.
//
// By default all CE semaphore operations include a membar sys before the
// semaphore operation. This can be affected by using UVM_PUSH_FLAG_NEXT_CE_*
// flags with uvm_push_set_flag().
typedef void (*uvm_hal_semaphore_reduction_inc_t)(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_maxwell_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_pascal_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_volta_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);
void uvm_hal_hopper_ce_semaphore_reduction_inc(uvm_push_t *push, NvU64 gpu_va, NvU32 payload);

// Initialize GPU architecture dependent properties
typedef void (*uvm_hal_arch_init_properties_t)(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_turing_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu);

// Retrieve the page-tree HAL for a given big page size
typedef uvm_mmu_mode_hal_t *(*uvm_hal_lookup_mode_hal_t)(NvU32 big_page_size);
typedef void (*uvm_hal_mmu_enable_prefetch_faults_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_mmu_disable_prefetch_faults_t)(uvm_parent_gpu_t *parent_gpu);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_maxwell(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_volta(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_turing(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_ampere(NvU32 big_page_size);
uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_hopper(NvU32 big_page_size);
void uvm_hal_maxwell_mmu_enable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_mmu_disable_prefetch_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_mmu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_mmu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu);

// Convert a faulted MMU engine ID to a UVM engine type. Only engines which have
// faults serviced by UVM are handled. On Pascal the only such engine is
// GRAPHICS, so no translation is provided.
typedef uvm_mmu_engine_type_t (*uvm_hal_mmu_engine_id_to_type_t)(NvU16 mmu_engine_id);
uvm_mmu_engine_type_t uvm_hal_maxwell_mmu_engine_id_to_type_unsupported(NvU16 mmu_engine_id);
uvm_mmu_engine_type_t uvm_hal_volta_mmu_engine_id_to_type(NvU16 mmu_engine_id);
uvm_mmu_engine_type_t uvm_hal_turing_mmu_engine_id_to_type(NvU16 mmu_engine_id);
uvm_mmu_engine_type_t uvm_hal_ampere_mmu_engine_id_to_type(NvU16 mmu_engine_id);
uvm_mmu_engine_type_t uvm_hal_hopper_mmu_engine_id_to_type(NvU16 mmu_engine_id);

typedef NvU16 (*uvm_hal_mmu_client_id_to_utlb_id_t)(NvU16 client_id);
NvU16 uvm_hal_maxwell_mmu_client_id_to_utlb_id_unsupported(NvU16 client_id);
NvU16 uvm_hal_pascal_mmu_client_id_to_utlb_id(NvU16 client_id);
NvU16 uvm_hal_volta_mmu_client_id_to_utlb_id(NvU16 client_id);
NvU16 uvm_hal_ampere_mmu_client_id_to_utlb_id(NvU16 client_id);
NvU16 uvm_hal_hopper_mmu_client_id_to_utlb_id(NvU16 client_id);

// Replayable faults
typedef void (*uvm_hal_enable_replayable_faults_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_disable_replayable_faults_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_clear_replayable_faults_t)(uvm_parent_gpu_t *parent_gpu, NvU32 get);
typedef NvU32 (*uvm_hal_fault_buffer_read_put_t)(uvm_parent_gpu_t *parent_gpu);
typedef NvU32 (*uvm_hal_fault_buffer_read_get_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_fault_buffer_write_get_t)(uvm_parent_gpu_t *parent_gpu, NvU32 get);
typedef NvU8 (*uvm_hal_fault_buffer_get_ve_id_t)(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);

// Parse the replayable entry at the given buffer index. This also clears the
// valid bit of the entry in the buffer.
typedef NV_STATUS (*uvm_hal_fault_buffer_parse_replayable_entry_t)(uvm_parent_gpu_t *gpu,
                                                                   NvU32 index,
                                                                   uvm_fault_buffer_entry_t *buffer_entry);

NV_STATUS uvm_hal_maxwell_fault_buffer_parse_replayable_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
                                                                          NvU32 index,
                                                                          uvm_fault_buffer_entry_t *buffer_entry);

NV_STATUS uvm_hal_pascal_fault_buffer_parse_replayable_entry(uvm_parent_gpu_t *parent_gpu,
                                                             NvU32 index,
                                                             uvm_fault_buffer_entry_t *buffer_entry);

NV_STATUS uvm_hal_volta_fault_buffer_parse_replayable_entry(uvm_parent_gpu_t *parent_gpu,
                                                            NvU32 index,
                                                            uvm_fault_buffer_entry_t *buffer_entry);

typedef bool (*uvm_hal_fault_buffer_entry_is_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
typedef void (*uvm_hal_fault_buffer_entry_clear_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
typedef NvU32 (*uvm_hal_fault_buffer_entry_size_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_fault_buffer_replay_t)(uvm_push_t *push, uvm_fault_replay_type_t type);
typedef uvm_fault_type_t (*uvm_hal_fault_buffer_get_fault_type_t)(const NvU32 *fault_entry);
typedef void (*uvm_hal_fault_cancel_global_t)(uvm_push_t *push, uvm_gpu_phys_address_t instance_ptr);
typedef void (*uvm_hal_fault_cancel_targeted_t)(uvm_push_t *push,
                                                uvm_gpu_phys_address_t instance_ptr,
                                                NvU32 gpc_id,
                                                NvU32 client_id);

void uvm_hal_maxwell_enable_replayable_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_disable_replayable_faults_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_clear_replayable_faults_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get);
NvU32 uvm_hal_maxwell_fault_buffer_read_put_unsupported(uvm_parent_gpu_t *parent_gpu);
NvU32 uvm_hal_maxwell_fault_buffer_read_get_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_fault_buffer_write_get_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU8 uvm_hal_maxwell_fault_buffer_get_ve_id_unsupported(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);
uvm_fault_type_t uvm_hal_maxwell_fault_buffer_get_fault_type_unsupported(const NvU32 *fault_entry);

void uvm_hal_pascal_enable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_clear_replayable_faults(uvm_parent_gpu_t *parent_gpu, NvU32 get);
NvU32 uvm_hal_pascal_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu);
NvU32 uvm_hal_pascal_fault_buffer_read_get(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_pascal_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 index);

uvm_fault_type_t uvm_hal_pascal_fault_buffer_get_fault_type(const NvU32 *fault_entry);

NvU32 uvm_hal_volta_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu);
NvU32 uvm_hal_volta_fault_buffer_read_get(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU8 uvm_hal_volta_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);

uvm_fault_type_t uvm_hal_volta_fault_buffer_get_fault_type(const NvU32 *fault_entry);

void uvm_hal_turing_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_turing_clear_replayable_faults(uvm_parent_gpu_t *parent_gpu, NvU32 get);
NvU8 uvm_hal_hopper_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type);

bool uvm_hal_maxwell_fault_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_maxwell_fault_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU32 uvm_hal_maxwell_fault_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu);
bool uvm_hal_pascal_fault_buffer_entry_is_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_pascal_fault_buffer_entry_clear_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU32 uvm_hal_pascal_fault_buffer_entry_size(uvm_parent_gpu_t *parent_gpu);

typedef void (*uvm_hal_fault_buffer_parse_non_replayable_entry_t)(uvm_parent_gpu_t *parent_gpu,
                                                                  void *fault_packet,
                                                                  uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_maxwell_fault_buffer_parse_non_replayable_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
                                                                         void *fault_packet,
                                                                         uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_volta_fault_buffer_parse_non_replayable_entry(uvm_parent_gpu_t *parent_gpu,
                                                           void *fault_packet,
                                                           uvm_fault_buffer_entry_t *buffer_entry);

void uvm_hal_maxwell_cancel_faults_global_unsupported(uvm_push_t *push, uvm_gpu_phys_address_t instance_ptr);
void uvm_hal_pascal_cancel_faults_global(uvm_push_t *push, uvm_gpu_phys_address_t instance_ptr);

// Trigger fault replay on the GPU where the given pushbuffer is located.
void uvm_hal_maxwell_replay_faults_unsupported(uvm_push_t *push, uvm_fault_replay_type_t type);
void uvm_hal_maxwell_cancel_faults_targeted_unsupported(uvm_push_t *push,
                                                        uvm_gpu_phys_address_t instance_ptr,
                                                        NvU32 gpc_id,
                                                        NvU32 client_id);
void uvm_hal_pascal_replay_faults(uvm_push_t *push, uvm_fault_replay_type_t type);
void uvm_hal_pascal_cancel_faults_targeted(uvm_push_t *push,
                                           uvm_gpu_phys_address_t instance_ptr,
                                           NvU32 gpc_id,
                                           NvU32 client_id);

typedef void (*uvm_hal_fault_cancel_va_t)(uvm_push_t *push,
                                          uvm_gpu_phys_address_t pdb,
                                          const uvm_fault_buffer_entry_t *fault_entry,
                                          uvm_fault_cancel_va_mode_t cancel_va_mode);

void uvm_hal_maxwell_cancel_faults_va_unsupported(uvm_push_t *push,
                                                  uvm_gpu_phys_address_t pdb,
                                                  const uvm_fault_buffer_entry_t *fault_entry,
                                                  uvm_fault_cancel_va_mode_t cancel_va_mode);

void uvm_hal_volta_replay_faults(uvm_push_t *push, uvm_fault_replay_type_t type);
void uvm_hal_volta_cancel_faults_va(uvm_push_t *push,
                                    uvm_gpu_phys_address_t pdb,
                                    const uvm_fault_buffer_entry_t *fault_entry,
                                    uvm_fault_cancel_va_mode_t cancel_va_mode);

void uvm_hal_hopper_cancel_faults_va(uvm_push_t *push,
                                     uvm_gpu_phys_address_t pdb,
                                     const uvm_fault_buffer_entry_t *fault_entry,
                                     uvm_fault_cancel_va_mode_t cancel_va_mode);

typedef void (*uvm_hal_host_clear_faulted_channel_method_t)(uvm_push_t *push,
                                                            uvm_user_channel_t *user_channel,
                                                            const uvm_fault_buffer_entry_t *buffer_entry);

void uvm_hal_maxwell_host_clear_faulted_channel_method_unsupported(uvm_push_t *push,
                                                                   uvm_user_channel_t *user_channel,
                                                                   const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_volta_host_clear_faulted_channel_method(uvm_push_t *push,
                                                     uvm_user_channel_t *user_channel,
                                                     const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_turing_host_clear_faulted_channel_method(uvm_push_t *push,
                                                      uvm_user_channel_t *user_channel,
                                                      const uvm_fault_buffer_entry_t *buffer_entry);
typedef void (*uvm_hal_host_clear_faulted_channel_register_t)(uvm_user_channel_t *user_channel,
                                                              const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_maxwell_host_clear_faulted_channel_register_unsupported(uvm_user_channel_t *user_channel,
                                                                     const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_ampere_host_clear_faulted_channel_register(uvm_user_channel_t *user_channel,
                                                        const uvm_fault_buffer_entry_t *buffer_entry);

typedef void (*uvm_hal_host_clear_faulted_channel_sw_method_t)(uvm_push_t *push,
                                                               uvm_user_channel_t *user_channel,
                                                               const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_maxwell_host_clear_faulted_channel_sw_method_unsupported(uvm_push_t *push,
                                                                      uvm_user_channel_t *user_channel,
                                                                      const uvm_fault_buffer_entry_t *buffer_entry);
void uvm_hal_ampere_host_clear_faulted_channel_sw_method(uvm_push_t *push,
                                                         uvm_user_channel_t *user_channel,
                                                         const uvm_fault_buffer_entry_t *buffer_entry);

void uvm_hal_print_fault_entry(const uvm_fault_buffer_entry_t *entry);
void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_entry_t *entry);

// Access counters
typedef void (*uvm_hal_enable_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_disable_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_clear_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu, NvU32 get);

// Parse the entry at the given buffer index. This also clears the valid bit of
// the entry in the buffer.
typedef void (*uvm_hal_access_counter_buffer_parse_entry_t)(uvm_parent_gpu_t *parent_gpu,
                                                            NvU32 index,
                                                            uvm_access_counter_buffer_entry_t *buffer_entry);
typedef bool (*uvm_hal_access_counter_buffer_entry_is_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
typedef void (*uvm_hal_access_counter_buffer_entry_clear_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
typedef NvU32 (*uvm_hal_access_counter_buffer_entry_size_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_access_counter_clear_all_t)(uvm_push_t *push);
typedef void (*uvm_hal_access_counter_clear_type_t)(uvm_push_t *push, uvm_access_counter_type_t type);
typedef void (*uvm_hal_access_counter_clear_targeted_t)(uvm_push_t *push,
                                                        const uvm_access_counter_buffer_entry_t *buffer_entry);

void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get);
void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
                                                                   NvU32 index,
                                                                   uvm_access_counter_buffer_entry_t *buffer_entry);
bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_access_counter_clear_all_unsupported(uvm_push_t *push);
void uvm_hal_maxwell_access_counter_clear_type_unsupported(uvm_push_t *push, uvm_access_counter_type_t type);
void uvm_hal_maxwell_access_counter_clear_targeted_unsupported(uvm_push_t *push,
                                                               const uvm_access_counter_buffer_entry_t *buffer_entry);

void uvm_hal_volta_enable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);
void uvm_hal_volta_access_counter_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
                                                     NvU32 index,
                                                     uvm_access_counter_buffer_entry_t *buffer_entry);
bool uvm_hal_volta_access_counter_buffer_entry_is_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_volta_access_counter_buffer_entry_clear_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU32 uvm_hal_volta_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu);

void uvm_hal_volta_access_counter_clear_all(uvm_push_t *push);
void uvm_hal_volta_access_counter_clear_type(uvm_push_t *push, uvm_access_counter_type_t type);
void uvm_hal_volta_access_counter_clear_targeted(uvm_push_t *push,
                                                 const uvm_access_counter_buffer_entry_t *buffer_entry);

void uvm_hal_turing_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_turing_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);

// The source and destination addresses must be 16-byte aligned. Note that the
// best performance is achieved with 256-byte alignment. The decrypt size must
// be larger than 0, and a multiple of 4 bytes.
//
// The authentication tag address must also be 16-byte aligned.
// The authentication tag buffer size is UVM_CONF_COMPUTING_AUTH_TAG_SIZE bytes
// defined in uvm_conf_computing.h.
//
// Decrypts the src buffer into the dst buffer of the given size.
// The method also verifies integrity of the src buffer by calculating its
// authentication tag and comparing it with the provided one.
//
// Note: SEC2 does not support encryption.
typedef void (*uvm_hal_sec2_decrypt_t)(uvm_push_t *push, NvU64 dst_va, NvU64 src_va, NvU32 size, NvU64 auth_tag_va);

void uvm_hal_maxwell_sec2_decrypt_unsupported(uvm_push_t *push,
                                              NvU64 dst_va,
                                              NvU64 src_va,
                                              NvU32 size,
                                              NvU64 auth_tag_va);
void uvm_hal_hopper_sec2_decrypt(uvm_push_t *push, NvU64 dst_va, NvU64 src_va, NvU32 size, NvU64 auth_tag_va);

struct uvm_host_hal_struct
{
    uvm_hal_init_t init;
    uvm_hal_host_method_is_valid method_is_valid;
    uvm_hal_host_sw_method_is_valid sw_method_is_valid;
    uvm_hal_wait_for_idle_t wait_for_idle;
    uvm_hal_membar_sys_t membar_sys;
    uvm_hal_membar_gpu_t membar_gpu;
    uvm_hal_noop_t noop;
    uvm_hal_interrupt_t interrupt;
    uvm_hal_semaphore_release_t semaphore_release;
    uvm_hal_semaphore_acquire_t semaphore_acquire;
    uvm_hal_semaphore_timestamp_t semaphore_timestamp;
    uvm_hal_host_set_gpfifo_entry_t set_gpfifo_entry;
    uvm_hal_host_set_gpfifo_noop_t set_gpfifo_noop;
    uvm_hal_host_set_gpfifo_pushbuffer_segment_base_t set_gpfifo_pushbuffer_segment_base;
    uvm_hal_host_write_gpu_put_t write_gpu_put;
    uvm_hal_host_tlb_invalidate_all_t tlb_invalidate_all;
    uvm_hal_host_tlb_invalidate_va_t tlb_invalidate_va;
    uvm_hal_host_tlb_invalidate_test_t tlb_invalidate_test;
    uvm_hal_fault_buffer_replay_t replay_faults;
    uvm_hal_fault_cancel_global_t cancel_faults_global;
    uvm_hal_fault_cancel_targeted_t cancel_faults_targeted;
    uvm_hal_fault_cancel_va_t cancel_faults_va;
    uvm_hal_host_clear_faulted_channel_method_t clear_faulted_channel_sw_method;
    uvm_hal_host_clear_faulted_channel_method_t clear_faulted_channel_method;
    uvm_hal_host_clear_faulted_channel_register_t clear_faulted_channel_register;
    uvm_hal_access_counter_clear_all_t access_counter_clear_all;
    uvm_hal_access_counter_clear_type_t access_counter_clear_type;
    uvm_hal_access_counter_clear_targeted_t access_counter_clear_targeted;
    uvm_hal_get_time_t get_time;
};

struct uvm_ce_hal_struct
{
    uvm_hal_init_t init;
    uvm_hal_ce_method_is_valid method_is_valid;
    uvm_hal_semaphore_release_t semaphore_release;
    uvm_hal_semaphore_timestamp_t semaphore_timestamp;
    uvm_hal_ce_offset_out_t offset_out;
    uvm_hal_ce_offset_in_out_t offset_in_out;
    uvm_hal_ce_phys_mode_t phys_mode;
    uvm_hal_ce_plc_mode_t plc_mode;
    uvm_hal_ce_memcopy_type_t memcopy_copy_type;
    uvm_hal_ce_memcopy_is_valid memcopy_is_valid;
    uvm_hal_ce_memcopy_patch_src memcopy_patch_src;
    uvm_hal_memcopy_t memcopy;
    uvm_hal_memcopy_v_to_v_t memcopy_v_to_v;
    uvm_hal_ce_memset_is_valid memset_is_valid;
    uvm_hal_memset_1_t memset_1;
    uvm_hal_memset_4_t memset_4;
    uvm_hal_memset_8_t memset_8;
    uvm_hal_memset_v_4_t memset_v_4;
    uvm_hal_semaphore_reduction_inc_t semaphore_reduction_inc;
    uvm_hal_ce_encrypt_t encrypt;
    uvm_hal_ce_decrypt_t decrypt;
};

struct uvm_arch_hal_struct
{
    uvm_hal_arch_init_properties_t init_properties;
    uvm_hal_lookup_mode_hal_t mmu_mode_hal;
    uvm_hal_mmu_enable_prefetch_faults_t enable_prefetch_faults;
    uvm_hal_mmu_disable_prefetch_faults_t disable_prefetch_faults;
    uvm_hal_mmu_engine_id_to_type_t mmu_engine_id_to_type;
    uvm_hal_mmu_client_id_to_utlb_id_t mmu_client_id_to_utlb_id;
};

struct uvm_fault_buffer_hal_struct
{
    uvm_hal_enable_replayable_faults_t enable_replayable_faults;
    uvm_hal_disable_replayable_faults_t disable_replayable_faults;
    uvm_hal_clear_replayable_faults_t clear_replayable_faults;
    uvm_hal_fault_buffer_read_put_t read_put;
    uvm_hal_fault_buffer_read_get_t read_get;
    uvm_hal_fault_buffer_write_get_t write_get;
    uvm_hal_fault_buffer_get_ve_id_t get_ve_id;
    uvm_hal_fault_buffer_parse_replayable_entry_t parse_replayable_entry;
    uvm_hal_fault_buffer_entry_is_valid_t entry_is_valid;
    uvm_hal_fault_buffer_entry_clear_valid_t entry_clear_valid;
    uvm_hal_fault_buffer_entry_size_t entry_size;
    uvm_hal_fault_buffer_parse_non_replayable_entry_t parse_non_replayable_entry;
    uvm_hal_fault_buffer_get_fault_type_t get_fault_type;
};

struct uvm_access_counter_buffer_hal_struct
{
    uvm_hal_enable_access_counter_notifications_t enable_access_counter_notifications;
    uvm_hal_disable_access_counter_notifications_t disable_access_counter_notifications;
    uvm_hal_clear_access_counter_notifications_t clear_access_counter_notifications;
    uvm_hal_access_counter_buffer_parse_entry_t parse_entry;
    uvm_hal_access_counter_buffer_entry_is_valid_t entry_is_valid;
    uvm_hal_access_counter_buffer_entry_clear_valid_t entry_clear_valid;
    uvm_hal_access_counter_buffer_entry_size_t entry_size;
};

struct uvm_sec2_hal_struct
{
    uvm_hal_init_t init;
    uvm_hal_sec2_decrypt_t decrypt;
    uvm_hal_semaphore_release_t semaphore_release;
    uvm_hal_semaphore_timestamp_t semaphore_timestamp;
};

typedef struct
{
    // id is either a hardware class or GPU architecture
    NvU32 id;
    NvU32 parent_id;
    union
    {
        // host_ops: id is a hardware class
        uvm_host_hal_t host_ops;

        // ce_ops: id is a hardware class
        uvm_ce_hal_t ce_ops;

        // arch_ops: id is an architecture
        uvm_arch_hal_t arch_ops;

        // fault_buffer_ops: id is an architecture
        uvm_fault_buffer_hal_t fault_buffer_ops;

        // access_counter_buffer_ops: id is an architecture
        uvm_access_counter_buffer_hal_t access_counter_buffer_ops;

        // sec2_ops: id is an architecture
        uvm_sec2_hal_t sec2_ops;
    } u;
} uvm_hal_class_ops_t;
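
// A hypothetical table entry (a sketch only; the real tables live in uvm_hal.c,
// and parent_id presumably lets an entry inherit operations it doesn't
// override) might look like:
//
//     { .id = SOME_HOST_CLASS, .parent_id = OLDER_HOST_CLASS,
//       .u.host_ops = { .wait_for_idle = uvm_hal_maxwell_host_wait_for_idle } }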

NV_STATUS uvm_hal_init_table(void);
NV_STATUS uvm_hal_init_gpu(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_init_properties(uvm_parent_gpu_t *parent_gpu);

// Helper to push a SYS or GPU membar based on the membar type
//
// Notably this doesn't just get the GPU from the push object to support the
// test mode of the page tree code that doesn't do real pushes.
static void uvm_hal_membar(uvm_gpu_t *gpu, uvm_push_t *push, uvm_membar_t membar)
{
    switch (membar) {
        case UVM_MEMBAR_SYS:
            gpu->parent->host_hal->membar_sys(push);
            break;
        case UVM_MEMBAR_GPU:
            gpu->parent->host_hal->membar_gpu(push);
            break;
        case UVM_MEMBAR_NONE:
            break;
    }
}

static void uvm_hal_wfi_membar(uvm_push_t *push, uvm_membar_t membar)
{
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
    gpu->parent->host_hal->wait_for_idle(push);
    uvm_hal_membar(gpu, push, membar);
}

// Internal helper used by the TLB invalidate hal functions. This issues the
// appropriate Host membar(s) after a TLB invalidate.
void uvm_hal_tlb_invalidate_membar(uvm_push_t *push, uvm_membar_t membar);

// Internal helper used by architectures/engines that don't support a FLUSH
// operation with a FLUSH_TYPE on the semaphore release method, e.g., pre-Volta
// CE. It inspects and clears the MEMBAR push flags; for MEMBAR_GPU it issues a
// Host WFI + membar.gpu, and for MEMBAR_SYS it returns true to indicate that
// the caller should use the engine's FLUSH.
bool uvm_hal_membar_before_semaphore(uvm_push_t *push);
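
// Illustrative sketch (not taken from the source) of how a pre-Volta CE
// semaphore release might use this helper:
//
//     if (uvm_hal_membar_before_semaphore(push)) {
//         // MEMBAR_SYS was requested: use the engine's own FLUSH on the
//         // semaphore release method.
//     }
//     else {
//         // MEMBAR_NONE, or MEMBAR_GPU already handled via Host WFI +
//         // membar.gpu: release without the engine flush.
//     }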

// Determine the appropriate membar to use on TLB invalidates for GPU PTE
// permissions downgrades.
//
// gpu is the GPU on which the TLB invalidate is happening.
//
// is_local_vidmem indicates whether all mappings being invalidated pointed to
// the local GPU's memory.
uvm_membar_t uvm_hal_downgrade_membar_type(uvm_gpu_t *gpu, bool is_local_vidmem);
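
// Usage sketch (illustrative only): picking the membar for a PTE permissions
// downgrade and issuing the invalidate with it.
//
//     uvm_membar_t membar = uvm_hal_downgrade_membar_type(gpu, is_local_vidmem);
//     gpu->parent->host_hal->tlb_invalidate_all(push, pdb, 0, membar);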

#endif // __UVM_HAL_H__