1 /*******************************************************************************
2     Copyright (c) 2015-2020 NVIDIA Corporation
3 
4     Permission is hereby granted, free of charge, to any person obtaining a copy
5     of this software and associated documentation files (the "Software"), to
6     deal in the Software without restriction, including without limitation the
7     rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8     sell copies of the Software, and to permit persons to whom the Software is
9     furnished to do so, subject to the following conditions:
10 
11         The above copyright notice and this permission notice shall be
12         included in all copies or substantial portions of the Software.
13 
14     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17     THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20     DEALINGS IN THE SOFTWARE.
21 
22 *******************************************************************************/
23 
24 
25 // For Pascal, UVM page tree 'depth' maps to hardware as follows:
26 //
27 // UVM depth   HW level                            VA bits
28 // 0           PDE3                                48:47
29 // 1           PDE2                                46:38
30 // 2           PDE1                                37:29
31 // 3           PDE0 (dual 64k/4k PDE, or 2M PTE)   28:21
32 // 4           PTE_64K / PTE_4K                    20:16 / 20:12
33 
34 #include "uvm_types.h"
35 #include "uvm_forward_decl.h"
36 #include "uvm_global.h"
37 #include "uvm_gpu.h"
38 #include "uvm_mmu.h"
39 #include "uvm_push_macros.h"
40 #include "uvm_pascal_fault_buffer.h"
41 #include "hwref/pascal/gp100/dev_fault.h"
42 #include "hwref/pascal/gp100/dev_fb.h"
43 #include "hwref/pascal/gp100/dev_mmu.h"
44 
45 #define MMU_BIG 0
46 #define MMU_SMALL 1
47 
48 static NvU32 entries_per_index_pascal(NvU32 depth)
49 {
50     UVM_ASSERT(depth < 5);
51     if (depth == 3)
52         return 2;
53     return 1;
54 }
55 
56 static NvLength entry_offset_pascal(NvU32 depth, NvU32 page_size)
57 {
58     UVM_ASSERT(depth < 5);
59     if (page_size == UVM_PAGE_SIZE_4K && depth == 3)
60         return MMU_SMALL;
61     return MMU_BIG;
62 }
63 
64 static NvU64 single_pde_pascal(uvm_mmu_page_table_alloc_t *phys_alloc)
65 {
66     NvU64 pde_bits = 0;
67 
68     if (phys_alloc != NULL) {
69         NvU64 address = phys_alloc->addr.address >> NV_MMU_VER2_PDE_ADDRESS_SHIFT;
70         pde_bits |= HWCONST64(_MMU_VER2, PDE, IS_PDE, TRUE) |
71                     HWCONST64(_MMU_VER2, PDE, VOL, TRUE);
72 
73         switch (phys_alloc->addr.aperture) {
74             case UVM_APERTURE_SYS:
75                 pde_bits |= HWCONST64(_MMU_VER2, PDE, APERTURE, SYSTEM_COHERENT_MEMORY) |
76                             HWVALUE64(_MMU_VER2, PDE, ADDRESS_SYS, address);
77                 break;
78             case UVM_APERTURE_VID:
79                 pde_bits |= HWCONST64(_MMU_VER2, PDE, APERTURE, VIDEO_MEMORY) |
80                             HWVALUE64(_MMU_VER2, PDE, ADDRESS_VID, address);
81                 break;
82             default:
83                 UVM_ASSERT_MSG(0, "Invalid aperture: %d\n", phys_alloc->addr.aperture);
84                 break;
85         }
86     }
87 
88     return pde_bits;
89 }
90 
91 static NvU64 big_half_pde_pascal(uvm_mmu_page_table_alloc_t *phys_alloc)
92 {
93     NvU64 pde_bits = 0;
94 
95     if (phys_alloc != NULL) {
96         NvU64 address = phys_alloc->addr.address >> NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SHIFT;
97         pde_bits |= HWCONST64(_MMU_VER2, DUAL_PDE, VOL_BIG, TRUE);
98 
99         switch (phys_alloc->addr.aperture) {
100             case UVM_APERTURE_SYS:
101                 pde_bits |= HWCONST64(_MMU_VER2, DUAL_PDE, APERTURE_BIG, SYSTEM_COHERENT_MEMORY) |
102                             HWVALUE64(_MMU_VER2, DUAL_PDE, ADDRESS_BIG_SYS, address);
103                 break;
104             case UVM_APERTURE_VID:
105                 pde_bits |= HWCONST64(_MMU_VER2, DUAL_PDE, APERTURE_BIG, VIDEO_MEMORY) |
106                             HWVALUE64(_MMU_VER2, DUAL_PDE, ADDRESS_BIG_VID, address);
107                 break;
108             default:
109                 UVM_ASSERT_MSG(0, "Invalid big aperture %d\n", phys_alloc->addr.aperture);
110                 break;
111         }
112     }
113 
114     return pde_bits;
115 }
116 
117 static NvU64 small_half_pde_pascal(uvm_mmu_page_table_alloc_t *phys_alloc)
118 {
119     NvU64 pde_bits = 0;
120 
121     if (phys_alloc != NULL) {
122         NvU64 address = phys_alloc->addr.address >> NV_MMU_VER2_DUAL_PDE_ADDRESS_SHIFT;
123         pde_bits |= HWCONST64(_MMU_VER2, DUAL_PDE, VOL_SMALL, TRUE);
124 
125         switch (phys_alloc->addr.aperture) {
126             case UVM_APERTURE_SYS:
127                 pde_bits |= HWCONST64(_MMU_VER2, DUAL_PDE, APERTURE_SMALL, SYSTEM_COHERENT_MEMORY);
128                 pde_bits |= HWVALUE64(_MMU_VER2, DUAL_PDE, ADDRESS_SMALL_SYS, address);
129                 break;
130             case UVM_APERTURE_VID:
131                 pde_bits |= HWCONST64(_MMU_VER2, DUAL_PDE, APERTURE_SMALL, VIDEO_MEMORY);
132                 pde_bits |= HWVALUE64(_MMU_VER2, DUAL_PDE, ADDRESS_SMALL_VID, address);
133                 break;
134             default:
135                 UVM_ASSERT_MSG(0, "Invalid small aperture %d\n", phys_alloc->addr.aperture);
136                 break;
137         }
138     }
139 
140     return pde_bits;
141 }
142 
143 static void make_pde_pascal(void *entry, uvm_mmu_page_table_alloc_t **phys_allocs, NvU32 depth)
144 {
145     NvU32 entry_count = entries_per_index_pascal(depth);
146     NvU64 *entry_bits = (NvU64 *)entry;
147 
148     if (entry_count == 1) {
149         *entry_bits = single_pde_pascal(*phys_allocs);
150     }
151     else if (entry_count == 2) {
152         entry_bits[MMU_BIG] = big_half_pde_pascal(phys_allocs[MMU_BIG]);
153         entry_bits[MMU_SMALL] = small_half_pde_pascal(phys_allocs[MMU_SMALL]);
154 
155         // This entry applies to the whole dual PDE but is stored in the lower bits
156         entry_bits[MMU_BIG] |= HWCONST64(_MMU_VER2, DUAL_PDE, IS_PDE, TRUE);
157     }
158     else {
159         UVM_ASSERT_MSG(0, "Invalid number of entries per index: %d\n", entry_count);
160     }
161 }
162 
163 static NvLength entry_size_pascal(NvU32 depth)
164 {
165     UVM_ASSERT(depth < 5);
166     if (depth == 3)
167         return 16;
168     else
169         return 8;
170 }
171 
172 static NvU32 index_bits_pascal(NvU32 depth, NvU32 page_size)
173 {
174     static const NvU32 bit_widths[] = {2, 9, 9, 8};
175     // some code paths keep on querying this until they get a 0, meaning only the page offset remains.
176     UVM_ASSERT(depth < 5);
177     if (depth < 4) {
178         return bit_widths[depth];
179     }
180     else if (depth == 4) {
181         switch (page_size) {
182             case UVM_PAGE_SIZE_4K:
183                 return 9;
184             case UVM_PAGE_SIZE_64K:
185                 return 5;
186             default:
187                 break;
188         }
189     }
190     return 0;
191 }
192 
193 static NvU32 num_va_bits_pascal(void)
194 {
195     return 49;
196 }
197 
198 static NvLength allocation_size_pascal(NvU32 depth, NvU32 page_size)
199 {
200     UVM_ASSERT(depth < 5);
201     if (depth == 4 && page_size == UVM_PAGE_SIZE_64K)
202         return 256;
203     // depth 0 requires only a 32 byte allocation, but it must be 4k aligned
204     return 4096;
205 }
206 
207 static NvU32 page_table_depth_pascal(NvU32 page_size)
208 {
209     if (page_size == UVM_PAGE_SIZE_2M)
210         return 3;
211     else
212         return 4;
213 }
214 
215 static NvU32 page_sizes_pascal(void)
216 {
217     return UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
218 }
219 
// PTE value representing an unmapped page of the given page size. Zero for
// every size except 64K (big) pages, which get a special non-zero encoding
// described below.
static NvU64 unmapped_pte_pascal(NvU32 page_size)
{
    // Setting the privilege bit on an otherwise-zeroed big PTE causes the
    // corresponding 4k PTEs to be ignored. This allows the invalidation of a
    // mixed PDE range to be much faster.
    if (page_size != UVM_PAGE_SIZE_64K)
        return 0;

    // When VALID == 0, MMU still reads the VOL and PRIV fields. VOL == 1
    // indicates that the PTE is sparse, so make sure we don't use it.
    return HWCONST64(_MMU_VER2, PTE, VALID,     FALSE) |
           HWCONST64(_MMU_VER2, PTE, VOL,       FALSE) |
           HWCONST64(_MMU_VER2, PTE, PRIVILEGE, TRUE);
}
234 
235 static NvU64 make_pte_pascal(uvm_aperture_t aperture, NvU64 address, uvm_prot_t prot, NvU64 flags)
236 {
237     NvU8 aperture_bits = 0;
238     NvU64 pte_bits = 0;
239 
240     UVM_ASSERT(prot != UVM_PROT_NONE);
241     UVM_ASSERT((flags & ~UVM_MMU_PTE_FLAGS_MASK) == 0);
242 
243     // valid 0:0
244     pte_bits |= HWCONST64(_MMU_VER2, PTE, VALID, TRUE);
245 
246     // aperture 2:1
247     if (aperture == UVM_APERTURE_SYS)
248         aperture_bits = NV_MMU_VER2_PTE_APERTURE_SYSTEM_COHERENT_MEMORY;
249     else if (aperture == UVM_APERTURE_VID)
250         aperture_bits = NV_MMU_VER2_PTE_APERTURE_VIDEO_MEMORY;
251     else if (aperture >= UVM_APERTURE_PEER_0 && aperture <= UVM_APERTURE_PEER_7)
252         aperture_bits = NV_MMU_VER2_PTE_APERTURE_PEER_MEMORY;
253     else
254         UVM_ASSERT_MSG(0, "Invalid aperture: %d\n", aperture);
255 
256     pte_bits |= HWVALUE64(_MMU_VER2, PTE, APERTURE, aperture_bits);
257 
258     // volatile 3:3
259     if (flags & UVM_MMU_PTE_FLAGS_CACHED)
260         pte_bits |= HWCONST64(_MMU_VER2, PTE, VOL, FALSE);
261     else
262         pte_bits |= HWCONST64(_MMU_VER2, PTE, VOL, TRUE);
263 
264     // encrypted 4:4
265     pte_bits |= HWCONST64(_MMU_VER2, PTE, ENCRYPTED, FALSE);
266 
267     // privilege 5:5
268     pte_bits |= HWCONST64(_MMU_VER2, PTE, PRIVILEGE, FALSE);
269 
270     // read only 6:6
271     if (prot == UVM_PROT_READ_ONLY)
272         pte_bits |= HWCONST64(_MMU_VER2, PTE, READ_ONLY, TRUE);
273     else
274         pte_bits |= HWCONST64(_MMU_VER2, PTE, READ_ONLY, FALSE);
275 
276     // atomic disable 7:7
277     if (prot == UVM_PROT_READ_WRITE_ATOMIC)
278         pte_bits |= HWCONST64(_MMU_VER2, PTE, ATOMIC_DISABLE, FALSE);
279     else
280         pte_bits |= HWCONST64(_MMU_VER2, PTE, ATOMIC_DISABLE, TRUE);
281 
282     address >>= NV_MMU_VER2_PTE_ADDRESS_SHIFT;
283     if (aperture == UVM_APERTURE_SYS) {
284         // sys address 53:8
285         pte_bits |= HWVALUE64(_MMU_VER2, PTE, ADDRESS_SYS, address);
286     }
287     else {
288         // vid address 32:8
289         pte_bits |= HWVALUE64(_MMU_VER2, PTE, ADDRESS_VID, address);
290 
291 
292         // peer id 35:33
293         if (aperture != UVM_APERTURE_VID)
294             pte_bits |= HWVALUE64(_MMU_VER2, PTE, ADDRESS_VID_PEER, UVM_APERTURE_PEER_ID(aperture));
295 
296         // comptagline 53:36
297         pte_bits |= HWVALUE64(_MMU_VER2, PTE, COMPTAGLINE, 0);
298     }
299 
300     pte_bits |= HWVALUE64(_MMU_VER2, PTE, KIND, NV_MMU_PTE_KIND_PITCH);
301 
302     return pte_bits;
303 }
304 
305 static NvU64 make_sked_reflected_pte_pascal(void)
306 {
307     NvU64 pte_bits = 0;
308 
309     pte_bits |= HWCONST64(_MMU_VER2, PTE, VALID, TRUE);
310     pte_bits |= HWVALUE64(_MMU_VER2, PTE, KIND, NV_MMU_PTE_KIND_SMSKED_MESSAGE);
311 
312     return pte_bits;
313 }
314 
315 static NvU64 make_sparse_pte_pascal(void)
316 {
317     return HWCONST64(_MMU_VER2, PTE, VALID, FALSE) |
318            HWCONST64(_MMU_VER2, PTE, VOL,   TRUE);
319 }
320 
321 static NvU64 poisoned_pte_pascal(void)
322 {
323     // An invalid PTE won't be fatal from faultable units like SM, which is the
324     // most likely source of bad PTE accesses.
325 
326     // Engines with priv accesses won't fault on the priv PTE, so add a backup
327     // mechanism using an impossible memory address. MMU will trigger an
328     // interrupt when it detects a bad physical address.
329     //
330     // This address has to fit within 37 bits (max address width of vidmem) and
331     // be aligned to page_size.
332     NvU64 phys_addr = 0x1bad000000ULL;
333 
334     NvU64 pte_bits = make_pte_pascal(UVM_APERTURE_VID, phys_addr, UVM_PROT_READ_ONLY, UVM_MMU_PTE_FLAGS_NONE);
335     return WRITE_HWCONST64(pte_bits, _MMU_VER2, PTE, PRIVILEGE, TRUE);
336 }
337 
// HAL operation table for the Pascal page-table format, handed out by
// uvm_hal_mmu_mode_pascal() below.
static uvm_mmu_mode_hal_t pascal_mmu_mode_hal =
{
    .make_pte = make_pte_pascal,
    .make_sked_reflected_pte = make_sked_reflected_pte_pascal,
    .make_sparse_pte = make_sparse_pte_pascal,
    .unmapped_pte = unmapped_pte_pascal,
    .poisoned_pte = poisoned_pte_pascal,
    .make_pde = make_pde_pascal,
    .entry_size = entry_size_pascal,
    .index_bits = index_bits_pascal,
    .entries_per_index = entries_per_index_pascal,
    .entry_offset = entry_offset_pascal,
    .num_va_bits = num_va_bits_pascal,
    .allocation_size = allocation_size_pascal,
    .page_table_depth = page_table_depth_pascal,
    .page_sizes = page_sizes_pascal
};
355 
356 uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU32 big_page_size)
357 {
358     UVM_ASSERT(big_page_size == UVM_PAGE_SIZE_64K || big_page_size == UVM_PAGE_SIZE_128K);
359 
360     // TODO: Bug 1789555: RM should reject the creation of GPU VA spaces with
361     // 128K big page size for Pascal+ GPUs
362     if (big_page_size == UVM_PAGE_SIZE_128K)
363         return NULL;
364 
365     return &pascal_mmu_mode_hal;
366 }
367 
368 void uvm_hal_pascal_mmu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
369 {
370     volatile NvU32 *prefetch_control;
371     NvU32 prefetch_control_value;
372 
373     prefetch_control = parent_gpu->fault_buffer_info.rm_info.replayable.pPrefetchCtrl;
374 
375     prefetch_control_value = UVM_GPU_READ_ONCE(*prefetch_control);
376     prefetch_control_value = WRITE_HWCONST(prefetch_control_value, _PFB_PRI_MMU_PAGE, FAULT_CTRL, PRF_FILTER, SEND_ALL);
377     UVM_GPU_WRITE_ONCE(*prefetch_control, prefetch_control_value);
378 }
379 
380 void uvm_hal_pascal_mmu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
381 {
382     volatile NvU32 *prefetch_control;
383     NvU32 prefetch_control_value;
384 
385     prefetch_control = parent_gpu->fault_buffer_info.rm_info.replayable.pPrefetchCtrl;
386 
387     prefetch_control_value = UVM_GPU_READ_ONCE(*prefetch_control);
388     prefetch_control_value = WRITE_HWCONST(prefetch_control_value, _PFB_PRI_MMU_PAGE, FAULT_CTRL, PRF_FILTER, SEND_NONE);
389     UVM_GPU_WRITE_ONCE(*prefetch_control, prefetch_control_value);
390 }
391 
// Map a GPC fault client id (NV_PFAULT_CLIENT_GPC_*) to the id of the uTLB
// that generated the fault. RAST/GCC/GPCCS share the RGG uTLB; each LTPn uTLB
// below covers PE_n, TPCCS_n and two L1/T1 client pairs.
NvU16 uvm_hal_pascal_mmu_client_id_to_utlb_id(NvU16 client_id)
{
    switch (client_id) {
        case NV_PFAULT_CLIENT_GPC_RAST:
        case NV_PFAULT_CLIENT_GPC_GCC:
        case NV_PFAULT_CLIENT_GPC_GPCCS:
            return UVM_PASCAL_GPC_UTLB_ID_RGG;
        case NV_PFAULT_CLIENT_GPC_PE_0:
        case NV_PFAULT_CLIENT_GPC_TPCCS_0:
        case NV_PFAULT_CLIENT_GPC_L1_0:
        case NV_PFAULT_CLIENT_GPC_T1_0:
        case NV_PFAULT_CLIENT_GPC_L1_1:
        case NV_PFAULT_CLIENT_GPC_T1_1:
            return UVM_PASCAL_GPC_UTLB_ID_LTP0;
        case NV_PFAULT_CLIENT_GPC_PE_1:
        case NV_PFAULT_CLIENT_GPC_TPCCS_1:
        case NV_PFAULT_CLIENT_GPC_L1_2:
        case NV_PFAULT_CLIENT_GPC_T1_2:
        case NV_PFAULT_CLIENT_GPC_L1_3:
        case NV_PFAULT_CLIENT_GPC_T1_3:
            return UVM_PASCAL_GPC_UTLB_ID_LTP1;
        case NV_PFAULT_CLIENT_GPC_PE_2:
        case NV_PFAULT_CLIENT_GPC_TPCCS_2:
        case NV_PFAULT_CLIENT_GPC_L1_4:
        case NV_PFAULT_CLIENT_GPC_T1_4:
        case NV_PFAULT_CLIENT_GPC_L1_5:
        case NV_PFAULT_CLIENT_GPC_T1_5:
            return UVM_PASCAL_GPC_UTLB_ID_LTP2;
        case NV_PFAULT_CLIENT_GPC_PE_3:
        case NV_PFAULT_CLIENT_GPC_TPCCS_3:
        case NV_PFAULT_CLIENT_GPC_L1_6:
        case NV_PFAULT_CLIENT_GPC_T1_6:
        case NV_PFAULT_CLIENT_GPC_L1_7:
        case NV_PFAULT_CLIENT_GPC_T1_7:
            return UVM_PASCAL_GPC_UTLB_ID_LTP3;
        case NV_PFAULT_CLIENT_GPC_PE_4:
        case NV_PFAULT_CLIENT_GPC_TPCCS_4:
        case NV_PFAULT_CLIENT_GPC_L1_8:
        case NV_PFAULT_CLIENT_GPC_T1_8:
        case NV_PFAULT_CLIENT_GPC_L1_9:
        case NV_PFAULT_CLIENT_GPC_T1_9:
            return UVM_PASCAL_GPC_UTLB_ID_LTP4;
        default:
            UVM_ASSERT_MSG(false, "Invalid client value: 0x%x\n", client_id);
    }

    // Unreachable for valid client ids; keeps builds with asserts compiled
    // out well-defined.
    return 0;
}
440