/*******************************************************************************
    Copyright (c) 2018-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_hal.h"
#include "uvm_hal_types.h"
#include "clc6b5.h"
#include "clc7b5.h"
#include "clc56f.h" // Needed because HAL ce_init pushes SET_OBJECT

bool uvm_hal_ampere_ce_method_is_valid_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
{
    if (!uvm_channel_is_proxy(push->channel))
        return true;

    switch (method_address) {
        case NVC56F_SET_OBJECT:
        case NVC6B5_SET_SEMAPHORE_A:
        case NVC6B5_SET_SEMAPHORE_B:
        case NVC6B5_SET_SEMAPHORE_PAYLOAD:
        case NVC6B5_SET_SRC_PHYS_MODE:
        case NVC6B5_SET_DST_PHYS_MODE:
        case NVC6B5_LAUNCH_DMA:
        case NVC6B5_OFFSET_IN_UPPER:
        case NVC6B5_OFFSET_IN_LOWER:
        case NVC6B5_OFFSET_OUT_UPPER:
        case NVC6B5_OFFSET_OUT_LOWER:
        case NVC6B5_LINE_LENGTH_IN:
        case NVC6B5_SET_REMAP_CONST_A:
        case NVC6B5_SET_REMAP_CONST_B:
        case NVC6B5_SET_REMAP_COMPONENTS:
            return true;
    }

    UVM_ERR_PRINT("Unsupported CE method 0x%x\n", method_address);
    return false;
}
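
// Illustrative usage sketch (not part of the driver): code that builds a push
// for a proxy channel could reject an unsupported method up front with the
// helper above. The surrounding error handling is assumed, only the helper is
// real:
//
//     if (!uvm_hal_ampere_ce_method_is_valid_c6b5(push, method_address, method_data))
//         return NV_ERR_INVALID_ARGUMENT;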

static NvU32 ce_aperture(uvm_aperture_t aperture)
{
    BUILD_BUG_ON(HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB) !=
                 HWCONST(C6B5, SET_DST_PHYS_MODE, TARGET, LOCAL_FB));
    BUILD_BUG_ON(HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM) !=
                 HWCONST(C6B5, SET_DST_PHYS_MODE, TARGET, COHERENT_SYSMEM));
    BUILD_BUG_ON(HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, PEERMEM) !=
                 HWCONST(C6B5, SET_DST_PHYS_MODE, TARGET, PEERMEM));

    if (aperture == UVM_APERTURE_SYS) {
        return HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM);
    }
    else if (aperture == UVM_APERTURE_VID) {
        return HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB);
    }
    else {
        return HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, PEERMEM) |
               HWVALUE(C6B5, SET_SRC_PHYS_MODE, FLA, 0) |
               HWVALUE(C6B5, SET_SRC_PHYS_MODE, PEER_ID, UVM_APERTURE_PEER_ID(aperture));
    }
}
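
// For example (illustrative only, assuming a peer aperture built with
// UVM_APERTURE_PEER(2)), the helper above selects the PEERMEM target and
// encodes peer ID 2, while UVM_APERTURE_SYS and UVM_APERTURE_VID map to
// COHERENT_SYSMEM and LOCAL_FB respectively:
//
//     NvU32 phys_mode = ce_aperture(UVM_APERTURE_PEER(2));
//     // phys_mode == TARGET PEERMEM | FLA(0) | PEER_ID(2)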

// Push SET_{SRC,DST}_PHYS mode if needed and return LAUNCH_DMA_{SRC,DST}_TYPE
// flags
NvU32 uvm_hal_ampere_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
{
    NvU32 launch_dma_src_dst_type = 0;

    if (src.is_virtual)
        launch_dma_src_dst_type |= HWCONST(C6B5, LAUNCH_DMA, SRC_TYPE, VIRTUAL);
    else
        launch_dma_src_dst_type |= HWCONST(C6B5, LAUNCH_DMA, SRC_TYPE, PHYSICAL);

    if (dst.is_virtual)
        launch_dma_src_dst_type |= HWCONST(C6B5, LAUNCH_DMA, DST_TYPE, VIRTUAL);
    else
        launch_dma_src_dst_type |= HWCONST(C6B5, LAUNCH_DMA, DST_TYPE, PHYSICAL);

    if (!src.is_virtual && !dst.is_virtual) {
        NV_PUSH_2U(C6B5, SET_SRC_PHYS_MODE, ce_aperture(src.aperture),
                         SET_DST_PHYS_MODE, ce_aperture(dst.aperture));
    }
    else if (!src.is_virtual) {
        NV_PUSH_1U(C6B5, SET_SRC_PHYS_MODE, ce_aperture(src.aperture));
    }
    else if (!dst.is_virtual) {
        NV_PUSH_1U(C6B5, SET_DST_PHYS_MODE, ce_aperture(dst.aperture));
    }

    return launch_dma_src_dst_type;
}
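
// Illustrative sketch (not the actual memcopy HAL): the returned flags are
// intended to be OR'd into the LAUNCH_DMA method together with the PLC mode
// and whatever other transfer settings the caller selects, e.g.:
//
//     NvU32 launch_dma_src_dst_type = uvm_hal_ampere_ce_phys_mode(push, dst, src);
//     NvU32 launch_dma_plc_mode = uvm_hal_ampere_ce_plc_mode_c7b5();
//
//     NV_PUSH_1U(C6B5, LAUNCH_DMA,
//                HWCONST(C6B5, LAUNCH_DMA, DATA_TRANSFER_TYPE, NON_PIPELINED) |
//                launch_dma_src_dst_type |
//                launch_dma_plc_mode);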

NvU32 uvm_hal_ampere_ce_plc_mode_c7b5(void)
{
    return HWCONST(C7B5, LAUNCH_DMA, DISABLE_PLC, TRUE);
}

bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
{
    NvU64 push_begin_gpu_va;
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);

    if (!uvm_gpu_is_virt_mode_sriov_heavy(gpu))
        return true;

    if (uvm_channel_is_proxy(push->channel)) {
        if (dst.is_virtual) {
            UVM_ERR_PRINT("Destination address of memcopy must be physical, not virtual\n");
            return false;
        }

        if (dst.aperture != UVM_APERTURE_VID) {
            UVM_ERR_PRINT("Destination address of memcopy must be in vidmem\n");
            return false;
        }

        // Only the destination alignment is checked: the source alignment is
        // irrelevant, since the source is a pushbuffer offset.
        if (!IS_ALIGNED(dst.address, 8)) {
            UVM_ERR_PRINT("Destination address of memcopy is not 8-byte aligned\n");
            return false;
        }

        if (!src.is_virtual) {
            UVM_ERR_PRINT("Source address of memcopy must be virtual\n");
            return false;
        }

        push_begin_gpu_va = uvm_pushbuffer_get_gpu_va_for_push(push->channel->pool->manager->pushbuffer, push);

        if ((src.address < push_begin_gpu_va) || (src.address >= push_begin_gpu_va + uvm_push_get_size(push))) {
            UVM_ERR_PRINT("Source address of memcopy must point to pushbuffer\n");
            return false;
        }
    }
    else {
        // TODO: Bug 3429418: When in SR-IOV heavy, a memcopy/memset pushed to a
        // UVM internal channel cannot use peer physical addresses.
        if (!dst.is_virtual && !uvm_aperture_is_peer(dst.aperture)) {
            UVM_ERR_PRINT("Destination address of memcopy must be virtual, not physical (aperture: %s)\n",
                          uvm_gpu_address_aperture_string(dst));
            return false;
        }

        if (!src.is_virtual && !uvm_aperture_is_peer(src.aperture)) {
            UVM_ERR_PRINT("Source address of memcopy must be virtual, not physical (aperture: %s)\n",
                          uvm_gpu_address_aperture_string(src));
            return false;
        }
    }

    return true;
}
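
// Example of the proxy-channel constraints above (illustrative values only):
// the destination must be 8-byte aligned vidmem, and the source virtual
// address must fall within the current push, e.g.:
//
//     uvm_gpu_address_t dst = uvm_gpu_address_physical(UVM_APERTURE_VID, 0x1000);
//     uvm_gpu_address_t src = uvm_gpu_address_virtual(push_begin_gpu_va + 0x20);
//     UVM_ASSERT(uvm_hal_ampere_ce_memcopy_is_valid_c6b5(push, dst, src));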

// In SR-IOV heavy (GA100 only), the UVM driver is expected to push a patched
// version of an inlined memcopy to the proxy channels. The patching consists of
// passing the offset of the inlined data within the push as the source virtual
// address, instead of passing its GPU VA.
//
// Copies pushed to internal channels use the GPU VA of the inlined data,
// irrespective of the virtualization mode.
void uvm_hal_ampere_ce_memcopy_patch_src_c6b5(uvm_push_t *push, uvm_gpu_address_t *src)
{
    if (!uvm_channel_is_proxy(push->channel))
        return;

    src->address -= uvm_pushbuffer_get_gpu_va_for_push(push->channel->pool->manager->pushbuffer, push);
}
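
// Illustrative sketch of the patching above (inline_data_gpu_va is a
// hypothetical GPU VA of inline data placed in the current push):
//
//     uvm_gpu_address_t src = uvm_gpu_address_virtual(inline_data_gpu_va);
//     uvm_hal_ampere_ce_memcopy_patch_src_c6b5(push, &src);
//     // On a proxy channel, src.address now holds the byte offset of the
//     // inline data within the push; on internal channels it is left as-is.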

bool uvm_hal_ampere_ce_memset_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size)
{
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);

    if (!uvm_gpu_is_virt_mode_sriov_heavy(gpu))
        return true;

    if (uvm_channel_is_proxy(push->channel)) {
        if (dst.is_virtual) {
            UVM_ERR_PRINT("Destination address of memset must be physical, not virtual\n");
            return false;
        }

        if (dst.aperture != UVM_APERTURE_VID) {
            UVM_ERR_PRINT("Destination address of memset must be in vidmem\n");
            return false;
        }

        if (!IS_ALIGNED(dst.address, 8)) {
            UVM_ERR_PRINT("Destination address of memset is not 8-byte aligned\n");
            return false;
        }

        // Disallow memsets that don't match the page table/directory entry
        // size. PDE0 entries are 16 bytes wide, but those are written using a
        // memcopy.
        //
        // The memset size is not checked to be a multiple of the element size
        // because that check is not specific to SR-IOV heavy, and it is already
        // present in the uvm_hal_*_memset_* functions.
        if (element_size != 8) {
            UVM_ERR_PRINT("Memset data must be 8 bytes wide, but found %zu instead\n", element_size);
            return false;
        }
    }
    // TODO: Bug 3429418: When in SR-IOV heavy, a memcopy/memset pushed to a
    // UVM internal channel cannot use peer physical addresses.
    else if (!dst.is_virtual && !uvm_aperture_is_peer(dst.aperture)) {
        UVM_ERR_PRINT("Destination address of memset must be virtual, not physical (aperture: %s)\n",
                      uvm_gpu_address_aperture_string(dst));
        return false;
    }

    return true;
}
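
// Example of the proxy-channel memset constraints above (illustrative values
// only, assuming SR-IOV heavy; proxy_push is a hypothetical push to a proxy
// channel): only 8-byte elements aimed at aligned vidmem are accepted,
// matching PTE/PDE writes:
//
//     uvm_gpu_address_t pte = uvm_gpu_address_physical(UVM_APERTURE_VID, 0x2000);
//     UVM_ASSERT(uvm_hal_ampere_ce_memset_is_valid_c6b5(proxy_push, pte, 8));
//     UVM_ASSERT(!uvm_hal_ampere_ce_memset_is_valid_c6b5(proxy_push, pte, 4));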