1 /******************************************************************************* 2 Copyright (c) 2018-2022 NVIDIA Corporation 3 4 Permission is hereby granted, free of charge, to any person obtaining a copy 5 of this software and associated documentation files (the "Software"), to 6 deal in the Software without restriction, including without limitation the 7 rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 sell copies of the Software, and to permit persons to whom the Software is 9 furnished to do so, subject to the following conditions: 10 11 The above copyright notice and this permission notice shall be 12 included in all copies or substantial portions of the Software. 13 14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 DEALINGS IN THE SOFTWARE. 21 22 *******************************************************************************/ 23 24 #include "uvm_hal.h" 25 #include "uvm_hal_types.h" 26 #include "clc6b5.h" 27 #include "clc7b5.h" 28 #include "clc56f.h" // Needed because HAL ce_init pushes SET_OBJECT 29 30 bool uvm_hal_ampere_ce_method_is_valid_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data) 31 { 32 if (!uvm_channel_is_proxy(push->channel)) 33 return true; 34 35 switch (method_address) { 36 case NVC56F_SET_OBJECT: 37 case NVC6B5_SET_SEMAPHORE_A: 38 case NVC6B5_SET_SEMAPHORE_B: 39 case NVC6B5_SET_SEMAPHORE_PAYLOAD: 40 case NVC6B5_SET_SRC_PHYS_MODE: 41 case NVC6B5_SET_DST_PHYS_MODE: 42 case NVC6B5_LAUNCH_DMA: 43 case NVC6B5_OFFSET_IN_UPPER: 44 case NVC6B5_OFFSET_IN_LOWER: 45 case NVC6B5_OFFSET_OUT_UPPER: 46 case NVC6B5_OFFSET_OUT_LOWER: 47 case NVC6B5_LINE_LENGTH_IN: 48 case NVC6B5_SET_REMAP_CONST_A: 49 case NVC6B5_SET_REMAP_CONST_B: 50 case NVC6B5_SET_REMAP_COMPONENTS: 51 return true; 52 } 53 54 UVM_ERR_PRINT("Unsupported CE method 0x%x\n", method_address); 55 return false; 56 } 57 58 static NvU32 ce_aperture(uvm_aperture_t aperture) 59 { 60 BUILD_BUG_ON(HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB) != 61 HWCONST(C6B5, SET_DST_PHYS_MODE, TARGET, LOCAL_FB)); 62 BUILD_BUG_ON(HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM) != 63 HWCONST(C6B5, SET_DST_PHYS_MODE, TARGET, COHERENT_SYSMEM)); 64 BUILD_BUG_ON(HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, PEERMEM) != 65 HWCONST(C6B5, SET_DST_PHYS_MODE, TARGET, PEERMEM)); 66 67 if (aperture == UVM_APERTURE_SYS) { 68 return HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM); 69 } 70 else if (aperture == UVM_APERTURE_VID) { 71 return HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB); 72 } 73 else { 74 return HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, PEERMEM) | 75 HWVALUE(C6B5, SET_SRC_PHYS_MODE, FLA, 0) | 76 HWVALUE(C6B5, SET_SRC_PHYS_MODE, PEER_ID, UVM_APERTURE_PEER_ID(aperture)); 77 } 78 } 79 80 // Push SET_{SRC,DST}_PHYS mode if needed and return LAUNCH_DMA_{SRC,DST}_TYPE 81 // flags 82 NvU32 uvm_hal_ampere_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src) 83 { 84 NvU32 launch_dma_src_dst_type = 0; 85 86 if (src.is_virtual) 87 launch_dma_src_dst_type |= HWCONST(C6B5, LAUNCH_DMA, SRC_TYPE, VIRTUAL); 88 else 89 launch_dma_src_dst_type |= HWCONST(C6B5, LAUNCH_DMA, SRC_TYPE, PHYSICAL); 90 91 if (dst.is_virtual) 92 launch_dma_src_dst_type |= HWCONST(C6B5, LAUNCH_DMA, DST_TYPE, VIRTUAL); 93 else 94 launch_dma_src_dst_type |= HWCONST(C6B5, LAUNCH_DMA, DST_TYPE, PHYSICAL); 95 96 if (!src.is_virtual && !dst.is_virtual) { 97 NV_PUSH_2U(C6B5, SET_SRC_PHYS_MODE, ce_aperture(src.aperture), 98 SET_DST_PHYS_MODE, ce_aperture(dst.aperture)); 99 } 100 else if (!src.is_virtual) { 101 NV_PUSH_1U(C6B5, SET_SRC_PHYS_MODE, ce_aperture(src.aperture)); 102 } 103 else if (!dst.is_virtual) { 104 NV_PUSH_1U(C6B5, SET_DST_PHYS_MODE, ce_aperture(dst.aperture)); 105 } 106 107 return launch_dma_src_dst_type; 108 } 109 110 NvU32 uvm_hal_ampere_ce_plc_mode_c7b5(void) 111 { 112 return HWCONST(C7B5, LAUNCH_DMA, DISABLE_PLC, TRUE); 113 } 114 115 bool uvm_hal_ampere_ce_memcopy_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src) 116 { 117 NvU64 push_begin_gpu_va; 118 uvm_gpu_t *gpu = uvm_push_get_gpu(push); 119 120 if (!uvm_gpu_is_virt_mode_sriov_heavy(gpu)) 121 return true; 122 123 if (uvm_channel_is_proxy(push->channel)) { 124 if (dst.is_virtual) { 125 UVM_ERR_PRINT("Destination address of memcopy must be physical, not virtual\n"); 126 return false; 127 } 128 129 if (dst.aperture != UVM_APERTURE_VID) { 130 UVM_ERR_PRINT("Destination address of memcopy must be in vidmem\n"); 131 return false; 132 } 133 134 // The source address is irrelevant, since it is a pushbuffer offset 135 if (!IS_ALIGNED(dst.address, 8)){ 136 UVM_ERR_PRINT("Destination address of memcopy is not 8-byte aligned"); 137 return false; 138 } 139 140 if (!src.is_virtual) { 141 UVM_ERR_PRINT("Source address of memcopy must be virtual\n"); 142 return false; 143 } 144 145 push_begin_gpu_va = uvm_pushbuffer_get_gpu_va_for_push(push->channel->pool->manager->pushbuffer, push); 146 147 if ((src.address < push_begin_gpu_va) || (src.address >= push_begin_gpu_va + uvm_push_get_size(push))) { 148 UVM_ERR_PRINT("Source address of memcopy must point to pushbuffer\n"); 149 return false; 150 } 151 } 152 else { 153 // TODO: Bug 3429418: When in SR-IOV heavy, a memcopy/memset pushed to a 154 // UVM internal channel cannot use peer physical addresses. 155 if (!dst.is_virtual && !uvm_aperture_is_peer(dst.aperture)) { 156 UVM_ERR_PRINT("Destination address of memcopy must be virtual, not physical (aperture: %s)\n", 157 uvm_gpu_address_aperture_string(dst)); 158 return false; 159 } 160 161 if (!src.is_virtual && !uvm_aperture_is_peer(src.aperture)) { 162 UVM_ERR_PRINT("Source address of memcopy must be virtual, not physical (aperture: %s)\n", 163 uvm_gpu_address_aperture_string(src)); 164 return false; 165 } 166 } 167 168 return true; 169 } 170 171 // In SR-IOV heavy (GA100 only), the UVM driver is expected to push a patched 172 // version of an inlined memcopy to the proxy channels. The patching consists in 173 // passing the offset of the inlined data within the push as the source virtual 174 // address, instead of passing its GPU VA. 175 // 176 // Copies pushed to internal channels use the GPU VA of the inlined data, 177 // irrespective of the virtualization mode. 178 void uvm_hal_ampere_ce_memcopy_patch_src_c6b5(uvm_push_t *push, uvm_gpu_address_t *src) 179 { 180 if (!uvm_channel_is_proxy(push->channel)) 181 return; 182 183 src->address -= uvm_pushbuffer_get_gpu_va_for_push(push->channel->pool->manager->pushbuffer, push); 184 } 185 186 bool uvm_hal_ampere_ce_memset_is_valid_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size) 187 { 188 uvm_gpu_t *gpu = uvm_push_get_gpu(push); 189 190 if (!uvm_gpu_is_virt_mode_sriov_heavy(gpu)) 191 return true; 192 193 if (uvm_channel_is_proxy(push->channel)) { 194 if (dst.is_virtual) { 195 UVM_ERR_PRINT("Destination address of memset must be physical, not virtual\n"); 196 return false; 197 } 198 199 if (dst.aperture != UVM_APERTURE_VID) { 200 UVM_ERR_PRINT("Destination address of memset must be in vidmem\n"); 201 return false; 202 } 203 204 if (!IS_ALIGNED(dst.address, 8)){ 205 UVM_ERR_PRINT("Destination address of memset is not 8-byte aligned"); 206 return false; 207 } 208 209 // Disallow memsets that don't match the page table/directory entry 210 // size. PDE0 entries are 16 bytes wide, but those are written using a 211 // memcopy. 212 // 213 // The memset size is not checked to be a multiple of the element size 214 // because the check is not exclusive of SR-IOV heavy, and it is already 215 // present in the uvm_hal_*_memset_* functions. 216 if (element_size != 8) { 217 UVM_ERR_PRINT("Memset data must be 8 bytes wide, but found %zu instead\n", element_size); 218 return false; 219 } 220 } 221 // TODO: Bug 3429418: When in SR-IOV heavy, a memcopy/memset pushed to a 222 // UVM internal channel cannot use peer physical addresses. 223 else if (!dst.is_virtual && !uvm_aperture_is_peer(dst.aperture)) { 224 UVM_ERR_PRINT("Destination address of memset must be virtual, not physical (aperture: %s)\n", 225 uvm_gpu_address_aperture_string(dst)); 226 return false; 227 } 228 229 return true; 230 } 231