1 /* 2 * SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 * SPDX-License-Identifier: MIT 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 #include "os/os.h" 25 #include "kernel/gpu/nvlink/kernel_nvlink.h" 26 #include "kernel/gpu/nvlink/kernel_ioctrl.h" 27 #include "gpu/mem_mgr/mem_mgr.h" 28 #include "gpu/gpu.h" 29 30 /** 31 * @brief This routine overrides the nvlink connection topology if chiplib arguments 32 * have been provided. It queries MODS API for the chiplib overrides and based 33 * on that, derives hshub configuration values that are programmed at a later 34 * stage during nvlink state load. The override values should exist for ALL 35 * links or NO links. The field encoding can be found in phys_nvlink.h 36 * 37 * @param[in] pGpu OBJGPU pointer 38 * @param[in] pKernelNvlink KernelNvlink pointer 39 * @param[in] phase unused 40 */ 41 NV_STATUS 42 knvlinkOverrideConfig_GA100 43 ( 44 OBJGPU *pGpu, 45 KernelNvlink *pKernelNvlink, 46 NvU32 phase 47 ) 48 { 49 NV_STATUS status = NV_OK; 50 NvU32 i; 51 52 pKernelNvlink->pLinkConnection = portMemAllocNonPaged(sizeof(NvU32) * NVLINK_MAX_LINKS_SW); 53 if (pKernelNvlink->pLinkConnection == NULL) 54 return NV_ERR_NO_MEMORY; 55 56 portMemSet(pKernelNvlink->pLinkConnection, 0, sizeof(NvU32) * NVLINK_MAX_LINKS_SW); 57 58 // 59 // To deal with the nonlegacy force config reg keys, we need to now fill 60 // in the default phys links, use a unity 1/1 map. 61 // 62 for (i = 0; i < NVLINK_MAX_LINKS_SW; i++) 63 { 64 // The physical link is guaranteed valid in all cases 65 pKernelNvlink->pLinkConnection[i] = DRF_NUM(_NVLINK, _ARCH_CONNECTION, _PHYSICAL_LINK, i); 66 } 67 68 // Check to see if there are chiplib overrides for nvlink configuration 69 status = osGetForcedNVLinkConnection(pGpu, NVLINK_MAX_LINKS_SW, pKernelNvlink->pLinkConnection); 70 if ((NV_OK != status) || pKernelNvlink->bForceAutoconfig) 71 { 72 // A non-OK status implies there are no overrides. 73 NV_PRINTF(LEVEL_INFO, "Not using forced config!\n"); 74 75 portMemFree(pKernelNvlink->pLinkConnection); 76 pKernelNvlink->pLinkConnection = NULL; 77 return NV_OK; 78 } 79 80 NV2080_CTRL_NVLINK_PROCESS_FORCED_CONFIGS_PARAMS forcedConfigParams; 81 portMemSet(&forcedConfigParams, 0, sizeof(forcedConfigParams)); 82 83 forcedConfigParams.bLegacyForcedConfig = NV_FALSE; 84 portMemCopy(&forcedConfigParams.linkConnection, (sizeof(NvU32) * NVLINK_MAX_LINKS_SW), 85 pKernelNvlink->pLinkConnection, (sizeof(NvU32) * NVLINK_MAX_LINKS_SW)); 86 87 // 88 // RPC to GSP-RM to for GSP-RM to process the forced NVLink configurations. This includes 89 // setting up of HSHUB state and programming the memory subsystem registers. 90 // 91 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, 92 NV2080_CTRL_CMD_NVLINK_PROCESS_FORCED_CONFIGS, 93 (void *)&forcedConfigParams, 94 sizeof(forcedConfigParams)); 95 if (status != NV_OK) 96 { 97 NV_PRINTF(LEVEL_ERROR, "Failed to process forced NVLink configurations !\n"); 98 99 portMemFree(pKernelNvlink->pLinkConnection); 100 pKernelNvlink->pLinkConnection = NULL; 101 return status; 102 } 103 104 pKernelNvlink->bOverrideComputePeerMode = forcedConfigParams.bOverrideComputePeerMode; 105 106 // 107 // Now, CPU-RM should process the forced configurations and update its state, which includes 108 // the topology information and the required link masks. 109 // 110 return knvlinkSetupTopologyForForcedConfig(pGpu, pKernelNvlink); 111 } 112 113 /*! 114 * @brief Wrapper function chose between removing all or peer mappings 115 * 116 * @param[in] pGpu OBJGPU pointer 117 * @param[in] pKernelNvlink KernelNvlink pointer 118 * @param[in] bAllMapping Whether both sysmem and peer mappings should be removed 119 * @param[in] peerMask Mask of peers for which mappings will be removed 120 * @param[in] bL2Entry Are the mappings being removed because of L2 entry? 121 * 122 * @return Returns NV_OK on success 123 */ 124 NV_STATUS 125 knvlinkRemoveMapping_GA100 126 ( 127 OBJGPU *pGpu, 128 KernelNvlink *pKernelNvlink, 129 NvBool bAllMapping, 130 NvU32 peerMask, 131 NvBool bL2Entry 132 ) 133 { 134 NV_STATUS status = NV_OK; 135 NvU32 peerId; 136 NvBool bBufferReady = NV_FALSE; 137 138 NV2080_CTRL_NVLINK_REMOVE_NVLINK_MAPPING_PARAMS params; 139 portMemSet(¶ms, 0, sizeof(params)); 140 141 params.bL2Entry = bL2Entry; 142 143 if (bAllMapping) 144 { 145 params.mapTypeMask = NV2080_CTRL_NVLINK_REMOVE_NVLINK_MAPPING_TYPE_SYSMEM | 146 NV2080_CTRL_NVLINK_REMOVE_NVLINK_MAPPING_TYPE_PEER; 147 params.peerMask = (1 << NVLINK_MAX_PEERS_SW) - 1; 148 } 149 else 150 { 151 params.mapTypeMask = NV2080_CTRL_NVLINK_REMOVE_NVLINK_MAPPING_TYPE_PEER; 152 params.peerMask = peerMask; 153 } 154 155 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, 156 NV2080_CTRL_CMD_NVLINK_REMOVE_NVLINK_MAPPING, 157 (void *)¶ms, sizeof(params)); 158 if (status != NV_OK) 159 return status; 160 161 // 162 // Ampere+, along with HSHUB config registers, we also need to update 163 // the MUX registers and the connection config registers. So, we have 164 // to call nvlinkCurrentConfig instead of nvlinkUpdateHshubConfigRegs 165 // 166 status = knvlinkSyncLinkMasksAndVbiosInfo(pGpu, pKernelNvlink); 167 if (status != NV_OK) 168 { 169 NV_ASSERT(status != NV_OK); 170 return status; 171 } 172 173 if (pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_CONFIG_REQUIRE_INITIALIZED_LINKS_CHECK)) 174 { 175 FOR_EACH_INDEX_IN_MASK(32, peerId, peerMask) 176 { 177 if (pKernelNvlink->initializedLinks & pKernelNvlink->peerLinkMasks[peerId]) 178 { 179 bBufferReady = NV_TRUE; 180 break; 181 } 182 } FOR_EACH_INDEX_IN_MASK_END; 183 184 if (!bBufferReady) 185 { 186 status = knvlinkUpdateCurrentConfig(pGpu, pKernelNvlink); 187 188 } 189 } 190 else 191 { 192 status = knvlinkUpdateCurrentConfig(pGpu, pKernelNvlink); 193 } 194 195 return status; 196 } 197 198 /*! 199 * @brief Validates fabric base address. 200 * 201 * @param[in] pGpu OBJGPU pointer 202 * @param[in] pKernelNvlink KernelNvlink pointer 203 * @param[in] fabricBaseAddr Address to be validated 204 * 205 * @returns On success, NV_OK. 206 * On failure, returns NV_ERR_XXX. 207 */ 208 NV_STATUS 209 knvlinkValidateFabricBaseAddress_GA100 210 ( 211 OBJGPU *pGpu, 212 KernelNvlink *pKernelNvlink, 213 NvU64 fabricBaseAddr 214 ) 215 { 216 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu); 217 NvU64 fbSizeBytes; 218 NvU64 fbUpperLimit; 219 220 fbSizeBytes = pMemoryManager->Ram.fbTotalMemSizeMb << 20; 221 222 // 223 // Ampere SKUs will be paired with NVSwitches (Limerock) supporting 2K 224 // mapslots that can cover 64GB each. Make sure that the fabric base 225 // address being used is valid to cover whole frame buffer. 226 // 227 228 // Check if fabric address is aligned to mapslot size. 229 if (fabricBaseAddr & (NVBIT64(36) - 1)) 230 { 231 return NV_ERR_INVALID_ARGUMENT; 232 } 233 234 // Align fbSize to mapslot size. 235 fbSizeBytes = RM_ALIGN_UP(fbSizeBytes, NVBIT64(36)); 236 237 238 // Check for integer overflow 239 if (!portSafeAddU64(fabricBaseAddr, fbSizeBytes, &fbUpperLimit)) 240 { 241 return NV_ERR_INVALID_ARGUMENT; 242 } 243 244 // Make sure the address range doesn't go beyond the limit, (2K * 64GB). 245 if (fbUpperLimit > NVBIT64(47)) 246 { 247 return NV_ERR_INVALID_ARGUMENT; 248 } 249 250 return NV_OK; 251 } 252