/*******************************************************************************
    Copyright (c) 2016-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_linux.h"
#include "uvm_perf_events.h"
#include "uvm_perf_module.h"
#include "uvm_perf_prefetch.h"
#include "uvm_kvmalloc.h"
#include "uvm_va_block.h"
#include "uvm_va_range.h"
#include "uvm_test.h"

//
// Tunables for prefetch detection/prevention (configurable via module parameters)
//

// Enable/disable prefetch performance heuristics
static unsigned uvm_perf_prefetch_enable = 1;

// TODO: Bug 1778037: [uvm] Use adaptive threshold for page prefetching
#define UVM_PREFETCH_THRESHOLD_DEFAULT 51

// Percentage of children subregions that need to be resident in order to
// trigger prefetching of the remaining subregions
//
// Valid values 1-100
static unsigned uvm_perf_prefetch_threshold = UVM_PREFETCH_THRESHOLD_DEFAULT;

#define UVM_PREFETCH_MIN_FAULTS_MIN     1
#define UVM_PREFETCH_MIN_FAULTS_DEFAULT 1
#define UVM_PREFETCH_MIN_FAULTS_MAX     20

// Minimum number of faults on a block in order to enable the prefetching
// logic
static unsigned uvm_perf_prefetch_min_faults = UVM_PREFETCH_MIN_FAULTS_DEFAULT;

// Module parameters for the tunables
module_param(uvm_perf_prefetch_enable, uint, S_IRUGO);
module_param(uvm_perf_prefetch_threshold, uint, S_IRUGO);
module_param(uvm_perf_prefetch_min_faults, uint, S_IRUGO);

static bool g_uvm_perf_prefetch_enable;
static unsigned g_uvm_perf_prefetch_threshold;
static unsigned g_uvm_perf_prefetch_min_faults;

void uvm_perf_prefetch_bitmap_tree_iter_init(const uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
                                             uvm_page_index_t page_index,
                                             uvm_perf_prefetch_bitmap_tree_iter_t *iter)
{
    UVM_ASSERT(bitmap_tree->level_count > 0);
    UVM_ASSERT_MSG(page_index < bitmap_tree->leaf_count,
                   "%zd vs %zd",
                   (size_t)page_index,
                   (size_t)bitmap_tree->leaf_count);

    iter->level_idx = bitmap_tree->level_count - 1;
    iter->node_idx = page_index;
}

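// The two helpers below translate an iterator position (a node of the bitmap
// tree) into the range of leaf pages that node covers and into the number of
// those pages currently set in the tree's page mask. compute_prefetch_region()
// compares these per-node counts against g_uvm_perf_prefetch_threshold: for
// example, with the default threshold of 51, a 16-page (64KB) node needs at
// least 9 of its pages resident or faulted before its whole range is selected
// for prefetching.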
uvm_va_block_region_t uvm_perf_prefetch_bitmap_tree_iter_get_range(const uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
                                                                   const uvm_perf_prefetch_bitmap_tree_iter_t *iter)
{
    NvU16 range_leaves = uvm_perf_tree_iter_leaf_range(bitmap_tree, iter);
    NvU16 range_start = uvm_perf_tree_iter_leaf_range_start(bitmap_tree, iter);
    uvm_va_block_region_t subregion = uvm_va_block_region(range_start, range_start + range_leaves);

    UVM_ASSERT(iter->level_idx >= 0);
    UVM_ASSERT(iter->level_idx < bitmap_tree->level_count);

    return subregion;
}

NvU16 uvm_perf_prefetch_bitmap_tree_iter_get_count(const uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
                                                   const uvm_perf_prefetch_bitmap_tree_iter_t *iter)
{
    uvm_va_block_region_t subregion = uvm_perf_prefetch_bitmap_tree_iter_get_range(bitmap_tree, iter);

    return uvm_page_mask_region_weight(&bitmap_tree->pages, subregion);
}

static uvm_va_block_region_t compute_prefetch_region(uvm_page_index_t page_index,
                                                     uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
                                                     uvm_va_block_region_t max_prefetch_region)
{
    NvU16 counter;
    uvm_perf_prefetch_bitmap_tree_iter_t iter;
    uvm_va_block_region_t prefetch_region = uvm_va_block_region(0, 0);

    uvm_perf_prefetch_bitmap_tree_traverse_counters(counter,
                                                    bitmap_tree,
                                                    page_index - max_prefetch_region.first + bitmap_tree->offset,
                                                    &iter) {
        uvm_va_block_region_t subregion = uvm_perf_prefetch_bitmap_tree_iter_get_range(bitmap_tree, &iter);
        NvU16 subregion_pages = uvm_va_block_region_num_pages(subregion);

        UVM_ASSERT(counter <= subregion_pages);
        if (counter * 100 > subregion_pages * g_uvm_perf_prefetch_threshold)
            prefetch_region = subregion;
    }

    // Clamp prefetch region to actual pages
    if (prefetch_region.outer) {
        prefetch_region.first += max_prefetch_region.first;
        if (prefetch_region.first < bitmap_tree->offset) {
            prefetch_region.first = bitmap_tree->offset;
        }
        else {
            prefetch_region.first -= bitmap_tree->offset;
            if (prefetch_region.first < max_prefetch_region.first)
                prefetch_region.first = max_prefetch_region.first;
        }

        prefetch_region.outer += max_prefetch_region.first;
        if (prefetch_region.outer < bitmap_tree->offset) {
            prefetch_region.outer = bitmap_tree->offset;
        }
        else {
            prefetch_region.outer -= bitmap_tree->offset;
            if (prefetch_region.outer > max_prefetch_region.outer)
                prefetch_region.outer = max_prefetch_region.outer;
        }
    }

    return prefetch_region;
}

static void grow_fault_granularity_if_no_thrashing(uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
                                                   uvm_va_block_region_t region,
                                                   uvm_page_index_t first,
                                                   const uvm_page_mask_t *faulted_pages,
                                                   const uvm_page_mask_t *thrashing_pages)
{
    if (!uvm_page_mask_region_empty(faulted_pages, region) &&
        (!thrashing_pages || uvm_page_mask_region_empty(thrashing_pages, region))) {
        UVM_ASSERT(region.first >= first);
        region.first = region.first - first + bitmap_tree->offset;
        region.outer = region.outer - first + bitmap_tree->offset;
        UVM_ASSERT(region.outer <= bitmap_tree->leaf_count);
        uvm_page_mask_region_fill(&bitmap_tree->pages, region);
    }
}

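// Mark regions of the bitmap tree's page mask as fully populated at big page
// granularity: the unaligned "prefix", each big page, and the unaligned
// "suffix" of max_prefetch_region are filled whenever they contain at least
// one faulted page and no thrashing pages. This biases the prefetch regions
// computed from the tree towards big page-friendly boundaries.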
static void grow_fault_granularity(uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
                                   NvU32 big_page_size,
                                   uvm_va_block_region_t big_pages_region,
                                   uvm_va_block_region_t max_prefetch_region,
                                   const uvm_page_mask_t *faulted_pages,
                                   const uvm_page_mask_t *thrashing_pages)
{
    uvm_page_index_t pages_per_big_page = big_page_size / PAGE_SIZE;
    uvm_page_index_t page_index;

    // Migrate whole block if no big pages and no page in it is thrashing
    if (!big_pages_region.outer) {
        grow_fault_granularity_if_no_thrashing(bitmap_tree,
                                               max_prefetch_region,
                                               max_prefetch_region.first,
                                               faulted_pages,
                                               thrashing_pages);
        return;
    }

    // Migrate whole "prefix" if no page in it is thrashing
    if (big_pages_region.first > max_prefetch_region.first) {
        uvm_va_block_region_t prefix_region = uvm_va_block_region(max_prefetch_region.first, big_pages_region.first);

        grow_fault_granularity_if_no_thrashing(bitmap_tree,
                                               prefix_region,
                                               max_prefetch_region.first,
                                               faulted_pages,
                                               thrashing_pages);
    }

    // Migrate whole big pages if they are not thrashing
    for (page_index = big_pages_region.first;
         page_index < big_pages_region.outer;
         page_index += pages_per_big_page) {
        uvm_va_block_region_t big_region = uvm_va_block_region(page_index,
                                                               page_index + pages_per_big_page);

        grow_fault_granularity_if_no_thrashing(bitmap_tree,
                                               big_region,
                                               max_prefetch_region.first,
                                               faulted_pages,
                                               thrashing_pages);
    }

    // Migrate whole "suffix" if no page in it is thrashing
    if (big_pages_region.outer < max_prefetch_region.outer) {
        uvm_va_block_region_t suffix_region = uvm_va_block_region(big_pages_region.outer,
                                                                  max_prefetch_region.outer);

        grow_fault_granularity_if_no_thrashing(bitmap_tree,
                                               suffix_region,
                                               max_prefetch_region.first,
                                               faulted_pages,
                                               thrashing_pages);
    }
}

// Within a block we only allow prefetching to a single processor. Therefore,
// if two processors are accessing non-overlapping regions within the same
// block they won't benefit from prefetching.
//
// TODO: Bug 1778034: [uvm] Explore prefetching to different processors within
// a VA block.
static NvU32 uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_block,
                                                          uvm_va_block_context_t *va_block_context,
                                                          uvm_processor_id_t new_residency,
                                                          const uvm_page_mask_t *faulted_pages,
                                                          uvm_va_block_region_t faulted_region,
                                                          uvm_page_mask_t *prefetch_pages,
                                                          uvm_perf_prefetch_bitmap_tree_t *bitmap_tree)
{
    uvm_page_index_t page_index;
    const uvm_page_mask_t *resident_mask = NULL;
    const uvm_page_mask_t *thrashing_pages = NULL;
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
    const uvm_va_policy_t *policy = va_block_context->policy;
    uvm_va_block_region_t max_prefetch_region;
    NvU32 big_page_size;
    uvm_va_block_region_t big_pages_region;

    if (!uvm_id_equal(va_block->prefetch_info.last_migration_proc_id, new_residency)) {
        va_block->prefetch_info.last_migration_proc_id = new_residency;
        va_block->prefetch_info.fault_migrations_to_last_proc = 0;
    }

    // Compute the expanded region that prefetching is allowed from.
    if (uvm_va_block_is_hmm(va_block)) {
        max_prefetch_region = uvm_hmm_get_prefetch_region(va_block,
                                                          va_block_context,
                                                          uvm_va_block_region_start(va_block, faulted_region));
    }
    else {
        max_prefetch_region = uvm_va_block_region_from_block(va_block);
    }

    uvm_page_mask_zero(prefetch_pages);

    if (UVM_ID_IS_CPU(new_residency) || va_block->gpus[uvm_id_gpu_index(new_residency)] != NULL)
        resident_mask = uvm_va_block_resident_mask_get(va_block, new_residency);

    // If this is a first-touch fault and the destination processor is the
    // preferred location, populate the whole max_prefetch_region.
    if (uvm_processor_mask_empty(&va_block->resident) &&
        uvm_id_equal(new_residency, policy->preferred_location)) {
        uvm_page_mask_region_fill(prefetch_pages, max_prefetch_region);
        goto done;
    }

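    // Seed the tree with the pages that will be resident on the destination
    // processor once the current faults are serviced: pages already resident
    // there plus the pages faulted in this batch.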
    if (resident_mask)
        uvm_page_mask_or(&bitmap_tree->pages, resident_mask, faulted_pages);
    else
        uvm_page_mask_copy(&bitmap_tree->pages, faulted_pages);

    // If we are using a subregion of the va_block, align bitmap_tree
    uvm_page_mask_shift_right(&bitmap_tree->pages, &bitmap_tree->pages, max_prefetch_region.first);

    // Get the big page size for the new residency.
    // Assume 64K size if the new residency is the CPU or no GPU va space is
    // registered in the current process for this GPU.
    if (UVM_ID_IS_GPU(new_residency) &&
        uvm_processor_mask_test(&va_space->registered_gpu_va_spaces, new_residency)) {
        uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, new_residency);

        big_page_size = uvm_va_block_gpu_big_page_size(va_block, gpu);
    }
    else {
        big_page_size = UVM_PAGE_SIZE_64K;
    }

    big_pages_region = uvm_va_block_big_page_region_subset(va_block, max_prefetch_region, big_page_size);

    // Adjust the prefetch tree to big page granularity to make sure that we
    // get big page-friendly prefetching hints
    if (big_pages_region.first - max_prefetch_region.first > 0) {
        bitmap_tree->offset = big_page_size / PAGE_SIZE - (big_pages_region.first - max_prefetch_region.first);
        bitmap_tree->leaf_count = uvm_va_block_region_num_pages(max_prefetch_region) + bitmap_tree->offset;

        UVM_ASSERT(bitmap_tree->offset < big_page_size / PAGE_SIZE);
        UVM_ASSERT(bitmap_tree->leaf_count <= PAGES_PER_UVM_VA_BLOCK);

        uvm_page_mask_shift_left(&bitmap_tree->pages, &bitmap_tree->pages, bitmap_tree->offset);
    }
    else {
        bitmap_tree->offset = 0;
        bitmap_tree->leaf_count = uvm_va_block_region_num_pages(max_prefetch_region);
    }

    bitmap_tree->level_count = ilog2(roundup_pow_of_two(bitmap_tree->leaf_count)) + 1;

    thrashing_pages = uvm_perf_thrashing_get_thrashing_pages(va_block);

    // Assume big pages by default. Prefetch the rest of 4KB subregions within
    // the big page region unless there is thrashing.
    grow_fault_granularity(bitmap_tree,
                           big_page_size,
                           big_pages_region,
                           max_prefetch_region,
                           faulted_pages,
                           thrashing_pages);

    // Do not compute prefetch regions with faults on pages that are thrashing
    if (thrashing_pages)
        uvm_page_mask_andnot(&va_block_context->scratch_page_mask, faulted_pages, thrashing_pages);
    else
        uvm_page_mask_copy(&va_block_context->scratch_page_mask, faulted_pages);

    // Update the tree using the scratch mask to compute the pages to prefetch
    for_each_va_block_page_in_region_mask(page_index, &va_block_context->scratch_page_mask, faulted_region) {
        uvm_va_block_region_t region = compute_prefetch_region(page_index, bitmap_tree, max_prefetch_region);

        uvm_page_mask_region_fill(prefetch_pages, region);

        // Early out if we have already prefetched until the end of the VA block
        if (region.outer == max_prefetch_region.outer)
            break;
    }

done:
    // Do not prefetch pages that are going to be migrated/populated due to a
    // fault
    uvm_page_mask_andnot(prefetch_pages, prefetch_pages, faulted_pages);

    // TODO: Bug 1765432: prefetching pages that are already mapped on the CPU
    // would trigger a remap, which may cause a large overhead. Therefore,
    // exclude them from the mask.
    // For HMM, we don't know what pages are mapped by the CPU unless we try to
    // migrate them. Prefetch pages will only be opportunistically migrated.
    if (UVM_ID_IS_CPU(new_residency) && !uvm_va_block_is_hmm(va_block)) {
        uvm_page_mask_and(&va_block_context->scratch_page_mask,
                          resident_mask,
                          &va_block->cpu.pte_bits[UVM_PTE_BITS_CPU_READ]);
        uvm_page_mask_andnot(prefetch_pages, prefetch_pages, &va_block_context->scratch_page_mask);
    }

    // Avoid prefetching pages that are thrashing
    if (thrashing_pages)
        uvm_page_mask_andnot(prefetch_pages, prefetch_pages, thrashing_pages);

    va_block->prefetch_info.fault_migrations_to_last_proc += uvm_page_mask_region_weight(faulted_pages, faulted_region);

    return uvm_page_mask_weight(prefetch_pages);
}

void uvm_perf_prefetch_get_hint(uvm_va_block_t *va_block,
                                uvm_va_block_context_t *va_block_context,
                                uvm_processor_id_t new_residency,
                                const uvm_page_mask_t *faulted_pages,
                                uvm_va_block_region_t faulted_region,
                                uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
                                uvm_perf_prefetch_hint_t *out_hint)
{
    const uvm_va_policy_t *policy = va_block_context->policy;
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
    uvm_page_mask_t *prefetch_pages = &out_hint->prefetch_pages_mask;
    NvU32 pending_prefetch_pages;

    uvm_assert_rwsem_locked(&va_space->lock);
    uvm_assert_mutex_locked(&va_block->lock);
    UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, policy, faulted_region));
    UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block, va_block_context, faulted_region));

    out_hint->residency = UVM_ID_INVALID;
    uvm_page_mask_zero(prefetch_pages);

    if (!g_uvm_perf_prefetch_enable)
        return;

    if (!va_space->test.page_prefetch_enabled)
        return;

    pending_prefetch_pages = uvm_perf_prefetch_prenotify_fault_migrations(va_block,
                                                                          va_block_context,
                                                                          new_residency,
                                                                          faulted_pages,
                                                                          faulted_region,
                                                                          prefetch_pages,
                                                                          bitmap_tree);

    if (va_block->prefetch_info.fault_migrations_to_last_proc >= g_uvm_perf_prefetch_min_faults &&
        pending_prefetch_pages > 0) {
        bool changed = false;
        uvm_range_group_range_t *rgr;

        // Only prefetch in range group ranges which have pages that need to
        // move.
        uvm_range_group_for_each_range_in(rgr, va_space, va_block->start, va_block->end) {
            uvm_va_block_region_t region = uvm_va_block_region_from_start_end(va_block,
                                                                              max(rgr->node.start, va_block->start),
                                                                              min(rgr->node.end, va_block->end));

            if (uvm_page_mask_region_empty(faulted_pages, region) &&
                !uvm_page_mask_region_empty(prefetch_pages, region)) {
                uvm_page_mask_region_clear(prefetch_pages, region);
                changed = true;
            }
        }

        if (changed)
            pending_prefetch_pages = uvm_page_mask_weight(prefetch_pages);

        if (pending_prefetch_pages > 0)
            out_hint->residency = va_block->prefetch_info.last_migration_proc_id;
    }
}

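// Validate the module parameters once at initialization time and cache the
// sanitized values in the g_uvm_perf_prefetch_* globals; out-of-range values
// fall back to the defaults. Since the parameters are read-only at runtime
// (S_IRUGO), they are expected to be given at module load time, e.g.
// (illustrative invocation, module name assumed):
//
//     modprobe nvidia-uvm uvm_perf_prefetch_threshold=75 uvm_perf_prefetch_min_faults=2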
NV_STATUS uvm_perf_prefetch_init(void)
{
    g_uvm_perf_prefetch_enable = uvm_perf_prefetch_enable != 0;

    if (!g_uvm_perf_prefetch_enable)
        return NV_OK;

    if (uvm_perf_prefetch_threshold <= 100) {
        g_uvm_perf_prefetch_threshold = uvm_perf_prefetch_threshold;
    }
    else {
        pr_info("Invalid value %u for uvm_perf_prefetch_threshold. Using %u instead\n",
                uvm_perf_prefetch_threshold, UVM_PREFETCH_THRESHOLD_DEFAULT);

        g_uvm_perf_prefetch_threshold = UVM_PREFETCH_THRESHOLD_DEFAULT;
    }

    if (uvm_perf_prefetch_min_faults >= UVM_PREFETCH_MIN_FAULTS_MIN &&
        uvm_perf_prefetch_min_faults <= UVM_PREFETCH_MIN_FAULTS_MAX) {
        g_uvm_perf_prefetch_min_faults = uvm_perf_prefetch_min_faults;
    }
    else {
        pr_info("Invalid value %u for uvm_perf_prefetch_min_faults. Using %u instead\n",
                uvm_perf_prefetch_min_faults, UVM_PREFETCH_MIN_FAULTS_DEFAULT);

        g_uvm_perf_prefetch_min_faults = UVM_PREFETCH_MIN_FAULTS_DEFAULT;
    }

    return NV_OK;
}

NV_STATUS uvm_test_set_page_prefetch_policy(UVM_TEST_SET_PAGE_PREFETCH_POLICY_PARAMS *params, struct file *filp)
{
    uvm_va_space_t *va_space = uvm_va_space_get(filp);

    if (params->policy >= UVM_TEST_PAGE_PREFETCH_POLICY_MAX)
        return NV_ERR_INVALID_ARGUMENT;

    uvm_va_space_down_write(va_space);

    if (params->policy == UVM_TEST_PAGE_PREFETCH_POLICY_ENABLE)
        va_space->test.page_prefetch_enabled = true;
    else
        va_space->test.page_prefetch_enabled = false;

    uvm_va_space_up_write(va_space);

    return NV_OK;
}