/*******************************************************************************
    Copyright (c) 2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_common.h"
#include "uvm_linux.h"
#include "uvm_types.h"
#include "uvm_va_policy.h"
#include "uvm_va_block.h"
#include "uvm_va_space.h"
#include "uvm_va_range.h"

const uvm_va_policy_t uvm_va_policy_default = {
    .preferred_location = UVM_ID_INVALID,
    .preferred_nid = NUMA_NO_NODE,
    .read_duplication = UVM_READ_DUPLICATION_UNSET,
};

bool uvm_va_policy_is_read_duplicate(const uvm_va_policy_t *policy, uvm_va_space_t *va_space)
{
    return policy->read_duplication == UVM_READ_DUPLICATION_ENABLED &&
           uvm_va_space_can_read_duplicate(va_space, NULL);
}

const uvm_va_policy_t *uvm_va_policy_get(uvm_va_block_t *va_block, NvU64 addr)
{
    uvm_assert_mutex_locked(&va_block->lock);

    if (uvm_va_block_is_hmm(va_block)) {
        const uvm_va_policy_node_t *node = uvm_va_policy_node_find(va_block, addr);

        return node ? &node->policy : &uvm_va_policy_default;
    }
    else {
        return uvm_va_range_get_policy(va_block->va_range);
    }
}
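// Illustrative sketch, not part of the original file: how a caller might look
// up the policy that governs a single address. The enclosing context and the
// gpu_id variable are hypothetical; the locking requirement is the real one
// asserted by uvm_va_policy_get() above.
//
//     uvm_mutex_lock(&va_block->lock);
//     policy = uvm_va_policy_get(va_block, addr);
//     if (uvm_id_equal(policy->preferred_location, gpu_id)) {
//         // addr prefers to be resident on gpu_id
//     }
//     uvm_mutex_unlock(&va_block->lock);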
// HMM va_blocks can have different policies for different regions within the
// va_block. This function checks that the given region is covered by a single
// policy, and asserts if it is covered by more than one.
// It always returns true and is intended to be called only inside UVM_ASSERT()
// so that the check is compiled out of release builds.
// Locking: the va_block lock must be held.
static bool uvm_hmm_va_block_assert_policy_is_valid(uvm_va_block_t *va_block,
                                                    const uvm_va_policy_t *policy,
                                                    uvm_va_block_region_t region)
{
    const uvm_va_policy_node_t *node;

    if (uvm_va_policy_is_default(policy)) {
        // There should only be the default policy within the region.
        node = uvm_va_policy_node_iter_first(va_block,
                                             uvm_va_block_region_start(va_block, region),
                                             uvm_va_block_region_end(va_block, region));
        UVM_ASSERT(!node);
    }
    else {
        // The policy node should cover the region.
        node = uvm_va_policy_node_from_policy(policy);
        UVM_ASSERT(node->node.start <= uvm_va_block_region_start(va_block, region));
        UVM_ASSERT(node->node.end >= uvm_va_block_region_end(va_block, region));
    }

    return true;
}

const uvm_va_policy_t *uvm_va_policy_get_region(uvm_va_block_t *va_block, uvm_va_block_region_t region)
{
    uvm_assert_mutex_locked(&va_block->lock);

    if (uvm_va_block_is_hmm(va_block)) {
        const uvm_va_policy_t *policy;
        const uvm_va_policy_node_t *node =
            uvm_va_policy_node_find(va_block, uvm_va_block_region_start(va_block, region));

        policy = node ? &node->policy : &uvm_va_policy_default;
        UVM_ASSERT(uvm_hmm_va_block_assert_policy_is_valid(va_block, policy, region));
        return policy;
    }
    else {
        return uvm_va_range_get_policy(va_block->va_range);
    }
}

#if UVM_IS_CONFIG_HMM()

static struct kmem_cache *g_uvm_va_policy_node_cache __read_mostly;

static uvm_va_policy_node_t *uvm_va_policy_node_container(uvm_range_tree_node_t *tree_node)
{
    return container_of(tree_node, uvm_va_policy_node_t, node);
}

NV_STATUS uvm_va_policy_init(void)
{
    g_uvm_va_policy_node_cache = NV_KMEM_CACHE_CREATE("uvm_va_policy_node_t", uvm_va_policy_node_t);
    if (!g_uvm_va_policy_node_cache)
        return NV_ERR_NO_MEMORY;

    return NV_OK;
}

void uvm_va_policy_exit(void)
{
    kmem_cache_destroy_safe(&g_uvm_va_policy_node_cache);
}

// Note that start and end are inclusive, page-aligned bounds: start is the
// first byte of the range and end is the last byte, so a single page is
// [start, start + PAGE_SIZE - 1].
static uvm_va_policy_node_t *uvm_va_policy_node_alloc(NvU64 start, NvU64 end)
{
    uvm_va_policy_node_t *node;

    UVM_ASSERT(PAGE_ALIGNED(start));
    UVM_ASSERT(PAGE_ALIGNED(end + 1));

    node = nv_kmem_cache_zalloc(g_uvm_va_policy_node_cache, NV_UVM_GFP_FLAGS);
    if (!node)
        return NULL;

    node->node.start = start;
    node->node.end = end;

    return node;
}

static void uvm_va_policy_node_free(uvm_va_policy_node_t *node)
{
    kmem_cache_free(g_uvm_va_policy_node_cache, node);
}

static uvm_va_policy_node_t *uvm_va_policy_node_create(uvm_va_block_t *va_block, NvU64 start, NvU64 end)
{
    uvm_va_policy_node_t *node;
    NV_STATUS status;

    UVM_ASSERT(uvm_va_block_is_hmm(va_block));
    uvm_assert_mutex_locked(&va_block->lock);
    UVM_ASSERT(start >= va_block->start);
    UVM_ASSERT(end <= va_block->end);

    node = uvm_va_policy_node_alloc(start, end);
    if (!node)
        return NULL;

    node->policy = uvm_va_policy_default;

    status = uvm_range_tree_add(&va_block->hmm.va_policy_tree, &node->node);
    UVM_ASSERT(status == NV_OK);

    return node;
}

uvm_va_policy_node_t *uvm_va_policy_node_find(uvm_va_block_t *va_block, NvU64 addr)
{
    uvm_range_tree_node_t *tree_node;

    UVM_ASSERT(uvm_va_block_is_hmm(va_block));
    uvm_assert_mutex_locked(&va_block->lock);

    tree_node = uvm_range_tree_find(&va_block->hmm.va_policy_tree, addr);
    if (!tree_node)
        return NULL;

    return uvm_va_policy_node_container(tree_node);
}

uvm_va_policy_node_t *uvm_va_policy_node_iter_first(uvm_va_block_t *va_block, NvU64 start, NvU64 end)
{
    uvm_range_tree_node_t *tree_node;

    UVM_ASSERT(uvm_va_block_is_hmm(va_block));
    uvm_assert_mutex_locked(&va_block->lock);

    tree_node = uvm_range_tree_iter_first(&va_block->hmm.va_policy_tree, start, end);
    if (!tree_node)
        return NULL;

    return uvm_va_policy_node_container(tree_node);
}
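// Illustrative sketch, not part of the original file: together with
// uvm_va_policy_node_iter_next() below, uvm_va_policy_node_iter_first() visits
// only the explicit policy nodes overlapping [start, end]; gaps with the
// default policy are skipped. The loop body is hypothetical.
//
//     uvm_va_policy_node_t *node;
//
//     for (node = uvm_va_policy_node_iter_first(va_block, start, end);
//          node;
//          node = uvm_va_policy_node_iter_next(va_block, node, end)) {
//         // node->node.start/end bound the node, node->policy is its policy
//     }
//
// The uvm_for_each_va_policy_node_in_safe() macro used later in this file is
// assumed to wrap this same pattern in a removal-safe form.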
uvm_va_policy_node_t *uvm_va_policy_node_iter_next(uvm_va_block_t *va_block,
                                                   uvm_va_policy_node_t *node,
                                                   NvU64 end)
{
    uvm_range_tree_node_t *tree_node;

    if (!node)
        return NULL;

    tree_node = uvm_range_tree_iter_next(&va_block->hmm.va_policy_tree, &node->node, end);
    if (!tree_node)
        return NULL;

    return uvm_va_policy_node_container(tree_node);
}

const uvm_va_policy_t *uvm_va_policy_iter_first(uvm_va_block_t *va_block,
                                                NvU64 start,
                                                NvU64 end,
                                                uvm_va_policy_node_t **out_node,
                                                uvm_va_block_region_t *out_region)
{
    uvm_range_tree_node_t *tree_node;
    uvm_va_policy_node_t *node;
    const uvm_va_policy_t *policy;
    uvm_va_block_region_t region;

    UVM_ASSERT(uvm_va_block_is_hmm(va_block));
    uvm_assert_mutex_locked(&va_block->lock);
    UVM_ASSERT(start >= va_block->start);
    UVM_ASSERT(end <= va_block->end);
    UVM_ASSERT(start < end);

    region.first = uvm_va_block_cpu_page_index(va_block, start);

    // Even if no policy node overlaps the range, we return the default policy
    // so the caller's loop executes at least once.
    tree_node = uvm_range_tree_iter_first(&va_block->hmm.va_policy_tree, start, end);
    if (tree_node) {
        node = uvm_va_policy_node_container(tree_node);
        if (node->node.start <= start) {
            policy = &node->policy;
            region.outer = uvm_va_block_cpu_page_index(va_block, min(end, node->node.end)) + 1;
        }
        else {
            // This node starts after the requested start, so return the
            // default policy first and use this policy node on the next
            // iteration.
            policy = &uvm_va_policy_default;
            region.outer = uvm_va_block_cpu_page_index(va_block, node->node.start - 1) + 1;
        }
    }
    else {
        node = NULL;
        policy = &uvm_va_policy_default;
        region.outer = uvm_va_block_cpu_page_index(va_block, end) + 1;
    }

    *out_node = node;
    *out_region = region;
    return policy;
}

const uvm_va_policy_t *uvm_va_policy_iter_next(uvm_va_block_t *va_block,
                                               const uvm_va_policy_t *policy,
                                               NvU64 end,
                                               uvm_va_policy_node_t **inout_node,
                                               uvm_va_block_region_t *inout_region)
{
    uvm_va_policy_node_t *node = *inout_node;
    uvm_va_policy_node_t *next;
    uvm_va_block_region_t region;

    if (!node)
        return NULL;

    next = uvm_va_policy_node_iter_next(va_block, node, end);

    if (uvm_va_policy_is_default(policy)) {
        // We haven't used the current policy node yet so use it now.
        next = node;
        policy = &node->policy;
        region = uvm_va_block_region_from_start_end(va_block,
                                                    node->node.start,
                                                    min(end, node->node.end));
    }
    else if (!next) {
        if (node->node.end >= end)
            return NULL;
        policy = &uvm_va_policy_default;
        region.first = inout_region->outer;
        region.outer = uvm_va_block_cpu_page_index(va_block, end) + 1;
    }
    else {
        region.first = inout_region->outer;

        if (next->node.start <= uvm_va_block_region_start(va_block, region)) {
            policy = &next->policy;
            region.outer = uvm_va_block_cpu_page_index(va_block, min(end, next->node.end)) + 1;
        }
        else {
            // There is a gap between the last node and next, so use the
            // default policy for the gap first.
            policy = &uvm_va_policy_default;
            region.outer = uvm_va_block_cpu_page_index(va_block, next->node.start - 1) + 1;
        }
    }

    *inout_node = next;
    *inout_region = region;
    return policy;
}
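// Illustrative sketch, not part of the original file: unlike the node iterator
// above, the policy iterator yields a policy for every page in [start, end],
// returning &uvm_va_policy_default for the gaps between policy nodes. The loop
// body is hypothetical.
//
//     uvm_va_policy_node_t *node;
//     uvm_va_block_region_t region;
//     const uvm_va_policy_t *policy;
//
//     for (policy = uvm_va_policy_iter_first(va_block, start, end, &node, &region);
//          policy;
//          policy = uvm_va_policy_iter_next(va_block, policy, end, &node, &region)) {
//         // region covers the pages governed by policy
//     }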
NV_STATUS uvm_va_policy_node_split(uvm_va_block_t *va_block,
                                   uvm_va_policy_node_t *old,
                                   NvU64 new_end,
                                   uvm_va_policy_node_t **new_ptr)
{
    uvm_va_policy_node_t *new;

    UVM_ASSERT(uvm_va_block_is_hmm(va_block));
    uvm_assert_mutex_locked(&va_block->lock);

    UVM_ASSERT(new_end > old->node.start);
    UVM_ASSERT(new_end < old->node.end);

    new = uvm_va_policy_node_alloc(new_end + 1, old->node.end);
    if (!new)
        return NV_ERR_NO_MEMORY;

    new->policy = old->policy;

    uvm_range_tree_split(&va_block->hmm.va_policy_tree, &old->node, &new->node);

    if (new_ptr)
        *new_ptr = new;

    return NV_OK;
}
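// Illustrative sketch, not part of the original file: splitting at new_end
// leaves the old node covering the left part and creates a new node, with a
// copy of the policy, covering the right part:
//
//     before:  [old->node.start ............................ old->node.end]
//     after:   [old->node.start .. new_end][new_end + 1 ..... old->node.end]
//
// Per the asserts above, new_end must lie strictly inside the old node, and
// uvm_va_policy_node_alloc() requires it to be the last byte of a page
// (PAGE_ALIGNED(new_end + 1)).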
void uvm_va_policy_node_split_move(uvm_va_block_t *old_va_block,
                                   uvm_va_block_t *new_va_block)
{
    uvm_va_policy_node_t *node, *next;
    NV_STATUS status;

    UVM_ASSERT(uvm_va_block_is_hmm(old_va_block));
    UVM_ASSERT(uvm_va_block_is_hmm(new_va_block));
    uvm_assert_mutex_locked(&old_va_block->lock);

    UVM_ASSERT(old_va_block->end + 1 == new_va_block->start);

    uvm_for_each_va_policy_node_in_safe(node, next, old_va_block, new_va_block->start, new_va_block->end) {
        uvm_range_tree_remove(&old_va_block->hmm.va_policy_tree, &node->node);
        UVM_ASSERT(node->node.start >= new_va_block->start);
        UVM_ASSERT(node->node.end <= new_va_block->end);
        status = uvm_range_tree_add(&new_va_block->hmm.va_policy_tree, &node->node);
        UVM_ASSERT(status == NV_OK);
    }
}

void uvm_va_policy_clear(uvm_va_block_t *va_block, NvU64 start, NvU64 end)
{
    uvm_va_policy_node_t *node, *new;
    uvm_range_tree_node_t *tree_node;
    NV_STATUS status;

    UVM_ASSERT(uvm_va_block_is_hmm(va_block));
    uvm_assert_mutex_locked(&va_block->lock);

    tree_node = uvm_range_tree_iter_first(&va_block->hmm.va_policy_tree, start, end);
    while (tree_node) {
        node = uvm_va_policy_node_container(tree_node);
        tree_node = uvm_range_tree_iter_next(&va_block->hmm.va_policy_tree, &node->node, end);

        if (node->node.start < start) {
            if (node->node.end <= end) {
                uvm_range_tree_shrink_node(&va_block->hmm.va_policy_tree, &node->node, node->node.start, start - 1);
                continue;
            }

            status = uvm_va_policy_node_split(va_block, node, start - 1, &new);
            // If the split fails, keep the policy before the part being
            // cleared but drop the policy after the cleared range. Since
            // policy is a hint rather than a guarantee, this is safe.
            if (status != NV_OK) {
                uvm_range_tree_shrink_node(&va_block->hmm.va_policy_tree, &node->node, node->node.start, start - 1);
                continue;
            }

            node = new;
        }

        if (node->node.end > end) {
            uvm_range_tree_shrink_node(&va_block->hmm.va_policy_tree, &node->node, end + 1, node->node.end);
            continue;
        }

        uvm_range_tree_remove(&va_block->hmm.va_policy_tree, &node->node);
        uvm_va_policy_node_free(node);
    }
}

static void uvm_va_policy_node_set(uvm_va_policy_node_t *node,
                                   uvm_va_policy_type_t which,
                                   uvm_processor_id_t processor_id,
                                   uvm_read_duplication_policy_t new_policy)
{
    switch (which) {
        case UVM_VA_POLICY_PREFERRED_LOCATION:
            UVM_ASSERT(!UVM_ID_IS_INVALID(processor_id));
            node->policy.preferred_location = processor_id;
            break;

        case UVM_VA_POLICY_ACCESSED_BY:
            UVM_ASSERT(!UVM_ID_IS_INVALID(processor_id));
            uvm_processor_mask_set(&node->policy.accessed_by, processor_id);
            break;

        case UVM_VA_POLICY_READ_DUPLICATION:
            UVM_ASSERT(new_policy == UVM_READ_DUPLICATION_ENABLED ||
                       new_policy == UVM_READ_DUPLICATION_DISABLED);
            node->policy.read_duplication = new_policy;
            break;

        default:
            UVM_ASSERT_MSG(0, "Unknown policy type %u\n", which);
            break;
    }
}

static void uvm_va_policy_node_clear(uvm_va_block_t *va_block,
                                     uvm_va_policy_node_t *node,
                                     uvm_va_policy_type_t which,
                                     uvm_processor_id_t processor_id,
                                     uvm_read_duplication_policy_t new_policy)
{
    switch (which) {
        case UVM_VA_POLICY_PREFERRED_LOCATION:
            UVM_ASSERT(UVM_ID_IS_INVALID(processor_id));
            node->policy.preferred_location = processor_id;
            break;

        case UVM_VA_POLICY_ACCESSED_BY:
            UVM_ASSERT(!UVM_ID_IS_INVALID(processor_id));
            uvm_processor_mask_clear(&node->policy.accessed_by, processor_id);
            break;

        case UVM_VA_POLICY_READ_DUPLICATION:
        default:
            // Read duplication is never set back to UVM_READ_DUPLICATION_UNSET.
            UVM_ASSERT(0);
            break;
    }

    // Check to see if the node now matches the default policy and can be
    // removed.
    if (UVM_ID_IS_INVALID(node->policy.preferred_location) &&
        uvm_processor_mask_empty(&node->policy.accessed_by) &&
        node->policy.read_duplication == UVM_READ_DUPLICATION_UNSET) {
        uvm_range_tree_remove(&va_block->hmm.va_policy_tree, &node->node);
        uvm_va_policy_node_free(node);
    }
}

static uvm_va_policy_node_t *create_node_and_set(uvm_va_block_t *va_block,
                                                 NvU64 start,
                                                 NvU64 end,
                                                 uvm_va_policy_type_t which,
                                                 uvm_processor_id_t processor_id,
                                                 uvm_read_duplication_policy_t new_policy)
{
    uvm_va_policy_node_t *node;

    // Create a new node for the missing range.
    node = uvm_va_policy_node_create(va_block, start, end);
    if (!node)
        return NULL;

    uvm_va_policy_node_set(node, which, processor_id, new_policy);

    return node;
}

static bool va_policy_node_split_needed(uvm_va_policy_node_t *node,
                                        NvU64 start,
                                        NvU64 end,
                                        uvm_va_policy_type_t which,
                                        bool is_default,
                                        uvm_processor_id_t processor_id,
                                        uvm_read_duplication_policy_t new_policy)
{
    // If the node doesn't extend beyond the range being set, it doesn't need
    // to be split.
    if (node->node.start >= start && node->node.end <= end)
        return false;

    // If the new policy value doesn't match the old value, a split is needed.
    switch (which) {
        case UVM_VA_POLICY_PREFERRED_LOCATION:
            return !uvm_id_equal(node->policy.preferred_location, processor_id);

        case UVM_VA_POLICY_ACCESSED_BY:
            if (is_default)
                return uvm_processor_mask_test(&node->policy.accessed_by, processor_id);
            else
                return !uvm_processor_mask_test(&node->policy.accessed_by, processor_id);

        case UVM_VA_POLICY_READ_DUPLICATION:
            return node->policy.read_duplication != new_policy;

        default:
            UVM_ASSERT(0);
            return false;
    }
}
NV_STATUS uvm_va_policy_set_range(uvm_va_block_t *va_block,
                                  NvU64 start,
                                  NvU64 end,
                                  uvm_va_policy_type_t which,
                                  bool is_default,
                                  uvm_processor_id_t processor_id,
                                  uvm_read_duplication_policy_t new_policy)
{
    uvm_va_policy_node_t *node, *next, *new;
    NvU64 addr;
    NvU64 node_start;
    NvU64 node_end;

    UVM_ASSERT(uvm_va_block_is_hmm(va_block));
    uvm_assert_mutex_locked(&va_block->lock);
    UVM_ASSERT(PAGE_ALIGNED(start));
    UVM_ASSERT(PAGE_ALIGNED(end + 1));
    UVM_ASSERT(start < end);
    UVM_ASSERT(start >= va_block->start);
    UVM_ASSERT(start < va_block->end);
    UVM_ASSERT(end <= va_block->end);

    // Note that the ends of the policy range have already been split so we
    // only need to fill in the middle or remove nodes.
    node = uvm_va_policy_node_iter_first(va_block, start, end);

    if (!node) {
        // There is no policy node in the given range so it is already the
        // default.
        if (is_default)
            return NV_OK;

        // Create a new node for the missing range.
        node = create_node_and_set(va_block,
                                   start,
                                   end,
                                   which,
                                   processor_id,
                                   new_policy);
        if (!node)
            return NV_ERR_NO_MEMORY;

        return NV_OK;
    }

    for (addr = start; node; addr = node_end + 1, node = next) {
        node_start = node->node.start;
        node_end = node->node.end;

        // Nodes should have been split before setting policy, so verify that.
        UVM_ASSERT(!va_policy_node_split_needed(node, start, end, which, is_default, processor_id, new_policy));

        next = uvm_va_policy_node_iter_next(va_block, node, end);

        if (is_default) {
            uvm_va_policy_node_clear(va_block, node, which, processor_id, new_policy);
            // Note that node may have been deleted.
        }
        else {
            uvm_va_policy_node_set(node, which, processor_id, new_policy);

            // TODO: Bug 1707562: Add support for merging policy ranges.
        }

        if (!is_default && addr < node_start) {
            // Create a new node for the missing range on the left.
            new = create_node_and_set(va_block,
                                      addr,
                                      node_start - 1,
                                      which,
                                      processor_id,
                                      new_policy);
            if (!new)
                return NV_ERR_NO_MEMORY;
        }
        else if (!is_default && !next && node_end < end) {
            // Create a new node for the missing range on the right.
            new = create_node_and_set(va_block,
                                      node_end + 1,
                                      end,
                                      which,
                                      processor_id,
                                      new_policy);
            if (!new)
                return NV_ERR_NO_MEMORY;
            break;
        }
    }

    return NV_OK;
}
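// Illustrative sketch, not part of the original file: adding processor_id to
// the accessed-by mask over a page-aligned range. The caller is responsible
// for first splitting any policy node that straddles start or end (see
// uvm_va_policy_node_split() above). new_policy is unused for
// UVM_VA_POLICY_ACCESSED_BY (see uvm_va_policy_node_set() above), so a
// placeholder value is assumed to be acceptable here.
//
//     status = uvm_va_policy_set_range(va_block,
//                                      start,
//                                      end,
//                                      UVM_VA_POLICY_ACCESSED_BY,
//                                      false,                       // setting, not clearing
//                                      processor_id,
//                                      UVM_READ_DUPLICATION_UNSET); // unused for accessed-by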
const uvm_va_policy_t *uvm_va_policy_set_preferred_location(uvm_va_block_t *va_block,
                                                            uvm_va_block_region_t region,
                                                            uvm_processor_id_t processor_id,
                                                            const uvm_va_policy_t *old_policy)
{
    NvU64 start = uvm_va_block_region_start(va_block, region);
    NvU64 end = uvm_va_block_region_end(va_block, region);
    uvm_va_policy_node_t *node;

    if (uvm_va_policy_is_default(old_policy)) {
        UVM_ASSERT(!UVM_ID_IS_INVALID(processor_id));
        UVM_ASSERT(!uvm_range_tree_iter_first(&va_block->hmm.va_policy_tree, start, end));

        node = uvm_va_policy_node_create(va_block, start, end);
        if (!node)
            return NULL;
    }
    else {
        // Since old_policy isn't the constant default policy, we know it is
        // embedded in an allocated uvm_va_policy_node_t and can be cast.
        node = container_of((uvm_va_policy_t *)old_policy, uvm_va_policy_node_t, policy);

        // The caller guarantees that the policy node doesn't require splitting
        // and that the policy is changing.
        UVM_ASSERT(node->node.start >= start);
        UVM_ASSERT(node->node.end <= end);
        UVM_ASSERT(!uvm_id_equal(node->policy.preferred_location, processor_id));
    }

    node->policy.preferred_location = processor_id;

    return &node->policy;
}

#endif // UVM_IS_CONFIG_HMM()