1 // SPDX-License-Identifier: GPL-2.0 OR MIT 2 /* 3 * Copyright 2015-2022 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 */ 23 24 #include <linux/pci.h> 25 #include <linux/acpi.h> 26 #include "kfd_crat.h" 27 #include "kfd_priv.h" 28 #include "kfd_topology.h" 29 #include "kfd_iommu.h" 30 #include "amdgpu.h" 31 #include "amdgpu_amdkfd.h" 32 33 /* GPU Processor ID base for dGPUs for which VCRAT needs to be created. 34 * GPU processor ID are expressed with Bit[31]=1. 35 * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs 36 * used in the CRAT. 
37 */ 38 static uint32_t gpu_processor_id_low = 0x80001000; 39 40 /* Return the next available gpu_processor_id and increment it for next GPU 41 * @total_cu_count - Total CUs present in the GPU including ones 42 * masked off 43 */ 44 static inline unsigned int get_and_inc_gpu_processor_id( 45 unsigned int total_cu_count) 46 { 47 int current_id = gpu_processor_id_low; 48 49 gpu_processor_id_low += total_cu_count; 50 return current_id; 51 } 52 53 54 static struct kfd_gpu_cache_info kaveri_cache_info[] = { 55 { 56 /* TCP L1 Cache per CU */ 57 .cache_size = 16, 58 .cache_level = 1, 59 .flags = (CRAT_CACHE_FLAGS_ENABLED | 60 CRAT_CACHE_FLAGS_DATA_CACHE | 61 CRAT_CACHE_FLAGS_SIMD_CACHE), 62 .num_cu_shared = 1, 63 }, 64 { 65 /* Scalar L1 Instruction Cache (in SQC module) per bank */ 66 .cache_size = 16, 67 .cache_level = 1, 68 .flags = (CRAT_CACHE_FLAGS_ENABLED | 69 CRAT_CACHE_FLAGS_INST_CACHE | 70 CRAT_CACHE_FLAGS_SIMD_CACHE), 71 .num_cu_shared = 2, 72 }, 73 { 74 /* Scalar L1 Data Cache (in SQC module) per bank */ 75 .cache_size = 8, 76 .cache_level = 1, 77 .flags = (CRAT_CACHE_FLAGS_ENABLED | 78 CRAT_CACHE_FLAGS_DATA_CACHE | 79 CRAT_CACHE_FLAGS_SIMD_CACHE), 80 .num_cu_shared = 2, 81 }, 82 83 /* TODO: Add L2 Cache information */ 84 }; 85 86 87 static struct kfd_gpu_cache_info carrizo_cache_info[] = { 88 { 89 /* TCP L1 Cache per CU */ 90 .cache_size = 16, 91 .cache_level = 1, 92 .flags = (CRAT_CACHE_FLAGS_ENABLED | 93 CRAT_CACHE_FLAGS_DATA_CACHE | 94 CRAT_CACHE_FLAGS_SIMD_CACHE), 95 .num_cu_shared = 1, 96 }, 97 { 98 /* Scalar L1 Instruction Cache (in SQC module) per bank */ 99 .cache_size = 8, 100 .cache_level = 1, 101 .flags = (CRAT_CACHE_FLAGS_ENABLED | 102 CRAT_CACHE_FLAGS_INST_CACHE | 103 CRAT_CACHE_FLAGS_SIMD_CACHE), 104 .num_cu_shared = 4, 105 }, 106 { 107 /* Scalar L1 Data Cache (in SQC module) per bank. 
*/ 108 .cache_size = 4, 109 .cache_level = 1, 110 .flags = (CRAT_CACHE_FLAGS_ENABLED | 111 CRAT_CACHE_FLAGS_DATA_CACHE | 112 CRAT_CACHE_FLAGS_SIMD_CACHE), 113 .num_cu_shared = 4, 114 }, 115 116 /* TODO: Add L2 Cache information */ 117 }; 118 119 #define hawaii_cache_info kaveri_cache_info 120 #define tonga_cache_info carrizo_cache_info 121 #define fiji_cache_info carrizo_cache_info 122 #define polaris10_cache_info carrizo_cache_info 123 #define polaris11_cache_info carrizo_cache_info 124 #define polaris12_cache_info carrizo_cache_info 125 #define vegam_cache_info carrizo_cache_info 126 127 /* NOTE: L1 cache information has been updated and L2/L3 128 * cache information has been added for Vega10 and 129 * newer ASICs. The unit for cache_size is KiB. 130 * In future, check & update cache details 131 * for every new ASIC is required. 132 */ 133 134 static struct kfd_gpu_cache_info vega10_cache_info[] = { 135 { 136 /* TCP L1 Cache per CU */ 137 .cache_size = 16, 138 .cache_level = 1, 139 .flags = (CRAT_CACHE_FLAGS_ENABLED | 140 CRAT_CACHE_FLAGS_DATA_CACHE | 141 CRAT_CACHE_FLAGS_SIMD_CACHE), 142 .num_cu_shared = 1, 143 }, 144 { 145 /* Scalar L1 Instruction Cache per SQC */ 146 .cache_size = 32, 147 .cache_level = 1, 148 .flags = (CRAT_CACHE_FLAGS_ENABLED | 149 CRAT_CACHE_FLAGS_INST_CACHE | 150 CRAT_CACHE_FLAGS_SIMD_CACHE), 151 .num_cu_shared = 3, 152 }, 153 { 154 /* Scalar L1 Data Cache per SQC */ 155 .cache_size = 16, 156 .cache_level = 1, 157 .flags = (CRAT_CACHE_FLAGS_ENABLED | 158 CRAT_CACHE_FLAGS_DATA_CACHE | 159 CRAT_CACHE_FLAGS_SIMD_CACHE), 160 .num_cu_shared = 3, 161 }, 162 { 163 /* L2 Data Cache per GPU (Total Tex Cache) */ 164 .cache_size = 4096, 165 .cache_level = 2, 166 .flags = (CRAT_CACHE_FLAGS_ENABLED | 167 CRAT_CACHE_FLAGS_DATA_CACHE | 168 CRAT_CACHE_FLAGS_SIMD_CACHE), 169 .num_cu_shared = 16, 170 }, 171 }; 172 173 static struct kfd_gpu_cache_info raven_cache_info[] = { 174 { 175 /* TCP L1 Cache per CU */ 176 .cache_size = 16, 177 .cache_level = 1, 178 
.flags = (CRAT_CACHE_FLAGS_ENABLED | 179 CRAT_CACHE_FLAGS_DATA_CACHE | 180 CRAT_CACHE_FLAGS_SIMD_CACHE), 181 .num_cu_shared = 1, 182 }, 183 { 184 /* Scalar L1 Instruction Cache per SQC */ 185 .cache_size = 32, 186 .cache_level = 1, 187 .flags = (CRAT_CACHE_FLAGS_ENABLED | 188 CRAT_CACHE_FLAGS_INST_CACHE | 189 CRAT_CACHE_FLAGS_SIMD_CACHE), 190 .num_cu_shared = 3, 191 }, 192 { 193 /* Scalar L1 Data Cache per SQC */ 194 .cache_size = 16, 195 .cache_level = 1, 196 .flags = (CRAT_CACHE_FLAGS_ENABLED | 197 CRAT_CACHE_FLAGS_DATA_CACHE | 198 CRAT_CACHE_FLAGS_SIMD_CACHE), 199 .num_cu_shared = 3, 200 }, 201 { 202 /* L2 Data Cache per GPU (Total Tex Cache) */ 203 .cache_size = 1024, 204 .cache_level = 2, 205 .flags = (CRAT_CACHE_FLAGS_ENABLED | 206 CRAT_CACHE_FLAGS_DATA_CACHE | 207 CRAT_CACHE_FLAGS_SIMD_CACHE), 208 .num_cu_shared = 11, 209 }, 210 }; 211 212 static struct kfd_gpu_cache_info renoir_cache_info[] = { 213 { 214 /* TCP L1 Cache per CU */ 215 .cache_size = 16, 216 .cache_level = 1, 217 .flags = (CRAT_CACHE_FLAGS_ENABLED | 218 CRAT_CACHE_FLAGS_DATA_CACHE | 219 CRAT_CACHE_FLAGS_SIMD_CACHE), 220 .num_cu_shared = 1, 221 }, 222 { 223 /* Scalar L1 Instruction Cache per SQC */ 224 .cache_size = 32, 225 .cache_level = 1, 226 .flags = (CRAT_CACHE_FLAGS_ENABLED | 227 CRAT_CACHE_FLAGS_INST_CACHE | 228 CRAT_CACHE_FLAGS_SIMD_CACHE), 229 .num_cu_shared = 3, 230 }, 231 { 232 /* Scalar L1 Data Cache per SQC */ 233 .cache_size = 16, 234 .cache_level = 1, 235 .flags = (CRAT_CACHE_FLAGS_ENABLED | 236 CRAT_CACHE_FLAGS_DATA_CACHE | 237 CRAT_CACHE_FLAGS_SIMD_CACHE), 238 .num_cu_shared = 3, 239 }, 240 { 241 /* L2 Data Cache per GPU (Total Tex Cache) */ 242 .cache_size = 1024, 243 .cache_level = 2, 244 .flags = (CRAT_CACHE_FLAGS_ENABLED | 245 CRAT_CACHE_FLAGS_DATA_CACHE | 246 CRAT_CACHE_FLAGS_SIMD_CACHE), 247 .num_cu_shared = 8, 248 }, 249 }; 250 251 static struct kfd_gpu_cache_info vega12_cache_info[] = { 252 { 253 /* TCP L1 Cache per CU */ 254 .cache_size = 16, 255 .cache_level = 1, 256 
.flags = (CRAT_CACHE_FLAGS_ENABLED | 257 CRAT_CACHE_FLAGS_DATA_CACHE | 258 CRAT_CACHE_FLAGS_SIMD_CACHE), 259 .num_cu_shared = 1, 260 }, 261 { 262 /* Scalar L1 Instruction Cache per SQC */ 263 .cache_size = 32, 264 .cache_level = 1, 265 .flags = (CRAT_CACHE_FLAGS_ENABLED | 266 CRAT_CACHE_FLAGS_INST_CACHE | 267 CRAT_CACHE_FLAGS_SIMD_CACHE), 268 .num_cu_shared = 3, 269 }, 270 { 271 /* Scalar L1 Data Cache per SQC */ 272 .cache_size = 16, 273 .cache_level = 1, 274 .flags = (CRAT_CACHE_FLAGS_ENABLED | 275 CRAT_CACHE_FLAGS_DATA_CACHE | 276 CRAT_CACHE_FLAGS_SIMD_CACHE), 277 .num_cu_shared = 3, 278 }, 279 { 280 /* L2 Data Cache per GPU (Total Tex Cache) */ 281 .cache_size = 2048, 282 .cache_level = 2, 283 .flags = (CRAT_CACHE_FLAGS_ENABLED | 284 CRAT_CACHE_FLAGS_DATA_CACHE | 285 CRAT_CACHE_FLAGS_SIMD_CACHE), 286 .num_cu_shared = 5, 287 }, 288 }; 289 290 static struct kfd_gpu_cache_info vega20_cache_info[] = { 291 { 292 /* TCP L1 Cache per CU */ 293 .cache_size = 16, 294 .cache_level = 1, 295 .flags = (CRAT_CACHE_FLAGS_ENABLED | 296 CRAT_CACHE_FLAGS_DATA_CACHE | 297 CRAT_CACHE_FLAGS_SIMD_CACHE), 298 .num_cu_shared = 1, 299 }, 300 { 301 /* Scalar L1 Instruction Cache per SQC */ 302 .cache_size = 32, 303 .cache_level = 1, 304 .flags = (CRAT_CACHE_FLAGS_ENABLED | 305 CRAT_CACHE_FLAGS_INST_CACHE | 306 CRAT_CACHE_FLAGS_SIMD_CACHE), 307 .num_cu_shared = 3, 308 }, 309 { 310 /* Scalar L1 Data Cache per SQC */ 311 .cache_size = 16, 312 .cache_level = 1, 313 .flags = (CRAT_CACHE_FLAGS_ENABLED | 314 CRAT_CACHE_FLAGS_DATA_CACHE | 315 CRAT_CACHE_FLAGS_SIMD_CACHE), 316 .num_cu_shared = 3, 317 }, 318 { 319 /* L2 Data Cache per GPU (Total Tex Cache) */ 320 .cache_size = 8192, 321 .cache_level = 2, 322 .flags = (CRAT_CACHE_FLAGS_ENABLED | 323 CRAT_CACHE_FLAGS_DATA_CACHE | 324 CRAT_CACHE_FLAGS_SIMD_CACHE), 325 .num_cu_shared = 16, 326 }, 327 }; 328 329 static struct kfd_gpu_cache_info aldebaran_cache_info[] = { 330 { 331 /* TCP L1 Cache per CU */ 332 .cache_size = 16, 333 .cache_level = 1, 
334 .flags = (CRAT_CACHE_FLAGS_ENABLED | 335 CRAT_CACHE_FLAGS_DATA_CACHE | 336 CRAT_CACHE_FLAGS_SIMD_CACHE), 337 .num_cu_shared = 1, 338 }, 339 { 340 /* Scalar L1 Instruction Cache per SQC */ 341 .cache_size = 32, 342 .cache_level = 1, 343 .flags = (CRAT_CACHE_FLAGS_ENABLED | 344 CRAT_CACHE_FLAGS_INST_CACHE | 345 CRAT_CACHE_FLAGS_SIMD_CACHE), 346 .num_cu_shared = 2, 347 }, 348 { 349 /* Scalar L1 Data Cache per SQC */ 350 .cache_size = 16, 351 .cache_level = 1, 352 .flags = (CRAT_CACHE_FLAGS_ENABLED | 353 CRAT_CACHE_FLAGS_DATA_CACHE | 354 CRAT_CACHE_FLAGS_SIMD_CACHE), 355 .num_cu_shared = 2, 356 }, 357 { 358 /* L2 Data Cache per GPU (Total Tex Cache) */ 359 .cache_size = 8192, 360 .cache_level = 2, 361 .flags = (CRAT_CACHE_FLAGS_ENABLED | 362 CRAT_CACHE_FLAGS_DATA_CACHE | 363 CRAT_CACHE_FLAGS_SIMD_CACHE), 364 .num_cu_shared = 14, 365 }, 366 }; 367 368 static struct kfd_gpu_cache_info navi10_cache_info[] = { 369 { 370 /* TCP L1 Cache per CU */ 371 .cache_size = 16, 372 .cache_level = 1, 373 .flags = (CRAT_CACHE_FLAGS_ENABLED | 374 CRAT_CACHE_FLAGS_DATA_CACHE | 375 CRAT_CACHE_FLAGS_SIMD_CACHE), 376 .num_cu_shared = 1, 377 }, 378 { 379 /* Scalar L1 Instruction Cache per SQC */ 380 .cache_size = 32, 381 .cache_level = 1, 382 .flags = (CRAT_CACHE_FLAGS_ENABLED | 383 CRAT_CACHE_FLAGS_INST_CACHE | 384 CRAT_CACHE_FLAGS_SIMD_CACHE), 385 .num_cu_shared = 2, 386 }, 387 { 388 /* Scalar L1 Data Cache per SQC */ 389 .cache_size = 16, 390 .cache_level = 1, 391 .flags = (CRAT_CACHE_FLAGS_ENABLED | 392 CRAT_CACHE_FLAGS_DATA_CACHE | 393 CRAT_CACHE_FLAGS_SIMD_CACHE), 394 .num_cu_shared = 2, 395 }, 396 { 397 /* GL1 Data Cache per SA */ 398 .cache_size = 128, 399 .cache_level = 1, 400 .flags = (CRAT_CACHE_FLAGS_ENABLED | 401 CRAT_CACHE_FLAGS_DATA_CACHE | 402 CRAT_CACHE_FLAGS_SIMD_CACHE), 403 .num_cu_shared = 10, 404 }, 405 { 406 /* L2 Data Cache per GPU (Total Tex Cache) */ 407 .cache_size = 4096, 408 .cache_level = 2, 409 .flags = (CRAT_CACHE_FLAGS_ENABLED | 410 
CRAT_CACHE_FLAGS_DATA_CACHE | 411 CRAT_CACHE_FLAGS_SIMD_CACHE), 412 .num_cu_shared = 10, 413 }, 414 }; 415 416 static struct kfd_gpu_cache_info vangogh_cache_info[] = { 417 { 418 /* TCP L1 Cache per CU */ 419 .cache_size = 16, 420 .cache_level = 1, 421 .flags = (CRAT_CACHE_FLAGS_ENABLED | 422 CRAT_CACHE_FLAGS_DATA_CACHE | 423 CRAT_CACHE_FLAGS_SIMD_CACHE), 424 .num_cu_shared = 1, 425 }, 426 { 427 /* Scalar L1 Instruction Cache per SQC */ 428 .cache_size = 32, 429 .cache_level = 1, 430 .flags = (CRAT_CACHE_FLAGS_ENABLED | 431 CRAT_CACHE_FLAGS_INST_CACHE | 432 CRAT_CACHE_FLAGS_SIMD_CACHE), 433 .num_cu_shared = 2, 434 }, 435 { 436 /* Scalar L1 Data Cache per SQC */ 437 .cache_size = 16, 438 .cache_level = 1, 439 .flags = (CRAT_CACHE_FLAGS_ENABLED | 440 CRAT_CACHE_FLAGS_DATA_CACHE | 441 CRAT_CACHE_FLAGS_SIMD_CACHE), 442 .num_cu_shared = 2, 443 }, 444 { 445 /* GL1 Data Cache per SA */ 446 .cache_size = 128, 447 .cache_level = 1, 448 .flags = (CRAT_CACHE_FLAGS_ENABLED | 449 CRAT_CACHE_FLAGS_DATA_CACHE | 450 CRAT_CACHE_FLAGS_SIMD_CACHE), 451 .num_cu_shared = 8, 452 }, 453 { 454 /* L2 Data Cache per GPU (Total Tex Cache) */ 455 .cache_size = 1024, 456 .cache_level = 2, 457 .flags = (CRAT_CACHE_FLAGS_ENABLED | 458 CRAT_CACHE_FLAGS_DATA_CACHE | 459 CRAT_CACHE_FLAGS_SIMD_CACHE), 460 .num_cu_shared = 8, 461 }, 462 }; 463 464 static struct kfd_gpu_cache_info navi14_cache_info[] = { 465 { 466 /* TCP L1 Cache per CU */ 467 .cache_size = 16, 468 .cache_level = 1, 469 .flags = (CRAT_CACHE_FLAGS_ENABLED | 470 CRAT_CACHE_FLAGS_DATA_CACHE | 471 CRAT_CACHE_FLAGS_SIMD_CACHE), 472 .num_cu_shared = 1, 473 }, 474 { 475 /* Scalar L1 Instruction Cache per SQC */ 476 .cache_size = 32, 477 .cache_level = 1, 478 .flags = (CRAT_CACHE_FLAGS_ENABLED | 479 CRAT_CACHE_FLAGS_INST_CACHE | 480 CRAT_CACHE_FLAGS_SIMD_CACHE), 481 .num_cu_shared = 2, 482 }, 483 { 484 /* Scalar L1 Data Cache per SQC */ 485 .cache_size = 16, 486 .cache_level = 1, 487 .flags = (CRAT_CACHE_FLAGS_ENABLED | 488 
CRAT_CACHE_FLAGS_DATA_CACHE | 489 CRAT_CACHE_FLAGS_SIMD_CACHE), 490 .num_cu_shared = 2, 491 }, 492 { 493 /* GL1 Data Cache per SA */ 494 .cache_size = 128, 495 .cache_level = 1, 496 .flags = (CRAT_CACHE_FLAGS_ENABLED | 497 CRAT_CACHE_FLAGS_DATA_CACHE | 498 CRAT_CACHE_FLAGS_SIMD_CACHE), 499 .num_cu_shared = 12, 500 }, 501 { 502 /* L2 Data Cache per GPU (Total Tex Cache) */ 503 .cache_size = 2048, 504 .cache_level = 2, 505 .flags = (CRAT_CACHE_FLAGS_ENABLED | 506 CRAT_CACHE_FLAGS_DATA_CACHE | 507 CRAT_CACHE_FLAGS_SIMD_CACHE), 508 .num_cu_shared = 12, 509 }, 510 }; 511 512 static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = { 513 { 514 /* TCP L1 Cache per CU */ 515 .cache_size = 16, 516 .cache_level = 1, 517 .flags = (CRAT_CACHE_FLAGS_ENABLED | 518 CRAT_CACHE_FLAGS_DATA_CACHE | 519 CRAT_CACHE_FLAGS_SIMD_CACHE), 520 .num_cu_shared = 1, 521 }, 522 { 523 /* Scalar L1 Instruction Cache per SQC */ 524 .cache_size = 32, 525 .cache_level = 1, 526 .flags = (CRAT_CACHE_FLAGS_ENABLED | 527 CRAT_CACHE_FLAGS_INST_CACHE | 528 CRAT_CACHE_FLAGS_SIMD_CACHE), 529 .num_cu_shared = 2, 530 }, 531 { 532 /* Scalar L1 Data Cache per SQC */ 533 .cache_size = 16, 534 .cache_level = 1, 535 .flags = (CRAT_CACHE_FLAGS_ENABLED | 536 CRAT_CACHE_FLAGS_DATA_CACHE | 537 CRAT_CACHE_FLAGS_SIMD_CACHE), 538 .num_cu_shared = 2, 539 }, 540 { 541 /* GL1 Data Cache per SA */ 542 .cache_size = 128, 543 .cache_level = 1, 544 .flags = (CRAT_CACHE_FLAGS_ENABLED | 545 CRAT_CACHE_FLAGS_DATA_CACHE | 546 CRAT_CACHE_FLAGS_SIMD_CACHE), 547 .num_cu_shared = 10, 548 }, 549 { 550 /* L2 Data Cache per GPU (Total Tex Cache) */ 551 .cache_size = 4096, 552 .cache_level = 2, 553 .flags = (CRAT_CACHE_FLAGS_ENABLED | 554 CRAT_CACHE_FLAGS_DATA_CACHE | 555 CRAT_CACHE_FLAGS_SIMD_CACHE), 556 .num_cu_shared = 10, 557 }, 558 { 559 /* L3 Data Cache per GPU */ 560 .cache_size = 128*1024, 561 .cache_level = 3, 562 .flags = (CRAT_CACHE_FLAGS_ENABLED | 563 CRAT_CACHE_FLAGS_DATA_CACHE | 564 CRAT_CACHE_FLAGS_SIMD_CACHE), 565 
.num_cu_shared = 10, 566 }, 567 }; 568 569 static struct kfd_gpu_cache_info navy_flounder_cache_info[] = { 570 { 571 /* TCP L1 Cache per CU */ 572 .cache_size = 16, 573 .cache_level = 1, 574 .flags = (CRAT_CACHE_FLAGS_ENABLED | 575 CRAT_CACHE_FLAGS_DATA_CACHE | 576 CRAT_CACHE_FLAGS_SIMD_CACHE), 577 .num_cu_shared = 1, 578 }, 579 { 580 /* Scalar L1 Instruction Cache per SQC */ 581 .cache_size = 32, 582 .cache_level = 1, 583 .flags = (CRAT_CACHE_FLAGS_ENABLED | 584 CRAT_CACHE_FLAGS_INST_CACHE | 585 CRAT_CACHE_FLAGS_SIMD_CACHE), 586 .num_cu_shared = 2, 587 }, 588 { 589 /* Scalar L1 Data Cache per SQC */ 590 .cache_size = 16, 591 .cache_level = 1, 592 .flags = (CRAT_CACHE_FLAGS_ENABLED | 593 CRAT_CACHE_FLAGS_DATA_CACHE | 594 CRAT_CACHE_FLAGS_SIMD_CACHE), 595 .num_cu_shared = 2, 596 }, 597 { 598 /* GL1 Data Cache per SA */ 599 .cache_size = 128, 600 .cache_level = 1, 601 .flags = (CRAT_CACHE_FLAGS_ENABLED | 602 CRAT_CACHE_FLAGS_DATA_CACHE | 603 CRAT_CACHE_FLAGS_SIMD_CACHE), 604 .num_cu_shared = 10, 605 }, 606 { 607 /* L2 Data Cache per GPU (Total Tex Cache) */ 608 .cache_size = 3072, 609 .cache_level = 2, 610 .flags = (CRAT_CACHE_FLAGS_ENABLED | 611 CRAT_CACHE_FLAGS_DATA_CACHE | 612 CRAT_CACHE_FLAGS_SIMD_CACHE), 613 .num_cu_shared = 10, 614 }, 615 { 616 /* L3 Data Cache per GPU */ 617 .cache_size = 96*1024, 618 .cache_level = 3, 619 .flags = (CRAT_CACHE_FLAGS_ENABLED | 620 CRAT_CACHE_FLAGS_DATA_CACHE | 621 CRAT_CACHE_FLAGS_SIMD_CACHE), 622 .num_cu_shared = 10, 623 }, 624 }; 625 626 static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = { 627 { 628 /* TCP L1 Cache per CU */ 629 .cache_size = 16, 630 .cache_level = 1, 631 .flags = (CRAT_CACHE_FLAGS_ENABLED | 632 CRAT_CACHE_FLAGS_DATA_CACHE | 633 CRAT_CACHE_FLAGS_SIMD_CACHE), 634 .num_cu_shared = 1, 635 }, 636 { 637 /* Scalar L1 Instruction Cache per SQC */ 638 .cache_size = 32, 639 .cache_level = 1, 640 .flags = (CRAT_CACHE_FLAGS_ENABLED | 641 CRAT_CACHE_FLAGS_INST_CACHE | 642 CRAT_CACHE_FLAGS_SIMD_CACHE), 643 
.num_cu_shared = 2, 644 }, 645 { 646 /* Scalar L1 Data Cache per SQC */ 647 .cache_size = 16, 648 .cache_level = 1, 649 .flags = (CRAT_CACHE_FLAGS_ENABLED | 650 CRAT_CACHE_FLAGS_DATA_CACHE | 651 CRAT_CACHE_FLAGS_SIMD_CACHE), 652 .num_cu_shared = 2, 653 }, 654 { 655 /* GL1 Data Cache per SA */ 656 .cache_size = 128, 657 .cache_level = 1, 658 .flags = (CRAT_CACHE_FLAGS_ENABLED | 659 CRAT_CACHE_FLAGS_DATA_CACHE | 660 CRAT_CACHE_FLAGS_SIMD_CACHE), 661 .num_cu_shared = 8, 662 }, 663 { 664 /* L2 Data Cache per GPU (Total Tex Cache) */ 665 .cache_size = 2048, 666 .cache_level = 2, 667 .flags = (CRAT_CACHE_FLAGS_ENABLED | 668 CRAT_CACHE_FLAGS_DATA_CACHE | 669 CRAT_CACHE_FLAGS_SIMD_CACHE), 670 .num_cu_shared = 8, 671 }, 672 { 673 /* L3 Data Cache per GPU */ 674 .cache_size = 32*1024, 675 .cache_level = 3, 676 .flags = (CRAT_CACHE_FLAGS_ENABLED | 677 CRAT_CACHE_FLAGS_DATA_CACHE | 678 CRAT_CACHE_FLAGS_SIMD_CACHE), 679 .num_cu_shared = 8, 680 }, 681 }; 682 683 static struct kfd_gpu_cache_info beige_goby_cache_info[] = { 684 { 685 /* TCP L1 Cache per CU */ 686 .cache_size = 16, 687 .cache_level = 1, 688 .flags = (CRAT_CACHE_FLAGS_ENABLED | 689 CRAT_CACHE_FLAGS_DATA_CACHE | 690 CRAT_CACHE_FLAGS_SIMD_CACHE), 691 .num_cu_shared = 1, 692 }, 693 { 694 /* Scalar L1 Instruction Cache per SQC */ 695 .cache_size = 32, 696 .cache_level = 1, 697 .flags = (CRAT_CACHE_FLAGS_ENABLED | 698 CRAT_CACHE_FLAGS_INST_CACHE | 699 CRAT_CACHE_FLAGS_SIMD_CACHE), 700 .num_cu_shared = 2, 701 }, 702 { 703 /* Scalar L1 Data Cache per SQC */ 704 .cache_size = 16, 705 .cache_level = 1, 706 .flags = (CRAT_CACHE_FLAGS_ENABLED | 707 CRAT_CACHE_FLAGS_DATA_CACHE | 708 CRAT_CACHE_FLAGS_SIMD_CACHE), 709 .num_cu_shared = 2, 710 }, 711 { 712 /* GL1 Data Cache per SA */ 713 .cache_size = 128, 714 .cache_level = 1, 715 .flags = (CRAT_CACHE_FLAGS_ENABLED | 716 CRAT_CACHE_FLAGS_DATA_CACHE | 717 CRAT_CACHE_FLAGS_SIMD_CACHE), 718 .num_cu_shared = 8, 719 }, 720 { 721 /* L2 Data Cache per GPU (Total Tex Cache) */ 722 
.cache_size = 1024, 723 .cache_level = 2, 724 .flags = (CRAT_CACHE_FLAGS_ENABLED | 725 CRAT_CACHE_FLAGS_DATA_CACHE | 726 CRAT_CACHE_FLAGS_SIMD_CACHE), 727 .num_cu_shared = 8, 728 }, 729 { 730 /* L3 Data Cache per GPU */ 731 .cache_size = 16*1024, 732 .cache_level = 3, 733 .flags = (CRAT_CACHE_FLAGS_ENABLED | 734 CRAT_CACHE_FLAGS_DATA_CACHE | 735 CRAT_CACHE_FLAGS_SIMD_CACHE), 736 .num_cu_shared = 8, 737 }, 738 }; 739 740 static struct kfd_gpu_cache_info yellow_carp_cache_info[] = { 741 { 742 /* TCP L1 Cache per CU */ 743 .cache_size = 16, 744 .cache_level = 1, 745 .flags = (CRAT_CACHE_FLAGS_ENABLED | 746 CRAT_CACHE_FLAGS_DATA_CACHE | 747 CRAT_CACHE_FLAGS_SIMD_CACHE), 748 .num_cu_shared = 1, 749 }, 750 { 751 /* Scalar L1 Instruction Cache per SQC */ 752 .cache_size = 32, 753 .cache_level = 1, 754 .flags = (CRAT_CACHE_FLAGS_ENABLED | 755 CRAT_CACHE_FLAGS_INST_CACHE | 756 CRAT_CACHE_FLAGS_SIMD_CACHE), 757 .num_cu_shared = 2, 758 }, 759 { 760 /* Scalar L1 Data Cache per SQC */ 761 .cache_size = 16, 762 .cache_level = 1, 763 .flags = (CRAT_CACHE_FLAGS_ENABLED | 764 CRAT_CACHE_FLAGS_DATA_CACHE | 765 CRAT_CACHE_FLAGS_SIMD_CACHE), 766 .num_cu_shared = 2, 767 }, 768 { 769 /* GL1 Data Cache per SA */ 770 .cache_size = 128, 771 .cache_level = 1, 772 .flags = (CRAT_CACHE_FLAGS_ENABLED | 773 CRAT_CACHE_FLAGS_DATA_CACHE | 774 CRAT_CACHE_FLAGS_SIMD_CACHE), 775 .num_cu_shared = 6, 776 }, 777 { 778 /* L2 Data Cache per GPU (Total Tex Cache) */ 779 .cache_size = 2048, 780 .cache_level = 2, 781 .flags = (CRAT_CACHE_FLAGS_ENABLED | 782 CRAT_CACHE_FLAGS_DATA_CACHE | 783 CRAT_CACHE_FLAGS_SIMD_CACHE), 784 .num_cu_shared = 6, 785 }, 786 }; 787 788 static struct kfd_gpu_cache_info gfx1037_cache_info[] = { 789 { 790 /* TCP L1 Cache per CU */ 791 .cache_size = 16, 792 .cache_level = 1, 793 .flags = (CRAT_CACHE_FLAGS_ENABLED | 794 CRAT_CACHE_FLAGS_DATA_CACHE | 795 CRAT_CACHE_FLAGS_SIMD_CACHE), 796 .num_cu_shared = 1, 797 }, 798 { 799 /* Scalar L1 Instruction Cache per SQC */ 800 .cache_size = 
32, 801 .cache_level = 1, 802 .flags = (CRAT_CACHE_FLAGS_ENABLED | 803 CRAT_CACHE_FLAGS_INST_CACHE | 804 CRAT_CACHE_FLAGS_SIMD_CACHE), 805 .num_cu_shared = 2, 806 }, 807 { 808 /* Scalar L1 Data Cache per SQC */ 809 .cache_size = 16, 810 .cache_level = 1, 811 .flags = (CRAT_CACHE_FLAGS_ENABLED | 812 CRAT_CACHE_FLAGS_DATA_CACHE | 813 CRAT_CACHE_FLAGS_SIMD_CACHE), 814 .num_cu_shared = 2, 815 }, 816 { 817 /* GL1 Data Cache per SA */ 818 .cache_size = 128, 819 .cache_level = 1, 820 .flags = (CRAT_CACHE_FLAGS_ENABLED | 821 CRAT_CACHE_FLAGS_DATA_CACHE | 822 CRAT_CACHE_FLAGS_SIMD_CACHE), 823 .num_cu_shared = 2, 824 }, 825 { 826 /* L2 Data Cache per GPU (Total Tex Cache) */ 827 .cache_size = 256, 828 .cache_level = 2, 829 .flags = (CRAT_CACHE_FLAGS_ENABLED | 830 CRAT_CACHE_FLAGS_DATA_CACHE | 831 CRAT_CACHE_FLAGS_SIMD_CACHE), 832 .num_cu_shared = 2, 833 }, 834 }; 835 836 static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = { 837 { 838 /* TCP L1 Cache per CU */ 839 .cache_size = 16, 840 .cache_level = 1, 841 .flags = (CRAT_CACHE_FLAGS_ENABLED | 842 CRAT_CACHE_FLAGS_DATA_CACHE | 843 CRAT_CACHE_FLAGS_SIMD_CACHE), 844 .num_cu_shared = 1, 845 }, 846 { 847 /* Scalar L1 Instruction Cache per SQC */ 848 .cache_size = 32, 849 .cache_level = 1, 850 .flags = (CRAT_CACHE_FLAGS_ENABLED | 851 CRAT_CACHE_FLAGS_INST_CACHE | 852 CRAT_CACHE_FLAGS_SIMD_CACHE), 853 .num_cu_shared = 2, 854 }, 855 { 856 /* Scalar L1 Data Cache per SQC */ 857 .cache_size = 16, 858 .cache_level = 1, 859 .flags = (CRAT_CACHE_FLAGS_ENABLED | 860 CRAT_CACHE_FLAGS_DATA_CACHE | 861 CRAT_CACHE_FLAGS_SIMD_CACHE), 862 .num_cu_shared = 2, 863 }, 864 { 865 /* GL1 Data Cache per SA */ 866 .cache_size = 128, 867 .cache_level = 1, 868 .flags = (CRAT_CACHE_FLAGS_ENABLED | 869 CRAT_CACHE_FLAGS_DATA_CACHE | 870 CRAT_CACHE_FLAGS_SIMD_CACHE), 871 .num_cu_shared = 2, 872 }, 873 { 874 /* L2 Data Cache per GPU (Total Tex Cache) */ 875 .cache_size = 256, 876 .cache_level = 2, 877 .flags = (CRAT_CACHE_FLAGS_ENABLED | 878 
CRAT_CACHE_FLAGS_DATA_CACHE | 879 CRAT_CACHE_FLAGS_SIMD_CACHE), 880 .num_cu_shared = 2, 881 }, 882 }; 883 884 static struct kfd_gpu_cache_info dummy_cache_info[] = { 885 { 886 /* TCP L1 Cache per CU */ 887 .cache_size = 16, 888 .cache_level = 1, 889 .flags = (CRAT_CACHE_FLAGS_ENABLED | 890 CRAT_CACHE_FLAGS_DATA_CACHE | 891 CRAT_CACHE_FLAGS_SIMD_CACHE), 892 .num_cu_shared = 1, 893 }, 894 { 895 /* Scalar L1 Instruction Cache per SQC */ 896 .cache_size = 32, 897 .cache_level = 1, 898 .flags = (CRAT_CACHE_FLAGS_ENABLED | 899 CRAT_CACHE_FLAGS_INST_CACHE | 900 CRAT_CACHE_FLAGS_SIMD_CACHE), 901 .num_cu_shared = 2, 902 }, 903 { 904 /* Scalar L1 Data Cache per SQC */ 905 .cache_size = 16, 906 .cache_level = 1, 907 .flags = (CRAT_CACHE_FLAGS_ENABLED | 908 CRAT_CACHE_FLAGS_DATA_CACHE | 909 CRAT_CACHE_FLAGS_SIMD_CACHE), 910 .num_cu_shared = 2, 911 }, 912 { 913 /* GL1 Data Cache per SA */ 914 .cache_size = 128, 915 .cache_level = 1, 916 .flags = (CRAT_CACHE_FLAGS_ENABLED | 917 CRAT_CACHE_FLAGS_DATA_CACHE | 918 CRAT_CACHE_FLAGS_SIMD_CACHE), 919 .num_cu_shared = 6, 920 }, 921 { 922 /* L2 Data Cache per GPU (Total Tex Cache) */ 923 .cache_size = 2048, 924 .cache_level = 2, 925 .flags = (CRAT_CACHE_FLAGS_ENABLED | 926 CRAT_CACHE_FLAGS_DATA_CACHE | 927 CRAT_CACHE_FLAGS_SIMD_CACHE), 928 .num_cu_shared = 6, 929 }, 930 }; 931 932 static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, 933 struct crat_subtype_computeunit *cu) 934 { 935 dev->node_props.cpu_cores_count = cu->num_cpu_cores; 936 dev->node_props.cpu_core_id_base = cu->processor_id_low; 937 if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT) 938 dev->node_props.capability |= HSA_CAP_ATS_PRESENT; 939 940 pr_debug("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores, 941 cu->processor_id_low); 942 } 943 944 static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev, 945 struct crat_subtype_computeunit *cu) 946 { 947 dev->node_props.simd_id_base = cu->processor_id_low; 948 dev->node_props.simd_count = 
cu->num_simd_cores; 949 dev->node_props.lds_size_in_kb = cu->lds_size_in_kb; 950 dev->node_props.max_waves_per_simd = cu->max_waves_simd; 951 dev->node_props.wave_front_size = cu->wave_front_size; 952 dev->node_props.array_count = cu->array_count; 953 dev->node_props.cu_per_simd_array = cu->num_cu_per_array; 954 dev->node_props.simd_per_cu = cu->num_simd_per_cu; 955 dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu; 956 if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE) 957 dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE; 958 pr_debug("CU GPU: id_base=%d\n", cu->processor_id_low); 959 } 960 961 /* kfd_parse_subtype_cu - parse compute unit subtypes and attach it to correct 962 * topology device present in the device_list 963 */ 964 static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu, 965 struct list_head *device_list) 966 { 967 struct kfd_topology_device *dev; 968 969 pr_debug("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n", 970 cu->proximity_domain, cu->hsa_capability); 971 list_for_each_entry(dev, device_list, list) { 972 if (cu->proximity_domain == dev->proximity_domain) { 973 if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT) 974 kfd_populated_cu_info_cpu(dev, cu); 975 976 if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT) 977 kfd_populated_cu_info_gpu(dev, cu); 978 break; 979 } 980 } 981 982 return 0; 983 } 984 985 static struct kfd_mem_properties * 986 find_subtype_mem(uint32_t heap_type, uint32_t flags, uint32_t width, 987 struct kfd_topology_device *dev) 988 { 989 struct kfd_mem_properties *props; 990 991 list_for_each_entry(props, &dev->mem_props, list) { 992 if (props->heap_type == heap_type 993 && props->flags == flags 994 && props->width == width) 995 return props; 996 } 997 998 return NULL; 999 } 1000 /* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct 1001 * topology device present in the device_list 1002 */ 1003 static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem, 1004 struct 
list_head *device_list) 1005 { 1006 struct kfd_mem_properties *props; 1007 struct kfd_topology_device *dev; 1008 uint32_t heap_type; 1009 uint64_t size_in_bytes; 1010 uint32_t flags = 0; 1011 uint32_t width; 1012 1013 pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n", 1014 mem->proximity_domain); 1015 list_for_each_entry(dev, device_list, list) { 1016 if (mem->proximity_domain == dev->proximity_domain) { 1017 /* We're on GPU node */ 1018 if (dev->node_props.cpu_cores_count == 0) { 1019 /* APU */ 1020 if (mem->visibility_type == 0) 1021 heap_type = 1022 HSA_MEM_HEAP_TYPE_FB_PRIVATE; 1023 /* dGPU */ 1024 else 1025 heap_type = mem->visibility_type; 1026 } else 1027 heap_type = HSA_MEM_HEAP_TYPE_SYSTEM; 1028 1029 if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE) 1030 flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE; 1031 if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE) 1032 flags |= HSA_MEM_FLAGS_NON_VOLATILE; 1033 1034 size_in_bytes = 1035 ((uint64_t)mem->length_high << 32) + 1036 mem->length_low; 1037 width = mem->width; 1038 1039 /* Multiple banks of the same type are aggregated into 1040 * one. User mode doesn't care about multiple physical 1041 * memory segments. It's managed as a single virtual 1042 * heap for user mode. 
1043 */ 1044 props = find_subtype_mem(heap_type, flags, width, dev); 1045 if (props) { 1046 props->size_in_bytes += size_in_bytes; 1047 break; 1048 } 1049 1050 props = kfd_alloc_struct(props); 1051 if (!props) 1052 return -ENOMEM; 1053 1054 props->heap_type = heap_type; 1055 props->flags = flags; 1056 props->size_in_bytes = size_in_bytes; 1057 props->width = width; 1058 1059 dev->node_props.mem_banks_count++; 1060 list_add_tail(&props->list, &dev->mem_props); 1061 1062 break; 1063 } 1064 } 1065 1066 return 0; 1067 } 1068 1069 /* kfd_parse_subtype_cache - parse cache subtypes and attach it to correct 1070 * topology device present in the device_list 1071 */ 1072 static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache, 1073 struct list_head *device_list) 1074 { 1075 struct kfd_cache_properties *props; 1076 struct kfd_topology_device *dev; 1077 uint32_t id; 1078 uint32_t total_num_of_cu; 1079 1080 id = cache->processor_id_low; 1081 1082 pr_debug("Found cache entry in CRAT table with processor_id=%d\n", id); 1083 list_for_each_entry(dev, device_list, list) { 1084 total_num_of_cu = (dev->node_props.array_count * 1085 dev->node_props.cu_per_simd_array); 1086 1087 /* Cache infomration in CRAT doesn't have proximity_domain 1088 * information as it is associated with a CPU core or GPU 1089 * Compute Unit. So map the cache using CPU core Id or SIMD 1090 * (GPU) ID. 1091 * TODO: This works because currently we can safely assume that 1092 * Compute Units are parsed before caches are parsed. 
In 1093 * future, remove this dependency 1094 */ 1095 if ((id >= dev->node_props.cpu_core_id_base && 1096 id <= dev->node_props.cpu_core_id_base + 1097 dev->node_props.cpu_cores_count) || 1098 (id >= dev->node_props.simd_id_base && 1099 id < dev->node_props.simd_id_base + 1100 total_num_of_cu)) { 1101 props = kfd_alloc_struct(props); 1102 if (!props) 1103 return -ENOMEM; 1104 1105 props->processor_id_low = id; 1106 props->cache_level = cache->cache_level; 1107 props->cache_size = cache->cache_size; 1108 props->cacheline_size = cache->cache_line_size; 1109 props->cachelines_per_tag = cache->lines_per_tag; 1110 props->cache_assoc = cache->associativity; 1111 props->cache_latency = cache->cache_latency; 1112 1113 memcpy(props->sibling_map, cache->sibling_map, 1114 sizeof(props->sibling_map)); 1115 1116 /* set the sibling_map_size as 32 for CRAT from ACPI */ 1117 props->sibling_map_size = CRAT_SIBLINGMAP_SIZE; 1118 1119 if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE) 1120 props->cache_type |= HSA_CACHE_TYPE_DATA; 1121 if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE) 1122 props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; 1123 if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE) 1124 props->cache_type |= HSA_CACHE_TYPE_CPU; 1125 if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE) 1126 props->cache_type |= HSA_CACHE_TYPE_HSACU; 1127 1128 dev->cache_count++; 1129 dev->node_props.caches_count++; 1130 list_add_tail(&props->list, &dev->cache_props); 1131 1132 break; 1133 } 1134 } 1135 1136 return 0; 1137 } 1138 1139 /* kfd_parse_subtype_iolink - parse iolink subtypes and attach it to correct 1140 * topology device present in the device_list 1141 */ 1142 static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, 1143 struct list_head *device_list) 1144 { 1145 struct kfd_iolink_properties *props = NULL, *props2; 1146 struct kfd_topology_device *dev, *to_dev; 1147 uint32_t id_from; 1148 uint32_t id_to; 1149 1150 id_from = iolink->proximity_domain_from; 1151 id_to = 
iolink->proximity_domain_to; 1152 1153 pr_debug("Found IO link entry in CRAT table with id_from=%d, id_to %d\n", 1154 id_from, id_to); 1155 list_for_each_entry(dev, device_list, list) { 1156 if (id_from == dev->proximity_domain) { 1157 props = kfd_alloc_struct(props); 1158 if (!props) 1159 return -ENOMEM; 1160 1161 props->node_from = id_from; 1162 props->node_to = id_to; 1163 props->ver_maj = iolink->version_major; 1164 props->ver_min = iolink->version_minor; 1165 props->iolink_type = iolink->io_interface_type; 1166 1167 if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) 1168 props->weight = 20; 1169 else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI) 1170 props->weight = 15 * iolink->num_hops_xgmi; 1171 else 1172 props->weight = node_distance(id_from, id_to); 1173 1174 props->min_latency = iolink->minimum_latency; 1175 props->max_latency = iolink->maximum_latency; 1176 props->min_bandwidth = iolink->minimum_bandwidth_mbs; 1177 props->max_bandwidth = iolink->maximum_bandwidth_mbs; 1178 props->rec_transfer_size = 1179 iolink->recommended_transfer_size; 1180 1181 dev->node_props.io_links_count++; 1182 list_add_tail(&props->list, &dev->io_link_props); 1183 break; 1184 } 1185 } 1186 1187 /* CPU topology is created before GPUs are detected, so CPU->GPU 1188 * links are not built at that time. If a PCIe type is discovered, it 1189 * means a GPU is detected and we are adding GPU->CPU to the topology. 1190 * At this time, also add the corresponded CPU->GPU link if GPU 1191 * is large bar. 1192 * For xGMI, we only added the link with one direction in the crat 1193 * table, add corresponded reversed direction link now. 
1194 */ 1195 if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) { 1196 to_dev = kfd_topology_device_by_proximity_domain_no_lock(id_to); 1197 if (!to_dev) 1198 return -ENODEV; 1199 /* same everything but the other direction */ 1200 props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL); 1201 if (!props2) 1202 return -ENOMEM; 1203 1204 props2->node_from = id_to; 1205 props2->node_to = id_from; 1206 props2->kobj = NULL; 1207 to_dev->node_props.io_links_count++; 1208 list_add_tail(&props2->list, &to_dev->io_link_props); 1209 } 1210 1211 return 0; 1212 } 1213 1214 /* kfd_parse_subtype - parse subtypes and attach it to correct topology device 1215 * present in the device_list 1216 * @sub_type_hdr - subtype section of crat_image 1217 * @device_list - list of topology devices present in this crat_image 1218 */ 1219 static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr, 1220 struct list_head *device_list) 1221 { 1222 struct crat_subtype_computeunit *cu; 1223 struct crat_subtype_memory *mem; 1224 struct crat_subtype_cache *cache; 1225 struct crat_subtype_iolink *iolink; 1226 int ret = 0; 1227 1228 switch (sub_type_hdr->type) { 1229 case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY: 1230 cu = (struct crat_subtype_computeunit *)sub_type_hdr; 1231 ret = kfd_parse_subtype_cu(cu, device_list); 1232 break; 1233 case CRAT_SUBTYPE_MEMORY_AFFINITY: 1234 mem = (struct crat_subtype_memory *)sub_type_hdr; 1235 ret = kfd_parse_subtype_mem(mem, device_list); 1236 break; 1237 case CRAT_SUBTYPE_CACHE_AFFINITY: 1238 cache = (struct crat_subtype_cache *)sub_type_hdr; 1239 ret = kfd_parse_subtype_cache(cache, device_list); 1240 break; 1241 case CRAT_SUBTYPE_TLB_AFFINITY: 1242 /* 1243 * For now, nothing to do here 1244 */ 1245 pr_debug("Found TLB entry in CRAT table (not processing)\n"); 1246 break; 1247 case CRAT_SUBTYPE_CCOMPUTE_AFFINITY: 1248 /* 1249 * For now, nothing to do here 1250 */ 1251 pr_debug("Found CCOMPUTE entry in CRAT table (not processing)\n"); 1252 break; 
1253 case CRAT_SUBTYPE_IOLINK_AFFINITY: 1254 iolink = (struct crat_subtype_iolink *)sub_type_hdr; 1255 ret = kfd_parse_subtype_iolink(iolink, device_list); 1256 break; 1257 default: 1258 pr_warn("Unknown subtype %d in CRAT\n", 1259 sub_type_hdr->type); 1260 } 1261 1262 return ret; 1263 } 1264 1265 /* kfd_parse_crat_table - parse CRAT table. For each node present in CRAT 1266 * create a kfd_topology_device and add in to device_list. Also parse 1267 * CRAT subtypes and attach it to appropriate kfd_topology_device 1268 * @crat_image - input image containing CRAT 1269 * @device_list - [OUT] list of kfd_topology_device generated after 1270 * parsing crat_image 1271 * @proximity_domain - Proximity domain of the first device in the table 1272 * 1273 * Return - 0 if successful else -ve value 1274 */ 1275 int kfd_parse_crat_table(void *crat_image, struct list_head *device_list, 1276 uint32_t proximity_domain) 1277 { 1278 struct kfd_topology_device *top_dev = NULL; 1279 struct crat_subtype_generic *sub_type_hdr; 1280 uint16_t node_id; 1281 int ret = 0; 1282 struct crat_header *crat_table = (struct crat_header *)crat_image; 1283 uint16_t num_nodes; 1284 uint32_t image_len; 1285 1286 if (!crat_image) 1287 return -EINVAL; 1288 1289 if (!list_empty(device_list)) { 1290 pr_warn("Error device list should be empty\n"); 1291 return -EINVAL; 1292 } 1293 1294 num_nodes = crat_table->num_domains; 1295 image_len = crat_table->length; 1296 1297 pr_debug("Parsing CRAT table with %d nodes\n", num_nodes); 1298 1299 for (node_id = 0; node_id < num_nodes; node_id++) { 1300 top_dev = kfd_create_topology_device(device_list); 1301 if (!top_dev) 1302 break; 1303 top_dev->proximity_domain = proximity_domain++; 1304 } 1305 1306 if (!top_dev) { 1307 ret = -ENOMEM; 1308 goto err; 1309 } 1310 1311 memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH); 1312 memcpy(top_dev->oem_table_id, crat_table->oem_table_id, 1313 CRAT_OEMTABLEID_LENGTH); 1314 top_dev->oem_revision = 
crat_table->oem_revision; 1315 1316 sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1); 1317 while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) < 1318 ((char *)crat_image) + image_len) { 1319 if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) { 1320 ret = kfd_parse_subtype(sub_type_hdr, device_list); 1321 if (ret) 1322 break; 1323 } 1324 1325 sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + 1326 sub_type_hdr->length); 1327 } 1328 1329 err: 1330 if (ret) 1331 kfd_release_topology_device_list(device_list); 1332 1333 return ret; 1334 } 1335 1336 1337 static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, 1338 struct kfd_gpu_cache_info *pcache_info) 1339 { 1340 struct amdgpu_device *adev = kdev->adev; 1341 int i = 0; 1342 1343 /* TCP L1 Cache per CU */ 1344 if (adev->gfx.config.gc_tcp_l1_size) { 1345 pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size; 1346 pcache_info[i].cache_level = 1; 1347 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | 1348 CRAT_CACHE_FLAGS_DATA_CACHE | 1349 CRAT_CACHE_FLAGS_SIMD_CACHE); 1350 pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2; 1351 i++; 1352 } 1353 /* Scalar L1 Instruction Cache per SQC */ 1354 if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) { 1355 pcache_info[i].cache_size = 1356 adev->gfx.config.gc_l1_instruction_cache_size_per_sqc; 1357 pcache_info[i].cache_level = 1; 1358 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | 1359 CRAT_CACHE_FLAGS_INST_CACHE | 1360 CRAT_CACHE_FLAGS_SIMD_CACHE); 1361 pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; 1362 i++; 1363 } 1364 /* Scalar L1 Data Cache per SQC */ 1365 if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) { 1366 pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc; 1367 pcache_info[i].cache_level = 1; 1368 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | 1369 CRAT_CACHE_FLAGS_DATA_CACHE | 1370 CRAT_CACHE_FLAGS_SIMD_CACHE); 1371 
pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; 1372 i++; 1373 } 1374 /* GL1 Data Cache per SA */ 1375 if (adev->gfx.config.gc_gl1c_per_sa && 1376 adev->gfx.config.gc_gl1c_size_per_instance) { 1377 pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa * 1378 adev->gfx.config.gc_gl1c_size_per_instance; 1379 pcache_info[i].cache_level = 1; 1380 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | 1381 CRAT_CACHE_FLAGS_DATA_CACHE | 1382 CRAT_CACHE_FLAGS_SIMD_CACHE); 1383 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; 1384 i++; 1385 } 1386 /* L2 Data Cache per GPU (Total Tex Cache) */ 1387 if (adev->gfx.config.gc_gl2c_per_gpu) { 1388 pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu; 1389 pcache_info[i].cache_level = 2; 1390 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | 1391 CRAT_CACHE_FLAGS_DATA_CACHE | 1392 CRAT_CACHE_FLAGS_SIMD_CACHE); 1393 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; 1394 i++; 1395 } 1396 /* L3 Data Cache per GPU */ 1397 if (adev->gmc.mall_size) { 1398 pcache_info[i].cache_size = adev->gmc.mall_size / 1024; 1399 pcache_info[i].cache_level = 3; 1400 pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | 1401 CRAT_CACHE_FLAGS_DATA_CACHE | 1402 CRAT_CACHE_FLAGS_SIMD_CACHE); 1403 pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; 1404 i++; 1405 } 1406 return i; 1407 } 1408 1409 int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info) 1410 { 1411 int num_of_cache_types = 0; 1412 1413 switch (kdev->adev->asic_type) { 1414 case CHIP_KAVERI: 1415 *pcache_info = kaveri_cache_info; 1416 num_of_cache_types = ARRAY_SIZE(kaveri_cache_info); 1417 break; 1418 case CHIP_HAWAII: 1419 *pcache_info = hawaii_cache_info; 1420 num_of_cache_types = ARRAY_SIZE(hawaii_cache_info); 1421 break; 1422 case CHIP_CARRIZO: 1423 *pcache_info = carrizo_cache_info; 1424 num_of_cache_types = ARRAY_SIZE(carrizo_cache_info); 1425 break; 1426 case CHIP_TONGA: 1427 
*pcache_info = tonga_cache_info; 1428 num_of_cache_types = ARRAY_SIZE(tonga_cache_info); 1429 break; 1430 case CHIP_FIJI: 1431 *pcache_info = fiji_cache_info; 1432 num_of_cache_types = ARRAY_SIZE(fiji_cache_info); 1433 break; 1434 case CHIP_POLARIS10: 1435 *pcache_info = polaris10_cache_info; 1436 num_of_cache_types = ARRAY_SIZE(polaris10_cache_info); 1437 break; 1438 case CHIP_POLARIS11: 1439 *pcache_info = polaris11_cache_info; 1440 num_of_cache_types = ARRAY_SIZE(polaris11_cache_info); 1441 break; 1442 case CHIP_POLARIS12: 1443 *pcache_info = polaris12_cache_info; 1444 num_of_cache_types = ARRAY_SIZE(polaris12_cache_info); 1445 break; 1446 case CHIP_VEGAM: 1447 *pcache_info = vegam_cache_info; 1448 num_of_cache_types = ARRAY_SIZE(vegam_cache_info); 1449 break; 1450 default: 1451 switch (KFD_GC_VERSION(kdev)) { 1452 case IP_VERSION(9, 0, 1): 1453 *pcache_info = vega10_cache_info; 1454 num_of_cache_types = ARRAY_SIZE(vega10_cache_info); 1455 break; 1456 case IP_VERSION(9, 2, 1): 1457 *pcache_info = vega12_cache_info; 1458 num_of_cache_types = ARRAY_SIZE(vega12_cache_info); 1459 break; 1460 case IP_VERSION(9, 4, 0): 1461 case IP_VERSION(9, 4, 1): 1462 *pcache_info = vega20_cache_info; 1463 num_of_cache_types = ARRAY_SIZE(vega20_cache_info); 1464 break; 1465 case IP_VERSION(9, 4, 2): 1466 *pcache_info = aldebaran_cache_info; 1467 num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info); 1468 break; 1469 case IP_VERSION(9, 1, 0): 1470 case IP_VERSION(9, 2, 2): 1471 *pcache_info = raven_cache_info; 1472 num_of_cache_types = ARRAY_SIZE(raven_cache_info); 1473 break; 1474 case IP_VERSION(9, 3, 0): 1475 *pcache_info = renoir_cache_info; 1476 num_of_cache_types = ARRAY_SIZE(renoir_cache_info); 1477 break; 1478 case IP_VERSION(10, 1, 10): 1479 case IP_VERSION(10, 1, 2): 1480 case IP_VERSION(10, 1, 3): 1481 case IP_VERSION(10, 1, 4): 1482 *pcache_info = navi10_cache_info; 1483 num_of_cache_types = ARRAY_SIZE(navi10_cache_info); 1484 break; 1485 case IP_VERSION(10, 1, 1): 1486 
*pcache_info = navi14_cache_info; 1487 num_of_cache_types = ARRAY_SIZE(navi14_cache_info); 1488 break; 1489 case IP_VERSION(10, 3, 0): 1490 *pcache_info = sienna_cichlid_cache_info; 1491 num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info); 1492 break; 1493 case IP_VERSION(10, 3, 2): 1494 *pcache_info = navy_flounder_cache_info; 1495 num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info); 1496 break; 1497 case IP_VERSION(10, 3, 4): 1498 *pcache_info = dimgrey_cavefish_cache_info; 1499 num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info); 1500 break; 1501 case IP_VERSION(10, 3, 1): 1502 *pcache_info = vangogh_cache_info; 1503 num_of_cache_types = ARRAY_SIZE(vangogh_cache_info); 1504 break; 1505 case IP_VERSION(10, 3, 5): 1506 *pcache_info = beige_goby_cache_info; 1507 num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info); 1508 break; 1509 case IP_VERSION(10, 3, 3): 1510 *pcache_info = yellow_carp_cache_info; 1511 num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info); 1512 break; 1513 case IP_VERSION(10, 3, 6): 1514 *pcache_info = gc_10_3_6_cache_info; 1515 num_of_cache_types = ARRAY_SIZE(gc_10_3_6_cache_info); 1516 break; 1517 case IP_VERSION(10, 3, 7): 1518 *pcache_info = gfx1037_cache_info; 1519 num_of_cache_types = ARRAY_SIZE(gfx1037_cache_info); 1520 break; 1521 case IP_VERSION(11, 0, 0): 1522 case IP_VERSION(11, 0, 1): 1523 case IP_VERSION(11, 0, 2): 1524 case IP_VERSION(11, 0, 3): 1525 num_of_cache_types = 1526 kfd_fill_gpu_cache_info_from_gfx_config(kdev, *pcache_info); 1527 break; 1528 default: 1529 *pcache_info = dummy_cache_info; 1530 num_of_cache_types = ARRAY_SIZE(dummy_cache_info); 1531 pr_warn("dummy cache info is used temporarily and real cache info need update later.\n"); 1532 break; 1533 } 1534 } 1535 return num_of_cache_types; 1536 } 1537 1538 static bool kfd_ignore_crat(void) 1539 { 1540 bool ret; 1541 1542 if (ignore_crat) 1543 return true; 1544 1545 #ifndef KFD_SUPPORT_IOMMU_V2 1546 ret = true; 1547 #else 1548 ret = false; 
1549 #endif 1550 1551 return ret; 1552 } 1553 1554 /* 1555 * kfd_create_crat_image_acpi - Allocates memory for CRAT image and 1556 * copies CRAT from ACPI (if available). 1557 * NOTE: Call kfd_destroy_crat_image to free CRAT image memory 1558 * 1559 * @crat_image: CRAT read from ACPI. If no CRAT in ACPI then 1560 * crat_image will be NULL 1561 * @size: [OUT] size of crat_image 1562 * 1563 * Return 0 if successful else return error code 1564 */ 1565 int kfd_create_crat_image_acpi(void **crat_image, size_t *size) 1566 { 1567 struct acpi_table_header *crat_table; 1568 acpi_status status; 1569 void *pcrat_image; 1570 int rc = 0; 1571 1572 if (!crat_image) 1573 return -EINVAL; 1574 1575 *crat_image = NULL; 1576 1577 if (kfd_ignore_crat()) { 1578 pr_info("CRAT table disabled by module option\n"); 1579 return -ENODATA; 1580 } 1581 1582 /* Fetch the CRAT table from ACPI */ 1583 status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table); 1584 if (status == AE_NOT_FOUND) { 1585 pr_info("CRAT table not found\n"); 1586 return -ENODATA; 1587 } else if (ACPI_FAILURE(status)) { 1588 const char *err = acpi_format_exception(status); 1589 1590 pr_err("CRAT table error: %s\n", err); 1591 return -EINVAL; 1592 } 1593 1594 pcrat_image = kvmalloc(crat_table->length, GFP_KERNEL); 1595 if (!pcrat_image) { 1596 rc = -ENOMEM; 1597 goto out; 1598 } 1599 1600 memcpy(pcrat_image, crat_table, crat_table->length); 1601 *crat_image = pcrat_image; 1602 *size = crat_table->length; 1603 out: 1604 acpi_put_table(crat_table); 1605 return rc; 1606 } 1607 1608 /* Memory required to create Virtual CRAT. 1609 * Since there is no easy way to predict the amount of memory required, the 1610 * following amount is allocated for GPU Virtual CRAT. This is 1611 * expected to cover all known conditions. But to be safe additional check 1612 * is put in the code to ensure we don't overwrite. 
1613 */ 1614 #define VCRAT_SIZE_FOR_GPU (4 * PAGE_SIZE) 1615 1616 /* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node 1617 * 1618 * @numa_node_id: CPU NUMA node id 1619 * @avail_size: Available size in the memory 1620 * @sub_type_hdr: Memory into which compute info will be filled in 1621 * 1622 * Return 0 if successful else return -ve value 1623 */ 1624 static int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size, 1625 int proximity_domain, 1626 struct crat_subtype_computeunit *sub_type_hdr) 1627 { 1628 const struct cpumask *cpumask; 1629 1630 *avail_size -= sizeof(struct crat_subtype_computeunit); 1631 if (*avail_size < 0) 1632 return -ENOMEM; 1633 1634 memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit)); 1635 1636 /* Fill in subtype header data */ 1637 sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY; 1638 sub_type_hdr->length = sizeof(struct crat_subtype_computeunit); 1639 sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; 1640 1641 cpumask = cpumask_of_node(numa_node_id); 1642 1643 /* Fill in CU data */ 1644 sub_type_hdr->flags |= CRAT_CU_FLAGS_CPU_PRESENT; 1645 sub_type_hdr->proximity_domain = proximity_domain; 1646 sub_type_hdr->processor_id_low = kfd_numa_node_to_apic_id(numa_node_id); 1647 if (sub_type_hdr->processor_id_low == -1) 1648 return -EINVAL; 1649 1650 sub_type_hdr->num_cpu_cores = cpumask_weight(cpumask); 1651 1652 return 0; 1653 } 1654 1655 /* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node 1656 * 1657 * @numa_node_id: CPU NUMA node id 1658 * @avail_size: Available size in the memory 1659 * @sub_type_hdr: Memory into which compute info will be filled in 1660 * 1661 * Return 0 if successful else return -ve value 1662 */ 1663 static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size, 1664 int proximity_domain, 1665 struct crat_subtype_memory *sub_type_hdr) 1666 { 1667 uint64_t mem_in_bytes = 0; 1668 pg_data_t *pgdat; 1669 int zone_type; 1670 1671 *avail_size -= 
sizeof(struct crat_subtype_memory); 1672 if (*avail_size < 0) 1673 return -ENOMEM; 1674 1675 memset(sub_type_hdr, 0, sizeof(struct crat_subtype_memory)); 1676 1677 /* Fill in subtype header data */ 1678 sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY; 1679 sub_type_hdr->length = sizeof(struct crat_subtype_memory); 1680 sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; 1681 1682 /* Fill in Memory Subunit data */ 1683 1684 /* Unlike si_meminfo, si_meminfo_node is not exported. So 1685 * the following lines are duplicated from si_meminfo_node 1686 * function 1687 */ 1688 pgdat = NODE_DATA(numa_node_id); 1689 for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) 1690 mem_in_bytes += zone_managed_pages(&pgdat->node_zones[zone_type]); 1691 mem_in_bytes <<= PAGE_SHIFT; 1692 1693 sub_type_hdr->length_low = lower_32_bits(mem_in_bytes); 1694 sub_type_hdr->length_high = upper_32_bits(mem_in_bytes); 1695 sub_type_hdr->proximity_domain = proximity_domain; 1696 1697 return 0; 1698 } 1699 1700 #ifdef CONFIG_X86_64 1701 static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size, 1702 uint32_t *num_entries, 1703 struct crat_subtype_iolink *sub_type_hdr) 1704 { 1705 int nid; 1706 struct cpuinfo_x86 *c = &cpu_data(0); 1707 uint8_t link_type; 1708 1709 if (c->x86_vendor == X86_VENDOR_AMD) 1710 link_type = CRAT_IOLINK_TYPE_HYPERTRANSPORT; 1711 else 1712 link_type = CRAT_IOLINK_TYPE_QPI_1_1; 1713 1714 *num_entries = 0; 1715 1716 /* Create IO links from this node to other CPU nodes */ 1717 for_each_online_node(nid) { 1718 if (nid == numa_node_id) /* node itself */ 1719 continue; 1720 1721 *avail_size -= sizeof(struct crat_subtype_iolink); 1722 if (*avail_size < 0) 1723 return -ENOMEM; 1724 1725 memset(sub_type_hdr, 0, sizeof(struct crat_subtype_iolink)); 1726 1727 /* Fill in subtype header data */ 1728 sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; 1729 sub_type_hdr->length = sizeof(struct crat_subtype_iolink); 1730 sub_type_hdr->flags = 
CRAT_SUBTYPE_FLAGS_ENABLED; 1731 1732 /* Fill in IO link data */ 1733 sub_type_hdr->proximity_domain_from = numa_node_id; 1734 sub_type_hdr->proximity_domain_to = nid; 1735 sub_type_hdr->io_interface_type = link_type; 1736 1737 (*num_entries)++; 1738 sub_type_hdr++; 1739 } 1740 1741 return 0; 1742 } 1743 #endif 1744 1745 /* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU 1746 * 1747 * @pcrat_image: Fill in VCRAT for CPU 1748 * @size: [IN] allocated size of crat_image. 1749 * [OUT] actual size of data filled in crat_image 1750 */ 1751 static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) 1752 { 1753 struct crat_header *crat_table = (struct crat_header *)pcrat_image; 1754 struct acpi_table_header *acpi_table; 1755 acpi_status status; 1756 struct crat_subtype_generic *sub_type_hdr; 1757 int avail_size = *size; 1758 int numa_node_id; 1759 #ifdef CONFIG_X86_64 1760 uint32_t entries = 0; 1761 #endif 1762 int ret = 0; 1763 1764 if (!pcrat_image) 1765 return -EINVAL; 1766 1767 /* Fill in CRAT Header. 1768 * Modify length and total_entries as subunits are added. 
1769 */ 1770 avail_size -= sizeof(struct crat_header); 1771 if (avail_size < 0) 1772 return -ENOMEM; 1773 1774 memset(crat_table, 0, sizeof(struct crat_header)); 1775 memcpy(&crat_table->signature, CRAT_SIGNATURE, 1776 sizeof(crat_table->signature)); 1777 crat_table->length = sizeof(struct crat_header); 1778 1779 status = acpi_get_table("DSDT", 0, &acpi_table); 1780 if (status != AE_OK) 1781 pr_warn("DSDT table not found for OEM information\n"); 1782 else { 1783 crat_table->oem_revision = acpi_table->revision; 1784 memcpy(crat_table->oem_id, acpi_table->oem_id, 1785 CRAT_OEMID_LENGTH); 1786 memcpy(crat_table->oem_table_id, acpi_table->oem_table_id, 1787 CRAT_OEMTABLEID_LENGTH); 1788 acpi_put_table(acpi_table); 1789 } 1790 crat_table->total_entries = 0; 1791 crat_table->num_domains = 0; 1792 1793 sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1); 1794 1795 for_each_online_node(numa_node_id) { 1796 if (kfd_numa_node_to_apic_id(numa_node_id) == -1) 1797 continue; 1798 1799 /* Fill in Subtype: Compute Unit */ 1800 ret = kfd_fill_cu_for_cpu(numa_node_id, &avail_size, 1801 crat_table->num_domains, 1802 (struct crat_subtype_computeunit *)sub_type_hdr); 1803 if (ret < 0) 1804 return ret; 1805 crat_table->length += sub_type_hdr->length; 1806 crat_table->total_entries++; 1807 1808 sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + 1809 sub_type_hdr->length); 1810 1811 /* Fill in Subtype: Memory */ 1812 ret = kfd_fill_mem_info_for_cpu(numa_node_id, &avail_size, 1813 crat_table->num_domains, 1814 (struct crat_subtype_memory *)sub_type_hdr); 1815 if (ret < 0) 1816 return ret; 1817 crat_table->length += sub_type_hdr->length; 1818 crat_table->total_entries++; 1819 1820 sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + 1821 sub_type_hdr->length); 1822 1823 /* Fill in Subtype: IO Link */ 1824 #ifdef CONFIG_X86_64 1825 ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size, 1826 &entries, 1827 (struct crat_subtype_iolink *)sub_type_hdr); 1828 if 
(ret < 0) 1829 return ret; 1830 1831 if (entries) { 1832 crat_table->length += (sub_type_hdr->length * entries); 1833 crat_table->total_entries += entries; 1834 1835 sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + 1836 sub_type_hdr->length * entries); 1837 } 1838 #else 1839 pr_info("IO link not available for non x86 platforms\n"); 1840 #endif 1841 1842 crat_table->num_domains++; 1843 } 1844 1845 /* TODO: Add cache Subtype for CPU. 1846 * Currently, CPU cache information is available in function 1847 * detect_cache_attributes(cpu) defined in the file 1848 * ./arch/x86/kernel/cpu/intel_cacheinfo.c. This function is not 1849 * exported and to get the same information the code needs to be 1850 * duplicated. 1851 */ 1852 1853 *size = crat_table->length; 1854 pr_info("Virtual CRAT table created for CPU\n"); 1855 1856 return 0; 1857 } 1858 1859 static int kfd_fill_gpu_memory_affinity(int *avail_size, 1860 struct kfd_dev *kdev, uint8_t type, uint64_t size, 1861 struct crat_subtype_memory *sub_type_hdr, 1862 uint32_t proximity_domain, 1863 const struct kfd_local_mem_info *local_mem_info) 1864 { 1865 *avail_size -= sizeof(struct crat_subtype_memory); 1866 if (*avail_size < 0) 1867 return -ENOMEM; 1868 1869 memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_memory)); 1870 sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY; 1871 sub_type_hdr->length = sizeof(struct crat_subtype_memory); 1872 sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED; 1873 1874 sub_type_hdr->proximity_domain = proximity_domain; 1875 1876 pr_debug("Fill gpu memory affinity - type 0x%x size 0x%llx\n", 1877 type, size); 1878 1879 sub_type_hdr->length_low = lower_32_bits(size); 1880 sub_type_hdr->length_high = upper_32_bits(size); 1881 1882 sub_type_hdr->width = local_mem_info->vram_width; 1883 sub_type_hdr->visibility_type = type; 1884 1885 return 0; 1886 } 1887 1888 #ifdef CONFIG_ACPI_NUMA 1889 static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev) 1890 { 1891 struct 
acpi_table_header *table_header = NULL; 1892 struct acpi_subtable_header *sub_header = NULL; 1893 unsigned long table_end, subtable_len; 1894 u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 | 1895 pci_dev_id(kdev->pdev); 1896 u32 bdf; 1897 acpi_status status; 1898 struct acpi_srat_cpu_affinity *cpu; 1899 struct acpi_srat_generic_affinity *gpu; 1900 int pxm = 0, max_pxm = 0; 1901 int numa_node = NUMA_NO_NODE; 1902 bool found = false; 1903 1904 /* Fetch the SRAT table from ACPI */ 1905 status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header); 1906 if (status == AE_NOT_FOUND) { 1907 pr_warn("SRAT table not found\n"); 1908 return; 1909 } else if (ACPI_FAILURE(status)) { 1910 const char *err = acpi_format_exception(status); 1911 pr_err("SRAT table error: %s\n", err); 1912 return; 1913 } 1914 1915 table_end = (unsigned long)table_header + table_header->length; 1916 1917 /* Parse all entries looking for a match. */ 1918 sub_header = (struct acpi_subtable_header *) 1919 ((unsigned long)table_header + 1920 sizeof(struct acpi_table_srat)); 1921 subtable_len = sub_header->length; 1922 1923 while (((unsigned long)sub_header) + subtable_len < table_end) { 1924 /* 1925 * If length is 0, break from this loop to avoid 1926 * infinite loop. 
1927 */ 1928 if (subtable_len == 0) { 1929 pr_err("SRAT invalid zero length\n"); 1930 break; 1931 } 1932 1933 switch (sub_header->type) { 1934 case ACPI_SRAT_TYPE_CPU_AFFINITY: 1935 cpu = (struct acpi_srat_cpu_affinity *)sub_header; 1936 pxm = *((u32 *)cpu->proximity_domain_hi) << 8 | 1937 cpu->proximity_domain_lo; 1938 if (pxm > max_pxm) 1939 max_pxm = pxm; 1940 break; 1941 case ACPI_SRAT_TYPE_GENERIC_AFFINITY: 1942 gpu = (struct acpi_srat_generic_affinity *)sub_header; 1943 bdf = *((u16 *)(&gpu->device_handle[0])) << 16 | 1944 *((u16 *)(&gpu->device_handle[2])); 1945 if (bdf == pci_id) { 1946 found = true; 1947 numa_node = pxm_to_node(gpu->proximity_domain); 1948 } 1949 break; 1950 default: 1951 break; 1952 } 1953 1954 if (found) 1955 break; 1956 1957 sub_header = (struct acpi_subtable_header *) 1958 ((unsigned long)sub_header + subtable_len); 1959 subtable_len = sub_header->length; 1960 } 1961 1962 acpi_put_table(table_header); 1963 1964 /* Workaround bad cpu-gpu binding case */ 1965 if (found && (numa_node < 0 || 1966 numa_node > pxm_to_node(max_pxm))) 1967 numa_node = 0; 1968 1969 if (numa_node != NUMA_NO_NODE) 1970 set_dev_node(&kdev->pdev->dev, numa_node); 1971 } 1972 #endif 1973 1974 /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU 1975 * to its NUMA node 1976 * @avail_size: Available size in the memory 1977 * @kdev - [IN] GPU device 1978 * @sub_type_hdr: Memory into which io link info will be filled in 1979 * @proximity_domain - proximity domain of the GPU node 1980 * 1981 * Return 0 if successful else return -ve value 1982 */ 1983 static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, 1984 struct kfd_dev *kdev, 1985 struct crat_subtype_iolink *sub_type_hdr, 1986 uint32_t proximity_domain) 1987 { 1988 *avail_size -= sizeof(struct crat_subtype_iolink); 1989 if (*avail_size < 0) 1990 return -ENOMEM; 1991 1992 memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink)); 1993 1994 /* Fill in subtype header data */ 1995 
sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; 1996 sub_type_hdr->length = sizeof(struct crat_subtype_iolink); 1997 sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED; 1998 if (kfd_dev_is_large_bar(kdev)) 1999 sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; 2000 2001 /* Fill in IOLINK subtype. 2002 * TODO: Fill-in other fields of iolink subtype 2003 */ 2004 if (kdev->adev->gmc.xgmi.connected_to_cpu) { 2005 /* 2006 * with host gpu xgmi link, host can access gpu memory whether 2007 * or not pcie bar type is large, so always create bidirectional 2008 * io link. 2009 */ 2010 sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; 2011 sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; 2012 sub_type_hdr->num_hops_xgmi = 1; 2013 if (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 2)) { 2014 sub_type_hdr->minimum_bandwidth_mbs = 2015 amdgpu_amdkfd_get_xgmi_bandwidth_mbytes( 2016 kdev->adev, NULL, true); 2017 sub_type_hdr->maximum_bandwidth_mbs = 2018 sub_type_hdr->minimum_bandwidth_mbs; 2019 } 2020 } else { 2021 sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS; 2022 sub_type_hdr->minimum_bandwidth_mbs = 2023 amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, true); 2024 sub_type_hdr->maximum_bandwidth_mbs = 2025 amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, false); 2026 } 2027 2028 sub_type_hdr->proximity_domain_from = proximity_domain; 2029 2030 #ifdef CONFIG_ACPI_NUMA 2031 if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) 2032 kfd_find_numa_node_in_srat(kdev); 2033 #endif 2034 #ifdef CONFIG_NUMA 2035 if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) 2036 sub_type_hdr->proximity_domain_to = 0; 2037 else 2038 sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node; 2039 #else 2040 sub_type_hdr->proximity_domain_to = 0; 2041 #endif 2042 return 0; 2043 } 2044 2045 static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, 2046 struct kfd_dev *kdev, 2047 struct kfd_dev *peer_kdev, 2048 struct crat_subtype_iolink *sub_type_hdr, 2049 uint32_t 
proximity_domain_from, 2050 uint32_t proximity_domain_to) 2051 { 2052 *avail_size -= sizeof(struct crat_subtype_iolink); 2053 if (*avail_size < 0) 2054 return -ENOMEM; 2055 2056 memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink)); 2057 2058 sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; 2059 sub_type_hdr->length = sizeof(struct crat_subtype_iolink); 2060 sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED | 2061 CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; 2062 2063 sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; 2064 sub_type_hdr->proximity_domain_from = proximity_domain_from; 2065 sub_type_hdr->proximity_domain_to = proximity_domain_to; 2066 sub_type_hdr->num_hops_xgmi = 2067 amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev); 2068 sub_type_hdr->maximum_bandwidth_mbs = 2069 amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, peer_kdev->adev, false); 2070 sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ? 2071 amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0; 2072 2073 return 0; 2074 } 2075 2076 /* kfd_create_vcrat_image_gpu - Create Virtual CRAT for CPU 2077 * 2078 * @pcrat_image: Fill in VCRAT for GPU 2079 * @size: [IN] allocated size of crat_image. 2080 * [OUT] actual size of data filled in crat_image 2081 */ 2082 static int kfd_create_vcrat_image_gpu(void *pcrat_image, 2083 size_t *size, struct kfd_dev *kdev, 2084 uint32_t proximity_domain) 2085 { 2086 struct crat_header *crat_table = (struct crat_header *)pcrat_image; 2087 struct crat_subtype_generic *sub_type_hdr; 2088 struct kfd_local_mem_info local_mem_info; 2089 struct kfd_topology_device *peer_dev; 2090 struct crat_subtype_computeunit *cu; 2091 struct kfd_cu_info cu_info; 2092 int avail_size = *size; 2093 uint32_t total_num_of_cu; 2094 uint32_t nid = 0; 2095 int ret = 0; 2096 2097 if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU) 2098 return -EINVAL; 2099 2100 /* Fill the CRAT Header. 
2101 * Modify length and total_entries as subunits are added. 2102 */ 2103 avail_size -= sizeof(struct crat_header); 2104 if (avail_size < 0) 2105 return -ENOMEM; 2106 2107 memset(crat_table, 0, sizeof(struct crat_header)); 2108 2109 memcpy(&crat_table->signature, CRAT_SIGNATURE, 2110 sizeof(crat_table->signature)); 2111 /* Change length as we add more subtypes*/ 2112 crat_table->length = sizeof(struct crat_header); 2113 crat_table->num_domains = 1; 2114 crat_table->total_entries = 0; 2115 2116 /* Fill in Subtype: Compute Unit 2117 * First fill in the sub type header and then sub type data 2118 */ 2119 avail_size -= sizeof(struct crat_subtype_computeunit); 2120 if (avail_size < 0) 2121 return -ENOMEM; 2122 2123 sub_type_hdr = (struct crat_subtype_generic *)(crat_table + 1); 2124 memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit)); 2125 2126 sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY; 2127 sub_type_hdr->length = sizeof(struct crat_subtype_computeunit); 2128 sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; 2129 2130 /* Fill CU subtype data */ 2131 cu = (struct crat_subtype_computeunit *)sub_type_hdr; 2132 cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT; 2133 cu->proximity_domain = proximity_domain; 2134 2135 amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info); 2136 cu->num_simd_per_cu = cu_info.simd_per_cu; 2137 cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number; 2138 cu->max_waves_simd = cu_info.max_waves_per_simd; 2139 2140 cu->wave_front_size = cu_info.wave_front_size; 2141 cu->array_count = cu_info.num_shader_arrays_per_engine * 2142 cu_info.num_shader_engines; 2143 total_num_of_cu = (cu->array_count * cu_info.num_cu_per_sh); 2144 cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu); 2145 cu->num_cu_per_array = cu_info.num_cu_per_sh; 2146 cu->max_slots_scatch_cu = cu_info.max_scratch_slots_per_cu; 2147 cu->num_banks = cu_info.num_shader_engines; 2148 cu->lds_size_in_kb = cu_info.lds_size; 2149 2150 cu->hsa_capability 
= 0; 2151 2152 /* Check if this node supports IOMMU. During parsing this flag will 2153 * translate to HSA_CAP_ATS_PRESENT 2154 */ 2155 if (!kfd_iommu_check_device(kdev)) 2156 cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT; 2157 2158 crat_table->length += sub_type_hdr->length; 2159 crat_table->total_entries++; 2160 2161 /* Fill in Subtype: Memory. Only on systems with large BAR (no 2162 * private FB), report memory as public. On other systems 2163 * report the total FB size (public+private) as a single 2164 * private heap. 2165 */ 2166 local_mem_info = kdev->local_mem_info; 2167 sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + 2168 sub_type_hdr->length); 2169 2170 if (debug_largebar) 2171 local_mem_info.local_mem_size_private = 0; 2172 2173 if (local_mem_info.local_mem_size_private == 0) 2174 ret = kfd_fill_gpu_memory_affinity(&avail_size, 2175 kdev, HSA_MEM_HEAP_TYPE_FB_PUBLIC, 2176 local_mem_info.local_mem_size_public, 2177 (struct crat_subtype_memory *)sub_type_hdr, 2178 proximity_domain, 2179 &local_mem_info); 2180 else 2181 ret = kfd_fill_gpu_memory_affinity(&avail_size, 2182 kdev, HSA_MEM_HEAP_TYPE_FB_PRIVATE, 2183 local_mem_info.local_mem_size_public + 2184 local_mem_info.local_mem_size_private, 2185 (struct crat_subtype_memory *)sub_type_hdr, 2186 proximity_domain, 2187 &local_mem_info); 2188 if (ret < 0) 2189 return ret; 2190 2191 crat_table->length += sizeof(struct crat_subtype_memory); 2192 crat_table->total_entries++; 2193 2194 /* Fill in Subtype: IO_LINKS 2195 * Only direct links are added here which is Link from GPU to 2196 * its NUMA node. Indirect links are added by userspace. 
2197 */ 2198 sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + 2199 sub_type_hdr->length); 2200 ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev, 2201 (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain); 2202 2203 if (ret < 0) 2204 return ret; 2205 2206 crat_table->length += sub_type_hdr->length; 2207 crat_table->total_entries++; 2208 2209 2210 /* Fill in Subtype: IO_LINKS 2211 * Direct links from GPU to other GPUs through xGMI. 2212 * We will loop GPUs that already be processed (with lower value 2213 * of proximity_domain), add the link for the GPUs with same 2214 * hive id (from this GPU to other GPU) . The reversed iolink 2215 * (from other GPU to this GPU) will be added 2216 * in kfd_parse_subtype_iolink. 2217 */ 2218 if (kdev->hive_id) { 2219 for (nid = 0; nid < proximity_domain; ++nid) { 2220 peer_dev = kfd_topology_device_by_proximity_domain_no_lock(nid); 2221 if (!peer_dev->gpu) 2222 continue; 2223 if (peer_dev->gpu->hive_id != kdev->hive_id) 2224 continue; 2225 sub_type_hdr = (typeof(sub_type_hdr))( 2226 (char *)sub_type_hdr + 2227 sizeof(struct crat_subtype_iolink)); 2228 ret = kfd_fill_gpu_xgmi_link_to_gpu( 2229 &avail_size, kdev, peer_dev->gpu, 2230 (struct crat_subtype_iolink *)sub_type_hdr, 2231 proximity_domain, nid); 2232 if (ret < 0) 2233 return ret; 2234 crat_table->length += sub_type_hdr->length; 2235 crat_table->total_entries++; 2236 } 2237 } 2238 *size = crat_table->length; 2239 pr_info("Virtual CRAT table created for GPU\n"); 2240 2241 return ret; 2242 } 2243 2244 /* kfd_create_crat_image_virtual - Allocates memory for CRAT image and 2245 * creates a Virtual CRAT (VCRAT) image 2246 * 2247 * NOTE: Call kfd_destroy_crat_image to free CRAT image memory 2248 * 2249 * @crat_image: VCRAT image created because ACPI does not have a 2250 * CRAT for this device 2251 * @size: [OUT] size of virtual crat_image 2252 * @flags: COMPUTE_UNIT_CPU - Create VCRAT for CPU device 2253 * COMPUTE_UNIT_GPU - Create VCRAT for GPU 2254 * 
(COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
 *			-- this option is not currently implemented.
 *			The assumption is that all AMD APUs will have CRAT
 * @kdev: Valid kfd_device required if flags contain COMPUTE_UNIT_GPU
 *
 *	Return 0 if successful else return -ve value
 */
int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
				  int flags, struct kfd_dev *kdev,
				  uint32_t proximity_domain)
{
	void *pcrat_image = NULL;
	int ret = 0, num_nodes;
	size_t dyn_size;

	if (!crat_image)
		return -EINVAL;

	*crat_image = NULL;

	/* Allocate the CPU Virtual CRAT size based on the number of online
	 * nodes. Allocate VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image.
	 * This should cover all the current conditions. A check is put not
	 * to overwrite beyond allocated size for GPUs
	 */
	switch (flags) {
	case COMPUTE_UNIT_CPU:
		num_nodes = num_online_nodes();
		dyn_size = sizeof(struct crat_header) +
			num_nodes * (sizeof(struct crat_subtype_computeunit) +
			sizeof(struct crat_subtype_memory) +
			(num_nodes - 1) * sizeof(struct crat_subtype_iolink));
		pcrat_image = kvmalloc(dyn_size, GFP_KERNEL);
		if (!pcrat_image)
			return -ENOMEM;
		*size = dyn_size;
		/* %zu is the correct printk conversion for size_t; the
		 * previous %ld was a -Wformat mismatch on 32-bit builds.
		 */
		pr_debug("CRAT size is %zu\n", dyn_size);
		ret = kfd_create_vcrat_image_cpu(pcrat_image, size);
		break;
	case COMPUTE_UNIT_GPU:
		if (!kdev)
			return -EINVAL;
		pcrat_image = kvmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL);
		if (!pcrat_image)
			return -ENOMEM;
		*size = VCRAT_SIZE_FOR_GPU;
		ret = kfd_create_vcrat_image_gpu(pcrat_image, size, kdev,
						 proximity_domain);
		break;
	case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU):
		/* TODO: APU VCRAT is not implemented; the assumption is
		 * that all AMD APUs provide a CRAT via ACPI.
		 */
		ret = -EINVAL;
		pr_err("VCRAT not implemented for APU\n");
		break;
	default:
		ret = -EINVAL;
	}

	/* On success, ownership of the image transfers to the caller
	 * (freed via kfd_destroy_crat_image). On failure, release it
	 * here; kvfree(NULL) is a no-op, covering the -EINVAL paths
	 * where nothing was allocated.
	 */
	if (!ret)
		*crat_image = pcrat_image;
	else
		kvfree(pcrat_image);

	return ret;
}


/* kfd_destroy_crat_image
 *
 * @crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..)
 *
 */
void kfd_destroy_crat_image(void *crat_image)
{
	/* kvfree() pairs with kvmalloc() used at creation time and
	 * safely ignores a NULL pointer.
	 */
	kvfree(crat_image);
}