/*
 * Copyright © 2010 Daniel Vetter
 * Copyright © 2011-2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/seq_file.h>
#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"

#include <linux/bitmap.h>
#include <linux/highmem.h>

/**
 * DOC: Global GTT views
 *
 * Background and previous state
 *
 * Historically objects could exist (be bound) in global GTT space only as
 * singular instances with a view representing all of the object's backing pages
 * in a linear fashion. This view will be called a normal view.
 *
 * To support multiple views of the same object, where the number of mapped
 * pages is not equal to the backing store, or where the layout of the pages
 * is not linear, the concept of a GGTT view was added.
 *
 * One example of an alternative view is a stereo display driven by a single
 * image. In this case we would have a framebuffer looking like this
 * (2x2 pages):
 *
 *    12
 *    34
 *
 * Above would represent a normal GGTT view as normally mapped for GPU or CPU
 * rendering. In contrast, fed to the display engine would be an alternative
 * view which could look something like this:
 *
 *   1212
 *   3434
 *
 * In this example both the size and layout of pages in the alternative view
 * are different from the normal view.
 *
 * Implementation and usage
 *
 * GGTT views are implemented using VMAs and are distinguished via enum
 * i915_ggtt_view_type and struct i915_ggtt_view.
 *
 * A new flavour of core GEM functions which work with GGTT bound objects was
 * added with the _ggtt_ infix, and sometimes with the _view postfix, to avoid
 * renaming large amounts of code. They take the struct i915_ggtt_view
 * parameter encapsulating all metadata required to implement a view.
 *
 * As a helper for callers which are only interested in the normal view, the
 * globally const i915_ggtt_view_normal singleton instance exists. All old core
 * GEM API functions, the ones not taking the view parameter, operate on, or
 * with, the normal GGTT view.
 *
 * Code wanting to add or use a new GGTT view needs to:
 *
 * 1. Add a new enum with a suitable name.
 * 2. Extend the metadata in the i915_ggtt_view structure if required.
 * 3. Add support to i915_get_ggtt_vma_pages().
 *
 * New views are required to build a scatter-gather table from within the
 * i915_get_ggtt_vma_pages function. This table is stored in the vma.ggtt_view
 * and exists for the lifetime of a VMA.
 *
 * Core API is designed to have copy semantics, which means that a passed in
 * struct i915_ggtt_view does not need to be persistent (left around after
 * calling the core API functions); a small illustrative sketch follows this
 * comment.
 *
 */
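/*
 * Illustrative sketch (compiled out, not part of the driver): one way a caller
 * could describe an alternative view when pinning an object into the GGTT.
 * The pin helper name and argument order below are assumptions based on the
 * _view flavour of the core GEM API described above; the point being made is
 * only the copy semantics -- a stack-local view (or one of the const
 * singletons defined below) is sufficient, the core keeps its own copy.
 */
#if 0
static int example_pin_with_rotated_view(struct drm_i915_gem_object *obj)
{
	struct i915_ggtt_view view = { .type = I915_GGTT_VIEW_ROTATED };

	/* "view" does not need to outlive this call (copy semantics). */
	return i915_gem_object_ggtt_pin(obj, &view, 0, PIN_GLOBAL);
}
#endif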
static int
i915_get_ggtt_vma_pages(struct i915_vma *vma);

const struct i915_ggtt_view i915_ggtt_view_normal;
const struct i915_ggtt_view i915_ggtt_view_rotated = {
	.type = I915_GGTT_VIEW_ROTATED
};

static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
{
	bool has_aliasing_ppgtt;
	bool has_full_ppgtt;

	has_aliasing_ppgtt = INTEL_INFO(dev)->gen >= 6;
	has_full_ppgtt = INTEL_INFO(dev)->gen >= 7;

	if (intel_vgpu_active(dev))
		has_full_ppgtt = false; /* emulation is too hard */

	/*
	 * We don't allow disabling PPGTT for gen9+ as it's a requirement for
	 * execlists, the sole mechanism available to submit work.
	 */
	if (INTEL_INFO(dev)->gen < 9 &&
	    (enable_ppgtt == 0 || !has_aliasing_ppgtt))
		return 0;

	if (enable_ppgtt == 1)
		return 1;

	if (enable_ppgtt == 2 && has_full_ppgtt)
		return 2;

#ifdef CONFIG_INTEL_IOMMU
	/* Disable ppgtt on SNB if VT-d is on. */
	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
		DRM_INFO("Disabling PPGTT because VT-d is on\n");
		return 0;
	}
#endif

	/* Early VLV doesn't have this */
	if (IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) &&
	    dev->pdev->revision < 0xb) {
		DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
		return 0;
	}

	if (INTEL_INFO(dev)->gen >= 8 && i915.enable_execlists)
		return 2;
	else
		return has_aliasing_ppgtt ? 1 : 0;
}

static int ppgtt_bind_vma(struct i915_vma *vma,
			  enum i915_cache_level cache_level,
			  u32 unused)
{
	u32 pte_flags = 0;
	const unsigned int num_entries = vma->obj->base.size >> PAGE_SHIFT;

	/* Currently applicable only to VLV */
	if (vma->obj->gt_ro)
		pte_flags |= PTE_READ_ONLY;

	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
				num_entries,
				cache_level, pte_flags);

	return 0;
}

static void ppgtt_unbind_vma(struct i915_vma *vma)
{
	vma->vm->clear_range(vma->vm,
			     vma->node.start,
			     vma->obj->base.size,
			     true);
}

static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
				  enum i915_cache_level level,
				  bool valid)
{
	gen8_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
	pte |= addr;

	switch (level) {
	case I915_CACHE_NONE:
		pte |= PPAT_UNCACHED_INDEX;
		break;
	case I915_CACHE_WT:
		pte |= PPAT_DISPLAY_ELLC_INDEX;
		break;
	default:
		pte |= PPAT_CACHED_INDEX;
		break;
	}

	return pte;
}

static gen8_pde_t gen8_pde_encode(struct drm_device *dev,
				  dma_addr_t addr,
				  enum i915_cache_level level)
{
	gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
	pde |= addr;
	if (level != I915_CACHE_NONE)
		pde |= PPAT_CACHED_PDE_INDEX;
	else
		pde |= PPAT_UNCACHED_INDEX;
	return pde;
}

static gen6_pte_t snb_pte_encode(dma_addr_t addr,
				 enum i915_cache_level level,
				 bool valid, u32 unused)
{
	gen6_pte_t pte = valid ?
GEN6_PTE_VALID : 0; 218 pte |= GEN6_PTE_ADDR_ENCODE(addr); 219 220 switch (level) { 221 case I915_CACHE_L3_LLC: 222 case I915_CACHE_LLC: 223 pte |= GEN6_PTE_CACHE_LLC; 224 break; 225 case I915_CACHE_NONE: 226 pte |= GEN6_PTE_UNCACHED; 227 break; 228 default: 229 MISSING_CASE(level); 230 } 231 232 return pte; 233 } 234 235 static gen6_pte_t ivb_pte_encode(dma_addr_t addr, 236 enum i915_cache_level level, 237 bool valid, u32 unused) 238 { 239 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 240 pte |= GEN6_PTE_ADDR_ENCODE(addr); 241 242 switch (level) { 243 case I915_CACHE_L3_LLC: 244 pte |= GEN7_PTE_CACHE_L3_LLC; 245 break; 246 case I915_CACHE_LLC: 247 pte |= GEN6_PTE_CACHE_LLC; 248 break; 249 case I915_CACHE_NONE: 250 pte |= GEN6_PTE_UNCACHED; 251 break; 252 default: 253 MISSING_CASE(level); 254 } 255 256 return pte; 257 } 258 259 static gen6_pte_t byt_pte_encode(dma_addr_t addr, 260 enum i915_cache_level level, 261 bool valid, u32 flags) 262 { 263 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 264 pte |= GEN6_PTE_ADDR_ENCODE(addr); 265 266 if (!(flags & PTE_READ_ONLY)) 267 pte |= BYT_PTE_WRITEABLE; 268 269 if (level != I915_CACHE_NONE) 270 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; 271 272 return pte; 273 } 274 275 static gen6_pte_t hsw_pte_encode(dma_addr_t addr, 276 enum i915_cache_level level, 277 bool valid, u32 unused) 278 { 279 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 280 pte |= HSW_PTE_ADDR_ENCODE(addr); 281 282 if (level != I915_CACHE_NONE) 283 pte |= HSW_WB_LLC_AGE3; 284 285 return pte; 286 } 287 288 static gen6_pte_t iris_pte_encode(dma_addr_t addr, 289 enum i915_cache_level level, 290 bool valid, u32 unused) 291 { 292 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 293 pte |= HSW_PTE_ADDR_ENCODE(addr); 294 295 switch (level) { 296 case I915_CACHE_NONE: 297 break; 298 case I915_CACHE_WT: 299 pte |= HSW_WT_ELLC_LLC_AGE3; 300 break; 301 default: 302 pte |= HSW_WB_ELLC_LLC_AGE3; 303 break; 304 } 305 306 return pte; 307 } 308 309 #define i915_dma_unmap_single(px, dev) \ 310 __i915_dma_unmap_single((px)->daddr, dev) 311 312 static void __i915_dma_unmap_single(dma_addr_t daddr, 313 struct drm_device *dev) 314 { 315 #if 0 316 struct device *device = &dev->pdev->dev; 317 318 dma_unmap_page(device, daddr, 4096, PCI_DMA_BIDIRECTIONAL); 319 #endif 320 } 321 322 /** 323 * i915_dma_map_single() - Create a dma mapping for a page table/dir/etc. 324 * @px: Page table/dir/etc to get a DMA map for 325 * @dev: drm device 326 * 327 * Page table allocations are unified across all gens. They always require a 328 * single 4k allocation, as well as a DMA mapping. If we keep the structs 329 * symmetric here, the simple macro covers us for every page table type. 330 * 331 * Return: 0 if success. 
 */
#define i915_dma_map_single(px, dev) \
	i915_dma_map_page_single((px)->page, (dev), &(px)->daddr)

static int i915_dma_map_page_single(struct vm_page *page,
				    struct drm_device *dev,
				    dma_addr_t *daddr)
{
	struct device *device = &dev->pdev->dev;

	*daddr = dma_map_page(device, page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
	if (dma_mapping_error(device, *daddr))
		return -ENOMEM;

	return 0;
}

static void unmap_and_free_pt(struct i915_page_table *pt,
			      struct drm_device *dev)
{
	if (WARN_ON(!pt->page))
		return;

	i915_dma_unmap_single(pt, dev);
	__free_page(pt->page);
	kfree(pt->used_ptes);
	kfree(pt);
}

static void gen8_initialize_pt(struct i915_address_space *vm,
			       struct i915_page_table *pt)
{
	gen8_pte_t *pt_vaddr, scratch_pte;
	int i;

	pt_vaddr = kmap_atomic(pt->page);
	scratch_pte = gen8_pte_encode(vm->scratch.addr,
				      I915_CACHE_LLC, true);

	for (i = 0; i < GEN8_PTES; i++)
		pt_vaddr[i] = scratch_pte;

	if (!HAS_LLC(vm->dev))
		drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
	kunmap_atomic(pt_vaddr);
}

static struct i915_page_table *alloc_pt_single(struct drm_device *dev)
{
	struct i915_page_table *pt;
	const size_t count = INTEL_INFO(dev)->gen >= 8 ?
		GEN8_PTES : GEN6_PTES;
	int ret = -ENOMEM;

	pt = kzalloc(sizeof(*pt), GFP_KERNEL);
	if (!pt)
		return ERR_PTR(-ENOMEM);

	pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
				GFP_KERNEL);

	if (!pt->used_ptes)
		goto fail_bitmap;

	pt->page = alloc_page(GFP_KERNEL);
	if (!pt->page)
		goto fail_page;

	ret = i915_dma_map_single(pt, dev);
	if (ret)
		goto fail_dma;

	return pt;

fail_dma:
	__free_page(pt->page);
fail_page:
	kfree(pt->used_ptes);
fail_bitmap:
	kfree(pt);

	return ERR_PTR(ret);
}

/**
 * alloc_pt_range() - Allocate multiple page tables
 * @pd: The page directory which will have at least @count entries
 *	available to point to the allocated page tables.
 * @pde: First page directory entry for which we are allocating.
 * @count: Number of pages to allocate.
 * @dev: DRM device.
 *
 * Allocates multiple page table pages and sets the appropriate entries in the
 * page table structure within the page directory. Function cleans up after
 * itself on any failures.
 *
 * Return: 0 if allocation succeeded.
 */
static int alloc_pt_range(struct i915_page_directory *pd, uint16_t pde, size_t count,
			  struct drm_device *dev)
{
	int i, ret;

	/* 512 is the max page tables per page_directory on any platform. */
	if (WARN_ON(pde + count > I915_PDES))
		return -EINVAL;

	for (i = pde; i < pde + count; i++) {
		struct i915_page_table *pt = alloc_pt_single(dev);

		if (IS_ERR(pt)) {
			ret = PTR_ERR(pt);
			goto err_out;
		}
		WARN(pd->page_table[i],
		     "Leaking page directory entry %d (%p)\n",
		     i, pd->page_table[i]);
		pd->page_table[i] = pt;
	}

	return 0;

err_out:
	while (i-- > pde)
		unmap_and_free_pt(pd->page_table[i], dev);
	return ret;
}

static void unmap_and_free_pd(struct i915_page_directory *pd,
			      struct drm_device *dev)
{
	if (pd->page) {
		i915_dma_unmap_single(pd, dev);
		__free_page(pd->page);
		kfree(pd->used_pdes);
		kfree(pd);
	}
}

static struct i915_page_directory *alloc_pd_single(struct drm_device *dev)
{
	struct i915_page_directory *pd;
	int ret = -ENOMEM;

	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
	if (!pd)
		return ERR_PTR(-ENOMEM);

	pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
				sizeof(*pd->used_pdes), GFP_KERNEL);
	if (!pd->used_pdes)
		goto free_pd;

	pd->page = alloc_page(GFP_KERNEL);
	if (!pd->page)
		goto free_bitmap;

	ret = i915_dma_map_single(pd, dev);
	if (ret)
		goto free_page;

	return pd;

free_page:
	__free_page(pd->page);
free_bitmap:
	kfree(pd->used_pdes);
free_pd:
	kfree(pd);

	return ERR_PTR(ret);
}

/* Broadwell Page Directory Pointer Descriptors */
static int gen8_write_pdp(struct intel_engine_cs *ring,
			  unsigned entry,
			  dma_addr_t addr)
{
	int ret;

	BUG_ON(entry >= 4);

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
	intel_ring_emit(ring, upper_32_bits(addr));
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
	intel_ring_emit(ring, lower_32_bits(addr));
	intel_ring_advance(ring);

	return 0;
}

static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct intel_engine_cs *ring)
{
	int i, ret;

	for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
		struct i915_page_directory *pd = ppgtt->pdp.page_directory[i];
		dma_addr_t pd_daddr = pd ? pd->daddr : ppgtt->scratch_pd->daddr;
		/* The page directory might be NULL, but we need to clear out
		 * whatever the previous context might have used. */
		ret = gen8_write_pdp(ring, i, pd_daddr);
		if (ret)
			return ret;
	}

	return 0;
}
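/*
 * Note on the index arithmetic used by the gen8 clear/insert paths below:
 * with the usual 4 KiB pages and 512-entry (8 byte PTE) tables, a GPU virtual
 * address splits into bits [11:0] page offset, [20:12] PTE index, [29:21] PDE
 * index and [31:30] PDPE index (only 4 PDPEs exist in the legacy 32b layout).
 * For example, address 0x80403000 decodes as pdpe = 2, pde = 2, pte = 3.
 */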
static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
				   uint64_t start,
				   uint64_t length,
				   bool use_scratch)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen8_pte_t *pt_vaddr, scratch_pte;
	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
	unsigned num_entries = length >> PAGE_SHIFT;
	unsigned last_pte, i;

	scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
				      I915_CACHE_LLC, use_scratch);

	while (num_entries) {
		struct i915_page_directory *pd;
		struct i915_page_table *pt;
		struct vm_page *page_table;

		if (WARN_ON(!ppgtt->pdp.page_directory[pdpe]))
			break;

		pd = ppgtt->pdp.page_directory[pdpe];

		if (WARN_ON(!pd->page_table[pde]))
			break;

		pt = pd->page_table[pde];

		if (WARN_ON(!pt->page))
			break;

		page_table = pt->page;

		last_pte = pte + num_entries;
		if (last_pte > GEN8_PTES)
			last_pte = GEN8_PTES;

		pt_vaddr = kmap_atomic(page_table);

		for (i = pte; i < last_pte; i++) {
			pt_vaddr[i] = scratch_pte;
			num_entries--;
		}

		if (!HAS_LLC(ppgtt->base.dev))
			drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
		kunmap_atomic(pt_vaddr);

		pte = 0;
		if (++pde == I915_PDES) {
			pdpe++;
			pde = 0;
		}
	}
}

static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
				      vm_page_t *pages,
				      uint64_t start,
				      unsigned int num_entries,
				      enum i915_cache_level cache_level, u32 unused)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen8_pte_t *pt_vaddr;
	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
	int i;

	pt_vaddr = NULL;

	for (i = 0; i < num_entries; i++) {
		if (WARN_ON(pdpe >= GEN8_LEGACY_PDPES))
			break;

		if (pt_vaddr == NULL) {
			struct i915_page_directory *pd = ppgtt->pdp.page_directory[pdpe];
			struct i915_page_table *pt = pd->page_table[pde];
			struct vm_page *page_table = pt->page;

			pt_vaddr = kmap_atomic(page_table);
		}

		pt_vaddr[pte] =
			gen8_pte_encode(VM_PAGE_TO_PHYS(pages[i]),
					cache_level, true);
		if (++pte == GEN8_PTES) {
			if (!HAS_LLC(ppgtt->base.dev))
				drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
			kunmap_atomic(pt_vaddr);
			pt_vaddr = NULL;
			if (++pde == I915_PDES) {
				pdpe++;
				pde = 0;
			}
			pte = 0;
		}
	}
	if (pt_vaddr) {
		if (!HAS_LLC(ppgtt->base.dev))
			drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
		kunmap_atomic(pt_vaddr);
	}
}

static void __gen8_do_map_pt(gen8_pde_t * const pde,
			     struct i915_page_table *pt,
			     struct drm_device *dev)
{
	gen8_pde_t entry =
		gen8_pde_encode(dev, pt->daddr, I915_CACHE_LLC);
	*pde = entry;
}

static void gen8_initialize_pd(struct i915_address_space *vm,
			       struct i915_page_directory *pd)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen8_pde_t *page_directory;
	struct i915_page_table *pt;
	int i;

	page_directory = kmap_atomic(pd->page);
	pt = ppgtt->scratch_pt;
	for (i = 0; i < I915_PDES; i++)
678 /* Map the PDE to the page table */ 679 __gen8_do_map_pt(page_directory + i, pt, vm->dev); 680 681 if (!HAS_LLC(vm->dev)) 682 drm_clflush_virt_range(page_directory, PAGE_SIZE); 683 kunmap_atomic(page_directory); 684 } 685 686 static void gen8_free_page_tables(struct i915_page_directory *pd, struct drm_device *dev) 687 { 688 int i; 689 690 if (!pd->page) 691 return; 692 693 for_each_set_bit(i, pd->used_pdes, I915_PDES) { 694 if (WARN_ON(!pd->page_table[i])) 695 continue; 696 697 unmap_and_free_pt(pd->page_table[i], dev); 698 pd->page_table[i] = NULL; 699 } 700 } 701 702 static void gen8_ppgtt_cleanup(struct i915_address_space *vm) 703 { 704 struct i915_hw_ppgtt *ppgtt = 705 container_of(vm, struct i915_hw_ppgtt, base); 706 int i; 707 708 for_each_set_bit(i, ppgtt->pdp.used_pdpes, GEN8_LEGACY_PDPES) { 709 if (WARN_ON(!ppgtt->pdp.page_directory[i])) 710 continue; 711 712 gen8_free_page_tables(ppgtt->pdp.page_directory[i], ppgtt->base.dev); 713 unmap_and_free_pd(ppgtt->pdp.page_directory[i], ppgtt->base.dev); 714 } 715 716 unmap_and_free_pd(ppgtt->scratch_pd, ppgtt->base.dev); 717 unmap_and_free_pt(ppgtt->scratch_pt, ppgtt->base.dev); 718 } 719 720 /** 721 * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range. 722 * @ppgtt: Master ppgtt structure. 723 * @pd: Page directory for this address range. 724 * @start: Starting virtual address to begin allocations. 725 * @length Size of the allocations. 726 * @new_pts: Bitmap set by function with new allocations. Likely used by the 727 * caller to free on error. 728 * 729 * Allocate the required number of page tables. Extremely similar to 730 * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by 731 * the page directory boundary (instead of the page directory pointer). That 732 * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is 733 * possible, and likely that the caller will need to use multiple calls of this 734 * function to achieve the appropriate allocation. 735 * 736 * Return: 0 if success; negative error code otherwise. 737 */ 738 static int gen8_ppgtt_alloc_pagetabs(struct i915_hw_ppgtt *ppgtt, 739 struct i915_page_directory *pd, 740 uint64_t start, 741 uint64_t length, 742 unsigned long *new_pts) 743 { 744 struct drm_device *dev = ppgtt->base.dev; 745 struct i915_page_table *pt; 746 uint64_t temp; 747 uint32_t pde; 748 749 gen8_for_each_pde(pt, pd, start, length, temp, pde) { 750 /* Don't reallocate page tables */ 751 if (pt) { 752 /* Scratch is never allocated this way */ 753 WARN_ON(pt == ppgtt->scratch_pt); 754 continue; 755 } 756 757 pt = alloc_pt_single(dev); 758 if (IS_ERR(pt)) 759 goto unwind_out; 760 761 gen8_initialize_pt(&ppgtt->base, pt); 762 pd->page_table[pde] = pt; 763 set_bit(pde, new_pts); 764 } 765 766 return 0; 767 768 unwind_out: 769 for_each_set_bit(pde, new_pts, I915_PDES) 770 unmap_and_free_pt(pd->page_table[pde], dev); 771 772 return -ENOMEM; 773 } 774 775 /** 776 * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range. 777 * @ppgtt: Master ppgtt structure. 778 * @pdp: Page directory pointer for this address range. 779 * @start: Starting virtual address to begin allocations. 780 * @length Size of the allocations. 781 * @new_pds Bitmap set by function with new allocations. Likely used by the 782 * caller to free on error. 783 * 784 * Allocate the required number of page directories starting at the pde index of 785 * @start, and ending at the pde index @start + @length. 
This function will skip 786 * over already allocated page directories within the range, and only allocate 787 * new ones, setting the appropriate pointer within the pdp as well as the 788 * correct position in the bitmap @new_pds. 789 * 790 * The function will only allocate the pages within the range for a give page 791 * directory pointer. In other words, if @start + @length straddles a virtually 792 * addressed PDP boundary (512GB for 4k pages), there will be more allocations 793 * required by the caller, This is not currently possible, and the BUG in the 794 * code will prevent it. 795 * 796 * Return: 0 if success; negative error code otherwise. 797 */ 798 static int gen8_ppgtt_alloc_page_directories(struct i915_hw_ppgtt *ppgtt, 799 struct i915_page_directory_pointer *pdp, 800 uint64_t start, 801 uint64_t length, 802 unsigned long *new_pds) 803 { 804 struct drm_device *dev = ppgtt->base.dev; 805 struct i915_page_directory *pd; 806 uint64_t temp; 807 uint32_t pdpe; 808 809 WARN_ON(!bitmap_empty(new_pds, GEN8_LEGACY_PDPES)); 810 811 /* FIXME: upper bound must not overflow 32 bits */ 812 WARN_ON((start + length) > (1ULL << 32)); 813 814 gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) { 815 if (pd) 816 continue; 817 818 pd = alloc_pd_single(dev); 819 if (IS_ERR(pd)) 820 goto unwind_out; 821 822 gen8_initialize_pd(&ppgtt->base, pd); 823 pdp->page_directory[pdpe] = pd; 824 set_bit(pdpe, new_pds); 825 } 826 827 return 0; 828 829 unwind_out: 830 for_each_set_bit(pdpe, new_pds, GEN8_LEGACY_PDPES) 831 unmap_and_free_pd(pdp->page_directory[pdpe], dev); 832 833 return -ENOMEM; 834 } 835 836 static void 837 free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts) 838 { 839 int i; 840 841 for (i = 0; i < GEN8_LEGACY_PDPES; i++) 842 kfree(new_pts[i]); 843 kfree(new_pts); 844 kfree(new_pds); 845 } 846 847 /* Fills in the page directory bitmap, and the array of page tables bitmap. Both 848 * of these are based on the number of PDPEs in the system. 849 */ 850 static 851 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds, 852 unsigned long ***new_pts) 853 { 854 int i; 855 unsigned long *pds; 856 unsigned long **pts; 857 858 pds = kcalloc(BITS_TO_LONGS(GEN8_LEGACY_PDPES), sizeof(unsigned long), GFP_KERNEL); 859 if (!pds) 860 return -ENOMEM; 861 862 pts = kcalloc(GEN8_LEGACY_PDPES, sizeof(unsigned long *), GFP_KERNEL); 863 if (!pts) { 864 kfree(pds); 865 return -ENOMEM; 866 } 867 868 for (i = 0; i < GEN8_LEGACY_PDPES; i++) { 869 pts[i] = kcalloc(BITS_TO_LONGS(I915_PDES), 870 sizeof(unsigned long), GFP_KERNEL); 871 if (!pts[i]) 872 goto err_out; 873 } 874 875 *new_pds = pds; 876 *new_pts = pts; 877 878 return 0; 879 880 err_out: 881 free_gen8_temp_bitmaps(pds, pts); 882 return -ENOMEM; 883 } 884 885 static int gen8_alloc_va_range(struct i915_address_space *vm, 886 uint64_t start, 887 uint64_t length) 888 { 889 struct i915_hw_ppgtt *ppgtt = 890 container_of(vm, struct i915_hw_ppgtt, base); 891 unsigned long *new_page_dirs, **new_page_tables; 892 struct i915_page_directory *pd; 893 const uint64_t orig_start = start; 894 const uint64_t orig_length = length; 895 uint64_t temp; 896 uint32_t pdpe; 897 int ret; 898 899 /* Wrap is never okay since we can only represent 48b, and we don't 900 * actually use the other side of the canonical address space. 
901 */ 902 if (WARN_ON(start + length < start)) 903 return -ERANGE; 904 905 ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables); 906 if (ret) 907 return ret; 908 909 /* Do the allocations first so we can easily bail out */ 910 ret = gen8_ppgtt_alloc_page_directories(ppgtt, &ppgtt->pdp, start, length, 911 new_page_dirs); 912 if (ret) { 913 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 914 return ret; 915 } 916 917 /* For every page directory referenced, allocate page tables */ 918 gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) { 919 ret = gen8_ppgtt_alloc_pagetabs(ppgtt, pd, start, length, 920 new_page_tables[pdpe]); 921 if (ret) 922 goto err_out; 923 } 924 925 start = orig_start; 926 length = orig_length; 927 928 /* Allocations have completed successfully, so set the bitmaps, and do 929 * the mappings. */ 930 gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) { 931 gen8_pde_t *const page_directory = kmap_atomic(pd->page); 932 struct i915_page_table *pt; 933 uint64_t pd_len = gen8_clamp_pd(start, length); 934 uint64_t pd_start = start; 935 uint32_t pde; 936 937 /* Every pd should be allocated, we just did that above. */ 938 WARN_ON(!pd); 939 940 gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) { 941 /* Same reasoning as pd */ 942 WARN_ON(!pt); 943 WARN_ON(!pd_len); 944 WARN_ON(!gen8_pte_count(pd_start, pd_len)); 945 946 /* Set our used ptes within the page table */ 947 bitmap_set(pt->used_ptes, 948 gen8_pte_index(pd_start), 949 gen8_pte_count(pd_start, pd_len)); 950 951 /* Our pde is now pointing to the pagetable, pt */ 952 set_bit(pde, pd->used_pdes); 953 954 /* Map the PDE to the page table */ 955 __gen8_do_map_pt(page_directory + pde, pt, vm->dev); 956 957 /* NB: We haven't yet mapped ptes to pages. At this 958 * point we're still relying on insert_entries() */ 959 } 960 961 if (!HAS_LLC(vm->dev)) 962 drm_clflush_virt_range(page_directory, PAGE_SIZE); 963 964 kunmap_atomic(page_directory); 965 966 set_bit(pdpe, ppgtt->pdp.used_pdpes); 967 } 968 969 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 970 return 0; 971 972 err_out: 973 while (pdpe--) { 974 for_each_set_bit(temp, new_page_tables[pdpe], I915_PDES) 975 unmap_and_free_pt(ppgtt->pdp.page_directory[pdpe]->page_table[temp], vm->dev); 976 } 977 978 for_each_set_bit(pdpe, new_page_dirs, GEN8_LEGACY_PDPES) 979 unmap_and_free_pd(ppgtt->pdp.page_directory[pdpe], vm->dev); 980 981 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 982 return ret; 983 } 984 985 /* 986 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers 987 * with a net effect resembling a 2-level page table in normal x86 terms. Each 988 * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address 989 * space. 
990 * 991 */ 992 static int gen8_ppgtt_init_common(struct i915_hw_ppgtt *ppgtt, uint64_t size) 993 { 994 ppgtt->scratch_pt = alloc_pt_single(ppgtt->base.dev); 995 if (IS_ERR(ppgtt->scratch_pt)) 996 return PTR_ERR(ppgtt->scratch_pt); 997 998 ppgtt->scratch_pd = alloc_pd_single(ppgtt->base.dev); 999 if (IS_ERR(ppgtt->scratch_pd)) 1000 return PTR_ERR(ppgtt->scratch_pd); 1001 1002 gen8_initialize_pt(&ppgtt->base, ppgtt->scratch_pt); 1003 gen8_initialize_pd(&ppgtt->base, ppgtt->scratch_pd); 1004 1005 ppgtt->base.start = 0; 1006 ppgtt->base.total = size; 1007 ppgtt->base.cleanup = gen8_ppgtt_cleanup; 1008 ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; 1009 ppgtt->base.clear_range = gen8_ppgtt_clear_range; 1010 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 1011 ppgtt->base.bind_vma = ppgtt_bind_vma; 1012 1013 ppgtt->switch_mm = gen8_mm_switch; 1014 1015 return 0; 1016 } 1017 1018 static int gen8_aliasing_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 1019 { 1020 struct drm_device *dev = ppgtt->base.dev; 1021 struct drm_i915_private *dev_priv = dev->dev_private; 1022 uint64_t start = 0, size = dev_priv->gtt.base.total; 1023 int ret; 1024 1025 ret = gen8_ppgtt_init_common(ppgtt, dev_priv->gtt.base.total); 1026 if (ret) 1027 return ret; 1028 1029 /* Aliasing PPGTT has to always work and be mapped because of the way we 1030 * use RESTORE_INHIBIT in the context switch. This will be fixed 1031 * eventually. */ 1032 ret = gen8_alloc_va_range(&ppgtt->base, start, size); 1033 if (ret) { 1034 unmap_and_free_pd(ppgtt->scratch_pd, ppgtt->base.dev); 1035 unmap_and_free_pt(ppgtt->scratch_pt, ppgtt->base.dev); 1036 return ret; 1037 } 1038 1039 ppgtt->base.allocate_va_range = NULL; 1040 ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true); 1041 1042 return 0; 1043 } 1044 1045 /* 1046 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers 1047 * with a net effect resembling a 2-level page table in normal x86 terms. Each 1048 * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address 1049 * space. 1050 * 1051 */ 1052 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 1053 { 1054 ppgtt->scratch_pt = alloc_pt_single(ppgtt->base.dev); 1055 if (IS_ERR(ppgtt->scratch_pt)) 1056 return PTR_ERR(ppgtt->scratch_pt); 1057 1058 ppgtt->scratch_pd = alloc_pd_single(ppgtt->base.dev); 1059 if (IS_ERR(ppgtt->scratch_pd)) 1060 return PTR_ERR(ppgtt->scratch_pd); 1061 1062 gen8_initialize_pt(&ppgtt->base, ppgtt->scratch_pt); 1063 gen8_initialize_pd(&ppgtt->base, ppgtt->scratch_pd); 1064 1065 ppgtt->base.start = 0; 1066 ppgtt->base.total = 1ULL << 32; 1067 #define CONFIG_X86_32 0 1068 if (IS_ENABLED(CONFIG_X86_32)) 1069 /* While we have a proliferation of size_t variables 1070 * we cannot represent the full ppgtt size on 32bit, 1071 * so limit it to the same size as the GGTT (currently 1072 * 2GiB). 
1073 */ 1074 ppgtt->base.total = to_i915(ppgtt->base.dev)->gtt.base.total; 1075 ppgtt->base.cleanup = gen8_ppgtt_cleanup; 1076 ppgtt->base.allocate_va_range = gen8_alloc_va_range; 1077 ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; 1078 ppgtt->base.clear_range = gen8_ppgtt_clear_range; 1079 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 1080 ppgtt->base.bind_vma = ppgtt_bind_vma; 1081 1082 ppgtt->switch_mm = gen8_mm_switch; 1083 1084 return 0; 1085 } 1086 1087 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) 1088 { 1089 struct i915_address_space *vm = &ppgtt->base; 1090 struct i915_page_table *unused; 1091 gen6_pte_t scratch_pte; 1092 uint32_t pd_entry; 1093 uint32_t pte, pde, temp; 1094 uint32_t start = ppgtt->base.start, length = ppgtt->base.total; 1095 1096 scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0); 1097 1098 gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde) { 1099 u32 expected; 1100 gen6_pte_t *pt_vaddr; 1101 dma_addr_t pt_addr = ppgtt->pd.page_table[pde]->daddr; 1102 pd_entry = readl(ppgtt->pd_addr + pde); 1103 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID); 1104 1105 if (pd_entry != expected) 1106 seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n", 1107 pde, 1108 pd_entry, 1109 expected); 1110 seq_printf(m, "\tPDE: %x\n", pd_entry); 1111 1112 pt_vaddr = kmap_atomic(ppgtt->pd.page_table[pde]->page); 1113 for (pte = 0; pte < GEN6_PTES; pte+=4) { 1114 unsigned long va = 1115 (pde * PAGE_SIZE * GEN6_PTES) + 1116 (pte * PAGE_SIZE); 1117 int i; 1118 bool found = false; 1119 for (i = 0; i < 4; i++) 1120 if (pt_vaddr[pte + i] != scratch_pte) 1121 found = true; 1122 if (!found) 1123 continue; 1124 1125 seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte); 1126 for (i = 0; i < 4; i++) { 1127 if (pt_vaddr[pte + i] != scratch_pte) 1128 seq_printf(m, " %08x", pt_vaddr[pte + i]); 1129 else 1130 seq_puts(m, " SCRATCH "); 1131 } 1132 seq_puts(m, "\n"); 1133 } 1134 kunmap_atomic(pt_vaddr); 1135 } 1136 } 1137 1138 /* Write pde (index) from the page directory @pd to the page table @pt */ 1139 static void gen6_write_pde(struct i915_page_directory *pd, 1140 const int pde, struct i915_page_table *pt) 1141 { 1142 /* Caller needs to make sure the write completes if necessary */ 1143 struct i915_hw_ppgtt *ppgtt = 1144 container_of(pd, struct i915_hw_ppgtt, pd); 1145 u32 pd_entry; 1146 1147 pd_entry = GEN6_PDE_ADDR_ENCODE(pt->daddr); 1148 pd_entry |= GEN6_PDE_VALID; 1149 1150 writel(pd_entry, ppgtt->pd_addr + pde); 1151 } 1152 1153 /* Write all the page tables found in the ppgtt structure to incrementing page 1154 * directories. */ 1155 static void gen6_write_page_range(struct drm_i915_private *dev_priv, 1156 struct i915_page_directory *pd, 1157 uint32_t start, uint32_t length) 1158 { 1159 struct i915_page_table *pt; 1160 uint32_t pde, temp; 1161 1162 gen6_for_each_pde(pt, pd, start, length, temp, pde) 1163 gen6_write_pde(pd, pde, pt); 1164 1165 /* Make sure write is complete before other code can use this page 1166 * table. 
Also require for WC mapped PTEs */ 1167 readl(dev_priv->gtt.gsm); 1168 } 1169 1170 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) 1171 { 1172 BUG_ON(ppgtt->pd.pd_offset & 0x3f); 1173 1174 return (ppgtt->pd.pd_offset / 64) << 16; 1175 } 1176 1177 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, 1178 struct intel_engine_cs *ring) 1179 { 1180 int ret; 1181 1182 /* NB: TLBs must be flushed and invalidated before a switch */ 1183 ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 1184 if (ret) 1185 return ret; 1186 1187 ret = intel_ring_begin(ring, 6); 1188 if (ret) 1189 return ret; 1190 1191 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); 1192 intel_ring_emit(ring, RING_PP_DIR_DCLV(ring)); 1193 intel_ring_emit(ring, PP_DIR_DCLV_2G); 1194 intel_ring_emit(ring, RING_PP_DIR_BASE(ring)); 1195 intel_ring_emit(ring, get_pd_offset(ppgtt)); 1196 intel_ring_emit(ring, MI_NOOP); 1197 intel_ring_advance(ring); 1198 1199 return 0; 1200 } 1201 1202 static int vgpu_mm_switch(struct i915_hw_ppgtt *ppgtt, 1203 struct intel_engine_cs *ring) 1204 { 1205 struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev); 1206 1207 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); 1208 I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt)); 1209 return 0; 1210 } 1211 1212 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, 1213 struct intel_engine_cs *ring) 1214 { 1215 int ret; 1216 1217 /* NB: TLBs must be flushed and invalidated before a switch */ 1218 ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 1219 if (ret) 1220 return ret; 1221 1222 ret = intel_ring_begin(ring, 6); 1223 if (ret) 1224 return ret; 1225 1226 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); 1227 intel_ring_emit(ring, RING_PP_DIR_DCLV(ring)); 1228 intel_ring_emit(ring, PP_DIR_DCLV_2G); 1229 intel_ring_emit(ring, RING_PP_DIR_BASE(ring)); 1230 intel_ring_emit(ring, get_pd_offset(ppgtt)); 1231 intel_ring_emit(ring, MI_NOOP); 1232 intel_ring_advance(ring); 1233 1234 /* XXX: RCS is the only one to auto invalidate the TLBs? 
 */
	if (ring->id != RCS) {
		ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
		if (ret)
			return ret;
	}

	return 0;
}

static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct intel_engine_cs *ring)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));

	POSTING_READ(RING_PP_DIR_DCLV(ring));

	return 0;
}

static void gen8_ppgtt_enable(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int j;

	for_each_ring(ring, dev_priv, j) {
		I915_WRITE(RING_MODE_GEN7(ring),
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	}
}

static void gen7_ppgtt_enable(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	uint32_t ecochk, ecobits;
	int i;

	ecobits = I915_READ(GAC_ECO_BITS);
	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);

	ecochk = I915_READ(GAM_ECOCHK);
	if (IS_HASWELL(dev)) {
		ecochk |= ECOCHK_PPGTT_WB_HSW;
	} else {
		ecochk |= ECOCHK_PPGTT_LLC_IVB;
		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
	}
	I915_WRITE(GAM_ECOCHK, ecochk);

	for_each_ring(ring, dev_priv, i) {
		/* GFX_MODE is per-ring on gen7+ */
		I915_WRITE(RING_MODE_GEN7(ring),
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	}
}

static void gen6_ppgtt_enable(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t ecochk, gab_ctl, ecobits;

	ecobits = I915_READ(GAC_ECO_BITS);
	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
		   ECOBITS_PPGTT_CACHE64B);

	gab_ctl = I915_READ(GAB_CTL);
	I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);

	ecochk = I915_READ(GAM_ECOCHK);
	I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);

	I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
}

/* PPGTT support for Sandybridge/Gen6 and later */
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
				   uint64_t start,
				   uint64_t length,
				   bool use_scratch)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen6_pte_t *pt_vaddr, scratch_pte;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	unsigned act_pt = first_entry / GEN6_PTES;
	unsigned first_pte = first_entry % GEN6_PTES;
	unsigned last_pte, i;

	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0);

	while (num_entries) {
		last_pte = first_pte + num_entries;
		if (last_pte > GEN6_PTES)
			last_pte = GEN6_PTES;

		pt_vaddr = kmap_atomic(ppgtt->pd.page_table[act_pt]->page);

		for (i = first_pte; i < last_pte; i++)
			pt_vaddr[i] = scratch_pte;

		kunmap_atomic(pt_vaddr);

		num_entries -= last_pte - first_pte;
		first_pte = 0;
		act_pt++;
	}
}
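/*
 * Index arithmetic note for the gen6/7 paths above and below: first_entry is
 * the linear PTE index (GPU address >> PAGE_SHIFT), act_pt selects the page
 * table and act_pte the slot within it.  Assuming GEN6_PTES is 1024 (a 4 KiB
 * page table of 4-byte PTEs), GPU address 0x403000 gives first_entry 1027,
 * i.e. act_pt = 1 and act_pte = 3.
 */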
static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
				      vm_page_t *pages,
				      uint64_t start,
				      unsigned num_entries,
				      enum i915_cache_level cache_level, u32 flags)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen6_pte_t *pt_vaddr;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned act_pt = first_entry / GEN6_PTES;
	unsigned act_pte = first_entry % GEN6_PTES;

	pt_vaddr = NULL;
	for (int i = 0; i < num_entries; i++) {
		if (pt_vaddr == NULL)
			pt_vaddr = kmap_atomic(ppgtt->pd.page_table[act_pt]->page);

		pt_vaddr[act_pte] =
			vm->pte_encode(VM_PAGE_TO_PHYS(pages[i]),
				       cache_level, true, flags);

		if (++act_pte == GEN6_PTES) {
			kunmap_atomic(pt_vaddr);
			pt_vaddr = NULL;
			act_pt++;
			act_pte = 0;
		}
	}
	if (pt_vaddr)
		kunmap_atomic(pt_vaddr);
}

/* PDE TLBs are a pain to invalidate pre GEN8. It requires a context reload.
 * If we are switching between contexts with the same LRCA, we also must do a
 * force restore.
 */
static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
{
	/* If current vm != vm, */
	ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
}

static void gen6_initialize_pt(struct i915_address_space *vm,
			       struct i915_page_table *pt)
{
	gen6_pte_t *pt_vaddr, scratch_pte;
	int i;

	WARN_ON(vm->scratch.addr == 0);

	scratch_pte = vm->pte_encode(vm->scratch.addr,
				     I915_CACHE_LLC, true, 0);

	pt_vaddr = kmap_atomic(pt->page);

	for (i = 0; i < GEN6_PTES; i++)
		pt_vaddr[i] = scratch_pte;

	kunmap_atomic(pt_vaddr);
}

static int gen6_alloc_va_range(struct i915_address_space *vm,
			       uint64_t start, uint64_t length)
{
	DECLARE_BITMAP(new_page_tables, I915_PDES);
	struct drm_device *dev = vm->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	struct i915_page_table *pt;
	const uint32_t start_save = start, length_save = length;
	uint32_t pde, temp;
	int ret;

	WARN_ON(upper_32_bits(start));

	bitmap_zero(new_page_tables, I915_PDES);

	/* The allocation is done in two stages so that we can bail out with
	 * minimal amount of pain. The first stage finds new page tables that
	 * need allocation. The second stage marks the used ptes within the
	 * page tables.
1433 */ 1434 gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) { 1435 if (pt != ppgtt->scratch_pt) { 1436 WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES)); 1437 continue; 1438 } 1439 1440 /* We've already allocated a page table */ 1441 WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES)); 1442 1443 pt = alloc_pt_single(dev); 1444 if (IS_ERR(pt)) { 1445 ret = PTR_ERR(pt); 1446 goto unwind_out; 1447 } 1448 1449 gen6_initialize_pt(vm, pt); 1450 1451 ppgtt->pd.page_table[pde] = pt; 1452 set_bit(pde, new_page_tables); 1453 trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT); 1454 } 1455 1456 start = start_save; 1457 length = length_save; 1458 1459 gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) { 1460 DECLARE_BITMAP(tmp_bitmap, GEN6_PTES); 1461 1462 bitmap_zero(tmp_bitmap, GEN6_PTES); 1463 bitmap_set(tmp_bitmap, gen6_pte_index(start), 1464 gen6_pte_count(start, length)); 1465 1466 if (test_and_clear_bit(pde, new_page_tables)) 1467 gen6_write_pde(&ppgtt->pd, pde, pt); 1468 1469 trace_i915_page_table_entry_map(vm, pde, pt, 1470 gen6_pte_index(start), 1471 gen6_pte_count(start, length), 1472 GEN6_PTES); 1473 bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes, 1474 GEN6_PTES); 1475 } 1476 1477 WARN_ON(!bitmap_empty(new_page_tables, I915_PDES)); 1478 1479 /* Make sure write is complete before other code can use this page 1480 * table. Also require for WC mapped PTEs */ 1481 readl(dev_priv->gtt.gsm); 1482 1483 mark_tlbs_dirty(ppgtt); 1484 return 0; 1485 1486 unwind_out: 1487 for_each_set_bit(pde, new_page_tables, I915_PDES) { 1488 struct i915_page_table *pt = ppgtt->pd.page_table[pde]; 1489 1490 ppgtt->pd.page_table[pde] = ppgtt->scratch_pt; 1491 unmap_and_free_pt(pt, vm->dev); 1492 } 1493 1494 mark_tlbs_dirty(ppgtt); 1495 return ret; 1496 } 1497 1498 static void gen6_ppgtt_cleanup(struct i915_address_space *vm) 1499 { 1500 struct i915_hw_ppgtt *ppgtt = 1501 container_of(vm, struct i915_hw_ppgtt, base); 1502 struct i915_page_table *pt; 1503 uint32_t pde; 1504 1505 1506 drm_mm_remove_node(&ppgtt->node); 1507 1508 gen6_for_all_pdes(pt, ppgtt, pde) { 1509 if (pt != ppgtt->scratch_pt) 1510 unmap_and_free_pt(pt, ppgtt->base.dev); 1511 } 1512 1513 unmap_and_free_pt(ppgtt->scratch_pt, ppgtt->base.dev); 1514 unmap_and_free_pd(&ppgtt->pd, ppgtt->base.dev); 1515 } 1516 1517 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) 1518 { 1519 struct drm_device *dev = ppgtt->base.dev; 1520 struct drm_i915_private *dev_priv = dev->dev_private; 1521 bool retried = false; 1522 int ret; 1523 1524 /* PPGTT PDEs reside in the GGTT and consists of 512 entries. The 1525 * allocator works in address space sizes, so it's multiplied by page 1526 * size. We allocate at the top of the GTT to avoid fragmentation. 
1527 */ 1528 BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm)); 1529 ppgtt->scratch_pt = alloc_pt_single(ppgtt->base.dev); 1530 if (IS_ERR(ppgtt->scratch_pt)) 1531 return PTR_ERR(ppgtt->scratch_pt); 1532 1533 gen6_initialize_pt(&ppgtt->base, ppgtt->scratch_pt); 1534 1535 alloc: 1536 ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm, 1537 &ppgtt->node, GEN6_PD_SIZE, 1538 GEN6_PD_ALIGN, 0, 1539 0, dev_priv->gtt.base.total, 1540 DRM_MM_TOPDOWN); 1541 if (ret == -ENOSPC && !retried) { 1542 ret = i915_gem_evict_something(dev, &dev_priv->gtt.base, 1543 GEN6_PD_SIZE, GEN6_PD_ALIGN, 1544 I915_CACHE_NONE, 1545 0, dev_priv->gtt.base.total, 1546 0); 1547 if (ret) 1548 goto err_out; 1549 1550 retried = true; 1551 goto alloc; 1552 } 1553 1554 if (ret) 1555 goto err_out; 1556 1557 1558 if (ppgtt->node.start < dev_priv->gtt.mappable_end) 1559 DRM_DEBUG("Forced to use aperture for PDEs\n"); 1560 1561 return 0; 1562 1563 err_out: 1564 unmap_and_free_pt(ppgtt->scratch_pt, ppgtt->base.dev); 1565 return ret; 1566 } 1567 1568 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) 1569 { 1570 return gen6_ppgtt_allocate_page_directories(ppgtt); 1571 } 1572 1573 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt, 1574 uint64_t start, uint64_t length) 1575 { 1576 struct i915_page_table *unused; 1577 uint32_t pde, temp; 1578 1579 gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde) 1580 ppgtt->pd.page_table[pde] = ppgtt->scratch_pt; 1581 } 1582 1583 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt, bool aliasing) 1584 { 1585 struct drm_device *dev = ppgtt->base.dev; 1586 struct drm_i915_private *dev_priv = dev->dev_private; 1587 int ret; 1588 1589 ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode; 1590 if (IS_GEN6(dev)) { 1591 ppgtt->switch_mm = gen6_mm_switch; 1592 } else if (IS_HASWELL(dev)) { 1593 ppgtt->switch_mm = hsw_mm_switch; 1594 } else if (IS_GEN7(dev)) { 1595 ppgtt->switch_mm = gen7_mm_switch; 1596 } else 1597 BUG(); 1598 1599 if (intel_vgpu_active(dev)) 1600 ppgtt->switch_mm = vgpu_mm_switch; 1601 1602 ret = gen6_ppgtt_alloc(ppgtt); 1603 if (ret) 1604 return ret; 1605 1606 if (aliasing) { 1607 /* preallocate all pts */ 1608 ret = alloc_pt_range(&ppgtt->pd, 0, I915_PDES, 1609 ppgtt->base.dev); 1610 1611 if (ret) { 1612 gen6_ppgtt_cleanup(&ppgtt->base); 1613 return ret; 1614 } 1615 } 1616 1617 ppgtt->base.allocate_va_range = aliasing ? 
NULL : gen6_alloc_va_range; 1618 ppgtt->base.clear_range = gen6_ppgtt_clear_range; 1619 ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; 1620 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 1621 ppgtt->base.bind_vma = ppgtt_bind_vma; 1622 ppgtt->base.cleanup = gen6_ppgtt_cleanup; 1623 ppgtt->base.start = 0; 1624 ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE; 1625 ppgtt->debug_dump = gen6_dump_ppgtt; 1626 1627 ppgtt->pd.pd_offset = 1628 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t); 1629 1630 ppgtt->pd_addr = (gen6_pte_t __iomem *)dev_priv->gtt.gsm + 1631 ppgtt->pd.pd_offset / sizeof(gen6_pte_t); 1632 1633 if (aliasing) 1634 ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true); 1635 else 1636 gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total); 1637 1638 gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total); 1639 1640 DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n", 1641 ppgtt->node.size >> 20, 1642 ppgtt->node.start / PAGE_SIZE); 1643 1644 DRM_DEBUG("Adding PPGTT at offset %x\n", 1645 ppgtt->pd.pd_offset << 10); 1646 1647 return 0; 1648 } 1649 1650 static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt, 1651 bool aliasing) 1652 { 1653 struct drm_i915_private *dev_priv = dev->dev_private; 1654 1655 ppgtt->base.dev = dev; 1656 ppgtt->base.scratch = dev_priv->gtt.base.scratch; 1657 1658 if (INTEL_INFO(dev)->gen < 8) 1659 return gen6_ppgtt_init(ppgtt, aliasing); 1660 else if (aliasing) 1661 return gen8_aliasing_ppgtt_init(ppgtt); 1662 else 1663 return gen8_ppgtt_init(ppgtt); 1664 } 1665 int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) 1666 { 1667 struct drm_i915_private *dev_priv = dev->dev_private; 1668 int ret = 0; 1669 1670 ret = __hw_ppgtt_init(dev, ppgtt, false); 1671 if (ret == 0) { 1672 kref_init(&ppgtt->ref); 1673 drm_mm_init(&ppgtt->base.mm, ppgtt->base.start, 1674 ppgtt->base.total); 1675 i915_init_vm(dev_priv, &ppgtt->base); 1676 } 1677 1678 return ret; 1679 } 1680 1681 int i915_ppgtt_init_hw(struct drm_device *dev) 1682 { 1683 struct drm_i915_private *dev_priv = dev->dev_private; 1684 struct intel_engine_cs *ring; 1685 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; 1686 int i, ret = 0; 1687 1688 /* In the case of execlists, PPGTT is enabled by the context descriptor 1689 * and the PDPs are contained within the context itself. We don't 1690 * need to do anything here. 
*/ 1691 if (i915.enable_execlists) 1692 return 0; 1693 1694 if (!USES_PPGTT(dev)) 1695 return 0; 1696 1697 if (IS_GEN6(dev)) 1698 gen6_ppgtt_enable(dev); 1699 else if (IS_GEN7(dev)) 1700 gen7_ppgtt_enable(dev); 1701 else if (INTEL_INFO(dev)->gen >= 8) 1702 gen8_ppgtt_enable(dev); 1703 else 1704 MISSING_CASE(INTEL_INFO(dev)->gen); 1705 1706 if (ppgtt) { 1707 for_each_ring(ring, dev_priv, i) { 1708 ret = ppgtt->switch_mm(ppgtt, ring); 1709 if (ret != 0) 1710 return ret; 1711 } 1712 } 1713 1714 return ret; 1715 } 1716 struct i915_hw_ppgtt * 1717 i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv) 1718 { 1719 struct i915_hw_ppgtt *ppgtt; 1720 int ret; 1721 1722 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 1723 if (!ppgtt) 1724 return ERR_PTR(-ENOMEM); 1725 1726 ret = i915_ppgtt_init(dev, ppgtt); 1727 if (ret) { 1728 kfree(ppgtt); 1729 return ERR_PTR(ret); 1730 } 1731 1732 ppgtt->file_priv = fpriv; 1733 1734 trace_i915_ppgtt_create(&ppgtt->base); 1735 1736 return ppgtt; 1737 } 1738 1739 void i915_ppgtt_release(struct kref *kref) 1740 { 1741 struct i915_hw_ppgtt *ppgtt = 1742 container_of(kref, struct i915_hw_ppgtt, ref); 1743 1744 trace_i915_ppgtt_release(&ppgtt->base); 1745 1746 /* vmas should already be unbound */ 1747 WARN_ON(!list_empty(&ppgtt->base.active_list)); 1748 WARN_ON(!list_empty(&ppgtt->base.inactive_list)); 1749 1750 list_del(&ppgtt->base.global_link); 1751 drm_mm_takedown(&ppgtt->base.mm); 1752 1753 ppgtt->base.cleanup(&ppgtt->base); 1754 kfree(ppgtt); 1755 } 1756 1757 extern int intel_iommu_gfx_mapped; 1758 /* Certain Gen5 chipsets require require idling the GPU before 1759 * unmapping anything from the GTT when VT-d is enabled. 1760 */ 1761 static bool needs_idle_maps(struct drm_device *dev) 1762 { 1763 #ifdef CONFIG_INTEL_IOMMU 1764 /* Query intel_iommu to see if we need the workaround. Presumably that 1765 * was loaded first. 1766 */ 1767 if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped) 1768 return true; 1769 #endif 1770 return false; 1771 } 1772 1773 static bool do_idling(struct drm_i915_private *dev_priv) 1774 { 1775 bool ret = dev_priv->mm.interruptible; 1776 1777 if (unlikely(dev_priv->gtt.do_idle_maps)) { 1778 dev_priv->mm.interruptible = false; 1779 if (i915_gpu_idle(dev_priv->dev)) { 1780 DRM_ERROR("Couldn't idle GPU\n"); 1781 /* Wait a bit, in hopes it avoids the hang */ 1782 udelay(10); 1783 } 1784 } 1785 1786 return ret; 1787 } 1788 1789 static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible) 1790 { 1791 if (unlikely(dev_priv->gtt.do_idle_maps)) 1792 dev_priv->mm.interruptible = interruptible; 1793 } 1794 1795 void i915_check_and_clear_faults(struct drm_device *dev) 1796 { 1797 struct drm_i915_private *dev_priv = dev->dev_private; 1798 struct intel_engine_cs *ring; 1799 int i; 1800 1801 if (INTEL_INFO(dev)->gen < 6) 1802 return; 1803 1804 for_each_ring(ring, dev_priv, i) { 1805 u32 fault_reg; 1806 fault_reg = I915_READ(RING_FAULT_REG(ring)); 1807 if (fault_reg & RING_FAULT_VALID) { 1808 #if 0 1809 DRM_DEBUG_DRIVER("Unexpected fault\n" 1810 "\tAddr: 0x%08lx\n" 1811 "\tAddress space: %s\n" 1812 "\tSource ID: %d\n" 1813 "\tType: %d\n", 1814 fault_reg & PAGE_MASK, 1815 fault_reg & RING_FAULT_GTTSEL_MASK ? 
"GGTT" : "PPGTT", 1816 RING_FAULT_SRCID(fault_reg), 1817 RING_FAULT_FAULT_TYPE(fault_reg)); 1818 #endif 1819 I915_WRITE(RING_FAULT_REG(ring), 1820 fault_reg & ~RING_FAULT_VALID); 1821 } 1822 } 1823 POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS])); 1824 } 1825 1826 static void i915_ggtt_flush(struct drm_i915_private *dev_priv) 1827 { 1828 if (INTEL_INFO(dev_priv->dev)->gen < 6) { 1829 intel_gtt_chipset_flush(); 1830 } else { 1831 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 1832 POSTING_READ(GFX_FLSH_CNTL_GEN6); 1833 } 1834 } 1835 1836 void i915_gem_suspend_gtt_mappings(struct drm_device *dev) 1837 { 1838 struct drm_i915_private *dev_priv = dev->dev_private; 1839 1840 /* Don't bother messing with faults pre GEN6 as we have little 1841 * documentation supporting that it's a good idea. 1842 */ 1843 if (INTEL_INFO(dev)->gen < 6) 1844 return; 1845 1846 i915_check_and_clear_faults(dev); 1847 1848 dev_priv->gtt.base.clear_range(&dev_priv->gtt.base, 1849 dev_priv->gtt.base.start, 1850 dev_priv->gtt.base.total, 1851 true); 1852 1853 i915_ggtt_flush(dev_priv); 1854 } 1855 1856 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj) 1857 { 1858 #if 0 1859 if (!dma_map_sg(&obj->base.dev->pdev->dev, 1860 obj->pages->sgl, obj->pages->nents, 1861 PCI_DMA_BIDIRECTIONAL)) 1862 return -ENOSPC; 1863 #endif 1864 1865 return 0; 1866 } 1867 1868 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) 1869 { 1870 #if 0 1871 writeq(pte, addr); 1872 #else 1873 iowrite32((u32)pte, addr); 1874 iowrite32(pte >> 32, addr + 4); 1875 #endif 1876 } 1877 1878 static void gen8_ggtt_insert_entries(struct i915_address_space *vm, 1879 vm_page_t *pages, 1880 uint64_t start, 1881 unsigned int num_entries, 1882 enum i915_cache_level level, u32 unused) 1883 { 1884 struct drm_i915_private *dev_priv = vm->dev->dev_private; 1885 unsigned first_entry = start >> PAGE_SHIFT; 1886 gen8_pte_t __iomem *gtt_entries = 1887 (gen8_pte_t __iomem *)dev_priv->gtt.gsm + first_entry; 1888 int i = 0; 1889 dma_addr_t addr = 0; 1890 1891 for (i=0;i<num_entries;i++) { 1892 addr = VM_PAGE_TO_PHYS(pages[i]); 1893 gen8_set_pte(>t_entries[i], 1894 gen8_pte_encode(addr, level, true)); 1895 } 1896 1897 /* 1898 * XXX: This serves as a posting read to make sure that the PTE has 1899 * actually been updated. There is some concern that even though 1900 * registers and PTEs are within the same BAR that they are potentially 1901 * of NUMA access patterns. Therefore, even with the way we assume 1902 * hardware should work, we must keep this posting read for paranoia. 1903 */ 1904 if (i != 0) 1905 WARN_ON(readq(>t_entries[i-1]) 1906 != gen8_pte_encode(addr, level, true)); 1907 1908 /* This next bit makes the above posting read even more important. We 1909 * want to flush the TLBs only after we're certain all the PTE updates 1910 * have finished. 1911 */ 1912 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 1913 POSTING_READ(GFX_FLSH_CNTL_GEN6); 1914 } 1915 1916 /* 1917 * Binds an object into the global gtt with the specified cache level. The object 1918 * will be accessible to the GPU via commands whose operands reference offsets 1919 * within the global GTT as well as accessible by the GPU through the GMADR 1920 * mapped BAR (dev_priv->mm.gtt->gtt). 
 */
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
				     vm_page_t *pages,
				     uint64_t start,
				     unsigned int num_entries,
				     enum i915_cache_level level, u32 flags)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	gen6_pte_t __iomem *gtt_entries =
		(gen6_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	int i = 0;
	dma_addr_t addr = 0; /* shut up gcc */

	for (i = 0; i < num_entries; i++) {
		addr = VM_PAGE_TO_PHYS(pages[i]);
		iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
	}

	/* XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR that they are potentially
	 * of NUMA access patterns. Therefore, even with the way we assume
	 * hardware should work, we must keep this posting read for paranoia.
	 */
	if (i != 0) {
		unsigned long gtt = readl(&gtt_entries[i-1]);
		WARN_ON(gtt != vm->pte_encode(addr, level, true, flags));
	}

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}

static void gen8_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length,
				  bool use_scratch)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	gen8_pte_t scratch_pte, __iomem *gtt_base =
		(gen8_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = gen8_pte_encode(vm->scratch.addr,
				      I915_CACHE_LLC,
				      use_scratch);
	for (i = 0; i < num_entries; i++)
		gen8_set_pte(&gtt_base[i], scratch_pte);
	readl(gtt_base);
}

static void gen6_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length,
				  bool use_scratch)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	gen6_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch, 0);

	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
	readl(gtt_base);
}

static int i915_ggtt_bind_vma(struct i915_vma *vma,
			      enum i915_cache_level cache_level,
			      u32 unused)
{
	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
	const unsigned int num_entries = vma->obj->base.size >> PAGE_SHIFT;
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2017 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2018
2019 BUG_ON(!i915_is_ggtt(vma->vm));
2020 intel_gtt_insert_pages(entry, num_entries, vma->ggtt_view.pages, flags);
2021
2022 vma->bound |= GLOBAL_BIND;
2023 return 0;
2024 }
2025
2026 static void i915_ggtt_clear_range(struct i915_address_space *vm,
2027 uint64_t start,
2028 uint64_t length,
2029 bool unused)
2030 {
2031 unsigned first_entry = start >> PAGE_SHIFT;
2032 unsigned num_entries = length >> PAGE_SHIFT;
2033 intel_gtt_clear_range(first_entry, num_entries);
2034 }
2035
2036 static void i915_ggtt_unbind_vma(struct i915_vma *vma)
2037 {
2038 const unsigned int first = vma->node.start >> PAGE_SHIFT;
2039 const unsigned int size = vma->obj->base.size >> PAGE_SHIFT;
2040
2041 BUG_ON(!i915_is_ggtt(vma->vm));
2042 intel_gtt_clear_range(first, size);
2043 }
2044
2045 static int ggtt_bind_vma(struct i915_vma *vma,
2046 enum i915_cache_level cache_level,
2047 u32 flags)
2048 {
2049 struct drm_device *dev = vma->vm->dev;
2050 struct drm_i915_private *dev_priv = dev->dev_private;
2051 struct drm_i915_gem_object *obj = vma->obj;
2052 struct vm_page **pages = obj->pages;
2053 u32 pte_flags = 0;
2054
2055 /* Currently applicable only to VLV */
2056 if (obj->gt_ro)
2057 pte_flags |= PTE_READ_ONLY;
2058
2059 if (i915_is_ggtt(vma->vm))
2060 pages = vma->ggtt_view.pages;
2061
2062 if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
2063 vma->vm->insert_entries(vma->vm, pages,
2064 vma->node.start,
2065 obj->base.size >> PAGE_SHIFT,
2066 cache_level, pte_flags);
2067
2068 /* Note the inconsistency here is due to the absence of the
2069 * aliasing ppgtt on gen4 and earlier. Though we always
2070 * request PIN_USER for execbuffer (translated to LOCAL_BIND),
2071 * without the appgtt, we cannot honour that request and so
2072 * must substitute it with a global binding. Since we do this
2073 * behind the upper layers' back, we need to explicitly set
2074 * the bound flag ourselves.
2075 */ 2076 vma->bound |= GLOBAL_BIND; 2077 2078 } 2079 2080 if (dev_priv->mm.aliasing_ppgtt && flags & LOCAL_BIND) { 2081 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt; 2082 appgtt->base.insert_entries(&appgtt->base, pages, 2083 vma->node.start, 2084 obj->base.size >> PAGE_SHIFT, 2085 cache_level, pte_flags); 2086 } 2087 2088 return 0; 2089 } 2090 2091 static void ggtt_unbind_vma(struct i915_vma *vma) 2092 { 2093 struct drm_device *dev = vma->vm->dev; 2094 struct drm_i915_private *dev_priv = dev->dev_private; 2095 struct drm_i915_gem_object *obj = vma->obj; 2096 const uint64_t size = min_t(uint64_t, 2097 obj->base.size, 2098 vma->node.size); 2099 2100 if (vma->bound & GLOBAL_BIND) { 2101 vma->vm->clear_range(vma->vm, 2102 vma->node.start, 2103 size, 2104 true); 2105 } 2106 2107 if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) { 2108 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt; 2109 2110 appgtt->base.clear_range(&appgtt->base, 2111 vma->node.start, 2112 size, 2113 true); 2114 } 2115 } 2116 2117 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj) 2118 { 2119 struct drm_device *dev = obj->base.dev; 2120 struct drm_i915_private *dev_priv = dev->dev_private; 2121 bool interruptible; 2122 2123 interruptible = do_idling(dev_priv); 2124 2125 #if 0 2126 dma_unmap_sg(&dev->pdev->dev, obj->pages->sgl, obj->pages->nents, 2127 PCI_DMA_BIDIRECTIONAL); 2128 #endif 2129 2130 undo_idling(dev_priv, interruptible); 2131 } 2132 2133 static void i915_gtt_color_adjust(struct drm_mm_node *node, 2134 unsigned long color, 2135 u64 *start, 2136 u64 *end) 2137 { 2138 if (node->color != color) 2139 *start += 4096; 2140 2141 if (!list_empty(&node->node_list)) { 2142 node = list_entry(node->node_list.next, 2143 struct drm_mm_node, 2144 node_list); 2145 if (node->allocated && node->color != color) 2146 *end -= 4096; 2147 } 2148 } 2149 2150 static int i915_gem_setup_global_gtt(struct drm_device *dev, 2151 unsigned long start, 2152 unsigned long mappable_end, 2153 unsigned long end) 2154 { 2155 /* Let GEM Manage all of the aperture. 2156 * 2157 * However, leave one page at the end still bound to the scratch page. 2158 * There are a number of places where the hardware apparently prefetches 2159 * past the end of the object, and we've seen multiple hangs with the 2160 * GPU head pointer stuck in a batchbuffer bound at the last page of the 2161 * aperture. One page should be enough to keep any prefetching inside 2162 * of the aperture. 2163 */ 2164 struct drm_i915_private *dev_priv = dev->dev_private; 2165 struct i915_address_space *ggtt_vm = &dev_priv->gtt.base; 2166 unsigned long mappable; 2167 int error; 2168 struct drm_mm_node *entry; 2169 struct drm_i915_gem_object *obj; 2170 unsigned long hole_start, hole_end; 2171 int ret; 2172 2173 kprintf("MAPPABLE_END VS END %016jx %016jx\n", mappable_end, end); 2174 tsleep(&mappable_end, 0, "DELAY", hz); /* for kprintf */ 2175 /*BUG_ON(mappable_end > end);*/ 2176 2177 mappable = min(end, mappable_end) - start; 2178 2179 /* Subtract the guard page ... 
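 * (Illustration: i915_gem_init_global_gtt() calls this with start == 0, so
 * with a 2GiB GGTT the drm_mm below manages [0, 2GiB - 4KiB) and the final
 * 4KiB entry is left pointing at the scratch page, cleared again further
 * down.)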
*/ 2180 drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE); 2181 2182 dev_priv->gtt.base.start = start; 2183 dev_priv->gtt.base.total = end - start; 2184 2185 if (intel_vgpu_active(dev)) { 2186 ret = intel_vgt_balloon(dev); 2187 if (ret) 2188 return ret; 2189 } 2190 2191 if (!HAS_LLC(dev)) 2192 dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust; 2193 2194 /* Mark any preallocated objects as occupied */ 2195 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 2196 struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm); 2197 2198 DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n", 2199 i915_gem_obj_ggtt_offset(obj), obj->base.size); 2200 2201 WARN_ON(i915_gem_obj_ggtt_bound(obj)); 2202 ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node); 2203 if (ret) { 2204 DRM_DEBUG_KMS("Reservation failed: %i\n", ret); 2205 return ret; 2206 } 2207 vma->bound |= GLOBAL_BIND; 2208 } 2209 2210 /* Clear any non-preallocated blocks */ 2211 drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) { 2212 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", 2213 hole_start, hole_end); 2214 ggtt_vm->clear_range(ggtt_vm, hole_start, 2215 hole_end - hole_start, true); 2216 } 2217 2218 #ifdef __DragonFly__ 2219 device_printf(dev->dev, 2220 "taking over the fictitious range 0x%lx-0x%lx\n", 2221 dev_priv->gtt.mappable_base + start, dev_priv->gtt.mappable_base + start + mappable); 2222 error = -vm_phys_fictitious_reg_range(dev_priv->gtt.mappable_base + start, 2223 dev_priv->gtt.mappable_base + start + mappable, VM_MEMATTR_WRITE_COMBINING); 2224 #endif 2225 2226 /* And finally clear the reserved guard page */ 2227 ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true); 2228 2229 if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) { 2230 struct i915_hw_ppgtt *ppgtt; 2231 2232 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 2233 if (!ppgtt) 2234 return -ENOMEM; 2235 2236 ret = __hw_ppgtt_init(dev, ppgtt, true); 2237 if (ret) { 2238 ppgtt->base.cleanup(&ppgtt->base); 2239 kfree(ppgtt); 2240 return ret; 2241 } 2242 2243 dev_priv->mm.aliasing_ppgtt = ppgtt; 2244 } 2245 2246 return 0; 2247 } 2248 2249 void i915_gem_init_global_gtt(struct drm_device *dev) 2250 { 2251 struct drm_i915_private *dev_priv = dev->dev_private; 2252 unsigned long gtt_size, mappable_size; 2253 2254 gtt_size = dev_priv->gtt.base.total; 2255 mappable_size = dev_priv->gtt.mappable_end; 2256 2257 i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size); 2258 } 2259 2260 void i915_global_gtt_cleanup(struct drm_device *dev) 2261 { 2262 struct drm_i915_private *dev_priv = dev->dev_private; 2263 struct i915_address_space *vm = &dev_priv->gtt.base; 2264 2265 if (dev_priv->mm.aliasing_ppgtt) { 2266 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; 2267 2268 ppgtt->base.cleanup(&ppgtt->base); 2269 } 2270 2271 if (drm_mm_initialized(&vm->mm)) { 2272 if (intel_vgpu_active(dev)) 2273 intel_vgt_deballoon(); 2274 2275 drm_mm_takedown(&vm->mm); 2276 list_del(&vm->global_link); 2277 } 2278 2279 vm->cleanup(vm); 2280 } 2281 2282 static int setup_scratch_page(struct drm_device *dev) 2283 { 2284 struct drm_i915_private *dev_priv = dev->dev_private; 2285 struct vm_page *page; 2286 dma_addr_t dma_addr; 2287 2288 page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO); 2289 if (page == NULL) 2290 return -ENOMEM; 2291 set_pages_uc(page, 1); 2292 2293 #ifdef CONFIG_INTEL_IOMMU 2294 dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE, 2295 PCI_DMA_BIDIRECTIONAL); 2296 if (pci_dma_mapping_error(dev->pdev, dma_addr)) 2297 
return -EINVAL; 2298 #else 2299 dma_addr = page_to_phys(page); 2300 #endif 2301 dev_priv->gtt.base.scratch.page = page; 2302 dev_priv->gtt.base.scratch.addr = dma_addr; 2303 2304 return 0; 2305 } 2306 2307 #if 0 2308 static void teardown_scratch_page(struct drm_device *dev) 2309 { 2310 struct drm_i915_private *dev_priv = dev->dev_private; 2311 struct vm_page *page = dev_priv->gtt.base.scratch.page; 2312 2313 set_pages_wb(page, 1); 2314 pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr, 2315 PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); 2316 __free_page(page); 2317 } 2318 #endif 2319 2320 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) 2321 { 2322 snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; 2323 snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; 2324 return snb_gmch_ctl << 20; 2325 } 2326 2327 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) 2328 { 2329 bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; 2330 bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; 2331 if (bdw_gmch_ctl) 2332 bdw_gmch_ctl = 1 << bdw_gmch_ctl; 2333 2334 #ifdef CONFIG_X86_32 2335 /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */ 2336 if (bdw_gmch_ctl > 4) 2337 bdw_gmch_ctl = 4; 2338 #endif 2339 2340 return bdw_gmch_ctl << 20; 2341 } 2342 2343 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) 2344 { 2345 gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; 2346 gmch_ctrl &= SNB_GMCH_GGMS_MASK; 2347 2348 if (gmch_ctrl) 2349 return 1 << (20 + gmch_ctrl); 2350 2351 return 0; 2352 } 2353 2354 static size_t gen6_get_stolen_size(u16 snb_gmch_ctl) 2355 { 2356 snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; 2357 snb_gmch_ctl &= SNB_GMCH_GMS_MASK; 2358 return snb_gmch_ctl << 25; /* 32 MB units */ 2359 } 2360 2361 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl) 2362 { 2363 bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2364 bdw_gmch_ctl &= BDW_GMCH_GMS_MASK; 2365 return bdw_gmch_ctl << 25; /* 32 MB units */ 2366 } 2367 2368 static size_t chv_get_stolen_size(u16 gmch_ctrl) 2369 { 2370 gmch_ctrl >>= SNB_GMCH_GMS_SHIFT; 2371 gmch_ctrl &= SNB_GMCH_GMS_MASK; 2372 2373 /* 2374 * 0x0 to 0x10: 32MB increments starting at 0MB 2375 * 0x11 to 0x16: 4MB increments starting at 8MB 2376 * 0x17 to 0x1d: 4MB increments start at 36MB 2377 */ 2378 if (gmch_ctrl < 0x11) 2379 return gmch_ctrl << 25; 2380 else if (gmch_ctrl < 0x17) 2381 return (gmch_ctrl - 0x11 + 2) << 22; 2382 else 2383 return (gmch_ctrl - 0x17 + 9) << 22; 2384 } 2385 2386 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) 2387 { 2388 gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2389 gen9_gmch_ctl &= BDW_GMCH_GMS_MASK; 2390 2391 if (gen9_gmch_ctl < 0xf0) 2392 return gen9_gmch_ctl << 25; /* 32 MB units */ 2393 else 2394 /* 4MB increments starting at 0xf0 for 4MB */ 2395 return (gen9_gmch_ctl - 0xf0 + 1) << 22; 2396 } 2397 2398 static int ggtt_probe_common(struct drm_device *dev, 2399 size_t gtt_size) 2400 { 2401 struct drm_i915_private *dev_priv = dev->dev_private; 2402 phys_addr_t gtt_phys_addr; 2403 int ret; 2404 2405 /* For Modern GENs the PTEs and register space are split in the BAR */ 2406 gtt_phys_addr = pci_resource_start(dev->pdev, 0) + 2407 (pci_resource_len(dev->pdev, 0) / 2); 2408 2409 /* 2410 * On BXT writes larger than 64 bit to the GTT pagetable range will be 2411 * dropped. For WC mappings in general we have 64 byte burst writes 2412 * when the WC buffer is flushed, so we can't use it, but have to 2413 * resort to an uncached mapping. The WC issue is easily caught by the 2414 * readback check when writing GTT PTE entries. 
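 * (For scale: gen8 GGTT PTEs are 8 bytes each, so one flushed 64 byte WC
 * buffer would update eight entries at once, well past the 64 bit limit.)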
2415 */ 2416 if (IS_BROXTON(dev)) 2417 dev_priv->gtt.gsm = ioremap_nocache(gtt_phys_addr, gtt_size); 2418 else 2419 dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size); 2420 if (!dev_priv->gtt.gsm) { 2421 DRM_ERROR("Failed to map the gtt page table\n"); 2422 return -ENOMEM; 2423 } 2424 2425 ret = setup_scratch_page(dev); 2426 if (ret) { 2427 DRM_ERROR("Scratch setup failed\n"); 2428 /* iounmap will also get called at remove, but meh */ 2429 #if 0 2430 iounmap(dev_priv->gtt.gsm); 2431 #endif 2432 } 2433 2434 return ret; 2435 } 2436 2437 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability 2438 * bits. When using advanced contexts each context stores its own PAT, but 2439 * writing this data shouldn't be harmful even in those cases. */ 2440 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) 2441 { 2442 uint64_t pat; 2443 2444 pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ 2445 GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */ 2446 GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */ 2447 GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */ 2448 GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) | 2449 GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) | 2450 GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) | 2451 GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); 2452 2453 if (!USES_PPGTT(dev_priv->dev)) 2454 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry, 2455 * so RTL will always use the value corresponding to 2456 * pat_sel = 000". 2457 * So let's disable cache for GGTT to avoid screen corruptions. 2458 * MOCS still can be used though. 2459 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work 2460 * before this patch, i.e. the same uncached + snooping access 2461 * like on gen6/7 seems to be in effect. 2462 * - So this just fixes blitter/render access. Again it looks 2463 * like it's not just uncached access, but uncached + snooping. 2464 * So we can still hold onto all our assumptions wrt cpu 2465 * clflushing on LLC machines. 2466 */ 2467 pat = GEN8_PPAT(0, GEN8_PPAT_UC); 2468 2469 /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b 2470 * write would work. */ 2471 I915_WRITE(GEN8_PRIVATE_PAT, pat); 2472 I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32); 2473 } 2474 2475 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) 2476 { 2477 uint64_t pat; 2478 2479 /* 2480 * Map WB on BDW to snooped on CHV. 2481 * 2482 * Only the snoop bit has meaning for CHV, the rest is 2483 * ignored. 2484 * 2485 * The hardware will never snoop for certain types of accesses: 2486 * - CPU GTT (GMADR->GGTT->no snoop->memory) 2487 * - PPGTT page tables 2488 * - some other special cycles 2489 * 2490 * As with BDW, we also need to consider the following for GT accesses: 2491 * "For GGTT, there is NO pat_sel[2:0] from the entry, 2492 * so RTL will always use the value corresponding to 2493 * pat_sel = 000". 2494 * Which means we must set the snoop bit in PAT entry 0 2495 * in order to keep the global status page working. 
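 * (In the table below this means every entry that is WB on BDW, i.e. 0 and
 * 4-7, gets CHV_PPAT_SNOOP, while the WC/WT/UC entries 1-3 stay unsnooped.)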
2496 */ 2497 pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) | 2498 GEN8_PPAT(1, 0) | 2499 GEN8_PPAT(2, 0) | 2500 GEN8_PPAT(3, 0) | 2501 GEN8_PPAT(4, CHV_PPAT_SNOOP) | 2502 GEN8_PPAT(5, CHV_PPAT_SNOOP) | 2503 GEN8_PPAT(6, CHV_PPAT_SNOOP) | 2504 GEN8_PPAT(7, CHV_PPAT_SNOOP); 2505 2506 I915_WRITE(GEN8_PRIVATE_PAT, pat); 2507 I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32); 2508 } 2509 2510 static int gen8_gmch_probe(struct drm_device *dev, 2511 size_t *gtt_total, 2512 size_t *stolen, 2513 phys_addr_t *mappable_base, 2514 unsigned long *mappable_end) 2515 { 2516 struct drm_i915_private *dev_priv = dev->dev_private; 2517 unsigned int gtt_size; 2518 u16 snb_gmch_ctl; 2519 int ret; 2520 2521 /* TODO: We're not aware of mappable constraints on gen8 yet */ 2522 *mappable_base = pci_resource_start(dev->pdev, 2); 2523 *mappable_end = pci_resource_len(dev->pdev, 2); 2524 2525 #if 0 2526 if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39))) 2527 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39)); 2528 #endif 2529 2530 pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 2531 2532 if (INTEL_INFO(dev)->gen >= 9) { 2533 *stolen = gen9_get_stolen_size(snb_gmch_ctl); 2534 gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl); 2535 } else if (IS_CHERRYVIEW(dev)) { 2536 *stolen = chv_get_stolen_size(snb_gmch_ctl); 2537 gtt_size = chv_get_total_gtt_size(snb_gmch_ctl); 2538 } else { 2539 *stolen = gen8_get_stolen_size(snb_gmch_ctl); 2540 gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl); 2541 } 2542 2543 *gtt_total = (gtt_size / sizeof(gen8_pte_t)) << PAGE_SHIFT; 2544 2545 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) 2546 chv_setup_private_ppat(dev_priv); 2547 else 2548 bdw_setup_private_ppat(dev_priv); 2549 2550 ret = ggtt_probe_common(dev, gtt_size); 2551 2552 dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range; 2553 dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries; 2554 dev_priv->gtt.base.bind_vma = ggtt_bind_vma; 2555 dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma; 2556 2557 return ret; 2558 } 2559 2560 static int gen6_gmch_probe(struct drm_device *dev, 2561 size_t *gtt_total, 2562 size_t *stolen, 2563 phys_addr_t *mappable_base, 2564 unsigned long *mappable_end) 2565 { 2566 struct drm_i915_private *dev_priv = dev->dev_private; 2567 unsigned int gtt_size; 2568 u16 snb_gmch_ctl; 2569 int ret; 2570 2571 *mappable_base = pci_resource_start(dev->pdev, 2); 2572 *mappable_end = pci_resource_len(dev->pdev, 2); 2573 2574 /* 64/512MB is the current min/max we actually know of, but this is just 2575 * a coarse sanity check. 
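 * (The GTT size decode further down is plain arithmetic: e.g. a GGMS field
 * of 2 means a 2MB GTT, i.e. 512Ki 4-byte gen6 PTEs mapping 2GiB of GGTT
 * address space.)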
2576 */
2577 if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
2578 DRM_ERROR("Unknown GMADR size (%lx)\n",
2579 dev_priv->gtt.mappable_end);
2580 return -ENXIO;
2581 }
2582
2583 #if 0
2584 if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
2585 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
2586 #endif
2587 pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
2588
2589 *stolen = gen6_get_stolen_size(snb_gmch_ctl);
2590
2591 gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
2592 *gtt_total = (gtt_size / sizeof(gen6_pte_t)) << PAGE_SHIFT;
2593
2594 ret = ggtt_probe_common(dev, gtt_size);
2595
2596 dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
2597 dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
2598 dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
2599 dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
2600
2601 return ret;
2602 }
2603
2604 static void gen6_gmch_remove(struct i915_address_space *vm)
2605 {
2606 #if 0
2607 struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);
2608
2609 iounmap(gtt->gsm);
2610 teardown_scratch_page(vm->dev);
2611 #endif
2612 }
2613
2614 static int i915_gmch_probe(struct drm_device *dev,
2615 size_t *gtt_total,
2616 size_t *stolen,
2617 phys_addr_t *mappable_base,
2618 unsigned long *mappable_end)
2619 {
2620 struct drm_i915_private *dev_priv = dev->dev_private;
2621 #if 0
2622 int ret;
2623
2624 ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
2625 if (!ret) {
2626 DRM_ERROR("failed to set up gmch\n");
2627 return -EIO;
2628 }
2629 #endif
2630
2631 intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
2632
2633 dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
2634 dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
2635 dev_priv->gtt.base.bind_vma = i915_ggtt_bind_vma;
2636 dev_priv->gtt.base.unbind_vma = i915_ggtt_unbind_vma;
2637
2638 if (unlikely(dev_priv->gtt.do_idle_maps))
2639 DRM_INFO("applying Ironlake quirks for intel_iommu\n");
2640
2641 return 0;
2642 }
2643
2644 static void i915_gmch_remove(struct i915_address_space *vm)
2645 {
2646 intel_gmch_remove();
2647 }
2648
2649 int i915_gem_gtt_init(struct drm_device *dev)
2650 {
2651 struct drm_i915_private *dev_priv = dev->dev_private;
2652 struct i915_gtt *gtt = &dev_priv->gtt;
2653 int ret;
2654
2655 if (INTEL_INFO(dev)->gen <= 5) {
2656 gtt->gtt_probe = i915_gmch_probe;
2657 gtt->base.cleanup = i915_gmch_remove;
2658 } else if (INTEL_INFO(dev)->gen < 8) {
2659 gtt->gtt_probe = gen6_gmch_probe;
2660 gtt->base.cleanup = gen6_gmch_remove;
2661 if (IS_HASWELL(dev) && dev_priv->ellc_size)
2662 gtt->base.pte_encode = iris_pte_encode;
2663 else if (IS_HASWELL(dev))
2664 gtt->base.pte_encode = hsw_pte_encode;
2665 else if (IS_VALLEYVIEW(dev))
2666 gtt->base.pte_encode = byt_pte_encode;
2667 else if (INTEL_INFO(dev)->gen >= 7)
2668 gtt->base.pte_encode = ivb_pte_encode;
2669 else
2670 gtt->base.pte_encode = snb_pte_encode;
2671 } else {
2672 dev_priv->gtt.gtt_probe = gen8_gmch_probe;
2673 dev_priv->gtt.base.cleanup = gen6_gmch_remove;
2674 }
2675
2676 ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
2677 &gtt->mappable_base, &gtt->mappable_end);
2678 if (ret)
2679 return ret;
2680
2681 gtt->base.dev = dev;
2682
2683 /* GMADR is the PCI mmio aperture into the global GTT.
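 * (CPU accesses through the mappable aperture are translated by the same
 * GGTT PTEs programmed above, so CPU and GPU see a consistent layout.)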
*/ 2684 DRM_INFO("Memory usable by graphics device = %zdM\n", 2685 gtt->base.total >> 20); 2686 DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20); 2687 DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20); 2688 #ifdef CONFIG_INTEL_IOMMU 2689 if (intel_iommu_gfx_mapped) 2690 DRM_INFO("VT-d active for gfx access\n"); 2691 #endif 2692 /* 2693 * i915.enable_ppgtt is read-only, so do an early pass to validate the 2694 * user's requested state against the hardware/driver capabilities. We 2695 * do this now so that we can print out any log messages once rather 2696 * than every time we check intel_enable_ppgtt(). 2697 */ 2698 i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt); 2699 DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt); 2700 2701 return 0; 2702 } 2703 2704 void i915_gem_restore_gtt_mappings(struct drm_device *dev) 2705 { 2706 struct drm_i915_private *dev_priv = dev->dev_private; 2707 struct drm_i915_gem_object *obj; 2708 struct i915_address_space *vm; 2709 struct i915_vma *vma; 2710 bool flush; 2711 2712 i915_check_and_clear_faults(dev); 2713 2714 /* First fill our portion of the GTT with scratch pages */ 2715 dev_priv->gtt.base.clear_range(&dev_priv->gtt.base, 2716 dev_priv->gtt.base.start, 2717 dev_priv->gtt.base.total, 2718 true); 2719 2720 /* Cache flush objects bound into GGTT and rebind them. */ 2721 vm = &dev_priv->gtt.base; 2722 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 2723 flush = false; 2724 list_for_each_entry(vma, &obj->vma_list, vma_link) { 2725 if (vma->vm != vm) 2726 continue; 2727 2728 WARN_ON(i915_vma_bind(vma, obj->cache_level, 2729 PIN_UPDATE)); 2730 2731 flush = true; 2732 } 2733 2734 if (flush) 2735 i915_gem_clflush_object(obj, obj->pin_display); 2736 } 2737 2738 if (INTEL_INFO(dev)->gen >= 8) { 2739 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) 2740 chv_setup_private_ppat(dev_priv); 2741 else 2742 bdw_setup_private_ppat(dev_priv); 2743 2744 return; 2745 } 2746 2747 if (USES_PPGTT(dev)) { 2748 list_for_each_entry(vm, &dev_priv->vm_list, global_link) { 2749 /* TODO: Perhaps it shouldn't be gen6 specific */ 2750 2751 struct i915_hw_ppgtt *ppgtt = 2752 container_of(vm, struct i915_hw_ppgtt, 2753 base); 2754 2755 if (i915_is_ggtt(vm)) 2756 ppgtt = dev_priv->mm.aliasing_ppgtt; 2757 2758 gen6_write_page_range(dev_priv, &ppgtt->pd, 2759 0, ppgtt->base.total); 2760 } 2761 } 2762 2763 i915_ggtt_flush(dev_priv); 2764 } 2765 2766 static struct i915_vma * 2767 __i915_gem_vma_create(struct drm_i915_gem_object *obj, 2768 struct i915_address_space *vm, 2769 const struct i915_ggtt_view *ggtt_view) 2770 { 2771 struct i915_vma *vma; 2772 2773 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) 2774 return ERR_PTR(-EINVAL); 2775 vma = kzalloc(sizeof(*vma), GFP_KERNEL); 2776 if (vma == NULL) 2777 return ERR_PTR(-ENOMEM); 2778 2779 INIT_LIST_HEAD(&vma->vma_link); 2780 INIT_LIST_HEAD(&vma->mm_list); 2781 INIT_LIST_HEAD(&vma->exec_list); 2782 vma->vm = vm; 2783 vma->obj = obj; 2784 2785 if (i915_is_ggtt(vm)) 2786 vma->ggtt_view = *ggtt_view; 2787 2788 list_add_tail(&vma->vma_link, &obj->vma_list); 2789 if (!i915_is_ggtt(vm)) 2790 i915_ppgtt_get(i915_vm_to_ppgtt(vm)); 2791 2792 return vma; 2793 } 2794 2795 struct i915_vma * 2796 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, 2797 struct i915_address_space *vm) 2798 { 2799 struct i915_vma *vma; 2800 2801 vma = i915_gem_obj_to_vma(obj, vm); 2802 if (!vma) 2803 vma = __i915_gem_vma_create(obj, vm, 2804 i915_is_ggtt(vm) ? 
&i915_ggtt_view_normal : NULL); 2805 2806 return vma; 2807 } 2808 2809 struct i915_vma * 2810 i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj, 2811 const struct i915_ggtt_view *view) 2812 { 2813 struct i915_address_space *ggtt = i915_obj_to_ggtt(obj); 2814 struct i915_vma *vma; 2815 2816 if (WARN_ON(!view)) 2817 return ERR_PTR(-EINVAL); 2818 2819 vma = i915_gem_obj_to_ggtt_view(obj, view); 2820 2821 if (IS_ERR(vma)) 2822 return vma; 2823 2824 if (!vma) 2825 vma = __i915_gem_vma_create(obj, ggtt, view); 2826 2827 return vma; 2828 2829 } 2830 2831 #if 0 2832 static void 2833 rotate_pages(dma_addr_t *in, unsigned int width, unsigned int height, 2834 struct sg_table *st) 2835 { 2836 unsigned int column, row; 2837 unsigned int src_idx; 2838 struct scatterlist *sg = st->sgl; 2839 2840 st->nents = 0; 2841 2842 for (column = 0; column < width; column++) { 2843 src_idx = width * (height - 1) + column; 2844 for (row = 0; row < height; row++) { 2845 st->nents++; 2846 /* We don't need the pages, but need to initialize 2847 * the entries so the sg list can be happily traversed. 2848 * The only thing we need are DMA addresses. 2849 */ 2850 sg_set_page(sg, NULL, PAGE_SIZE, 0); 2851 sg_dma_address(sg) = in[src_idx]; 2852 sg_dma_len(sg) = PAGE_SIZE; 2853 sg = sg_next(sg); 2854 src_idx -= width; 2855 } 2856 } 2857 } 2858 2859 static struct sg_table * 2860 intel_rotate_fb_obj_pages(struct i915_ggtt_view *ggtt_view, 2861 struct drm_i915_gem_object *obj) 2862 { 2863 struct drm_device *dev = obj->base.dev; 2864 struct intel_rotation_info *rot_info = &ggtt_view->rotation_info; 2865 unsigned long size, pages, rot_pages; 2866 struct sg_page_iter sg_iter; 2867 unsigned long i; 2868 dma_addr_t *page_addr_list; 2869 struct sg_table *st; 2870 unsigned int tile_pitch, tile_height; 2871 unsigned int width_pages, height_pages; 2872 int ret = -ENOMEM; 2873 2874 pages = obj->base.size / PAGE_SIZE; 2875 2876 /* Calculate tiling geometry. */ 2877 tile_height = intel_tile_height(dev, rot_info->pixel_format, 2878 rot_info->fb_modifier); 2879 tile_pitch = PAGE_SIZE / tile_height; 2880 width_pages = DIV_ROUND_UP(rot_info->pitch, tile_pitch); 2881 height_pages = DIV_ROUND_UP(rot_info->height, tile_height); 2882 rot_pages = width_pages * height_pages; 2883 size = rot_pages * PAGE_SIZE; 2884 2885 /* Allocate a temporary list of source pages for random access. */ 2886 page_addr_list = drm_malloc_ab(pages, sizeof(dma_addr_t)); 2887 if (!page_addr_list) 2888 return ERR_PTR(ret); 2889 2890 /* Allocate target SG list. */ 2891 st = kmalloc(sizeof(*st), GFP_KERNEL); 2892 if (!st) 2893 goto err_st_alloc; 2894 2895 ret = sg_alloc_table(st, rot_pages, GFP_KERNEL); 2896 if (ret) 2897 goto err_sg_alloc; 2898 2899 /* Populate source page list from the object. */ 2900 i = 0; 2901 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { 2902 page_addr_list[i] = sg_page_iter_dma_address(&sg_iter); 2903 i++; 2904 } 2905 2906 /* Rotate the pages. 
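 * (Worked example: a 2x2 source laid out as 1 2 / 3 4 is emitted column by
 * column starting from the bottom row, i.e. 3 1 4 2, which is the source
 * rotated 90 degrees clockwise: 3 1 / 4 2.)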
*/
2907 rotate_pages(page_addr_list, width_pages, height_pages, st);
2908
2909 DRM_DEBUG_KMS(
2910 "Created rotated page mapping for object size %lu (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %lu pages).\n",
2911 size, rot_info->pitch, rot_info->height,
2912 rot_info->pixel_format, width_pages, height_pages,
2913 rot_pages);
2914
2915 drm_free_large(page_addr_list);
2916
2917 return st;
2918
2919 err_sg_alloc:
2920 kfree(st);
2921 err_st_alloc:
2922 drm_free_large(page_addr_list);
2923
2924 DRM_DEBUG_KMS(
2925 "Failed to create rotated mapping for object size %lu! (%d) (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %lu pages)\n",
2926 size, ret, rot_info->pitch, rot_info->height,
2927 rot_info->pixel_format, width_pages, height_pages,
2928 rot_pages);
2929 return ERR_PTR(ret);
2930 }
2931
2932 static struct sg_table *
2933 intel_partial_pages(const struct i915_ggtt_view *view,
2934 struct drm_i915_gem_object *obj)
2935 {
2936 struct sg_table *st;
2937 struct scatterlist *sg;
2938 struct sg_page_iter obj_sg_iter;
2939 int ret = -ENOMEM;
2940
2941 st = kmalloc(sizeof(*st), GFP_KERNEL);
2942 if (!st)
2943 goto err_st_alloc;
2944
2945 ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL);
2946 if (ret)
2947 goto err_sg_alloc;
2948
2949 sg = st->sgl;
2950 st->nents = 0;
2951 for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents,
2952 view->params.partial.offset)
2953 {
2954 if (st->nents >= view->params.partial.size)
2955 break;
2956
2957 sg_set_page(sg, NULL, PAGE_SIZE, 0);
2958 sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter);
2959 sg_dma_len(sg) = PAGE_SIZE;
2960
2961 sg = sg_next(sg);
2962 st->nents++;
2963 }
2964
2965 return st;
2966
2967 err_sg_alloc:
2968 kfree(st);
2969 err_st_alloc:
2970 return ERR_PTR(ret);
2971 }
2972 #endif
2973
2974 static int
2975 i915_get_ggtt_vma_pages(struct i915_vma *vma)
2976 {
2977 int ret = 0;
2978
2979 if (vma->ggtt_view.pages)
2980 return 0;
2981
2982 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
2983 vma->ggtt_view.pages = vma->obj->pages;
2984 #if 0
2985 else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
2986 vma->ggtt_view.pages =
2987 intel_rotate_fb_obj_pages(&vma->ggtt_view, vma->obj);
2988 else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
2989 vma->ggtt_view.pages =
2990 intel_partial_pages(&vma->ggtt_view, vma->obj);
2991 #endif
2992 else
2993 WARN_ONCE(1, "GGTT view %u not implemented!\n",
2994 vma->ggtt_view.type);
2995
2996 if (!vma->ggtt_view.pages) {
2997 DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
2998 vma->ggtt_view.type);
2999 ret = -EINVAL;
3000 } else if (IS_ERR(vma->ggtt_view.pages)) {
3001 ret = PTR_ERR(vma->ggtt_view.pages);
3002 vma->ggtt_view.pages = NULL;
3003 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
3004 vma->ggtt_view.type, ret);
3005 }
3006
3007 return ret;
3008 }
3009
3010 /**
3011 * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
3012 * @vma: VMA to map
3013 * @cache_level: mapping cache level
3014 * @flags: flags like global or local mapping
3015
3016 * DMA addresses are taken from the scatter-gather table of this object (or of
3017 * this VMA in case of non-default GGTT views) and PTE entries set up.
3018 * Note that DMA addresses are also the only part of the SG table we care about.
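 * Returns 0 on success, a negative error code on failure.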
3019 */ 3020 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, 3021 u32 flags) 3022 { 3023 int ret; 3024 u32 bind_flags; 3025 3026 if (WARN_ON(flags == 0)) 3027 return -EINVAL; 3028 3029 if (vma->vm->allocate_va_range) { 3030 trace_i915_va_alloc(vma->vm, vma->node.start, 3031 vma->node.size, 3032 VM_TO_TRACE_NAME(vma->vm)); 3033 3034 ret = vma->vm->allocate_va_range(vma->vm, 3035 vma->node.start, 3036 vma->node.size); 3037 if (ret) 3038 return ret; 3039 } 3040 3041 if (i915_is_ggtt(vma->vm)) { 3042 ret = i915_get_ggtt_vma_pages(vma); 3043 if (ret) 3044 return 0; 3045 } 3046 3047 bind_flags = 0; 3048 if (flags & PIN_GLOBAL) 3049 bind_flags |= GLOBAL_BIND; 3050 if (flags & PIN_USER) 3051 bind_flags |= LOCAL_BIND; 3052 3053 if (flags & PIN_UPDATE) 3054 bind_flags |= vma->bound; 3055 else 3056 bind_flags &= ~vma->bound; 3057 3058 if (bind_flags == 0) 3059 return 0; 3060 3061 if (vma->bound == 0 && vma->vm->allocate_va_range) { 3062 trace_i915_va_alloc(vma->vm, 3063 vma->node.start, 3064 vma->node.size, 3065 VM_TO_TRACE_NAME(vma->vm)); 3066 3067 ret = vma->vm->allocate_va_range(vma->vm, 3068 vma->node.start, 3069 vma->node.size); 3070 if (ret) 3071 return ret; 3072 } 3073 3074 ret = vma->vm->bind_vma(vma, cache_level, bind_flags); 3075 if (ret) 3076 return ret; 3077 3078 vma->bound |= bind_flags; 3079 3080 return 0; 3081 } 3082 3083 /** 3084 * i915_ggtt_view_size - Get the size of a GGTT view. 3085 * @obj: Object the view is of. 3086 * @view: The view in question. 3087 * 3088 * @return The size of the GGTT view in bytes. 3089 */ 3090 size_t 3091 i915_ggtt_view_size(struct drm_i915_gem_object *obj, 3092 const struct i915_ggtt_view *view) 3093 { 3094 if (view->type == I915_GGTT_VIEW_NORMAL || 3095 view->type == I915_GGTT_VIEW_ROTATED) { 3096 return obj->base.size; 3097 } else if (view->type == I915_GGTT_VIEW_PARTIAL) { 3098 return view->params.partial.size << PAGE_SHIFT; 3099 } else { 3100 WARN_ONCE(1, "GGTT view %u not implemented!\n", view->type); 3101 return obj->base.size; 3102 } 3103 } 3104
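/*
 * Illustrative sketch only, kept compiled out like the other #if 0 blocks in
 * this file: how a caller could describe, look up and bind a partial GGTT
 * view of the first 16 pages of an object. The object is hypothetical and
 * the pinning that normally reserves vma->node before i915_vma_bind() is
 * elided here.
 */
#if 0
static int example_bind_partial_view(struct drm_i915_gem_object *obj)
{
	struct i915_ggtt_view view = { .type = I915_GGTT_VIEW_PARTIAL };
	struct i915_vma *vma;

	view.params.partial.offset = 0;	/* start at page 0 of the object */
	view.params.partial.size = 16;	/* map 16 pages */

	/* The view, not the object, determines the GGTT footprint: 64KiB. */
	WARN_ON(i915_ggtt_view_size(obj, &view) != (16ul << PAGE_SHIFT));

	vma = i915_gem_obj_lookup_or_create_ggtt_vma(obj, &view);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	/* Write PTEs for just this 16 page window into the global GTT. */
	return i915_vma_bind(vma, obj->cache_level, PIN_GLOBAL);
}
#endif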