/*
 * Copyright © 2010 Daniel Vetter
 * Copyright © 2011-2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/seq_file.h>
#include <linux/stop_machine.h>
#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"

#define I915_GFP_DMA (GFP_KERNEL | __GFP_HIGHMEM)

/**
 * DOC: Global GTT views
 *
 * Background and previous state
 *
 * Historically objects could exist (be bound) in global GTT space only as
 * singular instances, with a view representing all of the object's backing
 * pages in a linear fashion. This view will be called a normal view.
 *
 * To support multiple views of the same object, where the number of mapped
 * pages is not equal to the backing store, or where the layout of the pages
 * is not linear, the concept of a GGTT view was added.
 *
 * One example of an alternative view is a stereo display driven by a single
 * image. In this case we would have a framebuffer looking like this
 * (2x2 pages):
 *
 *    12
 *    34
 *
 * Above would represent a normal GGTT view as normally mapped for GPU or CPU
 * rendering. In contrast, fed to the display engine would be an alternative
 * view which could look something like this:
 *
 *   1212
 *   3434
 *
 * In this example both the size and layout of pages in the alternative view
 * are different from the normal view.
 *
 * Implementation and usage
 *
 * GGTT views are implemented using VMAs and are distinguished via enum
 * i915_ggtt_view_type and struct i915_ggtt_view.
 *
 * A new flavour of core GEM functions which work with GGTT bound objects was
 * added with the _ggtt_ infix, and sometimes with the _view postfix, to avoid
 * renaming large amounts of code. They take the struct i915_ggtt_view
 * parameter encapsulating all metadata required to implement a view.
 *
 * As a helper for callers which are only interested in the normal view, a
 * globally const i915_ggtt_view_normal singleton instance exists. All old core
 * GEM API functions, the ones not taking the view parameter, operate on or
 * with the normal GGTT view.
 *
 * Code wanting to add or use a new GGTT view needs to:
 *
 * 1. Add a new enum with a suitable name.
 * 2. Extend the metadata in the i915_ggtt_view structure if required.
 * 3. Add support to i915_get_ggtt_vma_pages().
 *
 * New views are required to build a scatter-gather table from within the
 * i915_get_ggtt_vma_pages function. This table is stored in the vma.ggtt_view
 * and exists for the lifetime of a VMA.
 *
 * The core API is designed to have copy semantics, which means that a passed
 * in struct i915_ggtt_view does not need to be persistent (left around after
 * calling the core API functions).
 *
 */
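
/*
 * Illustrative sketch (not a real call site): a caller wanting a rotated
 * GGTT mapping only needs a short-lived view descriptor, e.g.
 *
 *	struct i915_ggtt_view view = { .type = I915_GGTT_VIEW_ROTATED };
 *
 * with the rotation metadata filled in before pinning through the _ggtt_/
 * _view flavoured API. Because the core API copies the view, the descriptor
 * can live on the stack; only the scatter-gather table built by
 * i915_get_ggtt_vma_pages() persists with the VMA.
 */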

static inline struct i915_ggtt *
i915_vm_to_ggtt(struct i915_address_space *vm)
{
	GEM_BUG_ON(!i915_is_ggtt(vm));
	return container_of(vm, struct i915_ggtt, base);
}

static int
i915_get_ggtt_vma_pages(struct i915_vma *vma);

const struct i915_ggtt_view i915_ggtt_view_normal = {
	.type = I915_GGTT_VIEW_NORMAL,
};
const struct i915_ggtt_view i915_ggtt_view_rotated = {
	.type = I915_GGTT_VIEW_ROTATED,
};

int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
				int enable_ppgtt)
{
	bool has_aliasing_ppgtt;
	bool has_full_ppgtt;
	bool has_full_48bit_ppgtt;

	has_aliasing_ppgtt = INTEL_GEN(dev_priv) >= 6;
	has_full_ppgtt = INTEL_GEN(dev_priv) >= 7;
	has_full_48bit_ppgtt =
		IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9;

	if (intel_vgpu_active(dev_priv)) {
		/* emulation is too hard */
		has_full_ppgtt = false;
		has_full_48bit_ppgtt = false;
	}

	if (!has_aliasing_ppgtt)
		return 0;

	/*
	 * We don't allow disabling PPGTT for gen9+ as it's a requirement for
	 * execlists, the sole mechanism available to submit work.
	 */
	if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9)
		return 0;

	if (enable_ppgtt == 1)
		return 1;

	if (enable_ppgtt == 2 && has_full_ppgtt)
		return 2;

	if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
		return 3;

#ifdef CONFIG_INTEL_IOMMU
	/* Disable ppgtt on SNB if VT-d is on. */
	if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped) {
		DRM_INFO("Disabling PPGTT because VT-d is on\n");
		return 0;
	}
#endif

	/* Early VLV doesn't have this */
	if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) {
		DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
		return 0;
	}

	if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists && has_full_ppgtt)
		return has_full_48bit_ppgtt ? 3 : 2;
	else
		return has_aliasing_ppgtt ?
1 : 0; 170 } 171 172 static int ppgtt_bind_vma(struct i915_vma *vma, 173 enum i915_cache_level cache_level, 174 u32 unused) 175 { 176 u32 pte_flags = 0; 177 178 vma->pages = vma->obj->pages; 179 180 /* Currently applicable only to VLV */ 181 if (vma->obj->gt_ro) 182 pte_flags |= PTE_READ_ONLY; 183 184 vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start, 185 cache_level, pte_flags); 186 187 return 0; 188 } 189 190 static void ppgtt_unbind_vma(struct i915_vma *vma) 191 { 192 vma->vm->clear_range(vma->vm, 193 vma->node.start, 194 vma->size); 195 } 196 197 static gen8_pte_t gen8_pte_encode(dma_addr_t addr, 198 enum i915_cache_level level) 199 { 200 gen8_pte_t pte = _PAGE_PRESENT | _PAGE_RW; 201 pte |= addr; 202 203 switch (level) { 204 case I915_CACHE_NONE: 205 pte |= PPAT_UNCACHED_INDEX; 206 break; 207 case I915_CACHE_WT: 208 pte |= PPAT_DISPLAY_ELLC_INDEX; 209 break; 210 default: 211 pte |= PPAT_CACHED_INDEX; 212 break; 213 } 214 215 return pte; 216 } 217 218 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr, 219 const enum i915_cache_level level) 220 { 221 gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW; 222 pde |= addr; 223 if (level != I915_CACHE_NONE) 224 pde |= PPAT_CACHED_PDE_INDEX; 225 else 226 pde |= PPAT_UNCACHED_INDEX; 227 return pde; 228 } 229 230 #define gen8_pdpe_encode gen8_pde_encode 231 #define gen8_pml4e_encode gen8_pde_encode 232 233 static gen6_pte_t snb_pte_encode(dma_addr_t addr, 234 enum i915_cache_level level, 235 u32 unused) 236 { 237 gen6_pte_t pte = GEN6_PTE_VALID; 238 pte |= GEN6_PTE_ADDR_ENCODE(addr); 239 240 switch (level) { 241 case I915_CACHE_L3_LLC: 242 case I915_CACHE_LLC: 243 pte |= GEN6_PTE_CACHE_LLC; 244 break; 245 case I915_CACHE_NONE: 246 pte |= GEN6_PTE_UNCACHED; 247 break; 248 default: 249 MISSING_CASE(level); 250 } 251 252 return pte; 253 } 254 255 static gen6_pte_t ivb_pte_encode(dma_addr_t addr, 256 enum i915_cache_level level, 257 u32 unused) 258 { 259 gen6_pte_t pte = GEN6_PTE_VALID; 260 pte |= GEN6_PTE_ADDR_ENCODE(addr); 261 262 switch (level) { 263 case I915_CACHE_L3_LLC: 264 pte |= GEN7_PTE_CACHE_L3_LLC; 265 break; 266 case I915_CACHE_LLC: 267 pte |= GEN6_PTE_CACHE_LLC; 268 break; 269 case I915_CACHE_NONE: 270 pte |= GEN6_PTE_UNCACHED; 271 break; 272 default: 273 MISSING_CASE(level); 274 } 275 276 return pte; 277 } 278 279 static gen6_pte_t byt_pte_encode(dma_addr_t addr, 280 enum i915_cache_level level, 281 u32 flags) 282 { 283 gen6_pte_t pte = GEN6_PTE_VALID; 284 pte |= GEN6_PTE_ADDR_ENCODE(addr); 285 286 if (!(flags & PTE_READ_ONLY)) 287 pte |= BYT_PTE_WRITEABLE; 288 289 if (level != I915_CACHE_NONE) 290 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; 291 292 return pte; 293 } 294 295 static gen6_pte_t hsw_pte_encode(dma_addr_t addr, 296 enum i915_cache_level level, 297 u32 unused) 298 { 299 gen6_pte_t pte = GEN6_PTE_VALID; 300 pte |= HSW_PTE_ADDR_ENCODE(addr); 301 302 if (level != I915_CACHE_NONE) 303 pte |= HSW_WB_LLC_AGE3; 304 305 return pte; 306 } 307 308 static gen6_pte_t iris_pte_encode(dma_addr_t addr, 309 enum i915_cache_level level, 310 u32 unused) 311 { 312 gen6_pte_t pte = GEN6_PTE_VALID; 313 pte |= HSW_PTE_ADDR_ENCODE(addr); 314 315 switch (level) { 316 case I915_CACHE_NONE: 317 break; 318 case I915_CACHE_WT: 319 pte |= HSW_WT_ELLC_LLC_AGE3; 320 break; 321 default: 322 pte |= HSW_WB_ELLC_LLC_AGE3; 323 break; 324 } 325 326 return pte; 327 } 328 329 static int __setup_page_dma(struct drm_device *dev, 330 struct i915_page_dma *p, gfp_t flags) 331 { 332 struct device *kdev = &dev->pdev->dev; 333 334 p->page = alloc_page(flags); 335 if 
(!p->page) 336 return -ENOMEM; 337 338 p->daddr = dma_map_page(kdev, 339 p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL); 340 341 if (dma_mapping_error(kdev, p->daddr)) { 342 __free_page(p->page); 343 return -EINVAL; 344 } 345 346 return 0; 347 } 348 349 static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p) 350 { 351 return __setup_page_dma(dev, p, I915_GFP_DMA); 352 } 353 354 static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p) 355 { 356 struct pci_dev *pdev = dev->pdev; 357 358 if (WARN_ON(!p->page)) 359 return; 360 361 dma_unmap_page(&pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL); 362 __free_page(p->page); 363 memset(p, 0, sizeof(*p)); 364 } 365 366 static void *kmap_page_dma(struct i915_page_dma *p) 367 { 368 return kmap_atomic(p->page); 369 } 370 371 /* We use the flushing unmap only with ppgtt structures: 372 * page directories, page tables and scratch pages. 373 */ 374 static void kunmap_page_dma(struct drm_i915_private *dev_priv, void *vaddr) 375 { 376 /* There are only few exceptions for gen >=6. chv and bxt. 377 * And we are not sure about the latter so play safe for now. 378 */ 379 if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) 380 drm_clflush_virt_range(vaddr, PAGE_SIZE); 381 382 kunmap_atomic(vaddr); 383 } 384 385 #define kmap_px(px) kmap_page_dma(px_base(px)) 386 #define kunmap_px(ppgtt, vaddr) \ 387 kunmap_page_dma(to_i915((ppgtt)->base.dev), (vaddr)) 388 389 #define setup_px(dev, px) setup_page_dma((dev), px_base(px)) 390 #define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px)) 391 #define fill_px(dev_priv, px, v) fill_page_dma((dev_priv), px_base(px), (v)) 392 #define fill32_px(dev_priv, px, v) \ 393 fill_page_dma_32((dev_priv), px_base(px), (v)) 394 395 static void fill_page_dma(struct drm_i915_private *dev_priv, 396 struct i915_page_dma *p, const uint64_t val) 397 { 398 int i; 399 uint64_t * const vaddr = kmap_page_dma(p); 400 401 for (i = 0; i < 512; i++) 402 vaddr[i] = val; 403 404 kunmap_page_dma(dev_priv, vaddr); 405 } 406 407 static void fill_page_dma_32(struct drm_i915_private *dev_priv, 408 struct i915_page_dma *p, const uint32_t val32) 409 { 410 uint64_t v = val32; 411 412 v = v << 32 | val32; 413 414 fill_page_dma(dev_priv, p, v); 415 } 416 417 static int 418 setup_scratch_page(struct drm_device *dev, 419 struct i915_page_dma *scratch, 420 gfp_t gfp) 421 { 422 return __setup_page_dma(dev, scratch, gfp | __GFP_ZERO); 423 } 424 425 static void cleanup_scratch_page(struct drm_device *dev, 426 struct i915_page_dma *scratch) 427 { 428 cleanup_page_dma(dev, scratch); 429 } 430 431 static struct i915_page_table *alloc_pt(struct drm_device *dev) 432 { 433 struct i915_page_table *pt; 434 const size_t count = INTEL_INFO(dev)->gen >= 8 ? 
435 GEN8_PTES : GEN6_PTES; 436 int ret = -ENOMEM; 437 438 pt = kzalloc(sizeof(*pt), GFP_KERNEL); 439 if (!pt) 440 return ERR_PTR(-ENOMEM); 441 442 pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes), 443 GFP_KERNEL); 444 445 if (!pt->used_ptes) 446 goto fail_bitmap; 447 448 ret = setup_px(dev, pt); 449 if (ret) 450 goto fail_page_m; 451 452 return pt; 453 454 fail_page_m: 455 kfree(pt->used_ptes); 456 fail_bitmap: 457 kfree(pt); 458 459 return ERR_PTR(ret); 460 } 461 462 static void free_pt(struct drm_device *dev, struct i915_page_table *pt) 463 { 464 cleanup_px(dev, pt); 465 kfree(pt->used_ptes); 466 kfree(pt); 467 } 468 469 static void gen8_initialize_pt(struct i915_address_space *vm, 470 struct i915_page_table *pt) 471 { 472 gen8_pte_t scratch_pte; 473 474 scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, 475 I915_CACHE_LLC); 476 477 fill_px(to_i915(vm->dev), pt, scratch_pte); 478 } 479 480 static void gen6_initialize_pt(struct i915_address_space *vm, 481 struct i915_page_table *pt) 482 { 483 gen6_pte_t scratch_pte; 484 485 WARN_ON(vm->scratch_page.daddr == 0); 486 487 scratch_pte = vm->pte_encode(vm->scratch_page.daddr, 488 I915_CACHE_LLC, 0); 489 490 fill32_px(to_i915(vm->dev), pt, scratch_pte); 491 } 492 493 static struct i915_page_directory *alloc_pd(struct drm_device *dev) 494 { 495 struct i915_page_directory *pd; 496 int ret = -ENOMEM; 497 498 pd = kzalloc(sizeof(*pd), GFP_KERNEL); 499 if (!pd) 500 return ERR_PTR(-ENOMEM); 501 502 pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES), 503 sizeof(*pd->used_pdes), GFP_KERNEL); 504 if (!pd->used_pdes) 505 goto fail_bitmap; 506 507 ret = setup_px(dev, pd); 508 if (ret) 509 goto fail_page_m; 510 511 return pd; 512 513 fail_page_m: 514 kfree(pd->used_pdes); 515 fail_bitmap: 516 kfree(pd); 517 518 return ERR_PTR(ret); 519 } 520 521 static void free_pd(struct drm_device *dev, struct i915_page_directory *pd) 522 { 523 if (px_page(pd)) { 524 cleanup_px(dev, pd); 525 kfree(pd->used_pdes); 526 kfree(pd); 527 } 528 } 529 530 static void gen8_initialize_pd(struct i915_address_space *vm, 531 struct i915_page_directory *pd) 532 { 533 gen8_pde_t scratch_pde; 534 535 scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC); 536 537 fill_px(to_i915(vm->dev), pd, scratch_pde); 538 } 539 540 static int __pdp_init(struct drm_device *dev, 541 struct i915_page_directory_pointer *pdp) 542 { 543 size_t pdpes = I915_PDPES_PER_PDP(dev); 544 545 pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes), 546 sizeof(unsigned long), 547 GFP_KERNEL); 548 if (!pdp->used_pdpes) 549 return -ENOMEM; 550 551 pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory), 552 GFP_KERNEL); 553 if (!pdp->page_directory) { 554 kfree(pdp->used_pdpes); 555 /* the PDP might be the statically allocated top level. 
Keep it 556 * as clean as possible */ 557 pdp->used_pdpes = NULL; 558 return -ENOMEM; 559 } 560 561 return 0; 562 } 563 564 static void __pdp_fini(struct i915_page_directory_pointer *pdp) 565 { 566 kfree(pdp->used_pdpes); 567 kfree(pdp->page_directory); 568 pdp->page_directory = NULL; 569 } 570 571 static struct 572 i915_page_directory_pointer *alloc_pdp(struct drm_device *dev) 573 { 574 struct i915_page_directory_pointer *pdp; 575 int ret = -ENOMEM; 576 577 WARN_ON(!USES_FULL_48BIT_PPGTT(dev)); 578 579 pdp = kzalloc(sizeof(*pdp), GFP_KERNEL); 580 if (!pdp) 581 return ERR_PTR(-ENOMEM); 582 583 ret = __pdp_init(dev, pdp); 584 if (ret) 585 goto fail_bitmap; 586 587 ret = setup_px(dev, pdp); 588 if (ret) 589 goto fail_page_m; 590 591 return pdp; 592 593 fail_page_m: 594 __pdp_fini(pdp); 595 fail_bitmap: 596 kfree(pdp); 597 598 return ERR_PTR(ret); 599 } 600 601 static void free_pdp(struct drm_device *dev, 602 struct i915_page_directory_pointer *pdp) 603 { 604 __pdp_fini(pdp); 605 if (USES_FULL_48BIT_PPGTT(dev)) { 606 cleanup_px(dev, pdp); 607 kfree(pdp); 608 } 609 } 610 611 static void gen8_initialize_pdp(struct i915_address_space *vm, 612 struct i915_page_directory_pointer *pdp) 613 { 614 gen8_ppgtt_pdpe_t scratch_pdpe; 615 616 scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC); 617 618 fill_px(to_i915(vm->dev), pdp, scratch_pdpe); 619 } 620 621 static void gen8_initialize_pml4(struct i915_address_space *vm, 622 struct i915_pml4 *pml4) 623 { 624 gen8_ppgtt_pml4e_t scratch_pml4e; 625 626 scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp), 627 I915_CACHE_LLC); 628 629 fill_px(to_i915(vm->dev), pml4, scratch_pml4e); 630 } 631 632 static void 633 gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt, 634 struct i915_page_directory_pointer *pdp, 635 struct i915_page_directory *pd, 636 int index) 637 { 638 gen8_ppgtt_pdpe_t *page_directorypo; 639 640 if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) 641 return; 642 643 page_directorypo = kmap_px(pdp); 644 page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); 645 kunmap_px(ppgtt, page_directorypo); 646 } 647 648 static void 649 gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt, 650 struct i915_pml4 *pml4, 651 struct i915_page_directory_pointer *pdp, 652 int index) 653 { 654 gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4); 655 656 WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)); 657 pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); 658 kunmap_px(ppgtt, pagemap); 659 } 660 661 /* Broadwell Page Directory Pointer Descriptors */ 662 static int gen8_write_pdp(struct drm_i915_gem_request *req, 663 unsigned entry, 664 dma_addr_t addr) 665 { 666 struct intel_ring *ring = req->ring; 667 struct intel_engine_cs *engine = req->engine; 668 int ret; 669 670 BUG_ON(entry >= 4); 671 672 ret = intel_ring_begin(req, 6); 673 if (ret) 674 return ret; 675 676 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 677 intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, entry)); 678 intel_ring_emit(ring, upper_32_bits(addr)); 679 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 680 intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, entry)); 681 intel_ring_emit(ring, lower_32_bits(addr)); 682 intel_ring_advance(ring); 683 684 return 0; 685 } 686 687 static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt, 688 struct drm_i915_gem_request *req) 689 { 690 int i, ret; 691 692 for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) { 693 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); 694 695 ret = 
gen8_write_pdp(req, i, pd_daddr);
		if (ret)
			return ret;
	}

	return 0;
}

static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
			      struct drm_i915_gem_request *req)
{
	return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
}

/* Removes entries from a single page table, releasing it if it's empty.
 * Caller can use the return value to update higher-level entries.
 */
static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
				struct i915_page_table *pt,
				uint64_t start,
				uint64_t length)
{
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	unsigned int pte_start = gen8_pte_index(start);
	unsigned int num_entries = gen8_pte_count(start, length);
	unsigned int pte_end = pte_start + num_entries;
	uint64_t pte;
	gen8_pte_t *pt_vaddr;
	gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
						 I915_CACHE_LLC);

	if (WARN_ON(!px_page(pt)))
		return false;

	bitmap_clear(pt->used_ptes, pte_start, num_entries);

	if (bitmap_empty(pt->used_ptes, GEN8_PTES)) {
		free_pt(vm->dev, pt);
		return true;
	}

	pt_vaddr = kmap_px(pt);

	/* Point the PTEs covered by [start, start + length) at scratch */
	for (pte = pte_start; pte < pte_end; pte++)
		pt_vaddr[pte] = scratch_pte;

	kunmap_px(ppgtt, pt_vaddr);

	return false;
}

/* Removes entries from a single page dir, releasing it if it's empty.
 * Caller can use the return value to update higher-level entries
 */
static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
				struct i915_page_directory *pd,
				uint64_t start,
				uint64_t length)
{
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	struct i915_page_table *pt;
	uint64_t pde;
	gen8_pde_t *pde_vaddr;
	gen8_pde_t scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt),
						 I915_CACHE_LLC);

	gen8_for_each_pde(pt, pd, start, length, pde) {
		if (WARN_ON(!pd->page_table[pde]))
			break;

		if (gen8_ppgtt_clear_pt(vm, pt, start, length)) {
			__clear_bit(pde, pd->used_pdes);
			pde_vaddr = kmap_px(pd);
			pde_vaddr[pde] = scratch_pde;
			kunmap_px(ppgtt, pde_vaddr);
		}
	}

	if (bitmap_empty(pd->used_pdes, I915_PDES)) {
		free_pd(vm->dev, pd);
		return true;
	}

	return false;
}

/* Removes entries from a single page dir pointer, releasing it if it's empty.
 * Caller can use the return value to update higher-level entries
 */
static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
				 struct i915_page_directory_pointer *pdp,
				 uint64_t start,
				 uint64_t length)
{
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	struct i915_page_directory *pd;
	uint64_t pdpe;
	gen8_ppgtt_pdpe_t *pdpe_vaddr;
	gen8_ppgtt_pdpe_t scratch_pdpe =
		gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);

	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
		if (WARN_ON(!pdp->page_directory[pdpe]))
			break;

		if (gen8_ppgtt_clear_pd(vm, pd, start, length)) {
			__clear_bit(pdpe, pdp->used_pdpes);
			if (USES_FULL_48BIT_PPGTT(vm->dev)) {
				pdpe_vaddr = kmap_px(pdp);
				pdpe_vaddr[pdpe] = scratch_pdpe;
				kunmap_px(ppgtt, pdpe_vaddr);
			}
		}
	}

	if (USES_FULL_48BIT_PPGTT(vm->dev) &&
	    bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(vm->dev))) {
		free_pdp(vm->dev, pdp);
		return true;
	}

	return false;
}

/* Removes entries from a single pml4.
 * This is the top-level structure in 4-level page tables used on gen8+.
 * Empty entries are always scratch pml4e.
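 * Note that the pml4 itself is never freed here; it is torn down together
 * with the ppgtt in gen8_ppgtt_cleanup_4lvl().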
821 */ 822 static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm, 823 struct i915_pml4 *pml4, 824 uint64_t start, 825 uint64_t length) 826 { 827 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 828 struct i915_page_directory_pointer *pdp; 829 uint64_t pml4e; 830 gen8_ppgtt_pml4e_t *pml4e_vaddr; 831 gen8_ppgtt_pml4e_t scratch_pml4e = 832 gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC); 833 834 GEM_BUG_ON(!USES_FULL_48BIT_PPGTT(vm->dev)); 835 836 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 837 if (WARN_ON(!pml4->pdps[pml4e])) 838 break; 839 840 if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) { 841 __clear_bit(pml4e, pml4->used_pml4es); 842 pml4e_vaddr = kmap_px(pml4); 843 pml4e_vaddr[pml4e] = scratch_pml4e; 844 kunmap_px(ppgtt, pml4e_vaddr); 845 } 846 } 847 } 848 849 static void gen8_ppgtt_clear_range(struct i915_address_space *vm, 850 uint64_t start, uint64_t length) 851 { 852 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 853 854 if (USES_FULL_48BIT_PPGTT(vm->dev)) 855 gen8_ppgtt_clear_pml4(vm, &ppgtt->pml4, start, length); 856 else 857 gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length); 858 } 859 860 static void 861 gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm, 862 struct i915_page_directory_pointer *pdp, 863 struct sg_page_iter *sg_iter, 864 uint64_t start, 865 enum i915_cache_level cache_level) 866 { 867 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 868 gen8_pte_t *pt_vaddr; 869 unsigned pdpe = gen8_pdpe_index(start); 870 unsigned pde = gen8_pde_index(start); 871 unsigned pte = gen8_pte_index(start); 872 873 pt_vaddr = NULL; 874 875 while (__sg_page_iter_next(sg_iter)) { 876 if (pt_vaddr == NULL) { 877 struct i915_page_directory *pd = pdp->page_directory[pdpe]; 878 struct i915_page_table *pt = pd->page_table[pde]; 879 pt_vaddr = kmap_px(pt); 880 } 881 882 pt_vaddr[pte] = 883 gen8_pte_encode(sg_page_iter_dma_address(sg_iter), 884 cache_level); 885 if (++pte == GEN8_PTES) { 886 kunmap_px(ppgtt, pt_vaddr); 887 pt_vaddr = NULL; 888 if (++pde == I915_PDES) { 889 if (++pdpe == I915_PDPES_PER_PDP(vm->dev)) 890 break; 891 pde = 0; 892 } 893 pte = 0; 894 } 895 } 896 897 if (pt_vaddr) 898 kunmap_px(ppgtt, pt_vaddr); 899 } 900 901 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, 902 struct sg_table *pages, 903 uint64_t start, 904 enum i915_cache_level cache_level, 905 u32 unused) 906 { 907 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 908 struct sg_page_iter sg_iter; 909 910 __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0); 911 912 if (!USES_FULL_48BIT_PPGTT(vm->dev)) { 913 gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start, 914 cache_level); 915 } else { 916 struct i915_page_directory_pointer *pdp; 917 uint64_t pml4e; 918 uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT; 919 920 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) { 921 gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter, 922 start, cache_level); 923 } 924 } 925 } 926 927 static void gen8_free_page_tables(struct drm_device *dev, 928 struct i915_page_directory *pd) 929 { 930 int i; 931 932 if (!px_page(pd)) 933 return; 934 935 for_each_set_bit(i, pd->used_pdes, I915_PDES) { 936 if (WARN_ON(!pd->page_table[i])) 937 continue; 938 939 free_pt(dev, pd->page_table[i]); 940 pd->page_table[i] = NULL; 941 } 942 } 943 944 static int gen8_init_scratch(struct i915_address_space *vm) 945 { 946 struct drm_device *dev = vm->dev; 947 int ret; 948 949 ret = setup_scratch_page(dev, &vm->scratch_page, 
I915_GFP_DMA); 950 if (ret) 951 return ret; 952 953 vm->scratch_pt = alloc_pt(dev); 954 if (IS_ERR(vm->scratch_pt)) { 955 ret = PTR_ERR(vm->scratch_pt); 956 goto free_scratch_page; 957 } 958 959 vm->scratch_pd = alloc_pd(dev); 960 if (IS_ERR(vm->scratch_pd)) { 961 ret = PTR_ERR(vm->scratch_pd); 962 goto free_pt; 963 } 964 965 if (USES_FULL_48BIT_PPGTT(dev)) { 966 vm->scratch_pdp = alloc_pdp(dev); 967 if (IS_ERR(vm->scratch_pdp)) { 968 ret = PTR_ERR(vm->scratch_pdp); 969 goto free_pd; 970 } 971 } 972 973 gen8_initialize_pt(vm, vm->scratch_pt); 974 gen8_initialize_pd(vm, vm->scratch_pd); 975 if (USES_FULL_48BIT_PPGTT(dev)) 976 gen8_initialize_pdp(vm, vm->scratch_pdp); 977 978 return 0; 979 980 free_pd: 981 free_pd(dev, vm->scratch_pd); 982 free_pt: 983 free_pt(dev, vm->scratch_pt); 984 free_scratch_page: 985 cleanup_scratch_page(dev, &vm->scratch_page); 986 987 return ret; 988 } 989 990 static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) 991 { 992 enum vgt_g2v_type msg; 993 struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev); 994 int i; 995 996 if (USES_FULL_48BIT_PPGTT(dev_priv)) { 997 u64 daddr = px_dma(&ppgtt->pml4); 998 999 I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); 1000 I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); 1001 1002 msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE : 1003 VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY); 1004 } else { 1005 for (i = 0; i < GEN8_LEGACY_PDPES; i++) { 1006 u64 daddr = i915_page_dir_dma_addr(ppgtt, i); 1007 1008 I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr)); 1009 I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr)); 1010 } 1011 1012 msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE : 1013 VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY); 1014 } 1015 1016 I915_WRITE(vgtif_reg(g2v_notify), msg); 1017 1018 return 0; 1019 } 1020 1021 static void gen8_free_scratch(struct i915_address_space *vm) 1022 { 1023 struct drm_device *dev = vm->dev; 1024 1025 if (USES_FULL_48BIT_PPGTT(dev)) 1026 free_pdp(dev, vm->scratch_pdp); 1027 free_pd(dev, vm->scratch_pd); 1028 free_pt(dev, vm->scratch_pt); 1029 cleanup_scratch_page(dev, &vm->scratch_page); 1030 } 1031 1032 static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev, 1033 struct i915_page_directory_pointer *pdp) 1034 { 1035 int i; 1036 1037 for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) { 1038 if (WARN_ON(!pdp->page_directory[i])) 1039 continue; 1040 1041 gen8_free_page_tables(dev, pdp->page_directory[i]); 1042 free_pd(dev, pdp->page_directory[i]); 1043 } 1044 1045 free_pdp(dev, pdp); 1046 } 1047 1048 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt) 1049 { 1050 int i; 1051 1052 for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { 1053 if (WARN_ON(!ppgtt->pml4.pdps[i])) 1054 continue; 1055 1056 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]); 1057 } 1058 1059 cleanup_px(ppgtt->base.dev, &ppgtt->pml4); 1060 } 1061 1062 static void gen8_ppgtt_cleanup(struct i915_address_space *vm) 1063 { 1064 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1065 1066 if (intel_vgpu_active(to_i915(vm->dev))) 1067 gen8_ppgtt_notify_vgt(ppgtt, false); 1068 1069 if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) 1070 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp); 1071 else 1072 gen8_ppgtt_cleanup_4lvl(ppgtt); 1073 1074 gen8_free_scratch(vm); 1075 } 1076 1077 /** 1078 * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range. 1079 * @vm: Master vm structure. 1080 * @pd: Page directory for this address range. 
 * @start: Starting virtual address to begin allocations.
 * @length: Size of the allocations.
 * @new_pts: Bitmap set by function with new allocations. Likely used by the
 *	caller to free on error.
 *
 * Allocate the required number of page tables. Extremely similar to
 * gen8_ppgtt_alloc_page_directories(). The main difference is here we are
 * limited by the page directory boundary (instead of the page directory
 * pointer). That boundary is 1GB virtual. Therefore, unlike
 * gen8_ppgtt_alloc_page_directories(), it is possible, and likely, that the
 * caller will need to use multiple calls of this function to achieve the
 * appropriate allocation.
 *
 * Return: 0 if success; negative error code otherwise.
 */
static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
				     struct i915_page_directory *pd,
				     uint64_t start,
				     uint64_t length,
				     unsigned long *new_pts)
{
	struct drm_device *dev = vm->dev;
	struct i915_page_table *pt;
	uint32_t pde;

	gen8_for_each_pde(pt, pd, start, length, pde) {
		/* Don't reallocate page tables */
		if (test_bit(pde, pd->used_pdes)) {
			/* Scratch is never allocated this way */
			WARN_ON(pt == vm->scratch_pt);
			continue;
		}

		pt = alloc_pt(dev);
		if (IS_ERR(pt))
			goto unwind_out;

		gen8_initialize_pt(vm, pt);
		pd->page_table[pde] = pt;
		__set_bit(pde, new_pts);
		trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
	}

	return 0;

unwind_out:
	for_each_set_bit(pde, new_pts, I915_PDES)
		free_pt(dev, pd->page_table[pde]);

	return -ENOMEM;
}
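
/* Note that @new_pts (and @new_pds/@new_pdps below) only record structures
 * allocated by the current call, so the unwind paths free just the newly
 * created entries and leave pre-existing page tables untouched.
 */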

/**
 * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
 * @vm: Master vm structure.
 * @pdp: Page directory pointer for this address range.
 * @start: Starting virtual address to begin allocations.
 * @length: Size of the allocations.
 * @new_pds: Bitmap set by function with new allocations. Likely used by the
 *	caller to free on error.
 *
 * Allocate the required number of page directories starting at the pde index
 * of @start, and ending at the pde index @start + @length. This function will
 * skip over already allocated page directories within the range, and only
 * allocate new ones, setting the appropriate pointer within the pdp as well
 * as the correct position in the bitmap @new_pds.
 *
 * The function will only allocate the pages within the range for a given page
 * directory pointer. In other words, if @start + @length straddles a virtually
 * addressed PDP boundary (512GB for 4k pages), there will be more allocations
 * required by the caller. This is not currently possible, and the BUG in the
 * code will prevent it.
 *
 * Return: 0 if success; negative error code otherwise.
 */
static int
gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
				  struct i915_page_directory_pointer *pdp,
				  uint64_t start,
				  uint64_t length,
				  unsigned long *new_pds)
{
	struct drm_device *dev = vm->dev;
	struct i915_page_directory *pd;
	uint32_t pdpe;
	uint32_t pdpes = I915_PDPES_PER_PDP(dev);

	WARN_ON(!bitmap_empty(new_pds, pdpes));

	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
		if (test_bit(pdpe, pdp->used_pdpes))
			continue;

		pd = alloc_pd(dev);
		if (IS_ERR(pd))
			goto unwind_out;

		gen8_initialize_pd(vm, pd);
		pdp->page_directory[pdpe] = pd;
		__set_bit(pdpe, new_pds);
		trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT);
	}

	return 0;

unwind_out:
	for_each_set_bit(pdpe, new_pds, pdpes)
		free_pd(dev, pdp->page_directory[pdpe]);

	return -ENOMEM;
}

/**
 * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
 * @vm: Master vm structure.
 * @pml4: Page map level 4 for this address range.
 * @start: Starting virtual address to begin allocations.
 * @length: Size of the allocations.
 * @new_pdps: Bitmap set by function with new allocations. Likely used by the
 *	caller to free on error.
 *
 * Allocate the required number of page directory pointers. Extremely similar
 * to gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs().
 * The main difference is here we are limited by the pml4 boundary (instead of
 * the page directory pointer).
 *
 * Return: 0 if success; negative error code otherwise.
 */
static int
gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
				  struct i915_pml4 *pml4,
				  uint64_t start,
				  uint64_t length,
				  unsigned long *new_pdps)
{
	struct drm_device *dev = vm->dev;
	struct i915_page_directory_pointer *pdp;
	uint32_t pml4e;

	WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4));

	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
		if (!test_bit(pml4e, pml4->used_pml4es)) {
			pdp = alloc_pdp(dev);
			if (IS_ERR(pdp))
				goto unwind_out;

			gen8_initialize_pdp(vm, pdp);
			pml4->pdps[pml4e] = pdp;
			__set_bit(pml4e, new_pdps);
			trace_i915_page_directory_pointer_entry_alloc(vm,
								      pml4e,
								      start,
								      GEN8_PML4E_SHIFT);
		}
	}

	return 0;

unwind_out:
	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
		free_pdp(dev, pml4->pdps[pml4e]);

	return -ENOMEM;
}

static void
free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
{
	kfree(new_pts);
	kfree(new_pds);
}
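
/* Layout of the temporary bitmaps: @new_pds is a single bitmap with one bit
 * per possible PDPE, while @new_pts packs one I915_PDES-bit bitmap per PDPE
 * back to back, so the bitmap for a given pdpe starts at
 * new_pts + pdpe * BITS_TO_LONGS(I915_PDES).
 */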

/* Fills in the page directory bitmap, and the array of page tables bitmap.
 * Both of these are based on the number of PDPEs in the system.
 */
static
int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
					  unsigned long **new_pts,
					  uint32_t pdpes)
{
	unsigned long *pds;
	unsigned long *pts;

	pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
	if (!pds)
		return -ENOMEM;

	pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long),
		      GFP_TEMPORARY);
	if (!pts)
		goto err_out;

	*new_pds = pds;
	*new_pts = pts;

	return 0;

err_out:
	free_gen8_temp_bitmaps(pds, pts);
	return -ENOMEM;
}

/* PDE TLBs are a pain to invalidate on GEN8+. When we modify
 * the page table structures, we mark them dirty so that
 * context switching/execlist queuing code takes extra steps
 * to ensure that tlbs are flushed.
 */
static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
{
	ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
}

static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
				    struct i915_page_directory_pointer *pdp,
				    uint64_t start,
				    uint64_t length)
{
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	unsigned long *new_page_dirs, *new_page_tables;
	struct drm_device *dev = vm->dev;
	struct i915_page_directory *pd;
	const uint64_t orig_start = start;
	const uint64_t orig_length = length;
	uint32_t pdpe;
	uint32_t pdpes = I915_PDPES_PER_PDP(dev);
	int ret;

	/* Wrap is never okay since we can only represent 48b, and we don't
	 * actually use the other side of the canonical address space.
	 */
	if (WARN_ON(start + length < start))
		return -ENODEV;

	if (WARN_ON(start + length > vm->total))
		return -ENODEV;

	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
	if (ret)
		return ret;

	/* Do the allocations first so we can easily bail out */
	ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
						new_page_dirs);
	if (ret) {
		free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
		return ret;
	}

	/* For every page directory referenced, allocate page tables */
	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
		ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
						new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES));
		if (ret)
			goto err_out;
	}

	start = orig_start;
	length = orig_length;

	/* Allocations have completed successfully, so set the bitmaps, and do
	 * the mappings. */
	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
		gen8_pde_t *const page_directory = kmap_px(pd);
		struct i915_page_table *pt;
		uint64_t pd_len = length;
		uint64_t pd_start = start;
		uint32_t pde;

		/* Every pd should be allocated, we just did that above.
*/ 1350 WARN_ON(!pd); 1351 1352 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { 1353 /* Same reasoning as pd */ 1354 WARN_ON(!pt); 1355 WARN_ON(!pd_len); 1356 WARN_ON(!gen8_pte_count(pd_start, pd_len)); 1357 1358 /* Set our used ptes within the page table */ 1359 bitmap_set(pt->used_ptes, 1360 gen8_pte_index(pd_start), 1361 gen8_pte_count(pd_start, pd_len)); 1362 1363 /* Our pde is now pointing to the pagetable, pt */ 1364 __set_bit(pde, pd->used_pdes); 1365 1366 /* Map the PDE to the page table */ 1367 page_directory[pde] = gen8_pde_encode(px_dma(pt), 1368 I915_CACHE_LLC); 1369 trace_i915_page_table_entry_map(&ppgtt->base, pde, pt, 1370 gen8_pte_index(start), 1371 gen8_pte_count(start, length), 1372 GEN8_PTES); 1373 1374 /* NB: We haven't yet mapped ptes to pages. At this 1375 * point we're still relying on insert_entries() */ 1376 } 1377 1378 kunmap_px(ppgtt, page_directory); 1379 __set_bit(pdpe, pdp->used_pdpes); 1380 gen8_setup_page_directory(ppgtt, pdp, pd, pdpe); 1381 } 1382 1383 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1384 mark_tlbs_dirty(ppgtt); 1385 return 0; 1386 1387 err_out: 1388 while (pdpe--) { 1389 unsigned long temp; 1390 1391 for_each_set_bit(temp, new_page_tables + pdpe * 1392 BITS_TO_LONGS(I915_PDES), I915_PDES) 1393 free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]); 1394 } 1395 1396 for_each_set_bit(pdpe, new_page_dirs, pdpes) 1397 free_pd(dev, pdp->page_directory[pdpe]); 1398 1399 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1400 mark_tlbs_dirty(ppgtt); 1401 return ret; 1402 } 1403 1404 static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, 1405 struct i915_pml4 *pml4, 1406 uint64_t start, 1407 uint64_t length) 1408 { 1409 DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4); 1410 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1411 struct i915_page_directory_pointer *pdp; 1412 uint64_t pml4e; 1413 int ret = 0; 1414 1415 /* Do the pml4 allocations first, so we don't need to track the newly 1416 * allocated tables below the pdp */ 1417 bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4); 1418 1419 /* The pagedirectory and pagetable allocations are done in the shared 3 1420 * and 4 level code. Just allocate the pdps. 1421 */ 1422 ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length, 1423 new_pdps); 1424 if (ret) 1425 return ret; 1426 1427 WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2, 1428 "The allocation has spanned more than 512GB. 
" 1429 "It is highly likely this is incorrect."); 1430 1431 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1432 WARN_ON(!pdp); 1433 1434 ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length); 1435 if (ret) 1436 goto err_out; 1437 1438 gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e); 1439 } 1440 1441 bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es, 1442 GEN8_PML4ES_PER_PML4); 1443 1444 return 0; 1445 1446 err_out: 1447 for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) 1448 gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]); 1449 1450 return ret; 1451 } 1452 1453 static int gen8_alloc_va_range(struct i915_address_space *vm, 1454 uint64_t start, uint64_t length) 1455 { 1456 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1457 1458 if (USES_FULL_48BIT_PPGTT(vm->dev)) 1459 return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length); 1460 else 1461 return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); 1462 } 1463 1464 static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp, 1465 uint64_t start, uint64_t length, 1466 gen8_pte_t scratch_pte, 1467 struct seq_file *m) 1468 { 1469 struct i915_page_directory *pd; 1470 uint32_t pdpe; 1471 1472 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1473 struct i915_page_table *pt; 1474 uint64_t pd_len = length; 1475 uint64_t pd_start = start; 1476 uint32_t pde; 1477 1478 if (!test_bit(pdpe, pdp->used_pdpes)) 1479 continue; 1480 1481 seq_printf(m, "\tPDPE #%d\n", pdpe); 1482 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { 1483 uint32_t pte; 1484 gen8_pte_t *pt_vaddr; 1485 1486 if (!test_bit(pde, pd->used_pdes)) 1487 continue; 1488 1489 pt_vaddr = kmap_px(pt); 1490 for (pte = 0; pte < GEN8_PTES; pte += 4) { 1491 uint64_t va = 1492 (pdpe << GEN8_PDPE_SHIFT) | 1493 (pde << GEN8_PDE_SHIFT) | 1494 (pte << GEN8_PTE_SHIFT); 1495 int i; 1496 bool found = false; 1497 1498 for (i = 0; i < 4; i++) 1499 if (pt_vaddr[pte + i] != scratch_pte) 1500 found = true; 1501 if (!found) 1502 continue; 1503 1504 seq_printf(m, "\t\t0x%lx [%03d,%03d,%04d]: =", va, pdpe, pde, pte); 1505 for (i = 0; i < 4; i++) { 1506 if (pt_vaddr[pte + i] != scratch_pte) 1507 seq_printf(m, " %lx", pt_vaddr[pte + i]); 1508 else 1509 seq_puts(m, " SCRATCH "); 1510 } 1511 seq_puts(m, "\n"); 1512 } 1513 /* don't use kunmap_px, it could trigger 1514 * an unnecessary flush. 
1515 */ 1516 kunmap_atomic(pt_vaddr); 1517 } 1518 } 1519 } 1520 1521 static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) 1522 { 1523 struct i915_address_space *vm = &ppgtt->base; 1524 uint64_t start = ppgtt->base.start; 1525 uint64_t length = ppgtt->base.total; 1526 gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, 1527 I915_CACHE_LLC); 1528 1529 if (!USES_FULL_48BIT_PPGTT(vm->dev)) { 1530 gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m); 1531 } else { 1532 uint64_t pml4e; 1533 struct i915_pml4 *pml4 = &ppgtt->pml4; 1534 struct i915_page_directory_pointer *pdp; 1535 1536 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1537 if (!test_bit(pml4e, pml4->used_pml4es)) 1538 continue; 1539 1540 seq_printf(m, " PML4E #%lu\n", pml4e); 1541 gen8_dump_pdp(pdp, start, length, scratch_pte, m); 1542 } 1543 } 1544 } 1545 1546 static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt) 1547 { 1548 unsigned long *new_page_dirs, *new_page_tables; 1549 uint32_t pdpes = I915_PDPES_PER_PDP(dev); 1550 int ret; 1551 1552 /* We allocate temp bitmap for page tables for no gain 1553 * but as this is for init only, lets keep the things simple 1554 */ 1555 ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); 1556 if (ret) 1557 return ret; 1558 1559 /* Allocate for all pdps regardless of how the ppgtt 1560 * was defined. 1561 */ 1562 ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp, 1563 0, 1ULL << 32, 1564 new_page_dirs); 1565 if (!ret) 1566 *ppgtt->pdp.used_pdpes = *new_page_dirs; 1567 1568 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1569 1570 return ret; 1571 } 1572 1573 /* 1574 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers 1575 * with a net effect resembling a 2-level page table in normal x86 terms. Each 1576 * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address 1577 * space. 
1578 * 1579 */ 1580 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 1581 { 1582 int ret; 1583 1584 ret = gen8_init_scratch(&ppgtt->base); 1585 if (ret) 1586 return ret; 1587 1588 ppgtt->base.start = 0; 1589 ppgtt->base.cleanup = gen8_ppgtt_cleanup; 1590 ppgtt->base.allocate_va_range = gen8_alloc_va_range; 1591 ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; 1592 ppgtt->base.clear_range = gen8_ppgtt_clear_range; 1593 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 1594 ppgtt->base.bind_vma = ppgtt_bind_vma; 1595 ppgtt->debug_dump = gen8_dump_ppgtt; 1596 1597 if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { 1598 ret = setup_px(ppgtt->base.dev, &ppgtt->pml4); 1599 if (ret) 1600 goto free_scratch; 1601 1602 gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4); 1603 1604 ppgtt->base.total = 1ULL << 48; 1605 ppgtt->switch_mm = gen8_48b_mm_switch; 1606 } else { 1607 ret = __pdp_init(ppgtt->base.dev, &ppgtt->pdp); 1608 if (ret) 1609 goto free_scratch; 1610 1611 ppgtt->base.total = 1ULL << 32; 1612 ppgtt->switch_mm = gen8_legacy_mm_switch; 1613 trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, 1614 0, 0, 1615 GEN8_PML4E_SHIFT); 1616 1617 if (intel_vgpu_active(to_i915(ppgtt->base.dev))) { 1618 ret = gen8_preallocate_top_level_pdps(ppgtt); 1619 if (ret) 1620 goto free_scratch; 1621 } 1622 } 1623 1624 if (intel_vgpu_active(to_i915(ppgtt->base.dev))) 1625 gen8_ppgtt_notify_vgt(ppgtt, true); 1626 1627 return 0; 1628 1629 free_scratch: 1630 gen8_free_scratch(&ppgtt->base); 1631 return ret; 1632 } 1633 1634 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) 1635 { 1636 struct i915_address_space *vm = &ppgtt->base; 1637 struct i915_page_table *unused; 1638 gen6_pte_t scratch_pte; 1639 uint32_t pd_entry; 1640 uint32_t pte, pde; 1641 uint32_t start = ppgtt->base.start, length = ppgtt->base.total; 1642 1643 scratch_pte = vm->pte_encode(vm->scratch_page.daddr, 1644 I915_CACHE_LLC, 0); 1645 1646 gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) { 1647 u32 expected; 1648 gen6_pte_t *pt_vaddr; 1649 const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]); 1650 pd_entry = readl(ppgtt->pd_addr + pde); 1651 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID); 1652 1653 if (pd_entry != expected) 1654 seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n", 1655 pde, 1656 pd_entry, 1657 expected); 1658 seq_printf(m, "\tPDE: %x\n", pd_entry); 1659 1660 pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]); 1661 1662 for (pte = 0; pte < GEN6_PTES; pte+=4) { 1663 unsigned long va = 1664 (pde * PAGE_SIZE * GEN6_PTES) + 1665 (pte * PAGE_SIZE); 1666 int i; 1667 bool found = false; 1668 for (i = 0; i < 4; i++) 1669 if (pt_vaddr[pte + i] != scratch_pte) 1670 found = true; 1671 if (!found) 1672 continue; 1673 1674 seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte); 1675 for (i = 0; i < 4; i++) { 1676 if (pt_vaddr[pte + i] != scratch_pte) 1677 seq_printf(m, " %08x", pt_vaddr[pte + i]); 1678 else 1679 seq_puts(m, " SCRATCH "); 1680 } 1681 seq_puts(m, "\n"); 1682 } 1683 kunmap_px(ppgtt, pt_vaddr); 1684 } 1685 } 1686 1687 /* Write pde (index) from the page directory @pd to the page table @pt */ 1688 static void gen6_write_pde(struct i915_page_directory *pd, 1689 const int pde, struct i915_page_table *pt) 1690 { 1691 /* Caller needs to make sure the write completes if necessary */ 1692 struct i915_hw_ppgtt *ppgtt = 1693 container_of(pd, struct i915_hw_ppgtt, pd); 1694 u32 pd_entry; 1695 1696 pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt)); 1697 pd_entry |= 
GEN6_PDE_VALID; 1698 1699 writel(pd_entry, ppgtt->pd_addr + pde); 1700 } 1701 1702 /* Write all the page tables found in the ppgtt structure to incrementing page 1703 * directories. */ 1704 static void gen6_write_page_range(struct drm_i915_private *dev_priv, 1705 struct i915_page_directory *pd, 1706 uint32_t start, uint32_t length) 1707 { 1708 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1709 struct i915_page_table *pt; 1710 uint32_t pde; 1711 1712 gen6_for_each_pde(pt, pd, start, length, pde) 1713 gen6_write_pde(pd, pde, pt); 1714 1715 /* Make sure write is complete before other code can use this page 1716 * table. Also require for WC mapped PTEs */ 1717 readl(ggtt->gsm); 1718 } 1719 1720 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) 1721 { 1722 BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); 1723 1724 return (ppgtt->pd.base.ggtt_offset / 64) << 16; 1725 } 1726 1727 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, 1728 struct drm_i915_gem_request *req) 1729 { 1730 struct intel_ring *ring = req->ring; 1731 struct intel_engine_cs *engine = req->engine; 1732 int ret; 1733 1734 /* NB: TLBs must be flushed and invalidated before a switch */ 1735 ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); 1736 if (ret) 1737 return ret; 1738 1739 ret = intel_ring_begin(req, 6); 1740 if (ret) 1741 return ret; 1742 1743 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); 1744 intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); 1745 intel_ring_emit(ring, PP_DIR_DCLV_2G); 1746 intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); 1747 intel_ring_emit(ring, get_pd_offset(ppgtt)); 1748 intel_ring_emit(ring, MI_NOOP); 1749 intel_ring_advance(ring); 1750 1751 return 0; 1752 } 1753 1754 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, 1755 struct drm_i915_gem_request *req) 1756 { 1757 struct intel_ring *ring = req->ring; 1758 struct intel_engine_cs *engine = req->engine; 1759 int ret; 1760 1761 /* NB: TLBs must be flushed and invalidated before a switch */ 1762 ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); 1763 if (ret) 1764 return ret; 1765 1766 ret = intel_ring_begin(req, 6); 1767 if (ret) 1768 return ret; 1769 1770 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); 1771 intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); 1772 intel_ring_emit(ring, PP_DIR_DCLV_2G); 1773 intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); 1774 intel_ring_emit(ring, get_pd_offset(ppgtt)); 1775 intel_ring_emit(ring, MI_NOOP); 1776 intel_ring_advance(ring); 1777 1778 /* XXX: RCS is the only one to auto invalidate the TLBs? */ 1779 if (engine->id != RCS) { 1780 ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); 1781 if (ret) 1782 return ret; 1783 } 1784 1785 return 0; 1786 } 1787 1788 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, 1789 struct drm_i915_gem_request *req) 1790 { 1791 struct intel_engine_cs *engine = req->engine; 1792 struct drm_i915_private *dev_priv = req->i915; 1793 1794 I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); 1795 I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt)); 1796 return 0; 1797 } 1798 1799 static void gen8_ppgtt_enable(struct drm_device *dev) 1800 { 1801 struct drm_i915_private *dev_priv = to_i915(dev); 1802 struct intel_engine_cs *engine; 1803 enum intel_engine_id id; 1804 1805 for_each_engine(engine, dev_priv, id) { 1806 u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? 
GEN8_GFX_PPGTT_48B : 0;
		I915_WRITE(RING_MODE_GEN7(engine),
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
	}
}

static void gen7_ppgtt_enable(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct intel_engine_cs *engine;
	uint32_t ecochk, ecobits;
	enum intel_engine_id id;

	ecobits = I915_READ(GAC_ECO_BITS);
	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);

	ecochk = I915_READ(GAM_ECOCHK);
	if (IS_HASWELL(dev_priv)) {
		ecochk |= ECOCHK_PPGTT_WB_HSW;
	} else {
		ecochk |= ECOCHK_PPGTT_LLC_IVB;
		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
	}
	I915_WRITE(GAM_ECOCHK, ecochk);

	for_each_engine(engine, dev_priv, id) {
		/* GFX_MODE is per-ring on gen7+ */
		I915_WRITE(RING_MODE_GEN7(engine),
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	}
}

static void gen6_ppgtt_enable(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	uint32_t ecochk, gab_ctl, ecobits;

	ecobits = I915_READ(GAC_ECO_BITS);
	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
		   ECOBITS_PPGTT_CACHE64B);

	gab_ctl = I915_READ(GAB_CTL);
	I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);

	ecochk = I915_READ(GAM_ECOCHK);
	I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);

	I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
}

/* PPGTT support for Sandybridge/Gen6 and later */
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
				   uint64_t start,
				   uint64_t length)
{
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	gen6_pte_t *pt_vaddr, scratch_pte;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	unsigned act_pt = first_entry / GEN6_PTES;
	unsigned first_pte = first_entry % GEN6_PTES;
	unsigned last_pte, i;

	scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
				     I915_CACHE_LLC, 0);

	while (num_entries) {
		last_pte = first_pte + num_entries;
		if (last_pte > GEN6_PTES)
			last_pte = GEN6_PTES;

		pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);

		for (i = first_pte; i < last_pte; i++)
			pt_vaddr[i] = scratch_pte;

		kunmap_px(ppgtt, pt_vaddr);

		num_entries -= last_pte - first_pte;
		first_pte = 0;
		act_pt++;
	}
}

static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct sg_table *pages,
				      uint64_t start,
				      enum i915_cache_level cache_level, u32 flags)
{
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned act_pt = first_entry / GEN6_PTES;
	unsigned act_pte = first_entry % GEN6_PTES;
	gen6_pte_t *pt_vaddr = NULL;
	struct sgt_iter sgt_iter;
	dma_addr_t addr;

	for_each_sgt_dma(addr, sgt_iter, pages) {
		if (pt_vaddr == NULL)
			pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);

		pt_vaddr[act_pte] =
			vm->pte_encode(addr, cache_level, flags);

		if (++act_pte == GEN6_PTES) {
			kunmap_px(ppgtt, pt_vaddr);
			pt_vaddr = NULL;
			act_pt++;
			act_pte = 0;
		}
	}

	if (pt_vaddr)
		kunmap_px(ppgtt, pt_vaddr);
}

static int gen6_alloc_va_range(struct i915_address_space *vm,
			       uint64_t start_in, uint64_t length_in)
{
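	/* Bitmap of PDEs for which this call allocates a fresh page table;
	 * used both to write only the new PDEs and to unwind on failure.
	 */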
DECLARE_BITMAP(new_page_tables, I915_PDES); 1926 struct drm_device *dev = vm->dev; 1927 struct drm_i915_private *dev_priv = to_i915(dev); 1928 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1929 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1930 struct i915_page_table *pt; 1931 uint32_t start, length, start_save, length_save; 1932 uint32_t pde; 1933 int ret; 1934 1935 if (WARN_ON(start_in + length_in > ppgtt->base.total)) 1936 return -ENODEV; 1937 1938 start = start_save = start_in; 1939 length = length_save = length_in; 1940 1941 bitmap_zero(new_page_tables, I915_PDES); 1942 1943 /* The allocation is done in two stages so that we can bail out with 1944 * minimal amount of pain. The first stage finds new page tables that 1945 * need allocation. The second stage marks use ptes within the page 1946 * tables. 1947 */ 1948 gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { 1949 if (pt != vm->scratch_pt) { 1950 WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES)); 1951 continue; 1952 } 1953 1954 /* We've already allocated a page table */ 1955 WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES)); 1956 1957 pt = alloc_pt(dev); 1958 if (IS_ERR(pt)) { 1959 ret = PTR_ERR(pt); 1960 goto unwind_out; 1961 } 1962 1963 gen6_initialize_pt(vm, pt); 1964 1965 ppgtt->pd.page_table[pde] = pt; 1966 __set_bit(pde, new_page_tables); 1967 trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT); 1968 } 1969 1970 start = start_save; 1971 length = length_save; 1972 1973 gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { 1974 DECLARE_BITMAP(tmp_bitmap, GEN6_PTES); 1975 1976 bitmap_zero(tmp_bitmap, GEN6_PTES); 1977 bitmap_set(tmp_bitmap, gen6_pte_index(start), 1978 gen6_pte_count(start, length)); 1979 1980 if (__test_and_clear_bit(pde, new_page_tables)) 1981 gen6_write_pde(&ppgtt->pd, pde, pt); 1982 1983 trace_i915_page_table_entry_map(vm, pde, pt, 1984 gen6_pte_index(start), 1985 gen6_pte_count(start, length), 1986 GEN6_PTES); 1987 bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes, 1988 GEN6_PTES); 1989 } 1990 1991 WARN_ON(!bitmap_empty(new_page_tables, I915_PDES)); 1992 1993 /* Make sure write is complete before other code can use this page 1994 * table. 
Also required for WC mapped PTEs */ 1995 readl(ggtt->gsm); 1996 1997 mark_tlbs_dirty(ppgtt); 1998 return 0; 1999 2000 unwind_out: 2001 for_each_set_bit(pde, new_page_tables, I915_PDES) { 2002 struct i915_page_table *pt = ppgtt->pd.page_table[pde]; 2003 2004 ppgtt->pd.page_table[pde] = vm->scratch_pt; 2005 free_pt(vm->dev, pt); 2006 } 2007 2008 mark_tlbs_dirty(ppgtt); 2009 return ret; 2010 } 2011 2012 static int gen6_init_scratch(struct i915_address_space *vm) 2013 { 2014 struct drm_device *dev = vm->dev; 2015 int ret; 2016 2017 ret = setup_scratch_page(dev, &vm->scratch_page, I915_GFP_DMA); 2018 if (ret) 2019 return ret; 2020 2021 vm->scratch_pt = alloc_pt(dev); 2022 if (IS_ERR(vm->scratch_pt)) { 2023 cleanup_scratch_page(dev, &vm->scratch_page); 2024 return PTR_ERR(vm->scratch_pt); 2025 } 2026 2027 gen6_initialize_pt(vm, vm->scratch_pt); 2028 2029 return 0; 2030 } 2031 2032 static void gen6_free_scratch(struct i915_address_space *vm) 2033 { 2034 struct drm_device *dev = vm->dev; 2035 2036 free_pt(dev, vm->scratch_pt); 2037 cleanup_scratch_page(dev, &vm->scratch_page); 2038 } 2039 2040 static void gen6_ppgtt_cleanup(struct i915_address_space *vm) 2041 { 2042 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 2043 struct i915_page_directory *pd = &ppgtt->pd; 2044 struct drm_device *dev = vm->dev; 2045 struct i915_page_table *pt; 2046 uint32_t pde; 2047 2048 drm_mm_remove_node(&ppgtt->node); 2049 2050 gen6_for_all_pdes(pt, pd, pde) 2051 if (pt != vm->scratch_pt) 2052 free_pt(dev, pt); 2053 2054 gen6_free_scratch(vm); 2055 } 2056 2057 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) 2058 { 2059 struct i915_address_space *vm = &ppgtt->base; 2060 struct drm_device *dev = ppgtt->base.dev; 2061 struct drm_i915_private *dev_priv = to_i915(dev); 2062 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2063 bool retried = false; 2064 int ret; 2065 2066 /* PPGTT PDEs reside in the GGTT and consist of 512 entries. The 2067 * allocator works in address space sizes, so it's multiplied by page 2068 * size. We allocate at the top of the GTT to avoid fragmentation.
2069 */ 2070 BUG_ON(!drm_mm_initialized(&ggtt->base.mm)); 2071 2072 ret = gen6_init_scratch(vm); 2073 if (ret) 2074 return ret; 2075 2076 alloc: 2077 ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, 2078 &ppgtt->node, GEN6_PD_SIZE, 2079 GEN6_PD_ALIGN, 0, 2080 0, ggtt->base.total, 2081 DRM_MM_TOPDOWN); 2082 if (ret == -ENOSPC && !retried) { 2083 ret = i915_gem_evict_something(&ggtt->base, 2084 GEN6_PD_SIZE, GEN6_PD_ALIGN, 2085 I915_CACHE_NONE, 2086 0, ggtt->base.total, 2087 0); 2088 if (ret) 2089 goto err_out; 2090 2091 retried = true; 2092 goto alloc; 2093 } 2094 2095 if (ret) 2096 goto err_out; 2097 2098 2099 if (ppgtt->node.start < ggtt->mappable_end) 2100 DRM_DEBUG("Forced to use aperture for PDEs\n"); 2101 2102 return 0; 2103 2104 err_out: 2105 gen6_free_scratch(vm); 2106 return ret; 2107 } 2108 2109 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) 2110 { 2111 return gen6_ppgtt_allocate_page_directories(ppgtt); 2112 } 2113 2114 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt, 2115 uint64_t start, uint64_t length) 2116 { 2117 struct i915_page_table *unused; 2118 uint32_t pde; 2119 2120 gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) 2121 ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt; 2122 } 2123 2124 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 2125 { 2126 struct drm_device *dev = ppgtt->base.dev; 2127 struct drm_i915_private *dev_priv = to_i915(dev); 2128 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2129 int ret; 2130 2131 ppgtt->base.pte_encode = ggtt->base.pte_encode; 2132 if (intel_vgpu_active(dev_priv) || IS_GEN6(dev_priv)) 2133 ppgtt->switch_mm = gen6_mm_switch; 2134 else if (IS_HASWELL(dev_priv)) 2135 ppgtt->switch_mm = hsw_mm_switch; 2136 else if (IS_GEN7(dev_priv)) 2137 ppgtt->switch_mm = gen7_mm_switch; 2138 else 2139 BUG(); 2140 2141 ret = gen6_ppgtt_alloc(ppgtt); 2142 if (ret) 2143 return ret; 2144 2145 ppgtt->base.allocate_va_range = gen6_alloc_va_range; 2146 ppgtt->base.clear_range = gen6_ppgtt_clear_range; 2147 ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; 2148 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 2149 ppgtt->base.bind_vma = ppgtt_bind_vma; 2150 ppgtt->base.cleanup = gen6_ppgtt_cleanup; 2151 ppgtt->base.start = 0; 2152 ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE; 2153 ppgtt->debug_dump = gen6_dump_ppgtt; 2154 2155 ppgtt->pd.base.ggtt_offset = 2156 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t); 2157 2158 ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + 2159 ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t); 2160 2161 gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total); 2162 2163 gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total); 2164 2165 DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n", 2166 ppgtt->node.size >> 20, 2167 ppgtt->node.start / PAGE_SIZE); 2168 2169 DRM_DEBUG("Adding PPGTT at offset %x\n", 2170 ppgtt->pd.base.ggtt_offset << 10); 2171 2172 return 0; 2173 } 2174 2175 static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt, 2176 struct drm_i915_private *dev_priv) 2177 { 2178 ppgtt->base.dev = &dev_priv->drm; 2179 2180 if (INTEL_INFO(dev_priv)->gen < 8) 2181 return gen6_ppgtt_init(ppgtt); 2182 else 2183 return gen8_ppgtt_init(ppgtt); 2184 } 2185 2186 static void i915_address_space_init(struct i915_address_space *vm, 2187 struct drm_i915_private *dev_priv) 2188 { 2189 drm_mm_init(&vm->mm, vm->start, vm->total); 2190 INIT_LIST_HEAD(&vm->active_list); 2191 INIT_LIST_HEAD(&vm->inactive_list); 2192 INIT_LIST_HEAD(&vm->unbound_list); 2193 
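/* Publish the new address space on the device-wide vm_list. */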
list_add_tail(&vm->global_link, &dev_priv->vm_list); 2194 } 2195 2196 static void gtt_write_workarounds(struct drm_device *dev) 2197 { 2198 struct drm_i915_private *dev_priv = to_i915(dev); 2199 2200 /* This function is for gtt related workarounds. This function is 2201 * called on driver load and after a GPU reset, so you can place 2202 * workarounds here even if they get overwritten by GPU reset. 2203 */ 2204 /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */ 2205 if (IS_BROADWELL(dev_priv)) 2206 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); 2207 else if (IS_CHERRYVIEW(dev_priv)) 2208 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); 2209 else if (IS_SKYLAKE(dev_priv)) 2210 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); 2211 else if (IS_BROXTON(dev_priv)) 2212 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); 2213 } 2214 2215 static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt, 2216 struct drm_i915_private *dev_priv, 2217 struct drm_i915_file_private *file_priv) 2218 { 2219 int ret; 2220 2221 ret = __hw_ppgtt_init(ppgtt, dev_priv); 2222 if (ret == 0) { 2223 kref_init(&ppgtt->ref); 2224 i915_address_space_init(&ppgtt->base, dev_priv); 2225 ppgtt->base.file = file_priv; 2226 } 2227 2228 return ret; 2229 } 2230 2231 int i915_ppgtt_init_hw(struct drm_device *dev) 2232 { 2233 struct drm_i915_private *dev_priv = to_i915(dev); 2234 2235 gtt_write_workarounds(dev); 2236 2237 /* In the case of execlists, PPGTT is enabled by the context descriptor 2238 * and the PDPs are contained within the context itself. We don't 2239 * need to do anything here. */ 2240 if (i915.enable_execlists) 2241 return 0; 2242 2243 if (!USES_PPGTT(dev)) 2244 return 0; 2245 2246 if (IS_GEN6(dev_priv)) 2247 gen6_ppgtt_enable(dev); 2248 else if (IS_GEN7(dev_priv)) 2249 gen7_ppgtt_enable(dev); 2250 else if (INTEL_INFO(dev)->gen >= 8) 2251 gen8_ppgtt_enable(dev); 2252 else 2253 MISSING_CASE(INTEL_INFO(dev)->gen); 2254 2255 return 0; 2256 } 2257 2258 struct i915_hw_ppgtt * 2259 i915_ppgtt_create(struct drm_i915_private *dev_priv, 2260 struct drm_i915_file_private *fpriv) 2261 { 2262 struct i915_hw_ppgtt *ppgtt; 2263 int ret; 2264 2265 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 2266 if (!ppgtt) 2267 return ERR_PTR(-ENOMEM); 2268 2269 ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv); 2270 if (ret) { 2271 kfree(ppgtt); 2272 return ERR_PTR(ret); 2273 } 2274 2275 trace_i915_ppgtt_create(&ppgtt->base); 2276 2277 return ppgtt; 2278 } 2279 2280 void i915_ppgtt_release(struct kref *kref) 2281 { 2282 struct i915_hw_ppgtt *ppgtt = 2283 container_of(kref, struct i915_hw_ppgtt, ref); 2284 2285 trace_i915_ppgtt_release(&ppgtt->base); 2286 2287 /* vmas should already be unbound and destroyed */ 2288 WARN_ON(!list_empty(&ppgtt->base.active_list)); 2289 WARN_ON(!list_empty(&ppgtt->base.inactive_list)); 2290 WARN_ON(!list_empty(&ppgtt->base.unbound_list)); 2291 2292 list_del(&ppgtt->base.global_link); 2293 drm_mm_takedown(&ppgtt->base.mm); 2294 2295 ppgtt->base.cleanup(&ppgtt->base); 2296 kfree(ppgtt); 2297 } 2298 2299 /* Certain Gen5 chipsets require idling the GPU before 2300 * unmapping anything from the GTT when VT-d is enabled. 2301 */ 2302 static bool needs_idle_maps(struct drm_i915_private *dev_priv) 2303 { 2304 #ifdef CONFIG_INTEL_IOMMU 2305 /* Query intel_iommu to see if we need the workaround. Presumably that 2306 * was loaded first.
2307 */ 2308 if (IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_iommu_gfx_mapped) 2309 return true; 2310 #endif 2311 return false; 2312 } 2313 2314 void i915_check_and_clear_faults(struct drm_i915_private *dev_priv) 2315 { 2316 struct intel_engine_cs *engine; 2317 enum intel_engine_id id; 2318 2319 if (INTEL_INFO(dev_priv)->gen < 6) 2320 return; 2321 2322 for_each_engine(engine, dev_priv, id) { 2323 u32 fault_reg; 2324 fault_reg = I915_READ(RING_FAULT_REG(engine)); 2325 if (fault_reg & RING_FAULT_VALID) { 2326 DRM_DEBUG_DRIVER("Unexpected fault\n" 2327 "\tAddr: 0x%08ux\n" 2328 "\tAddress space: %s\n" 2329 "\tSource ID: %d\n" 2330 "\tType: %d\n", 2331 fault_reg & LINUX_PAGE_MASK, 2332 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT", 2333 RING_FAULT_SRCID(fault_reg), 2334 RING_FAULT_FAULT_TYPE(fault_reg)); 2335 I915_WRITE(RING_FAULT_REG(engine), 2336 fault_reg & ~RING_FAULT_VALID); 2337 } 2338 } 2339 2340 /* Engine specific init may not have been done till this point. */ 2341 if (dev_priv->engine[RCS]) 2342 POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS])); 2343 } 2344 2345 static void i915_ggtt_flush(struct drm_i915_private *dev_priv) 2346 { 2347 if (INTEL_INFO(dev_priv)->gen < 6) { 2348 intel_gtt_chipset_flush(); 2349 } else { 2350 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2351 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2352 } 2353 } 2354 2355 void i915_gem_suspend_gtt_mappings(struct drm_device *dev) 2356 { 2357 struct drm_i915_private *dev_priv = to_i915(dev); 2358 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2359 2360 /* Don't bother messing with faults pre GEN6 as we have little 2361 * documentation supporting that it's a good idea. 2362 */ 2363 if (INTEL_INFO(dev)->gen < 6) 2364 return; 2365 2366 i915_check_and_clear_faults(dev_priv); 2367 2368 ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total); 2369 2370 i915_ggtt_flush(dev_priv); 2371 } 2372 2373 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj) 2374 { 2375 if (!dma_map_sg(&obj->base.dev->pdev->dev, 2376 obj->pages->sgl, obj->pages->nents, 2377 PCI_DMA_BIDIRECTIONAL)) 2378 return -ENOSPC; 2379 2380 return 0; 2381 } 2382 2383 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) 2384 { 2385 writeq(pte, addr); 2386 } 2387 2388 static void gen8_ggtt_insert_page(struct i915_address_space *vm, 2389 dma_addr_t addr, 2390 uint64_t offset, 2391 enum i915_cache_level level, 2392 u32 unused) 2393 { 2394 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2395 gen8_pte_t __iomem *pte = 2396 (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + 2397 (offset >> PAGE_SHIFT); 2398 int rpm_atomic_seq; 2399 2400 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2401 2402 gen8_set_pte(pte, gen8_pte_encode(addr, level)); 2403 2404 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2405 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2406 2407 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2408 } 2409 2410 static void gen8_ggtt_insert_entries(struct i915_address_space *vm, 2411 struct sg_table *st, 2412 uint64_t start, 2413 enum i915_cache_level level, u32 unused) 2414 { 2415 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2416 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2417 struct sgt_iter sgt_iter; 2418 gen8_pte_t __iomem *gtt_entries; 2419 gen8_pte_t gtt_entry; 2420 dma_addr_t addr; 2421 int rpm_atomic_seq; 2422 int i = 0; 2423 2424 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2425 2426 gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT); 2427 2428 for_each_sgt_dma(addr, sgt_iter, st) { 
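/* Encode each backing-store DMA address as a gen8 PTE and write it straight into the GSM mapping of the GGTT page table. */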
2429 gtt_entry = gen8_pte_encode(addr, level); 2430 gen8_set_pte(&gtt_entries[i++], gtt_entry); 2431 } 2432 2433 /* 2434 * XXX: This serves as a posting read to make sure that the PTE has 2435 * actually been updated. There is some concern that even though 2436 * registers and PTEs are within the same BAR that they are potentially 2437 * of NUMA access patterns. Therefore, even with the way we assume 2438 * hardware should work, we must keep this posting read for paranoia. 2439 */ 2440 if (i != 0) 2441 WARN_ON(readq(&gtt_entries[i-1]) != gtt_entry); 2442 2443 /* This next bit makes the above posting read even more important. We 2444 * want to flush the TLBs only after we're certain all the PTE updates 2445 * have finished. 2446 */ 2447 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2448 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2449 2450 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2451 } 2452 2453 struct insert_entries { 2454 struct i915_address_space *vm; 2455 struct sg_table *st; 2456 uint64_t start; 2457 enum i915_cache_level level; 2458 u32 flags; 2459 }; 2460 2461 static int gen8_ggtt_insert_entries__cb(void *_arg) 2462 { 2463 struct insert_entries *arg = _arg; 2464 gen8_ggtt_insert_entries(arg->vm, arg->st, 2465 arg->start, arg->level, arg->flags); 2466 return 0; 2467 } 2468 2469 static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm, 2470 struct sg_table *st, 2471 uint64_t start, 2472 enum i915_cache_level level, 2473 u32 flags) 2474 { 2475 struct insert_entries arg = { vm, st, start, level, flags }; 2476 stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL); 2477 } 2478 2479 static void gen6_ggtt_insert_page(struct i915_address_space *vm, 2480 dma_addr_t addr, 2481 uint64_t offset, 2482 enum i915_cache_level level, 2483 u32 flags) 2484 { 2485 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2486 gen6_pte_t __iomem *pte = 2487 (gen6_pte_t __iomem *)dev_priv->ggtt.gsm + 2488 (offset >> PAGE_SHIFT); 2489 int rpm_atomic_seq; 2490 2491 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2492 2493 iowrite32(vm->pte_encode(addr, level, flags), pte); 2494 2495 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2496 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2497 2498 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2499 } 2500 2501 /* 2502 * Binds an object into the global gtt with the specified cache level. The object 2503 * will be accessible to the GPU via commands whose operands reference offsets 2504 * within the global GTT as well as accessible by the CPU through the GMADR 2505 * mapped BAR (dev_priv->mm.gtt->gtt). 2506 */ 2507 static void gen6_ggtt_insert_entries(struct i915_address_space *vm, 2508 struct sg_table *st, 2509 uint64_t start, 2510 enum i915_cache_level level, u32 flags) 2511 { 2512 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2513 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2514 struct sgt_iter sgt_iter; 2515 gen6_pte_t __iomem *gtt_entries; 2516 gen6_pte_t gtt_entry; 2517 dma_addr_t addr; 2518 int rpm_atomic_seq; 2519 int i = 0; 2520 2521 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2522 2523 gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT); 2524 2525 for_each_sgt_dma(addr, sgt_iter, st) { 2526 gtt_entry = vm->pte_encode(addr, level, flags); 2527 iowrite32(gtt_entry, &gtt_entries[i++]); 2528 } 2529 2530 /* XXX: This serves as a posting read to make sure that the PTE has 2531 * actually been updated.
There is some concern that even though 2532 * registers and PTEs are within the same BAR that they are potentially 2533 * of NUMA access patterns. Therefore, even with the way we assume 2534 * hardware should work, we must keep this posting read for paranoia. 2535 */ 2536 if (i != 0) 2537 WARN_ON(readl(&gtt_entries[i-1]) != gtt_entry); 2538 2539 /* This next bit makes the above posting read even more important. We 2540 * want to flush the TLBs only after we're certain all the PTE updates 2541 * have finished. 2542 */ 2543 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2544 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2545 2546 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2547 } 2548 2549 static void nop_clear_range(struct i915_address_space *vm, 2550 uint64_t start, uint64_t length) 2551 { 2552 } 2553 2554 static void gen8_ggtt_clear_range(struct i915_address_space *vm, 2555 uint64_t start, uint64_t length) 2556 { 2557 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2558 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2559 unsigned first_entry = start >> PAGE_SHIFT; 2560 unsigned num_entries = length >> PAGE_SHIFT; 2561 gen8_pte_t scratch_pte, __iomem *gtt_base = 2562 (gen8_pte_t __iomem *)ggtt->gsm + first_entry; 2563 const int max_entries = ggtt_total_entries(ggtt) - first_entry; 2564 int i; 2565 int rpm_atomic_seq; 2566 2567 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2568 2569 if (WARN(num_entries > max_entries, 2570 "First entry = %d; Num entries = %d (max=%d)\n", 2571 first_entry, num_entries, max_entries)) 2572 num_entries = max_entries; 2573 2574 scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, 2575 I915_CACHE_LLC); 2576 for (i = 0; i < num_entries; i++) 2577 gen8_set_pte(&gtt_base[i], scratch_pte); 2578 readl(gtt_base); 2579 2580 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2581 } 2582 2583 static void gen6_ggtt_clear_range(struct i915_address_space *vm, 2584 uint64_t start, 2585 uint64_t length) 2586 { 2587 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2588 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2589 unsigned first_entry = start >> PAGE_SHIFT; 2590 unsigned num_entries = length >> PAGE_SHIFT; 2591 gen6_pte_t scratch_pte, __iomem *gtt_base = 2592 (gen6_pte_t __iomem *)ggtt->gsm + first_entry; 2593 const int max_entries = ggtt_total_entries(ggtt) - first_entry; 2594 int i; 2595 int rpm_atomic_seq; 2596 2597 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2598 2599 if (WARN(num_entries > max_entries, 2600 "First entry = %d; Num entries = %d (max=%d)\n", 2601 first_entry, num_entries, max_entries)) 2602 num_entries = max_entries; 2603 2604 scratch_pte = vm->pte_encode(vm->scratch_page.daddr, 2605 I915_CACHE_LLC, 0); 2606 2607 for (i = 0; i < num_entries; i++) 2608 iowrite32(scratch_pte, &gtt_base[i]); 2609 readl(gtt_base); 2610 2611 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2612 } 2613 2614 static void i915_ggtt_insert_page(struct i915_address_space *vm, 2615 dma_addr_t addr, 2616 uint64_t offset, 2617 enum i915_cache_level cache_level, 2618 u32 unused) 2619 { 2620 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2621 unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2622 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; 2623 int rpm_atomic_seq; 2624 2625 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2626 2627 intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); 2628 2629 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2630 } 2631 2632 static void i915_ggtt_insert_entries(struct i915_address_space *vm, 2633 struct sg_table *pages, 2634 uint64_t start, 2635 enum i915_cache_level cache_level, u32 unused) 2636 { 2637 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2638 unsigned int flags = (cache_level == I915_CACHE_NONE) ? 2639 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; 2640 int rpm_atomic_seq; 2641 2642 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2643 2644 intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags); 2645 2646 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2647 2648 } 2649 2650 static void i915_ggtt_clear_range(struct i915_address_space *vm, 2651 uint64_t start, 2652 uint64_t length) 2653 { 2654 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2655 unsigned first_entry = start >> PAGE_SHIFT; 2656 unsigned num_entries = length >> PAGE_SHIFT; 2657 int rpm_atomic_seq; 2658 2659 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2660 2661 intel_gtt_clear_range(first_entry, num_entries); 2662 2663 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2664 } 2665 2666 static int ggtt_bind_vma(struct i915_vma *vma, 2667 enum i915_cache_level cache_level, 2668 u32 flags) 2669 { 2670 struct drm_i915_gem_object *obj = vma->obj; 2671 u32 pte_flags = 0; 2672 int ret; 2673 2674 ret = i915_get_ggtt_vma_pages(vma); 2675 if (ret) 2676 return ret; 2677 2678 /* Currently applicable only to VLV */ 2679 if (obj->gt_ro) 2680 pte_flags |= PTE_READ_ONLY; 2681 2682 vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start, 2683 cache_level, pte_flags); 2684 2685 /* 2686 * Without aliasing PPGTT there's no difference between 2687 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally 2688 * upgrade to both bound if we bind either to avoid double-binding. 
2689 */ 2690 vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; 2691 2692 return 0; 2693 } 2694 2695 static int aliasing_gtt_bind_vma(struct i915_vma *vma, 2696 enum i915_cache_level cache_level, 2697 u32 flags) 2698 { 2699 u32 pte_flags; 2700 int ret; 2701 2702 ret = i915_get_ggtt_vma_pages(vma); 2703 if (ret) 2704 return ret; 2705 2706 /* Currently applicable only to VLV */ 2707 pte_flags = 0; 2708 if (vma->obj->gt_ro) 2709 pte_flags |= PTE_READ_ONLY; 2710 2711 2712 if (flags & I915_VMA_GLOBAL_BIND) { 2713 vma->vm->insert_entries(vma->vm, 2714 vma->pages, vma->node.start, 2715 cache_level, pte_flags); 2716 } 2717 2718 if (flags & I915_VMA_LOCAL_BIND) { 2719 struct i915_hw_ppgtt *appgtt = 2720 to_i915(vma->vm->dev)->mm.aliasing_ppgtt; 2721 appgtt->base.insert_entries(&appgtt->base, 2722 vma->pages, vma->node.start, 2723 cache_level, pte_flags); 2724 } 2725 2726 return 0; 2727 } 2728 2729 static void ggtt_unbind_vma(struct i915_vma *vma) 2730 { 2731 struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt; 2732 const u64 size = min(vma->size, vma->node.size); 2733 2734 if (vma->flags & I915_VMA_GLOBAL_BIND) 2735 vma->vm->clear_range(vma->vm, 2736 vma->node.start, size); 2737 2738 if (vma->flags & I915_VMA_LOCAL_BIND && appgtt) 2739 appgtt->base.clear_range(&appgtt->base, 2740 vma->node.start, size); 2741 } 2742 2743 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj) 2744 { 2745 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2746 struct device *kdev = &dev_priv->drm.pdev->dev; 2747 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2748 2749 if (unlikely(ggtt->do_idle_maps)) { 2750 if (i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED)) { 2751 DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); 2752 /* Wait a bit, in hopes it avoids the hang */ 2753 udelay(10); 2754 } 2755 } 2756 2757 dma_unmap_sg(kdev, obj->pages->sgl, obj->pages->nents, 2758 PCI_DMA_BIDIRECTIONAL); 2759 } 2760 2761 static void i915_gtt_color_adjust(struct drm_mm_node *node, 2762 unsigned long color, 2763 u64 *start, 2764 u64 *end) 2765 { 2766 if (node->color != color) 2767 *start += 4096; 2768 2769 node = list_first_entry_or_null(&node->node_list, 2770 struct drm_mm_node, 2771 node_list); 2772 if (node && node->allocated && node->color != color) 2773 *end -= 4096; 2774 } 2775 2776 int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) 2777 { 2778 /* Let GEM Manage all of the aperture. 2779 * 2780 * However, leave one page at the end still bound to the scratch page. 2781 * There are a number of places where the hardware apparently prefetches 2782 * past the end of the object, and we've seen multiple hangs with the 2783 * GPU head pointer stuck in a batchbuffer bound at the last page of the 2784 * aperture. One page should be enough to keep any prefetching inside 2785 * of the aperture. 
2786 */ 2787 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2788 unsigned long hole_start, hole_end; 2789 struct i915_hw_ppgtt *ppgtt; 2790 struct drm_mm_node *entry; 2791 int ret; 2792 unsigned long mappable = min(ggtt->base.total, ggtt->mappable_end); 2793 2794 ret = intel_vgt_balloon(dev_priv); 2795 if (ret) 2796 return ret; 2797 2798 /* Reserve a mappable slot for our lockless error capture */ 2799 ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, 2800 &ggtt->error_capture, 2801 4096, 0, -1, 2802 0, ggtt->mappable_end, 2803 0, 0); 2804 if (ret) 2805 return ret; 2806 2807 /* Clear any non-preallocated blocks */ 2808 drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) { 2809 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", 2810 hole_start, hole_end); 2811 ggtt->base.clear_range(&ggtt->base, hole_start, 2812 hole_end - hole_start); 2813 } 2814 2815 #ifdef __DragonFly__ 2816 DRM_INFO("taking over the fictitious range 0x%llx-0x%llx\n", 2817 dev_priv->ggtt.mappable_base, dev_priv->ggtt.mappable_end); 2818 vm_phys_fictitious_reg_range(dev_priv->ggtt.mappable_base, 2819 dev_priv->ggtt.mappable_base + mappable, VM_MEMATTR_WRITE_COMBINING); 2820 #endif 2821 2822 /* And finally clear the reserved guard page */ 2823 ggtt->base.clear_range(&ggtt->base, 2824 ggtt->base.total - PAGE_SIZE, PAGE_SIZE); 2825 2826 if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) { 2827 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 2828 if (!ppgtt) { 2829 ret = -ENOMEM; 2830 goto err; 2831 } 2832 2833 ret = __hw_ppgtt_init(ppgtt, dev_priv); 2834 if (ret) 2835 goto err_ppgtt; 2836 2837 if (ppgtt->base.allocate_va_range) { 2838 ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0, 2839 ppgtt->base.total); 2840 if (ret) 2841 goto err_ppgtt_cleanup; 2842 } 2843 2844 ppgtt->base.clear_range(&ppgtt->base, 2845 ppgtt->base.start, 2846 ppgtt->base.total); 2847 2848 dev_priv->mm.aliasing_ppgtt = ppgtt; 2849 WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma); 2850 ggtt->base.bind_vma = aliasing_gtt_bind_vma; 2851 } 2852 2853 return 0; 2854 2855 err_ppgtt_cleanup: 2856 ppgtt->base.cleanup(&ppgtt->base); 2857 err_ppgtt: 2858 kfree(ppgtt); 2859 err: 2860 drm_mm_remove_node(&ggtt->error_capture); 2861 return ret; 2862 } 2863 2864 /** 2865 * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization 2866 * @dev_priv: i915 device 2867 */ 2868 void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) 2869 { 2870 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2871 2872 if (dev_priv->mm.aliasing_ppgtt) { 2873 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; 2874 ppgtt->base.cleanup(&ppgtt->base); 2875 kfree(ppgtt); 2876 } 2877 2878 i915_gem_cleanup_stolen(&dev_priv->drm); 2879 2880 if (drm_mm_node_allocated(&ggtt->error_capture)) 2881 drm_mm_remove_node(&ggtt->error_capture); 2882 2883 if (drm_mm_initialized(&ggtt->base.mm)) { 2884 intel_vgt_deballoon(dev_priv); 2885 2886 drm_mm_takedown(&ggtt->base.mm); 2887 list_del(&ggtt->base.global_link); 2888 } 2889 2890 ggtt->base.cleanup(&ggtt->base); 2891 2892 arch_phys_wc_del(ggtt->mtrr); 2893 io_mapping_fini(&ggtt->mappable); 2894 } 2895 2896 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) 2897 { 2898 snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; 2899 snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; 2900 return snb_gmch_ctl << 20; 2901 } 2902 2903 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) 2904 { 2905 bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; 2906 bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; 2907 if (bdw_gmch_ctl) 2908 bdw_gmch_ctl = 1 << bdw_gmch_ctl; 2909 2910 
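/* GGMS is a power-of-two exponent, so bdw_gmch_ctl now holds the GGTT page-table size in MB; the shift by 20 below converts it to bytes. */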
#ifdef CONFIG_X86_32 2911 /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */ 2912 if (bdw_gmch_ctl > 4) 2913 bdw_gmch_ctl = 4; 2914 #endif 2915 2916 return bdw_gmch_ctl << 20; 2917 } 2918 2919 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) 2920 { 2921 gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; 2922 gmch_ctrl &= SNB_GMCH_GGMS_MASK; 2923 2924 if (gmch_ctrl) 2925 return 1 << (20 + gmch_ctrl); 2926 2927 return 0; 2928 } 2929 2930 static size_t gen6_get_stolen_size(u16 snb_gmch_ctl) 2931 { 2932 snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; 2933 snb_gmch_ctl &= SNB_GMCH_GMS_MASK; 2934 return snb_gmch_ctl << 25; /* 32 MB units */ 2935 } 2936 2937 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl) 2938 { 2939 bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2940 bdw_gmch_ctl &= BDW_GMCH_GMS_MASK; 2941 return bdw_gmch_ctl << 25; /* 32 MB units */ 2942 } 2943 2944 static size_t chv_get_stolen_size(u16 gmch_ctrl) 2945 { 2946 gmch_ctrl >>= SNB_GMCH_GMS_SHIFT; 2947 gmch_ctrl &= SNB_GMCH_GMS_MASK; 2948 2949 /* 2950 * 0x0 to 0x10: 32MB increments starting at 0MB 2951 * 0x11 to 0x16: 4MB increments starting at 8MB 2952 * 0x17 to 0x1d: 4MB increments start at 36MB 2953 */ 2954 if (gmch_ctrl < 0x11) 2955 return gmch_ctrl << 25; 2956 else if (gmch_ctrl < 0x17) 2957 return (gmch_ctrl - 0x11 + 2) << 22; 2958 else 2959 return (gmch_ctrl - 0x17 + 9) << 22; 2960 } 2961 2962 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) 2963 { 2964 gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2965 gen9_gmch_ctl &= BDW_GMCH_GMS_MASK; 2966 2967 if (gen9_gmch_ctl < 0xf0) 2968 return gen9_gmch_ctl << 25; /* 32 MB units */ 2969 else 2970 /* 4MB increments starting at 0xf0 for 4MB */ 2971 return (gen9_gmch_ctl - 0xf0 + 1) << 22; 2972 } 2973 2974 static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) 2975 { 2976 struct pci_dev *pdev = ggtt->base.dev->pdev; 2977 phys_addr_t phys_addr; 2978 int ret; 2979 2980 /* For Modern GENs the PTEs and register space are split in the BAR */ 2981 phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2; 2982 2983 /* 2984 * On BXT writes larger than 64 bit to the GTT pagetable range will be 2985 * dropped. For WC mappings in general we have 64 byte burst writes 2986 * when the WC buffer is flushed, so we can't use it, but have to 2987 * resort to an uncached mapping. The WC issue is easily caught by the 2988 * readback check when writing GTT PTE entries. 2989 */ 2990 if (IS_BROXTON(to_i915(ggtt->base.dev))) 2991 ggtt->gsm = ioremap_nocache(phys_addr, size); 2992 else 2993 ggtt->gsm = ioremap_wc(phys_addr, size); 2994 if (!ggtt->gsm) { 2995 DRM_ERROR("Failed to map the ggtt page table\n"); 2996 return -ENOMEM; 2997 } 2998 2999 ret = setup_scratch_page(ggtt->base.dev, 3000 &ggtt->base.scratch_page, 3001 GFP_DMA32); 3002 if (ret) { 3003 DRM_ERROR("Scratch setup failed\n"); 3004 /* iounmap will also get called at remove, but meh */ 3005 iounmap(ggtt->gsm); 3006 return ret; 3007 } 3008 3009 return 0; 3010 } 3011 3012 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability 3013 * bits. When using advanced contexts each context stores its own PAT, but 3014 * writing this data shouldn't be harmful even in those cases. */ 3015 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) 3016 { 3017 uint64_t pat; 3018 3019 pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ 3020 GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? 
*/ 3021 GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */ 3022 GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */ 3023 GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) | 3024 GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) | 3025 GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) | 3026 GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); 3027 3028 if (!USES_PPGTT(dev_priv)) 3029 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry, 3030 * so RTL will always use the value corresponding to 3031 * pat_sel = 000". 3032 * So let's disable cache for GGTT to avoid screen corruptions. 3033 * MOCS still can be used though. 3034 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work 3035 * before this patch, i.e. the same uncached + snooping access 3036 * like on gen6/7 seems to be in effect. 3037 * - So this just fixes blitter/render access. Again it looks 3038 * like it's not just uncached access, but uncached + snooping. 3039 * So we can still hold onto all our assumptions wrt cpu 3040 * clflushing on LLC machines. 3041 */ 3042 pat = GEN8_PPAT(0, GEN8_PPAT_UC); 3043 3044 /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b 3045 * write would work. */ 3046 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat); 3047 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); 3048 } 3049 3050 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) 3051 { 3052 uint64_t pat; 3053 3054 /* 3055 * Map WB on BDW to snooped on CHV. 3056 * 3057 * Only the snoop bit has meaning for CHV, the rest is 3058 * ignored. 3059 * 3060 * The hardware will never snoop for certain types of accesses: 3061 * - CPU GTT (GMADR->GGTT->no snoop->memory) 3062 * - PPGTT page tables 3063 * - some other special cycles 3064 * 3065 * As with BDW, we also need to consider the following for GT accesses: 3066 * "For GGTT, there is NO pat_sel[2:0] from the entry, 3067 * so RTL will always use the value corresponding to 3068 * pat_sel = 000". 3069 * Which means we must set the snoop bit in PAT entry 0 3070 * in order to keep the global status page working. 
3071 */ 3072 pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) | 3073 GEN8_PPAT(1, 0) | 3074 GEN8_PPAT(2, 0) | 3075 GEN8_PPAT(3, 0) | 3076 GEN8_PPAT(4, CHV_PPAT_SNOOP) | 3077 GEN8_PPAT(5, CHV_PPAT_SNOOP) | 3078 GEN8_PPAT(6, CHV_PPAT_SNOOP) | 3079 GEN8_PPAT(7, CHV_PPAT_SNOOP); 3080 3081 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat); 3082 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); 3083 } 3084 3085 static void gen6_gmch_remove(struct i915_address_space *vm) 3086 { 3087 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 3088 3089 iounmap(ggtt->gsm); 3090 cleanup_scratch_page(vm->dev, &vm->scratch_page); 3091 } 3092 3093 static int gen8_gmch_probe(struct i915_ggtt *ggtt) 3094 { 3095 struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); 3096 struct pci_dev *pdev = dev_priv->drm.pdev; 3097 unsigned int size; 3098 u16 snb_gmch_ctl; 3099 3100 /* TODO: We're not aware of mappable constraints on gen8 yet */ 3101 ggtt->mappable_base = pci_resource_start(pdev, 2); 3102 ggtt->mappable_end = pci_resource_len(pdev, 2); 3103 3104 #if 0 3105 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(39))) 3106 pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); 3107 #endif 3108 3109 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 3110 3111 if (INTEL_GEN(dev_priv) >= 9) { 3112 ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl); 3113 size = gen8_get_total_gtt_size(snb_gmch_ctl); 3114 } else if (IS_CHERRYVIEW(dev_priv)) { 3115 ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl); 3116 size = chv_get_total_gtt_size(snb_gmch_ctl); 3117 } else { 3118 ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl); 3119 size = gen8_get_total_gtt_size(snb_gmch_ctl); 3120 } 3121 3122 ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT; 3123 3124 if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) 3125 chv_setup_private_ppat(dev_priv); 3126 else 3127 bdw_setup_private_ppat(dev_priv); 3128 3129 ggtt->base.cleanup = gen6_gmch_remove; 3130 ggtt->base.bind_vma = ggtt_bind_vma; 3131 ggtt->base.unbind_vma = ggtt_unbind_vma; 3132 ggtt->base.insert_page = gen8_ggtt_insert_page; 3133 ggtt->base.clear_range = nop_clear_range; 3134 if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv)) 3135 ggtt->base.clear_range = gen8_ggtt_clear_range; 3136 3137 ggtt->base.insert_entries = gen8_ggtt_insert_entries; 3138 if (IS_CHERRYVIEW(dev_priv)) 3139 ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL; 3140 3141 return ggtt_probe_common(ggtt, size); 3142 } 3143 3144 static int gen6_gmch_probe(struct i915_ggtt *ggtt) 3145 { 3146 struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); 3147 struct pci_dev *pdev = dev_priv->drm.pdev; 3148 unsigned int size; 3149 u16 snb_gmch_ctl; 3150 3151 ggtt->mappable_base = pci_resource_start(pdev, 2); 3152 ggtt->mappable_end = pci_resource_len(pdev, 2); 3153 3154 /* 64/512MB is the current min/max we actually know of, but this is just 3155 * a coarse sanity check. 
3156 */ 3157 if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) { 3158 DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end); 3159 return -ENXIO; 3160 } 3161 3162 #if 0 3163 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(40))) 3164 pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); 3165 #endif 3166 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 3167 3168 ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl); 3169 3170 size = gen6_get_total_gtt_size(snb_gmch_ctl); 3171 ggtt->base.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT; 3172 3173 ggtt->base.clear_range = gen6_ggtt_clear_range; 3174 ggtt->base.insert_page = gen6_ggtt_insert_page; 3175 ggtt->base.insert_entries = gen6_ggtt_insert_entries; 3176 ggtt->base.bind_vma = ggtt_bind_vma; 3177 ggtt->base.unbind_vma = ggtt_unbind_vma; 3178 ggtt->base.cleanup = gen6_gmch_remove; 3179 3180 if (HAS_EDRAM(dev_priv)) 3181 ggtt->base.pte_encode = iris_pte_encode; 3182 else if (IS_HASWELL(dev_priv)) 3183 ggtt->base.pte_encode = hsw_pte_encode; 3184 else if (IS_VALLEYVIEW(dev_priv)) 3185 ggtt->base.pte_encode = byt_pte_encode; 3186 else if (INTEL_GEN(dev_priv) >= 7) 3187 ggtt->base.pte_encode = ivb_pte_encode; 3188 else 3189 ggtt->base.pte_encode = snb_pte_encode; 3190 3191 return ggtt_probe_common(ggtt, size); 3192 } 3193 3194 static void i915_gmch_remove(struct i915_address_space *vm) 3195 { 3196 intel_gmch_remove(); 3197 } 3198 3199 static int i915_gmch_probe(struct i915_ggtt *ggtt) 3200 { 3201 struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); 3202 #if 0 3203 int ret; 3204 3205 ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL); 3206 if (!ret) { 3207 DRM_ERROR("failed to set up gmch\n"); 3208 return -EIO; 3209 } 3210 #endif 3211 3212 intel_gtt_get(&ggtt->base.total, &ggtt->stolen_size, 3213 &ggtt->mappable_base, &ggtt->mappable_end); 3214 3215 ggtt->do_idle_maps = needs_idle_maps(dev_priv); 3216 ggtt->base.insert_page = i915_ggtt_insert_page; 3217 ggtt->base.insert_entries = i915_ggtt_insert_entries; 3218 ggtt->base.clear_range = i915_ggtt_clear_range; 3219 ggtt->base.bind_vma = ggtt_bind_vma; 3220 ggtt->base.unbind_vma = ggtt_unbind_vma; 3221 ggtt->base.cleanup = i915_gmch_remove; 3222 3223 if (unlikely(ggtt->do_idle_maps)) 3224 DRM_INFO("applying Ironlake quirks for intel_iommu\n"); 3225 3226 return 0; 3227 } 3228 3229 /** 3230 * i915_ggtt_probe_hw - Probe GGTT hardware location 3231 * @dev_priv: i915 device 3232 */ 3233 int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) 3234 { 3235 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3236 int ret; 3237 3238 ggtt->base.dev = &dev_priv->drm; 3239 3240 if (INTEL_GEN(dev_priv) <= 5) 3241 ret = i915_gmch_probe(ggtt); 3242 else if (INTEL_GEN(dev_priv) < 8) 3243 ret = gen6_gmch_probe(ggtt); 3244 else 3245 ret = gen8_gmch_probe(ggtt); 3246 if (ret) 3247 return ret; 3248 3249 if ((ggtt->base.total - 1) >> 32) { 3250 DRM_ERROR("We never expected a Global GTT with more than 32bits" 3251 " of address space! Found %lldM!\n", 3252 ggtt->base.total >> 20); 3253 ggtt->base.total = 1ULL << 32; 3254 ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total); 3255 } 3256 3257 if (ggtt->mappable_end > ggtt->base.total) { 3258 DRM_ERROR("mappable aperture extends past end of GGTT," 3259 " aperture=%llx, total=%llx\n", 3260 ggtt->mappable_end, ggtt->base.total); 3261 ggtt->mappable_end = ggtt->base.total; 3262 } 3263 3264 /* GMADR is the PCI mmio aperture into the global GTT. 
*/ 3265 DRM_INFO("Memory usable by graphics device = %lluM\n", 3266 ggtt->base.total >> 20); 3267 DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20); 3268 DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", ggtt->stolen_size >> 20); 3269 #ifdef CONFIG_INTEL_IOMMU 3270 if (intel_iommu_gfx_mapped) 3271 DRM_INFO("VT-d active for gfx access\n"); 3272 #endif 3273 3274 return 0; 3275 } 3276 3277 /** 3278 * i915_ggtt_init_hw - Initialize GGTT hardware 3279 * @dev_priv: i915 device 3280 */ 3281 int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) 3282 { 3283 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3284 int ret; 3285 3286 INIT_LIST_HEAD(&dev_priv->vm_list); 3287 3288 /* Subtract the guard page before address space initialization to 3289 * shrink the range used by drm_mm. 3290 */ 3291 ggtt->base.total -= PAGE_SIZE; 3292 i915_address_space_init(&ggtt->base, dev_priv); 3293 ggtt->base.total += PAGE_SIZE; 3294 if (!HAS_LLC(dev_priv)) 3295 ggtt->base.mm.color_adjust = i915_gtt_color_adjust; 3296 3297 if (!io_mapping_init_wc(&dev_priv->ggtt.mappable, 3298 dev_priv->ggtt.mappable_base, 3299 dev_priv->ggtt.mappable_end)) { 3300 ret = -EIO; 3301 goto out_gtt_cleanup; 3302 } 3303 3304 ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base, ggtt->mappable_end); 3305 3306 /* 3307 * Initialise stolen early so that we may reserve preallocated 3308 * objects for the BIOS to KMS transition. 3309 */ 3310 ret = i915_gem_init_stolen(&dev_priv->drm); 3311 if (ret) 3312 goto out_gtt_cleanup; 3313 3314 return 0; 3315 3316 out_gtt_cleanup: 3317 ggtt->base.cleanup(&ggtt->base); 3318 return ret; 3319 } 3320 3321 int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv) 3322 { 3323 if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt()) 3324 return -EIO; 3325 3326 return 0; 3327 } 3328 3329 void i915_gem_restore_gtt_mappings(struct drm_device *dev) 3330 { 3331 struct drm_i915_private *dev_priv = to_i915(dev); 3332 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3333 struct drm_i915_gem_object *obj, *on; 3334 3335 i915_check_and_clear_faults(dev_priv); 3336 3337 /* First fill our portion of the GTT with scratch pages */ 3338 ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total); 3339 3340 ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */ 3341 3342 /* clflush objects bound into the GGTT and rebind them. 
*/ 3343 list_for_each_entry_safe(obj, on, 3344 &dev_priv->mm.bound_list, global_list) { 3345 bool ggtt_bound = false; 3346 struct i915_vma *vma; 3347 3348 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3349 if (vma->vm != &ggtt->base) 3350 continue; 3351 3352 if (!i915_vma_unbind(vma)) 3353 continue; 3354 3355 WARN_ON(i915_vma_bind(vma, obj->cache_level, 3356 PIN_UPDATE)); 3357 ggtt_bound = true; 3358 } 3359 3360 if (ggtt_bound) 3361 WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); 3362 } 3363 3364 ggtt->base.closed = false; 3365 3366 if (INTEL_INFO(dev)->gen >= 8) { 3367 if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) 3368 chv_setup_private_ppat(dev_priv); 3369 else 3370 bdw_setup_private_ppat(dev_priv); 3371 3372 return; 3373 } 3374 3375 if (USES_PPGTT(dev)) { 3376 struct i915_address_space *vm; 3377 3378 list_for_each_entry(vm, &dev_priv->vm_list, global_link) { 3379 /* TODO: Perhaps it shouldn't be gen6 specific */ 3380 3381 struct i915_hw_ppgtt *ppgtt; 3382 3383 if (i915_is_ggtt(vm)) 3384 ppgtt = dev_priv->mm.aliasing_ppgtt; 3385 else 3386 ppgtt = i915_vm_to_ppgtt(vm); 3387 3388 gen6_write_page_range(dev_priv, &ppgtt->pd, 3389 0, ppgtt->base.total); 3390 } 3391 } 3392 3393 i915_ggtt_flush(dev_priv); 3394 } 3395 3396 static void 3397 i915_vma_retire(struct i915_gem_active *active, 3398 struct drm_i915_gem_request *rq) 3399 { 3400 const unsigned int idx = rq->engine->id; 3401 struct i915_vma *vma = 3402 container_of(active, struct i915_vma, last_read[idx]); 3403 3404 GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx)); 3405 3406 i915_vma_clear_active(vma, idx); 3407 if (i915_vma_is_active(vma)) 3408 return; 3409 3410 list_move_tail(&vma->vm_link, &vma->vm->inactive_list); 3411 if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma))) 3412 WARN_ON(i915_vma_unbind(vma)); 3413 } 3414 3415 void i915_vma_destroy(struct i915_vma *vma) 3416 { 3417 GEM_BUG_ON(vma->node.allocated); 3418 GEM_BUG_ON(i915_vma_is_active(vma)); 3419 GEM_BUG_ON(!i915_vma_is_closed(vma)); 3420 GEM_BUG_ON(vma->fence); 3421 3422 list_del(&vma->vm_link); 3423 if (!i915_vma_is_ggtt(vma)) 3424 i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); 3425 3426 kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); 3427 } 3428 3429 void i915_vma_close(struct i915_vma *vma) 3430 { 3431 GEM_BUG_ON(i915_vma_is_closed(vma)); 3432 vma->flags |= I915_VMA_CLOSED; 3433 3434 list_del_init(&vma->obj_link); 3435 if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma)) 3436 WARN_ON(i915_vma_unbind(vma)); 3437 } 3438 3439 static struct i915_vma * 3440 __i915_vma_create(struct drm_i915_gem_object *obj, 3441 struct i915_address_space *vm, 3442 const struct i915_ggtt_view *view) 3443 { 3444 struct i915_vma *vma; 3445 int i; 3446 3447 GEM_BUG_ON(vm->closed); 3448 3449 vma = kzalloc(sizeof(*vma), GFP_KERNEL); 3450 if (vma == NULL) 3451 return ERR_PTR(-ENOMEM); 3452 3453 INIT_LIST_HEAD(&vma->exec_list); 3454 for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) 3455 init_request_active(&vma->last_read[i], i915_vma_retire); 3456 init_request_active(&vma->last_fence, NULL); 3457 list_add(&vma->vm_link, &vm->unbound_list); 3458 vma->vm = vm; 3459 vma->obj = obj; 3460 vma->size = obj->base.size; 3461 3462 if (view) { 3463 vma->ggtt_view = *view; 3464 if (view->type == I915_GGTT_VIEW_PARTIAL) { 3465 vma->size = view->params.partial.size; 3466 vma->size <<= PAGE_SHIFT; 3467 } else if (view->type == I915_GGTT_VIEW_ROTATED) { 3468 vma->size = 3469 intel_rotation_info_size(&view->params.rotated); 3470 vma->size <<= PAGE_SHIFT; 3471 } 3472 } 
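/* A GGTT VMA is merely flagged as such, whereas a PPGTT VMA takes a reference on its address space so the ppgtt outlives the VMA. */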
3473 3474 if (i915_is_ggtt(vm)) { 3475 vma->flags |= I915_VMA_GGTT; 3476 } else { 3477 i915_ppgtt_get(i915_vm_to_ppgtt(vm)); 3478 } 3479 3480 list_add_tail(&vma->obj_link, &obj->vma_list); 3481 return vma; 3482 } 3483 3484 static inline bool vma_matches(struct i915_vma *vma, 3485 struct i915_address_space *vm, 3486 const struct i915_ggtt_view *view) 3487 { 3488 if (vma->vm != vm) 3489 return false; 3490 3491 if (!i915_vma_is_ggtt(vma)) 3492 return true; 3493 3494 if (!view) 3495 return vma->ggtt_view.type == 0; 3496 3497 if (vma->ggtt_view.type != view->type) 3498 return false; 3499 3500 return memcmp(&vma->ggtt_view.params, 3501 &view->params, 3502 sizeof(view->params)) == 0; 3503 } 3504 3505 struct i915_vma * 3506 i915_vma_create(struct drm_i915_gem_object *obj, 3507 struct i915_address_space *vm, 3508 const struct i915_ggtt_view *view) 3509 { 3510 GEM_BUG_ON(view && !i915_is_ggtt(vm)); 3511 GEM_BUG_ON(i915_gem_obj_to_vma(obj, vm, view)); 3512 3513 return __i915_vma_create(obj, vm, view); 3514 } 3515 3516 struct i915_vma * 3517 i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 3518 struct i915_address_space *vm, 3519 const struct i915_ggtt_view *view) 3520 { 3521 struct i915_vma *vma; 3522 3523 list_for_each_entry_reverse(vma, &obj->vma_list, obj_link) 3524 if (vma_matches(vma, vm, view)) 3525 return vma; 3526 3527 return NULL; 3528 } 3529 3530 struct i915_vma * 3531 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, 3532 struct i915_address_space *vm, 3533 const struct i915_ggtt_view *view) 3534 { 3535 struct i915_vma *vma; 3536 3537 GEM_BUG_ON(view && !i915_is_ggtt(vm)); 3538 3539 vma = i915_gem_obj_to_vma(obj, vm, view); 3540 if (!vma) 3541 vma = __i915_vma_create(obj, vm, view); 3542 3543 GEM_BUG_ON(i915_vma_is_closed(vma)); 3544 return vma; 3545 } 3546 3547 static struct scatterlist * 3548 rotate_pages(const dma_addr_t *in, unsigned int offset, 3549 unsigned int width, unsigned int height, 3550 unsigned int stride, 3551 struct sg_table *st, struct scatterlist *sg) 3552 { 3553 unsigned int column, row; 3554 unsigned int src_idx; 3555 3556 for (column = 0; column < width; column++) { 3557 src_idx = stride * (height - 1) + column; 3558 for (row = 0; row < height; row++) { 3559 st->nents++; 3560 /* We don't need the pages, but need to initialize 3561 * the entries so the sg list can be happily traversed. 3562 * The only thing we need are DMA addresses. 3563 */ 3564 sg_set_page(sg, NULL, PAGE_SIZE, 0); 3565 sg_dma_address(sg) = in[offset + src_idx]; 3566 sg_dma_len(sg) = PAGE_SIZE; 3567 sg = sg_next(sg); 3568 src_idx -= stride; 3569 } 3570 } 3571 3572 return sg; 3573 } 3574 3575 static struct sg_table * 3576 intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info, 3577 struct drm_i915_gem_object *obj) 3578 { 3579 const size_t n_pages = obj->base.size / PAGE_SIZE; 3580 unsigned int size = intel_rotation_info_size(rot_info); 3581 struct sgt_iter sgt_iter; 3582 dma_addr_t dma_addr; 3583 unsigned long i; 3584 dma_addr_t *page_addr_list; 3585 struct sg_table *st; 3586 struct scatterlist *sg; 3587 int ret = -ENOMEM; 3588 3589 /* Allocate a temporary list of source pages for random access. */ 3590 page_addr_list = drm_malloc_gfp(n_pages, 3591 sizeof(dma_addr_t), 3592 GFP_TEMPORARY); 3593 if (!page_addr_list) 3594 return ERR_PTR(ret); 3595 3596 /* Allocate target SG list. 
*/ 3597 st = kmalloc(sizeof(*st), M_DRM, GFP_KERNEL); 3598 if (!st) 3599 goto err_st_alloc; 3600 3601 ret = sg_alloc_table(st, size, GFP_KERNEL); 3602 if (ret) 3603 goto err_sg_alloc; 3604 3605 /* Populate source page list from the object. */ 3606 i = 0; 3607 for_each_sgt_dma(dma_addr, sgt_iter, obj->pages) 3608 page_addr_list[i++] = dma_addr; 3609 3610 GEM_BUG_ON(i != n_pages); 3611 st->nents = 0; 3612 sg = st->sgl; 3613 3614 for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) { 3615 sg = rotate_pages(page_addr_list, rot_info->plane[i].offset, 3616 rot_info->plane[i].width, rot_info->plane[i].height, 3617 rot_info->plane[i].stride, st, sg); 3618 } 3619 3620 DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages)\n", 3621 obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); 3622 3623 drm_free_large(page_addr_list); 3624 3625 return st; 3626 3627 err_sg_alloc: 3628 kfree(st); 3629 err_st_alloc: 3630 drm_free_large(page_addr_list); 3631 3632 DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n", 3633 obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); 3634 3635 return ERR_PTR(ret); 3636 } 3637 3638 static struct sg_table * 3639 intel_partial_pages(const struct i915_ggtt_view *view, 3640 struct drm_i915_gem_object *obj) 3641 { 3642 struct sg_table *st; 3643 struct scatterlist *sg; 3644 struct sg_page_iter obj_sg_iter; 3645 int ret = -ENOMEM; 3646 3647 st = kmalloc(sizeof(*st), M_DRM, GFP_KERNEL); 3648 if (!st) 3649 goto err_st_alloc; 3650 3651 ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL); 3652 if (ret) 3653 goto err_sg_alloc; 3654 3655 sg = st->sgl; 3656 st->nents = 0; 3657 for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents, 3658 view->params.partial.offset) 3659 { 3660 if (st->nents >= view->params.partial.size) 3661 break; 3662 3663 sg_set_page(sg, NULL, PAGE_SIZE, 0); 3664 sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter); 3665 sg_dma_len(sg) = PAGE_SIZE; 3666 3667 sg = sg_next(sg); 3668 st->nents++; 3669 } 3670 3671 return st; 3672 3673 err_sg_alloc: 3674 kfree(st); 3675 err_st_alloc: 3676 return ERR_PTR(ret); 3677 } 3678 3679 static int 3680 i915_get_ggtt_vma_pages(struct i915_vma *vma) 3681 { 3682 int ret = 0; 3683 3684 if (vma->pages) 3685 return 0; 3686 3687 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) 3688 vma->pages = vma->obj->pages; 3689 else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED) 3690 vma->pages = 3691 intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj); 3692 else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL) 3693 vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj); 3694 else 3695 WARN_ONCE(1, "GGTT view %u not implemented!\n", 3696 vma->ggtt_view.type); 3697 3698 if (!vma->pages) { 3699 DRM_ERROR("Failed to get pages for GGTT view type %u!\n", 3700 vma->ggtt_view.type); 3701 ret = -EINVAL; 3702 } else if (IS_ERR(vma->pages)) { 3703 ret = PTR_ERR(vma->pages); 3704 vma->pages = NULL; 3705 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n", 3706 vma->ggtt_view.type, ret); 3707 } 3708 3709 return ret; 3710 } 3711 3712 /** 3713 * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
3714 * @vma: VMA to map 3715 * @cache_level: mapping cache level 3716 * @flags: flags like global or local mapping 3717 * 3718 * DMA addresses are taken from the scatter-gather table of this object (or of 3719 * this VMA in case of non-default GGTT views) and PTE entries set up. 3720 * Note that DMA addresses are also the only part of the SG table we care about. 3721 */ 3722 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, 3723 u32 flags) 3724 { 3725 u32 bind_flags; 3726 u32 vma_flags; 3727 int ret; 3728 3729 if (WARN_ON(flags == 0)) 3730 return -EINVAL; 3731 3732 bind_flags = 0; 3733 if (flags & PIN_GLOBAL) 3734 bind_flags |= I915_VMA_GLOBAL_BIND; 3735 if (flags & PIN_USER) 3736 bind_flags |= I915_VMA_LOCAL_BIND; 3737 3738 vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); 3739 if (flags & PIN_UPDATE) 3740 bind_flags |= vma_flags; 3741 else 3742 bind_flags &= ~vma_flags; 3743 if (bind_flags == 0) 3744 return 0; 3745 3746 if (vma_flags == 0 && vma->vm->allocate_va_range) { 3747 trace_i915_va_alloc(vma); 3748 ret = vma->vm->allocate_va_range(vma->vm, 3749 vma->node.start, 3750 vma->node.size); 3751 if (ret) 3752 return ret; 3753 } 3754 3755 ret = vma->vm->bind_vma(vma, cache_level, bind_flags); 3756 if (ret) 3757 return ret; 3758 3759 vma->flags |= bind_flags; 3760 return 0; 3761 } 3762 3763 void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) 3764 { 3765 void __iomem *ptr; 3766 3767 /* Access through the GTT requires the device to be awake. */ 3768 assert_rpm_wakelock_held(to_i915(vma->vm->dev)); 3769 3770 lockdep_assert_held(&vma->vm->dev->struct_mutex); 3771 if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) 3772 return IO_ERR_PTR(-ENODEV); 3773 3774 GEM_BUG_ON(!i915_vma_is_ggtt(vma)); 3775 GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0); 3776 3777 ptr = vma->iomap; 3778 if (ptr == NULL) { 3779 ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable, 3780 vma->node.start, 3781 vma->node.size); 3782 if (ptr == NULL) 3783 return IO_ERR_PTR(-ENOMEM); 3784 3785 vma->iomap = ptr; 3786 } 3787 3788 __i915_vma_pin(vma); 3789 return ptr; 3790 } 3791 3792 void i915_vma_unpin_and_release(struct i915_vma **p_vma) 3793 { 3794 struct i915_vma *vma; 3795 3796 vma = fetch_and_zero(p_vma); 3797 if (!vma) 3798 return; 3799 3800 i915_vma_unpin(vma); 3801 i915_vma_put(vma); 3802 } 3803