1 /* 2 * Copyright © 2010 Daniel Vetter 3 * Copyright © 2011-2014 Intel Corporation 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 * IN THE SOFTWARE. 23 * 24 */ 25 26 #include <linux/seq_file.h> 27 #include <linux/stop_machine.h> 28 #include <drm/drmP.h> 29 #include <drm/i915_drm.h> 30 #include "i915_drv.h" 31 #include "i915_vgpu.h" 32 #include "i915_trace.h" 33 #include "intel_drv.h" 34 #include "intel_frontbuffer.h" 35 36 #define I915_GFP_DMA (GFP_KERNEL | __GFP_HIGHMEM) 37 38 /** 39 * DOC: Global GTT views 40 * 41 * Background and previous state 42 * 43 * Historically objects could exists (be bound) in global GTT space only as 44 * singular instances with a view representing all of the object's backing pages 45 * in a linear fashion. This view will be called a normal view. 46 * 47 * To support multiple views of the same object, where the number of mapped 48 * pages is not equal to the backing store, or where the layout of the pages 49 * is not linear, concept of a GGTT view was added. 50 * 51 * One example of an alternative view is a stereo display driven by a single 52 * image. In this case we would have a framebuffer looking like this 53 * (2x2 pages): 54 * 55 * 12 56 * 34 57 * 58 * Above would represent a normal GGTT view as normally mapped for GPU or CPU 59 * rendering. In contrast, fed to the display engine would be an alternative 60 * view which could look something like this: 61 * 62 * 1212 63 * 3434 64 * 65 * In this example both the size and layout of pages in the alternative view is 66 * different from the normal view. 67 * 68 * Implementation and usage 69 * 70 * GGTT views are implemented using VMAs and are distinguished via enum 71 * i915_ggtt_view_type and struct i915_ggtt_view. 72 * 73 * A new flavour of core GEM functions which work with GGTT bound objects were 74 * added with the _ggtt_ infix, and sometimes with _view postfix to avoid 75 * renaming in large amounts of code. They take the struct i915_ggtt_view 76 * parameter encapsulating all metadata required to implement a view. 77 * 78 * As a helper for callers which are only interested in the normal view, 79 * globally const i915_ggtt_view_normal singleton instance exists. All old core 80 * GEM API functions, the ones not taking the view parameter, are operating on, 81 * or with the normal GGTT view. 82 * 83 * Code wanting to add or use a new GGTT view needs to: 84 * 85 * 1. Add a new enum with a suitable name. 86 * 2. Extend the metadata in the i915_ggtt_view structure if required. 87 * 3. Add support to i915_get_vma_pages(). 88 * 89 * New views are required to build a scatter-gather table from within the 90 * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and 91 * exists for the lifetime of an VMA. 92 * 93 * Core API is designed to have copy semantics which means that passed in 94 * struct i915_ggtt_view does not need to be persistent (left around after 95 * calling the core API functions). 96 * 97 */ 98 99 static int 100 i915_get_ggtt_vma_pages(struct i915_vma *vma); 101 102 const struct i915_ggtt_view i915_ggtt_view_normal = { 103 .type = I915_GGTT_VIEW_NORMAL, 104 }; 105 const struct i915_ggtt_view i915_ggtt_view_rotated = { 106 .type = I915_GGTT_VIEW_ROTATED, 107 }; 108 109 int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, 110 int enable_ppgtt) 111 { 112 bool has_aliasing_ppgtt; 113 bool has_full_ppgtt; 114 bool has_full_48bit_ppgtt; 115 116 has_aliasing_ppgtt = INTEL_GEN(dev_priv) >= 6; 117 has_full_ppgtt = INTEL_GEN(dev_priv) >= 7; 118 has_full_48bit_ppgtt = 119 IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9; 120 121 if (intel_vgpu_active(dev_priv)) { 122 /* emulation is too hard */ 123 has_full_ppgtt = false; 124 has_full_48bit_ppgtt = false; 125 } 126 127 if (!has_aliasing_ppgtt) 128 return 0; 129 130 /* 131 * We don't allow disabling PPGTT for gen9+ as it's a requirement for 132 * execlists, the sole mechanism available to submit work. 133 */ 134 if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9) 135 return 0; 136 137 if (enable_ppgtt == 1) 138 return 1; 139 140 if (enable_ppgtt == 2 && has_full_ppgtt) 141 return 2; 142 143 if (enable_ppgtt == 3 && has_full_48bit_ppgtt) 144 return 3; 145 146 #ifdef CONFIG_INTEL_IOMMU 147 /* Disable ppgtt on SNB if VT-d is on. */ 148 if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped) { 149 DRM_INFO("Disabling PPGTT because VT-d is on\n"); 150 return 0; 151 } 152 #endif 153 154 /* Early VLV doesn't have this */ 155 if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) { 156 DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n"); 157 return 0; 158 } 159 160 if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists && has_full_ppgtt) 161 return has_full_48bit_ppgtt ? 3 : 2; 162 else 163 return has_aliasing_ppgtt ? 1 : 0; 164 } 165 166 static int ppgtt_bind_vma(struct i915_vma *vma, 167 enum i915_cache_level cache_level, 168 u32 unused) 169 { 170 u32 pte_flags = 0; 171 172 vma->pages = vma->obj->mm.pages; 173 174 /* Currently applicable only to VLV */ 175 if (vma->obj->gt_ro) 176 pte_flags |= PTE_READ_ONLY; 177 178 vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start, 179 cache_level, pte_flags); 180 181 return 0; 182 } 183 184 static void ppgtt_unbind_vma(struct i915_vma *vma) 185 { 186 vma->vm->clear_range(vma->vm, 187 vma->node.start, 188 vma->size); 189 } 190 191 static gen8_pte_t gen8_pte_encode(dma_addr_t addr, 192 enum i915_cache_level level) 193 { 194 gen8_pte_t pte = _PAGE_PRESENT | _PAGE_RW; 195 pte |= addr; 196 197 switch (level) { 198 case I915_CACHE_NONE: 199 pte |= PPAT_UNCACHED_INDEX; 200 break; 201 case I915_CACHE_WT: 202 pte |= PPAT_DISPLAY_ELLC_INDEX; 203 break; 204 default: 205 pte |= PPAT_CACHED_INDEX; 206 break; 207 } 208 209 return pte; 210 } 211 212 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr, 213 const enum i915_cache_level level) 214 { 215 gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW; 216 pde |= addr; 217 if (level != I915_CACHE_NONE) 218 pde |= PPAT_CACHED_PDE_INDEX; 219 else 220 pde |= PPAT_UNCACHED_INDEX; 221 return pde; 222 } 223 224 #define gen8_pdpe_encode gen8_pde_encode 225 #define gen8_pml4e_encode gen8_pde_encode 226 227 static gen6_pte_t snb_pte_encode(dma_addr_t addr, 228 enum i915_cache_level level, 229 u32 unused) 230 { 231 gen6_pte_t pte = GEN6_PTE_VALID; 232 pte |= GEN6_PTE_ADDR_ENCODE(addr); 233 234 switch (level) { 235 case I915_CACHE_L3_LLC: 236 case I915_CACHE_LLC: 237 pte |= GEN6_PTE_CACHE_LLC; 238 break; 239 case I915_CACHE_NONE: 240 pte |= GEN6_PTE_UNCACHED; 241 break; 242 default: 243 MISSING_CASE(level); 244 } 245 246 return pte; 247 } 248 249 static gen6_pte_t ivb_pte_encode(dma_addr_t addr, 250 enum i915_cache_level level, 251 u32 unused) 252 { 253 gen6_pte_t pte = GEN6_PTE_VALID; 254 pte |= GEN6_PTE_ADDR_ENCODE(addr); 255 256 switch (level) { 257 case I915_CACHE_L3_LLC: 258 pte |= GEN7_PTE_CACHE_L3_LLC; 259 break; 260 case I915_CACHE_LLC: 261 pte |= GEN6_PTE_CACHE_LLC; 262 break; 263 case I915_CACHE_NONE: 264 pte |= GEN6_PTE_UNCACHED; 265 break; 266 default: 267 MISSING_CASE(level); 268 } 269 270 return pte; 271 } 272 273 static gen6_pte_t byt_pte_encode(dma_addr_t addr, 274 enum i915_cache_level level, 275 u32 flags) 276 { 277 gen6_pte_t pte = GEN6_PTE_VALID; 278 pte |= GEN6_PTE_ADDR_ENCODE(addr); 279 280 if (!(flags & PTE_READ_ONLY)) 281 pte |= BYT_PTE_WRITEABLE; 282 283 if (level != I915_CACHE_NONE) 284 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; 285 286 return pte; 287 } 288 289 static gen6_pte_t hsw_pte_encode(dma_addr_t addr, 290 enum i915_cache_level level, 291 u32 unused) 292 { 293 gen6_pte_t pte = GEN6_PTE_VALID; 294 pte |= HSW_PTE_ADDR_ENCODE(addr); 295 296 if (level != I915_CACHE_NONE) 297 pte |= HSW_WB_LLC_AGE3; 298 299 return pte; 300 } 301 302 static gen6_pte_t iris_pte_encode(dma_addr_t addr, 303 enum i915_cache_level level, 304 u32 unused) 305 { 306 gen6_pte_t pte = GEN6_PTE_VALID; 307 pte |= HSW_PTE_ADDR_ENCODE(addr); 308 309 switch (level) { 310 case I915_CACHE_NONE: 311 break; 312 case I915_CACHE_WT: 313 pte |= HSW_WT_ELLC_LLC_AGE3; 314 break; 315 default: 316 pte |= HSW_WB_ELLC_LLC_AGE3; 317 break; 318 } 319 320 return pte; 321 } 322 323 static int __setup_page_dma(struct drm_i915_private *dev_priv, 324 struct i915_page_dma *p, gfp_t flags) 325 { 326 struct device *kdev = &dev_priv->drm.pdev->dev; 327 328 p->page = alloc_page(flags); 329 if (!p->page) 330 return -ENOMEM; 331 332 p->daddr = dma_map_page(kdev, 333 p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL); 334 335 if (dma_mapping_error(kdev, p->daddr)) { 336 __free_page(p->page); 337 return -EINVAL; 338 } 339 340 return 0; 341 } 342 343 static int setup_page_dma(struct drm_i915_private *dev_priv, 344 struct i915_page_dma *p) 345 { 346 return __setup_page_dma(dev_priv, p, I915_GFP_DMA); 347 } 348 349 static void cleanup_page_dma(struct drm_i915_private *dev_priv, 350 struct i915_page_dma *p) 351 { 352 struct pci_dev *pdev = dev_priv->drm.pdev; 353 354 if (WARN_ON(!p->page)) 355 return; 356 357 dma_unmap_page(&pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL); 358 __free_page(p->page); 359 memset(p, 0, sizeof(*p)); 360 } 361 362 static void *kmap_page_dma(struct i915_page_dma *p) 363 { 364 return kmap_atomic(p->page); 365 } 366 367 /* We use the flushing unmap only with ppgtt structures: 368 * page directories, page tables and scratch pages. 369 */ 370 static void kunmap_page_dma(struct drm_i915_private *dev_priv, void *vaddr) 371 { 372 /* There are only few exceptions for gen >=6. chv and bxt. 373 * And we are not sure about the latter so play safe for now. 374 */ 375 if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) 376 drm_clflush_virt_range(vaddr, PAGE_SIZE); 377 378 kunmap_atomic(vaddr); 379 } 380 381 #define kmap_px(px) kmap_page_dma(px_base(px)) 382 #define kunmap_px(ppgtt, vaddr) \ 383 kunmap_page_dma(to_i915((ppgtt)->base.dev), (vaddr)) 384 385 #define setup_px(dev_priv, px) setup_page_dma((dev_priv), px_base(px)) 386 #define cleanup_px(dev_priv, px) cleanup_page_dma((dev_priv), px_base(px)) 387 #define fill_px(dev_priv, px, v) fill_page_dma((dev_priv), px_base(px), (v)) 388 #define fill32_px(dev_priv, px, v) \ 389 fill_page_dma_32((dev_priv), px_base(px), (v)) 390 391 static void fill_page_dma(struct drm_i915_private *dev_priv, 392 struct i915_page_dma *p, const uint64_t val) 393 { 394 int i; 395 uint64_t * const vaddr = kmap_page_dma(p); 396 397 for (i = 0; i < 512; i++) 398 vaddr[i] = val; 399 400 kunmap_page_dma(dev_priv, vaddr); 401 } 402 403 static void fill_page_dma_32(struct drm_i915_private *dev_priv, 404 struct i915_page_dma *p, const uint32_t val32) 405 { 406 uint64_t v = val32; 407 408 v = v << 32 | val32; 409 410 fill_page_dma(dev_priv, p, v); 411 } 412 413 static int 414 setup_scratch_page(struct drm_i915_private *dev_priv, 415 struct i915_page_dma *scratch, 416 gfp_t gfp) 417 { 418 return __setup_page_dma(dev_priv, scratch, gfp | __GFP_ZERO); 419 } 420 421 static void cleanup_scratch_page(struct drm_i915_private *dev_priv, 422 struct i915_page_dma *scratch) 423 { 424 cleanup_page_dma(dev_priv, scratch); 425 } 426 427 static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv) 428 { 429 struct i915_page_table *pt; 430 const size_t count = INTEL_GEN(dev_priv) >= 8 ? GEN8_PTES : GEN6_PTES; 431 int ret = -ENOMEM; 432 433 pt = kzalloc(sizeof(*pt), GFP_KERNEL); 434 if (!pt) 435 return ERR_PTR(-ENOMEM); 436 437 pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes), 438 GFP_KERNEL); 439 440 if (!pt->used_ptes) 441 goto fail_bitmap; 442 443 ret = setup_px(dev_priv, pt); 444 if (ret) 445 goto fail_page_m; 446 447 return pt; 448 449 fail_page_m: 450 kfree(pt->used_ptes); 451 fail_bitmap: 452 kfree(pt); 453 454 return ERR_PTR(ret); 455 } 456 457 static void free_pt(struct drm_i915_private *dev_priv, 458 struct i915_page_table *pt) 459 { 460 cleanup_px(dev_priv, pt); 461 kfree(pt->used_ptes); 462 kfree(pt); 463 } 464 465 static void gen8_initialize_pt(struct i915_address_space *vm, 466 struct i915_page_table *pt) 467 { 468 gen8_pte_t scratch_pte; 469 470 scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, 471 I915_CACHE_LLC); 472 473 fill_px(to_i915(vm->dev), pt, scratch_pte); 474 } 475 476 static void gen6_initialize_pt(struct i915_address_space *vm, 477 struct i915_page_table *pt) 478 { 479 gen6_pte_t scratch_pte; 480 481 WARN_ON(vm->scratch_page.daddr == 0); 482 483 scratch_pte = vm->pte_encode(vm->scratch_page.daddr, 484 I915_CACHE_LLC, 0); 485 486 fill32_px(to_i915(vm->dev), pt, scratch_pte); 487 } 488 489 static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv) 490 { 491 struct i915_page_directory *pd; 492 int ret = -ENOMEM; 493 494 pd = kzalloc(sizeof(*pd), GFP_KERNEL); 495 if (!pd) 496 return ERR_PTR(-ENOMEM); 497 498 pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES), 499 sizeof(*pd->used_pdes), GFP_KERNEL); 500 if (!pd->used_pdes) 501 goto fail_bitmap; 502 503 ret = setup_px(dev_priv, pd); 504 if (ret) 505 goto fail_page_m; 506 507 return pd; 508 509 fail_page_m: 510 kfree(pd->used_pdes); 511 fail_bitmap: 512 kfree(pd); 513 514 return ERR_PTR(ret); 515 } 516 517 static void free_pd(struct drm_i915_private *dev_priv, 518 struct i915_page_directory *pd) 519 { 520 if (px_page(pd)) { 521 cleanup_px(dev_priv, pd); 522 kfree(pd->used_pdes); 523 kfree(pd); 524 } 525 } 526 527 static void gen8_initialize_pd(struct i915_address_space *vm, 528 struct i915_page_directory *pd) 529 { 530 gen8_pde_t scratch_pde; 531 532 scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC); 533 534 fill_px(to_i915(vm->dev), pd, scratch_pde); 535 } 536 537 static int __pdp_init(struct drm_i915_private *dev_priv, 538 struct i915_page_directory_pointer *pdp) 539 { 540 size_t pdpes = I915_PDPES_PER_PDP(dev_priv); 541 542 pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes), 543 sizeof(unsigned long), 544 GFP_KERNEL); 545 if (!pdp->used_pdpes) 546 return -ENOMEM; 547 548 pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory), 549 GFP_KERNEL); 550 if (!pdp->page_directory) { 551 kfree(pdp->used_pdpes); 552 /* the PDP might be the statically allocated top level. Keep it 553 * as clean as possible */ 554 pdp->used_pdpes = NULL; 555 return -ENOMEM; 556 } 557 558 return 0; 559 } 560 561 static void __pdp_fini(struct i915_page_directory_pointer *pdp) 562 { 563 kfree(pdp->used_pdpes); 564 kfree(pdp->page_directory); 565 pdp->page_directory = NULL; 566 } 567 568 static struct 569 i915_page_directory_pointer *alloc_pdp(struct drm_i915_private *dev_priv) 570 { 571 struct i915_page_directory_pointer *pdp; 572 int ret = -ENOMEM; 573 574 WARN_ON(!USES_FULL_48BIT_PPGTT(dev_priv)); 575 576 pdp = kzalloc(sizeof(*pdp), GFP_KERNEL); 577 if (!pdp) 578 return ERR_PTR(-ENOMEM); 579 580 ret = __pdp_init(dev_priv, pdp); 581 if (ret) 582 goto fail_bitmap; 583 584 ret = setup_px(dev_priv, pdp); 585 if (ret) 586 goto fail_page_m; 587 588 return pdp; 589 590 fail_page_m: 591 __pdp_fini(pdp); 592 fail_bitmap: 593 kfree(pdp); 594 595 return ERR_PTR(ret); 596 } 597 598 static void free_pdp(struct drm_i915_private *dev_priv, 599 struct i915_page_directory_pointer *pdp) 600 { 601 __pdp_fini(pdp); 602 if (USES_FULL_48BIT_PPGTT(dev_priv)) { 603 cleanup_px(dev_priv, pdp); 604 kfree(pdp); 605 } 606 } 607 608 static void gen8_initialize_pdp(struct i915_address_space *vm, 609 struct i915_page_directory_pointer *pdp) 610 { 611 gen8_ppgtt_pdpe_t scratch_pdpe; 612 613 scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC); 614 615 fill_px(to_i915(vm->dev), pdp, scratch_pdpe); 616 } 617 618 static void gen8_initialize_pml4(struct i915_address_space *vm, 619 struct i915_pml4 *pml4) 620 { 621 gen8_ppgtt_pml4e_t scratch_pml4e; 622 623 scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp), 624 I915_CACHE_LLC); 625 626 fill_px(to_i915(vm->dev), pml4, scratch_pml4e); 627 } 628 629 static void 630 gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt, 631 struct i915_page_directory_pointer *pdp, 632 struct i915_page_directory *pd, 633 int index) 634 { 635 gen8_ppgtt_pdpe_t *page_directorypo; 636 637 if (!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev))) 638 return; 639 640 page_directorypo = kmap_px(pdp); 641 page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); 642 kunmap_px(ppgtt, page_directorypo); 643 } 644 645 static void 646 gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt, 647 struct i915_pml4 *pml4, 648 struct i915_page_directory_pointer *pdp, 649 int index) 650 { 651 gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4); 652 653 WARN_ON(!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev))); 654 pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); 655 kunmap_px(ppgtt, pagemap); 656 } 657 658 /* Broadwell Page Directory Pointer Descriptors */ 659 static int gen8_write_pdp(struct drm_i915_gem_request *req, 660 unsigned entry, 661 dma_addr_t addr) 662 { 663 struct intel_ring *ring = req->ring; 664 struct intel_engine_cs *engine = req->engine; 665 int ret; 666 667 BUG_ON(entry >= 4); 668 669 ret = intel_ring_begin(req, 6); 670 if (ret) 671 return ret; 672 673 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 674 intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, entry)); 675 intel_ring_emit(ring, upper_32_bits(addr)); 676 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 677 intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, entry)); 678 intel_ring_emit(ring, lower_32_bits(addr)); 679 intel_ring_advance(ring); 680 681 return 0; 682 } 683 684 static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt, 685 struct drm_i915_gem_request *req) 686 { 687 int i, ret; 688 689 for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) { 690 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); 691 692 ret = gen8_write_pdp(req, i, pd_daddr); 693 if (ret) 694 return ret; 695 } 696 697 return 0; 698 } 699 700 static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt, 701 struct drm_i915_gem_request *req) 702 { 703 return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4)); 704 } 705 706 /* PDE TLBs are a pain to invalidate on GEN8+. When we modify 707 * the page table structures, we mark them dirty so that 708 * context switching/execlist queuing code takes extra steps 709 * to ensure that tlbs are flushed. 710 */ 711 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt) 712 { 713 ppgtt->pd_dirty_rings = INTEL_INFO(to_i915(ppgtt->base.dev))->ring_mask; 714 } 715 716 /* Removes entries from a single page table, releasing it if it's empty. 717 * Caller can use the return value to update higher-level entries. 718 */ 719 static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, 720 struct i915_page_table *pt, 721 uint64_t start, 722 uint64_t length) 723 { 724 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 725 unsigned int num_entries = gen8_pte_count(start, length); 726 unsigned int pte = gen8_pte_index(start); 727 unsigned int pte_end = pte + num_entries; 728 gen8_pte_t *pt_vaddr; 729 gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, 730 I915_CACHE_LLC); 731 732 if (WARN_ON(!px_page(pt))) 733 return false; 734 735 GEM_BUG_ON(pte_end > GEN8_PTES); 736 737 bitmap_clear(pt->used_ptes, pte, num_entries); 738 if (USES_FULL_PPGTT(vm->i915)) { 739 if (bitmap_empty(pt->used_ptes, GEN8_PTES)) 740 return true; 741 } 742 743 pt_vaddr = kmap_px(pt); 744 745 while (pte < pte_end) 746 pt_vaddr[pte++] = scratch_pte; 747 748 kunmap_px(ppgtt, pt_vaddr); 749 750 return false; 751 } 752 753 /* Removes entries from a single page dir, releasing it if it's empty. 754 * Caller can use the return value to update higher-level entries 755 */ 756 static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, 757 struct i915_page_directory *pd, 758 uint64_t start, 759 uint64_t length) 760 { 761 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 762 struct i915_page_table *pt; 763 uint64_t pde; 764 gen8_pde_t *pde_vaddr; 765 gen8_pde_t scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), 766 I915_CACHE_LLC); 767 768 gen8_for_each_pde(pt, pd, start, length, pde) { 769 if (WARN_ON(!pd->page_table[pde])) 770 break; 771 772 if (gen8_ppgtt_clear_pt(vm, pt, start, length)) { 773 __clear_bit(pde, pd->used_pdes); 774 pde_vaddr = kmap_px(pd); 775 pde_vaddr[pde] = scratch_pde; 776 kunmap_px(ppgtt, pde_vaddr); 777 free_pt(to_i915(vm->dev), pt); 778 } 779 } 780 781 if (bitmap_empty(pd->used_pdes, I915_PDES)) 782 return true; 783 784 return false; 785 } 786 787 /* Removes entries from a single page dir pointer, releasing it if it's empty. 788 * Caller can use the return value to update higher-level entries 789 */ 790 static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, 791 struct i915_page_directory_pointer *pdp, 792 uint64_t start, 793 uint64_t length) 794 { 795 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 796 struct i915_page_directory *pd; 797 uint64_t pdpe; 798 gen8_ppgtt_pdpe_t *pdpe_vaddr; 799 gen8_ppgtt_pdpe_t scratch_pdpe = 800 gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC); 801 802 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 803 if (WARN_ON(!pdp->page_directory[pdpe])) 804 break; 805 806 if (gen8_ppgtt_clear_pd(vm, pd, start, length)) { 807 __clear_bit(pdpe, pdp->used_pdpes); 808 if (USES_FULL_48BIT_PPGTT(dev_priv)) { 809 pdpe_vaddr = kmap_px(pdp); 810 pdpe_vaddr[pdpe] = scratch_pdpe; 811 kunmap_px(ppgtt, pdpe_vaddr); 812 } 813 free_pd(to_i915(vm->dev), pd); 814 } 815 } 816 817 mark_tlbs_dirty(ppgtt); 818 819 if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv))) 820 return true; 821 822 return false; 823 } 824 825 /* Removes entries from a single pml4. 826 * This is the top-level structure in 4-level page tables used on gen8+. 827 * Empty entries are always scratch pml4e. 828 */ 829 static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm, 830 struct i915_pml4 *pml4, 831 uint64_t start, 832 uint64_t length) 833 { 834 struct drm_i915_private *dev_priv = to_i915(vm->dev); 835 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 836 struct i915_page_directory_pointer *pdp; 837 uint64_t pml4e; 838 gen8_ppgtt_pml4e_t *pml4e_vaddr; 839 gen8_ppgtt_pml4e_t scratch_pml4e = 840 gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC); 841 842 GEM_BUG_ON(!USES_FULL_48BIT_PPGTT(to_i915(vm->dev))); 843 844 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 845 if (WARN_ON(!pml4->pdps[pml4e])) 846 break; 847 848 if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) { 849 __clear_bit(pml4e, pml4->used_pml4es); 850 pml4e_vaddr = kmap_px(pml4); 851 pml4e_vaddr[pml4e] = scratch_pml4e; 852 kunmap_px(ppgtt, pml4e_vaddr); 853 free_pdp(dev_priv, pdp); 854 } 855 } 856 } 857 858 static void gen8_ppgtt_clear_range(struct i915_address_space *vm, 859 uint64_t start, uint64_t length) 860 { 861 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 862 863 if (USES_FULL_48BIT_PPGTT(to_i915(vm->dev))) 864 gen8_ppgtt_clear_pml4(vm, &ppgtt->pml4, start, length); 865 else 866 gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length); 867 } 868 869 static void 870 gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm, 871 struct i915_page_directory_pointer *pdp, 872 struct sg_page_iter *sg_iter, 873 uint64_t start, 874 enum i915_cache_level cache_level) 875 { 876 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 877 gen8_pte_t *pt_vaddr; 878 unsigned pdpe = gen8_pdpe_index(start); 879 unsigned pde = gen8_pde_index(start); 880 unsigned pte = gen8_pte_index(start); 881 882 pt_vaddr = NULL; 883 884 while (__sg_page_iter_next(sg_iter)) { 885 if (pt_vaddr == NULL) { 886 struct i915_page_directory *pd = pdp->page_directory[pdpe]; 887 struct i915_page_table *pt = pd->page_table[pde]; 888 pt_vaddr = kmap_px(pt); 889 } 890 891 pt_vaddr[pte] = 892 gen8_pte_encode(sg_page_iter_dma_address(sg_iter), 893 cache_level); 894 if (++pte == GEN8_PTES) { 895 kunmap_px(ppgtt, pt_vaddr); 896 pt_vaddr = NULL; 897 if (++pde == I915_PDES) { 898 if (++pdpe == I915_PDPES_PER_PDP(to_i915(vm->dev))) 899 break; 900 pde = 0; 901 } 902 pte = 0; 903 } 904 } 905 906 if (pt_vaddr) 907 kunmap_px(ppgtt, pt_vaddr); 908 } 909 910 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, 911 struct sg_table *pages, 912 uint64_t start, 913 enum i915_cache_level cache_level, 914 u32 unused) 915 { 916 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 917 struct sg_page_iter sg_iter; 918 919 __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0); 920 921 if (!USES_FULL_48BIT_PPGTT(to_i915(vm->dev))) { 922 gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start, 923 cache_level); 924 } else { 925 struct i915_page_directory_pointer *pdp; 926 uint64_t pml4e; 927 uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT; 928 929 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) { 930 gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter, 931 start, cache_level); 932 } 933 } 934 } 935 936 static void gen8_free_page_tables(struct drm_i915_private *dev_priv, 937 struct i915_page_directory *pd) 938 { 939 int i; 940 941 if (!px_page(pd)) 942 return; 943 944 for_each_set_bit(i, pd->used_pdes, I915_PDES) { 945 if (WARN_ON(!pd->page_table[i])) 946 continue; 947 948 free_pt(dev_priv, pd->page_table[i]); 949 pd->page_table[i] = NULL; 950 } 951 } 952 953 static int gen8_init_scratch(struct i915_address_space *vm) 954 { 955 struct drm_i915_private *dev_priv = to_i915(vm->dev); 956 int ret; 957 958 ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA); 959 if (ret) 960 return ret; 961 962 vm->scratch_pt = alloc_pt(dev_priv); 963 if (IS_ERR(vm->scratch_pt)) { 964 ret = PTR_ERR(vm->scratch_pt); 965 goto free_scratch_page; 966 } 967 968 vm->scratch_pd = alloc_pd(dev_priv); 969 if (IS_ERR(vm->scratch_pd)) { 970 ret = PTR_ERR(vm->scratch_pd); 971 goto free_pt; 972 } 973 974 if (USES_FULL_48BIT_PPGTT(dev_priv)) { 975 vm->scratch_pdp = alloc_pdp(dev_priv); 976 if (IS_ERR(vm->scratch_pdp)) { 977 ret = PTR_ERR(vm->scratch_pdp); 978 goto free_pd; 979 } 980 } 981 982 gen8_initialize_pt(vm, vm->scratch_pt); 983 gen8_initialize_pd(vm, vm->scratch_pd); 984 if (USES_FULL_48BIT_PPGTT(dev_priv)) 985 gen8_initialize_pdp(vm, vm->scratch_pdp); 986 987 return 0; 988 989 free_pd: 990 free_pd(dev_priv, vm->scratch_pd); 991 free_pt: 992 free_pt(dev_priv, vm->scratch_pt); 993 free_scratch_page: 994 cleanup_scratch_page(dev_priv, &vm->scratch_page); 995 996 return ret; 997 } 998 999 static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) 1000 { 1001 enum vgt_g2v_type msg; 1002 struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev); 1003 int i; 1004 1005 if (USES_FULL_48BIT_PPGTT(dev_priv)) { 1006 u64 daddr = px_dma(&ppgtt->pml4); 1007 1008 I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); 1009 I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); 1010 1011 msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE : 1012 VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY); 1013 } else { 1014 for (i = 0; i < GEN8_LEGACY_PDPES; i++) { 1015 u64 daddr = i915_page_dir_dma_addr(ppgtt, i); 1016 1017 I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr)); 1018 I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr)); 1019 } 1020 1021 msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE : 1022 VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY); 1023 } 1024 1025 I915_WRITE(vgtif_reg(g2v_notify), msg); 1026 1027 return 0; 1028 } 1029 1030 static void gen8_free_scratch(struct i915_address_space *vm) 1031 { 1032 struct drm_i915_private *dev_priv = to_i915(vm->dev); 1033 1034 if (USES_FULL_48BIT_PPGTT(dev_priv)) 1035 free_pdp(dev_priv, vm->scratch_pdp); 1036 free_pd(dev_priv, vm->scratch_pd); 1037 free_pt(dev_priv, vm->scratch_pt); 1038 cleanup_scratch_page(dev_priv, &vm->scratch_page); 1039 } 1040 1041 static void gen8_ppgtt_cleanup_3lvl(struct drm_i915_private *dev_priv, 1042 struct i915_page_directory_pointer *pdp) 1043 { 1044 int i; 1045 1046 for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)) { 1047 if (WARN_ON(!pdp->page_directory[i])) 1048 continue; 1049 1050 gen8_free_page_tables(dev_priv, pdp->page_directory[i]); 1051 free_pd(dev_priv, pdp->page_directory[i]); 1052 } 1053 1054 free_pdp(dev_priv, pdp); 1055 } 1056 1057 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt) 1058 { 1059 struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev); 1060 int i; 1061 1062 for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { 1063 if (WARN_ON(!ppgtt->pml4.pdps[i])) 1064 continue; 1065 1066 gen8_ppgtt_cleanup_3lvl(dev_priv, ppgtt->pml4.pdps[i]); 1067 } 1068 1069 cleanup_px(dev_priv, &ppgtt->pml4); 1070 } 1071 1072 static void gen8_ppgtt_cleanup(struct i915_address_space *vm) 1073 { 1074 struct drm_i915_private *dev_priv = to_i915(vm->dev); 1075 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1076 1077 if (intel_vgpu_active(dev_priv)) 1078 gen8_ppgtt_notify_vgt(ppgtt, false); 1079 1080 if (!USES_FULL_48BIT_PPGTT(dev_priv)) 1081 gen8_ppgtt_cleanup_3lvl(dev_priv, &ppgtt->pdp); 1082 else 1083 gen8_ppgtt_cleanup_4lvl(ppgtt); 1084 1085 gen8_free_scratch(vm); 1086 } 1087 1088 /** 1089 * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range. 1090 * @vm: Master vm structure. 1091 * @pd: Page directory for this address range. 1092 * @start: Starting virtual address to begin allocations. 1093 * @length: Size of the allocations. 1094 * @new_pts: Bitmap set by function with new allocations. Likely used by the 1095 * caller to free on error. 1096 * 1097 * Allocate the required number of page tables. Extremely similar to 1098 * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by 1099 * the page directory boundary (instead of the page directory pointer). That 1100 * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is 1101 * possible, and likely that the caller will need to use multiple calls of this 1102 * function to achieve the appropriate allocation. 1103 * 1104 * Return: 0 if success; negative error code otherwise. 1105 */ 1106 static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm, 1107 struct i915_page_directory *pd, 1108 uint64_t start, 1109 uint64_t length, 1110 unsigned long *new_pts) 1111 { 1112 struct drm_i915_private *dev_priv = to_i915(vm->dev); 1113 struct i915_page_table *pt; 1114 uint32_t pde; 1115 1116 gen8_for_each_pde(pt, pd, start, length, pde) { 1117 /* Don't reallocate page tables */ 1118 if (test_bit(pde, pd->used_pdes)) { 1119 /* Scratch is never allocated this way */ 1120 WARN_ON(pt == vm->scratch_pt); 1121 continue; 1122 } 1123 1124 pt = alloc_pt(dev_priv); 1125 if (IS_ERR(pt)) 1126 goto unwind_out; 1127 1128 gen8_initialize_pt(vm, pt); 1129 pd->page_table[pde] = pt; 1130 __set_bit(pde, new_pts); 1131 trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT); 1132 } 1133 1134 return 0; 1135 1136 unwind_out: 1137 for_each_set_bit(pde, new_pts, I915_PDES) 1138 free_pt(dev_priv, pd->page_table[pde]); 1139 1140 return -ENOMEM; 1141 } 1142 1143 /** 1144 * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range. 1145 * @vm: Master vm structure. 1146 * @pdp: Page directory pointer for this address range. 1147 * @start: Starting virtual address to begin allocations. 1148 * @length: Size of the allocations. 1149 * @new_pds: Bitmap set by function with new allocations. Likely used by the 1150 * caller to free on error. 1151 * 1152 * Allocate the required number of page directories starting at the pde index of 1153 * @start, and ending at the pde index @start + @length. This function will skip 1154 * over already allocated page directories within the range, and only allocate 1155 * new ones, setting the appropriate pointer within the pdp as well as the 1156 * correct position in the bitmap @new_pds. 1157 * 1158 * The function will only allocate the pages within the range for a give page 1159 * directory pointer. In other words, if @start + @length straddles a virtually 1160 * addressed PDP boundary (512GB for 4k pages), there will be more allocations 1161 * required by the caller, This is not currently possible, and the BUG in the 1162 * code will prevent it. 1163 * 1164 * Return: 0 if success; negative error code otherwise. 1165 */ 1166 static int 1167 gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm, 1168 struct i915_page_directory_pointer *pdp, 1169 uint64_t start, 1170 uint64_t length, 1171 unsigned long *new_pds) 1172 { 1173 struct drm_i915_private *dev_priv = to_i915(vm->dev); 1174 struct i915_page_directory *pd; 1175 uint32_t pdpe; 1176 uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv); 1177 1178 WARN_ON(!bitmap_empty(new_pds, pdpes)); 1179 1180 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1181 if (test_bit(pdpe, pdp->used_pdpes)) 1182 continue; 1183 1184 pd = alloc_pd(dev_priv); 1185 if (IS_ERR(pd)) 1186 goto unwind_out; 1187 1188 gen8_initialize_pd(vm, pd); 1189 pdp->page_directory[pdpe] = pd; 1190 __set_bit(pdpe, new_pds); 1191 trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT); 1192 } 1193 1194 return 0; 1195 1196 unwind_out: 1197 for_each_set_bit(pdpe, new_pds, pdpes) 1198 free_pd(dev_priv, pdp->page_directory[pdpe]); 1199 1200 return -ENOMEM; 1201 } 1202 1203 /** 1204 * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range. 1205 * @vm: Master vm structure. 1206 * @pml4: Page map level 4 for this address range. 1207 * @start: Starting virtual address to begin allocations. 1208 * @length: Size of the allocations. 1209 * @new_pdps: Bitmap set by function with new allocations. Likely used by the 1210 * caller to free on error. 1211 * 1212 * Allocate the required number of page directory pointers. Extremely similar to 1213 * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs(). 1214 * The main difference is here we are limited by the pml4 boundary (instead of 1215 * the page directory pointer). 1216 * 1217 * Return: 0 if success; negative error code otherwise. 1218 */ 1219 static int 1220 gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, 1221 struct i915_pml4 *pml4, 1222 uint64_t start, 1223 uint64_t length, 1224 unsigned long *new_pdps) 1225 { 1226 struct drm_i915_private *dev_priv = to_i915(vm->dev); 1227 struct i915_page_directory_pointer *pdp; 1228 uint32_t pml4e; 1229 1230 WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4)); 1231 1232 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1233 if (!test_bit(pml4e, pml4->used_pml4es)) { 1234 pdp = alloc_pdp(dev_priv); 1235 if (IS_ERR(pdp)) 1236 goto unwind_out; 1237 1238 gen8_initialize_pdp(vm, pdp); 1239 pml4->pdps[pml4e] = pdp; 1240 __set_bit(pml4e, new_pdps); 1241 trace_i915_page_directory_pointer_entry_alloc(vm, 1242 pml4e, 1243 start, 1244 GEN8_PML4E_SHIFT); 1245 } 1246 } 1247 1248 return 0; 1249 1250 unwind_out: 1251 for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) 1252 free_pdp(dev_priv, pml4->pdps[pml4e]); 1253 1254 return -ENOMEM; 1255 } 1256 1257 static void 1258 free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts) 1259 { 1260 kfree(new_pts); 1261 kfree(new_pds); 1262 } 1263 1264 /* Fills in the page directory bitmap, and the array of page tables bitmap. Both 1265 * of these are based on the number of PDPEs in the system. 1266 */ 1267 static 1268 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds, 1269 unsigned long **new_pts, 1270 uint32_t pdpes) 1271 { 1272 unsigned long *pds; 1273 unsigned long *pts; 1274 1275 pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY); 1276 if (!pds) 1277 return -ENOMEM; 1278 1279 pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long), 1280 GFP_TEMPORARY); 1281 if (!pts) 1282 goto err_out; 1283 1284 *new_pds = pds; 1285 *new_pts = pts; 1286 1287 return 0; 1288 1289 err_out: 1290 free_gen8_temp_bitmaps(pds, pts); 1291 return -ENOMEM; 1292 } 1293 1294 static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, 1295 struct i915_page_directory_pointer *pdp, 1296 uint64_t start, 1297 uint64_t length) 1298 { 1299 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1300 unsigned long *new_page_dirs, *new_page_tables; 1301 struct drm_i915_private *dev_priv = to_i915(vm->dev); 1302 struct i915_page_directory *pd; 1303 const uint64_t orig_start = start; 1304 const uint64_t orig_length = length; 1305 uint32_t pdpe; 1306 uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv); 1307 int ret; 1308 1309 /* Wrap is never okay since we can only represent 48b, and we don't 1310 * actually use the other side of the canonical address space. 1311 */ 1312 if (WARN_ON(start + length < start)) 1313 return -ENODEV; 1314 1315 if (WARN_ON(start + length > vm->total)) 1316 return -ENODEV; 1317 1318 ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); 1319 if (ret) 1320 return ret; 1321 1322 /* Do the allocations first so we can easily bail out */ 1323 ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length, 1324 new_page_dirs); 1325 if (ret) { 1326 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1327 return ret; 1328 } 1329 1330 /* For every page directory referenced, allocate page tables */ 1331 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1332 ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length, 1333 new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES)); 1334 if (ret) 1335 goto err_out; 1336 } 1337 1338 start = orig_start; 1339 length = orig_length; 1340 1341 /* Allocations have completed successfully, so set the bitmaps, and do 1342 * the mappings. */ 1343 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1344 gen8_pde_t *const page_directory = kmap_px(pd); 1345 struct i915_page_table *pt; 1346 uint64_t pd_len = length; 1347 uint64_t pd_start = start; 1348 uint32_t pde; 1349 1350 /* Every pd should be allocated, we just did that above. */ 1351 WARN_ON(!pd); 1352 1353 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { 1354 /* Same reasoning as pd */ 1355 WARN_ON(!pt); 1356 WARN_ON(!pd_len); 1357 WARN_ON(!gen8_pte_count(pd_start, pd_len)); 1358 1359 /* Set our used ptes within the page table */ 1360 bitmap_set(pt->used_ptes, 1361 gen8_pte_index(pd_start), 1362 gen8_pte_count(pd_start, pd_len)); 1363 1364 /* Our pde is now pointing to the pagetable, pt */ 1365 __set_bit(pde, pd->used_pdes); 1366 1367 /* Map the PDE to the page table */ 1368 page_directory[pde] = gen8_pde_encode(px_dma(pt), 1369 I915_CACHE_LLC); 1370 trace_i915_page_table_entry_map(&ppgtt->base, pde, pt, 1371 gen8_pte_index(start), 1372 gen8_pte_count(start, length), 1373 GEN8_PTES); 1374 1375 /* NB: We haven't yet mapped ptes to pages. At this 1376 * point we're still relying on insert_entries() */ 1377 } 1378 1379 kunmap_px(ppgtt, page_directory); 1380 __set_bit(pdpe, pdp->used_pdpes); 1381 gen8_setup_page_directory(ppgtt, pdp, pd, pdpe); 1382 } 1383 1384 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1385 mark_tlbs_dirty(ppgtt); 1386 return 0; 1387 1388 err_out: 1389 while (pdpe--) { 1390 unsigned long temp; 1391 1392 for_each_set_bit(temp, new_page_tables + pdpe * 1393 BITS_TO_LONGS(I915_PDES), I915_PDES) 1394 free_pt(dev_priv, 1395 pdp->page_directory[pdpe]->page_table[temp]); 1396 } 1397 1398 for_each_set_bit(pdpe, new_page_dirs, pdpes) 1399 free_pd(dev_priv, pdp->page_directory[pdpe]); 1400 1401 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1402 mark_tlbs_dirty(ppgtt); 1403 return ret; 1404 } 1405 1406 static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, 1407 struct i915_pml4 *pml4, 1408 uint64_t start, 1409 uint64_t length) 1410 { 1411 DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4); 1412 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1413 struct i915_page_directory_pointer *pdp; 1414 uint64_t pml4e; 1415 int ret = 0; 1416 1417 /* Do the pml4 allocations first, so we don't need to track the newly 1418 * allocated tables below the pdp */ 1419 bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4); 1420 1421 /* The pagedirectory and pagetable allocations are done in the shared 3 1422 * and 4 level code. Just allocate the pdps. 1423 */ 1424 ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length, 1425 new_pdps); 1426 if (ret) 1427 return ret; 1428 1429 WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2, 1430 "The allocation has spanned more than 512GB. " 1431 "It is highly likely this is incorrect."); 1432 1433 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1434 WARN_ON(!pdp); 1435 1436 ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length); 1437 if (ret) 1438 goto err_out; 1439 1440 gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e); 1441 } 1442 1443 bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es, 1444 GEN8_PML4ES_PER_PML4); 1445 1446 return 0; 1447 1448 err_out: 1449 for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) 1450 gen8_ppgtt_cleanup_3lvl(to_i915(vm->dev), pml4->pdps[pml4e]); 1451 1452 return ret; 1453 } 1454 1455 static int gen8_alloc_va_range(struct i915_address_space *vm, 1456 uint64_t start, uint64_t length) 1457 { 1458 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1459 1460 if (USES_FULL_48BIT_PPGTT(to_i915(vm->dev))) 1461 return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length); 1462 else 1463 return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); 1464 } 1465 1466 static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp, 1467 uint64_t start, uint64_t length, 1468 gen8_pte_t scratch_pte, 1469 struct seq_file *m) 1470 { 1471 struct i915_page_directory *pd; 1472 uint32_t pdpe; 1473 1474 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1475 struct i915_page_table *pt; 1476 uint64_t pd_len = length; 1477 uint64_t pd_start = start; 1478 uint32_t pde; 1479 1480 if (!test_bit(pdpe, pdp->used_pdpes)) 1481 continue; 1482 1483 seq_printf(m, "\tPDPE #%d\n", pdpe); 1484 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { 1485 uint32_t pte; 1486 gen8_pte_t *pt_vaddr; 1487 1488 if (!test_bit(pde, pd->used_pdes)) 1489 continue; 1490 1491 pt_vaddr = kmap_px(pt); 1492 for (pte = 0; pte < GEN8_PTES; pte += 4) { 1493 uint64_t va = 1494 (pdpe << GEN8_PDPE_SHIFT) | 1495 (pde << GEN8_PDE_SHIFT) | 1496 (pte << GEN8_PTE_SHIFT); 1497 int i; 1498 bool found = false; 1499 1500 for (i = 0; i < 4; i++) 1501 if (pt_vaddr[pte + i] != scratch_pte) 1502 found = true; 1503 if (!found) 1504 continue; 1505 1506 seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte); 1507 for (i = 0; i < 4; i++) { 1508 if (pt_vaddr[pte + i] != scratch_pte) 1509 seq_printf(m, " %llx", pt_vaddr[pte + i]); 1510 else 1511 seq_puts(m, " SCRATCH "); 1512 } 1513 seq_puts(m, "\n"); 1514 } 1515 /* don't use kunmap_px, it could trigger 1516 * an unnecessary flush. 1517 */ 1518 kunmap_atomic(pt_vaddr); 1519 } 1520 } 1521 } 1522 1523 static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) 1524 { 1525 struct i915_address_space *vm = &ppgtt->base; 1526 uint64_t start = ppgtt->base.start; 1527 uint64_t length = ppgtt->base.total; 1528 gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, 1529 I915_CACHE_LLC); 1530 1531 if (!USES_FULL_48BIT_PPGTT(to_i915(vm->dev))) { 1532 gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m); 1533 } else { 1534 uint64_t pml4e; 1535 struct i915_pml4 *pml4 = &ppgtt->pml4; 1536 struct i915_page_directory_pointer *pdp; 1537 1538 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1539 if (!test_bit(pml4e, pml4->used_pml4es)) 1540 continue; 1541 1542 seq_printf(m, " PML4E #%llu\n", pml4e); 1543 gen8_dump_pdp(pdp, start, length, scratch_pte, m); 1544 } 1545 } 1546 } 1547 1548 static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt) 1549 { 1550 unsigned long *new_page_dirs, *new_page_tables; 1551 uint32_t pdpes = I915_PDPES_PER_PDP(to_i915(ppgtt->base.dev)); 1552 int ret; 1553 1554 /* We allocate temp bitmap for page tables for no gain 1555 * but as this is for init only, lets keep the things simple 1556 */ 1557 ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); 1558 if (ret) 1559 return ret; 1560 1561 /* Allocate for all pdps regardless of how the ppgtt 1562 * was defined. 1563 */ 1564 ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp, 1565 0, 1ULL << 32, 1566 new_page_dirs); 1567 if (!ret) 1568 *ppgtt->pdp.used_pdpes = *new_page_dirs; 1569 1570 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1571 1572 return ret; 1573 } 1574 1575 /* 1576 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers 1577 * with a net effect resembling a 2-level page table in normal x86 terms. Each 1578 * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address 1579 * space. 1580 * 1581 */ 1582 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 1583 { 1584 struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev); 1585 int ret; 1586 1587 ret = gen8_init_scratch(&ppgtt->base); 1588 if (ret) 1589 return ret; 1590 1591 ppgtt->base.start = 0; 1592 ppgtt->base.cleanup = gen8_ppgtt_cleanup; 1593 ppgtt->base.allocate_va_range = gen8_alloc_va_range; 1594 ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; 1595 ppgtt->base.clear_range = gen8_ppgtt_clear_range; 1596 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 1597 ppgtt->base.bind_vma = ppgtt_bind_vma; 1598 ppgtt->debug_dump = gen8_dump_ppgtt; 1599 1600 if (USES_FULL_48BIT_PPGTT(dev_priv)) { 1601 ret = setup_px(dev_priv, &ppgtt->pml4); 1602 if (ret) 1603 goto free_scratch; 1604 1605 gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4); 1606 1607 ppgtt->base.total = 1ULL << 48; 1608 ppgtt->switch_mm = gen8_48b_mm_switch; 1609 } else { 1610 ret = __pdp_init(dev_priv, &ppgtt->pdp); 1611 if (ret) 1612 goto free_scratch; 1613 1614 ppgtt->base.total = 1ULL << 32; 1615 ppgtt->switch_mm = gen8_legacy_mm_switch; 1616 trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, 1617 0, 0, 1618 GEN8_PML4E_SHIFT); 1619 1620 if (intel_vgpu_active(dev_priv)) { 1621 ret = gen8_preallocate_top_level_pdps(ppgtt); 1622 if (ret) 1623 goto free_scratch; 1624 } 1625 } 1626 1627 if (intel_vgpu_active(dev_priv)) 1628 gen8_ppgtt_notify_vgt(ppgtt, true); 1629 1630 return 0; 1631 1632 free_scratch: 1633 gen8_free_scratch(&ppgtt->base); 1634 return ret; 1635 } 1636 1637 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) 1638 { 1639 struct i915_address_space *vm = &ppgtt->base; 1640 struct i915_page_table *unused; 1641 gen6_pte_t scratch_pte; 1642 uint32_t pd_entry; 1643 uint32_t pte, pde; 1644 uint32_t start = ppgtt->base.start, length = ppgtt->base.total; 1645 1646 scratch_pte = vm->pte_encode(vm->scratch_page.daddr, 1647 I915_CACHE_LLC, 0); 1648 1649 gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) { 1650 u32 expected; 1651 gen6_pte_t *pt_vaddr; 1652 const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]); 1653 pd_entry = readl(ppgtt->pd_addr + pde); 1654 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID); 1655 1656 if (pd_entry != expected) 1657 seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n", 1658 pde, 1659 pd_entry, 1660 expected); 1661 seq_printf(m, "\tPDE: %x\n", pd_entry); 1662 1663 pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]); 1664 1665 for (pte = 0; pte < GEN6_PTES; pte+=4) { 1666 unsigned long va = 1667 (pde * PAGE_SIZE * GEN6_PTES) + 1668 (pte * PAGE_SIZE); 1669 int i; 1670 bool found = false; 1671 for (i = 0; i < 4; i++) 1672 if (pt_vaddr[pte + i] != scratch_pte) 1673 found = true; 1674 if (!found) 1675 continue; 1676 1677 seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte); 1678 for (i = 0; i < 4; i++) { 1679 if (pt_vaddr[pte + i] != scratch_pte) 1680 seq_printf(m, " %08x", pt_vaddr[pte + i]); 1681 else 1682 seq_puts(m, " SCRATCH "); 1683 } 1684 seq_puts(m, "\n"); 1685 } 1686 kunmap_px(ppgtt, pt_vaddr); 1687 } 1688 } 1689 1690 /* Write pde (index) from the page directory @pd to the page table @pt */ 1691 static void gen6_write_pde(struct i915_page_directory *pd, 1692 const int pde, struct i915_page_table *pt) 1693 { 1694 /* Caller needs to make sure the write completes if necessary */ 1695 struct i915_hw_ppgtt *ppgtt = 1696 container_of(pd, struct i915_hw_ppgtt, pd); 1697 u32 pd_entry; 1698 1699 pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt)); 1700 pd_entry |= GEN6_PDE_VALID; 1701 1702 writel(pd_entry, ppgtt->pd_addr + pde); 1703 } 1704 1705 /* Write all the page tables found in the ppgtt structure to incrementing page 1706 * directories. */ 1707 static void gen6_write_page_range(struct drm_i915_private *dev_priv, 1708 struct i915_page_directory *pd, 1709 uint32_t start, uint32_t length) 1710 { 1711 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1712 struct i915_page_table *pt; 1713 uint32_t pde; 1714 1715 gen6_for_each_pde(pt, pd, start, length, pde) 1716 gen6_write_pde(pd, pde, pt); 1717 1718 /* Make sure write is complete before other code can use this page 1719 * table. Also require for WC mapped PTEs */ 1720 readl(ggtt->gsm); 1721 } 1722 1723 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) 1724 { 1725 BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); 1726 1727 return (ppgtt->pd.base.ggtt_offset / 64) << 16; 1728 } 1729 1730 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, 1731 struct drm_i915_gem_request *req) 1732 { 1733 struct intel_ring *ring = req->ring; 1734 struct intel_engine_cs *engine = req->engine; 1735 int ret; 1736 1737 /* NB: TLBs must be flushed and invalidated before a switch */ 1738 ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); 1739 if (ret) 1740 return ret; 1741 1742 ret = intel_ring_begin(req, 6); 1743 if (ret) 1744 return ret; 1745 1746 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); 1747 intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); 1748 intel_ring_emit(ring, PP_DIR_DCLV_2G); 1749 intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); 1750 intel_ring_emit(ring, get_pd_offset(ppgtt)); 1751 intel_ring_emit(ring, MI_NOOP); 1752 intel_ring_advance(ring); 1753 1754 return 0; 1755 } 1756 1757 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, 1758 struct drm_i915_gem_request *req) 1759 { 1760 struct intel_ring *ring = req->ring; 1761 struct intel_engine_cs *engine = req->engine; 1762 int ret; 1763 1764 /* NB: TLBs must be flushed and invalidated before a switch */ 1765 ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); 1766 if (ret) 1767 return ret; 1768 1769 ret = intel_ring_begin(req, 6); 1770 if (ret) 1771 return ret; 1772 1773 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); 1774 intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); 1775 intel_ring_emit(ring, PP_DIR_DCLV_2G); 1776 intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); 1777 intel_ring_emit(ring, get_pd_offset(ppgtt)); 1778 intel_ring_emit(ring, MI_NOOP); 1779 intel_ring_advance(ring); 1780 1781 /* XXX: RCS is the only one to auto invalidate the TLBs? */ 1782 if (engine->id != RCS) { 1783 ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); 1784 if (ret) 1785 return ret; 1786 } 1787 1788 return 0; 1789 } 1790 1791 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, 1792 struct drm_i915_gem_request *req) 1793 { 1794 struct intel_engine_cs *engine = req->engine; 1795 struct drm_i915_private *dev_priv = req->i915; 1796 1797 I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); 1798 I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt)); 1799 return 0; 1800 } 1801 1802 static void gen8_ppgtt_enable(struct drm_i915_private *dev_priv) 1803 { 1804 struct intel_engine_cs *engine; 1805 enum intel_engine_id id; 1806 1807 for_each_engine(engine, dev_priv, id) { 1808 u32 four_level = USES_FULL_48BIT_PPGTT(dev_priv) ? 1809 GEN8_GFX_PPGTT_48B : 0; 1810 I915_WRITE(RING_MODE_GEN7(engine), 1811 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level)); 1812 } 1813 } 1814 1815 static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv) 1816 { 1817 struct intel_engine_cs *engine; 1818 uint32_t ecochk, ecobits; 1819 enum intel_engine_id id; 1820 1821 ecobits = I915_READ(GAC_ECO_BITS); 1822 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B); 1823 1824 ecochk = I915_READ(GAM_ECOCHK); 1825 if (IS_HASWELL(dev_priv)) { 1826 ecochk |= ECOCHK_PPGTT_WB_HSW; 1827 } else { 1828 ecochk |= ECOCHK_PPGTT_LLC_IVB; 1829 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB; 1830 } 1831 I915_WRITE(GAM_ECOCHK, ecochk); 1832 1833 for_each_engine(engine, dev_priv, id) { 1834 /* GFX_MODE is per-ring on gen7+ */ 1835 I915_WRITE(RING_MODE_GEN7(engine), 1836 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 1837 } 1838 } 1839 1840 static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv) 1841 { 1842 uint32_t ecochk, gab_ctl, ecobits; 1843 1844 ecobits = I915_READ(GAC_ECO_BITS); 1845 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT | 1846 ECOBITS_PPGTT_CACHE64B); 1847 1848 gab_ctl = I915_READ(GAB_CTL); 1849 I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT); 1850 1851 ecochk = I915_READ(GAM_ECOCHK); 1852 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B); 1853 1854 I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 1855 } 1856 1857 /* PPGTT support for Sandybdrige/Gen6 and later */ 1858 static void gen6_ppgtt_clear_range(struct i915_address_space *vm, 1859 uint64_t start, 1860 uint64_t length) 1861 { 1862 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1863 gen6_pte_t *pt_vaddr, scratch_pte; 1864 unsigned first_entry = start >> PAGE_SHIFT; 1865 unsigned num_entries = length >> PAGE_SHIFT; 1866 unsigned act_pt = first_entry / GEN6_PTES; 1867 unsigned first_pte = first_entry % GEN6_PTES; 1868 unsigned last_pte, i; 1869 1870 scratch_pte = vm->pte_encode(vm->scratch_page.daddr, 1871 I915_CACHE_LLC, 0); 1872 1873 while (num_entries) { 1874 last_pte = first_pte + num_entries; 1875 if (last_pte > GEN6_PTES) 1876 last_pte = GEN6_PTES; 1877 1878 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); 1879 1880 for (i = first_pte; i < last_pte; i++) 1881 pt_vaddr[i] = scratch_pte; 1882 1883 kunmap_px(ppgtt, pt_vaddr); 1884 1885 num_entries -= last_pte - first_pte; 1886 first_pte = 0; 1887 act_pt++; 1888 } 1889 } 1890 1891 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, 1892 struct sg_table *pages, 1893 uint64_t start, 1894 enum i915_cache_level cache_level, u32 flags) 1895 { 1896 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1897 unsigned first_entry = start >> PAGE_SHIFT; 1898 unsigned act_pt = first_entry / GEN6_PTES; 1899 unsigned act_pte = first_entry % GEN6_PTES; 1900 gen6_pte_t *pt_vaddr = NULL; 1901 struct sgt_iter sgt_iter; 1902 dma_addr_t addr; 1903 1904 for_each_sgt_dma(addr, sgt_iter, pages) { 1905 if (pt_vaddr == NULL) 1906 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); 1907 1908 pt_vaddr[act_pte] = 1909 vm->pte_encode(addr, cache_level, flags); 1910 1911 if (++act_pte == GEN6_PTES) { 1912 kunmap_px(ppgtt, pt_vaddr); 1913 pt_vaddr = NULL; 1914 act_pt++; 1915 act_pte = 0; 1916 } 1917 } 1918 1919 if (pt_vaddr) 1920 kunmap_px(ppgtt, pt_vaddr); 1921 } 1922 1923 static int gen6_alloc_va_range(struct i915_address_space *vm, 1924 uint64_t start_in, uint64_t length_in) 1925 { 1926 DECLARE_BITMAP(new_page_tables, I915_PDES); 1927 struct drm_i915_private *dev_priv = to_i915(vm->dev); 1928 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1929 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1930 struct i915_page_table *pt; 1931 uint32_t start, length, start_save, length_save; 1932 uint32_t pde; 1933 int ret; 1934 1935 if (WARN_ON(start_in + length_in > ppgtt->base.total)) 1936 return -ENODEV; 1937 1938 start = start_save = start_in; 1939 length = length_save = length_in; 1940 1941 bitmap_zero(new_page_tables, I915_PDES); 1942 1943 /* The allocation is done in two stages so that we can bail out with 1944 * minimal amount of pain. The first stage finds new page tables that 1945 * need allocation. The second stage marks use ptes within the page 1946 * tables. 1947 */ 1948 gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { 1949 if (pt != vm->scratch_pt) { 1950 WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES)); 1951 continue; 1952 } 1953 1954 /* We've already allocated a page table */ 1955 WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES)); 1956 1957 pt = alloc_pt(dev_priv); 1958 if (IS_ERR(pt)) { 1959 ret = PTR_ERR(pt); 1960 goto unwind_out; 1961 } 1962 1963 gen6_initialize_pt(vm, pt); 1964 1965 ppgtt->pd.page_table[pde] = pt; 1966 __set_bit(pde, new_page_tables); 1967 trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT); 1968 } 1969 1970 start = start_save; 1971 length = length_save; 1972 1973 gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { 1974 DECLARE_BITMAP(tmp_bitmap, GEN6_PTES); 1975 1976 bitmap_zero(tmp_bitmap, GEN6_PTES); 1977 bitmap_set(tmp_bitmap, gen6_pte_index(start), 1978 gen6_pte_count(start, length)); 1979 1980 if (__test_and_clear_bit(pde, new_page_tables)) 1981 gen6_write_pde(&ppgtt->pd, pde, pt); 1982 1983 trace_i915_page_table_entry_map(vm, pde, pt, 1984 gen6_pte_index(start), 1985 gen6_pte_count(start, length), 1986 GEN6_PTES); 1987 bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes, 1988 GEN6_PTES); 1989 } 1990 1991 WARN_ON(!bitmap_empty(new_page_tables, I915_PDES)); 1992 1993 /* Make sure write is complete before other code can use this page 1994 * table. Also require for WC mapped PTEs */ 1995 readl(ggtt->gsm); 1996 1997 mark_tlbs_dirty(ppgtt); 1998 return 0; 1999 2000 unwind_out: 2001 for_each_set_bit(pde, new_page_tables, I915_PDES) { 2002 struct i915_page_table *pt = ppgtt->pd.page_table[pde]; 2003 2004 ppgtt->pd.page_table[pde] = vm->scratch_pt; 2005 free_pt(dev_priv, pt); 2006 } 2007 2008 mark_tlbs_dirty(ppgtt); 2009 return ret; 2010 } 2011 2012 static int gen6_init_scratch(struct i915_address_space *vm) 2013 { 2014 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2015 int ret; 2016 2017 ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA); 2018 if (ret) 2019 return ret; 2020 2021 vm->scratch_pt = alloc_pt(dev_priv); 2022 if (IS_ERR(vm->scratch_pt)) { 2023 cleanup_scratch_page(dev_priv, &vm->scratch_page); 2024 return PTR_ERR(vm->scratch_pt); 2025 } 2026 2027 gen6_initialize_pt(vm, vm->scratch_pt); 2028 2029 return 0; 2030 } 2031 2032 static void gen6_free_scratch(struct i915_address_space *vm) 2033 { 2034 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2035 2036 free_pt(dev_priv, vm->scratch_pt); 2037 cleanup_scratch_page(dev_priv, &vm->scratch_page); 2038 } 2039 2040 static void gen6_ppgtt_cleanup(struct i915_address_space *vm) 2041 { 2042 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 2043 struct i915_page_directory *pd = &ppgtt->pd; 2044 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2045 struct i915_page_table *pt; 2046 uint32_t pde; 2047 2048 drm_mm_remove_node(&ppgtt->node); 2049 2050 gen6_for_all_pdes(pt, pd, pde) 2051 if (pt != vm->scratch_pt) 2052 free_pt(dev_priv, pt); 2053 2054 gen6_free_scratch(vm); 2055 } 2056 2057 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) 2058 { 2059 struct i915_address_space *vm = &ppgtt->base; 2060 struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev); 2061 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2062 bool retried = false; 2063 int ret; 2064 2065 /* PPGTT PDEs reside in the GGTT and consists of 512 entries. The 2066 * allocator works in address space sizes, so it's multiplied by page 2067 * size. We allocate at the top of the GTT to avoid fragmentation. 2068 */ 2069 BUG_ON(!drm_mm_initialized(&ggtt->base.mm)); 2070 2071 ret = gen6_init_scratch(vm); 2072 if (ret) 2073 return ret; 2074 2075 alloc: 2076 ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, 2077 &ppgtt->node, GEN6_PD_SIZE, 2078 GEN6_PD_ALIGN, 0, 2079 0, ggtt->base.total, 2080 DRM_MM_TOPDOWN); 2081 if (ret == -ENOSPC && !retried) { 2082 ret = i915_gem_evict_something(&ggtt->base, 2083 GEN6_PD_SIZE, GEN6_PD_ALIGN, 2084 I915_CACHE_NONE, 2085 0, ggtt->base.total, 2086 0); 2087 if (ret) 2088 goto err_out; 2089 2090 retried = true; 2091 goto alloc; 2092 } 2093 2094 if (ret) 2095 goto err_out; 2096 2097 2098 if (ppgtt->node.start < ggtt->mappable_end) 2099 DRM_DEBUG("Forced to use aperture for PDEs\n"); 2100 2101 return 0; 2102 2103 err_out: 2104 gen6_free_scratch(vm); 2105 return ret; 2106 } 2107 2108 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) 2109 { 2110 return gen6_ppgtt_allocate_page_directories(ppgtt); 2111 } 2112 2113 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt, 2114 uint64_t start, uint64_t length) 2115 { 2116 struct i915_page_table *unused; 2117 uint32_t pde; 2118 2119 gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) 2120 ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt; 2121 } 2122 2123 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 2124 { 2125 struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev); 2126 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2127 int ret; 2128 2129 ppgtt->base.pte_encode = ggtt->base.pte_encode; 2130 if (intel_vgpu_active(dev_priv) || IS_GEN6(dev_priv)) 2131 ppgtt->switch_mm = gen6_mm_switch; 2132 else if (IS_HASWELL(dev_priv)) 2133 ppgtt->switch_mm = hsw_mm_switch; 2134 else if (IS_GEN7(dev_priv)) 2135 ppgtt->switch_mm = gen7_mm_switch; 2136 else 2137 BUG(); 2138 2139 ret = gen6_ppgtt_alloc(ppgtt); 2140 if (ret) 2141 return ret; 2142 2143 ppgtt->base.allocate_va_range = gen6_alloc_va_range; 2144 ppgtt->base.clear_range = gen6_ppgtt_clear_range; 2145 ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; 2146 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 2147 ppgtt->base.bind_vma = ppgtt_bind_vma; 2148 ppgtt->base.cleanup = gen6_ppgtt_cleanup; 2149 ppgtt->base.start = 0; 2150 ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE; 2151 ppgtt->debug_dump = gen6_dump_ppgtt; 2152 2153 ppgtt->pd.base.ggtt_offset = 2154 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t); 2155 2156 ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + 2157 ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t); 2158 2159 gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total); 2160 2161 gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total); 2162 2163 DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n", 2164 ppgtt->node.size >> 20, 2165 ppgtt->node.start / PAGE_SIZE); 2166 2167 DRM_DEBUG("Adding PPGTT at offset %x\n", 2168 ppgtt->pd.base.ggtt_offset << 10); 2169 2170 return 0; 2171 } 2172 2173 static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt, 2174 struct drm_i915_private *dev_priv) 2175 { 2176 ppgtt->base.dev = &dev_priv->drm; 2177 2178 if (INTEL_INFO(dev_priv)->gen < 8) 2179 return gen6_ppgtt_init(ppgtt); 2180 else 2181 return gen8_ppgtt_init(ppgtt); 2182 } 2183 2184 static void i915_address_space_init(struct i915_address_space *vm, 2185 struct drm_i915_private *dev_priv, 2186 const char *name) 2187 { 2188 i915_gem_timeline_init(dev_priv, &vm->timeline, name); 2189 drm_mm_init(&vm->mm, vm->start, vm->total); 2190 INIT_LIST_HEAD(&vm->active_list); 2191 INIT_LIST_HEAD(&vm->inactive_list); 2192 INIT_LIST_HEAD(&vm->unbound_list); 2193 list_add_tail(&vm->global_link, &dev_priv->vm_list); 2194 } 2195 2196 static void i915_address_space_fini(struct i915_address_space *vm) 2197 { 2198 i915_gem_timeline_fini(&vm->timeline); 2199 drm_mm_takedown(&vm->mm); 2200 list_del(&vm->global_link); 2201 } 2202 2203 static void gtt_write_workarounds(struct drm_i915_private *dev_priv) 2204 { 2205 /* This function is for gtt related workarounds. This function is 2206 * called on driver load and after a GPU reset, so you can place 2207 * workarounds here even if they get overwritten by GPU reset. 2208 */ 2209 /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */ 2210 if (IS_BROADWELL(dev_priv)) 2211 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); 2212 else if (IS_CHERRYVIEW(dev_priv)) 2213 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); 2214 else if (IS_SKYLAKE(dev_priv)) 2215 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); 2216 else if (IS_BROXTON(dev_priv)) 2217 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); 2218 } 2219 2220 static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt, 2221 struct drm_i915_private *dev_priv, 2222 struct drm_i915_file_private *file_priv, 2223 const char *name) 2224 { 2225 int ret; 2226 2227 ret = __hw_ppgtt_init(ppgtt, dev_priv); 2228 if (ret == 0) { 2229 kref_init(&ppgtt->ref); 2230 i915_address_space_init(&ppgtt->base, dev_priv, name); 2231 ppgtt->base.file = file_priv; 2232 } 2233 2234 return ret; 2235 } 2236 2237 int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv) 2238 { 2239 gtt_write_workarounds(dev_priv); 2240 2241 /* In the case of execlists, PPGTT is enabled by the context descriptor 2242 * and the PDPs are contained within the context itself. We don't 2243 * need to do anything here. */ 2244 if (i915.enable_execlists) 2245 return 0; 2246 2247 if (!USES_PPGTT(dev_priv)) 2248 return 0; 2249 2250 if (IS_GEN6(dev_priv)) 2251 gen6_ppgtt_enable(dev_priv); 2252 else if (IS_GEN7(dev_priv)) 2253 gen7_ppgtt_enable(dev_priv); 2254 else if (INTEL_GEN(dev_priv) >= 8) 2255 gen8_ppgtt_enable(dev_priv); 2256 else 2257 MISSING_CASE(INTEL_GEN(dev_priv)); 2258 2259 return 0; 2260 } 2261 2262 struct i915_hw_ppgtt * 2263 i915_ppgtt_create(struct drm_i915_private *dev_priv, 2264 struct drm_i915_file_private *fpriv, 2265 const char *name) 2266 { 2267 struct i915_hw_ppgtt *ppgtt; 2268 int ret; 2269 2270 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 2271 if (!ppgtt) 2272 return ERR_PTR(-ENOMEM); 2273 2274 ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv, name); 2275 if (ret) { 2276 kfree(ppgtt); 2277 return ERR_PTR(ret); 2278 } 2279 2280 trace_i915_ppgtt_create(&ppgtt->base); 2281 2282 return ppgtt; 2283 } 2284 2285 void i915_ppgtt_release(struct kref *kref) 2286 { 2287 struct i915_hw_ppgtt *ppgtt = 2288 container_of(kref, struct i915_hw_ppgtt, ref); 2289 2290 trace_i915_ppgtt_release(&ppgtt->base); 2291 2292 /* vmas should already be unbound and destroyed */ 2293 WARN_ON(!list_empty(&ppgtt->base.active_list)); 2294 WARN_ON(!list_empty(&ppgtt->base.inactive_list)); 2295 WARN_ON(!list_empty(&ppgtt->base.unbound_list)); 2296 2297 i915_address_space_fini(&ppgtt->base); 2298 2299 ppgtt->base.cleanup(&ppgtt->base); 2300 kfree(ppgtt); 2301 } 2302 2303 /* Certain Gen5 chipsets require require idling the GPU before 2304 * unmapping anything from the GTT when VT-d is enabled. 2305 */ 2306 static bool needs_idle_maps(struct drm_i915_private *dev_priv) 2307 { 2308 #ifdef CONFIG_INTEL_IOMMU 2309 /* Query intel_iommu to see if we need the workaround. Presumably that 2310 * was loaded first. 2311 */ 2312 if (IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_iommu_gfx_mapped) 2313 return true; 2314 #endif 2315 return false; 2316 } 2317 2318 void i915_check_and_clear_faults(struct drm_i915_private *dev_priv) 2319 { 2320 struct intel_engine_cs *engine; 2321 enum intel_engine_id id; 2322 2323 if (INTEL_INFO(dev_priv)->gen < 6) 2324 return; 2325 2326 for_each_engine(engine, dev_priv, id) { 2327 u32 fault_reg; 2328 fault_reg = I915_READ(RING_FAULT_REG(engine)); 2329 if (fault_reg & RING_FAULT_VALID) { 2330 DRM_DEBUG_DRIVER("Unexpected fault\n" 2331 "\tAddr: 0x%08ux\n" 2332 "\tAddress space: %s\n" 2333 "\tSource ID: %d\n" 2334 "\tType: %d\n", 2335 fault_reg & LINUX_PAGE_MASK, 2336 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT", 2337 RING_FAULT_SRCID(fault_reg), 2338 RING_FAULT_FAULT_TYPE(fault_reg)); 2339 I915_WRITE(RING_FAULT_REG(engine), 2340 fault_reg & ~RING_FAULT_VALID); 2341 } 2342 } 2343 2344 /* Engine specific init may not have been done till this point. */ 2345 if (dev_priv->engine[RCS]) 2346 POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS])); 2347 } 2348 2349 static void i915_ggtt_flush(struct drm_i915_private *dev_priv) 2350 { 2351 if (INTEL_INFO(dev_priv)->gen < 6) { 2352 intel_gtt_chipset_flush(); 2353 } else { 2354 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2355 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2356 } 2357 } 2358 2359 void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv) 2360 { 2361 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2362 2363 /* Don't bother messing with faults pre GEN6 as we have little 2364 * documentation supporting that it's a good idea. 2365 */ 2366 if (INTEL_GEN(dev_priv) < 6) 2367 return; 2368 2369 i915_check_and_clear_faults(dev_priv); 2370 2371 ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total); 2372 2373 i915_ggtt_flush(dev_priv); 2374 } 2375 2376 int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj, 2377 struct sg_table *pages) 2378 { 2379 if (dma_map_sg(&obj->base.dev->pdev->dev, 2380 pages->sgl, pages->nents, 2381 PCI_DMA_BIDIRECTIONAL)) 2382 return 0; 2383 2384 return -ENOSPC; 2385 } 2386 2387 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) 2388 { 2389 writeq(pte, addr); 2390 } 2391 2392 static void gen8_ggtt_insert_page(struct i915_address_space *vm, 2393 dma_addr_t addr, 2394 uint64_t offset, 2395 enum i915_cache_level level, 2396 u32 unused) 2397 { 2398 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2399 gen8_pte_t __iomem *pte = 2400 (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + 2401 (offset >> PAGE_SHIFT); 2402 2403 gen8_set_pte(pte, gen8_pte_encode(addr, level)); 2404 2405 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2406 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2407 } 2408 2409 static void gen8_ggtt_insert_entries(struct i915_address_space *vm, 2410 struct sg_table *st, 2411 uint64_t start, 2412 enum i915_cache_level level, u32 unused) 2413 { 2414 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2415 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2416 struct sgt_iter sgt_iter; 2417 gen8_pte_t __iomem *gtt_entries; 2418 gen8_pte_t gtt_entry; 2419 dma_addr_t addr; 2420 int i = 0; 2421 2422 gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT); 2423 2424 for_each_sgt_dma(addr, sgt_iter, st) { 2425 gtt_entry = gen8_pte_encode(addr, level); 2426 gen8_set_pte(>t_entries[i++], gtt_entry); 2427 } 2428 2429 /* 2430 * XXX: This serves as a posting read to make sure that the PTE has 2431 * actually been updated. There is some concern that even though 2432 * registers and PTEs are within the same BAR that they are potentially 2433 * of NUMA access patterns. Therefore, even with the way we assume 2434 * hardware should work, we must keep this posting read for paranoia. 2435 */ 2436 if (i != 0) 2437 WARN_ON(readq(>t_entries[i-1]) != gtt_entry); 2438 2439 /* This next bit makes the above posting read even more important. We 2440 * want to flush the TLBs only after we're certain all the PTE updates 2441 * have finished. 2442 */ 2443 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2444 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2445 } 2446 2447 struct insert_entries { 2448 struct i915_address_space *vm; 2449 struct sg_table *st; 2450 uint64_t start; 2451 enum i915_cache_level level; 2452 u32 flags; 2453 }; 2454 2455 static int gen8_ggtt_insert_entries__cb(void *_arg) 2456 { 2457 struct insert_entries *arg = _arg; 2458 gen8_ggtt_insert_entries(arg->vm, arg->st, 2459 arg->start, arg->level, arg->flags); 2460 return 0; 2461 } 2462 2463 static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm, 2464 struct sg_table *st, 2465 uint64_t start, 2466 enum i915_cache_level level, 2467 u32 flags) 2468 { 2469 struct insert_entries arg = { vm, st, start, level, flags }; 2470 stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL); 2471 } 2472 2473 static void gen6_ggtt_insert_page(struct i915_address_space *vm, 2474 dma_addr_t addr, 2475 uint64_t offset, 2476 enum i915_cache_level level, 2477 u32 flags) 2478 { 2479 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2480 gen6_pte_t __iomem *pte = 2481 (gen6_pte_t __iomem *)dev_priv->ggtt.gsm + 2482 (offset >> PAGE_SHIFT); 2483 2484 iowrite32(vm->pte_encode(addr, level, flags), pte); 2485 2486 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2487 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2488 } 2489 2490 /* 2491 * Binds an object into the global gtt with the specified cache level. The object 2492 * will be accessible to the GPU via commands whose operands reference offsets 2493 * within the global GTT as well as accessible by the GPU through the GMADR 2494 * mapped BAR (dev_priv->mm.gtt->gtt). 2495 */ 2496 static void gen6_ggtt_insert_entries(struct i915_address_space *vm, 2497 struct sg_table *st, 2498 uint64_t start, 2499 enum i915_cache_level level, u32 flags) 2500 { 2501 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2502 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2503 struct sgt_iter sgt_iter; 2504 gen6_pte_t __iomem *gtt_entries; 2505 gen6_pte_t gtt_entry; 2506 dma_addr_t addr; 2507 int i = 0; 2508 2509 gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT); 2510 2511 for_each_sgt_dma(addr, sgt_iter, st) { 2512 gtt_entry = vm->pte_encode(addr, level, flags); 2513 iowrite32(gtt_entry, >t_entries[i++]); 2514 } 2515 2516 /* XXX: This serves as a posting read to make sure that the PTE has 2517 * actually been updated. There is some concern that even though 2518 * registers and PTEs are within the same BAR that they are potentially 2519 * of NUMA access patterns. Therefore, even with the way we assume 2520 * hardware should work, we must keep this posting read for paranoia. 2521 */ 2522 if (i != 0) 2523 WARN_ON(readl(>t_entries[i-1]) != gtt_entry); 2524 2525 /* This next bit makes the above posting read even more important. We 2526 * want to flush the TLBs only after we're certain all the PTE updates 2527 * have finished. 2528 */ 2529 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2530 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2531 } 2532 2533 static void nop_clear_range(struct i915_address_space *vm, 2534 uint64_t start, uint64_t length) 2535 { 2536 } 2537 2538 static void gen8_ggtt_clear_range(struct i915_address_space *vm, 2539 uint64_t start, uint64_t length) 2540 { 2541 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2542 unsigned first_entry = start >> PAGE_SHIFT; 2543 unsigned num_entries = length >> PAGE_SHIFT; 2544 gen8_pte_t scratch_pte, __iomem *gtt_base = 2545 (gen8_pte_t __iomem *)ggtt->gsm + first_entry; 2546 const int max_entries = ggtt_total_entries(ggtt) - first_entry; 2547 int i; 2548 2549 if (WARN(num_entries > max_entries, 2550 "First entry = %d; Num entries = %d (max=%d)\n", 2551 first_entry, num_entries, max_entries)) 2552 num_entries = max_entries; 2553 2554 scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, 2555 I915_CACHE_LLC); 2556 for (i = 0; i < num_entries; i++) 2557 gen8_set_pte(>t_base[i], scratch_pte); 2558 readl(gtt_base); 2559 } 2560 2561 static void gen6_ggtt_clear_range(struct i915_address_space *vm, 2562 uint64_t start, 2563 uint64_t length) 2564 { 2565 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2566 unsigned first_entry = start >> PAGE_SHIFT; 2567 unsigned num_entries = length >> PAGE_SHIFT; 2568 gen6_pte_t scratch_pte, __iomem *gtt_base = 2569 (gen6_pte_t __iomem *)ggtt->gsm + first_entry; 2570 const int max_entries = ggtt_total_entries(ggtt) - first_entry; 2571 int i; 2572 2573 if (WARN(num_entries > max_entries, 2574 "First entry = %d; Num entries = %d (max=%d)\n", 2575 first_entry, num_entries, max_entries)) 2576 num_entries = max_entries; 2577 2578 scratch_pte = vm->pte_encode(vm->scratch_page.daddr, 2579 I915_CACHE_LLC, 0); 2580 2581 for (i = 0; i < num_entries; i++) 2582 iowrite32(scratch_pte, >t_base[i]); 2583 readl(gtt_base); 2584 } 2585 2586 static void i915_ggtt_insert_page(struct i915_address_space *vm, 2587 dma_addr_t addr, 2588 uint64_t offset, 2589 enum i915_cache_level cache_level, 2590 u32 unused) 2591 { 2592 unsigned int flags = (cache_level == I915_CACHE_NONE) ? 2593 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; 2594 2595 intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); 2596 } 2597 2598 static void i915_ggtt_insert_entries(struct i915_address_space *vm, 2599 struct sg_table *pages, 2600 uint64_t start, 2601 enum i915_cache_level cache_level, u32 unused) 2602 { 2603 unsigned int flags = (cache_level == I915_CACHE_NONE) ? 2604 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; 2605 2606 intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags); 2607 2608 } 2609 2610 static void i915_ggtt_clear_range(struct i915_address_space *vm, 2611 uint64_t start, 2612 uint64_t length) 2613 { 2614 intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); 2615 } 2616 2617 static int ggtt_bind_vma(struct i915_vma *vma, 2618 enum i915_cache_level cache_level, 2619 u32 flags) 2620 { 2621 struct drm_i915_private *i915 = to_i915(vma->vm->dev); 2622 struct drm_i915_gem_object *obj = vma->obj; 2623 u32 pte_flags = 0; 2624 int ret; 2625 2626 ret = i915_get_ggtt_vma_pages(vma); 2627 if (ret) 2628 return ret; 2629 2630 /* Currently applicable only to VLV */ 2631 if (obj->gt_ro) 2632 pte_flags |= PTE_READ_ONLY; 2633 2634 intel_runtime_pm_get(i915); 2635 vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start, 2636 cache_level, pte_flags); 2637 intel_runtime_pm_put(i915); 2638 2639 /* 2640 * Without aliasing PPGTT there's no difference between 2641 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally 2642 * upgrade to both bound if we bind either to avoid double-binding. 2643 */ 2644 vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; 2645 2646 return 0; 2647 } 2648 2649 static int aliasing_gtt_bind_vma(struct i915_vma *vma, 2650 enum i915_cache_level cache_level, 2651 u32 flags) 2652 { 2653 struct drm_i915_private *i915 = to_i915(vma->vm->dev); 2654 u32 pte_flags; 2655 int ret; 2656 2657 ret = i915_get_ggtt_vma_pages(vma); 2658 if (ret) 2659 return ret; 2660 2661 /* Currently applicable only to VLV */ 2662 pte_flags = 0; 2663 if (vma->obj->gt_ro) 2664 pte_flags |= PTE_READ_ONLY; 2665 2666 2667 if (flags & I915_VMA_GLOBAL_BIND) { 2668 intel_runtime_pm_get(i915); 2669 vma->vm->insert_entries(vma->vm, 2670 vma->pages, vma->node.start, 2671 cache_level, pte_flags); 2672 intel_runtime_pm_put(i915); 2673 } 2674 2675 if (flags & I915_VMA_LOCAL_BIND) { 2676 struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; 2677 appgtt->base.insert_entries(&appgtt->base, 2678 vma->pages, vma->node.start, 2679 cache_level, pte_flags); 2680 } 2681 2682 return 0; 2683 } 2684 2685 static void ggtt_unbind_vma(struct i915_vma *vma) 2686 { 2687 struct drm_i915_private *i915 = to_i915(vma->vm->dev); 2688 struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; 2689 const u64 size = min(vma->size, vma->node.size); 2690 2691 if (vma->flags & I915_VMA_GLOBAL_BIND) { 2692 intel_runtime_pm_get(i915); 2693 vma->vm->clear_range(vma->vm, 2694 vma->node.start, size); 2695 intel_runtime_pm_put(i915); 2696 } 2697 2698 if (vma->flags & I915_VMA_LOCAL_BIND && appgtt) 2699 appgtt->base.clear_range(&appgtt->base, 2700 vma->node.start, size); 2701 } 2702 2703 void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, 2704 struct sg_table *pages) 2705 { 2706 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2707 struct device *kdev = &dev_priv->drm.pdev->dev; 2708 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2709 2710 if (unlikely(ggtt->do_idle_maps)) { 2711 if (i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED)) { 2712 DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); 2713 /* Wait a bit, in hopes it avoids the hang */ 2714 udelay(10); 2715 } 2716 } 2717 2718 dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL); 2719 } 2720 2721 static void i915_gtt_color_adjust(struct drm_mm_node *node, 2722 unsigned long color, 2723 u64 *start, 2724 u64 *end) 2725 { 2726 if (node->color != color) 2727 *start += 4096; 2728 2729 node = list_first_entry_or_null(&node->node_list, 2730 struct drm_mm_node, 2731 node_list); 2732 if (node && node->allocated && node->color != color) 2733 *end -= 4096; 2734 } 2735 2736 int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) 2737 { 2738 /* Let GEM Manage all of the aperture. 2739 * 2740 * However, leave one page at the end still bound to the scratch page. 2741 * There are a number of places where the hardware apparently prefetches 2742 * past the end of the object, and we've seen multiple hangs with the 2743 * GPU head pointer stuck in a batchbuffer bound at the last page of the 2744 * aperture. One page should be enough to keep any prefetching inside 2745 * of the aperture. 2746 */ 2747 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2748 unsigned long hole_start, hole_end; 2749 struct i915_hw_ppgtt *ppgtt; 2750 struct drm_mm_node *entry; 2751 int ret; 2752 unsigned long mappable = min(ggtt->base.total, ggtt->mappable_end); 2753 2754 ret = intel_vgt_balloon(dev_priv); 2755 if (ret) 2756 return ret; 2757 2758 /* Reserve a mappable slot for our lockless error capture */ 2759 ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, 2760 &ggtt->error_capture, 2761 4096, 0, -1, 2762 0, ggtt->mappable_end, 2763 0, 0); 2764 if (ret) 2765 return ret; 2766 2767 /* Clear any non-preallocated blocks */ 2768 drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) { 2769 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", 2770 hole_start, hole_end); 2771 ggtt->base.clear_range(&ggtt->base, hole_start, 2772 hole_end - hole_start); 2773 } 2774 2775 #ifdef __DragonFly__ 2776 DRM_INFO("taking over the fictitious range 0x%llx-0x%llx\n", 2777 dev_priv->ggtt.mappable_base, dev_priv->ggtt.mappable_end); 2778 vm_phys_fictitious_reg_range(dev_priv->ggtt.mappable_base, 2779 dev_priv->ggtt.mappable_base + mappable, VM_MEMATTR_WRITE_COMBINING); 2780 #endif 2781 2782 /* And finally clear the reserved guard page */ 2783 ggtt->base.clear_range(&ggtt->base, 2784 ggtt->base.total - PAGE_SIZE, PAGE_SIZE); 2785 2786 if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) { 2787 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 2788 if (!ppgtt) { 2789 ret = -ENOMEM; 2790 goto err; 2791 } 2792 2793 ret = __hw_ppgtt_init(ppgtt, dev_priv); 2794 if (ret) 2795 goto err_ppgtt; 2796 2797 if (ppgtt->base.allocate_va_range) { 2798 ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0, 2799 ppgtt->base.total); 2800 if (ret) 2801 goto err_ppgtt_cleanup; 2802 } 2803 2804 ppgtt->base.clear_range(&ppgtt->base, 2805 ppgtt->base.start, 2806 ppgtt->base.total); 2807 2808 dev_priv->mm.aliasing_ppgtt = ppgtt; 2809 WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma); 2810 ggtt->base.bind_vma = aliasing_gtt_bind_vma; 2811 } 2812 2813 return 0; 2814 2815 err_ppgtt_cleanup: 2816 ppgtt->base.cleanup(&ppgtt->base); 2817 err_ppgtt: 2818 kfree(ppgtt); 2819 err: 2820 drm_mm_remove_node(&ggtt->error_capture); 2821 return ret; 2822 } 2823 2824 /** 2825 * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization 2826 * @dev_priv: i915 device 2827 */ 2828 void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) 2829 { 2830 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2831 2832 if (dev_priv->mm.aliasing_ppgtt) { 2833 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; 2834 ppgtt->base.cleanup(&ppgtt->base); 2835 kfree(ppgtt); 2836 } 2837 2838 i915_gem_cleanup_stolen(&dev_priv->drm); 2839 2840 if (drm_mm_node_allocated(&ggtt->error_capture)) 2841 drm_mm_remove_node(&ggtt->error_capture); 2842 2843 if (drm_mm_initialized(&ggtt->base.mm)) { 2844 intel_vgt_deballoon(dev_priv); 2845 2846 mutex_lock(&dev_priv->drm.struct_mutex); 2847 i915_address_space_fini(&ggtt->base); 2848 mutex_unlock(&dev_priv->drm.struct_mutex); 2849 } 2850 2851 ggtt->base.cleanup(&ggtt->base); 2852 2853 arch_phys_wc_del(ggtt->mtrr); 2854 io_mapping_fini(&ggtt->mappable); 2855 } 2856 2857 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) 2858 { 2859 snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; 2860 snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; 2861 return snb_gmch_ctl << 20; 2862 } 2863 2864 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) 2865 { 2866 bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; 2867 bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; 2868 if (bdw_gmch_ctl) 2869 bdw_gmch_ctl = 1 << bdw_gmch_ctl; 2870 2871 #ifdef CONFIG_X86_32 2872 /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */ 2873 if (bdw_gmch_ctl > 4) 2874 bdw_gmch_ctl = 4; 2875 #endif 2876 2877 return bdw_gmch_ctl << 20; 2878 } 2879 2880 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) 2881 { 2882 gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; 2883 gmch_ctrl &= SNB_GMCH_GGMS_MASK; 2884 2885 if (gmch_ctrl) 2886 return 1 << (20 + gmch_ctrl); 2887 2888 return 0; 2889 } 2890 2891 static size_t gen6_get_stolen_size(u16 snb_gmch_ctl) 2892 { 2893 snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; 2894 snb_gmch_ctl &= SNB_GMCH_GMS_MASK; 2895 return snb_gmch_ctl << 25; /* 32 MB units */ 2896 } 2897 2898 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl) 2899 { 2900 bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2901 bdw_gmch_ctl &= BDW_GMCH_GMS_MASK; 2902 return bdw_gmch_ctl << 25; /* 32 MB units */ 2903 } 2904 2905 static size_t chv_get_stolen_size(u16 gmch_ctrl) 2906 { 2907 gmch_ctrl >>= SNB_GMCH_GMS_SHIFT; 2908 gmch_ctrl &= SNB_GMCH_GMS_MASK; 2909 2910 /* 2911 * 0x0 to 0x10: 32MB increments starting at 0MB 2912 * 0x11 to 0x16: 4MB increments starting at 8MB 2913 * 0x17 to 0x1d: 4MB increments start at 36MB 2914 */ 2915 if (gmch_ctrl < 0x11) 2916 return gmch_ctrl << 25; 2917 else if (gmch_ctrl < 0x17) 2918 return (gmch_ctrl - 0x11 + 2) << 22; 2919 else 2920 return (gmch_ctrl - 0x17 + 9) << 22; 2921 } 2922 2923 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) 2924 { 2925 gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2926 gen9_gmch_ctl &= BDW_GMCH_GMS_MASK; 2927 2928 if (gen9_gmch_ctl < 0xf0) 2929 return gen9_gmch_ctl << 25; /* 32 MB units */ 2930 else 2931 /* 4MB increments starting at 0xf0 for 4MB */ 2932 return (gen9_gmch_ctl - 0xf0 + 1) << 22; 2933 } 2934 2935 static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) 2936 { 2937 struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); 2938 struct pci_dev *pdev = ggtt->base.dev->pdev; 2939 phys_addr_t phys_addr; 2940 int ret; 2941 2942 /* For Modern GENs the PTEs and register space are split in the BAR */ 2943 phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2; 2944 2945 /* 2946 * On BXT writes larger than 64 bit to the GTT pagetable range will be 2947 * dropped. For WC mappings in general we have 64 byte burst writes 2948 * when the WC buffer is flushed, so we can't use it, but have to 2949 * resort to an uncached mapping. The WC issue is easily caught by the 2950 * readback check when writing GTT PTE entries. 2951 */ 2952 if (IS_BROXTON(dev_priv)) 2953 ggtt->gsm = ioremap_nocache(phys_addr, size); 2954 else 2955 ggtt->gsm = ioremap_wc(phys_addr, size); 2956 if (!ggtt->gsm) { 2957 DRM_ERROR("Failed to map the ggtt page table\n"); 2958 return -ENOMEM; 2959 } 2960 2961 ret = setup_scratch_page(dev_priv, &ggtt->base.scratch_page, GFP_DMA32); 2962 if (ret) { 2963 DRM_ERROR("Scratch setup failed\n"); 2964 /* iounmap will also get called at remove, but meh */ 2965 iounmap(ggtt->gsm); 2966 return ret; 2967 } 2968 2969 return 0; 2970 } 2971 2972 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability 2973 * bits. When using advanced contexts each context stores its own PAT, but 2974 * writing this data shouldn't be harmful even in those cases. */ 2975 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) 2976 { 2977 uint64_t pat; 2978 2979 pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ 2980 GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */ 2981 GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */ 2982 GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */ 2983 GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) | 2984 GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) | 2985 GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) | 2986 GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); 2987 2988 if (!USES_PPGTT(dev_priv)) 2989 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry, 2990 * so RTL will always use the value corresponding to 2991 * pat_sel = 000". 2992 * So let's disable cache for GGTT to avoid screen corruptions. 2993 * MOCS still can be used though. 2994 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work 2995 * before this patch, i.e. the same uncached + snooping access 2996 * like on gen6/7 seems to be in effect. 2997 * - So this just fixes blitter/render access. Again it looks 2998 * like it's not just uncached access, but uncached + snooping. 2999 * So we can still hold onto all our assumptions wrt cpu 3000 * clflushing on LLC machines. 3001 */ 3002 pat = GEN8_PPAT(0, GEN8_PPAT_UC); 3003 3004 /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b 3005 * write would work. */ 3006 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat); 3007 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); 3008 } 3009 3010 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) 3011 { 3012 uint64_t pat; 3013 3014 /* 3015 * Map WB on BDW to snooped on CHV. 3016 * 3017 * Only the snoop bit has meaning for CHV, the rest is 3018 * ignored. 3019 * 3020 * The hardware will never snoop for certain types of accesses: 3021 * - CPU GTT (GMADR->GGTT->no snoop->memory) 3022 * - PPGTT page tables 3023 * - some other special cycles 3024 * 3025 * As with BDW, we also need to consider the following for GT accesses: 3026 * "For GGTT, there is NO pat_sel[2:0] from the entry, 3027 * so RTL will always use the value corresponding to 3028 * pat_sel = 000". 3029 * Which means we must set the snoop bit in PAT entry 0 3030 * in order to keep the global status page working. 3031 */ 3032 pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) | 3033 GEN8_PPAT(1, 0) | 3034 GEN8_PPAT(2, 0) | 3035 GEN8_PPAT(3, 0) | 3036 GEN8_PPAT(4, CHV_PPAT_SNOOP) | 3037 GEN8_PPAT(5, CHV_PPAT_SNOOP) | 3038 GEN8_PPAT(6, CHV_PPAT_SNOOP) | 3039 GEN8_PPAT(7, CHV_PPAT_SNOOP); 3040 3041 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat); 3042 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); 3043 } 3044 3045 static void gen6_gmch_remove(struct i915_address_space *vm) 3046 { 3047 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 3048 3049 iounmap(ggtt->gsm); 3050 cleanup_scratch_page(to_i915(vm->dev), &vm->scratch_page); 3051 } 3052 3053 static int gen8_gmch_probe(struct i915_ggtt *ggtt) 3054 { 3055 struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); 3056 struct pci_dev *pdev = dev_priv->drm.pdev; 3057 unsigned int size; 3058 u16 snb_gmch_ctl; 3059 3060 /* TODO: We're not aware of mappable constraints on gen8 yet */ 3061 ggtt->mappable_base = pci_resource_start(pdev, 2); 3062 ggtt->mappable_end = pci_resource_len(pdev, 2); 3063 3064 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(39))) 3065 pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); 3066 3067 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 3068 3069 if (INTEL_GEN(dev_priv) >= 9) { 3070 ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl); 3071 size = gen8_get_total_gtt_size(snb_gmch_ctl); 3072 } else if (IS_CHERRYVIEW(dev_priv)) { 3073 ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl); 3074 size = chv_get_total_gtt_size(snb_gmch_ctl); 3075 } else { 3076 ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl); 3077 size = gen8_get_total_gtt_size(snb_gmch_ctl); 3078 } 3079 3080 ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT; 3081 3082 if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) 3083 chv_setup_private_ppat(dev_priv); 3084 else 3085 bdw_setup_private_ppat(dev_priv); 3086 3087 ggtt->base.cleanup = gen6_gmch_remove; 3088 ggtt->base.bind_vma = ggtt_bind_vma; 3089 ggtt->base.unbind_vma = ggtt_unbind_vma; 3090 ggtt->base.insert_page = gen8_ggtt_insert_page; 3091 ggtt->base.clear_range = nop_clear_range; 3092 if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv)) 3093 ggtt->base.clear_range = gen8_ggtt_clear_range; 3094 3095 ggtt->base.insert_entries = gen8_ggtt_insert_entries; 3096 if (IS_CHERRYVIEW(dev_priv)) 3097 ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL; 3098 3099 return ggtt_probe_common(ggtt, size); 3100 } 3101 3102 static int gen6_gmch_probe(struct i915_ggtt *ggtt) 3103 { 3104 struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); 3105 struct pci_dev *pdev = dev_priv->drm.pdev; 3106 unsigned int size; 3107 u16 snb_gmch_ctl; 3108 3109 ggtt->mappable_base = pci_resource_start(pdev, 2); 3110 ggtt->mappable_end = pci_resource_len(pdev, 2); 3111 3112 /* 64/512MB is the current min/max we actually know of, but this is just 3113 * a coarse sanity check. 3114 */ 3115 if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) { 3116 DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end); 3117 return -ENXIO; 3118 } 3119 3120 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(40))) 3121 pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); 3122 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 3123 3124 ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl); 3125 3126 size = gen6_get_total_gtt_size(snb_gmch_ctl); 3127 ggtt->base.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT; 3128 3129 ggtt->base.clear_range = gen6_ggtt_clear_range; 3130 ggtt->base.insert_page = gen6_ggtt_insert_page; 3131 ggtt->base.insert_entries = gen6_ggtt_insert_entries; 3132 ggtt->base.bind_vma = ggtt_bind_vma; 3133 ggtt->base.unbind_vma = ggtt_unbind_vma; 3134 ggtt->base.cleanup = gen6_gmch_remove; 3135 3136 if (HAS_EDRAM(dev_priv)) 3137 ggtt->base.pte_encode = iris_pte_encode; 3138 else if (IS_HASWELL(dev_priv)) 3139 ggtt->base.pte_encode = hsw_pte_encode; 3140 else if (IS_VALLEYVIEW(dev_priv)) 3141 ggtt->base.pte_encode = byt_pte_encode; 3142 else if (INTEL_GEN(dev_priv) >= 7) 3143 ggtt->base.pte_encode = ivb_pte_encode; 3144 else 3145 ggtt->base.pte_encode = snb_pte_encode; 3146 3147 return ggtt_probe_common(ggtt, size); 3148 } 3149 3150 static void i915_gmch_remove(struct i915_address_space *vm) 3151 { 3152 intel_gmch_remove(); 3153 } 3154 3155 static int i915_gmch_probe(struct i915_ggtt *ggtt) 3156 { 3157 struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); 3158 #if 0 3159 int ret; 3160 3161 ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL); 3162 if (!ret) { 3163 DRM_ERROR("failed to set up gmch\n"); 3164 return -EIO; 3165 } 3166 #endif 3167 3168 intel_gtt_get(&ggtt->base.total, &ggtt->stolen_size, 3169 &ggtt->mappable_base, &ggtt->mappable_end); 3170 3171 ggtt->do_idle_maps = needs_idle_maps(dev_priv); 3172 ggtt->base.insert_page = i915_ggtt_insert_page; 3173 ggtt->base.insert_entries = i915_ggtt_insert_entries; 3174 ggtt->base.clear_range = i915_ggtt_clear_range; 3175 ggtt->base.bind_vma = ggtt_bind_vma; 3176 ggtt->base.unbind_vma = ggtt_unbind_vma; 3177 ggtt->base.cleanup = i915_gmch_remove; 3178 3179 if (unlikely(ggtt->do_idle_maps)) 3180 DRM_INFO("applying Ironlake quirks for intel_iommu\n"); 3181 3182 return 0; 3183 } 3184 3185 /** 3186 * i915_ggtt_probe_hw - Probe GGTT hardware location 3187 * @dev_priv: i915 device 3188 */ 3189 int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) 3190 { 3191 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3192 int ret; 3193 3194 ggtt->base.dev = &dev_priv->drm; 3195 3196 if (INTEL_GEN(dev_priv) <= 5) 3197 ret = i915_gmch_probe(ggtt); 3198 else if (INTEL_GEN(dev_priv) < 8) 3199 ret = gen6_gmch_probe(ggtt); 3200 else 3201 ret = gen8_gmch_probe(ggtt); 3202 if (ret) 3203 return ret; 3204 3205 if ((ggtt->base.total - 1) >> 32) { 3206 DRM_ERROR("We never expected a Global GTT with more than 32bits" 3207 " of address space! Found %lldM!\n", 3208 ggtt->base.total >> 20); 3209 ggtt->base.total = 1ULL << 32; 3210 ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total); 3211 } 3212 3213 if (ggtt->mappable_end > ggtt->base.total) { 3214 DRM_ERROR("mappable aperture extends past end of GGTT," 3215 " aperture=%llx, total=%llx\n", 3216 ggtt->mappable_end, ggtt->base.total); 3217 ggtt->mappable_end = ggtt->base.total; 3218 } 3219 3220 /* GMADR is the PCI mmio aperture into the global GTT. */ 3221 DRM_INFO("Memory usable by graphics device = %lluM\n", 3222 ggtt->base.total >> 20); 3223 DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20); 3224 DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", ggtt->stolen_size >> 20); 3225 #ifdef CONFIG_INTEL_IOMMU 3226 if (intel_iommu_gfx_mapped) 3227 DRM_INFO("VT-d active for gfx access\n"); 3228 #endif 3229 3230 return 0; 3231 } 3232 3233 /** 3234 * i915_ggtt_init_hw - Initialize GGTT hardware 3235 * @dev_priv: i915 device 3236 */ 3237 int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) 3238 { 3239 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3240 int ret; 3241 3242 INIT_LIST_HEAD(&dev_priv->vm_list); 3243 3244 /* Subtract the guard page before address space initialization to 3245 * shrink the range used by drm_mm. 3246 */ 3247 mutex_lock(&dev_priv->drm.struct_mutex); 3248 ggtt->base.total -= PAGE_SIZE; 3249 i915_address_space_init(&ggtt->base, dev_priv, "[global]"); 3250 ggtt->base.total += PAGE_SIZE; 3251 if (!HAS_LLC(dev_priv)) 3252 ggtt->base.mm.color_adjust = i915_gtt_color_adjust; 3253 mutex_unlock(&dev_priv->drm.struct_mutex); 3254 3255 if (!io_mapping_init_wc(&dev_priv->ggtt.mappable, 3256 dev_priv->ggtt.mappable_base, 3257 dev_priv->ggtt.mappable_end)) { 3258 ret = -EIO; 3259 goto out_gtt_cleanup; 3260 } 3261 3262 ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base, ggtt->mappable_end); 3263 3264 /* 3265 * Initialise stolen early so that we may reserve preallocated 3266 * objects for the BIOS to KMS transition. 3267 */ 3268 ret = i915_gem_init_stolen(dev_priv); 3269 if (ret) 3270 goto out_gtt_cleanup; 3271 3272 return 0; 3273 3274 out_gtt_cleanup: 3275 ggtt->base.cleanup(&ggtt->base); 3276 return ret; 3277 } 3278 3279 int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv) 3280 { 3281 if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt()) 3282 return -EIO; 3283 3284 return 0; 3285 } 3286 3287 void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) 3288 { 3289 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3290 struct drm_i915_gem_object *obj, *on; 3291 3292 i915_check_and_clear_faults(dev_priv); 3293 3294 /* First fill our portion of the GTT with scratch pages */ 3295 ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total); 3296 3297 ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */ 3298 3299 /* clflush objects bound into the GGTT and rebind them. */ 3300 list_for_each_entry_safe(obj, on, 3301 &dev_priv->mm.bound_list, global_link) { 3302 bool ggtt_bound = false; 3303 struct i915_vma *vma; 3304 3305 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3306 if (vma->vm != &ggtt->base) 3307 continue; 3308 3309 if (!i915_vma_unbind(vma)) 3310 continue; 3311 3312 WARN_ON(i915_vma_bind(vma, obj->cache_level, 3313 PIN_UPDATE)); 3314 ggtt_bound = true; 3315 } 3316 3317 if (ggtt_bound) 3318 WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); 3319 } 3320 3321 ggtt->base.closed = false; 3322 3323 if (INTEL_GEN(dev_priv) >= 8) { 3324 if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) 3325 chv_setup_private_ppat(dev_priv); 3326 else 3327 bdw_setup_private_ppat(dev_priv); 3328 3329 return; 3330 } 3331 3332 if (USES_PPGTT(dev_priv)) { 3333 struct i915_address_space *vm; 3334 3335 list_for_each_entry(vm, &dev_priv->vm_list, global_link) { 3336 /* TODO: Perhaps it shouldn't be gen6 specific */ 3337 3338 struct i915_hw_ppgtt *ppgtt; 3339 3340 if (i915_is_ggtt(vm)) 3341 ppgtt = dev_priv->mm.aliasing_ppgtt; 3342 else 3343 ppgtt = i915_vm_to_ppgtt(vm); 3344 3345 gen6_write_page_range(dev_priv, &ppgtt->pd, 3346 0, ppgtt->base.total); 3347 } 3348 } 3349 3350 i915_ggtt_flush(dev_priv); 3351 } 3352 3353 struct i915_vma * 3354 i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 3355 struct i915_address_space *vm, 3356 const struct i915_ggtt_view *view) 3357 { 3358 struct rb_node *rb; 3359 3360 rb = obj->vma_tree.rb_node; 3361 while (rb) { 3362 struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node); 3363 long cmp; 3364 3365 cmp = i915_vma_compare(vma, vm, view); 3366 if (cmp == 0) 3367 return vma; 3368 3369 if (cmp < 0) 3370 rb = rb->rb_right; 3371 else 3372 rb = rb->rb_left; 3373 } 3374 3375 return NULL; 3376 } 3377 3378 struct i915_vma * 3379 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, 3380 struct i915_address_space *vm, 3381 const struct i915_ggtt_view *view) 3382 { 3383 struct i915_vma *vma; 3384 3385 lockdep_assert_held(&obj->base.dev->struct_mutex); 3386 GEM_BUG_ON(view && !i915_is_ggtt(vm)); 3387 3388 vma = i915_gem_obj_to_vma(obj, vm, view); 3389 if (!vma) { 3390 vma = i915_vma_create(obj, vm, view); 3391 GEM_BUG_ON(vma != i915_gem_obj_to_vma(obj, vm, view)); 3392 } 3393 3394 GEM_BUG_ON(i915_vma_is_closed(vma)); 3395 return vma; 3396 } 3397 3398 static struct scatterlist * 3399 rotate_pages(const dma_addr_t *in, unsigned int offset, 3400 unsigned int width, unsigned int height, 3401 unsigned int stride, 3402 struct sg_table *st, struct scatterlist *sg) 3403 { 3404 unsigned int column, row; 3405 unsigned int src_idx; 3406 3407 for (column = 0; column < width; column++) { 3408 src_idx = stride * (height - 1) + column; 3409 for (row = 0; row < height; row++) { 3410 st->nents++; 3411 /* We don't need the pages, but need to initialize 3412 * the entries so the sg list can be happily traversed. 3413 * The only thing we need are DMA addresses. 3414 */ 3415 sg_set_page(sg, NULL, PAGE_SIZE, 0); 3416 sg_dma_address(sg) = in[offset + src_idx]; 3417 sg_dma_len(sg) = PAGE_SIZE; 3418 sg = sg_next(sg); 3419 src_idx -= stride; 3420 } 3421 } 3422 3423 return sg; 3424 } 3425 3426 static struct sg_table * 3427 intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info, 3428 struct drm_i915_gem_object *obj) 3429 { 3430 const size_t n_pages = obj->base.size / PAGE_SIZE; 3431 unsigned int size = intel_rotation_info_size(rot_info); 3432 struct sgt_iter sgt_iter; 3433 dma_addr_t dma_addr; 3434 unsigned long i; 3435 dma_addr_t *page_addr_list; 3436 struct sg_table *st; 3437 struct scatterlist *sg; 3438 int ret = -ENOMEM; 3439 3440 /* Allocate a temporary list of source pages for random access. */ 3441 page_addr_list = drm_malloc_gfp(n_pages, 3442 sizeof(dma_addr_t), 3443 GFP_TEMPORARY); 3444 if (!page_addr_list) 3445 return ERR_PTR(ret); 3446 3447 /* Allocate target SG list. */ 3448 st = kmalloc(sizeof(*st), M_DRM, GFP_KERNEL); 3449 if (!st) 3450 goto err_st_alloc; 3451 3452 ret = sg_alloc_table(st, size, GFP_KERNEL); 3453 if (ret) 3454 goto err_sg_alloc; 3455 3456 /* Populate source page list from the object. */ 3457 i = 0; 3458 for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages) 3459 page_addr_list[i++] = dma_addr; 3460 3461 GEM_BUG_ON(i != n_pages); 3462 st->nents = 0; 3463 sg = st->sgl; 3464 3465 for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) { 3466 sg = rotate_pages(page_addr_list, rot_info->plane[i].offset, 3467 rot_info->plane[i].width, rot_info->plane[i].height, 3468 rot_info->plane[i].stride, st, sg); 3469 } 3470 3471 DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages)\n", 3472 obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); 3473 3474 drm_free_large(page_addr_list); 3475 3476 return st; 3477 3478 err_sg_alloc: 3479 kfree(st); 3480 err_st_alloc: 3481 drm_free_large(page_addr_list); 3482 3483 DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n", 3484 obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); 3485 3486 return ERR_PTR(ret); 3487 } 3488 3489 static struct sg_table * 3490 intel_partial_pages(const struct i915_ggtt_view *view, 3491 struct drm_i915_gem_object *obj) 3492 { 3493 struct sg_table *st; 3494 struct scatterlist *sg, *iter; 3495 unsigned int count = view->params.partial.size; 3496 unsigned int offset; 3497 int ret = -ENOMEM; 3498 3499 st = kmalloc(sizeof(*st), M_DRM, GFP_KERNEL); 3500 if (!st) 3501 goto err_st_alloc; 3502 3503 ret = sg_alloc_table(st, count, GFP_KERNEL); 3504 if (ret) 3505 goto err_sg_alloc; 3506 3507 iter = i915_gem_object_get_sg(obj, 3508 view->params.partial.offset, 3509 &offset); 3510 GEM_BUG_ON(!iter); 3511 3512 sg = st->sgl; 3513 st->nents = 0; 3514 do { 3515 unsigned int len; 3516 3517 len = min(iter->length - (offset << PAGE_SHIFT), 3518 count << PAGE_SHIFT); 3519 sg_set_page(sg, NULL, len, 0); 3520 sg_dma_address(sg) = 3521 sg_dma_address(iter) + (offset << PAGE_SHIFT); 3522 sg_dma_len(sg) = len; 3523 3524 st->nents++; 3525 count -= len >> PAGE_SHIFT; 3526 if (count == 0) { 3527 sg_mark_end(sg); 3528 return st; 3529 } 3530 3531 sg = __sg_next(sg); 3532 iter = __sg_next(iter); 3533 offset = 0; 3534 } while (1); 3535 3536 err_sg_alloc: 3537 kfree(st); 3538 err_st_alloc: 3539 return ERR_PTR(ret); 3540 } 3541 3542 static int 3543 i915_get_ggtt_vma_pages(struct i915_vma *vma) 3544 { 3545 int ret = 0; 3546 3547 /* The vma->pages are only valid within the lifespan of the borrowed 3548 * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so 3549 * must be the vma->pages. A simple rule is that vma->pages must only 3550 * be accessed when the obj->mm.pages are pinned. 3551 */ 3552 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj)); 3553 3554 if (vma->pages) 3555 return 0; 3556 3557 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) 3558 vma->pages = vma->obj->mm.pages; 3559 else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED) 3560 vma->pages = 3561 intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj); 3562 else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL) 3563 vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj); 3564 else 3565 WARN_ONCE(1, "GGTT view %u not implemented!\n", 3566 vma->ggtt_view.type); 3567 3568 if (!vma->pages) { 3569 DRM_ERROR("Failed to get pages for GGTT view type %u!\n", 3570 vma->ggtt_view.type); 3571 ret = -EINVAL; 3572 } else if (IS_ERR(vma->pages)) { 3573 ret = PTR_ERR(vma->pages); 3574 vma->pages = NULL; 3575 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n", 3576 vma->ggtt_view.type, ret); 3577 } 3578 3579 return ret; 3580 } 3581 3582