/*
 * Copyright © 2010 Daniel Vetter
 * Copyright © 2011-2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/seq_file.h>
#include <linux/stop_machine.h>
#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"

#include <sys/mplock2.h>

/**
 * DOC: Global GTT views
 *
 * Background and previous state
 *
 * Historically objects could exist (be bound) in global GTT space only as
 * singular instances, with a view representing all of the object's backing
 * pages in a linear fashion. This view is called the normal view.
 *
 * To support multiple views of the same object, where the number of mapped
 * pages is not equal to the backing store, or where the layout of the pages
 * is not linear, the concept of a GGTT view was added.
 *
 * One example of an alternative view is a stereo display driven by a single
 * image. In this case we would have a framebuffer looking like this
 * (2x2 pages):
 *
 *    12
 *    34
 *
 * The above would represent a normal GGTT view as normally mapped for GPU or
 * CPU rendering. In contrast, fed to the display engine would be an
 * alternative view which could look something like this:
 *
 *   1212
 *   3434
 *
 * In this example both the size and layout of pages in the alternative view
 * differ from the normal view.
 *
 * Implementation and usage
 *
 * GGTT views are implemented using VMAs and are distinguished via enum
 * i915_ggtt_view_type and struct i915_ggtt_view.
 *
 * A new flavour of core GEM functions which work with GGTT bound objects was
 * added with the _ggtt_ infix, and sometimes with a _view postfix, to avoid
 * renaming large amounts of code. They take the struct i915_ggtt_view
 * parameter encapsulating all metadata required to implement a view.
 *
 * As a helper for callers which are only interested in the normal view, a
 * globally const i915_ggtt_view_normal singleton instance exists. All old
 * core GEM API functions, the ones not taking the view parameter, operate on,
 * or with, the normal GGTT view.
 *
 * Code wanting to add or use a new GGTT view needs to:
 *
 * 1. Add a new enum with a suitable name.
 * 2. Extend the metadata in the i915_ggtt_view structure if required.
 * 3. Add support to i915_get_ggtt_vma_pages().
 *
 * New views are required to build a scatter-gather table from within the
 * i915_get_ggtt_vma_pages function. This table is stored in the
 * vma.ggtt_view and exists for the lifetime of a VMA.
 *
 * The core API is designed to have copy semantics, which means that a passed
 * in struct i915_ggtt_view does not need to be persistent (left around after
 * calling the core API functions).
 *
 */

static inline struct i915_ggtt *
i915_vm_to_ggtt(struct i915_address_space *vm)
{
	GEM_BUG_ON(!i915_is_ggtt(vm));
	return container_of(vm, struct i915_ggtt, base);
}

static int
i915_get_ggtt_vma_pages(struct i915_vma *vma);

const struct i915_ggtt_view i915_ggtt_view_normal = {
	.type = I915_GGTT_VIEW_NORMAL,
};
const struct i915_ggtt_view i915_ggtt_view_rotated = {
	.type = I915_GGTT_VIEW_ROTATED,
};

int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
				int enable_ppgtt)
{
	bool has_aliasing_ppgtt;
	bool has_full_ppgtt;
	bool has_full_48bit_ppgtt;

	has_aliasing_ppgtt = INTEL_GEN(dev_priv) >= 6;
	has_full_ppgtt = INTEL_GEN(dev_priv) >= 7;
	has_full_48bit_ppgtt =
		IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9;

	if (intel_vgpu_active(dev_priv)) {
		/* emulation is too hard */
		has_full_ppgtt = false;
		has_full_48bit_ppgtt = false;
	}

	if (!has_aliasing_ppgtt)
		return 0;

	/*
	 * We don't allow disabling PPGTT for gen9+ as it's a requirement for
	 * execlists, the sole mechanism available to submit work.
	 */
	if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9)
		return 0;

	if (enable_ppgtt == 1)
		return 1;

	if (enable_ppgtt == 2 && has_full_ppgtt)
		return 2;

	if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
		return 3;

#ifdef CONFIG_INTEL_IOMMU
	/* Disable ppgtt on SNB if VT-d is on. */
	if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped) {
		DRM_INFO("Disabling PPGTT because VT-d is on\n");
		return 0;
	}
#endif

	/* Early VLV doesn't have this */
	if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) {
		DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
		return 0;
	}

	if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists && has_full_ppgtt)
		return has_full_48bit_ppgtt ? 3 : 2;
	else
		return has_aliasing_ppgtt ? 1 : 0;
}

static int ppgtt_bind_vma(struct i915_vma *vma,
			  enum i915_cache_level cache_level,
			  u32 unused)
{
	u32 pte_flags = 0;

	/* Currently applicable only to VLV */
	if (vma->obj->gt_ro)
		pte_flags |= PTE_READ_ONLY;

	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
				cache_level, pte_flags);

	return 0;
}

static void ppgtt_unbind_vma(struct i915_vma *vma)
{
	vma->vm->clear_range(vma->vm,
			     vma->node.start,
			     vma->obj->base.size,
			     true);
}

static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
				  enum i915_cache_level level,
				  bool valid)
{
	gen8_pte_t pte = valid ?
_PAGE_PRESENT | _PAGE_RW : 0; 201 pte |= addr; 202 203 switch (level) { 204 case I915_CACHE_NONE: 205 pte |= PPAT_UNCACHED_INDEX; 206 break; 207 case I915_CACHE_WT: 208 pte |= PPAT_DISPLAY_ELLC_INDEX; 209 break; 210 default: 211 pte |= PPAT_CACHED_INDEX; 212 break; 213 } 214 215 return pte; 216 } 217 218 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr, 219 const enum i915_cache_level level) 220 { 221 gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW; 222 pde |= addr; 223 if (level != I915_CACHE_NONE) 224 pde |= PPAT_CACHED_PDE_INDEX; 225 else 226 pde |= PPAT_UNCACHED_INDEX; 227 return pde; 228 } 229 230 #define gen8_pdpe_encode gen8_pde_encode 231 #define gen8_pml4e_encode gen8_pde_encode 232 233 static gen6_pte_t snb_pte_encode(dma_addr_t addr, 234 enum i915_cache_level level, 235 bool valid, u32 unused) 236 { 237 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 238 pte |= GEN6_PTE_ADDR_ENCODE(addr); 239 240 switch (level) { 241 case I915_CACHE_L3_LLC: 242 case I915_CACHE_LLC: 243 pte |= GEN6_PTE_CACHE_LLC; 244 break; 245 case I915_CACHE_NONE: 246 pte |= GEN6_PTE_UNCACHED; 247 break; 248 default: 249 MISSING_CASE(level); 250 } 251 252 return pte; 253 } 254 255 static gen6_pte_t ivb_pte_encode(dma_addr_t addr, 256 enum i915_cache_level level, 257 bool valid, u32 unused) 258 { 259 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 260 pte |= GEN6_PTE_ADDR_ENCODE(addr); 261 262 switch (level) { 263 case I915_CACHE_L3_LLC: 264 pte |= GEN7_PTE_CACHE_L3_LLC; 265 break; 266 case I915_CACHE_LLC: 267 pte |= GEN6_PTE_CACHE_LLC; 268 break; 269 case I915_CACHE_NONE: 270 pte |= GEN6_PTE_UNCACHED; 271 break; 272 default: 273 MISSING_CASE(level); 274 } 275 276 return pte; 277 } 278 279 static gen6_pte_t byt_pte_encode(dma_addr_t addr, 280 enum i915_cache_level level, 281 bool valid, u32 flags) 282 { 283 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 284 pte |= GEN6_PTE_ADDR_ENCODE(addr); 285 286 if (!(flags & PTE_READ_ONLY)) 287 pte |= BYT_PTE_WRITEABLE; 288 289 if (level != I915_CACHE_NONE) 290 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; 291 292 return pte; 293 } 294 295 static gen6_pte_t hsw_pte_encode(dma_addr_t addr, 296 enum i915_cache_level level, 297 bool valid, u32 unused) 298 { 299 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 300 pte |= HSW_PTE_ADDR_ENCODE(addr); 301 302 if (level != I915_CACHE_NONE) 303 pte |= HSW_WB_LLC_AGE3; 304 305 return pte; 306 } 307 308 static gen6_pte_t iris_pte_encode(dma_addr_t addr, 309 enum i915_cache_level level, 310 bool valid, u32 unused) 311 { 312 gen6_pte_t pte = valid ? 
GEN6_PTE_VALID : 0; 313 pte |= HSW_PTE_ADDR_ENCODE(addr); 314 315 switch (level) { 316 case I915_CACHE_NONE: 317 break; 318 case I915_CACHE_WT: 319 pte |= HSW_WT_ELLC_LLC_AGE3; 320 break; 321 default: 322 pte |= HSW_WB_ELLC_LLC_AGE3; 323 break; 324 } 325 326 return pte; 327 } 328 329 static int __setup_page_dma(struct drm_device *dev, 330 struct i915_page_dma *p, gfp_t flags) 331 { 332 struct device *device = &dev->pdev->dev; 333 334 p->page = alloc_page(flags); 335 if (!p->page) 336 return -ENOMEM; 337 338 p->daddr = dma_map_page(device, 339 p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL); 340 341 if (dma_mapping_error(device, p->daddr)) { 342 __free_page(p->page); 343 return -EINVAL; 344 } 345 346 return 0; 347 } 348 349 static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p) 350 { 351 return __setup_page_dma(dev, p, GFP_KERNEL); 352 } 353 354 static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p) 355 { 356 if (WARN_ON(!p->page)) 357 return; 358 359 dma_unmap_page(&dev->pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL); 360 __free_page(p->page); 361 memset(p, 0, sizeof(*p)); 362 } 363 364 static void *kmap_page_dma(struct i915_page_dma *p) 365 { 366 return kmap_atomic(p->page); 367 } 368 369 /* We use the flushing unmap only with ppgtt structures: 370 * page directories, page tables and scratch pages. 371 */ 372 static void kunmap_page_dma(struct drm_device *dev, void *vaddr) 373 { 374 /* There are only few exceptions for gen >=6. chv and bxt. 375 * And we are not sure about the latter so play safe for now. 376 */ 377 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) 378 drm_clflush_virt_range(vaddr, PAGE_SIZE); 379 380 kunmap_atomic(vaddr); 381 } 382 383 #define kmap_px(px) kmap_page_dma(px_base(px)) 384 #define kunmap_px(ppgtt, vaddr) kunmap_page_dma((ppgtt)->base.dev, (vaddr)) 385 386 #define setup_px(dev, px) setup_page_dma((dev), px_base(px)) 387 #define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px)) 388 #define fill_px(dev, px, v) fill_page_dma((dev), px_base(px), (v)) 389 #define fill32_px(dev, px, v) fill_page_dma_32((dev), px_base(px), (v)) 390 391 static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p, 392 const uint64_t val) 393 { 394 int i; 395 uint64_t * const vaddr = kmap_page_dma(p); 396 397 for (i = 0; i < 512; i++) 398 vaddr[i] = val; 399 400 kunmap_page_dma(dev, vaddr); 401 } 402 403 static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p, 404 const uint32_t val32) 405 { 406 uint64_t v = val32; 407 408 v = v << 32 | val32; 409 410 fill_page_dma(dev, p, v); 411 } 412 413 static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev) 414 { 415 struct i915_page_scratch *sp; 416 int ret; 417 418 sp = kzalloc(sizeof(*sp), GFP_KERNEL); 419 if (sp == NULL) 420 return ERR_PTR(-ENOMEM); 421 422 ret = __setup_page_dma(dev, px_base(sp), GFP_DMA32 | __GFP_ZERO); 423 if (ret) { 424 kfree(sp); 425 return ERR_PTR(ret); 426 } 427 428 set_pages_uc(px_page(sp), 1); 429 430 return sp; 431 } 432 433 static void free_scratch_page(struct drm_device *dev, 434 struct i915_page_scratch *sp) 435 { 436 set_pages_wb(px_page(sp), 1); 437 438 cleanup_px(dev, sp); 439 kfree(sp); 440 } 441 442 static struct i915_page_table *alloc_pt(struct drm_device *dev) 443 { 444 struct i915_page_table *pt; 445 const size_t count = INTEL_INFO(dev)->gen >= 8 ? 
446 GEN8_PTES : GEN6_PTES; 447 int ret = -ENOMEM; 448 449 pt = kzalloc(sizeof(*pt), GFP_KERNEL); 450 if (!pt) 451 return ERR_PTR(-ENOMEM); 452 453 pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes), 454 GFP_KERNEL); 455 456 if (!pt->used_ptes) 457 goto fail_bitmap; 458 459 ret = setup_px(dev, pt); 460 if (ret) 461 goto fail_page_m; 462 463 return pt; 464 465 fail_page_m: 466 kfree(pt->used_ptes); 467 fail_bitmap: 468 kfree(pt); 469 470 return ERR_PTR(ret); 471 } 472 473 static void free_pt(struct drm_device *dev, struct i915_page_table *pt) 474 { 475 cleanup_px(dev, pt); 476 kfree(pt->used_ptes); 477 kfree(pt); 478 } 479 480 static void gen8_initialize_pt(struct i915_address_space *vm, 481 struct i915_page_table *pt) 482 { 483 gen8_pte_t scratch_pte; 484 485 scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), 486 I915_CACHE_LLC, true); 487 488 fill_px(vm->dev, pt, scratch_pte); 489 } 490 491 static void gen6_initialize_pt(struct i915_address_space *vm, 492 struct i915_page_table *pt) 493 { 494 gen6_pte_t scratch_pte; 495 496 WARN_ON(px_dma(vm->scratch_page) == 0); 497 498 scratch_pte = vm->pte_encode(px_dma(vm->scratch_page), 499 I915_CACHE_LLC, true, 0); 500 501 fill32_px(vm->dev, pt, scratch_pte); 502 } 503 504 static struct i915_page_directory *alloc_pd(struct drm_device *dev) 505 { 506 struct i915_page_directory *pd; 507 int ret = -ENOMEM; 508 509 pd = kzalloc(sizeof(*pd), GFP_KERNEL); 510 if (!pd) 511 return ERR_PTR(-ENOMEM); 512 513 pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES), 514 sizeof(*pd->used_pdes), GFP_KERNEL); 515 if (!pd->used_pdes) 516 goto fail_bitmap; 517 518 ret = setup_px(dev, pd); 519 if (ret) 520 goto fail_page_m; 521 522 return pd; 523 524 fail_page_m: 525 kfree(pd->used_pdes); 526 fail_bitmap: 527 kfree(pd); 528 529 return ERR_PTR(ret); 530 } 531 532 static void free_pd(struct drm_device *dev, struct i915_page_directory *pd) 533 { 534 if (px_page(pd)) { 535 cleanup_px(dev, pd); 536 kfree(pd->used_pdes); 537 kfree(pd); 538 } 539 } 540 541 static void gen8_initialize_pd(struct i915_address_space *vm, 542 struct i915_page_directory *pd) 543 { 544 gen8_pde_t scratch_pde; 545 546 scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC); 547 548 fill_px(vm->dev, pd, scratch_pde); 549 } 550 551 static int __pdp_init(struct drm_device *dev, 552 struct i915_page_directory_pointer *pdp) 553 { 554 size_t pdpes = I915_PDPES_PER_PDP(dev); 555 556 pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes), 557 sizeof(unsigned long), 558 GFP_KERNEL); 559 if (!pdp->used_pdpes) 560 return -ENOMEM; 561 562 pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory), 563 GFP_KERNEL); 564 if (!pdp->page_directory) { 565 kfree(pdp->used_pdpes); 566 /* the PDP might be the statically allocated top level. 
Keep it 567 * as clean as possible */ 568 pdp->used_pdpes = NULL; 569 return -ENOMEM; 570 } 571 572 return 0; 573 } 574 575 static void __pdp_fini(struct i915_page_directory_pointer *pdp) 576 { 577 kfree(pdp->used_pdpes); 578 kfree(pdp->page_directory); 579 pdp->page_directory = NULL; 580 } 581 582 static struct 583 i915_page_directory_pointer *alloc_pdp(struct drm_device *dev) 584 { 585 struct i915_page_directory_pointer *pdp; 586 int ret = -ENOMEM; 587 588 WARN_ON(!USES_FULL_48BIT_PPGTT(dev)); 589 590 pdp = kzalloc(sizeof(*pdp), GFP_KERNEL); 591 if (!pdp) 592 return ERR_PTR(-ENOMEM); 593 594 ret = __pdp_init(dev, pdp); 595 if (ret) 596 goto fail_bitmap; 597 598 ret = setup_px(dev, pdp); 599 if (ret) 600 goto fail_page_m; 601 602 return pdp; 603 604 fail_page_m: 605 __pdp_fini(pdp); 606 fail_bitmap: 607 kfree(pdp); 608 609 return ERR_PTR(ret); 610 } 611 612 static void free_pdp(struct drm_device *dev, 613 struct i915_page_directory_pointer *pdp) 614 { 615 __pdp_fini(pdp); 616 if (USES_FULL_48BIT_PPGTT(dev)) { 617 cleanup_px(dev, pdp); 618 kfree(pdp); 619 } 620 } 621 622 static void gen8_initialize_pdp(struct i915_address_space *vm, 623 struct i915_page_directory_pointer *pdp) 624 { 625 gen8_ppgtt_pdpe_t scratch_pdpe; 626 627 scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC); 628 629 fill_px(vm->dev, pdp, scratch_pdpe); 630 } 631 632 static void gen8_initialize_pml4(struct i915_address_space *vm, 633 struct i915_pml4 *pml4) 634 { 635 gen8_ppgtt_pml4e_t scratch_pml4e; 636 637 scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp), 638 I915_CACHE_LLC); 639 640 fill_px(vm->dev, pml4, scratch_pml4e); 641 } 642 643 static void 644 gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt, 645 struct i915_page_directory_pointer *pdp, 646 struct i915_page_directory *pd, 647 int index) 648 { 649 gen8_ppgtt_pdpe_t *page_directorypo; 650 651 if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) 652 return; 653 654 page_directorypo = kmap_px(pdp); 655 page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); 656 kunmap_px(ppgtt, page_directorypo); 657 } 658 659 static void 660 gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt, 661 struct i915_pml4 *pml4, 662 struct i915_page_directory_pointer *pdp, 663 int index) 664 { 665 gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4); 666 667 WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)); 668 pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); 669 kunmap_px(ppgtt, pagemap); 670 } 671 672 /* Broadwell Page Directory Pointer Descriptors */ 673 static int gen8_write_pdp(struct drm_i915_gem_request *req, 674 unsigned entry, 675 dma_addr_t addr) 676 { 677 struct intel_engine_cs *engine = req->engine; 678 int ret; 679 680 BUG_ON(entry >= 4); 681 682 ret = intel_ring_begin(req, 6); 683 if (ret) 684 return ret; 685 686 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); 687 intel_ring_emit_reg(engine, GEN8_RING_PDP_UDW(engine, entry)); 688 intel_ring_emit(engine, upper_32_bits(addr)); 689 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); 690 intel_ring_emit_reg(engine, GEN8_RING_PDP_LDW(engine, entry)); 691 intel_ring_emit(engine, lower_32_bits(addr)); 692 intel_ring_advance(engine); 693 694 return 0; 695 } 696 697 static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt, 698 struct drm_i915_gem_request *req) 699 { 700 int i, ret; 701 702 for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) { 703 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); 704 705 ret = gen8_write_pdp(req, i, pd_daddr); 706 if (ret) 707 return 
ret; 708 } 709 710 return 0; 711 } 712 713 static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt, 714 struct drm_i915_gem_request *req) 715 { 716 return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4)); 717 } 718 719 static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm, 720 struct i915_page_directory_pointer *pdp, 721 uint64_t start, 722 uint64_t length, 723 gen8_pte_t scratch_pte) 724 { 725 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 726 gen8_pte_t *pt_vaddr; 727 unsigned pdpe = gen8_pdpe_index(start); 728 unsigned pde = gen8_pde_index(start); 729 unsigned pte = gen8_pte_index(start); 730 unsigned num_entries = length >> PAGE_SHIFT; 731 unsigned last_pte, i; 732 733 if (WARN_ON(!pdp)) 734 return; 735 736 while (num_entries) { 737 struct i915_page_directory *pd; 738 struct i915_page_table *pt; 739 740 if (WARN_ON(!pdp->page_directory[pdpe])) 741 break; 742 743 pd = pdp->page_directory[pdpe]; 744 745 if (WARN_ON(!pd->page_table[pde])) 746 break; 747 748 pt = pd->page_table[pde]; 749 750 if (WARN_ON(!px_page(pt))) 751 break; 752 753 last_pte = pte + num_entries; 754 if (last_pte > GEN8_PTES) 755 last_pte = GEN8_PTES; 756 757 pt_vaddr = kmap_px(pt); 758 759 for (i = pte; i < last_pte; i++) { 760 pt_vaddr[i] = scratch_pte; 761 num_entries--; 762 } 763 764 kunmap_px(ppgtt, pt_vaddr); 765 766 pte = 0; 767 if (++pde == I915_PDES) { 768 if (++pdpe == I915_PDPES_PER_PDP(vm->dev)) 769 break; 770 pde = 0; 771 } 772 } 773 } 774 775 static void gen8_ppgtt_clear_range(struct i915_address_space *vm, 776 uint64_t start, 777 uint64_t length, 778 bool use_scratch) 779 { 780 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 781 gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), 782 I915_CACHE_LLC, use_scratch); 783 784 if (!USES_FULL_48BIT_PPGTT(vm->dev)) { 785 gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length, 786 scratch_pte); 787 } else { 788 uint64_t pml4e; 789 struct i915_page_directory_pointer *pdp; 790 791 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) { 792 gen8_ppgtt_clear_pte_range(vm, pdp, start, length, 793 scratch_pte); 794 } 795 } 796 } 797 798 static void 799 gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm, 800 struct i915_page_directory_pointer *pdp, 801 struct sg_page_iter *sg_iter, 802 uint64_t start, 803 enum i915_cache_level cache_level) 804 { 805 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 806 gen8_pte_t *pt_vaddr; 807 unsigned pdpe = gen8_pdpe_index(start); 808 unsigned pde = gen8_pde_index(start); 809 unsigned pte = gen8_pte_index(start); 810 811 pt_vaddr = NULL; 812 813 while (__sg_page_iter_next(sg_iter)) { 814 if (pt_vaddr == NULL) { 815 struct i915_page_directory *pd = pdp->page_directory[pdpe]; 816 struct i915_page_table *pt = pd->page_table[pde]; 817 pt_vaddr = kmap_px(pt); 818 } 819 820 pt_vaddr[pte] = 821 gen8_pte_encode(sg_page_iter_dma_address(sg_iter), 822 cache_level, true); 823 if (++pte == GEN8_PTES) { 824 kunmap_px(ppgtt, pt_vaddr); 825 pt_vaddr = NULL; 826 if (++pde == I915_PDES) { 827 if (++pdpe == I915_PDPES_PER_PDP(vm->dev)) 828 break; 829 pde = 0; 830 } 831 pte = 0; 832 } 833 } 834 835 if (pt_vaddr) 836 kunmap_px(ppgtt, pt_vaddr); 837 } 838 839 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, 840 struct sg_table *pages, 841 uint64_t start, 842 enum i915_cache_level cache_level, 843 u32 unused) 844 { 845 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 846 struct sg_page_iter sg_iter; 847 848 __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0); 
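	/* With 32-bit (3-level) PPGTT a single page directory pointer,
	 * ppgtt->pdp, covers the whole address space, so all PTEs are written
	 * through it. With 48-bit (4-level) PPGTT the range is walked one
	 * PML4 entry (512GiB of VA) at a time and the PTEs are inserted into
	 * each PDP in turn.
	 */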
849 850 if (!USES_FULL_48BIT_PPGTT(vm->dev)) { 851 gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start, 852 cache_level); 853 } else { 854 struct i915_page_directory_pointer *pdp; 855 uint64_t pml4e; 856 uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT; 857 858 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) { 859 gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter, 860 start, cache_level); 861 } 862 } 863 } 864 865 static void gen8_free_page_tables(struct drm_device *dev, 866 struct i915_page_directory *pd) 867 { 868 int i; 869 870 if (!px_page(pd)) 871 return; 872 873 for_each_set_bit(i, pd->used_pdes, I915_PDES) { 874 if (WARN_ON(!pd->page_table[i])) 875 continue; 876 877 free_pt(dev, pd->page_table[i]); 878 pd->page_table[i] = NULL; 879 } 880 } 881 882 static int gen8_init_scratch(struct i915_address_space *vm) 883 { 884 struct drm_device *dev = vm->dev; 885 int ret; 886 887 vm->scratch_page = alloc_scratch_page(dev); 888 if (IS_ERR(vm->scratch_page)) 889 return PTR_ERR(vm->scratch_page); 890 891 vm->scratch_pt = alloc_pt(dev); 892 if (IS_ERR(vm->scratch_pt)) { 893 ret = PTR_ERR(vm->scratch_pt); 894 goto free_scratch_page; 895 } 896 897 vm->scratch_pd = alloc_pd(dev); 898 if (IS_ERR(vm->scratch_pd)) { 899 ret = PTR_ERR(vm->scratch_pd); 900 goto free_pt; 901 } 902 903 if (USES_FULL_48BIT_PPGTT(dev)) { 904 vm->scratch_pdp = alloc_pdp(dev); 905 if (IS_ERR(vm->scratch_pdp)) { 906 ret = PTR_ERR(vm->scratch_pdp); 907 goto free_pd; 908 } 909 } 910 911 gen8_initialize_pt(vm, vm->scratch_pt); 912 gen8_initialize_pd(vm, vm->scratch_pd); 913 if (USES_FULL_48BIT_PPGTT(dev)) 914 gen8_initialize_pdp(vm, vm->scratch_pdp); 915 916 return 0; 917 918 free_pd: 919 free_pd(dev, vm->scratch_pd); 920 free_pt: 921 free_pt(dev, vm->scratch_pt); 922 free_scratch_page: 923 free_scratch_page(dev, vm->scratch_page); 924 925 return ret; 926 } 927 928 static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) 929 { 930 enum vgt_g2v_type msg; 931 struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev); 932 int i; 933 934 if (USES_FULL_48BIT_PPGTT(dev_priv)) { 935 u64 daddr = px_dma(&ppgtt->pml4); 936 937 I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); 938 I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); 939 940 msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE : 941 VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY); 942 } else { 943 for (i = 0; i < GEN8_LEGACY_PDPES; i++) { 944 u64 daddr = i915_page_dir_dma_addr(ppgtt, i); 945 946 I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr)); 947 I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr)); 948 } 949 950 msg = (create ? 
VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE : 951 VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY); 952 } 953 954 I915_WRITE(vgtif_reg(g2v_notify), msg); 955 956 return 0; 957 } 958 959 static void gen8_free_scratch(struct i915_address_space *vm) 960 { 961 struct drm_device *dev = vm->dev; 962 963 if (USES_FULL_48BIT_PPGTT(dev)) 964 free_pdp(dev, vm->scratch_pdp); 965 free_pd(dev, vm->scratch_pd); 966 free_pt(dev, vm->scratch_pt); 967 free_scratch_page(dev, vm->scratch_page); 968 } 969 970 static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev, 971 struct i915_page_directory_pointer *pdp) 972 { 973 int i; 974 975 for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) { 976 if (WARN_ON(!pdp->page_directory[i])) 977 continue; 978 979 gen8_free_page_tables(dev, pdp->page_directory[i]); 980 free_pd(dev, pdp->page_directory[i]); 981 } 982 983 free_pdp(dev, pdp); 984 } 985 986 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt) 987 { 988 int i; 989 990 for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { 991 if (WARN_ON(!ppgtt->pml4.pdps[i])) 992 continue; 993 994 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]); 995 } 996 997 cleanup_px(ppgtt->base.dev, &ppgtt->pml4); 998 } 999 1000 static void gen8_ppgtt_cleanup(struct i915_address_space *vm) 1001 { 1002 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1003 1004 if (intel_vgpu_active(to_i915(vm->dev))) 1005 gen8_ppgtt_notify_vgt(ppgtt, false); 1006 1007 if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) 1008 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp); 1009 else 1010 gen8_ppgtt_cleanup_4lvl(ppgtt); 1011 1012 gen8_free_scratch(vm); 1013 } 1014 1015 /** 1016 * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range. 1017 * @vm: Master vm structure. 1018 * @pd: Page directory for this address range. 1019 * @start: Starting virtual address to begin allocations. 1020 * @length: Size of the allocations. 1021 * @new_pts: Bitmap set by function with new allocations. Likely used by the 1022 * caller to free on error. 1023 * 1024 * Allocate the required number of page tables. Extremely similar to 1025 * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by 1026 * the page directory boundary (instead of the page directory pointer). That 1027 * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is 1028 * possible, and likely that the caller will need to use multiple calls of this 1029 * function to achieve the appropriate allocation. 1030 * 1031 * Return: 0 if success; negative error code otherwise. 
1032 */ 1033 static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm, 1034 struct i915_page_directory *pd, 1035 uint64_t start, 1036 uint64_t length, 1037 unsigned long *new_pts) 1038 { 1039 struct drm_device *dev = vm->dev; 1040 struct i915_page_table *pt; 1041 uint32_t pde; 1042 1043 gen8_for_each_pde(pt, pd, start, length, pde) { 1044 /* Don't reallocate page tables */ 1045 if (test_bit(pde, pd->used_pdes)) { 1046 /* Scratch is never allocated this way */ 1047 WARN_ON(pt == vm->scratch_pt); 1048 continue; 1049 } 1050 1051 pt = alloc_pt(dev); 1052 if (IS_ERR(pt)) 1053 goto unwind_out; 1054 1055 gen8_initialize_pt(vm, pt); 1056 pd->page_table[pde] = pt; 1057 __set_bit(pde, new_pts); 1058 trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT); 1059 } 1060 1061 return 0; 1062 1063 unwind_out: 1064 for_each_set_bit(pde, new_pts, I915_PDES) 1065 free_pt(dev, pd->page_table[pde]); 1066 1067 return -ENOMEM; 1068 } 1069 1070 /** 1071 * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range. 1072 * @vm: Master vm structure. 1073 * @pdp: Page directory pointer for this address range. 1074 * @start: Starting virtual address to begin allocations. 1075 * @length: Size of the allocations. 1076 * @new_pds: Bitmap set by function with new allocations. Likely used by the 1077 * caller to free on error. 1078 * 1079 * Allocate the required number of page directories starting at the pde index of 1080 * @start, and ending at the pde index @start + @length. This function will skip 1081 * over already allocated page directories within the range, and only allocate 1082 * new ones, setting the appropriate pointer within the pdp as well as the 1083 * correct position in the bitmap @new_pds. 1084 * 1085 * The function will only allocate the pages within the range for a give page 1086 * directory pointer. In other words, if @start + @length straddles a virtually 1087 * addressed PDP boundary (512GB for 4k pages), there will be more allocations 1088 * required by the caller, This is not currently possible, and the BUG in the 1089 * code will prevent it. 1090 * 1091 * Return: 0 if success; negative error code otherwise. 1092 */ 1093 static int 1094 gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm, 1095 struct i915_page_directory_pointer *pdp, 1096 uint64_t start, 1097 uint64_t length, 1098 unsigned long *new_pds) 1099 { 1100 struct drm_device *dev = vm->dev; 1101 struct i915_page_directory *pd; 1102 uint32_t pdpe; 1103 uint32_t pdpes = I915_PDPES_PER_PDP(dev); 1104 1105 WARN_ON(!bitmap_empty(new_pds, pdpes)); 1106 1107 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1108 if (test_bit(pdpe, pdp->used_pdpes)) 1109 continue; 1110 1111 pd = alloc_pd(dev); 1112 if (IS_ERR(pd)) 1113 goto unwind_out; 1114 1115 gen8_initialize_pd(vm, pd); 1116 pdp->page_directory[pdpe] = pd; 1117 __set_bit(pdpe, new_pds); 1118 trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT); 1119 } 1120 1121 return 0; 1122 1123 unwind_out: 1124 for_each_set_bit(pdpe, new_pds, pdpes) 1125 free_pd(dev, pdp->page_directory[pdpe]); 1126 1127 return -ENOMEM; 1128 } 1129 1130 /** 1131 * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range. 1132 * @vm: Master vm structure. 1133 * @pml4: Page map level 4 for this address range. 1134 * @start: Starting virtual address to begin allocations. 1135 * @length: Size of the allocations. 1136 * @new_pdps: Bitmap set by function with new allocations. Likely used by the 1137 * caller to free on error. 
1138 * 1139 * Allocate the required number of page directory pointers. Extremely similar to 1140 * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs(). 1141 * The main difference is here we are limited by the pml4 boundary (instead of 1142 * the page directory pointer). 1143 * 1144 * Return: 0 if success; negative error code otherwise. 1145 */ 1146 static int 1147 gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, 1148 struct i915_pml4 *pml4, 1149 uint64_t start, 1150 uint64_t length, 1151 unsigned long *new_pdps) 1152 { 1153 struct drm_device *dev = vm->dev; 1154 struct i915_page_directory_pointer *pdp; 1155 uint32_t pml4e; 1156 1157 WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4)); 1158 1159 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1160 if (!test_bit(pml4e, pml4->used_pml4es)) { 1161 pdp = alloc_pdp(dev); 1162 if (IS_ERR(pdp)) 1163 goto unwind_out; 1164 1165 gen8_initialize_pdp(vm, pdp); 1166 pml4->pdps[pml4e] = pdp; 1167 __set_bit(pml4e, new_pdps); 1168 trace_i915_page_directory_pointer_entry_alloc(vm, 1169 pml4e, 1170 start, 1171 GEN8_PML4E_SHIFT); 1172 } 1173 } 1174 1175 return 0; 1176 1177 unwind_out: 1178 for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) 1179 free_pdp(dev, pml4->pdps[pml4e]); 1180 1181 return -ENOMEM; 1182 } 1183 1184 static void 1185 free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts) 1186 { 1187 kfree(new_pts); 1188 kfree(new_pds); 1189 } 1190 1191 /* Fills in the page directory bitmap, and the array of page tables bitmap. Both 1192 * of these are based on the number of PDPEs in the system. 1193 */ 1194 static 1195 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds, 1196 unsigned long **new_pts, 1197 uint32_t pdpes) 1198 { 1199 unsigned long *pds; 1200 unsigned long *pts; 1201 1202 pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY); 1203 if (!pds) 1204 return -ENOMEM; 1205 1206 pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long), 1207 GFP_TEMPORARY); 1208 if (!pts) 1209 goto err_out; 1210 1211 *new_pds = pds; 1212 *new_pts = pts; 1213 1214 return 0; 1215 1216 err_out: 1217 free_gen8_temp_bitmaps(pds, pts); 1218 return -ENOMEM; 1219 } 1220 1221 /* PDE TLBs are a pain to invalidate on GEN8+. When we modify 1222 * the page table structures, we mark them dirty so that 1223 * context switching/execlist queuing code takes extra steps 1224 * to ensure that tlbs are flushed. 1225 */ 1226 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt) 1227 { 1228 ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask; 1229 } 1230 1231 static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, 1232 struct i915_page_directory_pointer *pdp, 1233 uint64_t start, 1234 uint64_t length) 1235 { 1236 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1237 unsigned long *new_page_dirs, *new_page_tables; 1238 struct drm_device *dev = vm->dev; 1239 struct i915_page_directory *pd; 1240 const uint64_t orig_start = start; 1241 const uint64_t orig_length = length; 1242 uint32_t pdpe; 1243 uint32_t pdpes = I915_PDPES_PER_PDP(dev); 1244 int ret; 1245 1246 /* Wrap is never okay since we can only represent 48b, and we don't 1247 * actually use the other side of the canonical address space. 
1248 */ 1249 if (WARN_ON(start + length < start)) 1250 return -ENODEV; 1251 1252 if (WARN_ON(start + length > vm->total)) 1253 return -ENODEV; 1254 1255 ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); 1256 if (ret) 1257 return ret; 1258 1259 /* Do the allocations first so we can easily bail out */ 1260 ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length, 1261 new_page_dirs); 1262 if (ret) { 1263 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1264 return ret; 1265 } 1266 1267 /* For every page directory referenced, allocate page tables */ 1268 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1269 ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length, 1270 new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES)); 1271 if (ret) 1272 goto err_out; 1273 } 1274 1275 start = orig_start; 1276 length = orig_length; 1277 1278 /* Allocations have completed successfully, so set the bitmaps, and do 1279 * the mappings. */ 1280 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1281 gen8_pde_t *const page_directory = kmap_px(pd); 1282 struct i915_page_table *pt; 1283 uint64_t pd_len = length; 1284 uint64_t pd_start = start; 1285 uint32_t pde; 1286 1287 /* Every pd should be allocated, we just did that above. */ 1288 WARN_ON(!pd); 1289 1290 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { 1291 /* Same reasoning as pd */ 1292 WARN_ON(!pt); 1293 WARN_ON(!pd_len); 1294 WARN_ON(!gen8_pte_count(pd_start, pd_len)); 1295 1296 /* Set our used ptes within the page table */ 1297 bitmap_set(pt->used_ptes, 1298 gen8_pte_index(pd_start), 1299 gen8_pte_count(pd_start, pd_len)); 1300 1301 /* Our pde is now pointing to the pagetable, pt */ 1302 __set_bit(pde, pd->used_pdes); 1303 1304 /* Map the PDE to the page table */ 1305 page_directory[pde] = gen8_pde_encode(px_dma(pt), 1306 I915_CACHE_LLC); 1307 trace_i915_page_table_entry_map(&ppgtt->base, pde, pt, 1308 gen8_pte_index(start), 1309 gen8_pte_count(start, length), 1310 GEN8_PTES); 1311 1312 /* NB: We haven't yet mapped ptes to pages. At this 1313 * point we're still relying on insert_entries() */ 1314 } 1315 1316 kunmap_px(ppgtt, page_directory); 1317 __set_bit(pdpe, pdp->used_pdpes); 1318 gen8_setup_page_directory(ppgtt, pdp, pd, pdpe); 1319 } 1320 1321 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1322 mark_tlbs_dirty(ppgtt); 1323 return 0; 1324 1325 err_out: 1326 while (pdpe--) { 1327 unsigned long temp; 1328 1329 for_each_set_bit(temp, new_page_tables + pdpe * 1330 BITS_TO_LONGS(I915_PDES), I915_PDES) 1331 free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]); 1332 } 1333 1334 for_each_set_bit(pdpe, new_page_dirs, pdpes) 1335 free_pd(dev, pdp->page_directory[pdpe]); 1336 1337 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1338 mark_tlbs_dirty(ppgtt); 1339 return ret; 1340 } 1341 1342 static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, 1343 struct i915_pml4 *pml4, 1344 uint64_t start, 1345 uint64_t length) 1346 { 1347 DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4); 1348 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1349 struct i915_page_directory_pointer *pdp; 1350 uint64_t pml4e; 1351 int ret = 0; 1352 1353 /* Do the pml4 allocations first, so we don't need to track the newly 1354 * allocated tables below the pdp */ 1355 bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4); 1356 1357 /* The pagedirectory and pagetable allocations are done in the shared 3 1358 * and 4 level code. Just allocate the pdps. 
1359 */ 1360 ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length, 1361 new_pdps); 1362 if (ret) 1363 return ret; 1364 1365 WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2, 1366 "The allocation has spanned more than 512GB. " 1367 "It is highly likely this is incorrect."); 1368 1369 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1370 WARN_ON(!pdp); 1371 1372 ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length); 1373 if (ret) 1374 goto err_out; 1375 1376 gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e); 1377 } 1378 1379 bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es, 1380 GEN8_PML4ES_PER_PML4); 1381 1382 return 0; 1383 1384 err_out: 1385 for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) 1386 gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]); 1387 1388 return ret; 1389 } 1390 1391 static int gen8_alloc_va_range(struct i915_address_space *vm, 1392 uint64_t start, uint64_t length) 1393 { 1394 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1395 1396 if (USES_FULL_48BIT_PPGTT(vm->dev)) 1397 return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length); 1398 else 1399 return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); 1400 } 1401 1402 static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp, 1403 uint64_t start, uint64_t length, 1404 gen8_pte_t scratch_pte, 1405 struct seq_file *m) 1406 { 1407 struct i915_page_directory *pd; 1408 uint32_t pdpe; 1409 1410 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1411 struct i915_page_table *pt; 1412 uint64_t pd_len = length; 1413 uint64_t pd_start = start; 1414 uint32_t pde; 1415 1416 if (!test_bit(pdpe, pdp->used_pdpes)) 1417 continue; 1418 1419 seq_printf(m, "\tPDPE #%d\n", pdpe); 1420 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { 1421 uint32_t pte; 1422 gen8_pte_t *pt_vaddr; 1423 1424 if (!test_bit(pde, pd->used_pdes)) 1425 continue; 1426 1427 pt_vaddr = kmap_px(pt); 1428 for (pte = 0; pte < GEN8_PTES; pte += 4) { 1429 uint64_t va = 1430 (pdpe << GEN8_PDPE_SHIFT) | 1431 (pde << GEN8_PDE_SHIFT) | 1432 (pte << GEN8_PTE_SHIFT); 1433 int i; 1434 bool found = false; 1435 1436 for (i = 0; i < 4; i++) 1437 if (pt_vaddr[pte + i] != scratch_pte) 1438 found = true; 1439 if (!found) 1440 continue; 1441 1442 seq_printf(m, "\t\t0x%lx [%03d,%03d,%04d]: =", va, pdpe, pde, pte); 1443 for (i = 0; i < 4; i++) { 1444 if (pt_vaddr[pte + i] != scratch_pte) 1445 seq_printf(m, " %lx", pt_vaddr[pte + i]); 1446 else 1447 seq_puts(m, " SCRATCH "); 1448 } 1449 seq_puts(m, "\n"); 1450 } 1451 /* don't use kunmap_px, it could trigger 1452 * an unnecessary flush. 
1453 */ 1454 kunmap_atomic(pt_vaddr); 1455 } 1456 } 1457 } 1458 1459 static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) 1460 { 1461 struct i915_address_space *vm = &ppgtt->base; 1462 uint64_t start = ppgtt->base.start; 1463 uint64_t length = ppgtt->base.total; 1464 gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), 1465 I915_CACHE_LLC, true); 1466 1467 if (!USES_FULL_48BIT_PPGTT(vm->dev)) { 1468 gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m); 1469 } else { 1470 uint64_t pml4e; 1471 struct i915_pml4 *pml4 = &ppgtt->pml4; 1472 struct i915_page_directory_pointer *pdp; 1473 1474 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1475 if (!test_bit(pml4e, pml4->used_pml4es)) 1476 continue; 1477 1478 seq_printf(m, " PML4E #%lu\n", pml4e); 1479 gen8_dump_pdp(pdp, start, length, scratch_pte, m); 1480 } 1481 } 1482 } 1483 1484 static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt) 1485 { 1486 unsigned long *new_page_dirs, *new_page_tables; 1487 uint32_t pdpes = I915_PDPES_PER_PDP(dev); 1488 int ret; 1489 1490 /* We allocate temp bitmap for page tables for no gain 1491 * but as this is for init only, lets keep the things simple 1492 */ 1493 ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); 1494 if (ret) 1495 return ret; 1496 1497 /* Allocate for all pdps regardless of how the ppgtt 1498 * was defined. 1499 */ 1500 ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp, 1501 0, 1ULL << 32, 1502 new_page_dirs); 1503 if (!ret) 1504 *ppgtt->pdp.used_pdpes = *new_page_dirs; 1505 1506 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1507 1508 return ret; 1509 } 1510 1511 /* 1512 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers 1513 * with a net effect resembling a 2-level page table in normal x86 terms. Each 1514 * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address 1515 * space. 
1516 * 1517 */ 1518 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 1519 { 1520 int ret; 1521 1522 ret = gen8_init_scratch(&ppgtt->base); 1523 if (ret) 1524 return ret; 1525 1526 ppgtt->base.start = 0; 1527 ppgtt->base.cleanup = gen8_ppgtt_cleanup; 1528 ppgtt->base.allocate_va_range = gen8_alloc_va_range; 1529 ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; 1530 ppgtt->base.clear_range = gen8_ppgtt_clear_range; 1531 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 1532 ppgtt->base.bind_vma = ppgtt_bind_vma; 1533 ppgtt->debug_dump = gen8_dump_ppgtt; 1534 1535 if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { 1536 ret = setup_px(ppgtt->base.dev, &ppgtt->pml4); 1537 if (ret) 1538 goto free_scratch; 1539 1540 gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4); 1541 1542 ppgtt->base.total = 1ULL << 48; 1543 ppgtt->switch_mm = gen8_48b_mm_switch; 1544 } else { 1545 ret = __pdp_init(ppgtt->base.dev, &ppgtt->pdp); 1546 if (ret) 1547 goto free_scratch; 1548 1549 ppgtt->base.total = 1ULL << 32; 1550 ppgtt->switch_mm = gen8_legacy_mm_switch; 1551 trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, 1552 0, 0, 1553 GEN8_PML4E_SHIFT); 1554 1555 if (intel_vgpu_active(to_i915(ppgtt->base.dev))) { 1556 ret = gen8_preallocate_top_level_pdps(ppgtt); 1557 if (ret) 1558 goto free_scratch; 1559 } 1560 } 1561 1562 if (intel_vgpu_active(to_i915(ppgtt->base.dev))) 1563 gen8_ppgtt_notify_vgt(ppgtt, true); 1564 1565 return 0; 1566 1567 free_scratch: 1568 gen8_free_scratch(&ppgtt->base); 1569 return ret; 1570 } 1571 1572 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) 1573 { 1574 struct i915_address_space *vm = &ppgtt->base; 1575 struct i915_page_table *unused; 1576 gen6_pte_t scratch_pte; 1577 uint32_t pd_entry; 1578 uint32_t pte, pde; 1579 uint32_t start = ppgtt->base.start, length = ppgtt->base.total; 1580 1581 scratch_pte = vm->pte_encode(px_dma(vm->scratch_page), 1582 I915_CACHE_LLC, true, 0); 1583 1584 gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) { 1585 u32 expected; 1586 gen6_pte_t *pt_vaddr; 1587 const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]); 1588 pd_entry = readl(ppgtt->pd_addr + pde); 1589 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID); 1590 1591 if (pd_entry != expected) 1592 seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n", 1593 pde, 1594 pd_entry, 1595 expected); 1596 seq_printf(m, "\tPDE: %x\n", pd_entry); 1597 1598 pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]); 1599 1600 for (pte = 0; pte < GEN6_PTES; pte+=4) { 1601 unsigned long va = 1602 (pde * PAGE_SIZE * GEN6_PTES) + 1603 (pte * PAGE_SIZE); 1604 int i; 1605 bool found = false; 1606 for (i = 0; i < 4; i++) 1607 if (pt_vaddr[pte + i] != scratch_pte) 1608 found = true; 1609 if (!found) 1610 continue; 1611 1612 seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte); 1613 for (i = 0; i < 4; i++) { 1614 if (pt_vaddr[pte + i] != scratch_pte) 1615 seq_printf(m, " %08x", pt_vaddr[pte + i]); 1616 else 1617 seq_puts(m, " SCRATCH "); 1618 } 1619 seq_puts(m, "\n"); 1620 } 1621 kunmap_px(ppgtt, pt_vaddr); 1622 } 1623 } 1624 1625 /* Write pde (index) from the page directory @pd to the page table @pt */ 1626 static void gen6_write_pde(struct i915_page_directory *pd, 1627 const int pde, struct i915_page_table *pt) 1628 { 1629 /* Caller needs to make sure the write completes if necessary */ 1630 struct i915_hw_ppgtt *ppgtt = 1631 container_of(pd, struct i915_hw_ppgtt, pd); 1632 u32 pd_entry; 1633 1634 pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt)); 1635 pd_entry |= 
GEN6_PDE_VALID; 1636 1637 writel(pd_entry, ppgtt->pd_addr + pde); 1638 } 1639 1640 /* Write all the page tables found in the ppgtt structure to incrementing page 1641 * directories. */ 1642 static void gen6_write_page_range(struct drm_i915_private *dev_priv, 1643 struct i915_page_directory *pd, 1644 uint32_t start, uint32_t length) 1645 { 1646 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1647 struct i915_page_table *pt; 1648 uint32_t pde; 1649 1650 gen6_for_each_pde(pt, pd, start, length, pde) 1651 gen6_write_pde(pd, pde, pt); 1652 1653 /* Make sure write is complete before other code can use this page 1654 * table. Also require for WC mapped PTEs */ 1655 readl(ggtt->gsm); 1656 } 1657 1658 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) 1659 { 1660 BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); 1661 1662 return (ppgtt->pd.base.ggtt_offset / 64) << 16; 1663 } 1664 1665 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, 1666 struct drm_i915_gem_request *req) 1667 { 1668 struct intel_engine_cs *engine = req->engine; 1669 int ret; 1670 1671 /* NB: TLBs must be flushed and invalidated before a switch */ 1672 ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 1673 if (ret) 1674 return ret; 1675 1676 ret = intel_ring_begin(req, 6); 1677 if (ret) 1678 return ret; 1679 1680 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(2)); 1681 intel_ring_emit_reg(engine, RING_PP_DIR_DCLV(engine)); 1682 intel_ring_emit(engine, PP_DIR_DCLV_2G); 1683 intel_ring_emit_reg(engine, RING_PP_DIR_BASE(engine)); 1684 intel_ring_emit(engine, get_pd_offset(ppgtt)); 1685 intel_ring_emit(engine, MI_NOOP); 1686 intel_ring_advance(engine); 1687 1688 return 0; 1689 } 1690 1691 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, 1692 struct drm_i915_gem_request *req) 1693 { 1694 struct intel_engine_cs *engine = req->engine; 1695 int ret; 1696 1697 /* NB: TLBs must be flushed and invalidated before a switch */ 1698 ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 1699 if (ret) 1700 return ret; 1701 1702 ret = intel_ring_begin(req, 6); 1703 if (ret) 1704 return ret; 1705 1706 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(2)); 1707 intel_ring_emit_reg(engine, RING_PP_DIR_DCLV(engine)); 1708 intel_ring_emit(engine, PP_DIR_DCLV_2G); 1709 intel_ring_emit_reg(engine, RING_PP_DIR_BASE(engine)); 1710 intel_ring_emit(engine, get_pd_offset(ppgtt)); 1711 intel_ring_emit(engine, MI_NOOP); 1712 intel_ring_advance(engine); 1713 1714 /* XXX: RCS is the only one to auto invalidate the TLBs? */ 1715 if (engine->id != RCS) { 1716 ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 1717 if (ret) 1718 return ret; 1719 } 1720 1721 return 0; 1722 } 1723 1724 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, 1725 struct drm_i915_gem_request *req) 1726 { 1727 struct intel_engine_cs *engine = req->engine; 1728 struct drm_i915_private *dev_priv = req->i915; 1729 1730 I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); 1731 I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt)); 1732 return 0; 1733 } 1734 1735 static void gen8_ppgtt_enable(struct drm_device *dev) 1736 { 1737 struct drm_i915_private *dev_priv = to_i915(dev); 1738 struct intel_engine_cs *engine; 1739 1740 for_each_engine(engine, dev_priv) { 1741 u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? 
GEN8_GFX_PPGTT_48B : 0; 1742 I915_WRITE(RING_MODE_GEN7(engine), 1743 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level)); 1744 } 1745 } 1746 1747 static void gen7_ppgtt_enable(struct drm_device *dev) 1748 { 1749 struct drm_i915_private *dev_priv = to_i915(dev); 1750 struct intel_engine_cs *engine; 1751 uint32_t ecochk, ecobits; 1752 1753 ecobits = I915_READ(GAC_ECO_BITS); 1754 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B); 1755 1756 ecochk = I915_READ(GAM_ECOCHK); 1757 if (IS_HASWELL(dev)) { 1758 ecochk |= ECOCHK_PPGTT_WB_HSW; 1759 } else { 1760 ecochk |= ECOCHK_PPGTT_LLC_IVB; 1761 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB; 1762 } 1763 I915_WRITE(GAM_ECOCHK, ecochk); 1764 1765 for_each_engine(engine, dev_priv) { 1766 /* GFX_MODE is per-ring on gen7+ */ 1767 I915_WRITE(RING_MODE_GEN7(engine), 1768 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 1769 } 1770 } 1771 1772 static void gen6_ppgtt_enable(struct drm_device *dev) 1773 { 1774 struct drm_i915_private *dev_priv = to_i915(dev); 1775 uint32_t ecochk, gab_ctl, ecobits; 1776 1777 ecobits = I915_READ(GAC_ECO_BITS); 1778 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT | 1779 ECOBITS_PPGTT_CACHE64B); 1780 1781 gab_ctl = I915_READ(GAB_CTL); 1782 I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT); 1783 1784 ecochk = I915_READ(GAM_ECOCHK); 1785 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B); 1786 1787 I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 1788 } 1789 1790 /* PPGTT support for Sandybdrige/Gen6 and later */ 1791 static void gen6_ppgtt_clear_range(struct i915_address_space *vm, 1792 uint64_t start, 1793 uint64_t length, 1794 bool use_scratch) 1795 { 1796 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1797 gen6_pte_t *pt_vaddr, scratch_pte; 1798 unsigned first_entry = start >> PAGE_SHIFT; 1799 unsigned num_entries = length >> PAGE_SHIFT; 1800 unsigned act_pt = first_entry / GEN6_PTES; 1801 unsigned first_pte = first_entry % GEN6_PTES; 1802 unsigned last_pte, i; 1803 1804 scratch_pte = vm->pte_encode(px_dma(vm->scratch_page), 1805 I915_CACHE_LLC, true, 0); 1806 1807 while (num_entries) { 1808 last_pte = first_pte + num_entries; 1809 if (last_pte > GEN6_PTES) 1810 last_pte = GEN6_PTES; 1811 1812 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); 1813 1814 for (i = first_pte; i < last_pte; i++) 1815 pt_vaddr[i] = scratch_pte; 1816 1817 kunmap_px(ppgtt, pt_vaddr); 1818 1819 num_entries -= last_pte - first_pte; 1820 first_pte = 0; 1821 act_pt++; 1822 } 1823 } 1824 1825 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, 1826 struct sg_table *pages, 1827 uint64_t start, 1828 enum i915_cache_level cache_level, u32 flags) 1829 { 1830 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1831 unsigned first_entry = start >> PAGE_SHIFT; 1832 unsigned act_pt = first_entry / GEN6_PTES; 1833 unsigned act_pte = first_entry % GEN6_PTES; 1834 gen6_pte_t *pt_vaddr = NULL; 1835 struct sgt_iter sgt_iter; 1836 dma_addr_t addr; 1837 1838 for_each_sgt_dma(addr, sgt_iter, pages) { 1839 if (pt_vaddr == NULL) 1840 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); 1841 1842 pt_vaddr[act_pte] = 1843 vm->pte_encode(addr, cache_level, true, flags); 1844 1845 if (++act_pte == GEN6_PTES) { 1846 kunmap_px(ppgtt, pt_vaddr); 1847 pt_vaddr = NULL; 1848 act_pt++; 1849 act_pte = 0; 1850 } 1851 } 1852 1853 if (pt_vaddr) 1854 kunmap_px(ppgtt, pt_vaddr); 1855 } 1856 1857 static int gen6_alloc_va_range(struct i915_address_space *vm, 1858 uint64_t start_in, uint64_t length_in) 1859 { 1860 DECLARE_BITMAP(new_page_tables, 
I915_PDES); 1861 struct drm_device *dev = vm->dev; 1862 struct drm_i915_private *dev_priv = to_i915(dev); 1863 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1864 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1865 struct i915_page_table *pt; 1866 uint32_t start, length, start_save, length_save; 1867 uint32_t pde; 1868 int ret; 1869 1870 if (WARN_ON(start_in + length_in > ppgtt->base.total)) 1871 return -ENODEV; 1872 1873 start = start_save = start_in; 1874 length = length_save = length_in; 1875 1876 bitmap_zero(new_page_tables, I915_PDES); 1877 1878 /* The allocation is done in two stages so that we can bail out with 1879 * minimal amount of pain. The first stage finds new page tables that 1880 * need allocation. The second stage marks use ptes within the page 1881 * tables. 1882 */ 1883 gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { 1884 if (pt != vm->scratch_pt) { 1885 WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES)); 1886 continue; 1887 } 1888 1889 /* We've already allocated a page table */ 1890 WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES)); 1891 1892 pt = alloc_pt(dev); 1893 if (IS_ERR(pt)) { 1894 ret = PTR_ERR(pt); 1895 goto unwind_out; 1896 } 1897 1898 gen6_initialize_pt(vm, pt); 1899 1900 ppgtt->pd.page_table[pde] = pt; 1901 __set_bit(pde, new_page_tables); 1902 trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT); 1903 } 1904 1905 start = start_save; 1906 length = length_save; 1907 1908 gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { 1909 DECLARE_BITMAP(tmp_bitmap, GEN6_PTES); 1910 1911 bitmap_zero(tmp_bitmap, GEN6_PTES); 1912 bitmap_set(tmp_bitmap, gen6_pte_index(start), 1913 gen6_pte_count(start, length)); 1914 1915 if (__test_and_clear_bit(pde, new_page_tables)) 1916 gen6_write_pde(&ppgtt->pd, pde, pt); 1917 1918 trace_i915_page_table_entry_map(vm, pde, pt, 1919 gen6_pte_index(start), 1920 gen6_pte_count(start, length), 1921 GEN6_PTES); 1922 bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes, 1923 GEN6_PTES); 1924 } 1925 1926 WARN_ON(!bitmap_empty(new_page_tables, I915_PDES)); 1927 1928 /* Make sure write is complete before other code can use this page 1929 * table. 
Also require for WC mapped PTEs */ 1930 readl(ggtt->gsm); 1931 1932 mark_tlbs_dirty(ppgtt); 1933 return 0; 1934 1935 unwind_out: 1936 for_each_set_bit(pde, new_page_tables, I915_PDES) { 1937 struct i915_page_table *pt = ppgtt->pd.page_table[pde]; 1938 1939 ppgtt->pd.page_table[pde] = vm->scratch_pt; 1940 free_pt(vm->dev, pt); 1941 } 1942 1943 mark_tlbs_dirty(ppgtt); 1944 return ret; 1945 } 1946 1947 static int gen6_init_scratch(struct i915_address_space *vm) 1948 { 1949 struct drm_device *dev = vm->dev; 1950 1951 vm->scratch_page = alloc_scratch_page(dev); 1952 if (IS_ERR(vm->scratch_page)) 1953 return PTR_ERR(vm->scratch_page); 1954 1955 vm->scratch_pt = alloc_pt(dev); 1956 if (IS_ERR(vm->scratch_pt)) { 1957 free_scratch_page(dev, vm->scratch_page); 1958 return PTR_ERR(vm->scratch_pt); 1959 } 1960 1961 gen6_initialize_pt(vm, vm->scratch_pt); 1962 1963 return 0; 1964 } 1965 1966 static void gen6_free_scratch(struct i915_address_space *vm) 1967 { 1968 struct drm_device *dev = vm->dev; 1969 1970 free_pt(dev, vm->scratch_pt); 1971 free_scratch_page(dev, vm->scratch_page); 1972 } 1973 1974 static void gen6_ppgtt_cleanup(struct i915_address_space *vm) 1975 { 1976 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1977 struct i915_page_directory *pd = &ppgtt->pd; 1978 struct drm_device *dev = vm->dev; 1979 struct i915_page_table *pt; 1980 uint32_t pde; 1981 1982 drm_mm_remove_node(&ppgtt->node); 1983 1984 gen6_for_all_pdes(pt, pd, pde) 1985 if (pt != vm->scratch_pt) 1986 free_pt(dev, pt); 1987 1988 gen6_free_scratch(vm); 1989 } 1990 1991 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) 1992 { 1993 struct i915_address_space *vm = &ppgtt->base; 1994 struct drm_device *dev = ppgtt->base.dev; 1995 struct drm_i915_private *dev_priv = to_i915(dev); 1996 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1997 bool retried = false; 1998 int ret; 1999 2000 /* PPGTT PDEs reside in the GGTT and consists of 512 entries. The 2001 * allocator works in address space sizes, so it's multiplied by page 2002 * size. We allocate at the top of the GTT to avoid fragmentation. 
2003 */ 2004 BUG_ON(!drm_mm_initialized(&ggtt->base.mm)); 2005 2006 ret = gen6_init_scratch(vm); 2007 if (ret) 2008 return ret; 2009 2010 alloc: 2011 ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, 2012 &ppgtt->node, GEN6_PD_SIZE, 2013 GEN6_PD_ALIGN, 0, 2014 0, ggtt->base.total, 2015 DRM_MM_TOPDOWN); 2016 if (ret == -ENOSPC && !retried) { 2017 ret = i915_gem_evict_something(dev, &ggtt->base, 2018 GEN6_PD_SIZE, GEN6_PD_ALIGN, 2019 I915_CACHE_NONE, 2020 0, ggtt->base.total, 2021 0); 2022 if (ret) 2023 goto err_out; 2024 2025 retried = true; 2026 goto alloc; 2027 } 2028 2029 if (ret) 2030 goto err_out; 2031 2032 2033 if (ppgtt->node.start < ggtt->mappable_end) 2034 DRM_DEBUG("Forced to use aperture for PDEs\n"); 2035 2036 return 0; 2037 2038 err_out: 2039 gen6_free_scratch(vm); 2040 return ret; 2041 } 2042 2043 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) 2044 { 2045 return gen6_ppgtt_allocate_page_directories(ppgtt); 2046 } 2047 2048 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt, 2049 uint64_t start, uint64_t length) 2050 { 2051 struct i915_page_table *unused; 2052 uint32_t pde; 2053 2054 gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) 2055 ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt; 2056 } 2057 2058 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 2059 { 2060 struct drm_device *dev = ppgtt->base.dev; 2061 struct drm_i915_private *dev_priv = to_i915(dev); 2062 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2063 int ret; 2064 2065 ppgtt->base.pte_encode = ggtt->base.pte_encode; 2066 if (intel_vgpu_active(dev_priv) || IS_GEN6(dev)) 2067 ppgtt->switch_mm = gen6_mm_switch; 2068 else if (IS_HASWELL(dev)) 2069 ppgtt->switch_mm = hsw_mm_switch; 2070 else if (IS_GEN7(dev)) 2071 ppgtt->switch_mm = gen7_mm_switch; 2072 else 2073 BUG(); 2074 2075 ret = gen6_ppgtt_alloc(ppgtt); 2076 if (ret) 2077 return ret; 2078 2079 ppgtt->base.allocate_va_range = gen6_alloc_va_range; 2080 ppgtt->base.clear_range = gen6_ppgtt_clear_range; 2081 ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; 2082 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 2083 ppgtt->base.bind_vma = ppgtt_bind_vma; 2084 ppgtt->base.cleanup = gen6_ppgtt_cleanup; 2085 ppgtt->base.start = 0; 2086 ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE; 2087 ppgtt->debug_dump = gen6_dump_ppgtt; 2088 2089 ppgtt->pd.base.ggtt_offset = 2090 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t); 2091 2092 ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + 2093 ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t); 2094 2095 gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total); 2096 2097 gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total); 2098 2099 DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n", 2100 ppgtt->node.size >> 20, 2101 ppgtt->node.start / PAGE_SIZE); 2102 2103 DRM_DEBUG("Adding PPGTT at offset %x\n", 2104 ppgtt->pd.base.ggtt_offset << 10); 2105 2106 return 0; 2107 } 2108 2109 static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) 2110 { 2111 ppgtt->base.dev = dev; 2112 2113 if (INTEL_INFO(dev)->gen < 8) 2114 return gen6_ppgtt_init(ppgtt); 2115 else 2116 return gen8_ppgtt_init(ppgtt); 2117 } 2118 2119 static void i915_address_space_init(struct i915_address_space *vm, 2120 struct drm_i915_private *dev_priv) 2121 { 2122 drm_mm_init(&vm->mm, vm->start, vm->total); 2123 vm->dev = &dev_priv->drm; 2124 INIT_LIST_HEAD(&vm->active_list); 2125 INIT_LIST_HEAD(&vm->inactive_list); 2126 list_add_tail(&vm->global_link, &dev_priv->vm_list); 2127 } 2128 
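/*
 * Illustrative sizing note for the gen6/7 PPGTT set up above: assuming the
 * usual definitions of I915_PDES (512), GEN6_PTES (PAGE_SIZE /
 * sizeof(gen6_pte_t) = 1024) and 4K pages, gen6_ppgtt_init() ends up with
 *
 *	ppgtt->base.total = 512 * 1024 * 4096 = 2 GiB
 *
 * of PPGTT address space, while the page directory itself needs only
 * GEN6_PD_SIZE (512 * PAGE_SIZE = 2 MiB) of GGTT space, reserved top-down
 * by gen6_ppgtt_allocate_page_directories() and written out through
 * ggtt->gsm at pd.base.ggtt_offset.
 */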
2129 static void gtt_write_workarounds(struct drm_device *dev) 2130 { 2131 struct drm_i915_private *dev_priv = to_i915(dev); 2132 2133 /* This function is for gtt related workarounds. This function is 2134 * called on driver load and after a GPU reset, so you can place 2135 * workarounds here even if they get overwritten by GPU reset. 2136 */ 2137 /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */ 2138 if (IS_BROADWELL(dev)) 2139 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); 2140 else if (IS_CHERRYVIEW(dev)) 2141 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); 2142 else if (IS_SKYLAKE(dev)) 2143 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); 2144 else if (IS_BROXTON(dev)) 2145 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); 2146 } 2147 2148 static int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) 2149 { 2150 struct drm_i915_private *dev_priv = to_i915(dev); 2151 int ret = 0; 2152 2153 ret = __hw_ppgtt_init(dev, ppgtt); 2154 if (ret == 0) { 2155 kref_init(&ppgtt->ref); 2156 i915_address_space_init(&ppgtt->base, dev_priv); 2157 } 2158 2159 return ret; 2160 } 2161 2162 int i915_ppgtt_init_hw(struct drm_device *dev) 2163 { 2164 gtt_write_workarounds(dev); 2165 2166 /* In the case of execlists, PPGTT is enabled by the context descriptor 2167 * and the PDPs are contained within the context itself. We don't 2168 * need to do anything here. */ 2169 if (i915.enable_execlists) 2170 return 0; 2171 2172 if (!USES_PPGTT(dev)) 2173 return 0; 2174 2175 if (IS_GEN6(dev)) 2176 gen6_ppgtt_enable(dev); 2177 else if (IS_GEN7(dev)) 2178 gen7_ppgtt_enable(dev); 2179 else if (INTEL_INFO(dev)->gen >= 8) 2180 gen8_ppgtt_enable(dev); 2181 else 2182 MISSING_CASE(INTEL_INFO(dev)->gen); 2183 2184 return 0; 2185 } 2186 2187 struct i915_hw_ppgtt * 2188 i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv) 2189 { 2190 struct i915_hw_ppgtt *ppgtt; 2191 int ret; 2192 2193 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 2194 if (!ppgtt) 2195 return ERR_PTR(-ENOMEM); 2196 2197 ret = i915_ppgtt_init(dev, ppgtt); 2198 if (ret) { 2199 kfree(ppgtt); 2200 return ERR_PTR(ret); 2201 } 2202 2203 ppgtt->file_priv = fpriv; 2204 2205 trace_i915_ppgtt_create(&ppgtt->base); 2206 2207 return ppgtt; 2208 } 2209 2210 void i915_ppgtt_release(struct kref *kref) 2211 { 2212 struct i915_hw_ppgtt *ppgtt = 2213 container_of(kref, struct i915_hw_ppgtt, ref); 2214 2215 trace_i915_ppgtt_release(&ppgtt->base); 2216 2217 /* vmas should already be unbound */ 2218 WARN_ON(!list_empty(&ppgtt->base.active_list)); 2219 WARN_ON(!list_empty(&ppgtt->base.inactive_list)); 2220 2221 list_del(&ppgtt->base.global_link); 2222 drm_mm_takedown(&ppgtt->base.mm); 2223 2224 ppgtt->base.cleanup(&ppgtt->base); 2225 kfree(ppgtt); 2226 } 2227 2228 extern int intel_iommu_gfx_mapped; 2229 /* Certain Gen5 chipsets require idling the GPU before 2230 * unmapping anything from the GTT when VT-d is enabled. 2231 */ 2232 static bool needs_idle_maps(struct drm_device *dev) 2233 { 2234 #ifdef CONFIG_INTEL_IOMMU 2235 /* Query intel_iommu to see if we need the workaround. Presumably that 2236 * was loaded first.
2237 */ 2238 if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped) 2239 return true; 2240 #endif 2241 return false; 2242 } 2243 2244 static bool do_idling(struct drm_i915_private *dev_priv) 2245 { 2246 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2247 bool ret = dev_priv->mm.interruptible; 2248 2249 if (unlikely(ggtt->do_idle_maps)) { 2250 dev_priv->mm.interruptible = false; 2251 if (i915_gem_wait_for_idle(dev_priv)) { 2252 DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); 2253 /* Wait a bit, in hopes it avoids the hang */ 2254 udelay(10); 2255 } 2256 } 2257 2258 return ret; 2259 } 2260 2261 static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible) 2262 { 2263 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2264 2265 if (unlikely(ggtt->do_idle_maps)) 2266 dev_priv->mm.interruptible = interruptible; 2267 } 2268 2269 void i915_check_and_clear_faults(struct drm_i915_private *dev_priv) 2270 { 2271 struct intel_engine_cs *engine; 2272 2273 if (INTEL_INFO(dev_priv)->gen < 6) 2274 return; 2275 2276 for_each_engine(engine, dev_priv) { 2277 u32 fault_reg; 2278 fault_reg = I915_READ(RING_FAULT_REG(engine)); 2279 if (fault_reg & RING_FAULT_VALID) { 2280 DRM_DEBUG_DRIVER("Unexpected fault\n" 2281 "\tAddr: 0x%08ux\n" 2282 "\tAddress space: %s\n" 2283 "\tSource ID: %d\n" 2284 "\tType: %d\n", 2285 fault_reg & LINUX_PAGE_MASK, 2286 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT", 2287 RING_FAULT_SRCID(fault_reg), 2288 RING_FAULT_FAULT_TYPE(fault_reg)); 2289 I915_WRITE(RING_FAULT_REG(engine), 2290 fault_reg & ~RING_FAULT_VALID); 2291 } 2292 } 2293 POSTING_READ(RING_FAULT_REG(&dev_priv->engine[RCS])); 2294 } 2295 2296 static void i915_ggtt_flush(struct drm_i915_private *dev_priv) 2297 { 2298 if (INTEL_INFO(dev_priv)->gen < 6) { 2299 intel_gtt_chipset_flush(); 2300 } else { 2301 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2302 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2303 } 2304 } 2305 2306 void i915_gem_suspend_gtt_mappings(struct drm_device *dev) 2307 { 2308 struct drm_i915_private *dev_priv = to_i915(dev); 2309 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2310 2311 /* Don't bother messing with faults pre GEN6 as we have little 2312 * documentation supporting that it's a good idea. 
*/ 2314 if (INTEL_INFO(dev)->gen < 6) 2315 return; 2316 2317 i915_check_and_clear_faults(dev_priv); 2318 2319 ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total, 2320 true); 2321 2322 i915_ggtt_flush(dev_priv); 2323 } 2324 2325 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj) 2326 { 2327 if (!dma_map_sg(&obj->base.dev->pdev->dev, 2328 obj->pages->sgl, obj->pages->nents, 2329 PCI_DMA_BIDIRECTIONAL)) 2330 return -ENOSPC; 2331 2332 return 0; 2333 } 2334 2335 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) 2336 { 2337 #ifdef writeq 2338 writeq(pte, addr); 2339 #else 2340 iowrite32((u32)pte, addr); 2341 iowrite32(pte >> 32, addr + 4); 2342 #endif 2343 } 2344 2345 static void gen8_ggtt_insert_page(struct i915_address_space *vm, 2346 dma_addr_t addr, 2347 uint64_t offset, 2348 enum i915_cache_level level, 2349 u32 unused) 2350 { 2351 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2352 gen8_pte_t __iomem *pte = 2353 (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + 2354 (offset >> PAGE_SHIFT); 2355 int rpm_atomic_seq; 2356 2357 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2358 2359 gen8_set_pte(pte, gen8_pte_encode(addr, level, true)); 2360 2361 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2362 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2363 2364 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2365 } 2366 2367 static void gen8_ggtt_insert_entries(struct i915_address_space *vm, 2368 struct sg_table *st, 2369 uint64_t start, 2370 enum i915_cache_level level, u32 unused) 2371 { 2372 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2373 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2374 struct sgt_iter sgt_iter; 2375 gen8_pte_t __iomem *gtt_entries; 2376 gen8_pte_t gtt_entry; 2377 dma_addr_t addr; 2378 int rpm_atomic_seq; 2379 int i = 0; 2380 2381 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2382 2383 gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT); 2384 2385 for_each_sgt_dma(addr, sgt_iter, st) { 2386 gtt_entry = gen8_pte_encode(addr, level, true); 2387 gen8_set_pte(&gtt_entries[i++], gtt_entry); 2388 } 2389 2390 /* 2391 * XXX: This serves as a posting read to make sure that the PTE has 2392 * actually been updated. There is some concern that even though 2393 * registers and PTEs are within the same BAR that they are potentially 2394 * of NUMA access patterns. Therefore, even with the way we assume 2395 * hardware should work, we must keep this posting read for paranoia. 2396 */ 2397 if (i != 0) 2398 WARN_ON(readq(&gtt_entries[i-1]) != gtt_entry); 2399 2400 /* This next bit makes the above posting read even more important. We 2401 * want to flush the TLBs only after we're certain all the PTE updates 2402 * have finished.
*/ 2404 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2405 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2406 2407 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2408 } 2409 2410 struct insert_entries { 2411 struct i915_address_space *vm; 2412 struct sg_table *st; 2413 uint64_t start; 2414 enum i915_cache_level level; 2415 u32 flags; 2416 }; 2417 2418 static int gen8_ggtt_insert_entries__cb(void *_arg) 2419 { 2420 struct insert_entries *arg = _arg; 2421 gen8_ggtt_insert_entries(arg->vm, arg->st, 2422 arg->start, arg->level, arg->flags); 2423 return 0; 2424 } 2425 2426 static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm, 2427 struct sg_table *st, 2428 uint64_t start, 2429 enum i915_cache_level level, 2430 u32 flags) 2431 { 2432 struct insert_entries arg = { vm, st, start, level, flags }; 2433 #ifndef __DragonFly__ 2434 stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL); 2435 #else 2436 /* XXX: is this enough ? 2437 * See Linux commit 5bab6f60cb4d1417ad7c599166bcfec87529c1a2 */ 2438 get_mplock(); 2439 gen8_ggtt_insert_entries__cb(&arg); 2440 rel_mplock(); 2441 #endif 2442 } 2443 2444 static void gen6_ggtt_insert_page(struct i915_address_space *vm, 2445 dma_addr_t addr, 2446 uint64_t offset, 2447 enum i915_cache_level level, 2448 u32 flags) 2449 { 2450 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2451 gen6_pte_t __iomem *pte = 2452 (gen6_pte_t __iomem *)dev_priv->ggtt.gsm + 2453 (offset >> PAGE_SHIFT); 2454 int rpm_atomic_seq; 2455 2456 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2457 2458 iowrite32(vm->pte_encode(addr, level, true, flags), pte); 2459 2460 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2461 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2462 2463 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2464 } 2465 2466 /* 2467 * Binds an object into the global gtt with the specified cache level. The object 2468 * will be accessible to the GPU via commands whose operands reference offsets 2469 * within the global GTT as well as accessible by the GPU through the GMADR 2470 * mapped BAR (dev_priv->mm.gtt->gtt). 2471 */ 2472 static void gen6_ggtt_insert_entries(struct i915_address_space *vm, 2473 struct sg_table *st, 2474 uint64_t start, 2475 enum i915_cache_level level, u32 flags) 2476 { 2477 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2478 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2479 struct sgt_iter sgt_iter; 2480 gen6_pte_t __iomem *gtt_entries; 2481 gen6_pte_t gtt_entry; 2482 dma_addr_t addr; 2483 int rpm_atomic_seq; 2484 int i = 0; 2485 2486 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2487 2488 gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT); 2489 2490 for_each_sgt_dma(addr, sgt_iter, st) { 2491 gtt_entry = vm->pte_encode(addr, level, true, flags); 2492 iowrite32(gtt_entry, &gtt_entries[i++]); 2493 } 2494 2495 /* XXX: This serves as a posting read to make sure that the PTE has 2496 * actually been updated. There is some concern that even though 2497 * registers and PTEs are within the same BAR that they are potentially 2498 * of NUMA access patterns. Therefore, even with the way we assume 2499 * hardware should work, we must keep this posting read for paranoia. 2500 */ 2501 if (i != 0) 2502 WARN_ON(readl(&gtt_entries[i-1]) != gtt_entry); 2503 2504 /* This next bit makes the above posting read even more important. We 2505 * want to flush the TLBs only after we're certain all the PTE updates 2506 * have finished.
*/ 2508 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2509 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2510 2511 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2512 } 2513 2514 static void nop_clear_range(struct i915_address_space *vm, 2515 uint64_t start, 2516 uint64_t length, 2517 bool use_scratch) 2518 { 2519 } 2520 2521 static void gen8_ggtt_clear_range(struct i915_address_space *vm, 2522 uint64_t start, 2523 uint64_t length, 2524 bool use_scratch) 2525 { 2526 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2527 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2528 unsigned first_entry = start >> PAGE_SHIFT; 2529 unsigned num_entries = length >> PAGE_SHIFT; 2530 gen8_pte_t scratch_pte, __iomem *gtt_base = 2531 (gen8_pte_t __iomem *)ggtt->gsm + first_entry; 2532 const int max_entries = ggtt_total_entries(ggtt) - first_entry; 2533 int i; 2534 int rpm_atomic_seq; 2535 2536 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2537 2538 if (WARN(num_entries > max_entries, 2539 "First entry = %d; Num entries = %d (max=%d)\n", 2540 first_entry, num_entries, max_entries)) 2541 num_entries = max_entries; 2542 2543 scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), 2544 I915_CACHE_LLC, 2545 use_scratch); 2546 for (i = 0; i < num_entries; i++) 2547 gen8_set_pte(&gtt_base[i], scratch_pte); 2548 readl(gtt_base); 2549 2550 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2551 } 2552 2553 static void gen6_ggtt_clear_range(struct i915_address_space *vm, 2554 uint64_t start, 2555 uint64_t length, 2556 bool use_scratch) 2557 { 2558 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2559 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2560 unsigned first_entry = start >> PAGE_SHIFT; 2561 unsigned num_entries = length >> PAGE_SHIFT; 2562 gen6_pte_t scratch_pte, __iomem *gtt_base = 2563 (gen6_pte_t __iomem *)ggtt->gsm + first_entry; 2564 const int max_entries = ggtt_total_entries(ggtt) - first_entry; 2565 int i; 2566 int rpm_atomic_seq; 2567 2568 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2569 2570 if (WARN(num_entries > max_entries, 2571 "First entry = %d; Num entries = %d (max=%d)\n", 2572 first_entry, num_entries, max_entries)) 2573 num_entries = max_entries; 2574 2575 scratch_pte = vm->pte_encode(px_dma(vm->scratch_page), 2576 I915_CACHE_LLC, use_scratch, 0); 2577 2578 for (i = 0; i < num_entries; i++) 2579 iowrite32(scratch_pte, &gtt_base[i]); 2580 readl(gtt_base); 2581 2582 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2583 } 2584 2585 static void i915_ggtt_insert_page(struct i915_address_space *vm, 2586 dma_addr_t addr, 2587 uint64_t offset, 2588 enum i915_cache_level cache_level, 2589 u32 unused) 2590 { 2591 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2592 unsigned int flags = (cache_level == I915_CACHE_NONE) ? 2593 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; 2594 int rpm_atomic_seq; 2595 2596 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2597 2598 intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); 2599 2600 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2601 } 2602 2603 static void i915_ggtt_insert_entries(struct i915_address_space *vm, 2604 struct sg_table *pages, 2605 uint64_t start, 2606 enum i915_cache_level cache_level, u32 unused) 2607 { 2608 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2609 unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2610 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; 2611 int rpm_atomic_seq; 2612 2613 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2614 2615 intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags); 2616 2617 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2618 2619 } 2620 2621 static void i915_ggtt_clear_range(struct i915_address_space *vm, 2622 uint64_t start, 2623 uint64_t length, 2624 bool unused) 2625 { 2626 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2627 unsigned first_entry = start >> PAGE_SHIFT; 2628 unsigned num_entries = length >> PAGE_SHIFT; 2629 int rpm_atomic_seq; 2630 2631 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2632 2633 intel_gtt_clear_range(first_entry, num_entries); 2634 2635 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2636 } 2637 2638 static int ggtt_bind_vma(struct i915_vma *vma, 2639 enum i915_cache_level cache_level, 2640 u32 flags) 2641 { 2642 struct drm_i915_gem_object *obj = vma->obj; 2643 u32 pte_flags = 0; 2644 int ret; 2645 2646 ret = i915_get_ggtt_vma_pages(vma); 2647 if (ret) 2648 return ret; 2649 2650 /* Currently applicable only to VLV */ 2651 if (obj->gt_ro) 2652 pte_flags |= PTE_READ_ONLY; 2653 2654 vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages, 2655 vma->node.start, 2656 cache_level, pte_flags); 2657 2658 /* 2659 * Without aliasing PPGTT there's no difference between 2660 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally 2661 * upgrade to both bound if we bind either to avoid double-binding. 2662 */ 2663 vma->bound |= GLOBAL_BIND | LOCAL_BIND; 2664 2665 return 0; 2666 } 2667 2668 static int aliasing_gtt_bind_vma(struct i915_vma *vma, 2669 enum i915_cache_level cache_level, 2670 u32 flags) 2671 { 2672 u32 pte_flags; 2673 int ret; 2674 2675 ret = i915_get_ggtt_vma_pages(vma); 2676 if (ret) 2677 return ret; 2678 2679 /* Currently applicable only to VLV */ 2680 pte_flags = 0; 2681 if (vma->obj->gt_ro) 2682 pte_flags |= PTE_READ_ONLY; 2683 2684 2685 if (flags & GLOBAL_BIND) { 2686 vma->vm->insert_entries(vma->vm, 2687 vma->ggtt_view.pages, 2688 vma->node.start, 2689 cache_level, pte_flags); 2690 } 2691 2692 if (flags & LOCAL_BIND) { 2693 struct i915_hw_ppgtt *appgtt = 2694 to_i915(vma->vm->dev)->mm.aliasing_ppgtt; 2695 appgtt->base.insert_entries(&appgtt->base, 2696 vma->ggtt_view.pages, 2697 vma->node.start, 2698 cache_level, pte_flags); 2699 } 2700 2701 return 0; 2702 } 2703 2704 static void ggtt_unbind_vma(struct i915_vma *vma) 2705 { 2706 struct drm_device *dev = vma->vm->dev; 2707 struct drm_i915_private *dev_priv = to_i915(dev); 2708 struct drm_i915_gem_object *obj = vma->obj; 2709 const uint64_t size = min_t(uint64_t, 2710 obj->base.size, 2711 vma->node.size); 2712 2713 if (vma->bound & GLOBAL_BIND) { 2714 vma->vm->clear_range(vma->vm, 2715 vma->node.start, 2716 size, 2717 true); 2718 } 2719 2720 if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) { 2721 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt; 2722 2723 appgtt->base.clear_range(&appgtt->base, 2724 vma->node.start, 2725 size, 2726 true); 2727 } 2728 } 2729 2730 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj) 2731 { 2732 struct drm_device *dev = obj->base.dev; 2733 struct drm_i915_private *dev_priv = to_i915(dev); 2734 bool interruptible; 2735 2736 interruptible = do_idling(dev_priv); 2737 2738 dma_unmap_sg(&dev->pdev->dev, obj->pages->sgl, obj->pages->nents, 2739 PCI_DMA_BIDIRECTIONAL); 2740 2741 undo_idling(dev_priv, interruptible); 2742 } 2743 2744 static void i915_gtt_color_adjust(struct 
drm_mm_node *node, 2745 unsigned long color, 2746 u64 *start, 2747 u64 *end) 2748 { 2749 if (node->color != color) 2750 *start += 4096; 2751 2752 if (!list_empty(&node->node_list)) { 2753 node = list_entry(node->node_list.next, 2754 struct drm_mm_node, 2755 node_list); 2756 if (node->allocated && node->color != color) 2757 *end -= 4096; 2758 } 2759 } 2760 2761 static int i915_gem_setup_global_gtt(struct drm_device *dev, 2762 u64 start, 2763 u64 mappable_end, 2764 u64 end) 2765 { 2766 /* Let GEM Manage all of the aperture. 2767 * 2768 * However, leave one page at the end still bound to the scratch page. 2769 * There are a number of places where the hardware apparently prefetches 2770 * past the end of the object, and we've seen multiple hangs with the 2771 * GPU head pointer stuck in a batchbuffer bound at the last page of the 2772 * aperture. One page should be enough to keep any prefetching inside 2773 * of the aperture. 2774 */ 2775 struct drm_i915_private *dev_priv = to_i915(dev); 2776 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2777 struct drm_mm_node *entry; 2778 struct drm_i915_gem_object *obj; 2779 unsigned long hole_start, hole_end; 2780 int ret; 2781 unsigned long mappable = min(end, mappable_end) - start; 2782 int error; 2783 2784 BUG_ON(mappable_end > end); 2785 2786 ggtt->base.start = start; 2787 2788 /* Subtract the guard page before address space initialization to 2789 * shrink the range used by drm_mm */ 2790 ggtt->base.total = end - start - PAGE_SIZE; 2791 i915_address_space_init(&ggtt->base, dev_priv); 2792 ggtt->base.total += PAGE_SIZE; 2793 2794 ret = intel_vgt_balloon(dev_priv); 2795 if (ret) 2796 return ret; 2797 2798 if (!HAS_LLC(dev)) 2799 ggtt->base.mm.color_adjust = i915_gtt_color_adjust; 2800 2801 /* Mark any preallocated objects as occupied */ 2802 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 2803 struct i915_vma *vma = i915_gem_obj_to_vma(obj, &ggtt->base); 2804 2805 DRM_DEBUG_KMS("reserving preallocated space: %llx + %zx\n", 2806 i915_gem_obj_ggtt_offset(obj), obj->base.size); 2807 2808 WARN_ON(i915_gem_obj_ggtt_bound(obj)); 2809 ret = drm_mm_reserve_node(&ggtt->base.mm, &vma->node); 2810 if (ret) { 2811 DRM_DEBUG_KMS("Reservation failed: %i\n", ret); 2812 return ret; 2813 } 2814 vma->bound |= GLOBAL_BIND; 2815 __i915_vma_set_map_and_fenceable(vma); 2816 list_add_tail(&vma->vm_link, &ggtt->base.inactive_list); 2817 } 2818 2819 /* Clear any non-preallocated blocks */ 2820 drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) { 2821 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", 2822 hole_start, hole_end); 2823 ggtt->base.clear_range(&ggtt->base, hole_start, 2824 hole_end - hole_start, true); 2825 } 2826 2827 #ifdef __DragonFly__ 2828 device_printf(dev->dev->bsddev, 2829 "taking over the fictitious range 0x%llx-0x%llx\n", 2830 dev_priv->ggtt.mappable_base + start, dev_priv->ggtt.mappable_base + start + mappable); 2831 error = -vm_phys_fictitious_reg_range(dev_priv->ggtt.mappable_base + start, 2832 dev_priv->ggtt.mappable_base + start + mappable, VM_MEMATTR_WRITE_COMBINING); 2833 #endif 2834 2835 /* And finally clear the reserved guard page */ 2836 ggtt->base.clear_range(&ggtt->base, end - PAGE_SIZE, PAGE_SIZE, true); 2837 2838 if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) { 2839 struct i915_hw_ppgtt *ppgtt; 2840 2841 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 2842 if (!ppgtt) 2843 return -ENOMEM; 2844 2845 ret = __hw_ppgtt_init(dev, ppgtt); 2846 if (ret) { 2847 ppgtt->base.cleanup(&ppgtt->base); 2848 kfree(ppgtt); 2849 
return ret; 2850 } 2851 2852 if (ppgtt->base.allocate_va_range) 2853 ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0, 2854 ppgtt->base.total); 2855 if (ret) { 2856 ppgtt->base.cleanup(&ppgtt->base); 2857 kfree(ppgtt); 2858 return ret; 2859 } 2860 2861 ppgtt->base.clear_range(&ppgtt->base, 2862 ppgtt->base.start, 2863 ppgtt->base.total, 2864 true); 2865 2866 dev_priv->mm.aliasing_ppgtt = ppgtt; 2867 WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma); 2868 ggtt->base.bind_vma = aliasing_gtt_bind_vma; 2869 } 2870 2871 return 0; 2872 } 2873 2874 /** 2875 * i915_gem_init_ggtt - Initialize GEM for Global GTT 2876 * @dev: DRM device 2877 */ 2878 void i915_gem_init_ggtt(struct drm_device *dev) 2879 { 2880 struct drm_i915_private *dev_priv = to_i915(dev); 2881 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2882 2883 i915_gem_setup_global_gtt(dev, 0, ggtt->mappable_end, ggtt->base.total); 2884 } 2885 2886 /** 2887 * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization 2888 * @dev: DRM device 2889 */ 2890 void i915_ggtt_cleanup_hw(struct drm_device *dev) 2891 { 2892 struct drm_i915_private *dev_priv = to_i915(dev); 2893 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2894 2895 if (dev_priv->mm.aliasing_ppgtt) { 2896 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; 2897 2898 ppgtt->base.cleanup(&ppgtt->base); 2899 kfree(ppgtt); 2900 } 2901 2902 i915_gem_cleanup_stolen(dev); 2903 2904 if (drm_mm_initialized(&ggtt->base.mm)) { 2905 intel_vgt_deballoon(dev_priv); 2906 2907 drm_mm_takedown(&ggtt->base.mm); 2908 list_del(&ggtt->base.global_link); 2909 } 2910 2911 ggtt->base.cleanup(&ggtt->base); 2912 } 2913 2914 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) 2915 { 2916 snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; 2917 snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; 2918 return snb_gmch_ctl << 20; 2919 } 2920 2921 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) 2922 { 2923 bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; 2924 bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; 2925 if (bdw_gmch_ctl) 2926 bdw_gmch_ctl = 1 << bdw_gmch_ctl; 2927 2928 #ifdef CONFIG_X86_32 2929 /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */ 2930 if (bdw_gmch_ctl > 4) 2931 bdw_gmch_ctl = 4; 2932 #endif 2933 2934 return bdw_gmch_ctl << 20; 2935 } 2936 2937 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) 2938 { 2939 gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; 2940 gmch_ctrl &= SNB_GMCH_GGMS_MASK; 2941 2942 if (gmch_ctrl) 2943 return 1 << (20 + gmch_ctrl); 2944 2945 return 0; 2946 } 2947 2948 static size_t gen6_get_stolen_size(u16 snb_gmch_ctl) 2949 { 2950 snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; 2951 snb_gmch_ctl &= SNB_GMCH_GMS_MASK; 2952 return snb_gmch_ctl << 25; /* 32 MB units */ 2953 } 2954 2955 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl) 2956 { 2957 bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2958 bdw_gmch_ctl &= BDW_GMCH_GMS_MASK; 2959 return bdw_gmch_ctl << 25; /* 32 MB units */ 2960 } 2961 2962 static size_t chv_get_stolen_size(u16 gmch_ctrl) 2963 { 2964 gmch_ctrl >>= SNB_GMCH_GMS_SHIFT; 2965 gmch_ctrl &= SNB_GMCH_GMS_MASK; 2966 2967 /* 2968 * 0x0 to 0x10: 32MB increments starting at 0MB 2969 * 0x11 to 0x16: 4MB increments starting at 8MB 2970 * 0x17 to 0x1d: 4MB increments start at 36MB 2971 */ 2972 if (gmch_ctrl < 0x11) 2973 return gmch_ctrl << 25; 2974 else if (gmch_ctrl < 0x17) 2975 return (gmch_ctrl - 0x11 + 2) << 22; 2976 else 2977 return (gmch_ctrl - 0x17 + 9) << 22; 2978 } 2979 2980 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) 2981 { 2982 gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2983 
gen9_gmch_ctl &= BDW_GMCH_GMS_MASK; 2984 2985 if (gen9_gmch_ctl < 0xf0) 2986 return gen9_gmch_ctl << 25; /* 32 MB units */ 2987 else 2988 /* 4MB increments starting at 0xf0 for 4MB */ 2989 return (gen9_gmch_ctl - 0xf0 + 1) << 22; 2990 } 2991 2992 static int ggtt_probe_common(struct drm_device *dev, 2993 size_t gtt_size) 2994 { 2995 struct drm_i915_private *dev_priv = to_i915(dev); 2996 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2997 struct i915_page_scratch *scratch_page; 2998 phys_addr_t ggtt_phys_addr; 2999 3000 /* For Modern GENs the PTEs and register space are split in the BAR */ 3001 ggtt_phys_addr = pci_resource_start(dev->pdev, 0) + 3002 (pci_resource_len(dev->pdev, 0) / 2); 3003 3004 /* 3005 * On BXT writes larger than 64 bit to the GTT pagetable range will be 3006 * dropped. For WC mappings in general we have 64 byte burst writes 3007 * when the WC buffer is flushed, so we can't use it, but have to 3008 * resort to an uncached mapping. The WC issue is easily caught by the 3009 * readback check when writing GTT PTE entries. 3010 */ 3011 if (IS_BROXTON(dev)) 3012 ggtt->gsm = ioremap_nocache(ggtt_phys_addr, gtt_size); 3013 else 3014 ggtt->gsm = ioremap_wc(ggtt_phys_addr, gtt_size); 3015 if (!ggtt->gsm) { 3016 DRM_ERROR("Failed to map the gtt page table\n"); 3017 return -ENOMEM; 3018 } 3019 3020 scratch_page = alloc_scratch_page(dev); 3021 if (IS_ERR(scratch_page)) { 3022 DRM_ERROR("Scratch setup failed\n"); 3023 /* iounmap will also get called at remove, but meh */ 3024 iounmap(ggtt->gsm); 3025 return PTR_ERR(scratch_page); 3026 } 3027 3028 ggtt->base.scratch_page = scratch_page; 3029 3030 return 0; 3031 } 3032 3033 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability 3034 * bits. When using advanced contexts each context stores its own PAT, but 3035 * writing this data shouldn't be harmful even in those cases. */ 3036 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) 3037 { 3038 uint64_t pat; 3039 3040 pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ 3041 GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */ 3042 GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */ 3043 GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */ 3044 GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) | 3045 GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) | 3046 GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) | 3047 GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); 3048 3049 if (!USES_PPGTT(dev_priv)) 3050 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry, 3051 * so RTL will always use the value corresponding to 3052 * pat_sel = 000". 3053 * So let's disable cache for GGTT to avoid screen corruptions. 3054 * MOCS still can be used though. 3055 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work 3056 * before this patch, i.e. the same uncached + snooping access 3057 * like on gen6/7 seems to be in effect. 3058 * - So this just fixes blitter/render access. Again it looks 3059 * like it's not just uncached access, but uncached + snooping. 3060 * So we can still hold onto all our assumptions wrt cpu 3061 * clflushing on LLC machines. 3062 */ 3063 pat = GEN8_PPAT(0, GEN8_PPAT_UC); 3064 3065 /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b 3066 * write would work. 
*/ 3067 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat); 3068 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); 3069 } 3070 3071 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) 3072 { 3073 uint64_t pat; 3074 3075 /* 3076 * Map WB on BDW to snooped on CHV. 3077 * 3078 * Only the snoop bit has meaning for CHV, the rest is 3079 * ignored. 3080 * 3081 * The hardware will never snoop for certain types of accesses: 3082 * - CPU GTT (GMADR->GGTT->no snoop->memory) 3083 * - PPGTT page tables 3084 * - some other special cycles 3085 * 3086 * As with BDW, we also need to consider the following for GT accesses: 3087 * "For GGTT, there is NO pat_sel[2:0] from the entry, 3088 * so RTL will always use the value corresponding to 3089 * pat_sel = 000". 3090 * Which means we must set the snoop bit in PAT entry 0 3091 * in order to keep the global status page working. 3092 */ 3093 pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) | 3094 GEN8_PPAT(1, 0) | 3095 GEN8_PPAT(2, 0) | 3096 GEN8_PPAT(3, 0) | 3097 GEN8_PPAT(4, CHV_PPAT_SNOOP) | 3098 GEN8_PPAT(5, CHV_PPAT_SNOOP) | 3099 GEN8_PPAT(6, CHV_PPAT_SNOOP) | 3100 GEN8_PPAT(7, CHV_PPAT_SNOOP); 3101 3102 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat); 3103 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); 3104 } 3105 3106 static int gen8_gmch_probe(struct i915_ggtt *ggtt) 3107 { 3108 struct drm_device *dev = ggtt->base.dev; 3109 struct drm_i915_private *dev_priv = to_i915(dev); 3110 u16 snb_gmch_ctl; 3111 int ret; 3112 3113 /* TODO: We're not aware of mappable constraints on gen8 yet */ 3114 ggtt->mappable_base = pci_resource_start(dev->pdev, 2); 3115 ggtt->mappable_end = pci_resource_len(dev->pdev, 2); 3116 3117 #if 0 3118 if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39))) 3119 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39)); 3120 #endif 3121 3122 pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 3123 3124 if (INTEL_INFO(dev)->gen >= 9) { 3125 ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl); 3126 ggtt->size = gen8_get_total_gtt_size(snb_gmch_ctl); 3127 } else if (IS_CHERRYVIEW(dev)) { 3128 ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl); 3129 ggtt->size = chv_get_total_gtt_size(snb_gmch_ctl); 3130 } else { 3131 ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl); 3132 ggtt->size = gen8_get_total_gtt_size(snb_gmch_ctl); 3133 } 3134 3135 ggtt->base.total = (ggtt->size / sizeof(gen8_pte_t)) << PAGE_SHIFT; 3136 3137 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) 3138 chv_setup_private_ppat(dev_priv); 3139 else 3140 bdw_setup_private_ppat(dev_priv); 3141 3142 ret = ggtt_probe_common(dev, ggtt->size); 3143 3144 ggtt->base.bind_vma = ggtt_bind_vma; 3145 ggtt->base.unbind_vma = ggtt_unbind_vma; 3146 ggtt->base.insert_page = gen8_ggtt_insert_page; 3147 ggtt->base.clear_range = nop_clear_range; 3148 if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv)) 3149 ggtt->base.clear_range = gen8_ggtt_clear_range; 3150 3151 ggtt->base.insert_entries = gen8_ggtt_insert_entries; 3152 if (IS_CHERRYVIEW(dev_priv)) 3153 ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL; 3154 3155 return ret; 3156 } 3157 3158 static int gen6_gmch_probe(struct i915_ggtt *ggtt) 3159 { 3160 struct drm_device *dev = ggtt->base.dev; 3161 u16 snb_gmch_ctl; 3162 int ret; 3163 3164 ggtt->mappable_base = pci_resource_start(dev->pdev, 2); 3165 ggtt->mappable_end = pci_resource_len(dev->pdev, 2); 3166 3167 /* 64/512MB is the current min/max we actually know of, but this is just 3168 * a coarse sanity check. 
3169 */ 3170 if ((ggtt->mappable_end < (64<<20) || (ggtt->mappable_end > (512<<20)))) { 3171 DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end); 3172 return -ENXIO; 3173 } 3174 3175 #if 0 3176 if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40))) 3177 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40)); 3178 #endif 3179 pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 3180 3181 ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl); 3182 ggtt->size = gen6_get_total_gtt_size(snb_gmch_ctl); 3183 ggtt->base.total = (ggtt->size / sizeof(gen6_pte_t)) << PAGE_SHIFT; 3184 3185 ret = ggtt_probe_common(dev, ggtt->size); 3186 3187 ggtt->base.clear_range = gen6_ggtt_clear_range; 3188 ggtt->base.insert_page = gen6_ggtt_insert_page; 3189 ggtt->base.insert_entries = gen6_ggtt_insert_entries; 3190 ggtt->base.bind_vma = ggtt_bind_vma; 3191 ggtt->base.unbind_vma = ggtt_unbind_vma; 3192 3193 return ret; 3194 } 3195 3196 static void gen6_gmch_remove(struct i915_address_space *vm) 3197 { 3198 struct i915_ggtt *ggtt = container_of(vm, struct i915_ggtt, base); 3199 3200 iounmap(ggtt->gsm); 3201 free_scratch_page(vm->dev, vm->scratch_page); 3202 } 3203 3204 static int i915_gmch_probe(struct i915_ggtt *ggtt) 3205 { 3206 struct drm_device *dev = ggtt->base.dev; 3207 struct drm_i915_private *dev_priv = to_i915(dev); 3208 #if 0 3209 int ret; 3210 3211 ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL); 3212 if (!ret) { 3213 DRM_ERROR("failed to set up gmch\n"); 3214 return -EIO; 3215 } 3216 #endif 3217 3218 intel_gtt_get(&ggtt->base.total, &ggtt->stolen_size, 3219 &ggtt->mappable_base, &ggtt->mappable_end); 3220 3221 ggtt->do_idle_maps = needs_idle_maps(&dev_priv->drm); 3222 ggtt->base.insert_page = i915_ggtt_insert_page; 3223 ggtt->base.insert_entries = i915_ggtt_insert_entries; 3224 ggtt->base.clear_range = i915_ggtt_clear_range; 3225 ggtt->base.bind_vma = ggtt_bind_vma; 3226 ggtt->base.unbind_vma = ggtt_unbind_vma; 3227 3228 if (unlikely(ggtt->do_idle_maps)) 3229 DRM_INFO("applying Ironlake quirks for intel_iommu\n"); 3230 3231 return 0; 3232 } 3233 3234 static void i915_gmch_remove(struct i915_address_space *vm) 3235 { 3236 intel_gmch_remove(); 3237 } 3238 3239 /** 3240 * i915_ggtt_init_hw - Initialize GGTT hardware 3241 * @dev: DRM device 3242 */ 3243 int i915_ggtt_init_hw(struct drm_device *dev) 3244 { 3245 struct drm_i915_private *dev_priv = to_i915(dev); 3246 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3247 int ret; 3248 3249 if (INTEL_INFO(dev)->gen <= 5) { 3250 ggtt->probe = i915_gmch_probe; 3251 ggtt->base.cleanup = i915_gmch_remove; 3252 } else if (INTEL_INFO(dev)->gen < 8) { 3253 ggtt->probe = gen6_gmch_probe; 3254 ggtt->base.cleanup = gen6_gmch_remove; 3255 3256 if (HAS_EDRAM(dev)) 3257 ggtt->base.pte_encode = iris_pte_encode; 3258 else if (IS_HASWELL(dev)) 3259 ggtt->base.pte_encode = hsw_pte_encode; 3260 else if (IS_VALLEYVIEW(dev)) 3261 ggtt->base.pte_encode = byt_pte_encode; 3262 else if (INTEL_INFO(dev)->gen >= 7) 3263 ggtt->base.pte_encode = ivb_pte_encode; 3264 else 3265 ggtt->base.pte_encode = snb_pte_encode; 3266 } else { 3267 ggtt->probe = gen8_gmch_probe; 3268 ggtt->base.cleanup = gen6_gmch_remove; 3269 } 3270 3271 ggtt->base.dev = dev; 3272 ggtt->base.is_ggtt = true; 3273 3274 ret = ggtt->probe(ggtt); 3275 if (ret) 3276 return ret; 3277 3278 if ((ggtt->base.total - 1) >> 32) { 3279 DRM_ERROR("We never expected a Global GTT with more than 32bits" 3280 "of address space! 
Found %lldM!\n", 3281 ggtt->base.total >> 20); 3282 ggtt->base.total = 1ULL << 32; 3283 ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total); 3284 } 3285 3286 /* 3287 * Initialise stolen early so that we may reserve preallocated 3288 * objects for the BIOS to KMS transition. 3289 */ 3290 ret = i915_gem_init_stolen(dev); 3291 if (ret) 3292 goto out_gtt_cleanup; 3293 3294 /* GMADR is the PCI mmio aperture into the global GTT. */ 3295 DRM_INFO("Memory usable by graphics device = %lluM\n", 3296 ggtt->base.total >> 20); 3297 DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20); 3298 DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", ggtt->stolen_size >> 20); 3299 #ifdef CONFIG_INTEL_IOMMU 3300 if (intel_iommu_gfx_mapped) 3301 DRM_INFO("VT-d active for gfx access\n"); 3302 #endif 3303 3304 return 0; 3305 3306 out_gtt_cleanup: 3307 ggtt->base.cleanup(&ggtt->base); 3308 3309 return ret; 3310 } 3311 3312 int i915_ggtt_enable_hw(struct drm_device *dev) 3313 { 3314 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) 3315 return -EIO; 3316 3317 return 0; 3318 } 3319 3320 void i915_gem_restore_gtt_mappings(struct drm_device *dev) 3321 { 3322 struct drm_i915_private *dev_priv = to_i915(dev); 3323 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3324 struct drm_i915_gem_object *obj; 3325 struct i915_vma *vma; 3326 3327 i915_check_and_clear_faults(dev_priv); 3328 3329 /* First fill our portion of the GTT with scratch pages */ 3330 ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total, 3331 true); 3332 3333 /* Cache flush objects bound into GGTT and rebind them. */ 3334 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 3335 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3336 if (vma->vm != &ggtt->base) 3337 continue; 3338 3339 WARN_ON(i915_vma_bind(vma, obj->cache_level, 3340 PIN_UPDATE)); 3341 } 3342 3343 if (obj->pin_display) 3344 WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); 3345 } 3346 3347 if (INTEL_INFO(dev)->gen >= 8) { 3348 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) 3349 chv_setup_private_ppat(dev_priv); 3350 else 3351 bdw_setup_private_ppat(dev_priv); 3352 3353 return; 3354 } 3355 3356 if (USES_PPGTT(dev)) { 3357 struct i915_address_space *vm; 3358 3359 list_for_each_entry(vm, &dev_priv->vm_list, global_link) { 3360 /* TODO: Perhaps it shouldn't be gen6 specific */ 3361 3362 struct i915_hw_ppgtt *ppgtt; 3363 3364 if (vm->is_ggtt) 3365 ppgtt = dev_priv->mm.aliasing_ppgtt; 3366 else 3367 ppgtt = i915_vm_to_ppgtt(vm); 3368 3369 gen6_write_page_range(dev_priv, &ppgtt->pd, 3370 0, ppgtt->base.total); 3371 } 3372 } 3373 3374 i915_ggtt_flush(dev_priv); 3375 } 3376 3377 static struct i915_vma * 3378 __i915_gem_vma_create(struct drm_i915_gem_object *obj, 3379 struct i915_address_space *vm, 3380 const struct i915_ggtt_view *ggtt_view) 3381 { 3382 struct i915_vma *vma; 3383 3384 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) 3385 return ERR_PTR(-EINVAL); 3386 3387 vma = kzalloc(sizeof(*vma), GFP_KERNEL); 3388 if (vma == NULL) 3389 return ERR_PTR(-ENOMEM); 3390 3391 INIT_LIST_HEAD(&vma->vm_link); 3392 INIT_LIST_HEAD(&vma->obj_link); 3393 INIT_LIST_HEAD(&vma->exec_list); 3394 vma->vm = vm; 3395 vma->obj = obj; 3396 vma->is_ggtt = i915_is_ggtt(vm); 3397 3398 if (i915_is_ggtt(vm)) 3399 vma->ggtt_view = *ggtt_view; 3400 else 3401 i915_ppgtt_get(i915_vm_to_ppgtt(vm)); 3402 3403 list_add_tail(&vma->obj_link, &obj->vma_list); 3404 3405 return vma; 3406 } 3407 3408 struct i915_vma * 3409 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, 3410 
struct i915_address_space *vm) 3411 { 3412 struct i915_vma *vma; 3413 3414 vma = i915_gem_obj_to_vma(obj, vm); 3415 if (!vma) 3416 vma = __i915_gem_vma_create(obj, vm, 3417 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL); 3418 3419 return vma; 3420 } 3421 3422 struct i915_vma * 3423 i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj, 3424 const struct i915_ggtt_view *view) 3425 { 3426 struct drm_device *dev = obj->base.dev; 3427 struct drm_i915_private *dev_priv = to_i915(dev); 3428 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3429 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); 3430 3431 if (!vma) 3432 vma = __i915_gem_vma_create(obj, &ggtt->base, view); 3433 3434 return vma; 3435 3436 } 3437 3438 static struct scatterlist * 3439 rotate_pages(const dma_addr_t *in, unsigned int offset, 3440 unsigned int width, unsigned int height, 3441 unsigned int stride, 3442 struct sg_table *st, struct scatterlist *sg) 3443 { 3444 unsigned int column, row; 3445 unsigned int src_idx; 3446 3447 for (column = 0; column < width; column++) { 3448 src_idx = stride * (height - 1) + column; 3449 for (row = 0; row < height; row++) { 3450 st->nents++; 3451 /* We don't need the pages, but need to initialize 3452 * the entries so the sg list can be happily traversed. 3453 * The only thing we need are DMA addresses. 3454 */ 3455 sg_set_page(sg, NULL, PAGE_SIZE, 0); 3456 sg_dma_address(sg) = in[offset + src_idx]; 3457 sg_dma_len(sg) = PAGE_SIZE; 3458 sg = sg_next(sg); 3459 src_idx -= stride; 3460 } 3461 } 3462 3463 return sg; 3464 } 3465 3466 static struct sg_table * 3467 intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info, 3468 struct drm_i915_gem_object *obj) 3469 { 3470 const size_t n_pages = obj->base.size / PAGE_SIZE; 3471 unsigned int size_pages = rot_info->plane[0].width * rot_info->plane[0].height; 3472 unsigned int size_pages_uv; 3473 struct sgt_iter sgt_iter; 3474 dma_addr_t dma_addr; 3475 unsigned long i; 3476 dma_addr_t *page_addr_list; 3477 struct sg_table *st; 3478 unsigned int uv_start_page; 3479 struct scatterlist *sg; 3480 int ret = -ENOMEM; 3481 3482 /* Allocate a temporary list of source pages for random access. */ 3483 page_addr_list = drm_malloc_gfp(n_pages, 3484 sizeof(dma_addr_t), 3485 GFP_TEMPORARY); 3486 if (!page_addr_list) 3487 return ERR_PTR(ret); 3488 3489 /* Account for UV plane with NV12. */ 3490 if (rot_info->pixel_format == DRM_FORMAT_NV12) 3491 size_pages_uv = rot_info->plane[1].width * rot_info->plane[1].height; 3492 else 3493 size_pages_uv = 0; 3494 3495 /* Allocate target SG list. */ 3496 st = kmalloc(sizeof(*st), M_DRM, GFP_KERNEL); 3497 if (!st) 3498 goto err_st_alloc; 3499 3500 ret = sg_alloc_table(st, size_pages + size_pages_uv, GFP_KERNEL); 3501 if (ret) 3502 goto err_sg_alloc; 3503 3504 /* Populate source page list from the object. */ 3505 i = 0; 3506 for_each_sgt_dma(dma_addr, sgt_iter, obj->pages) 3507 page_addr_list[i++] = dma_addr; 3508 3509 GEM_BUG_ON(i != n_pages); 3510 st->nents = 0; 3511 sg = st->sgl; 3512 3513 /* Rotate the pages. */ 3514 sg = rotate_pages(page_addr_list, 0, 3515 rot_info->plane[0].width, rot_info->plane[0].height, 3516 rot_info->plane[0].width, 3517 st, sg); 3518 3519 /* Append the UV plane if NV12. */ 3520 if (rot_info->pixel_format == DRM_FORMAT_NV12) { 3521 uv_start_page = size_pages; 3522 3523 /* Check for tile-row un-alignment. 
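 * If the UV offset is not page aligned, the UV data begins inside the last
 * page of the Y plane; the adjustment below is presumably there so the
 * rotated UV mapping starts one page earlier and includes that shared page.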
*/ 3524 if (offset_in_page(rot_info->uv_offset)) 3525 uv_start_page--; 3526 3527 rot_info->uv_start_page = uv_start_page; 3528 3529 sg = rotate_pages(page_addr_list, rot_info->uv_start_page, 3530 rot_info->plane[1].width, rot_info->plane[1].height, 3531 rot_info->plane[1].width, 3532 st, sg); 3533 } 3534 3535 DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages (%u plane 0)).\n", 3536 obj->base.size, rot_info->plane[0].width, 3537 rot_info->plane[0].height, size_pages + size_pages_uv, 3538 size_pages); 3539 3540 drm_free_large(page_addr_list); 3541 3542 return st; 3543 3544 err_sg_alloc: 3545 kfree(st); 3546 err_st_alloc: 3547 drm_free_large(page_addr_list); 3548 3549 DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%d) (%ux%u tiles, %u pages (%u plane 0))\n", 3550 obj->base.size, ret, rot_info->plane[0].width, 3551 rot_info->plane[0].height, size_pages + size_pages_uv, 3552 size_pages); 3553 return ERR_PTR(ret); 3554 } 3555 3556 static struct sg_table * 3557 intel_partial_pages(const struct i915_ggtt_view *view, 3558 struct drm_i915_gem_object *obj) 3559 { 3560 struct sg_table *st; 3561 struct scatterlist *sg; 3562 struct sg_page_iter obj_sg_iter; 3563 int ret = -ENOMEM; 3564 3565 st = kmalloc(sizeof(*st), M_DRM, GFP_KERNEL); 3566 if (!st) 3567 goto err_st_alloc; 3568 3569 ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL); 3570 if (ret) 3571 goto err_sg_alloc; 3572 3573 sg = st->sgl; 3574 st->nents = 0; 3575 for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents, 3576 view->params.partial.offset) 3577 { 3578 if (st->nents >= view->params.partial.size) 3579 break; 3580 3581 sg_set_page(sg, NULL, PAGE_SIZE, 0); 3582 sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter); 3583 sg_dma_len(sg) = PAGE_SIZE; 3584 3585 sg = sg_next(sg); 3586 st->nents++; 3587 } 3588 3589 return st; 3590 3591 err_sg_alloc: 3592 kfree(st); 3593 err_st_alloc: 3594 return ERR_PTR(ret); 3595 } 3596 3597 static int 3598 i915_get_ggtt_vma_pages(struct i915_vma *vma) 3599 { 3600 int ret = 0; 3601 3602 if (vma->ggtt_view.pages) 3603 return 0; 3604 3605 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) 3606 vma->ggtt_view.pages = vma->obj->pages; 3607 else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED) 3608 vma->ggtt_view.pages = 3609 intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj); 3610 else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL) 3611 vma->ggtt_view.pages = 3612 intel_partial_pages(&vma->ggtt_view, vma->obj); 3613 else 3614 WARN_ONCE(1, "GGTT view %u not implemented!\n", 3615 vma->ggtt_view.type); 3616 3617 if (!vma->ggtt_view.pages) { 3618 DRM_ERROR("Failed to get pages for GGTT view type %u!\n", 3619 vma->ggtt_view.type); 3620 ret = -EINVAL; 3621 } else if (IS_ERR(vma->ggtt_view.pages)) { 3622 ret = PTR_ERR(vma->ggtt_view.pages); 3623 vma->ggtt_view.pages = NULL; 3624 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n", 3625 vma->ggtt_view.type, ret); 3626 } 3627 3628 return ret; 3629 } 3630 3631 /** 3632 * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space. 3633 * @vma: VMA to map 3634 * @cache_level: mapping cache level 3635 * @flags: flags like global or local mapping 3636 * 3637 * DMA addresses are taken from the scatter-gather table of this object (or of 3638 * this VMA in case of non-default GGTT views) and PTE entries set up. 3639 * Note that DMA addresses are also the only part of the SG table we care about. 
3640 */ 3641 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, 3642 u32 flags) 3643 { 3644 int ret; 3645 u32 bind_flags; 3646 3647 if (WARN_ON(flags == 0)) 3648 return -EINVAL; 3649 3650 bind_flags = 0; 3651 if (flags & PIN_GLOBAL) 3652 bind_flags |= GLOBAL_BIND; 3653 if (flags & PIN_USER) 3654 bind_flags |= LOCAL_BIND; 3655 3656 if (flags & PIN_UPDATE) 3657 bind_flags |= vma->bound; 3658 else 3659 bind_flags &= ~vma->bound; 3660 3661 if (bind_flags == 0) 3662 return 0; 3663 3664 if (vma->bound == 0 && vma->vm->allocate_va_range) { 3665 /* XXX: i915_vma_pin() will fix this +- hack */ 3666 vma->pin_count++; 3667 trace_i915_va_alloc(vma); 3668 ret = vma->vm->allocate_va_range(vma->vm, 3669 vma->node.start, 3670 vma->node.size); 3671 vma->pin_count--; 3672 if (ret) 3673 return ret; 3674 } 3675 3676 ret = vma->vm->bind_vma(vma, cache_level, bind_flags); 3677 if (ret) 3678 return ret; 3679 3680 vma->bound |= bind_flags; 3681 3682 return 0; 3683 } 3684 3685 /** 3686 * i915_ggtt_view_size - Get the size of a GGTT view. 3687 * @obj: Object the view is of. 3688 * @view: The view in question. 3689 * 3690 * @return The size of the GGTT view in bytes. 3691 */ 3692 size_t 3693 i915_ggtt_view_size(struct drm_i915_gem_object *obj, 3694 const struct i915_ggtt_view *view) 3695 { 3696 if (view->type == I915_GGTT_VIEW_NORMAL) { 3697 return obj->base.size; 3698 } else if (view->type == I915_GGTT_VIEW_ROTATED) { 3699 return intel_rotation_info_size(&view->params.rotated) << PAGE_SHIFT; 3700 } else if (view->type == I915_GGTT_VIEW_PARTIAL) { 3701 return view->params.partial.size << PAGE_SHIFT; 3702 } else { 3703 WARN_ONCE(1, "GGTT view %u not implemented!\n", view->type); 3704 return obj->base.size; 3705 } 3706 } 3707 3708 void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) 3709 { 3710 void __iomem *ptr; 3711 3712 lockdep_assert_held(&vma->vm->dev->struct_mutex); 3713 if (WARN_ON(!vma->obj->map_and_fenceable)) 3714 return ERR_PTR(-ENODEV); 3715 3716 GEM_BUG_ON(!vma->is_ggtt); 3717 GEM_BUG_ON((vma->bound & GLOBAL_BIND) == 0); 3718 3719 ptr = vma->iomap; 3720 if (ptr == NULL) { 3721 ptr = io_mapping_map_wc(i915_vm_to_ggtt(vma->vm)->mappable, 3722 vma->node.start, 3723 vma->node.size); 3724 if (ptr == NULL) 3725 return ERR_PTR(-ENOMEM); 3726 3727 vma->iomap = ptr; 3728 } 3729 3730 vma->pin_count++; 3731 return ptr; 3732 } 3733
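/*
 * Minimal usage sketch for the helper above, assuming the caller already
 * holds struct_mutex, the vma is map_and_fenceable and bound in the GGTT,
 * and the mapping is dropped again with the i915_vma_unpin_iomap()
 * counterpart from i915_gem_gtt.h (value and offset are placeholders):
 *
 *	void __iomem *ptr = i915_vma_pin_iomap(vma);
 *	if (IS_ERR(ptr))
 *		return PTR_ERR(ptr);
 *	iowrite32(value, ptr + offset);
 *	i915_vma_unpin_iomap(vma);
 *
 * The WC mapping is cached in vma->iomap, so repeated pin/unpin cycles do
 * not have to remap the aperture range each time.
 */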