1 /* 2 * Copyright © 2010 Daniel Vetter 3 * Copyright © 2011-2014 Intel Corporation 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 * IN THE SOFTWARE. 23 * 24 */ 25 26 #include <linux/seq_file.h> 27 #include <drm/drmP.h> 28 #include <drm/i915_drm.h> 29 #include "i915_drv.h" 30 #include "i915_vgpu.h" 31 #include "i915_trace.h" 32 #include "intel_drv.h" 33 34 #include <linux/bitmap.h> 35 36 #include <sys/mplock2.h> 37
38 /** 39 * DOC: Global GTT views 40 * 41 * Background and previous state 42 * 43 * Historically objects could exist (be bound) in global GTT space only as 44 * singular instances with a view representing all of the object's backing pages 45 * in a linear fashion. This view will be called a normal view. 46 * 47 * To support multiple views of the same object, where the number of mapped 48 * pages is not equal to the backing store, or where the layout of the pages 49 * is not linear, the concept of a GGTT view was added. 50 * 51 * One example of an alternative view is a stereo display driven by a single 52 * image. In this case we would have a framebuffer looking like this 53 * (2x2 pages): 54 * 55 * 12 56 * 34 57 * 58 * Above would represent a normal GGTT view as normally mapped for GPU or CPU 59 * rendering. In contrast, fed to the display engine would be an alternative 60 * view which could look something like this: 61 * 62 * 1212 63 * 3434 64 * 65 * In this example both the size and layout of pages in the alternative view are 66 * different from the normal view. 67 * 68 * Implementation and usage 69 * 70 * GGTT views are implemented using VMAs and are distinguished via enum 71 * i915_ggtt_view_type and struct i915_ggtt_view. 72 * 73 * A new flavour of core GEM functions which work with GGTT bound objects was 74 * added with the _ggtt_ infix, and sometimes with _view postfix to avoid 75 * renaming in large amounts of code. They take the struct i915_ggtt_view 76 * parameter encapsulating all metadata required to implement a view. 77 * 78 * As a helper for callers which are only interested in the normal view, a 79 * globally const i915_ggtt_view_normal singleton instance exists. All old core 80 * GEM API functions, the ones not taking the view parameter, operate on, 81 * or with, the normal GGTT view. 82 * 83 * Code wanting to add or use a new GGTT view needs to: 84 * 85 * 1. Add a new enum with a suitable name. 86 * 2. Extend the metadata in the i915_ggtt_view structure if required. 87 * 3. Add support to i915_get_vma_pages(). 88 * 89 * New views are required to build a scatter-gather table from within the 90 * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and 91 * exists for the lifetime of a VMA. 92 * 93 * The core API is designed to have copy semantics, which means that the passed in 94 * struct i915_ggtt_view does not need to be persistent (left around after 95 * calling the core API functions). 96 * 97 */ 98
99 static int 100 i915_get_ggtt_vma_pages(struct i915_vma *vma); 101 102 const struct i915_ggtt_view i915_ggtt_view_normal = { 103 .type = I915_GGTT_VIEW_NORMAL, 104 }; 105 const struct i915_ggtt_view i915_ggtt_view_rotated = { 106 .type = I915_GGTT_VIEW_ROTATED, 107 }; 108 109 static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt) 110 { 111 bool has_aliasing_ppgtt; 112 bool has_full_ppgtt; 113 bool has_full_48bit_ppgtt; 114 115 has_aliasing_ppgtt = INTEL_INFO(dev)->gen >= 6; 116 has_full_ppgtt = INTEL_INFO(dev)->gen >= 7; 117 has_full_48bit_ppgtt = IS_BROADWELL(dev) || INTEL_INFO(dev)->gen >= 9; 118 119 if (intel_vgpu_active(dev)) 120 has_full_ppgtt = false; /* emulation is too hard */ 121 122 /* 123 * We don't allow disabling PPGTT for gen9+ as it's a requirement for 124 * execlists, the sole mechanism available to submit work. 125 */ 126 if (INTEL_INFO(dev)->gen < 9 && 127 (enable_ppgtt == 0 || !has_aliasing_ppgtt)) 128 return 0; 129 130 if (enable_ppgtt == 1) 131 return 1; 132 133 if (enable_ppgtt == 2 && has_full_ppgtt) 134 return 2; 135 136 if (enable_ppgtt == 3 && has_full_48bit_ppgtt) 137 return 3; 138 139 #ifdef CONFIG_INTEL_IOMMU 140 /* Disable ppgtt on SNB if VT-d is on. */ 141 if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) { 142 DRM_INFO("Disabling PPGTT because VT-d is on\n"); 143 return 0; 144 } 145 #endif 146 147 /* Early VLV doesn't have this */ 148 if (IS_VALLEYVIEW(dev) && dev->pdev->revision < 0xb) { 149 DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n"); 150 return 0; 151 } 152 153 if (INTEL_INFO(dev)->gen >= 8 && i915.enable_execlists) 154 return has_full_48bit_ppgtt ? 3 : 2; 155 else 156 return has_aliasing_ppgtt ? 1 : 0; 157 } 158 159 static int ppgtt_bind_vma(struct i915_vma *vma, 160 enum i915_cache_level cache_level, 161 u32 unused) 162 { 163 u32 pte_flags = 0; 164 165 /* Currently applicable only to VLV */ 166 if (vma->obj->gt_ro) 167 pte_flags |= PTE_READ_ONLY; 168 169 vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start, 170 cache_level, pte_flags); 171 172 return 0; 173 } 174 175 static void ppgtt_unbind_vma(struct i915_vma *vma) 176 { 177 vma->vm->clear_range(vma->vm, 178 vma->node.start, 179 vma->obj->base.size, 180 true); 181 } 182 183 static gen8_pte_t gen8_pte_encode(dma_addr_t addr, 184 enum i915_cache_level level, 185 bool valid) 186 { 187 gen8_pte_t pte = valid ?
_PAGE_PRESENT | _PAGE_RW : 0; 188 pte |= addr; 189 190 switch (level) { 191 case I915_CACHE_NONE: 192 pte |= PPAT_UNCACHED_INDEX; 193 break; 194 case I915_CACHE_WT: 195 pte |= PPAT_DISPLAY_ELLC_INDEX; 196 break; 197 default: 198 pte |= PPAT_CACHED_INDEX; 199 break; 200 } 201 202 return pte; 203 } 204 205 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr, 206 const enum i915_cache_level level) 207 { 208 gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW; 209 pde |= addr; 210 if (level != I915_CACHE_NONE) 211 pde |= PPAT_CACHED_PDE_INDEX; 212 else 213 pde |= PPAT_UNCACHED_INDEX; 214 return pde; 215 } 216 217 #define gen8_pdpe_encode gen8_pde_encode 218 #define gen8_pml4e_encode gen8_pde_encode 219 220 static gen6_pte_t snb_pte_encode(dma_addr_t addr, 221 enum i915_cache_level level, 222 bool valid, u32 unused) 223 { 224 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 225 pte |= GEN6_PTE_ADDR_ENCODE(addr); 226 227 switch (level) { 228 case I915_CACHE_L3_LLC: 229 case I915_CACHE_LLC: 230 pte |= GEN6_PTE_CACHE_LLC; 231 break; 232 case I915_CACHE_NONE: 233 pte |= GEN6_PTE_UNCACHED; 234 break; 235 default: 236 MISSING_CASE(level); 237 } 238 239 return pte; 240 } 241 242 static gen6_pte_t ivb_pte_encode(dma_addr_t addr, 243 enum i915_cache_level level, 244 bool valid, u32 unused) 245 { 246 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 247 pte |= GEN6_PTE_ADDR_ENCODE(addr); 248 249 switch (level) { 250 case I915_CACHE_L3_LLC: 251 pte |= GEN7_PTE_CACHE_L3_LLC; 252 break; 253 case I915_CACHE_LLC: 254 pte |= GEN6_PTE_CACHE_LLC; 255 break; 256 case I915_CACHE_NONE: 257 pte |= GEN6_PTE_UNCACHED; 258 break; 259 default: 260 MISSING_CASE(level); 261 } 262 263 return pte; 264 } 265 266 static gen6_pte_t byt_pte_encode(dma_addr_t addr, 267 enum i915_cache_level level, 268 bool valid, u32 flags) 269 { 270 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 271 pte |= GEN6_PTE_ADDR_ENCODE(addr); 272 273 if (!(flags & PTE_READ_ONLY)) 274 pte |= BYT_PTE_WRITEABLE; 275 276 if (level != I915_CACHE_NONE) 277 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; 278 279 return pte; 280 } 281 282 static gen6_pte_t hsw_pte_encode(dma_addr_t addr, 283 enum i915_cache_level level, 284 bool valid, u32 unused) 285 { 286 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 287 pte |= HSW_PTE_ADDR_ENCODE(addr); 288 289 if (level != I915_CACHE_NONE) 290 pte |= HSW_WB_LLC_AGE3; 291 292 return pte; 293 } 294 295 static gen6_pte_t iris_pte_encode(dma_addr_t addr, 296 enum i915_cache_level level, 297 bool valid, u32 unused) 298 { 299 gen6_pte_t pte = valid ? 
GEN6_PTE_VALID : 0; 300 pte |= HSW_PTE_ADDR_ENCODE(addr); 301 302 switch (level) { 303 case I915_CACHE_NONE: 304 break; 305 case I915_CACHE_WT: 306 pte |= HSW_WT_ELLC_LLC_AGE3; 307 break; 308 default: 309 pte |= HSW_WB_ELLC_LLC_AGE3; 310 break; 311 } 312 313 return pte; 314 } 315 316 static int __setup_page_dma(struct drm_device *dev, 317 struct i915_page_dma *p, gfp_t flags) 318 { 319 struct device *device = &dev->pdev->dev; 320 321 p->page = alloc_page(flags); 322 if (!p->page) 323 return -ENOMEM; 324 325 p->daddr = dma_map_page(device, 326 p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL); 327 328 if (dma_mapping_error(device, p->daddr)) { 329 __free_page(p->page); 330 return -EINVAL; 331 } 332 333 return 0; 334 } 335 336 static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p) 337 { 338 return __setup_page_dma(dev, p, GFP_KERNEL); 339 } 340 341 static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p) 342 { 343 if (WARN_ON(!p->page)) 344 return; 345 346 dma_unmap_page(&dev->pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL); 347 __free_page(p->page); 348 memset(p, 0, sizeof(*p)); 349 } 350 351 static void *kmap_page_dma(struct i915_page_dma *p) 352 { 353 return kmap_atomic(p->page); 354 } 355 356 /* We use the flushing unmap only with ppgtt structures: 357 * page directories, page tables and scratch pages. 358 */ 359 static void kunmap_page_dma(struct drm_device *dev, void *vaddr) 360 { 361 /* There are only few exceptions for gen >=6. chv and bxt. 362 * And we are not sure about the latter so play safe for now. 363 */ 364 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) 365 drm_clflush_virt_range(vaddr, PAGE_SIZE); 366 367 kunmap_atomic(vaddr); 368 } 369 370 #define kmap_px(px) kmap_page_dma(px_base(px)) 371 #define kunmap_px(ppgtt, vaddr) kunmap_page_dma((ppgtt)->base.dev, (vaddr)) 372 373 #define setup_px(dev, px) setup_page_dma((dev), px_base(px)) 374 #define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px)) 375 #define fill_px(dev, px, v) fill_page_dma((dev), px_base(px), (v)) 376 #define fill32_px(dev, px, v) fill_page_dma_32((dev), px_base(px), (v)) 377 378 static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p, 379 const uint64_t val) 380 { 381 int i; 382 uint64_t * const vaddr = kmap_page_dma(p); 383 384 for (i = 0; i < 512; i++) 385 vaddr[i] = val; 386 387 kunmap_page_dma(dev, vaddr); 388 } 389 390 static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p, 391 const uint32_t val32) 392 { 393 uint64_t v = val32; 394 395 v = v << 32 | val32; 396 397 fill_page_dma(dev, p, v); 398 } 399 400 static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev) 401 { 402 struct i915_page_scratch *sp; 403 int ret; 404 405 sp = kzalloc(sizeof(*sp), GFP_KERNEL); 406 if (sp == NULL) 407 return ERR_PTR(-ENOMEM); 408 409 ret = __setup_page_dma(dev, px_base(sp), GFP_DMA32 | __GFP_ZERO); 410 if (ret) { 411 kfree(sp); 412 return ERR_PTR(ret); 413 } 414 415 set_pages_uc(px_page(sp), 1); 416 417 return sp; 418 } 419 420 static void free_scratch_page(struct drm_device *dev, 421 struct i915_page_scratch *sp) 422 { 423 set_pages_wb(px_page(sp), 1); 424 425 cleanup_px(dev, sp); 426 kfree(sp); 427 } 428 429 static struct i915_page_table *alloc_pt(struct drm_device *dev) 430 { 431 struct i915_page_table *pt; 432 const size_t count = INTEL_INFO(dev)->gen >= 8 ? 
433 GEN8_PTES : GEN6_PTES; 434 int ret = -ENOMEM; 435 436 pt = kzalloc(sizeof(*pt), GFP_KERNEL); 437 if (!pt) 438 return ERR_PTR(-ENOMEM); 439 440 pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes), 441 GFP_KERNEL); 442 443 if (!pt->used_ptes) 444 goto fail_bitmap; 445 446 ret = setup_px(dev, pt); 447 if (ret) 448 goto fail_page_m; 449 450 return pt; 451 452 fail_page_m: 453 kfree(pt->used_ptes); 454 fail_bitmap: 455 kfree(pt); 456 457 return ERR_PTR(ret); 458 } 459 460 static void free_pt(struct drm_device *dev, struct i915_page_table *pt) 461 { 462 cleanup_px(dev, pt); 463 kfree(pt->used_ptes); 464 kfree(pt); 465 } 466 467 static void gen8_initialize_pt(struct i915_address_space *vm, 468 struct i915_page_table *pt) 469 { 470 gen8_pte_t scratch_pte; 471 472 scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), 473 I915_CACHE_LLC, true); 474 475 fill_px(vm->dev, pt, scratch_pte); 476 } 477 478 static void gen6_initialize_pt(struct i915_address_space *vm, 479 struct i915_page_table *pt) 480 { 481 gen6_pte_t scratch_pte; 482 483 WARN_ON(px_dma(vm->scratch_page) == 0); 484 485 scratch_pte = vm->pte_encode(px_dma(vm->scratch_page), 486 I915_CACHE_LLC, true, 0); 487 488 fill32_px(vm->dev, pt, scratch_pte); 489 } 490 491 static struct i915_page_directory *alloc_pd(struct drm_device *dev) 492 { 493 struct i915_page_directory *pd; 494 int ret = -ENOMEM; 495 496 pd = kzalloc(sizeof(*pd), GFP_KERNEL); 497 if (!pd) 498 return ERR_PTR(-ENOMEM); 499 500 pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES), 501 sizeof(*pd->used_pdes), GFP_KERNEL); 502 if (!pd->used_pdes) 503 goto fail_bitmap; 504 505 ret = setup_px(dev, pd); 506 if (ret) 507 goto fail_page_m; 508 509 return pd; 510 511 fail_page_m: 512 kfree(pd->used_pdes); 513 fail_bitmap: 514 kfree(pd); 515 516 return ERR_PTR(ret); 517 } 518 519 static void free_pd(struct drm_device *dev, struct i915_page_directory *pd) 520 { 521 if (px_page(pd)) { 522 cleanup_px(dev, pd); 523 kfree(pd->used_pdes); 524 kfree(pd); 525 } 526 } 527 528 static void gen8_initialize_pd(struct i915_address_space *vm, 529 struct i915_page_directory *pd) 530 { 531 gen8_pde_t scratch_pde; 532 533 scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC); 534 535 fill_px(vm->dev, pd, scratch_pde); 536 } 537 538 static int __pdp_init(struct drm_device *dev, 539 struct i915_page_directory_pointer *pdp) 540 { 541 size_t pdpes = I915_PDPES_PER_PDP(dev); 542 543 pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes), 544 sizeof(unsigned long), 545 GFP_KERNEL); 546 if (!pdp->used_pdpes) 547 return -ENOMEM; 548 549 pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory), 550 GFP_KERNEL); 551 if (!pdp->page_directory) { 552 kfree(pdp->used_pdpes); 553 /* the PDP might be the statically allocated top level. 
Keep it 554 * as clean as possible */ 555 pdp->used_pdpes = NULL; 556 return -ENOMEM; 557 } 558 559 return 0; 560 } 561 562 static void __pdp_fini(struct i915_page_directory_pointer *pdp) 563 { 564 kfree(pdp->used_pdpes); 565 kfree(pdp->page_directory); 566 pdp->page_directory = NULL; 567 } 568 569 static struct 570 i915_page_directory_pointer *alloc_pdp(struct drm_device *dev) 571 { 572 struct i915_page_directory_pointer *pdp; 573 int ret = -ENOMEM; 574 575 WARN_ON(!USES_FULL_48BIT_PPGTT(dev)); 576 577 pdp = kzalloc(sizeof(*pdp), GFP_KERNEL); 578 if (!pdp) 579 return ERR_PTR(-ENOMEM); 580 581 ret = __pdp_init(dev, pdp); 582 if (ret) 583 goto fail_bitmap; 584 585 ret = setup_px(dev, pdp); 586 if (ret) 587 goto fail_page_m; 588 589 return pdp; 590 591 fail_page_m: 592 __pdp_fini(pdp); 593 fail_bitmap: 594 kfree(pdp); 595 596 return ERR_PTR(ret); 597 } 598 599 static void free_pdp(struct drm_device *dev, 600 struct i915_page_directory_pointer *pdp) 601 { 602 __pdp_fini(pdp); 603 if (USES_FULL_48BIT_PPGTT(dev)) { 604 cleanup_px(dev, pdp); 605 kfree(pdp); 606 } 607 } 608 609 static void gen8_initialize_pdp(struct i915_address_space *vm, 610 struct i915_page_directory_pointer *pdp) 611 { 612 gen8_ppgtt_pdpe_t scratch_pdpe; 613 614 scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC); 615 616 fill_px(vm->dev, pdp, scratch_pdpe); 617 } 618 619 static void gen8_initialize_pml4(struct i915_address_space *vm, 620 struct i915_pml4 *pml4) 621 { 622 gen8_ppgtt_pml4e_t scratch_pml4e; 623 624 scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp), 625 I915_CACHE_LLC); 626 627 fill_px(vm->dev, pml4, scratch_pml4e); 628 } 629 630 static void 631 gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt, 632 struct i915_page_directory_pointer *pdp, 633 struct i915_page_directory *pd, 634 int index) 635 { 636 gen8_ppgtt_pdpe_t *page_directorypo; 637 638 if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) 639 return; 640 641 page_directorypo = kmap_px(pdp); 642 page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); 643 kunmap_px(ppgtt, page_directorypo); 644 } 645 646 static void 647 gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt, 648 struct i915_pml4 *pml4, 649 struct i915_page_directory_pointer *pdp, 650 int index) 651 { 652 gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4); 653 654 WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)); 655 pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); 656 kunmap_px(ppgtt, pagemap); 657 } 658 659 /* Broadwell Page Directory Pointer Descriptors */ 660 static int gen8_write_pdp(struct drm_i915_gem_request *req, 661 unsigned entry, 662 dma_addr_t addr) 663 { 664 struct intel_engine_cs *engine = req->engine; 665 int ret; 666 667 BUG_ON(entry >= 4); 668 669 ret = intel_ring_begin(req, 6); 670 if (ret) 671 return ret; 672 673 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); 674 intel_ring_emit_reg(engine, GEN8_RING_PDP_UDW(engine, entry)); 675 intel_ring_emit(engine, upper_32_bits(addr)); 676 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); 677 intel_ring_emit_reg(engine, GEN8_RING_PDP_LDW(engine, entry)); 678 intel_ring_emit(engine, lower_32_bits(addr)); 679 intel_ring_advance(engine); 680 681 return 0; 682 } 683 684 static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt, 685 struct drm_i915_gem_request *req) 686 { 687 int i, ret; 688 689 for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) { 690 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); 691 692 ret = gen8_write_pdp(req, i, pd_daddr); 693 if (ret) 694 return 
ret; 695 } 696 697 return 0; 698 } 699 700 static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt, 701 struct drm_i915_gem_request *req) 702 { 703 return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4)); 704 } 705 706 static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm, 707 struct i915_page_directory_pointer *pdp, 708 uint64_t start, 709 uint64_t length, 710 gen8_pte_t scratch_pte) 711 { 712 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 713 gen8_pte_t *pt_vaddr; 714 unsigned pdpe = gen8_pdpe_index(start); 715 unsigned pde = gen8_pde_index(start); 716 unsigned pte = gen8_pte_index(start); 717 unsigned num_entries = length >> PAGE_SHIFT; 718 unsigned last_pte, i; 719 720 if (WARN_ON(!pdp)) 721 return; 722 723 while (num_entries) { 724 struct i915_page_directory *pd; 725 struct i915_page_table *pt; 726 727 if (WARN_ON(!pdp->page_directory[pdpe])) 728 break; 729 730 pd = pdp->page_directory[pdpe]; 731 732 if (WARN_ON(!pd->page_table[pde])) 733 break; 734 735 pt = pd->page_table[pde]; 736 737 if (WARN_ON(!px_page(pt))) 738 break; 739 740 last_pte = pte + num_entries; 741 if (last_pte > GEN8_PTES) 742 last_pte = GEN8_PTES; 743 744 pt_vaddr = kmap_px(pt); 745 746 for (i = pte; i < last_pte; i++) { 747 pt_vaddr[i] = scratch_pte; 748 num_entries--; 749 } 750 751 kunmap_px(ppgtt, pt_vaddr); 752 753 pte = 0; 754 if (++pde == I915_PDES) { 755 if (++pdpe == I915_PDPES_PER_PDP(vm->dev)) 756 break; 757 pde = 0; 758 } 759 } 760 } 761 762 static void gen8_ppgtt_clear_range(struct i915_address_space *vm, 763 uint64_t start, 764 uint64_t length, 765 bool use_scratch) 766 { 767 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 768 gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), 769 I915_CACHE_LLC, use_scratch); 770 771 if (!USES_FULL_48BIT_PPGTT(vm->dev)) { 772 gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length, 773 scratch_pte); 774 } else { 775 uint64_t pml4e; 776 struct i915_page_directory_pointer *pdp; 777 778 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) { 779 gen8_ppgtt_clear_pte_range(vm, pdp, start, length, 780 scratch_pte); 781 } 782 } 783 } 784 785 static void 786 gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm, 787 struct i915_page_directory_pointer *pdp, 788 struct sg_page_iter *sg_iter, 789 uint64_t start, 790 enum i915_cache_level cache_level) 791 { 792 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 793 gen8_pte_t *pt_vaddr; 794 unsigned pdpe = gen8_pdpe_index(start); 795 unsigned pde = gen8_pde_index(start); 796 unsigned pte = gen8_pte_index(start); 797 798 pt_vaddr = NULL; 799 800 while (__sg_page_iter_next(sg_iter)) { 801 if (pt_vaddr == NULL) { 802 struct i915_page_directory *pd = pdp->page_directory[pdpe]; 803 struct i915_page_table *pt = pd->page_table[pde]; 804 pt_vaddr = kmap_px(pt); 805 } 806 807 pt_vaddr[pte] = 808 gen8_pte_encode(sg_page_iter_dma_address(sg_iter), 809 cache_level, true); 810 if (++pte == GEN8_PTES) { 811 kunmap_px(ppgtt, pt_vaddr); 812 pt_vaddr = NULL; 813 if (++pde == I915_PDES) { 814 if (++pdpe == I915_PDPES_PER_PDP(vm->dev)) 815 break; 816 pde = 0; 817 } 818 pte = 0; 819 } 820 } 821 822 if (pt_vaddr) 823 kunmap_px(ppgtt, pt_vaddr); 824 } 825 826 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, 827 struct sg_table *pages, 828 uint64_t start, 829 enum i915_cache_level cache_level, 830 u32 unused) 831 { 832 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 833 struct sg_page_iter sg_iter; 834 835 __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0); 
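/* A 32b VM has a single PDP, so every PTE for this range is written through
 * ppgtt->pdp. A 48b VM may span several PDPs, so walk each PML4 entry covering
 * the range and let gen8_ppgtt_insert_pte_entries() continue from wherever the
 * shared sg iterator left off.
 */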
836 837 if (!USES_FULL_48BIT_PPGTT(vm->dev)) { 838 gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start, 839 cache_level); 840 } else { 841 struct i915_page_directory_pointer *pdp; 842 uint64_t pml4e; 843 uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT; 844 845 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) { 846 gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter, 847 start, cache_level); 848 } 849 } 850 } 851 852 static void gen8_free_page_tables(struct drm_device *dev, 853 struct i915_page_directory *pd) 854 { 855 int i; 856 857 if (!px_page(pd)) 858 return; 859 860 for_each_set_bit(i, pd->used_pdes, I915_PDES) { 861 if (WARN_ON(!pd->page_table[i])) 862 continue; 863 864 free_pt(dev, pd->page_table[i]); 865 pd->page_table[i] = NULL; 866 } 867 } 868 869 static int gen8_init_scratch(struct i915_address_space *vm) 870 { 871 struct drm_device *dev = vm->dev; 872 873 vm->scratch_page = alloc_scratch_page(dev); 874 if (IS_ERR(vm->scratch_page)) 875 return PTR_ERR(vm->scratch_page); 876 877 vm->scratch_pt = alloc_pt(dev); 878 if (IS_ERR(vm->scratch_pt)) { 879 free_scratch_page(dev, vm->scratch_page); 880 return PTR_ERR(vm->scratch_pt); 881 } 882 883 vm->scratch_pd = alloc_pd(dev); 884 if (IS_ERR(vm->scratch_pd)) { 885 free_pt(dev, vm->scratch_pt); 886 free_scratch_page(dev, vm->scratch_page); 887 return PTR_ERR(vm->scratch_pd); 888 } 889 890 if (USES_FULL_48BIT_PPGTT(dev)) { 891 vm->scratch_pdp = alloc_pdp(dev); 892 if (IS_ERR(vm->scratch_pdp)) { 893 free_pd(dev, vm->scratch_pd); 894 free_pt(dev, vm->scratch_pt); 895 free_scratch_page(dev, vm->scratch_page); 896 return PTR_ERR(vm->scratch_pdp); 897 } 898 } 899 900 gen8_initialize_pt(vm, vm->scratch_pt); 901 gen8_initialize_pd(vm, vm->scratch_pd); 902 if (USES_FULL_48BIT_PPGTT(dev)) 903 gen8_initialize_pdp(vm, vm->scratch_pdp); 904 905 return 0; 906 } 907 908 static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) 909 { 910 enum vgt_g2v_type msg; 911 struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev); 912 int i; 913 914 if (USES_FULL_48BIT_PPGTT(dev_priv)) { 915 u64 daddr = px_dma(&ppgtt->pml4); 916 917 I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); 918 I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); 919 920 msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE : 921 VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY); 922 } else { 923 for (i = 0; i < GEN8_LEGACY_PDPES; i++) { 924 u64 daddr = i915_page_dir_dma_addr(ppgtt, i); 925 926 I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr)); 927 I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr)); 928 } 929 930 msg = (create ? 
VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE : 931 VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY); 932 } 933 934 I915_WRITE(vgtif_reg(g2v_notify), msg); 935 936 return 0; 937 } 938 939 static void gen8_free_scratch(struct i915_address_space *vm) 940 { 941 struct drm_device *dev = vm->dev; 942 943 if (USES_FULL_48BIT_PPGTT(dev)) 944 free_pdp(dev, vm->scratch_pdp); 945 free_pd(dev, vm->scratch_pd); 946 free_pt(dev, vm->scratch_pt); 947 free_scratch_page(dev, vm->scratch_page); 948 } 949 950 static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev, 951 struct i915_page_directory_pointer *pdp) 952 { 953 int i; 954 955 for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) { 956 if (WARN_ON(!pdp->page_directory[i])) 957 continue; 958 959 gen8_free_page_tables(dev, pdp->page_directory[i]); 960 free_pd(dev, pdp->page_directory[i]); 961 } 962 963 free_pdp(dev, pdp); 964 } 965 966 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt) 967 { 968 int i; 969 970 for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { 971 if (WARN_ON(!ppgtt->pml4.pdps[i])) 972 continue; 973 974 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]); 975 } 976 977 cleanup_px(ppgtt->base.dev, &ppgtt->pml4); 978 } 979 980 static void gen8_ppgtt_cleanup(struct i915_address_space *vm) 981 { 982 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 983 984 if (intel_vgpu_active(vm->dev)) 985 gen8_ppgtt_notify_vgt(ppgtt, false); 986 987 if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) 988 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp); 989 else 990 gen8_ppgtt_cleanup_4lvl(ppgtt); 991 992 gen8_free_scratch(vm); 993 } 994 995 /** 996 * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range. 997 * @vm: Master vm structure. 998 * @pd: Page directory for this address range. 999 * @start: Starting virtual address to begin allocations. 1000 * @length: Size of the allocations. 1001 * @new_pts: Bitmap set by function with new allocations. Likely used by the 1002 * caller to free on error. 1003 * 1004 * Allocate the required number of page tables. Extremely similar to 1005 * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by 1006 * the page directory boundary (instead of the page directory pointer). That 1007 * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is 1008 * possible, and likely that the caller will need to use multiple calls of this 1009 * function to achieve the appropriate allocation. 1010 * 1011 * Return: 0 if success; negative error code otherwise. 
1012 */ 1013 static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm, 1014 struct i915_page_directory *pd, 1015 uint64_t start, 1016 uint64_t length, 1017 unsigned long *new_pts) 1018 { 1019 struct drm_device *dev = vm->dev; 1020 struct i915_page_table *pt; 1021 uint32_t pde; 1022 1023 gen8_for_each_pde(pt, pd, start, length, pde) { 1024 /* Don't reallocate page tables */ 1025 if (test_bit(pde, pd->used_pdes)) { 1026 /* Scratch is never allocated this way */ 1027 WARN_ON(pt == vm->scratch_pt); 1028 continue; 1029 } 1030 1031 pt = alloc_pt(dev); 1032 if (IS_ERR(pt)) 1033 goto unwind_out; 1034 1035 gen8_initialize_pt(vm, pt); 1036 pd->page_table[pde] = pt; 1037 __set_bit(pde, new_pts); 1038 trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT); 1039 } 1040 1041 return 0; 1042 1043 unwind_out: 1044 for_each_set_bit(pde, new_pts, I915_PDES) 1045 free_pt(dev, pd->page_table[pde]); 1046 1047 return -ENOMEM; 1048 } 1049
1050 /** 1051 * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range. 1052 * @vm: Master vm structure. 1053 * @pdp: Page directory pointer for this address range. 1054 * @start: Starting virtual address to begin allocations. 1055 * @length: Size of the allocations. 1056 * @new_pds: Bitmap set by function with new allocations. Likely used by the 1057 * caller to free on error. 1058 * 1059 * Allocate the required number of page directories starting at the pde index of 1060 * @start, and ending at the pde index @start + @length. This function will skip 1061 * over already allocated page directories within the range, and only allocate 1062 * new ones, setting the appropriate pointer within the pdp as well as the 1063 * correct position in the bitmap @new_pds. 1064 * 1065 * The function will only allocate the pages within the range for a given page 1066 * directory pointer. In other words, if @start + @length straddles a virtually 1067 * addressed PDP boundary (512GB for 4k pages), there will be more allocations 1068 * required by the caller. This is not currently possible, and the BUG in the 1069 * code will prevent it. 1070 * 1071 * Return: 0 if success; negative error code otherwise. 1072 */ 1073 static int 1074 gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm, 1075 struct i915_page_directory_pointer *pdp, 1076 uint64_t start, 1077 uint64_t length, 1078 unsigned long *new_pds) 1079 { 1080 struct drm_device *dev = vm->dev; 1081 struct i915_page_directory *pd; 1082 uint32_t pdpe; 1083 uint32_t pdpes = I915_PDPES_PER_PDP(dev); 1084 1085 WARN_ON(!bitmap_empty(new_pds, pdpes)); 1086 1087 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1088 if (test_bit(pdpe, pdp->used_pdpes)) 1089 continue; 1090 1091 pd = alloc_pd(dev); 1092 if (IS_ERR(pd)) 1093 goto unwind_out; 1094 1095 gen8_initialize_pd(vm, pd); 1096 pdp->page_directory[pdpe] = pd; 1097 __set_bit(pdpe, new_pds); 1098 trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT); 1099 } 1100 1101 return 0; 1102 1103 unwind_out: 1104 for_each_set_bit(pdpe, new_pds, pdpes) 1105 free_pd(dev, pdp->page_directory[pdpe]); 1106 1107 return -ENOMEM; 1108 } 1109
1110 /** 1111 * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range. 1112 * @vm: Master vm structure. 1113 * @pml4: Page map level 4 for this address range. 1114 * @start: Starting virtual address to begin allocations. 1115 * @length: Size of the allocations. 1116 * @new_pdps: Bitmap set by function with new allocations. Likely used by the 1117 * caller to free on error.
1118 * 1119 * Allocate the required number of page directory pointers. Extremely similar to 1120 * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs(). 1121 * The main difference is here we are limited by the pml4 boundary (instead of 1122 * the page directory pointer). 1123 * 1124 * Return: 0 if success; negative error code otherwise. 1125 */ 1126 static int 1127 gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, 1128 struct i915_pml4 *pml4, 1129 uint64_t start, 1130 uint64_t length, 1131 unsigned long *new_pdps) 1132 { 1133 struct drm_device *dev = vm->dev; 1134 struct i915_page_directory_pointer *pdp; 1135 uint32_t pml4e; 1136 1137 WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4)); 1138 1139 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1140 if (!test_bit(pml4e, pml4->used_pml4es)) { 1141 pdp = alloc_pdp(dev); 1142 if (IS_ERR(pdp)) 1143 goto unwind_out; 1144 1145 gen8_initialize_pdp(vm, pdp); 1146 pml4->pdps[pml4e] = pdp; 1147 __set_bit(pml4e, new_pdps); 1148 trace_i915_page_directory_pointer_entry_alloc(vm, 1149 pml4e, 1150 start, 1151 GEN8_PML4E_SHIFT); 1152 } 1153 } 1154 1155 return 0; 1156 1157 unwind_out: 1158 for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) 1159 free_pdp(dev, pml4->pdps[pml4e]); 1160 1161 return -ENOMEM; 1162 } 1163 1164 static void 1165 free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts) 1166 { 1167 kfree(new_pts); 1168 kfree(new_pds); 1169 } 1170 1171 /* Fills in the page directory bitmap, and the array of page tables bitmap. Both 1172 * of these are based on the number of PDPEs in the system. 1173 */ 1174 static 1175 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds, 1176 unsigned long **new_pts, 1177 uint32_t pdpes) 1178 { 1179 unsigned long *pds; 1180 unsigned long *pts; 1181 1182 pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY); 1183 if (!pds) 1184 return -ENOMEM; 1185 1186 pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long), 1187 GFP_TEMPORARY); 1188 if (!pts) 1189 goto err_out; 1190 1191 *new_pds = pds; 1192 *new_pts = pts; 1193 1194 return 0; 1195 1196 err_out: 1197 free_gen8_temp_bitmaps(pds, pts); 1198 return -ENOMEM; 1199 } 1200 1201 /* PDE TLBs are a pain to invalidate on GEN8+. When we modify 1202 * the page table structures, we mark them dirty so that 1203 * context switching/execlist queuing code takes extra steps 1204 * to ensure that tlbs are flushed. 1205 */ 1206 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt) 1207 { 1208 ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask; 1209 } 1210 1211 static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, 1212 struct i915_page_directory_pointer *pdp, 1213 uint64_t start, 1214 uint64_t length) 1215 { 1216 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1217 unsigned long *new_page_dirs, *new_page_tables; 1218 struct drm_device *dev = vm->dev; 1219 struct i915_page_directory *pd; 1220 const uint64_t orig_start = start; 1221 const uint64_t orig_length = length; 1222 uint32_t pdpe; 1223 uint32_t pdpes = I915_PDPES_PER_PDP(dev); 1224 int ret; 1225 1226 /* Wrap is never okay since we can only represent 48b, and we don't 1227 * actually use the other side of the canonical address space. 
1228 */ 1229 if (WARN_ON(start + length < start)) 1230 return -ENODEV; 1231 1232 if (WARN_ON(start + length > vm->total)) 1233 return -ENODEV; 1234 1235 ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); 1236 if (ret) 1237 return ret; 1238 1239 /* Do the allocations first so we can easily bail out */ 1240 ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length, 1241 new_page_dirs); 1242 if (ret) { 1243 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1244 return ret; 1245 } 1246 1247 /* For every page directory referenced, allocate page tables */ 1248 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1249 ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length, 1250 new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES)); 1251 if (ret) 1252 goto err_out; 1253 } 1254 1255 start = orig_start; 1256 length = orig_length; 1257 1258 /* Allocations have completed successfully, so set the bitmaps, and do 1259 * the mappings. */ 1260 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1261 gen8_pde_t *const page_directory = kmap_px(pd); 1262 struct i915_page_table *pt; 1263 uint64_t pd_len = length; 1264 uint64_t pd_start = start; 1265 uint32_t pde; 1266 1267 /* Every pd should be allocated, we just did that above. */ 1268 WARN_ON(!pd); 1269 1270 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { 1271 /* Same reasoning as pd */ 1272 WARN_ON(!pt); 1273 WARN_ON(!pd_len); 1274 WARN_ON(!gen8_pte_count(pd_start, pd_len)); 1275 1276 /* Set our used ptes within the page table */ 1277 bitmap_set(pt->used_ptes, 1278 gen8_pte_index(pd_start), 1279 gen8_pte_count(pd_start, pd_len)); 1280 1281 /* Our pde is now pointing to the pagetable, pt */ 1282 __set_bit(pde, pd->used_pdes); 1283 1284 /* Map the PDE to the page table */ 1285 page_directory[pde] = gen8_pde_encode(px_dma(pt), 1286 I915_CACHE_LLC); 1287 trace_i915_page_table_entry_map(&ppgtt->base, pde, pt, 1288 gen8_pte_index(start), 1289 gen8_pte_count(start, length), 1290 GEN8_PTES); 1291 1292 /* NB: We haven't yet mapped ptes to pages. At this 1293 * point we're still relying on insert_entries() */ 1294 } 1295 1296 kunmap_px(ppgtt, page_directory); 1297 __set_bit(pdpe, pdp->used_pdpes); 1298 gen8_setup_page_directory(ppgtt, pdp, pd, pdpe); 1299 } 1300 1301 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1302 mark_tlbs_dirty(ppgtt); 1303 return 0; 1304 1305 err_out: 1306 while (pdpe--) { 1307 unsigned long temp; 1308 1309 for_each_set_bit(temp, new_page_tables + pdpe * 1310 BITS_TO_LONGS(I915_PDES), I915_PDES) 1311 free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]); 1312 } 1313 1314 for_each_set_bit(pdpe, new_page_dirs, pdpes) 1315 free_pd(dev, pdp->page_directory[pdpe]); 1316 1317 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1318 mark_tlbs_dirty(ppgtt); 1319 return ret; 1320 } 1321 1322 static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, 1323 struct i915_pml4 *pml4, 1324 uint64_t start, 1325 uint64_t length) 1326 { 1327 DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4); 1328 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1329 struct i915_page_directory_pointer *pdp; 1330 uint64_t pml4e; 1331 int ret = 0; 1332 1333 /* Do the pml4 allocations first, so we don't need to track the newly 1334 * allocated tables below the pdp */ 1335 bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4); 1336 1337 /* The pagedirectory and pagetable allocations are done in the shared 3 1338 * and 4 level code. Just allocate the pdps. 
1339 */ 1340 ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length, 1341 new_pdps); 1342 if (ret) 1343 return ret; 1344 1345 WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2, 1346 "The allocation has spanned more than 512GB. " 1347 "It is highly likely this is incorrect."); 1348 1349 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1350 WARN_ON(!pdp); 1351 1352 ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length); 1353 if (ret) 1354 goto err_out; 1355 1356 gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e); 1357 } 1358 1359 bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es, 1360 GEN8_PML4ES_PER_PML4); 1361 1362 return 0; 1363 1364 err_out: 1365 for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) 1366 gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]); 1367 1368 return ret; 1369 } 1370 1371 static int gen8_alloc_va_range(struct i915_address_space *vm, 1372 uint64_t start, uint64_t length) 1373 { 1374 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1375 1376 if (USES_FULL_48BIT_PPGTT(vm->dev)) 1377 return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length); 1378 else 1379 return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); 1380 } 1381 1382 static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp, 1383 uint64_t start, uint64_t length, 1384 gen8_pte_t scratch_pte, 1385 struct seq_file *m) 1386 { 1387 struct i915_page_directory *pd; 1388 uint32_t pdpe; 1389 1390 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1391 struct i915_page_table *pt; 1392 uint64_t pd_len = length; 1393 uint64_t pd_start = start; 1394 uint32_t pde; 1395 1396 if (!test_bit(pdpe, pdp->used_pdpes)) 1397 continue; 1398 1399 seq_printf(m, "\tPDPE #%d\n", pdpe); 1400 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { 1401 uint32_t pte; 1402 gen8_pte_t *pt_vaddr; 1403 1404 if (!test_bit(pde, pd->used_pdes)) 1405 continue; 1406 1407 pt_vaddr = kmap_px(pt); 1408 for (pte = 0; pte < GEN8_PTES; pte += 4) { 1409 uint64_t va = 1410 (pdpe << GEN8_PDPE_SHIFT) | 1411 (pde << GEN8_PDE_SHIFT) | 1412 (pte << GEN8_PTE_SHIFT); 1413 int i; 1414 bool found = false; 1415 1416 for (i = 0; i < 4; i++) 1417 if (pt_vaddr[pte + i] != scratch_pte) 1418 found = true; 1419 if (!found) 1420 continue; 1421 1422 seq_printf(m, "\t\t0x%lx [%03d,%03d,%04d]: =", va, pdpe, pde, pte); 1423 for (i = 0; i < 4; i++) { 1424 if (pt_vaddr[pte + i] != scratch_pte) 1425 seq_printf(m, " %lx", pt_vaddr[pte + i]); 1426 else 1427 seq_puts(m, " SCRATCH "); 1428 } 1429 seq_puts(m, "\n"); 1430 } 1431 /* don't use kunmap_px, it could trigger 1432 * an unnecessary flush. 
1433 */ 1434 kunmap_atomic(pt_vaddr); 1435 } 1436 } 1437 } 1438 1439 static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) 1440 { 1441 struct i915_address_space *vm = &ppgtt->base; 1442 uint64_t start = ppgtt->base.start; 1443 uint64_t length = ppgtt->base.total; 1444 gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), 1445 I915_CACHE_LLC, true); 1446 1447 if (!USES_FULL_48BIT_PPGTT(vm->dev)) { 1448 gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m); 1449 } else { 1450 uint64_t pml4e; 1451 struct i915_pml4 *pml4 = &ppgtt->pml4; 1452 struct i915_page_directory_pointer *pdp; 1453 1454 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1455 if (!test_bit(pml4e, pml4->used_pml4es)) 1456 continue; 1457 1458 seq_printf(m, " PML4E #%lu\n", pml4e); 1459 gen8_dump_pdp(pdp, start, length, scratch_pte, m); 1460 } 1461 } 1462 } 1463 1464 static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt) 1465 { 1466 unsigned long *new_page_dirs, *new_page_tables; 1467 uint32_t pdpes = I915_PDPES_PER_PDP(dev); 1468 int ret; 1469 1470 /* We allocate temp bitmap for page tables for no gain 1471 * but as this is for init only, lets keep the things simple 1472 */ 1473 ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); 1474 if (ret) 1475 return ret; 1476 1477 /* Allocate for all pdps regardless of how the ppgtt 1478 * was defined. 1479 */ 1480 ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp, 1481 0, 1ULL << 32, 1482 new_page_dirs); 1483 if (!ret) 1484 *ppgtt->pdp.used_pdpes = *new_page_dirs; 1485 1486 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1487 1488 return ret; 1489 } 1490 1491 /* 1492 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers 1493 * with a net effect resembling a 2-level page table in normal x86 terms. Each 1494 * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address 1495 * space. 
1496 * 1497 */ 1498 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 1499 { 1500 int ret; 1501 1502 ret = gen8_init_scratch(&ppgtt->base); 1503 if (ret) 1504 return ret; 1505 1506 ppgtt->base.start = 0; 1507 ppgtt->base.cleanup = gen8_ppgtt_cleanup; 1508 ppgtt->base.allocate_va_range = gen8_alloc_va_range; 1509 ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; 1510 ppgtt->base.clear_range = gen8_ppgtt_clear_range; 1511 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 1512 ppgtt->base.bind_vma = ppgtt_bind_vma; 1513 ppgtt->debug_dump = gen8_dump_ppgtt; 1514 1515 if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { 1516 ret = setup_px(ppgtt->base.dev, &ppgtt->pml4); 1517 if (ret) 1518 goto free_scratch; 1519 1520 gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4); 1521 1522 ppgtt->base.total = 1ULL << 48; 1523 ppgtt->switch_mm = gen8_48b_mm_switch; 1524 } else { 1525 ret = __pdp_init(ppgtt->base.dev, &ppgtt->pdp); 1526 if (ret) 1527 goto free_scratch; 1528 1529 ppgtt->base.total = 1ULL << 32; 1530 ppgtt->switch_mm = gen8_legacy_mm_switch; 1531 trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, 1532 0, 0, 1533 GEN8_PML4E_SHIFT); 1534 1535 if (intel_vgpu_active(ppgtt->base.dev)) { 1536 ret = gen8_preallocate_top_level_pdps(ppgtt); 1537 if (ret) 1538 goto free_scratch; 1539 } 1540 } 1541 1542 if (intel_vgpu_active(ppgtt->base.dev)) 1543 gen8_ppgtt_notify_vgt(ppgtt, true); 1544 1545 return 0; 1546 1547 free_scratch: 1548 gen8_free_scratch(&ppgtt->base); 1549 return ret; 1550 } 1551 1552 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) 1553 { 1554 struct i915_address_space *vm = &ppgtt->base; 1555 struct i915_page_table *unused; 1556 gen6_pte_t scratch_pte; 1557 uint32_t pd_entry; 1558 uint32_t pte, pde, temp; 1559 uint32_t start = ppgtt->base.start, length = ppgtt->base.total; 1560 1561 scratch_pte = vm->pte_encode(px_dma(vm->scratch_page), 1562 I915_CACHE_LLC, true, 0); 1563 1564 gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde) { 1565 u32 expected; 1566 gen6_pte_t *pt_vaddr; 1567 const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]); 1568 pd_entry = readl(ppgtt->pd_addr + pde); 1569 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID); 1570 1571 if (pd_entry != expected) 1572 seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n", 1573 pde, 1574 pd_entry, 1575 expected); 1576 seq_printf(m, "\tPDE: %x\n", pd_entry); 1577 1578 pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]); 1579 1580 for (pte = 0; pte < GEN6_PTES; pte+=4) { 1581 unsigned long va = 1582 (pde * PAGE_SIZE * GEN6_PTES) + 1583 (pte * PAGE_SIZE); 1584 int i; 1585 bool found = false; 1586 for (i = 0; i < 4; i++) 1587 if (pt_vaddr[pte + i] != scratch_pte) 1588 found = true; 1589 if (!found) 1590 continue; 1591 1592 seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte); 1593 for (i = 0; i < 4; i++) { 1594 if (pt_vaddr[pte + i] != scratch_pte) 1595 seq_printf(m, " %08x", pt_vaddr[pte + i]); 1596 else 1597 seq_puts(m, " SCRATCH "); 1598 } 1599 seq_puts(m, "\n"); 1600 } 1601 kunmap_px(ppgtt, pt_vaddr); 1602 } 1603 } 1604 1605 /* Write pde (index) from the page directory @pd to the page table @pt */ 1606 static void gen6_write_pde(struct i915_page_directory *pd, 1607 const int pde, struct i915_page_table *pt) 1608 { 1609 /* Caller needs to make sure the write completes if necessary */ 1610 struct i915_hw_ppgtt *ppgtt = 1611 container_of(pd, struct i915_hw_ppgtt, pd); 1612 u32 pd_entry; 1613 1614 pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt)); 1615 pd_entry |= 
GEN6_PDE_VALID; 1616 1617 writel(pd_entry, ppgtt->pd_addr + pde); 1618 } 1619 1620 /* Write all the page tables found in the ppgtt structure to incrementing page 1621 * directories. */ 1622 static void gen6_write_page_range(struct drm_i915_private *dev_priv, 1623 struct i915_page_directory *pd, 1624 uint32_t start, uint32_t length) 1625 { 1626 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1627 struct i915_page_table *pt; 1628 uint32_t pde, temp; 1629 1630 gen6_for_each_pde(pt, pd, start, length, temp, pde) 1631 gen6_write_pde(pd, pde, pt); 1632 1633 /* Make sure write is complete before other code can use this page 1634 * table. Also require for WC mapped PTEs */ 1635 readl(ggtt->gsm); 1636 } 1637 1638 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) 1639 { 1640 BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); 1641 1642 return (ppgtt->pd.base.ggtt_offset / 64) << 16; 1643 } 1644 1645 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, 1646 struct drm_i915_gem_request *req) 1647 { 1648 struct intel_engine_cs *engine = req->engine; 1649 int ret; 1650 1651 /* NB: TLBs must be flushed and invalidated before a switch */ 1652 ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 1653 if (ret) 1654 return ret; 1655 1656 ret = intel_ring_begin(req, 6); 1657 if (ret) 1658 return ret; 1659 1660 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(2)); 1661 intel_ring_emit_reg(engine, RING_PP_DIR_DCLV(engine)); 1662 intel_ring_emit(engine, PP_DIR_DCLV_2G); 1663 intel_ring_emit_reg(engine, RING_PP_DIR_BASE(engine)); 1664 intel_ring_emit(engine, get_pd_offset(ppgtt)); 1665 intel_ring_emit(engine, MI_NOOP); 1666 intel_ring_advance(engine); 1667 1668 return 0; 1669 } 1670 1671 static int vgpu_mm_switch(struct i915_hw_ppgtt *ppgtt, 1672 struct drm_i915_gem_request *req) 1673 { 1674 struct intel_engine_cs *engine = req->engine; 1675 struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev); 1676 1677 I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); 1678 I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt)); 1679 return 0; 1680 } 1681 1682 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, 1683 struct drm_i915_gem_request *req) 1684 { 1685 struct intel_engine_cs *engine = req->engine; 1686 int ret; 1687 1688 /* NB: TLBs must be flushed and invalidated before a switch */ 1689 ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 1690 if (ret) 1691 return ret; 1692 1693 ret = intel_ring_begin(req, 6); 1694 if (ret) 1695 return ret; 1696 1697 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(2)); 1698 intel_ring_emit_reg(engine, RING_PP_DIR_DCLV(engine)); 1699 intel_ring_emit(engine, PP_DIR_DCLV_2G); 1700 intel_ring_emit_reg(engine, RING_PP_DIR_BASE(engine)); 1701 intel_ring_emit(engine, get_pd_offset(ppgtt)); 1702 intel_ring_emit(engine, MI_NOOP); 1703 intel_ring_advance(engine); 1704 1705 /* XXX: RCS is the only one to auto invalidate the TLBs? 
*/ 1706 if (engine->id != RCS) { 1707 ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 1708 if (ret) 1709 return ret; 1710 } 1711 1712 return 0; 1713 } 1714 1715 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, 1716 struct drm_i915_gem_request *req) 1717 { 1718 struct intel_engine_cs *engine = req->engine; 1719 struct drm_device *dev = ppgtt->base.dev; 1720 struct drm_i915_private *dev_priv = dev->dev_private; 1721 1722 1723 I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); 1724 I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt)); 1725 1726 POSTING_READ(RING_PP_DIR_DCLV(engine)); 1727 1728 return 0; 1729 } 1730 1731 static void gen8_ppgtt_enable(struct drm_device *dev) 1732 { 1733 struct drm_i915_private *dev_priv = dev->dev_private; 1734 struct intel_engine_cs *engine; 1735 1736 for_each_engine(engine, dev_priv) { 1737 u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? GEN8_GFX_PPGTT_48B : 0; 1738 I915_WRITE(RING_MODE_GEN7(engine), 1739 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level)); 1740 } 1741 } 1742 1743 static void gen7_ppgtt_enable(struct drm_device *dev) 1744 { 1745 struct drm_i915_private *dev_priv = dev->dev_private; 1746 struct intel_engine_cs *engine; 1747 uint32_t ecochk, ecobits; 1748 1749 ecobits = I915_READ(GAC_ECO_BITS); 1750 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B); 1751 1752 ecochk = I915_READ(GAM_ECOCHK); 1753 if (IS_HASWELL(dev)) { 1754 ecochk |= ECOCHK_PPGTT_WB_HSW; 1755 } else { 1756 ecochk |= ECOCHK_PPGTT_LLC_IVB; 1757 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB; 1758 } 1759 I915_WRITE(GAM_ECOCHK, ecochk); 1760 1761 for_each_engine(engine, dev_priv) { 1762 /* GFX_MODE is per-ring on gen7+ */ 1763 I915_WRITE(RING_MODE_GEN7(engine), 1764 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 1765 } 1766 } 1767 1768 static void gen6_ppgtt_enable(struct drm_device *dev) 1769 { 1770 struct drm_i915_private *dev_priv = dev->dev_private; 1771 uint32_t ecochk, gab_ctl, ecobits; 1772 1773 ecobits = I915_READ(GAC_ECO_BITS); 1774 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT | 1775 ECOBITS_PPGTT_CACHE64B); 1776 1777 gab_ctl = I915_READ(GAB_CTL); 1778 I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT); 1779 1780 ecochk = I915_READ(GAM_ECOCHK); 1781 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B); 1782 1783 I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 1784 } 1785 1786 /* PPGTT support for Sandybdrige/Gen6 and later */ 1787 static void gen6_ppgtt_clear_range(struct i915_address_space *vm, 1788 uint64_t start, 1789 uint64_t length, 1790 bool use_scratch) 1791 { 1792 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1793 gen6_pte_t *pt_vaddr, scratch_pte; 1794 unsigned first_entry = start >> PAGE_SHIFT; 1795 unsigned num_entries = length >> PAGE_SHIFT; 1796 unsigned act_pt = first_entry / GEN6_PTES; 1797 unsigned first_pte = first_entry % GEN6_PTES; 1798 unsigned last_pte, i; 1799 1800 scratch_pte = vm->pte_encode(px_dma(vm->scratch_page), 1801 I915_CACHE_LLC, true, 0); 1802 1803 while (num_entries) { 1804 last_pte = first_pte + num_entries; 1805 if (last_pte > GEN6_PTES) 1806 last_pte = GEN6_PTES; 1807 1808 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); 1809 1810 for (i = first_pte; i < last_pte; i++) 1811 pt_vaddr[i] = scratch_pte; 1812 1813 kunmap_px(ppgtt, pt_vaddr); 1814 1815 num_entries -= last_pte - first_pte; 1816 first_pte = 0; 1817 act_pt++; 1818 } 1819 } 1820 1821 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, 1822 struct sg_table *pages, 1823 uint64_t start, 1824 
enum i915_cache_level cache_level, u32 flags) 1825 { 1826 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1827 gen6_pte_t *pt_vaddr; 1828 unsigned first_entry = start >> PAGE_SHIFT; 1829 unsigned act_pt = first_entry / GEN6_PTES; 1830 unsigned act_pte = first_entry % GEN6_PTES; 1831 struct sg_page_iter sg_iter; 1832 1833 pt_vaddr = NULL; 1834 for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) { 1835 if (pt_vaddr == NULL) 1836 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); 1837 1838 pt_vaddr[act_pte] = 1839 vm->pte_encode(sg_page_iter_dma_address(&sg_iter), 1840 cache_level, true, flags); 1841 1842 if (++act_pte == GEN6_PTES) { 1843 kunmap_px(ppgtt, pt_vaddr); 1844 pt_vaddr = NULL; 1845 act_pt++; 1846 act_pte = 0; 1847 } 1848 } 1849 if (pt_vaddr) 1850 kunmap_px(ppgtt, pt_vaddr); 1851 } 1852 1853 static int gen6_alloc_va_range(struct i915_address_space *vm, 1854 uint64_t start_in, uint64_t length_in) 1855 { 1856 DECLARE_BITMAP(new_page_tables, I915_PDES); 1857 struct drm_device *dev = vm->dev; 1858 struct drm_i915_private *dev_priv = to_i915(dev); 1859 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1860 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1861 struct i915_page_table *pt; 1862 uint32_t start, length, start_save, length_save; 1863 uint32_t pde, temp; 1864 int ret; 1865 1866 if (WARN_ON(start_in + length_in > ppgtt->base.total)) 1867 return -ENODEV; 1868 1869 start = start_save = start_in; 1870 length = length_save = length_in; 1871 1872 bitmap_zero(new_page_tables, I915_PDES); 1873 1874 /* The allocation is done in two stages so that we can bail out with 1875 * minimal amount of pain. The first stage finds new page tables that 1876 * need allocation. The second stage marks use ptes within the page 1877 * tables. 1878 */ 1879 gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) { 1880 if (pt != vm->scratch_pt) { 1881 WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES)); 1882 continue; 1883 } 1884 1885 /* We've already allocated a page table */ 1886 WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES)); 1887 1888 pt = alloc_pt(dev); 1889 if (IS_ERR(pt)) { 1890 ret = PTR_ERR(pt); 1891 goto unwind_out; 1892 } 1893 1894 gen6_initialize_pt(vm, pt); 1895 1896 ppgtt->pd.page_table[pde] = pt; 1897 __set_bit(pde, new_page_tables); 1898 trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT); 1899 } 1900 1901 start = start_save; 1902 length = length_save; 1903 1904 gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) { 1905 DECLARE_BITMAP(tmp_bitmap, GEN6_PTES); 1906 1907 bitmap_zero(tmp_bitmap, GEN6_PTES); 1908 bitmap_set(tmp_bitmap, gen6_pte_index(start), 1909 gen6_pte_count(start, length)); 1910 1911 if (__test_and_clear_bit(pde, new_page_tables)) 1912 gen6_write_pde(&ppgtt->pd, pde, pt); 1913 1914 trace_i915_page_table_entry_map(vm, pde, pt, 1915 gen6_pte_index(start), 1916 gen6_pte_count(start, length), 1917 GEN6_PTES); 1918 bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes, 1919 GEN6_PTES); 1920 } 1921 1922 WARN_ON(!bitmap_empty(new_page_tables, I915_PDES)); 1923 1924 /* Make sure write is complete before other code can use this page 1925 * table. 
Also required for WC mapped PTEs */ 1926 readl(ggtt->gsm); 1927 1928 mark_tlbs_dirty(ppgtt); 1929 return 0; 1930 1931 unwind_out: 1932 for_each_set_bit(pde, new_page_tables, I915_PDES) { 1933 struct i915_page_table *pt = ppgtt->pd.page_table[pde]; 1934 1935 ppgtt->pd.page_table[pde] = vm->scratch_pt; 1936 free_pt(vm->dev, pt); 1937 } 1938 1939 mark_tlbs_dirty(ppgtt); 1940 return ret; 1941 } 1942
1943 static int gen6_init_scratch(struct i915_address_space *vm) 1944 { 1945 struct drm_device *dev = vm->dev; 1946 1947 vm->scratch_page = alloc_scratch_page(dev); 1948 if (IS_ERR(vm->scratch_page)) 1949 return PTR_ERR(vm->scratch_page); 1950 1951 vm->scratch_pt = alloc_pt(dev); 1952 if (IS_ERR(vm->scratch_pt)) { 1953 free_scratch_page(dev, vm->scratch_page); 1954 return PTR_ERR(vm->scratch_pt); 1955 } 1956 1957 gen6_initialize_pt(vm, vm->scratch_pt); 1958 1959 return 0; 1960 } 1961 1962 static void gen6_free_scratch(struct i915_address_space *vm) 1963 { 1964 struct drm_device *dev = vm->dev; 1965 1966 free_pt(dev, vm->scratch_pt); 1967 free_scratch_page(dev, vm->scratch_page); 1968 } 1969 1970 static void gen6_ppgtt_cleanup(struct i915_address_space *vm) 1971 { 1972 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1973 struct i915_page_table *pt; 1974 uint32_t pde; 1975 1976 drm_mm_remove_node(&ppgtt->node); 1977 1978 gen6_for_all_pdes(pt, ppgtt, pde) { 1979 if (pt != vm->scratch_pt) 1980 free_pt(ppgtt->base.dev, pt); 1981 } 1982 1983 gen6_free_scratch(vm); 1984 } 1985
1986 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) 1987 { 1988 struct i915_address_space *vm = &ppgtt->base; 1989 struct drm_device *dev = ppgtt->base.dev; 1990 struct drm_i915_private *dev_priv = to_i915(dev); 1991 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1992 bool retried = false; 1993 int ret; 1994 1995 /* PPGTT PDEs reside in the GGTT and consist of 512 entries. The 1996 * allocator works in address space sizes, so it's multiplied by page 1997 * size. We allocate at the top of the GTT to avoid fragmentation.
1998 */ 1999 BUG_ON(!drm_mm_initialized(&ggtt->base.mm)); 2000 2001 ret = gen6_init_scratch(vm); 2002 if (ret) 2003 return ret; 2004 2005 alloc: 2006 ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, 2007 &ppgtt->node, GEN6_PD_SIZE, 2008 GEN6_PD_ALIGN, 0, 2009 0, ggtt->base.total, 2010 DRM_MM_TOPDOWN); 2011 if (ret == -ENOSPC && !retried) { 2012 ret = i915_gem_evict_something(dev, &ggtt->base, 2013 GEN6_PD_SIZE, GEN6_PD_ALIGN, 2014 I915_CACHE_NONE, 2015 0, ggtt->base.total, 2016 0); 2017 if (ret) 2018 goto err_out; 2019 2020 retried = true; 2021 goto alloc; 2022 } 2023 2024 if (ret) 2025 goto err_out; 2026 2027 2028 if (ppgtt->node.start < ggtt->mappable_end) 2029 DRM_DEBUG("Forced to use aperture for PDEs\n"); 2030 2031 return 0; 2032 2033 err_out: 2034 gen6_free_scratch(vm); 2035 return ret; 2036 } 2037 2038 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) 2039 { 2040 return gen6_ppgtt_allocate_page_directories(ppgtt); 2041 } 2042 2043 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt, 2044 uint64_t start, uint64_t length) 2045 { 2046 struct i915_page_table *unused; 2047 uint32_t pde, temp; 2048 2049 gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde) 2050 ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt; 2051 } 2052 2053 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 2054 { 2055 struct drm_device *dev = ppgtt->base.dev; 2056 struct drm_i915_private *dev_priv = to_i915(dev); 2057 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2058 int ret; 2059 2060 ppgtt->base.pte_encode = ggtt->base.pte_encode; 2061 if (IS_GEN6(dev)) { 2062 ppgtt->switch_mm = gen6_mm_switch; 2063 } else if (IS_HASWELL(dev)) { 2064 ppgtt->switch_mm = hsw_mm_switch; 2065 } else if (IS_GEN7(dev)) { 2066 ppgtt->switch_mm = gen7_mm_switch; 2067 } else 2068 BUG(); 2069 2070 if (intel_vgpu_active(dev)) 2071 ppgtt->switch_mm = vgpu_mm_switch; 2072 2073 ret = gen6_ppgtt_alloc(ppgtt); 2074 if (ret) 2075 return ret; 2076 2077 ppgtt->base.allocate_va_range = gen6_alloc_va_range; 2078 ppgtt->base.clear_range = gen6_ppgtt_clear_range; 2079 ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; 2080 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 2081 ppgtt->base.bind_vma = ppgtt_bind_vma; 2082 ppgtt->base.cleanup = gen6_ppgtt_cleanup; 2083 ppgtt->base.start = 0; 2084 ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE; 2085 ppgtt->debug_dump = gen6_dump_ppgtt; 2086 2087 ppgtt->pd.base.ggtt_offset = 2088 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t); 2089 2090 ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + 2091 ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t); 2092 2093 gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total); 2094 2095 gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total); 2096 2097 DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n", 2098 ppgtt->node.size >> 20, 2099 ppgtt->node.start / PAGE_SIZE); 2100 2101 DRM_DEBUG("Adding PPGTT at offset %x\n", 2102 ppgtt->pd.base.ggtt_offset << 10); 2103 2104 return 0; 2105 } 2106 2107 static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) 2108 { 2109 ppgtt->base.dev = dev; 2110 2111 if (INTEL_INFO(dev)->gen < 8) 2112 return gen6_ppgtt_init(ppgtt); 2113 else 2114 return gen8_ppgtt_init(ppgtt); 2115 } 2116 2117 static void i915_address_space_init(struct i915_address_space *vm, 2118 struct drm_i915_private *dev_priv) 2119 { 2120 drm_mm_init(&vm->mm, vm->start, vm->total); 2121 vm->dev = dev_priv->dev; 2122 INIT_LIST_HEAD(&vm->active_list); 2123 INIT_LIST_HEAD(&vm->inactive_list); 
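	/*
	 * Publish the new address space on the device-wide vm_list so that
	 * code which must visit every VM (e.g. i915_gem_restore_gtt_mappings()
	 * rewriting page-table entries on resume) can find it.
	 */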
2124 list_add_tail(&vm->global_link, &dev_priv->vm_list); 2125 } 2126 2127 static void gtt_write_workarounds(struct drm_device *dev) 2128 { 2129 struct drm_i915_private *dev_priv = dev->dev_private; 2130 2131 /* This function is for gtt related workarounds. This function is 2132 * called on driver load and after a GPU reset, so you can place 2133 * workarounds here even if they get overwritten by GPU reset. 2134 */ 2135 /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */ 2136 if (IS_BROADWELL(dev)) 2137 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); 2138 else if (IS_CHERRYVIEW(dev)) 2139 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); 2140 else if (IS_SKYLAKE(dev)) 2141 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); 2142 else if (IS_BROXTON(dev)) 2143 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); 2144 } 2145 2146 int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) 2147 { 2148 struct drm_i915_private *dev_priv = dev->dev_private; 2149 int ret = 0; 2150 2151 ret = __hw_ppgtt_init(dev, ppgtt); 2152 if (ret == 0) { 2153 kref_init(&ppgtt->ref); 2154 i915_address_space_init(&ppgtt->base, dev_priv); 2155 } 2156 2157 return ret; 2158 } 2159 2160 int i915_ppgtt_init_hw(struct drm_device *dev) 2161 { 2162 gtt_write_workarounds(dev); 2163 2164 /* In the case of execlists, PPGTT is enabled by the context descriptor 2165 * and the PDPs are contained within the context itself. We don't 2166 * need to do anything here. */ 2167 if (i915.enable_execlists) 2168 return 0; 2169 2170 if (!USES_PPGTT(dev)) 2171 return 0; 2172 2173 if (IS_GEN6(dev)) 2174 gen6_ppgtt_enable(dev); 2175 else if (IS_GEN7(dev)) 2176 gen7_ppgtt_enable(dev); 2177 else if (INTEL_INFO(dev)->gen >= 8) 2178 gen8_ppgtt_enable(dev); 2179 else 2180 MISSING_CASE(INTEL_INFO(dev)->gen); 2181 2182 return 0; 2183 } 2184 2185 int i915_ppgtt_init_ring(struct drm_i915_gem_request *req) 2186 { 2187 struct drm_i915_private *dev_priv = req->i915; 2188 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; 2189 2190 if (i915.enable_execlists) 2191 return 0; 2192 2193 if (!ppgtt) 2194 return 0; 2195 2196 return ppgtt->switch_mm(ppgtt, req); 2197 } 2198 2199 struct i915_hw_ppgtt * 2200 i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv) 2201 { 2202 struct i915_hw_ppgtt *ppgtt; 2203 int ret; 2204 2205 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 2206 if (!ppgtt) 2207 return ERR_PTR(-ENOMEM); 2208 2209 ret = i915_ppgtt_init(dev, ppgtt); 2210 if (ret) { 2211 kfree(ppgtt); 2212 return ERR_PTR(ret); 2213 } 2214 2215 ppgtt->file_priv = fpriv; 2216 2217 trace_i915_ppgtt_create(&ppgtt->base); 2218 2219 return ppgtt; 2220 } 2221 2222 void i915_ppgtt_release(struct kref *kref) 2223 { 2224 struct i915_hw_ppgtt *ppgtt = 2225 container_of(kref, struct i915_hw_ppgtt, ref); 2226 2227 trace_i915_ppgtt_release(&ppgtt->base); 2228 2229 /* vmas should already be unbound */ 2230 WARN_ON(!list_empty(&ppgtt->base.active_list)); 2231 WARN_ON(!list_empty(&ppgtt->base.inactive_list)); 2232 2233 list_del(&ppgtt->base.global_link); 2234 drm_mm_takedown(&ppgtt->base.mm); 2235 2236 ppgtt->base.cleanup(&ppgtt->base); 2237 kfree(ppgtt); 2238 } 2239 2240 extern int intel_iommu_gfx_mapped; 2241 /* Certain Gen5 chipsets require require idling the GPU before 2242 * unmapping anything from the GTT when VT-d is enabled. 
2243 */ 2244 static bool needs_idle_maps(struct drm_device *dev) 2245 { 2246 #ifdef CONFIG_INTEL_IOMMU 2247 /* Query intel_iommu to see if we need the workaround. Presumably that 2248 * was loaded first. 2249 */ 2250 if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped) 2251 return true; 2252 #endif 2253 return false; 2254 } 2255 2256 static bool do_idling(struct drm_i915_private *dev_priv) 2257 { 2258 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2259 bool ret = dev_priv->mm.interruptible; 2260 2261 if (unlikely(ggtt->do_idle_maps)) { 2262 dev_priv->mm.interruptible = false; 2263 if (i915_gpu_idle(dev_priv->dev)) { 2264 DRM_ERROR("Couldn't idle GPU\n"); 2265 /* Wait a bit, in hopes it avoids the hang */ 2266 udelay(10); 2267 } 2268 } 2269 2270 return ret; 2271 } 2272 2273 static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible) 2274 { 2275 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2276 2277 if (unlikely(ggtt->do_idle_maps)) 2278 dev_priv->mm.interruptible = interruptible; 2279 } 2280 2281 void i915_check_and_clear_faults(struct drm_device *dev) 2282 { 2283 struct drm_i915_private *dev_priv = dev->dev_private; 2284 struct intel_engine_cs *engine; 2285 2286 if (INTEL_INFO(dev)->gen < 6) 2287 return; 2288 2289 for_each_engine(engine, dev_priv) { 2290 u32 fault_reg; 2291 fault_reg = I915_READ(RING_FAULT_REG(engine)); 2292 if (fault_reg & RING_FAULT_VALID) { 2293 #if 0 2294 DRM_DEBUG_DRIVER("Unexpected fault\n" 2295 "\tAddr: 0x%08lx\n" 2296 "\tAddress space: %s\n" 2297 "\tSource ID: %d\n" 2298 "\tType: %d\n", 2299 fault_reg & PAGE_MASK, 2300 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT", 2301 RING_FAULT_SRCID(fault_reg), 2302 RING_FAULT_FAULT_TYPE(fault_reg)); 2303 #endif 2304 I915_WRITE(RING_FAULT_REG(engine), 2305 fault_reg & ~RING_FAULT_VALID); 2306 } 2307 } 2308 POSTING_READ(RING_FAULT_REG(&dev_priv->engine[RCS])); 2309 } 2310 2311 static void i915_ggtt_flush(struct drm_i915_private *dev_priv) 2312 { 2313 if (INTEL_INFO(dev_priv)->gen < 6) { 2314 intel_gtt_chipset_flush(); 2315 } else { 2316 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2317 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2318 } 2319 } 2320 2321 void i915_gem_suspend_gtt_mappings(struct drm_device *dev) 2322 { 2323 struct drm_i915_private *dev_priv = to_i915(dev); 2324 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2325 2326 /* Don't bother messing with faults pre GEN6 as we have little 2327 * documentation supporting that it's a good idea. 
	 */
	if (INTEL_INFO(dev)->gen < 6)
		return;

	i915_check_and_clear_faults(dev);

	ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total,
			       true);

	i915_ggtt_flush(dev_priv);
}

int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
{
	if (!dma_map_sg(&obj->base.dev->pdev->dev,
			obj->pages->sgl, obj->pages->nents,
			PCI_DMA_BIDIRECTIONAL))
		return -ENOSPC;

	return 0;
}

static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
{
#ifdef writeq
	writeq(pte, addr);
#else
	iowrite32((u32)pte, addr);
	iowrite32(pte >> 32, addr + 4);
#endif
}

static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *st,
				     uint64_t start,
				     enum i915_cache_level level, u32 unused)
{
	struct drm_i915_private *dev_priv = to_i915(vm->dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	unsigned first_entry = start >> PAGE_SHIFT;
	gen8_pte_t __iomem *gtt_entries =
		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
	int i = 0;
	struct sg_page_iter sg_iter;
	dma_addr_t addr = 0; /* shut up gcc */
	int rpm_atomic_seq;

	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);

	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
		addr = sg_dma_address(sg_iter.sg) +
			(sg_iter.sg_pgoffset << PAGE_SHIFT);
		gen8_set_pte(&gtt_entries[i],
			     gen8_pte_encode(addr, level, true));
		i++;
	}

	/*
	 * XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR that they are potentially
	 * of NUMA access patterns. Therefore, even with the way we assume
	 * hardware should work, we must keep this posting read for paranoia.
	 */
	if (i != 0)
		WARN_ON(readq(&gtt_entries[i-1])
			!= gen8_pte_encode(addr, level, true));

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);

	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
}

struct insert_entries {
	struct i915_address_space *vm;
	struct sg_table *st;
	uint64_t start;
	enum i915_cache_level level;
	u32 flags;
};

static int gen8_ggtt_insert_entries__cb(void *_arg)
{
	struct insert_entries *arg = _arg;
	gen8_ggtt_insert_entries(arg->vm, arg->st,
				 arg->start, arg->level, arg->flags);
	return 0;
}

static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm,
					  struct sg_table *st,
					  uint64_t start,
					  enum i915_cache_level level,
					  u32 flags)
{
	struct insert_entries arg = { vm, st, start, level, flags };
#ifndef __DragonFly__
	stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL);
#else
	/* XXX: is this enough ?
	 * See Linux commit 5bab6f60cb4d1417ad7c599166bcfec87529c1a2 */
	get_mplock();
	gen8_ggtt_insert_entries__cb(&arg);
	rel_mplock();
#endif
}
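/*
 * Note: gen8_ggtt_insert_entries__BKL() is installed as the GGTT
 * insert_entries hook on Cherryview (see gen8_gmch_probe() below). It wraps
 * the normal gen8 path so the whole PTE update runs with other CPUs held
 * off: stop_machine() on Linux, while the DragonFly build falls back to the
 * MP lock, as the XXX comment above notes.
 */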
/*
 * Binds an object into the global gtt with the specified cache level. The object
 * will be accessible to the GPU via commands whose operands reference offsets
 * within the global GTT as well as accessible by the GPU through the GMADR
 * mapped BAR (dev_priv->mm.gtt->gtt).
 */
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *st,
				     uint64_t start,
				     enum i915_cache_level level, u32 flags)
{
	struct drm_i915_private *dev_priv = to_i915(vm->dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	unsigned first_entry = start >> PAGE_SHIFT;
	gen6_pte_t __iomem *gtt_entries =
		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
	int i = 0;
	struct sg_page_iter sg_iter;
	dma_addr_t addr = 0;
	int rpm_atomic_seq;

	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);

	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
		addr = sg_page_iter_dma_address(&sg_iter);
		iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
		i++;
	}

	/* XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR that they are potentially
	 * of NUMA access patterns. Therefore, even with the way we assume
	 * hardware should work, we must keep this posting read for paranoia.
	 */
	if (i != 0) {
		unsigned long gtt = readl(&gtt_entries[i-1]);
		WARN_ON(gtt != vm->pte_encode(addr, level, true, flags));
	}

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);

	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
}

static void gen8_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length,
				  bool use_scratch)
{
	struct drm_i915_private *dev_priv = to_i915(vm->dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	gen8_pte_t scratch_pte, __iomem *gtt_base =
		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
	int i;
	int rpm_atomic_seq;

	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
				      I915_CACHE_LLC,
				      use_scratch);
	for (i = 0; i < num_entries; i++)
		gen8_set_pte(&gtt_base[i], scratch_pte);
	readl(gtt_base);

	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
}
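/*
 * The clear_range() implementations work in whole GGTT entries: for example,
 * clearing 16 KiB at GGTT offset 0x100000 with 4 KiB pages gives
 * first_entry = 0x100000 >> PAGE_SHIFT = 256 and num_entries = 4, i.e.
 * PTEs 256..259 are rewritten with the scratch PTE.
 */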
static void gen6_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length,
				  bool use_scratch)
{
	struct drm_i915_private *dev_priv = to_i915(vm->dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	gen6_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
	int i;
	int rpm_atomic_seq;

	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
				     I915_CACHE_LLC, use_scratch, 0);

	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
	readl(gtt_base);

	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
}

static void i915_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *pages,
				     uint64_t start,
				     enum i915_cache_level cache_level, u32 unused)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
	int rpm_atomic_seq;

	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);

	intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);

	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);

}

static void i915_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length,
				  bool unused)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	int rpm_atomic_seq;

	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);

	intel_gtt_clear_range(first_entry, num_entries);

	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
}

static int ggtt_bind_vma(struct i915_vma *vma,
			 enum i915_cache_level cache_level,
			 u32 flags)
{
	struct drm_i915_gem_object *obj = vma->obj;
	u32 pte_flags = 0;
	int ret;

	ret = i915_get_ggtt_vma_pages(vma);
	if (ret)
		return ret;

	/* Currently applicable only to VLV */
	if (obj->gt_ro)
		pte_flags |= PTE_READ_ONLY;

	vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages,
				vma->node.start,
				cache_level, pte_flags);

	/*
	 * Without aliasing PPGTT there's no difference between
	 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
	 * upgrade to both bound if we bind either to avoid double-binding.
2613 */ 2614 vma->bound |= GLOBAL_BIND | LOCAL_BIND; 2615 2616 return 0; 2617 } 2618 2619 static int aliasing_gtt_bind_vma(struct i915_vma *vma, 2620 enum i915_cache_level cache_level, 2621 u32 flags) 2622 { 2623 u32 pte_flags; 2624 int ret; 2625 2626 ret = i915_get_ggtt_vma_pages(vma); 2627 if (ret) 2628 return ret; 2629 2630 /* Currently applicable only to VLV */ 2631 pte_flags = 0; 2632 if (vma->obj->gt_ro) 2633 pte_flags |= PTE_READ_ONLY; 2634 2635 2636 if (flags & GLOBAL_BIND) { 2637 vma->vm->insert_entries(vma->vm, 2638 vma->ggtt_view.pages, 2639 vma->node.start, 2640 cache_level, pte_flags); 2641 } 2642 2643 if (flags & LOCAL_BIND) { 2644 struct i915_hw_ppgtt *appgtt = 2645 to_i915(vma->vm->dev)->mm.aliasing_ppgtt; 2646 appgtt->base.insert_entries(&appgtt->base, 2647 vma->ggtt_view.pages, 2648 vma->node.start, 2649 cache_level, pte_flags); 2650 } 2651 2652 return 0; 2653 } 2654 2655 static void ggtt_unbind_vma(struct i915_vma *vma) 2656 { 2657 struct drm_device *dev = vma->vm->dev; 2658 struct drm_i915_private *dev_priv = dev->dev_private; 2659 struct drm_i915_gem_object *obj = vma->obj; 2660 const uint64_t size = min_t(uint64_t, 2661 obj->base.size, 2662 vma->node.size); 2663 2664 if (vma->bound & GLOBAL_BIND) { 2665 vma->vm->clear_range(vma->vm, 2666 vma->node.start, 2667 size, 2668 true); 2669 } 2670 2671 if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) { 2672 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt; 2673 2674 appgtt->base.clear_range(&appgtt->base, 2675 vma->node.start, 2676 size, 2677 true); 2678 } 2679 } 2680 2681 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj) 2682 { 2683 struct drm_device *dev = obj->base.dev; 2684 struct drm_i915_private *dev_priv = dev->dev_private; 2685 bool interruptible; 2686 2687 interruptible = do_idling(dev_priv); 2688 2689 dma_unmap_sg(&dev->pdev->dev, obj->pages->sgl, obj->pages->nents, 2690 PCI_DMA_BIDIRECTIONAL); 2691 2692 undo_idling(dev_priv, interruptible); 2693 } 2694 2695 static void i915_gtt_color_adjust(struct drm_mm_node *node, 2696 unsigned long color, 2697 u64 *start, 2698 u64 *end) 2699 { 2700 if (node->color != color) 2701 *start += 4096; 2702 2703 if (!list_empty(&node->node_list)) { 2704 node = list_entry(node->node_list.next, 2705 struct drm_mm_node, 2706 node_list); 2707 if (node->allocated && node->color != color) 2708 *end -= 4096; 2709 } 2710 } 2711 2712 static int i915_gem_setup_global_gtt(struct drm_device *dev, 2713 u64 start, 2714 u64 mappable_end, 2715 u64 end) 2716 { 2717 /* Let GEM Manage all of the aperture. 2718 * 2719 * However, leave one page at the end still bound to the scratch page. 2720 * There are a number of places where the hardware apparently prefetches 2721 * past the end of the object, and we've seen multiple hangs with the 2722 * GPU head pointer stuck in a batchbuffer bound at the last page of the 2723 * aperture. One page should be enough to keep any prefetching inside 2724 * of the aperture. 
2725 */ 2726 struct drm_i915_private *dev_priv = to_i915(dev); 2727 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2728 struct drm_mm_node *entry; 2729 struct drm_i915_gem_object *obj; 2730 unsigned long hole_start, hole_end; 2731 int ret; 2732 unsigned long mappable; 2733 int error; 2734 2735 mappable = min(end, mappable_end) - start; 2736 BUG_ON(mappable_end > end); 2737 2738 ggtt->base.start = start; 2739 2740 /* Subtract the guard page before address space initialization to 2741 * shrink the range used by drm_mm */ 2742 ggtt->base.total = end - start - PAGE_SIZE; 2743 i915_address_space_init(&ggtt->base, dev_priv); 2744 ggtt->base.total += PAGE_SIZE; 2745 2746 if (intel_vgpu_active(dev)) { 2747 ret = intel_vgt_balloon(dev); 2748 if (ret) 2749 return ret; 2750 } 2751 2752 if (!HAS_LLC(dev)) 2753 ggtt->base.mm.color_adjust = i915_gtt_color_adjust; 2754 2755 /* Mark any preallocated objects as occupied */ 2756 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 2757 struct i915_vma *vma = i915_gem_obj_to_vma(obj, &ggtt->base); 2758 2759 DRM_DEBUG_KMS("reserving preallocated space: %llx + %zx\n", 2760 i915_gem_obj_ggtt_offset(obj), obj->base.size); 2761 2762 WARN_ON(i915_gem_obj_ggtt_bound(obj)); 2763 ret = drm_mm_reserve_node(&ggtt->base.mm, &vma->node); 2764 if (ret) { 2765 DRM_DEBUG_KMS("Reservation failed: %i\n", ret); 2766 return ret; 2767 } 2768 vma->bound |= GLOBAL_BIND; 2769 __i915_vma_set_map_and_fenceable(vma); 2770 list_add_tail(&vma->vm_link, &ggtt->base.inactive_list); 2771 } 2772 2773 /* Clear any non-preallocated blocks */ 2774 drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) { 2775 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", 2776 hole_start, hole_end); 2777 ggtt->base.clear_range(&ggtt->base, hole_start, 2778 hole_end - hole_start, true); 2779 } 2780 2781 #ifdef __DragonFly__ 2782 device_printf(dev->dev->bsddev, 2783 "taking over the fictitious range 0x%llx-0x%llx\n", 2784 dev_priv->ggtt.mappable_base + start, dev_priv->ggtt.mappable_base + start + mappable); 2785 error = -vm_phys_fictitious_reg_range(dev_priv->ggtt.mappable_base + start, 2786 dev_priv->ggtt.mappable_base + start + mappable, VM_MEMATTR_WRITE_COMBINING); 2787 #endif 2788 2789 /* And finally clear the reserved guard page */ 2790 ggtt->base.clear_range(&ggtt->base, end - PAGE_SIZE, PAGE_SIZE, true); 2791 2792 if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) { 2793 struct i915_hw_ppgtt *ppgtt; 2794 2795 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 2796 if (!ppgtt) 2797 return -ENOMEM; 2798 2799 ret = __hw_ppgtt_init(dev, ppgtt); 2800 if (ret) { 2801 ppgtt->base.cleanup(&ppgtt->base); 2802 kfree(ppgtt); 2803 return ret; 2804 } 2805 2806 if (ppgtt->base.allocate_va_range) 2807 ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0, 2808 ppgtt->base.total); 2809 if (ret) { 2810 ppgtt->base.cleanup(&ppgtt->base); 2811 kfree(ppgtt); 2812 return ret; 2813 } 2814 2815 ppgtt->base.clear_range(&ppgtt->base, 2816 ppgtt->base.start, 2817 ppgtt->base.total, 2818 true); 2819 2820 dev_priv->mm.aliasing_ppgtt = ppgtt; 2821 WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma); 2822 ggtt->base.bind_vma = aliasing_gtt_bind_vma; 2823 } 2824 2825 return 0; 2826 } 2827 2828 /** 2829 * i915_gem_init_ggtt - Initialize GEM for Global GTT 2830 * @dev: DRM device 2831 */ 2832 void i915_gem_init_ggtt(struct drm_device *dev) 2833 { 2834 struct drm_i915_private *dev_priv = to_i915(dev); 2835 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2836 2837 i915_gem_setup_global_gtt(dev, 0, ggtt->mappable_end, ggtt->base.total); 
2838 } 2839 2840 /** 2841 * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization 2842 * @dev: DRM device 2843 */ 2844 void i915_ggtt_cleanup_hw(struct drm_device *dev) 2845 { 2846 struct drm_i915_private *dev_priv = to_i915(dev); 2847 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2848 2849 if (dev_priv->mm.aliasing_ppgtt) { 2850 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; 2851 2852 ppgtt->base.cleanup(&ppgtt->base); 2853 kfree(ppgtt); 2854 } 2855 2856 i915_gem_cleanup_stolen(dev); 2857 2858 if (drm_mm_initialized(&ggtt->base.mm)) { 2859 if (intel_vgpu_active(dev)) 2860 intel_vgt_deballoon(); 2861 2862 drm_mm_takedown(&ggtt->base.mm); 2863 list_del(&ggtt->base.global_link); 2864 } 2865 2866 ggtt->base.cleanup(&ggtt->base); 2867 } 2868 2869 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) 2870 { 2871 snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; 2872 snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; 2873 return snb_gmch_ctl << 20; 2874 } 2875 2876 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) 2877 { 2878 bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; 2879 bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; 2880 if (bdw_gmch_ctl) 2881 bdw_gmch_ctl = 1 << bdw_gmch_ctl; 2882 2883 #ifdef CONFIG_X86_32 2884 /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */ 2885 if (bdw_gmch_ctl > 4) 2886 bdw_gmch_ctl = 4; 2887 #endif 2888 2889 return bdw_gmch_ctl << 20; 2890 } 2891 2892 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) 2893 { 2894 gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; 2895 gmch_ctrl &= SNB_GMCH_GGMS_MASK; 2896 2897 if (gmch_ctrl) 2898 return 1 << (20 + gmch_ctrl); 2899 2900 return 0; 2901 } 2902 2903 static size_t gen6_get_stolen_size(u16 snb_gmch_ctl) 2904 { 2905 snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; 2906 snb_gmch_ctl &= SNB_GMCH_GMS_MASK; 2907 return snb_gmch_ctl << 25; /* 32 MB units */ 2908 } 2909 2910 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl) 2911 { 2912 bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2913 bdw_gmch_ctl &= BDW_GMCH_GMS_MASK; 2914 return bdw_gmch_ctl << 25; /* 32 MB units */ 2915 } 2916 2917 static size_t chv_get_stolen_size(u16 gmch_ctrl) 2918 { 2919 gmch_ctrl >>= SNB_GMCH_GMS_SHIFT; 2920 gmch_ctrl &= SNB_GMCH_GMS_MASK; 2921 2922 /* 2923 * 0x0 to 0x10: 32MB increments starting at 0MB 2924 * 0x11 to 0x16: 4MB increments starting at 8MB 2925 * 0x17 to 0x1d: 4MB increments start at 36MB 2926 */ 2927 if (gmch_ctrl < 0x11) 2928 return gmch_ctrl << 25; 2929 else if (gmch_ctrl < 0x17) 2930 return (gmch_ctrl - 0x11 + 2) << 22; 2931 else 2932 return (gmch_ctrl - 0x17 + 9) << 22; 2933 } 2934 2935 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) 2936 { 2937 gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2938 gen9_gmch_ctl &= BDW_GMCH_GMS_MASK; 2939 2940 if (gen9_gmch_ctl < 0xf0) 2941 return gen9_gmch_ctl << 25; /* 32 MB units */ 2942 else 2943 /* 4MB increments starting at 0xf0 for 4MB */ 2944 return (gen9_gmch_ctl - 0xf0 + 1) << 22; 2945 } 2946 2947 static int ggtt_probe_common(struct drm_device *dev, 2948 size_t gtt_size) 2949 { 2950 struct drm_i915_private *dev_priv = to_i915(dev); 2951 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2952 struct i915_page_scratch *scratch_page; 2953 phys_addr_t ggtt_phys_addr; 2954 2955 /* For Modern GENs the PTEs and register space are split in the BAR */ 2956 ggtt_phys_addr = pci_resource_start(dev->pdev, 0) + 2957 (pci_resource_len(dev->pdev, 0) / 2); 2958 2959 /* 2960 * On BXT writes larger than 64 bit to the GTT pagetable range will be 2961 * dropped. 
For WC mappings in general we have 64 byte burst writes 2962 * when the WC buffer is flushed, so we can't use it, but have to 2963 * resort to an uncached mapping. The WC issue is easily caught by the 2964 * readback check when writing GTT PTE entries. 2965 */ 2966 if (IS_BROXTON(dev)) 2967 ggtt->gsm = ioremap_nocache(ggtt_phys_addr, gtt_size); 2968 else 2969 ggtt->gsm = ioremap_wc(ggtt_phys_addr, gtt_size); 2970 if (!ggtt->gsm) { 2971 DRM_ERROR("Failed to map the gtt page table\n"); 2972 return -ENOMEM; 2973 } 2974 2975 scratch_page = alloc_scratch_page(dev); 2976 if (IS_ERR(scratch_page)) { 2977 DRM_ERROR("Scratch setup failed\n"); 2978 /* iounmap will also get called at remove, but meh */ 2979 iounmap(ggtt->gsm); 2980 return PTR_ERR(scratch_page); 2981 } 2982 2983 ggtt->base.scratch_page = scratch_page; 2984 2985 return 0; 2986 } 2987 2988 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability 2989 * bits. When using advanced contexts each context stores its own PAT, but 2990 * writing this data shouldn't be harmful even in those cases. */ 2991 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) 2992 { 2993 uint64_t pat; 2994 2995 pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ 2996 GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */ 2997 GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */ 2998 GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */ 2999 GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) | 3000 GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) | 3001 GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) | 3002 GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); 3003 3004 if (!USES_PPGTT(dev_priv)) 3005 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry, 3006 * so RTL will always use the value corresponding to 3007 * pat_sel = 000". 3008 * So let's disable cache for GGTT to avoid screen corruptions. 3009 * MOCS still can be used though. 3010 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work 3011 * before this patch, i.e. the same uncached + snooping access 3012 * like on gen6/7 seems to be in effect. 3013 * - So this just fixes blitter/render access. Again it looks 3014 * like it's not just uncached access, but uncached + snooping. 3015 * So we can still hold onto all our assumptions wrt cpu 3016 * clflushing on LLC machines. 3017 */ 3018 pat = GEN8_PPAT(0, GEN8_PPAT_UC); 3019 3020 /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b 3021 * write would work. */ 3022 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat); 3023 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); 3024 } 3025 3026 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) 3027 { 3028 uint64_t pat; 3029 3030 /* 3031 * Map WB on BDW to snooped on CHV. 3032 * 3033 * Only the snoop bit has meaning for CHV, the rest is 3034 * ignored. 3035 * 3036 * The hardware will never snoop for certain types of accesses: 3037 * - CPU GTT (GMADR->GGTT->no snoop->memory) 3038 * - PPGTT page tables 3039 * - some other special cycles 3040 * 3041 * As with BDW, we also need to consider the following for GT accesses: 3042 * "For GGTT, there is NO pat_sel[2:0] from the entry, 3043 * so RTL will always use the value corresponding to 3044 * pat_sel = 000". 3045 * Which means we must set the snoop bit in PAT entry 0 3046 * in order to keep the global status page working. 
3047 */ 3048 pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) | 3049 GEN8_PPAT(1, 0) | 3050 GEN8_PPAT(2, 0) | 3051 GEN8_PPAT(3, 0) | 3052 GEN8_PPAT(4, CHV_PPAT_SNOOP) | 3053 GEN8_PPAT(5, CHV_PPAT_SNOOP) | 3054 GEN8_PPAT(6, CHV_PPAT_SNOOP) | 3055 GEN8_PPAT(7, CHV_PPAT_SNOOP); 3056 3057 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat); 3058 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); 3059 } 3060 3061 static int gen8_gmch_probe(struct i915_ggtt *ggtt) 3062 { 3063 struct drm_device *dev = ggtt->base.dev; 3064 struct drm_i915_private *dev_priv = to_i915(dev); 3065 u16 snb_gmch_ctl; 3066 int ret; 3067 3068 /* TODO: We're not aware of mappable constraints on gen8 yet */ 3069 ggtt->mappable_base = pci_resource_start(dev->pdev, 2); 3070 ggtt->mappable_end = pci_resource_len(dev->pdev, 2); 3071 3072 #if 0 3073 if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39))) 3074 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39)); 3075 #endif 3076 3077 pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 3078 3079 if (INTEL_INFO(dev)->gen >= 9) { 3080 ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl); 3081 ggtt->size = gen8_get_total_gtt_size(snb_gmch_ctl); 3082 } else if (IS_CHERRYVIEW(dev)) { 3083 ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl); 3084 ggtt->size = chv_get_total_gtt_size(snb_gmch_ctl); 3085 } else { 3086 ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl); 3087 ggtt->size = gen8_get_total_gtt_size(snb_gmch_ctl); 3088 } 3089 3090 ggtt->base.total = (ggtt->size / sizeof(gen8_pte_t)) << PAGE_SHIFT; 3091 3092 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) 3093 chv_setup_private_ppat(dev_priv); 3094 else 3095 bdw_setup_private_ppat(dev_priv); 3096 3097 ret = ggtt_probe_common(dev, ggtt->size); 3098 3099 ggtt->base.clear_range = gen8_ggtt_clear_range; 3100 if (IS_CHERRYVIEW(dev_priv)) 3101 ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL; 3102 else 3103 ggtt->base.insert_entries = gen8_ggtt_insert_entries; 3104 ggtt->base.bind_vma = ggtt_bind_vma; 3105 ggtt->base.unbind_vma = ggtt_unbind_vma; 3106 3107 return ret; 3108 } 3109 3110 static int gen6_gmch_probe(struct i915_ggtt *ggtt) 3111 { 3112 struct drm_device *dev = ggtt->base.dev; 3113 u16 snb_gmch_ctl; 3114 int ret; 3115 3116 ggtt->mappable_base = pci_resource_start(dev->pdev, 2); 3117 ggtt->mappable_end = pci_resource_len(dev->pdev, 2); 3118 3119 /* 64/512MB is the current min/max we actually know of, but this is just 3120 * a coarse sanity check. 
3121 */ 3122 if ((ggtt->mappable_end < (64<<20) || (ggtt->mappable_end > (512<<20)))) { 3123 DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end); 3124 return -ENXIO; 3125 } 3126 3127 #if 0 3128 if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40))) 3129 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40)); 3130 #endif 3131 pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 3132 3133 ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl); 3134 ggtt->size = gen6_get_total_gtt_size(snb_gmch_ctl); 3135 ggtt->base.total = (ggtt->size / sizeof(gen6_pte_t)) << PAGE_SHIFT; 3136 3137 ret = ggtt_probe_common(dev, ggtt->size); 3138 3139 ggtt->base.clear_range = gen6_ggtt_clear_range; 3140 ggtt->base.insert_entries = gen6_ggtt_insert_entries; 3141 ggtt->base.bind_vma = ggtt_bind_vma; 3142 ggtt->base.unbind_vma = ggtt_unbind_vma; 3143 3144 return ret; 3145 } 3146 3147 static void gen6_gmch_remove(struct i915_address_space *vm) 3148 { 3149 struct i915_ggtt *ggtt = container_of(vm, struct i915_ggtt, base); 3150 3151 iounmap(ggtt->gsm); 3152 free_scratch_page(vm->dev, vm->scratch_page); 3153 } 3154 3155 static int i915_gmch_probe(struct i915_ggtt *ggtt) 3156 { 3157 struct drm_device *dev = ggtt->base.dev; 3158 struct drm_i915_private *dev_priv = to_i915(dev); 3159 #if 0 3160 int ret; 3161 3162 ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL); 3163 if (!ret) { 3164 DRM_ERROR("failed to set up gmch\n"); 3165 return -EIO; 3166 } 3167 #endif 3168 3169 intel_gtt_get(&ggtt->base.total, &ggtt->stolen_size, 3170 &ggtt->mappable_base, &ggtt->mappable_end); 3171 3172 ggtt->do_idle_maps = needs_idle_maps(dev_priv->dev); 3173 ggtt->base.insert_entries = i915_ggtt_insert_entries; 3174 ggtt->base.clear_range = i915_ggtt_clear_range; 3175 ggtt->base.bind_vma = ggtt_bind_vma; 3176 ggtt->base.unbind_vma = ggtt_unbind_vma; 3177 3178 if (unlikely(ggtt->do_idle_maps)) 3179 DRM_INFO("applying Ironlake quirks for intel_iommu\n"); 3180 3181 return 0; 3182 } 3183 3184 static void i915_gmch_remove(struct i915_address_space *vm) 3185 { 3186 intel_gmch_remove(); 3187 } 3188 3189 /** 3190 * i915_ggtt_init_hw - Initialize GGTT hardware 3191 * @dev: DRM device 3192 */ 3193 int i915_ggtt_init_hw(struct drm_device *dev) 3194 { 3195 struct drm_i915_private *dev_priv = to_i915(dev); 3196 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3197 int ret; 3198 3199 if (INTEL_INFO(dev)->gen <= 5) { 3200 ggtt->probe = i915_gmch_probe; 3201 ggtt->base.cleanup = i915_gmch_remove; 3202 } else if (INTEL_INFO(dev)->gen < 8) { 3203 ggtt->probe = gen6_gmch_probe; 3204 ggtt->base.cleanup = gen6_gmch_remove; 3205 3206 if (HAS_EDRAM(dev)) 3207 ggtt->base.pte_encode = iris_pte_encode; 3208 else if (IS_HASWELL(dev)) 3209 ggtt->base.pte_encode = hsw_pte_encode; 3210 else if (IS_VALLEYVIEW(dev)) 3211 ggtt->base.pte_encode = byt_pte_encode; 3212 else if (INTEL_INFO(dev)->gen >= 7) 3213 ggtt->base.pte_encode = ivb_pte_encode; 3214 else 3215 ggtt->base.pte_encode = snb_pte_encode; 3216 } else { 3217 ggtt->probe = gen8_gmch_probe; 3218 ggtt->base.cleanup = gen6_gmch_remove; 3219 } 3220 3221 ggtt->base.dev = dev; 3222 ggtt->base.is_ggtt = true; 3223 3224 ret = ggtt->probe(ggtt); 3225 if (ret) 3226 return ret; 3227 3228 if ((ggtt->base.total - 1) >> 32) { 3229 DRM_ERROR("We never expected a Global GTT with more than 32bits" 3230 "of address space! 
Found %lldM!\n", 3231 ggtt->base.total >> 20); 3232 ggtt->base.total = 1ULL << 32; 3233 ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total); 3234 } 3235 3236 /* 3237 * Initialise stolen early so that we may reserve preallocated 3238 * objects for the BIOS to KMS transition. 3239 */ 3240 ret = i915_gem_init_stolen(dev); 3241 if (ret) 3242 goto out_gtt_cleanup; 3243 3244 /* GMADR is the PCI mmio aperture into the global GTT. */ 3245 DRM_INFO("Memory usable by graphics device = %lluM\n", 3246 ggtt->base.total >> 20); 3247 DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20); 3248 DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", ggtt->stolen_size >> 20); 3249 #ifdef CONFIG_INTEL_IOMMU 3250 if (intel_iommu_gfx_mapped) 3251 DRM_INFO("VT-d active for gfx access\n"); 3252 #endif 3253 /* 3254 * i915.enable_ppgtt is read-only, so do an early pass to validate the 3255 * user's requested state against the hardware/driver capabilities. We 3256 * do this now so that we can print out any log messages once rather 3257 * than every time we check intel_enable_ppgtt(). 3258 */ 3259 i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt); 3260 DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt); 3261 3262 return 0; 3263 3264 out_gtt_cleanup: 3265 ggtt->base.cleanup(&ggtt->base); 3266 3267 return ret; 3268 } 3269 3270 int i915_ggtt_enable_hw(struct drm_device *dev) 3271 { 3272 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) 3273 return -EIO; 3274 3275 return 0; 3276 } 3277 3278 void i915_gem_restore_gtt_mappings(struct drm_device *dev) 3279 { 3280 struct drm_i915_private *dev_priv = to_i915(dev); 3281 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3282 struct drm_i915_gem_object *obj; 3283 struct i915_vma *vma; 3284 bool flush; 3285 3286 i915_check_and_clear_faults(dev); 3287 3288 /* First fill our portion of the GTT with scratch pages */ 3289 ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total, 3290 true); 3291 3292 /* Cache flush objects bound into GGTT and rebind them. 
*/ 3293 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 3294 flush = false; 3295 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3296 if (vma->vm != &ggtt->base) 3297 continue; 3298 3299 WARN_ON(i915_vma_bind(vma, obj->cache_level, 3300 PIN_UPDATE)); 3301 3302 flush = true; 3303 } 3304 3305 if (flush) 3306 i915_gem_clflush_object(obj, obj->pin_display); 3307 } 3308 3309 if (INTEL_INFO(dev)->gen >= 8) { 3310 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) 3311 chv_setup_private_ppat(dev_priv); 3312 else 3313 bdw_setup_private_ppat(dev_priv); 3314 3315 return; 3316 } 3317 3318 if (USES_PPGTT(dev)) { 3319 struct i915_address_space *vm; 3320 3321 list_for_each_entry(vm, &dev_priv->vm_list, global_link) { 3322 /* TODO: Perhaps it shouldn't be gen6 specific */ 3323 3324 struct i915_hw_ppgtt *ppgtt; 3325 3326 if (vm->is_ggtt) 3327 ppgtt = dev_priv->mm.aliasing_ppgtt; 3328 else 3329 ppgtt = i915_vm_to_ppgtt(vm); 3330 3331 gen6_write_page_range(dev_priv, &ppgtt->pd, 3332 0, ppgtt->base.total); 3333 } 3334 } 3335 3336 i915_ggtt_flush(dev_priv); 3337 } 3338 3339 static struct i915_vma * 3340 __i915_gem_vma_create(struct drm_i915_gem_object *obj, 3341 struct i915_address_space *vm, 3342 const struct i915_ggtt_view *ggtt_view) 3343 { 3344 struct i915_vma *vma; 3345 3346 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) 3347 return ERR_PTR(-EINVAL); 3348 3349 vma = kzalloc(sizeof(*vma), GFP_KERNEL); 3350 if (vma == NULL) 3351 return ERR_PTR(-ENOMEM); 3352 3353 INIT_LIST_HEAD(&vma->vm_link); 3354 INIT_LIST_HEAD(&vma->obj_link); 3355 INIT_LIST_HEAD(&vma->exec_list); 3356 vma->vm = vm; 3357 vma->obj = obj; 3358 vma->is_ggtt = i915_is_ggtt(vm); 3359 3360 if (i915_is_ggtt(vm)) 3361 vma->ggtt_view = *ggtt_view; 3362 else 3363 i915_ppgtt_get(i915_vm_to_ppgtt(vm)); 3364 3365 list_add_tail(&vma->obj_link, &obj->vma_list); 3366 3367 return vma; 3368 } 3369 3370 struct i915_vma * 3371 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, 3372 struct i915_address_space *vm) 3373 { 3374 struct i915_vma *vma; 3375 3376 vma = i915_gem_obj_to_vma(obj, vm); 3377 if (!vma) 3378 vma = __i915_gem_vma_create(obj, vm, 3379 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL); 3380 3381 return vma; 3382 } 3383 3384 struct i915_vma * 3385 i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj, 3386 const struct i915_ggtt_view *view) 3387 { 3388 struct drm_device *dev = obj->base.dev; 3389 struct drm_i915_private *dev_priv = to_i915(dev); 3390 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3391 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); 3392 3393 if (!vma) 3394 vma = __i915_gem_vma_create(obj, &ggtt->base, view); 3395 3396 return vma; 3397 3398 } 3399 3400 static struct scatterlist * 3401 rotate_pages(const dma_addr_t *in, unsigned int offset, 3402 unsigned int width, unsigned int height, 3403 unsigned int stride, 3404 struct sg_table *st, struct scatterlist *sg) 3405 { 3406 unsigned int column, row; 3407 unsigned int src_idx; 3408 3409 for (column = 0; column < width; column++) { 3410 src_idx = stride * (height - 1) + column; 3411 for (row = 0; row < height; row++) { 3412 st->nents++; 3413 /* We don't need the pages, but need to initialize 3414 * the entries so the sg list can be happily traversed. 3415 * The only thing we need are DMA addresses. 
3416 */ 3417 sg_set_page(sg, NULL, PAGE_SIZE, 0); 3418 sg_dma_address(sg) = in[offset + src_idx]; 3419 sg_dma_len(sg) = PAGE_SIZE; 3420 sg = sg_next(sg); 3421 src_idx -= stride; 3422 } 3423 } 3424 3425 return sg; 3426 } 3427 3428 static struct sg_table * 3429 intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info, 3430 struct drm_i915_gem_object *obj) 3431 { 3432 unsigned int size_pages = rot_info->plane[0].width * rot_info->plane[0].height; 3433 unsigned int size_pages_uv; 3434 struct sg_page_iter sg_iter; 3435 unsigned long i; 3436 dma_addr_t *page_addr_list; 3437 struct sg_table *st; 3438 unsigned int uv_start_page; 3439 struct scatterlist *sg; 3440 int ret = -ENOMEM; 3441 3442 /* Allocate a temporary list of source pages for random access. */ 3443 page_addr_list = drm_malloc_ab(obj->base.size / PAGE_SIZE, 3444 sizeof(dma_addr_t)); 3445 if (!page_addr_list) 3446 return ERR_PTR(ret); 3447 3448 /* Account for UV plane with NV12. */ 3449 if (rot_info->pixel_format == DRM_FORMAT_NV12) 3450 size_pages_uv = rot_info->plane[1].width * rot_info->plane[1].height; 3451 else 3452 size_pages_uv = 0; 3453 3454 /* Allocate target SG list. */ 3455 st = kmalloc(sizeof(*st), M_DRM, M_WAITOK); 3456 if (!st) 3457 goto err_st_alloc; 3458 3459 ret = sg_alloc_table(st, size_pages + size_pages_uv, GFP_KERNEL); 3460 if (ret) 3461 goto err_sg_alloc; 3462 3463 /* Populate source page list from the object. */ 3464 i = 0; 3465 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { 3466 page_addr_list[i] = sg_page_iter_dma_address(&sg_iter); 3467 i++; 3468 } 3469 3470 st->nents = 0; 3471 sg = st->sgl; 3472 3473 /* Rotate the pages. */ 3474 sg = rotate_pages(page_addr_list, 0, 3475 rot_info->plane[0].width, rot_info->plane[0].height, 3476 rot_info->plane[0].width, 3477 st, sg); 3478 3479 /* Append the UV plane if NV12. */ 3480 if (rot_info->pixel_format == DRM_FORMAT_NV12) { 3481 uv_start_page = size_pages; 3482 3483 /* Check for tile-row un-alignment. */ 3484 if (offset_in_page(rot_info->uv_offset)) 3485 uv_start_page--; 3486 3487 rot_info->uv_start_page = uv_start_page; 3488 3489 sg = rotate_pages(page_addr_list, rot_info->uv_start_page, 3490 rot_info->plane[1].width, rot_info->plane[1].height, 3491 rot_info->plane[1].width, 3492 st, sg); 3493 } 3494 3495 DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages (%u plane 0)).\n", 3496 obj->base.size, rot_info->plane[0].width, 3497 rot_info->plane[0].height, size_pages + size_pages_uv, 3498 size_pages); 3499 3500 drm_free_large(page_addr_list); 3501 3502 return st; 3503 3504 err_sg_alloc: 3505 kfree(st); 3506 err_st_alloc: 3507 drm_free_large(page_addr_list); 3508 3509 DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! 
(%d) (%ux%u tiles, %u pages (%u plane 0))\n", 3510 obj->base.size, ret, rot_info->plane[0].width, 3511 rot_info->plane[0].height, size_pages + size_pages_uv, 3512 size_pages); 3513 return ERR_PTR(ret); 3514 } 3515 3516 static struct sg_table * 3517 intel_partial_pages(const struct i915_ggtt_view *view, 3518 struct drm_i915_gem_object *obj) 3519 { 3520 struct sg_table *st; 3521 struct scatterlist *sg; 3522 struct sg_page_iter obj_sg_iter; 3523 int ret = -ENOMEM; 3524 3525 st = kmalloc(sizeof(*st), M_DRM, M_WAITOK); 3526 if (!st) 3527 goto err_st_alloc; 3528 3529 ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL); 3530 if (ret) 3531 goto err_sg_alloc; 3532 3533 sg = st->sgl; 3534 st->nents = 0; 3535 for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents, 3536 view->params.partial.offset) 3537 { 3538 if (st->nents >= view->params.partial.size) 3539 break; 3540 3541 sg_set_page(sg, NULL, PAGE_SIZE, 0); 3542 sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter); 3543 sg_dma_len(sg) = PAGE_SIZE; 3544 3545 sg = sg_next(sg); 3546 st->nents++; 3547 } 3548 3549 return st; 3550 3551 err_sg_alloc: 3552 kfree(st); 3553 err_st_alloc: 3554 return ERR_PTR(ret); 3555 } 3556 3557 static int 3558 i915_get_ggtt_vma_pages(struct i915_vma *vma) 3559 { 3560 int ret = 0; 3561 3562 if (vma->ggtt_view.pages) 3563 return 0; 3564 3565 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) 3566 vma->ggtt_view.pages = vma->obj->pages; 3567 else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED) 3568 vma->ggtt_view.pages = 3569 intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj); 3570 else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL) 3571 vma->ggtt_view.pages = 3572 intel_partial_pages(&vma->ggtt_view, vma->obj); 3573 else 3574 WARN_ONCE(1, "GGTT view %u not implemented!\n", 3575 vma->ggtt_view.type); 3576 3577 if (!vma->ggtt_view.pages) { 3578 DRM_ERROR("Failed to get pages for GGTT view type %u!\n", 3579 vma->ggtt_view.type); 3580 ret = -EINVAL; 3581 } else if (IS_ERR(vma->ggtt_view.pages)) { 3582 ret = PTR_ERR(vma->ggtt_view.pages); 3583 vma->ggtt_view.pages = NULL; 3584 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n", 3585 vma->ggtt_view.type, ret); 3586 } 3587 3588 return ret; 3589 } 3590 3591 /** 3592 * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space. 3593 * @vma: VMA to map 3594 * @cache_level: mapping cache level 3595 * @flags: flags like global or local mapping 3596 * 3597 * DMA addresses are taken from the scatter-gather table of this object (or of 3598 * this VMA in case of non-default GGTT views) and PTE entries set up. 3599 * Note that DMA addresses are also the only part of the SG table we care about. 
3600 */ 3601 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, 3602 u32 flags) 3603 { 3604 int ret; 3605 u32 bind_flags; 3606 3607 if (WARN_ON(flags == 0)) 3608 return -EINVAL; 3609 3610 bind_flags = 0; 3611 if (flags & PIN_GLOBAL) 3612 bind_flags |= GLOBAL_BIND; 3613 if (flags & PIN_USER) 3614 bind_flags |= LOCAL_BIND; 3615 3616 if (flags & PIN_UPDATE) 3617 bind_flags |= vma->bound; 3618 else 3619 bind_flags &= ~vma->bound; 3620 3621 if (bind_flags == 0) 3622 return 0; 3623 3624 if (vma->bound == 0 && vma->vm->allocate_va_range) { 3625 /* XXX: i915_vma_pin() will fix this +- hack */ 3626 vma->pin_count++; 3627 trace_i915_va_alloc(vma); 3628 ret = vma->vm->allocate_va_range(vma->vm, 3629 vma->node.start, 3630 vma->node.size); 3631 vma->pin_count--; 3632 if (ret) 3633 return ret; 3634 } 3635 3636 ret = vma->vm->bind_vma(vma, cache_level, bind_flags); 3637 if (ret) 3638 return ret; 3639 3640 vma->bound |= bind_flags; 3641 3642 return 0; 3643 } 3644 3645 /** 3646 * i915_ggtt_view_size - Get the size of a GGTT view. 3647 * @obj: Object the view is of. 3648 * @view: The view in question. 3649 * 3650 * @return The size of the GGTT view in bytes. 3651 */ 3652 size_t 3653 i915_ggtt_view_size(struct drm_i915_gem_object *obj, 3654 const struct i915_ggtt_view *view) 3655 { 3656 if (view->type == I915_GGTT_VIEW_NORMAL) { 3657 return obj->base.size; 3658 } else if (view->type == I915_GGTT_VIEW_ROTATED) { 3659 return intel_rotation_info_size(&view->params.rotated) << PAGE_SHIFT; 3660 } else if (view->type == I915_GGTT_VIEW_PARTIAL) { 3661 return view->params.partial.size << PAGE_SHIFT; 3662 } else { 3663 WARN_ONCE(1, "GGTT view %u not implemented!\n", view->type); 3664 return obj->base.size; 3665 } 3666 } 3667