/*
 * Copyright © 2010 Daniel Vetter
 * Copyright © 2011-2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/seq_file.h>
#include <linux/stop_machine.h>
#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"

#include <linux/bitmap.h>

#include <sys/mplock2.h>

/**
 * DOC: Global GTT views
 *
 * Background and previous state
 *
 * Historically objects could exist (be bound) in global GTT space only as
 * singular instances with a view representing all of the object's backing pages
 * in a linear fashion. This view will be called a normal view.
 *
 * To support multiple views of the same object, where the number of mapped
 * pages is not equal to the backing store, or where the layout of the pages
 * is not linear, the concept of a GGTT view was added.
 *
 * One example of an alternative view is a stereo display driven by a single
 * image. In this case we would have a framebuffer looking like this
 * (2x2 pages):
 *
 *    12
 *    34
 *
 * The above would represent a normal GGTT view as normally mapped for GPU or
 * CPU rendering. In contrast, fed to the display engine would be an alternative
 * view which could look something like this:
 *
 *   1212
 *   3434
 *
 * In this example both the size and the layout of pages in the alternative
 * view are different from the normal view.
 *
 * Implementation and usage
 *
 * GGTT views are implemented using VMAs and are distinguished via enum
 * i915_ggtt_view_type and struct i915_ggtt_view.
 *
 * A new flavour of core GEM functions which work with GGTT bound objects was
 * added with the _ggtt_ infix, and sometimes with the _view postfix, to avoid
 * renaming large amounts of code. They take the struct i915_ggtt_view
 * parameter encapsulating all metadata required to implement a view.
 *
 * As a helper for callers which are only interested in the normal view, a
 * globally const i915_ggtt_view_normal singleton instance exists. All old core
 * GEM API functions, the ones not taking the view parameter, operate on, or
 * with, the normal GGTT view.
 *
 * Code wanting to add or use a new GGTT view needs to:
 *
 * 1. Add a new enum with a suitable name.
 * 2. Extend the metadata in the i915_ggtt_view structure if required.
 * 3. Add support to i915_get_vma_pages().
 *
 * New views are required to build a scatter-gather table from within the
 * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
 * exists for the lifetime of a VMA.
 *
 * The core API is designed to have copy semantics, which means that a passed-in
 * struct i915_ggtt_view does not need to be persistent (left around after
 * calling the core API functions).
 *
 */

static int
i915_get_ggtt_vma_pages(struct i915_vma *vma);

const struct i915_ggtt_view i915_ggtt_view_normal = {
        .type = I915_GGTT_VIEW_NORMAL,
};
const struct i915_ggtt_view i915_ggtt_view_rotated = {
        .type = I915_GGTT_VIEW_ROTATED,
};

static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
{
        bool has_aliasing_ppgtt;
        bool has_full_ppgtt;
        bool has_full_48bit_ppgtt;

        has_aliasing_ppgtt = INTEL_INFO(dev)->gen >= 6;
        has_full_ppgtt = INTEL_INFO(dev)->gen >= 7;
        has_full_48bit_ppgtt = IS_BROADWELL(dev) || INTEL_INFO(dev)->gen >= 9;

        if (intel_vgpu_active(dev))
                has_full_ppgtt = false; /* emulation is too hard */

        /*
         * We don't allow disabling PPGTT for gen9+ as it's a requirement for
         * execlists, the sole mechanism available to submit work.
         */
        if (INTEL_INFO(dev)->gen < 9 &&
            (enable_ppgtt == 0 || !has_aliasing_ppgtt))
                return 0;

        if (enable_ppgtt == 1)
                return 1;

        if (enable_ppgtt == 2 && has_full_ppgtt)
                return 2;

        if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
                return 3;

#ifdef CONFIG_INTEL_IOMMU
        /* Disable ppgtt on SNB if VT-d is on. */
        if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
                DRM_INFO("Disabling PPGTT because VT-d is on\n");
                return 0;
        }
#endif

        /* Early VLV doesn't have this */
        if (IS_VALLEYVIEW(dev) && dev->pdev->revision < 0xb) {
                DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
                return 0;
        }

        if (INTEL_INFO(dev)->gen >= 8 && i915.enable_execlists)
                return has_full_48bit_ppgtt ? 3 : 2;
        else
                return has_aliasing_ppgtt ? 1 : 0;
}

static int ppgtt_bind_vma(struct i915_vma *vma,
                          enum i915_cache_level cache_level,
                          u32 unused)
{
        u32 pte_flags = 0;

        /* Currently applicable only to VLV */
        if (vma->obj->gt_ro)
                pte_flags |= PTE_READ_ONLY;

        vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
                                cache_level, pte_flags);

        return 0;
}

static void ppgtt_unbind_vma(struct i915_vma *vma)
{
        vma->vm->clear_range(vma->vm,
                             vma->node.start,
                             vma->obj->base.size,
                             true);
}

static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
                                  enum i915_cache_level level,
                                  bool valid)
{
        gen8_pte_t pte = valid ?
_PAGE_PRESENT | _PAGE_RW : 0; 189 pte |= addr; 190 191 switch (level) { 192 case I915_CACHE_NONE: 193 pte |= PPAT_UNCACHED_INDEX; 194 break; 195 case I915_CACHE_WT: 196 pte |= PPAT_DISPLAY_ELLC_INDEX; 197 break; 198 default: 199 pte |= PPAT_CACHED_INDEX; 200 break; 201 } 202 203 return pte; 204 } 205 206 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr, 207 const enum i915_cache_level level) 208 { 209 gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW; 210 pde |= addr; 211 if (level != I915_CACHE_NONE) 212 pde |= PPAT_CACHED_PDE_INDEX; 213 else 214 pde |= PPAT_UNCACHED_INDEX; 215 return pde; 216 } 217 218 #define gen8_pdpe_encode gen8_pde_encode 219 #define gen8_pml4e_encode gen8_pde_encode 220 221 static gen6_pte_t snb_pte_encode(dma_addr_t addr, 222 enum i915_cache_level level, 223 bool valid, u32 unused) 224 { 225 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 226 pte |= GEN6_PTE_ADDR_ENCODE(addr); 227 228 switch (level) { 229 case I915_CACHE_L3_LLC: 230 case I915_CACHE_LLC: 231 pte |= GEN6_PTE_CACHE_LLC; 232 break; 233 case I915_CACHE_NONE: 234 pte |= GEN6_PTE_UNCACHED; 235 break; 236 default: 237 MISSING_CASE(level); 238 } 239 240 return pte; 241 } 242 243 static gen6_pte_t ivb_pte_encode(dma_addr_t addr, 244 enum i915_cache_level level, 245 bool valid, u32 unused) 246 { 247 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 248 pte |= GEN6_PTE_ADDR_ENCODE(addr); 249 250 switch (level) { 251 case I915_CACHE_L3_LLC: 252 pte |= GEN7_PTE_CACHE_L3_LLC; 253 break; 254 case I915_CACHE_LLC: 255 pte |= GEN6_PTE_CACHE_LLC; 256 break; 257 case I915_CACHE_NONE: 258 pte |= GEN6_PTE_UNCACHED; 259 break; 260 default: 261 MISSING_CASE(level); 262 } 263 264 return pte; 265 } 266 267 static gen6_pte_t byt_pte_encode(dma_addr_t addr, 268 enum i915_cache_level level, 269 bool valid, u32 flags) 270 { 271 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 272 pte |= GEN6_PTE_ADDR_ENCODE(addr); 273 274 if (!(flags & PTE_READ_ONLY)) 275 pte |= BYT_PTE_WRITEABLE; 276 277 if (level != I915_CACHE_NONE) 278 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; 279 280 return pte; 281 } 282 283 static gen6_pte_t hsw_pte_encode(dma_addr_t addr, 284 enum i915_cache_level level, 285 bool valid, u32 unused) 286 { 287 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 288 pte |= HSW_PTE_ADDR_ENCODE(addr); 289 290 if (level != I915_CACHE_NONE) 291 pte |= HSW_WB_LLC_AGE3; 292 293 return pte; 294 } 295 296 static gen6_pte_t iris_pte_encode(dma_addr_t addr, 297 enum i915_cache_level level, 298 bool valid, u32 unused) 299 { 300 gen6_pte_t pte = valid ? 
GEN6_PTE_VALID : 0; 301 pte |= HSW_PTE_ADDR_ENCODE(addr); 302 303 switch (level) { 304 case I915_CACHE_NONE: 305 break; 306 case I915_CACHE_WT: 307 pte |= HSW_WT_ELLC_LLC_AGE3; 308 break; 309 default: 310 pte |= HSW_WB_ELLC_LLC_AGE3; 311 break; 312 } 313 314 return pte; 315 } 316 317 static int __setup_page_dma(struct drm_device *dev, 318 struct i915_page_dma *p, gfp_t flags) 319 { 320 struct device *device = &dev->pdev->dev; 321 322 p->page = alloc_page(flags); 323 if (!p->page) 324 return -ENOMEM; 325 326 p->daddr = dma_map_page(device, 327 p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL); 328 329 if (dma_mapping_error(device, p->daddr)) { 330 __free_page(p->page); 331 return -EINVAL; 332 } 333 334 return 0; 335 } 336 337 static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p) 338 { 339 return __setup_page_dma(dev, p, GFP_KERNEL); 340 } 341 342 static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p) 343 { 344 if (WARN_ON(!p->page)) 345 return; 346 347 dma_unmap_page(&dev->pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL); 348 __free_page(p->page); 349 memset(p, 0, sizeof(*p)); 350 } 351 352 static void *kmap_page_dma(struct i915_page_dma *p) 353 { 354 return kmap_atomic(p->page); 355 } 356 357 /* We use the flushing unmap only with ppgtt structures: 358 * page directories, page tables and scratch pages. 359 */ 360 static void kunmap_page_dma(struct drm_device *dev, void *vaddr) 361 { 362 /* There are only few exceptions for gen >=6. chv and bxt. 363 * And we are not sure about the latter so play safe for now. 364 */ 365 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) 366 drm_clflush_virt_range(vaddr, PAGE_SIZE); 367 368 kunmap_atomic(vaddr); 369 } 370 371 #define kmap_px(px) kmap_page_dma(px_base(px)) 372 #define kunmap_px(ppgtt, vaddr) kunmap_page_dma((ppgtt)->base.dev, (vaddr)) 373 374 #define setup_px(dev, px) setup_page_dma((dev), px_base(px)) 375 #define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px)) 376 #define fill_px(dev, px, v) fill_page_dma((dev), px_base(px), (v)) 377 #define fill32_px(dev, px, v) fill_page_dma_32((dev), px_base(px), (v)) 378 379 static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p, 380 const uint64_t val) 381 { 382 int i; 383 uint64_t * const vaddr = kmap_page_dma(p); 384 385 for (i = 0; i < 512; i++) 386 vaddr[i] = val; 387 388 kunmap_page_dma(dev, vaddr); 389 } 390 391 static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p, 392 const uint32_t val32) 393 { 394 uint64_t v = val32; 395 396 v = v << 32 | val32; 397 398 fill_page_dma(dev, p, v); 399 } 400 401 static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev) 402 { 403 struct i915_page_scratch *sp; 404 int ret; 405 406 sp = kzalloc(sizeof(*sp), GFP_KERNEL); 407 if (sp == NULL) 408 return ERR_PTR(-ENOMEM); 409 410 ret = __setup_page_dma(dev, px_base(sp), GFP_DMA32 | __GFP_ZERO); 411 if (ret) { 412 kfree(sp); 413 return ERR_PTR(ret); 414 } 415 416 set_pages_uc(px_page(sp), 1); 417 418 return sp; 419 } 420 421 static void free_scratch_page(struct drm_device *dev, 422 struct i915_page_scratch *sp) 423 { 424 set_pages_wb(px_page(sp), 1); 425 426 cleanup_px(dev, sp); 427 kfree(sp); 428 } 429 430 static struct i915_page_table *alloc_pt(struct drm_device *dev) 431 { 432 struct i915_page_table *pt; 433 const size_t count = INTEL_INFO(dev)->gen >= 8 ? 
434 GEN8_PTES : GEN6_PTES; 435 int ret = -ENOMEM; 436 437 pt = kzalloc(sizeof(*pt), GFP_KERNEL); 438 if (!pt) 439 return ERR_PTR(-ENOMEM); 440 441 pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes), 442 GFP_KERNEL); 443 444 if (!pt->used_ptes) 445 goto fail_bitmap; 446 447 ret = setup_px(dev, pt); 448 if (ret) 449 goto fail_page_m; 450 451 return pt; 452 453 fail_page_m: 454 kfree(pt->used_ptes); 455 fail_bitmap: 456 kfree(pt); 457 458 return ERR_PTR(ret); 459 } 460 461 static void free_pt(struct drm_device *dev, struct i915_page_table *pt) 462 { 463 cleanup_px(dev, pt); 464 kfree(pt->used_ptes); 465 kfree(pt); 466 } 467 468 static void gen8_initialize_pt(struct i915_address_space *vm, 469 struct i915_page_table *pt) 470 { 471 gen8_pte_t scratch_pte; 472 473 scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), 474 I915_CACHE_LLC, true); 475 476 fill_px(vm->dev, pt, scratch_pte); 477 } 478 479 static void gen6_initialize_pt(struct i915_address_space *vm, 480 struct i915_page_table *pt) 481 { 482 gen6_pte_t scratch_pte; 483 484 WARN_ON(px_dma(vm->scratch_page) == 0); 485 486 scratch_pte = vm->pte_encode(px_dma(vm->scratch_page), 487 I915_CACHE_LLC, true, 0); 488 489 fill32_px(vm->dev, pt, scratch_pte); 490 } 491 492 static struct i915_page_directory *alloc_pd(struct drm_device *dev) 493 { 494 struct i915_page_directory *pd; 495 int ret = -ENOMEM; 496 497 pd = kzalloc(sizeof(*pd), GFP_KERNEL); 498 if (!pd) 499 return ERR_PTR(-ENOMEM); 500 501 pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES), 502 sizeof(*pd->used_pdes), GFP_KERNEL); 503 if (!pd->used_pdes) 504 goto fail_bitmap; 505 506 ret = setup_px(dev, pd); 507 if (ret) 508 goto fail_page_m; 509 510 return pd; 511 512 fail_page_m: 513 kfree(pd->used_pdes); 514 fail_bitmap: 515 kfree(pd); 516 517 return ERR_PTR(ret); 518 } 519 520 static void free_pd(struct drm_device *dev, struct i915_page_directory *pd) 521 { 522 if (px_page(pd)) { 523 cleanup_px(dev, pd); 524 kfree(pd->used_pdes); 525 kfree(pd); 526 } 527 } 528 529 static void gen8_initialize_pd(struct i915_address_space *vm, 530 struct i915_page_directory *pd) 531 { 532 gen8_pde_t scratch_pde; 533 534 scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC); 535 536 fill_px(vm->dev, pd, scratch_pde); 537 } 538 539 static int __pdp_init(struct drm_device *dev, 540 struct i915_page_directory_pointer *pdp) 541 { 542 size_t pdpes = I915_PDPES_PER_PDP(dev); 543 544 pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes), 545 sizeof(unsigned long), 546 GFP_KERNEL); 547 if (!pdp->used_pdpes) 548 return -ENOMEM; 549 550 pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory), 551 GFP_KERNEL); 552 if (!pdp->page_directory) { 553 kfree(pdp->used_pdpes); 554 /* the PDP might be the statically allocated top level. 
Keep it 555 * as clean as possible */ 556 pdp->used_pdpes = NULL; 557 return -ENOMEM; 558 } 559 560 return 0; 561 } 562 563 static void __pdp_fini(struct i915_page_directory_pointer *pdp) 564 { 565 kfree(pdp->used_pdpes); 566 kfree(pdp->page_directory); 567 pdp->page_directory = NULL; 568 } 569 570 static struct 571 i915_page_directory_pointer *alloc_pdp(struct drm_device *dev) 572 { 573 struct i915_page_directory_pointer *pdp; 574 int ret = -ENOMEM; 575 576 WARN_ON(!USES_FULL_48BIT_PPGTT(dev)); 577 578 pdp = kzalloc(sizeof(*pdp), GFP_KERNEL); 579 if (!pdp) 580 return ERR_PTR(-ENOMEM); 581 582 ret = __pdp_init(dev, pdp); 583 if (ret) 584 goto fail_bitmap; 585 586 ret = setup_px(dev, pdp); 587 if (ret) 588 goto fail_page_m; 589 590 return pdp; 591 592 fail_page_m: 593 __pdp_fini(pdp); 594 fail_bitmap: 595 kfree(pdp); 596 597 return ERR_PTR(ret); 598 } 599 600 static void free_pdp(struct drm_device *dev, 601 struct i915_page_directory_pointer *pdp) 602 { 603 __pdp_fini(pdp); 604 if (USES_FULL_48BIT_PPGTT(dev)) { 605 cleanup_px(dev, pdp); 606 kfree(pdp); 607 } 608 } 609 610 static void gen8_initialize_pdp(struct i915_address_space *vm, 611 struct i915_page_directory_pointer *pdp) 612 { 613 gen8_ppgtt_pdpe_t scratch_pdpe; 614 615 scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC); 616 617 fill_px(vm->dev, pdp, scratch_pdpe); 618 } 619 620 static void gen8_initialize_pml4(struct i915_address_space *vm, 621 struct i915_pml4 *pml4) 622 { 623 gen8_ppgtt_pml4e_t scratch_pml4e; 624 625 scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp), 626 I915_CACHE_LLC); 627 628 fill_px(vm->dev, pml4, scratch_pml4e); 629 } 630 631 static void 632 gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt, 633 struct i915_page_directory_pointer *pdp, 634 struct i915_page_directory *pd, 635 int index) 636 { 637 gen8_ppgtt_pdpe_t *page_directorypo; 638 639 if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) 640 return; 641 642 page_directorypo = kmap_px(pdp); 643 page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); 644 kunmap_px(ppgtt, page_directorypo); 645 } 646 647 static void 648 gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt, 649 struct i915_pml4 *pml4, 650 struct i915_page_directory_pointer *pdp, 651 int index) 652 { 653 gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4); 654 655 WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)); 656 pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); 657 kunmap_px(ppgtt, pagemap); 658 } 659 660 /* Broadwell Page Directory Pointer Descriptors */ 661 static int gen8_write_pdp(struct drm_i915_gem_request *req, 662 unsigned entry, 663 dma_addr_t addr) 664 { 665 struct intel_engine_cs *engine = req->engine; 666 int ret; 667 668 BUG_ON(entry >= 4); 669 670 ret = intel_ring_begin(req, 6); 671 if (ret) 672 return ret; 673 674 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); 675 intel_ring_emit_reg(engine, GEN8_RING_PDP_UDW(engine, entry)); 676 intel_ring_emit(engine, upper_32_bits(addr)); 677 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); 678 intel_ring_emit_reg(engine, GEN8_RING_PDP_LDW(engine, entry)); 679 intel_ring_emit(engine, lower_32_bits(addr)); 680 intel_ring_advance(engine); 681 682 return 0; 683 } 684 685 static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt, 686 struct drm_i915_gem_request *req) 687 { 688 int i, ret; 689 690 for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) { 691 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); 692 693 ret = gen8_write_pdp(req, i, pd_daddr); 694 if (ret) 695 return 
ret; 696 } 697 698 return 0; 699 } 700 701 static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt, 702 struct drm_i915_gem_request *req) 703 { 704 return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4)); 705 } 706 707 static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm, 708 struct i915_page_directory_pointer *pdp, 709 uint64_t start, 710 uint64_t length, 711 gen8_pte_t scratch_pte) 712 { 713 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 714 gen8_pte_t *pt_vaddr; 715 unsigned pdpe = gen8_pdpe_index(start); 716 unsigned pde = gen8_pde_index(start); 717 unsigned pte = gen8_pte_index(start); 718 unsigned num_entries = length >> PAGE_SHIFT; 719 unsigned last_pte, i; 720 721 if (WARN_ON(!pdp)) 722 return; 723 724 while (num_entries) { 725 struct i915_page_directory *pd; 726 struct i915_page_table *pt; 727 728 if (WARN_ON(!pdp->page_directory[pdpe])) 729 break; 730 731 pd = pdp->page_directory[pdpe]; 732 733 if (WARN_ON(!pd->page_table[pde])) 734 break; 735 736 pt = pd->page_table[pde]; 737 738 if (WARN_ON(!px_page(pt))) 739 break; 740 741 last_pte = pte + num_entries; 742 if (last_pte > GEN8_PTES) 743 last_pte = GEN8_PTES; 744 745 pt_vaddr = kmap_px(pt); 746 747 for (i = pte; i < last_pte; i++) { 748 pt_vaddr[i] = scratch_pte; 749 num_entries--; 750 } 751 752 kunmap_px(ppgtt, pt_vaddr); 753 754 pte = 0; 755 if (++pde == I915_PDES) { 756 if (++pdpe == I915_PDPES_PER_PDP(vm->dev)) 757 break; 758 pde = 0; 759 } 760 } 761 } 762 763 static void gen8_ppgtt_clear_range(struct i915_address_space *vm, 764 uint64_t start, 765 uint64_t length, 766 bool use_scratch) 767 { 768 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 769 gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), 770 I915_CACHE_LLC, use_scratch); 771 772 if (!USES_FULL_48BIT_PPGTT(vm->dev)) { 773 gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length, 774 scratch_pte); 775 } else { 776 uint64_t pml4e; 777 struct i915_page_directory_pointer *pdp; 778 779 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) { 780 gen8_ppgtt_clear_pte_range(vm, pdp, start, length, 781 scratch_pte); 782 } 783 } 784 } 785 786 static void 787 gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm, 788 struct i915_page_directory_pointer *pdp, 789 struct sg_page_iter *sg_iter, 790 uint64_t start, 791 enum i915_cache_level cache_level) 792 { 793 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 794 gen8_pte_t *pt_vaddr; 795 unsigned pdpe = gen8_pdpe_index(start); 796 unsigned pde = gen8_pde_index(start); 797 unsigned pte = gen8_pte_index(start); 798 799 pt_vaddr = NULL; 800 801 while (__sg_page_iter_next(sg_iter)) { 802 if (pt_vaddr == NULL) { 803 struct i915_page_directory *pd = pdp->page_directory[pdpe]; 804 struct i915_page_table *pt = pd->page_table[pde]; 805 pt_vaddr = kmap_px(pt); 806 } 807 808 pt_vaddr[pte] = 809 gen8_pte_encode(sg_page_iter_dma_address(sg_iter), 810 cache_level, true); 811 if (++pte == GEN8_PTES) { 812 kunmap_px(ppgtt, pt_vaddr); 813 pt_vaddr = NULL; 814 if (++pde == I915_PDES) { 815 if (++pdpe == I915_PDPES_PER_PDP(vm->dev)) 816 break; 817 pde = 0; 818 } 819 pte = 0; 820 } 821 } 822 823 if (pt_vaddr) 824 kunmap_px(ppgtt, pt_vaddr); 825 } 826 827 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, 828 struct sg_table *pages, 829 uint64_t start, 830 enum i915_cache_level cache_level, 831 u32 unused) 832 { 833 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 834 struct sg_page_iter sg_iter; 835 836 __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0); 
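        /*
         * The two branches below perform the same fill and differ only in
         * walk depth: legacy 32b PPGTT has a single, statically embedded PDP
         * that the shared sg_iter is drained into directly, while full 48b
         * PPGTT walks every PML4 entry covered by the VA range and lets
         * gen8_ppgtt_insert_pte_entries() keep consuming the same sg_iter,
         * so each PDP continues where the previous one stopped.
         */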
837 838 if (!USES_FULL_48BIT_PPGTT(vm->dev)) { 839 gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start, 840 cache_level); 841 } else { 842 struct i915_page_directory_pointer *pdp; 843 uint64_t pml4e; 844 uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT; 845 846 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) { 847 gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter, 848 start, cache_level); 849 } 850 } 851 } 852 853 static void gen8_free_page_tables(struct drm_device *dev, 854 struct i915_page_directory *pd) 855 { 856 int i; 857 858 if (!px_page(pd)) 859 return; 860 861 for_each_set_bit(i, pd->used_pdes, I915_PDES) { 862 if (WARN_ON(!pd->page_table[i])) 863 continue; 864 865 free_pt(dev, pd->page_table[i]); 866 pd->page_table[i] = NULL; 867 } 868 } 869 870 static int gen8_init_scratch(struct i915_address_space *vm) 871 { 872 struct drm_device *dev = vm->dev; 873 874 vm->scratch_page = alloc_scratch_page(dev); 875 if (IS_ERR(vm->scratch_page)) 876 return PTR_ERR(vm->scratch_page); 877 878 vm->scratch_pt = alloc_pt(dev); 879 if (IS_ERR(vm->scratch_pt)) { 880 free_scratch_page(dev, vm->scratch_page); 881 return PTR_ERR(vm->scratch_pt); 882 } 883 884 vm->scratch_pd = alloc_pd(dev); 885 if (IS_ERR(vm->scratch_pd)) { 886 free_pt(dev, vm->scratch_pt); 887 free_scratch_page(dev, vm->scratch_page); 888 return PTR_ERR(vm->scratch_pd); 889 } 890 891 if (USES_FULL_48BIT_PPGTT(dev)) { 892 vm->scratch_pdp = alloc_pdp(dev); 893 if (IS_ERR(vm->scratch_pdp)) { 894 free_pd(dev, vm->scratch_pd); 895 free_pt(dev, vm->scratch_pt); 896 free_scratch_page(dev, vm->scratch_page); 897 return PTR_ERR(vm->scratch_pdp); 898 } 899 } 900 901 gen8_initialize_pt(vm, vm->scratch_pt); 902 gen8_initialize_pd(vm, vm->scratch_pd); 903 if (USES_FULL_48BIT_PPGTT(dev)) 904 gen8_initialize_pdp(vm, vm->scratch_pdp); 905 906 return 0; 907 } 908 909 static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) 910 { 911 enum vgt_g2v_type msg; 912 struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev); 913 int i; 914 915 if (USES_FULL_48BIT_PPGTT(dev_priv)) { 916 u64 daddr = px_dma(&ppgtt->pml4); 917 918 I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); 919 I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); 920 921 msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE : 922 VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY); 923 } else { 924 for (i = 0; i < GEN8_LEGACY_PDPES; i++) { 925 u64 daddr = i915_page_dir_dma_addr(ppgtt, i); 926 927 I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr)); 928 I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr)); 929 } 930 931 msg = (create ? 
VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE : 932 VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY); 933 } 934 935 I915_WRITE(vgtif_reg(g2v_notify), msg); 936 937 return 0; 938 } 939 940 static void gen8_free_scratch(struct i915_address_space *vm) 941 { 942 struct drm_device *dev = vm->dev; 943 944 if (USES_FULL_48BIT_PPGTT(dev)) 945 free_pdp(dev, vm->scratch_pdp); 946 free_pd(dev, vm->scratch_pd); 947 free_pt(dev, vm->scratch_pt); 948 free_scratch_page(dev, vm->scratch_page); 949 } 950 951 static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev, 952 struct i915_page_directory_pointer *pdp) 953 { 954 int i; 955 956 for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) { 957 if (WARN_ON(!pdp->page_directory[i])) 958 continue; 959 960 gen8_free_page_tables(dev, pdp->page_directory[i]); 961 free_pd(dev, pdp->page_directory[i]); 962 } 963 964 free_pdp(dev, pdp); 965 } 966 967 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt) 968 { 969 int i; 970 971 for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { 972 if (WARN_ON(!ppgtt->pml4.pdps[i])) 973 continue; 974 975 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]); 976 } 977 978 cleanup_px(ppgtt->base.dev, &ppgtt->pml4); 979 } 980 981 static void gen8_ppgtt_cleanup(struct i915_address_space *vm) 982 { 983 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 984 985 if (intel_vgpu_active(vm->dev)) 986 gen8_ppgtt_notify_vgt(ppgtt, false); 987 988 if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) 989 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp); 990 else 991 gen8_ppgtt_cleanup_4lvl(ppgtt); 992 993 gen8_free_scratch(vm); 994 } 995 996 /** 997 * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range. 998 * @vm: Master vm structure. 999 * @pd: Page directory for this address range. 1000 * @start: Starting virtual address to begin allocations. 1001 * @length: Size of the allocations. 1002 * @new_pts: Bitmap set by function with new allocations. Likely used by the 1003 * caller to free on error. 1004 * 1005 * Allocate the required number of page tables. Extremely similar to 1006 * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by 1007 * the page directory boundary (instead of the page directory pointer). That 1008 * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is 1009 * possible, and likely that the caller will need to use multiple calls of this 1010 * function to achieve the appropriate allocation. 1011 * 1012 * Return: 0 if success; negative error code otherwise. 
1013 */ 1014 static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm, 1015 struct i915_page_directory *pd, 1016 uint64_t start, 1017 uint64_t length, 1018 unsigned long *new_pts) 1019 { 1020 struct drm_device *dev = vm->dev; 1021 struct i915_page_table *pt; 1022 uint32_t pde; 1023 1024 gen8_for_each_pde(pt, pd, start, length, pde) { 1025 /* Don't reallocate page tables */ 1026 if (test_bit(pde, pd->used_pdes)) { 1027 /* Scratch is never allocated this way */ 1028 WARN_ON(pt == vm->scratch_pt); 1029 continue; 1030 } 1031 1032 pt = alloc_pt(dev); 1033 if (IS_ERR(pt)) 1034 goto unwind_out; 1035 1036 gen8_initialize_pt(vm, pt); 1037 pd->page_table[pde] = pt; 1038 __set_bit(pde, new_pts); 1039 trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT); 1040 } 1041 1042 return 0; 1043 1044 unwind_out: 1045 for_each_set_bit(pde, new_pts, I915_PDES) 1046 free_pt(dev, pd->page_table[pde]); 1047 1048 return -ENOMEM; 1049 } 1050 1051 /** 1052 * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range. 1053 * @vm: Master vm structure. 1054 * @pdp: Page directory pointer for this address range. 1055 * @start: Starting virtual address to begin allocations. 1056 * @length: Size of the allocations. 1057 * @new_pds: Bitmap set by function with new allocations. Likely used by the 1058 * caller to free on error. 1059 * 1060 * Allocate the required number of page directories starting at the pde index of 1061 * @start, and ending at the pde index @start + @length. This function will skip 1062 * over already allocated page directories within the range, and only allocate 1063 * new ones, setting the appropriate pointer within the pdp as well as the 1064 * correct position in the bitmap @new_pds. 1065 * 1066 * The function will only allocate the pages within the range for a give page 1067 * directory pointer. In other words, if @start + @length straddles a virtually 1068 * addressed PDP boundary (512GB for 4k pages), there will be more allocations 1069 * required by the caller, This is not currently possible, and the BUG in the 1070 * code will prevent it. 1071 * 1072 * Return: 0 if success; negative error code otherwise. 1073 */ 1074 static int 1075 gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm, 1076 struct i915_page_directory_pointer *pdp, 1077 uint64_t start, 1078 uint64_t length, 1079 unsigned long *new_pds) 1080 { 1081 struct drm_device *dev = vm->dev; 1082 struct i915_page_directory *pd; 1083 uint32_t pdpe; 1084 uint32_t pdpes = I915_PDPES_PER_PDP(dev); 1085 1086 WARN_ON(!bitmap_empty(new_pds, pdpes)); 1087 1088 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1089 if (test_bit(pdpe, pdp->used_pdpes)) 1090 continue; 1091 1092 pd = alloc_pd(dev); 1093 if (IS_ERR(pd)) 1094 goto unwind_out; 1095 1096 gen8_initialize_pd(vm, pd); 1097 pdp->page_directory[pdpe] = pd; 1098 __set_bit(pdpe, new_pds); 1099 trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT); 1100 } 1101 1102 return 0; 1103 1104 unwind_out: 1105 for_each_set_bit(pdpe, new_pds, pdpes) 1106 free_pd(dev, pdp->page_directory[pdpe]); 1107 1108 return -ENOMEM; 1109 } 1110 1111 /** 1112 * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range. 1113 * @vm: Master vm structure. 1114 * @pml4: Page map level 4 for this address range. 1115 * @start: Starting virtual address to begin allocations. 1116 * @length: Size of the allocations. 1117 * @new_pdps: Bitmap set by function with new allocations. Likely used by the 1118 * caller to free on error. 
1119 * 1120 * Allocate the required number of page directory pointers. Extremely similar to 1121 * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs(). 1122 * The main difference is here we are limited by the pml4 boundary (instead of 1123 * the page directory pointer). 1124 * 1125 * Return: 0 if success; negative error code otherwise. 1126 */ 1127 static int 1128 gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, 1129 struct i915_pml4 *pml4, 1130 uint64_t start, 1131 uint64_t length, 1132 unsigned long *new_pdps) 1133 { 1134 struct drm_device *dev = vm->dev; 1135 struct i915_page_directory_pointer *pdp; 1136 uint32_t pml4e; 1137 1138 WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4)); 1139 1140 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1141 if (!test_bit(pml4e, pml4->used_pml4es)) { 1142 pdp = alloc_pdp(dev); 1143 if (IS_ERR(pdp)) 1144 goto unwind_out; 1145 1146 gen8_initialize_pdp(vm, pdp); 1147 pml4->pdps[pml4e] = pdp; 1148 __set_bit(pml4e, new_pdps); 1149 trace_i915_page_directory_pointer_entry_alloc(vm, 1150 pml4e, 1151 start, 1152 GEN8_PML4E_SHIFT); 1153 } 1154 } 1155 1156 return 0; 1157 1158 unwind_out: 1159 for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) 1160 free_pdp(dev, pml4->pdps[pml4e]); 1161 1162 return -ENOMEM; 1163 } 1164 1165 static void 1166 free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts) 1167 { 1168 kfree(new_pts); 1169 kfree(new_pds); 1170 } 1171 1172 /* Fills in the page directory bitmap, and the array of page tables bitmap. Both 1173 * of these are based on the number of PDPEs in the system. 1174 */ 1175 static 1176 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds, 1177 unsigned long **new_pts, 1178 uint32_t pdpes) 1179 { 1180 unsigned long *pds; 1181 unsigned long *pts; 1182 1183 pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY); 1184 if (!pds) 1185 return -ENOMEM; 1186 1187 pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long), 1188 GFP_TEMPORARY); 1189 if (!pts) 1190 goto err_out; 1191 1192 *new_pds = pds; 1193 *new_pts = pts; 1194 1195 return 0; 1196 1197 err_out: 1198 free_gen8_temp_bitmaps(pds, pts); 1199 return -ENOMEM; 1200 } 1201 1202 /* PDE TLBs are a pain to invalidate on GEN8+. When we modify 1203 * the page table structures, we mark them dirty so that 1204 * context switching/execlist queuing code takes extra steps 1205 * to ensure that tlbs are flushed. 1206 */ 1207 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt) 1208 { 1209 ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask; 1210 } 1211 1212 static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, 1213 struct i915_page_directory_pointer *pdp, 1214 uint64_t start, 1215 uint64_t length) 1216 { 1217 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1218 unsigned long *new_page_dirs, *new_page_tables; 1219 struct drm_device *dev = vm->dev; 1220 struct i915_page_directory *pd; 1221 const uint64_t orig_start = start; 1222 const uint64_t orig_length = length; 1223 uint32_t pdpe; 1224 uint32_t pdpes = I915_PDPES_PER_PDP(dev); 1225 int ret; 1226 1227 /* Wrap is never okay since we can only represent 48b, and we don't 1228 * actually use the other side of the canonical address space. 
1229 */ 1230 if (WARN_ON(start + length < start)) 1231 return -ENODEV; 1232 1233 if (WARN_ON(start + length > vm->total)) 1234 return -ENODEV; 1235 1236 ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); 1237 if (ret) 1238 return ret; 1239 1240 /* Do the allocations first so we can easily bail out */ 1241 ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length, 1242 new_page_dirs); 1243 if (ret) { 1244 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1245 return ret; 1246 } 1247 1248 /* For every page directory referenced, allocate page tables */ 1249 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1250 ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length, 1251 new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES)); 1252 if (ret) 1253 goto err_out; 1254 } 1255 1256 start = orig_start; 1257 length = orig_length; 1258 1259 /* Allocations have completed successfully, so set the bitmaps, and do 1260 * the mappings. */ 1261 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1262 gen8_pde_t *const page_directory = kmap_px(pd); 1263 struct i915_page_table *pt; 1264 uint64_t pd_len = length; 1265 uint64_t pd_start = start; 1266 uint32_t pde; 1267 1268 /* Every pd should be allocated, we just did that above. */ 1269 WARN_ON(!pd); 1270 1271 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { 1272 /* Same reasoning as pd */ 1273 WARN_ON(!pt); 1274 WARN_ON(!pd_len); 1275 WARN_ON(!gen8_pte_count(pd_start, pd_len)); 1276 1277 /* Set our used ptes within the page table */ 1278 bitmap_set(pt->used_ptes, 1279 gen8_pte_index(pd_start), 1280 gen8_pte_count(pd_start, pd_len)); 1281 1282 /* Our pde is now pointing to the pagetable, pt */ 1283 __set_bit(pde, pd->used_pdes); 1284 1285 /* Map the PDE to the page table */ 1286 page_directory[pde] = gen8_pde_encode(px_dma(pt), 1287 I915_CACHE_LLC); 1288 trace_i915_page_table_entry_map(&ppgtt->base, pde, pt, 1289 gen8_pte_index(start), 1290 gen8_pte_count(start, length), 1291 GEN8_PTES); 1292 1293 /* NB: We haven't yet mapped ptes to pages. At this 1294 * point we're still relying on insert_entries() */ 1295 } 1296 1297 kunmap_px(ppgtt, page_directory); 1298 __set_bit(pdpe, pdp->used_pdpes); 1299 gen8_setup_page_directory(ppgtt, pdp, pd, pdpe); 1300 } 1301 1302 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1303 mark_tlbs_dirty(ppgtt); 1304 return 0; 1305 1306 err_out: 1307 while (pdpe--) { 1308 unsigned long temp; 1309 1310 for_each_set_bit(temp, new_page_tables + pdpe * 1311 BITS_TO_LONGS(I915_PDES), I915_PDES) 1312 free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]); 1313 } 1314 1315 for_each_set_bit(pdpe, new_page_dirs, pdpes) 1316 free_pd(dev, pdp->page_directory[pdpe]); 1317 1318 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1319 mark_tlbs_dirty(ppgtt); 1320 return ret; 1321 } 1322 1323 static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, 1324 struct i915_pml4 *pml4, 1325 uint64_t start, 1326 uint64_t length) 1327 { 1328 DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4); 1329 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1330 struct i915_page_directory_pointer *pdp; 1331 uint64_t pml4e; 1332 int ret = 0; 1333 1334 /* Do the pml4 allocations first, so we don't need to track the newly 1335 * allocated tables below the pdp */ 1336 bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4); 1337 1338 /* The pagedirectory and pagetable allocations are done in the shared 3 1339 * and 4 level code. Just allocate the pdps. 
1340 */ 1341 ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length, 1342 new_pdps); 1343 if (ret) 1344 return ret; 1345 1346 WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2, 1347 "The allocation has spanned more than 512GB. " 1348 "It is highly likely this is incorrect."); 1349 1350 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1351 WARN_ON(!pdp); 1352 1353 ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length); 1354 if (ret) 1355 goto err_out; 1356 1357 gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e); 1358 } 1359 1360 bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es, 1361 GEN8_PML4ES_PER_PML4); 1362 1363 return 0; 1364 1365 err_out: 1366 for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) 1367 gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]); 1368 1369 return ret; 1370 } 1371 1372 static int gen8_alloc_va_range(struct i915_address_space *vm, 1373 uint64_t start, uint64_t length) 1374 { 1375 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1376 1377 if (USES_FULL_48BIT_PPGTT(vm->dev)) 1378 return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length); 1379 else 1380 return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); 1381 } 1382 1383 static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp, 1384 uint64_t start, uint64_t length, 1385 gen8_pte_t scratch_pte, 1386 struct seq_file *m) 1387 { 1388 struct i915_page_directory *pd; 1389 uint32_t pdpe; 1390 1391 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1392 struct i915_page_table *pt; 1393 uint64_t pd_len = length; 1394 uint64_t pd_start = start; 1395 uint32_t pde; 1396 1397 if (!test_bit(pdpe, pdp->used_pdpes)) 1398 continue; 1399 1400 seq_printf(m, "\tPDPE #%d\n", pdpe); 1401 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { 1402 uint32_t pte; 1403 gen8_pte_t *pt_vaddr; 1404 1405 if (!test_bit(pde, pd->used_pdes)) 1406 continue; 1407 1408 pt_vaddr = kmap_px(pt); 1409 for (pte = 0; pte < GEN8_PTES; pte += 4) { 1410 uint64_t va = 1411 (pdpe << GEN8_PDPE_SHIFT) | 1412 (pde << GEN8_PDE_SHIFT) | 1413 (pte << GEN8_PTE_SHIFT); 1414 int i; 1415 bool found = false; 1416 1417 for (i = 0; i < 4; i++) 1418 if (pt_vaddr[pte + i] != scratch_pte) 1419 found = true; 1420 if (!found) 1421 continue; 1422 1423 seq_printf(m, "\t\t0x%lx [%03d,%03d,%04d]: =", va, pdpe, pde, pte); 1424 for (i = 0; i < 4; i++) { 1425 if (pt_vaddr[pte + i] != scratch_pte) 1426 seq_printf(m, " %lx", pt_vaddr[pte + i]); 1427 else 1428 seq_puts(m, " SCRATCH "); 1429 } 1430 seq_puts(m, "\n"); 1431 } 1432 /* don't use kunmap_px, it could trigger 1433 * an unnecessary flush. 
1434 */ 1435 kunmap_atomic(pt_vaddr); 1436 } 1437 } 1438 } 1439 1440 static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) 1441 { 1442 struct i915_address_space *vm = &ppgtt->base; 1443 uint64_t start = ppgtt->base.start; 1444 uint64_t length = ppgtt->base.total; 1445 gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), 1446 I915_CACHE_LLC, true); 1447 1448 if (!USES_FULL_48BIT_PPGTT(vm->dev)) { 1449 gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m); 1450 } else { 1451 uint64_t pml4e; 1452 struct i915_pml4 *pml4 = &ppgtt->pml4; 1453 struct i915_page_directory_pointer *pdp; 1454 1455 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1456 if (!test_bit(pml4e, pml4->used_pml4es)) 1457 continue; 1458 1459 seq_printf(m, " PML4E #%lu\n", pml4e); 1460 gen8_dump_pdp(pdp, start, length, scratch_pte, m); 1461 } 1462 } 1463 } 1464 1465 static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt) 1466 { 1467 unsigned long *new_page_dirs, *new_page_tables; 1468 uint32_t pdpes = I915_PDPES_PER_PDP(dev); 1469 int ret; 1470 1471 /* We allocate temp bitmap for page tables for no gain 1472 * but as this is for init only, lets keep the things simple 1473 */ 1474 ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); 1475 if (ret) 1476 return ret; 1477 1478 /* Allocate for all pdps regardless of how the ppgtt 1479 * was defined. 1480 */ 1481 ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp, 1482 0, 1ULL << 32, 1483 new_page_dirs); 1484 if (!ret) 1485 *ppgtt->pdp.used_pdpes = *new_page_dirs; 1486 1487 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1488 1489 return ret; 1490 } 1491 1492 /* 1493 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers 1494 * with a net effect resembling a 2-level page table in normal x86 terms. Each 1495 * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address 1496 * space. 
1497 * 1498 */ 1499 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 1500 { 1501 int ret; 1502 1503 ret = gen8_init_scratch(&ppgtt->base); 1504 if (ret) 1505 return ret; 1506 1507 ppgtt->base.start = 0; 1508 ppgtt->base.cleanup = gen8_ppgtt_cleanup; 1509 ppgtt->base.allocate_va_range = gen8_alloc_va_range; 1510 ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; 1511 ppgtt->base.clear_range = gen8_ppgtt_clear_range; 1512 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 1513 ppgtt->base.bind_vma = ppgtt_bind_vma; 1514 ppgtt->debug_dump = gen8_dump_ppgtt; 1515 1516 if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { 1517 ret = setup_px(ppgtt->base.dev, &ppgtt->pml4); 1518 if (ret) 1519 goto free_scratch; 1520 1521 gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4); 1522 1523 ppgtt->base.total = 1ULL << 48; 1524 ppgtt->switch_mm = gen8_48b_mm_switch; 1525 } else { 1526 ret = __pdp_init(ppgtt->base.dev, &ppgtt->pdp); 1527 if (ret) 1528 goto free_scratch; 1529 1530 ppgtt->base.total = 1ULL << 32; 1531 ppgtt->switch_mm = gen8_legacy_mm_switch; 1532 trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, 1533 0, 0, 1534 GEN8_PML4E_SHIFT); 1535 1536 if (intel_vgpu_active(ppgtt->base.dev)) { 1537 ret = gen8_preallocate_top_level_pdps(ppgtt); 1538 if (ret) 1539 goto free_scratch; 1540 } 1541 } 1542 1543 if (intel_vgpu_active(ppgtt->base.dev)) 1544 gen8_ppgtt_notify_vgt(ppgtt, true); 1545 1546 return 0; 1547 1548 free_scratch: 1549 gen8_free_scratch(&ppgtt->base); 1550 return ret; 1551 } 1552 1553 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) 1554 { 1555 struct i915_address_space *vm = &ppgtt->base; 1556 struct i915_page_table *unused; 1557 gen6_pte_t scratch_pte; 1558 uint32_t pd_entry; 1559 uint32_t pte, pde, temp; 1560 uint32_t start = ppgtt->base.start, length = ppgtt->base.total; 1561 1562 scratch_pte = vm->pte_encode(px_dma(vm->scratch_page), 1563 I915_CACHE_LLC, true, 0); 1564 1565 gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde) { 1566 u32 expected; 1567 gen6_pte_t *pt_vaddr; 1568 const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]); 1569 pd_entry = readl(ppgtt->pd_addr + pde); 1570 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID); 1571 1572 if (pd_entry != expected) 1573 seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n", 1574 pde, 1575 pd_entry, 1576 expected); 1577 seq_printf(m, "\tPDE: %x\n", pd_entry); 1578 1579 pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]); 1580 1581 for (pte = 0; pte < GEN6_PTES; pte+=4) { 1582 unsigned long va = 1583 (pde * PAGE_SIZE * GEN6_PTES) + 1584 (pte * PAGE_SIZE); 1585 int i; 1586 bool found = false; 1587 for (i = 0; i < 4; i++) 1588 if (pt_vaddr[pte + i] != scratch_pte) 1589 found = true; 1590 if (!found) 1591 continue; 1592 1593 seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte); 1594 for (i = 0; i < 4; i++) { 1595 if (pt_vaddr[pte + i] != scratch_pte) 1596 seq_printf(m, " %08x", pt_vaddr[pte + i]); 1597 else 1598 seq_puts(m, " SCRATCH "); 1599 } 1600 seq_puts(m, "\n"); 1601 } 1602 kunmap_px(ppgtt, pt_vaddr); 1603 } 1604 } 1605 1606 /* Write pde (index) from the page directory @pd to the page table @pt */ 1607 static void gen6_write_pde(struct i915_page_directory *pd, 1608 const int pde, struct i915_page_table *pt) 1609 { 1610 /* Caller needs to make sure the write completes if necessary */ 1611 struct i915_hw_ppgtt *ppgtt = 1612 container_of(pd, struct i915_hw_ppgtt, pd); 1613 u32 pd_entry; 1614 1615 pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt)); 1616 pd_entry |= 
GEN6_PDE_VALID; 1617 1618 writel(pd_entry, ppgtt->pd_addr + pde); 1619 } 1620 1621 /* Write all the page tables found in the ppgtt structure to incrementing page 1622 * directories. */ 1623 static void gen6_write_page_range(struct drm_i915_private *dev_priv, 1624 struct i915_page_directory *pd, 1625 uint32_t start, uint32_t length) 1626 { 1627 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1628 struct i915_page_table *pt; 1629 uint32_t pde, temp; 1630 1631 gen6_for_each_pde(pt, pd, start, length, temp, pde) 1632 gen6_write_pde(pd, pde, pt); 1633 1634 /* Make sure write is complete before other code can use this page 1635 * table. Also require for WC mapped PTEs */ 1636 readl(ggtt->gsm); 1637 } 1638 1639 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) 1640 { 1641 BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); 1642 1643 return (ppgtt->pd.base.ggtt_offset / 64) << 16; 1644 } 1645 1646 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, 1647 struct drm_i915_gem_request *req) 1648 { 1649 struct intel_engine_cs *engine = req->engine; 1650 int ret; 1651 1652 /* NB: TLBs must be flushed and invalidated before a switch */ 1653 ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 1654 if (ret) 1655 return ret; 1656 1657 ret = intel_ring_begin(req, 6); 1658 if (ret) 1659 return ret; 1660 1661 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(2)); 1662 intel_ring_emit_reg(engine, RING_PP_DIR_DCLV(engine)); 1663 intel_ring_emit(engine, PP_DIR_DCLV_2G); 1664 intel_ring_emit_reg(engine, RING_PP_DIR_BASE(engine)); 1665 intel_ring_emit(engine, get_pd_offset(ppgtt)); 1666 intel_ring_emit(engine, MI_NOOP); 1667 intel_ring_advance(engine); 1668 1669 return 0; 1670 } 1671 1672 static int vgpu_mm_switch(struct i915_hw_ppgtt *ppgtt, 1673 struct drm_i915_gem_request *req) 1674 { 1675 struct intel_engine_cs *engine = req->engine; 1676 struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev); 1677 1678 I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); 1679 I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt)); 1680 return 0; 1681 } 1682 1683 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, 1684 struct drm_i915_gem_request *req) 1685 { 1686 struct intel_engine_cs *engine = req->engine; 1687 int ret; 1688 1689 /* NB: TLBs must be flushed and invalidated before a switch */ 1690 ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 1691 if (ret) 1692 return ret; 1693 1694 ret = intel_ring_begin(req, 6); 1695 if (ret) 1696 return ret; 1697 1698 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(2)); 1699 intel_ring_emit_reg(engine, RING_PP_DIR_DCLV(engine)); 1700 intel_ring_emit(engine, PP_DIR_DCLV_2G); 1701 intel_ring_emit_reg(engine, RING_PP_DIR_BASE(engine)); 1702 intel_ring_emit(engine, get_pd_offset(ppgtt)); 1703 intel_ring_emit(engine, MI_NOOP); 1704 intel_ring_advance(engine); 1705 1706 /* XXX: RCS is the only one to auto invalidate the TLBs? 
*/ 1707 if (engine->id != RCS) { 1708 ret = engine->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 1709 if (ret) 1710 return ret; 1711 } 1712 1713 return 0; 1714 } 1715 1716 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, 1717 struct drm_i915_gem_request *req) 1718 { 1719 struct intel_engine_cs *engine = req->engine; 1720 struct drm_device *dev = ppgtt->base.dev; 1721 struct drm_i915_private *dev_priv = dev->dev_private; 1722 1723 1724 I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); 1725 I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt)); 1726 1727 POSTING_READ(RING_PP_DIR_DCLV(engine)); 1728 1729 return 0; 1730 } 1731 1732 static void gen8_ppgtt_enable(struct drm_device *dev) 1733 { 1734 struct drm_i915_private *dev_priv = dev->dev_private; 1735 struct intel_engine_cs *engine; 1736 1737 for_each_engine(engine, dev_priv) { 1738 u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? GEN8_GFX_PPGTT_48B : 0; 1739 I915_WRITE(RING_MODE_GEN7(engine), 1740 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level)); 1741 } 1742 } 1743 1744 static void gen7_ppgtt_enable(struct drm_device *dev) 1745 { 1746 struct drm_i915_private *dev_priv = dev->dev_private; 1747 struct intel_engine_cs *engine; 1748 uint32_t ecochk, ecobits; 1749 1750 ecobits = I915_READ(GAC_ECO_BITS); 1751 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B); 1752 1753 ecochk = I915_READ(GAM_ECOCHK); 1754 if (IS_HASWELL(dev)) { 1755 ecochk |= ECOCHK_PPGTT_WB_HSW; 1756 } else { 1757 ecochk |= ECOCHK_PPGTT_LLC_IVB; 1758 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB; 1759 } 1760 I915_WRITE(GAM_ECOCHK, ecochk); 1761 1762 for_each_engine(engine, dev_priv) { 1763 /* GFX_MODE is per-ring on gen7+ */ 1764 I915_WRITE(RING_MODE_GEN7(engine), 1765 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 1766 } 1767 } 1768 1769 static void gen6_ppgtt_enable(struct drm_device *dev) 1770 { 1771 struct drm_i915_private *dev_priv = dev->dev_private; 1772 uint32_t ecochk, gab_ctl, ecobits; 1773 1774 ecobits = I915_READ(GAC_ECO_BITS); 1775 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT | 1776 ECOBITS_PPGTT_CACHE64B); 1777 1778 gab_ctl = I915_READ(GAB_CTL); 1779 I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT); 1780 1781 ecochk = I915_READ(GAM_ECOCHK); 1782 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B); 1783 1784 I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 1785 } 1786 1787 /* PPGTT support for Sandybdrige/Gen6 and later */ 1788 static void gen6_ppgtt_clear_range(struct i915_address_space *vm, 1789 uint64_t start, 1790 uint64_t length, 1791 bool use_scratch) 1792 { 1793 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1794 gen6_pte_t *pt_vaddr, scratch_pte; 1795 unsigned first_entry = start >> PAGE_SHIFT; 1796 unsigned num_entries = length >> PAGE_SHIFT; 1797 unsigned act_pt = first_entry / GEN6_PTES; 1798 unsigned first_pte = first_entry % GEN6_PTES; 1799 unsigned last_pte, i; 1800 1801 scratch_pte = vm->pte_encode(px_dma(vm->scratch_page), 1802 I915_CACHE_LLC, true, 0); 1803 1804 while (num_entries) { 1805 last_pte = first_pte + num_entries; 1806 if (last_pte > GEN6_PTES) 1807 last_pte = GEN6_PTES; 1808 1809 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); 1810 1811 for (i = first_pte; i < last_pte; i++) 1812 pt_vaddr[i] = scratch_pte; 1813 1814 kunmap_px(ppgtt, pt_vaddr); 1815 1816 num_entries -= last_pte - first_pte; 1817 first_pte = 0; 1818 act_pt++; 1819 } 1820 } 1821 1822 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, 1823 struct sg_table *pages, 1824 uint64_t start, 1825 
enum i915_cache_level cache_level, u32 flags) 1826 { 1827 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1828 gen6_pte_t *pt_vaddr; 1829 unsigned first_entry = start >> PAGE_SHIFT; 1830 unsigned act_pt = first_entry / GEN6_PTES; 1831 unsigned act_pte = first_entry % GEN6_PTES; 1832 struct sg_page_iter sg_iter; 1833 1834 pt_vaddr = NULL; 1835 for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) { 1836 if (pt_vaddr == NULL) 1837 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); 1838 1839 pt_vaddr[act_pte] = 1840 vm->pte_encode(sg_page_iter_dma_address(&sg_iter), 1841 cache_level, true, flags); 1842 1843 if (++act_pte == GEN6_PTES) { 1844 kunmap_px(ppgtt, pt_vaddr); 1845 pt_vaddr = NULL; 1846 act_pt++; 1847 act_pte = 0; 1848 } 1849 } 1850 if (pt_vaddr) 1851 kunmap_px(ppgtt, pt_vaddr); 1852 } 1853 1854 static int gen6_alloc_va_range(struct i915_address_space *vm, 1855 uint64_t start_in, uint64_t length_in) 1856 { 1857 DECLARE_BITMAP(new_page_tables, I915_PDES); 1858 struct drm_device *dev = vm->dev; 1859 struct drm_i915_private *dev_priv = to_i915(dev); 1860 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1861 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1862 struct i915_page_table *pt; 1863 uint32_t start, length, start_save, length_save; 1864 uint32_t pde, temp; 1865 int ret; 1866 1867 if (WARN_ON(start_in + length_in > ppgtt->base.total)) 1868 return -ENODEV; 1869 1870 start = start_save = start_in; 1871 length = length_save = length_in; 1872 1873 bitmap_zero(new_page_tables, I915_PDES); 1874 1875 /* The allocation is done in two stages so that we can bail out with 1876 * minimal amount of pain. The first stage finds new page tables that 1877 * need allocation. The second stage marks use ptes within the page 1878 * tables. 1879 */ 1880 gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) { 1881 if (pt != vm->scratch_pt) { 1882 WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES)); 1883 continue; 1884 } 1885 1886 /* We've already allocated a page table */ 1887 WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES)); 1888 1889 pt = alloc_pt(dev); 1890 if (IS_ERR(pt)) { 1891 ret = PTR_ERR(pt); 1892 goto unwind_out; 1893 } 1894 1895 gen6_initialize_pt(vm, pt); 1896 1897 ppgtt->pd.page_table[pde] = pt; 1898 __set_bit(pde, new_page_tables); 1899 trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT); 1900 } 1901 1902 start = start_save; 1903 length = length_save; 1904 1905 gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) { 1906 DECLARE_BITMAP(tmp_bitmap, GEN6_PTES); 1907 1908 bitmap_zero(tmp_bitmap, GEN6_PTES); 1909 bitmap_set(tmp_bitmap, gen6_pte_index(start), 1910 gen6_pte_count(start, length)); 1911 1912 if (__test_and_clear_bit(pde, new_page_tables)) 1913 gen6_write_pde(&ppgtt->pd, pde, pt); 1914 1915 trace_i915_page_table_entry_map(vm, pde, pt, 1916 gen6_pte_index(start), 1917 gen6_pte_count(start, length), 1918 GEN6_PTES); 1919 bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes, 1920 GEN6_PTES); 1921 } 1922 1923 WARN_ON(!bitmap_empty(new_page_tables, I915_PDES)); 1924 1925 /* Make sure write is complete before other code can use this page 1926 * table. 
Also require for WC mapped PTEs */ 1927 readl(ggtt->gsm); 1928 1929 mark_tlbs_dirty(ppgtt); 1930 return 0; 1931 1932 unwind_out: 1933 for_each_set_bit(pde, new_page_tables, I915_PDES) { 1934 struct i915_page_table *pt = ppgtt->pd.page_table[pde]; 1935 1936 ppgtt->pd.page_table[pde] = vm->scratch_pt; 1937 free_pt(vm->dev, pt); 1938 } 1939 1940 mark_tlbs_dirty(ppgtt); 1941 return ret; 1942 } 1943 1944 static int gen6_init_scratch(struct i915_address_space *vm) 1945 { 1946 struct drm_device *dev = vm->dev; 1947 1948 vm->scratch_page = alloc_scratch_page(dev); 1949 if (IS_ERR(vm->scratch_page)) 1950 return PTR_ERR(vm->scratch_page); 1951 1952 vm->scratch_pt = alloc_pt(dev); 1953 if (IS_ERR(vm->scratch_pt)) { 1954 free_scratch_page(dev, vm->scratch_page); 1955 return PTR_ERR(vm->scratch_pt); 1956 } 1957 1958 gen6_initialize_pt(vm, vm->scratch_pt); 1959 1960 return 0; 1961 } 1962 1963 static void gen6_free_scratch(struct i915_address_space *vm) 1964 { 1965 struct drm_device *dev = vm->dev; 1966 1967 free_pt(dev, vm->scratch_pt); 1968 free_scratch_page(dev, vm->scratch_page); 1969 } 1970 1971 static void gen6_ppgtt_cleanup(struct i915_address_space *vm) 1972 { 1973 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1974 struct i915_page_table *pt; 1975 uint32_t pde; 1976 1977 drm_mm_remove_node(&ppgtt->node); 1978 1979 gen6_for_all_pdes(pt, ppgtt, pde) { 1980 if (pt != vm->scratch_pt) 1981 free_pt(ppgtt->base.dev, pt); 1982 } 1983 1984 gen6_free_scratch(vm); 1985 } 1986 1987 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) 1988 { 1989 struct i915_address_space *vm = &ppgtt->base; 1990 struct drm_device *dev = ppgtt->base.dev; 1991 struct drm_i915_private *dev_priv = to_i915(dev); 1992 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1993 bool retried = false; 1994 int ret; 1995 1996 /* PPGTT PDEs reside in the GGTT and consists of 512 entries. The 1997 * allocator works in address space sizes, so it's multiplied by page 1998 * size. We allocate at the top of the GTT to avoid fragmentation. 
1999 */ 2000 BUG_ON(!drm_mm_initialized(&ggtt->base.mm)); 2001 2002 ret = gen6_init_scratch(vm); 2003 if (ret) 2004 return ret; 2005 2006 alloc: 2007 ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, 2008 &ppgtt->node, GEN6_PD_SIZE, 2009 GEN6_PD_ALIGN, 0, 2010 0, ggtt->base.total, 2011 DRM_MM_TOPDOWN); 2012 if (ret == -ENOSPC && !retried) { 2013 ret = i915_gem_evict_something(dev, &ggtt->base, 2014 GEN6_PD_SIZE, GEN6_PD_ALIGN, 2015 I915_CACHE_NONE, 2016 0, ggtt->base.total, 2017 0); 2018 if (ret) 2019 goto err_out; 2020 2021 retried = true; 2022 goto alloc; 2023 } 2024 2025 if (ret) 2026 goto err_out; 2027 2028 2029 if (ppgtt->node.start < ggtt->mappable_end) 2030 DRM_DEBUG("Forced to use aperture for PDEs\n"); 2031 2032 return 0; 2033 2034 err_out: 2035 gen6_free_scratch(vm); 2036 return ret; 2037 } 2038 2039 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) 2040 { 2041 return gen6_ppgtt_allocate_page_directories(ppgtt); 2042 } 2043 2044 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt, 2045 uint64_t start, uint64_t length) 2046 { 2047 struct i915_page_table *unused; 2048 uint32_t pde, temp; 2049 2050 gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde) 2051 ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt; 2052 } 2053 2054 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 2055 { 2056 struct drm_device *dev = ppgtt->base.dev; 2057 struct drm_i915_private *dev_priv = to_i915(dev); 2058 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2059 int ret; 2060 2061 ppgtt->base.pte_encode = ggtt->base.pte_encode; 2062 if (IS_GEN6(dev)) { 2063 ppgtt->switch_mm = gen6_mm_switch; 2064 } else if (IS_HASWELL(dev)) { 2065 ppgtt->switch_mm = hsw_mm_switch; 2066 } else if (IS_GEN7(dev)) { 2067 ppgtt->switch_mm = gen7_mm_switch; 2068 } else 2069 BUG(); 2070 2071 if (intel_vgpu_active(dev)) 2072 ppgtt->switch_mm = vgpu_mm_switch; 2073 2074 ret = gen6_ppgtt_alloc(ppgtt); 2075 if (ret) 2076 return ret; 2077 2078 ppgtt->base.allocate_va_range = gen6_alloc_va_range; 2079 ppgtt->base.clear_range = gen6_ppgtt_clear_range; 2080 ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; 2081 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 2082 ppgtt->base.bind_vma = ppgtt_bind_vma; 2083 ppgtt->base.cleanup = gen6_ppgtt_cleanup; 2084 ppgtt->base.start = 0; 2085 ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE; 2086 ppgtt->debug_dump = gen6_dump_ppgtt; 2087 2088 ppgtt->pd.base.ggtt_offset = 2089 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t); 2090 2091 ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + 2092 ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t); 2093 2094 gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total); 2095 2096 gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total); 2097 2098 DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n", 2099 ppgtt->node.size >> 20, 2100 ppgtt->node.start / PAGE_SIZE); 2101 2102 DRM_DEBUG("Adding PPGTT at offset %x\n", 2103 ppgtt->pd.base.ggtt_offset << 10); 2104 2105 return 0; 2106 } 2107 2108 static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) 2109 { 2110 ppgtt->base.dev = dev; 2111 2112 if (INTEL_INFO(dev)->gen < 8) 2113 return gen6_ppgtt_init(ppgtt); 2114 else 2115 return gen8_ppgtt_init(ppgtt); 2116 } 2117 2118 static void i915_address_space_init(struct i915_address_space *vm, 2119 struct drm_i915_private *dev_priv) 2120 { 2121 drm_mm_init(&vm->mm, vm->start, vm->total); 2122 vm->dev = dev_priv->dev; 2123 INIT_LIST_HEAD(&vm->active_list); 2124 INIT_LIST_HEAD(&vm->inactive_list); 
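/* Link the new address space onto the device-wide vm_list: i915_gem_restore_gtt_mappings() walks this list through global_link, and i915_ppgtt_release() removes the entry again at teardown. */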
2125 list_add_tail(&vm->global_link, &dev_priv->vm_list); 2126 } 2127 2128 static void gtt_write_workarounds(struct drm_device *dev) 2129 { 2130 struct drm_i915_private *dev_priv = dev->dev_private; 2131 2132 /* This function is for GTT-related workarounds. It is 2133 * called on driver load and after a GPU reset, so you can place 2134 * workarounds here even if they get overwritten by a GPU reset. 2135 */ 2136 /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */ 2137 if (IS_BROADWELL(dev)) 2138 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); 2139 else if (IS_CHERRYVIEW(dev)) 2140 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); 2141 else if (IS_SKYLAKE(dev)) 2142 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); 2143 else if (IS_BROXTON(dev)) 2144 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); 2145 } 2146 2147 int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) 2148 { 2149 struct drm_i915_private *dev_priv = dev->dev_private; 2150 int ret = 0; 2151 2152 ret = __hw_ppgtt_init(dev, ppgtt); 2153 if (ret == 0) { 2154 kref_init(&ppgtt->ref); 2155 i915_address_space_init(&ppgtt->base, dev_priv); 2156 } 2157 2158 return ret; 2159 } 2160 2161 int i915_ppgtt_init_hw(struct drm_device *dev) 2162 { 2163 gtt_write_workarounds(dev); 2164 2165 /* In the case of execlists, PPGTT is enabled by the context descriptor 2166 * and the PDPs are contained within the context itself. We don't 2167 * need to do anything here. */ 2168 if (i915.enable_execlists) 2169 return 0; 2170 2171 if (!USES_PPGTT(dev)) 2172 return 0; 2173 2174 if (IS_GEN6(dev)) 2175 gen6_ppgtt_enable(dev); 2176 else if (IS_GEN7(dev)) 2177 gen7_ppgtt_enable(dev); 2178 else if (INTEL_INFO(dev)->gen >= 8) 2179 gen8_ppgtt_enable(dev); 2180 else 2181 MISSING_CASE(INTEL_INFO(dev)->gen); 2182 2183 return 0; 2184 } 2185 2186 int i915_ppgtt_init_ring(struct drm_i915_gem_request *req) 2187 { 2188 struct drm_i915_private *dev_priv = req->i915; 2189 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; 2190 2191 if (i915.enable_execlists) 2192 return 0; 2193 2194 if (!ppgtt) 2195 return 0; 2196 2197 return ppgtt->switch_mm(ppgtt, req); 2198 } 2199 2200 struct i915_hw_ppgtt * 2201 i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv) 2202 { 2203 struct i915_hw_ppgtt *ppgtt; 2204 int ret; 2205 2206 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 2207 if (!ppgtt) 2208 return ERR_PTR(-ENOMEM); 2209 2210 ret = i915_ppgtt_init(dev, ppgtt); 2211 if (ret) { 2212 kfree(ppgtt); 2213 return ERR_PTR(ret); 2214 } 2215 2216 ppgtt->file_priv = fpriv; 2217 2218 trace_i915_ppgtt_create(&ppgtt->base); 2219 2220 return ppgtt; 2221 } 2222 2223 void i915_ppgtt_release(struct kref *kref) 2224 { 2225 struct i915_hw_ppgtt *ppgtt = 2226 container_of(kref, struct i915_hw_ppgtt, ref); 2227 2228 trace_i915_ppgtt_release(&ppgtt->base); 2229 2230 /* vmas should already be unbound */ 2231 WARN_ON(!list_empty(&ppgtt->base.active_list)); 2232 WARN_ON(!list_empty(&ppgtt->base.inactive_list)); 2233 2234 list_del(&ppgtt->base.global_link); 2235 drm_mm_takedown(&ppgtt->base.mm); 2236 2237 ppgtt->base.cleanup(&ppgtt->base); 2238 kfree(ppgtt); 2239 } 2240 2241 extern int intel_iommu_gfx_mapped; 2242 /* Certain Gen5 chipsets require idling the GPU before 2243 * unmapping anything from the GTT when VT-d is enabled.
2244 */ 2245 static bool needs_idle_maps(struct drm_device *dev) 2246 { 2247 #ifdef CONFIG_INTEL_IOMMU 2248 /* Query intel_iommu to see if we need the workaround. Presumably that 2249 * was loaded first. 2250 */ 2251 if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped) 2252 return true; 2253 #endif 2254 return false; 2255 } 2256 2257 static bool do_idling(struct drm_i915_private *dev_priv) 2258 { 2259 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2260 bool ret = dev_priv->mm.interruptible; 2261 2262 if (unlikely(ggtt->do_idle_maps)) { 2263 dev_priv->mm.interruptible = false; 2264 if (i915_gpu_idle(dev_priv->dev)) { 2265 DRM_ERROR("Couldn't idle GPU\n"); 2266 /* Wait a bit, in hopes it avoids the hang */ 2267 udelay(10); 2268 } 2269 } 2270 2271 return ret; 2272 } 2273 2274 static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible) 2275 { 2276 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2277 2278 if (unlikely(ggtt->do_idle_maps)) 2279 dev_priv->mm.interruptible = interruptible; 2280 } 2281 2282 void i915_check_and_clear_faults(struct drm_device *dev) 2283 { 2284 struct drm_i915_private *dev_priv = dev->dev_private; 2285 struct intel_engine_cs *engine; 2286 2287 if (INTEL_INFO(dev)->gen < 6) 2288 return; 2289 2290 for_each_engine(engine, dev_priv) { 2291 u32 fault_reg; 2292 fault_reg = I915_READ(RING_FAULT_REG(engine)); 2293 if (fault_reg & RING_FAULT_VALID) { 2294 #if 0 2295 DRM_DEBUG_DRIVER("Unexpected fault\n" 2296 "\tAddr: 0x%08lx\n" 2297 "\tAddress space: %s\n" 2298 "\tSource ID: %d\n" 2299 "\tType: %d\n", 2300 fault_reg & PAGE_MASK, 2301 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT", 2302 RING_FAULT_SRCID(fault_reg), 2303 RING_FAULT_FAULT_TYPE(fault_reg)); 2304 #endif 2305 I915_WRITE(RING_FAULT_REG(engine), 2306 fault_reg & ~RING_FAULT_VALID); 2307 } 2308 } 2309 POSTING_READ(RING_FAULT_REG(&dev_priv->engine[RCS])); 2310 } 2311 2312 static void i915_ggtt_flush(struct drm_i915_private *dev_priv) 2313 { 2314 if (INTEL_INFO(dev_priv)->gen < 6) { 2315 intel_gtt_chipset_flush(); 2316 } else { 2317 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2318 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2319 } 2320 } 2321 2322 void i915_gem_suspend_gtt_mappings(struct drm_device *dev) 2323 { 2324 struct drm_i915_private *dev_priv = to_i915(dev); 2325 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2326 2327 /* Don't bother messing with faults pre GEN6 as we have little 2328 * documentation supporting that it's a good idea. 
2329 */ 2330 if (INTEL_INFO(dev)->gen < 6) 2331 return; 2332 2333 i915_check_and_clear_faults(dev); 2334 2335 ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total, 2336 true); 2337 2338 i915_ggtt_flush(dev_priv); 2339 } 2340 2341 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj) 2342 { 2343 if (!dma_map_sg(&obj->base.dev->pdev->dev, 2344 obj->pages->sgl, obj->pages->nents, 2345 PCI_DMA_BIDIRECTIONAL)) 2346 return -ENOSPC; 2347 2348 return 0; 2349 } 2350 2351 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) 2352 { 2353 #ifdef writeq 2354 writeq(pte, addr); 2355 #else 2356 iowrite32((u32)pte, addr); 2357 iowrite32(pte >> 32, addr + 4); 2358 #endif 2359 } 2360 2361 static void gen8_ggtt_insert_entries(struct i915_address_space *vm, 2362 struct sg_table *st, 2363 uint64_t start, 2364 enum i915_cache_level level, u32 unused) 2365 { 2366 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2367 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2368 unsigned first_entry = start >> PAGE_SHIFT; 2369 gen8_pte_t __iomem *gtt_entries = 2370 (gen8_pte_t __iomem *)ggtt->gsm + first_entry; 2371 int i = 0; 2372 struct sg_page_iter sg_iter; 2373 dma_addr_t addr = 0; /* shut up gcc */ 2374 int rpm_atomic_seq; 2375 2376 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2377 2378 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { 2379 addr = sg_dma_address(sg_iter.sg) + 2380 (sg_iter.sg_pgoffset << PAGE_SHIFT); 2381 gen8_set_pte(&gtt_entries[i], 2382 gen8_pte_encode(addr, level, true)); 2383 i++; 2384 } 2385 2386 /* 2387 * XXX: This serves as a posting read to make sure that the PTE has 2388 * actually been updated. There is some concern that, even though 2389 * registers and PTEs are within the same BAR, they may still be subject 2390 * to different NUMA access patterns. Therefore, even with the way we assume 2391 * hardware should work, we must keep this posting read for paranoia. 2392 */ 2393 if (i != 0) 2394 WARN_ON(readq(&gtt_entries[i-1]) 2395 != gen8_pte_encode(addr, level, true)); 2396 2397 /* This next bit makes the above posting read even more important. We 2398 * want to flush the TLBs only after we're certain all the PTE updates 2399 * have finished. 2400 */ 2401 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2402 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2403 2404 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2405 } 2406 2407 struct insert_entries { 2408 struct i915_address_space *vm; 2409 struct sg_table *st; 2410 uint64_t start; 2411 enum i915_cache_level level; 2412 u32 flags; 2413 }; 2414 2415 static int gen8_ggtt_insert_entries__cb(void *_arg) 2416 { 2417 struct insert_entries *arg = _arg; 2418 gen8_ggtt_insert_entries(arg->vm, arg->st, 2419 arg->start, arg->level, arg->flags); 2420 return 0; 2421 } 2422 2423 static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm, 2424 struct sg_table *st, 2425 uint64_t start, 2426 enum i915_cache_level level, 2427 u32 flags) 2428 { 2429 struct insert_entries arg = { vm, st, start, level, flags }; 2430 #ifndef __DragonFly__ 2431 stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL); 2432 #else 2433 /* XXX: is this enough ? 2434 * See Linux commit 5bab6f60cb4d1417ad7c599166bcfec87529c1a2 */ 2435 get_mplock(); 2436 gen8_ggtt_insert_entries__cb(&arg); 2437 rel_mplock(); 2438 #endif 2439 } 2440 2441 /* 2442 * Binds an object into the global gtt with the specified cache level.
The object 2443 * will be accessible to the GPU via commands whose operands reference offsets 2444 * within the global GTT as well as accessible by the GPU through the GMADR 2445 * mapped BAR (dev_priv->mm.gtt->gtt). 2446 */ 2447 static void gen6_ggtt_insert_entries(struct i915_address_space *vm, 2448 struct sg_table *st, 2449 uint64_t start, 2450 enum i915_cache_level level, u32 flags) 2451 { 2452 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2453 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2454 unsigned first_entry = start >> PAGE_SHIFT; 2455 gen6_pte_t __iomem *gtt_entries = 2456 (gen6_pte_t __iomem *)ggtt->gsm + first_entry; 2457 int i = 0; 2458 struct sg_page_iter sg_iter; 2459 dma_addr_t addr = 0; 2460 int rpm_atomic_seq; 2461 2462 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2463 2464 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { 2465 addr = sg_page_iter_dma_address(&sg_iter); 2466 iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]); 2467 i++; 2468 } 2469 2470 /* XXX: This serves as a posting read to make sure that the PTE has 2471 * actually been updated. There is some concern that, even though 2472 * registers and PTEs are within the same BAR, they may still be subject 2473 * to different NUMA access patterns. Therefore, even with the way we assume 2474 * hardware should work, we must keep this posting read for paranoia. 2475 */ 2476 if (i != 0) { 2477 unsigned long gtt = readl(&gtt_entries[i-1]); 2478 WARN_ON(gtt != vm->pte_encode(addr, level, true, flags)); 2479 } 2480 2481 /* This next bit makes the above posting read even more important. We 2482 * want to flush the TLBs only after we're certain all the PTE updates 2483 * have finished. 2484 */ 2485 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2486 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2487 2488 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2489 } 2490 2491 static void gen8_ggtt_clear_range(struct i915_address_space *vm, 2492 uint64_t start, 2493 uint64_t length, 2494 bool use_scratch) 2495 { 2496 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2497 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2498 unsigned first_entry = start >> PAGE_SHIFT; 2499 unsigned num_entries = length >> PAGE_SHIFT; 2500 gen8_pte_t scratch_pte, __iomem *gtt_base = 2501 (gen8_pte_t __iomem *)ggtt->gsm + first_entry; 2502 const int max_entries = ggtt_total_entries(ggtt) - first_entry; 2503 int i; 2504 int rpm_atomic_seq; 2505 2506 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2507 2508 if (WARN(num_entries > max_entries, 2509 "First entry = %d; Num entries = %d (max=%d)\n", 2510 first_entry, num_entries, max_entries)) 2511 num_entries = max_entries; 2512 2513 scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), 2514 I915_CACHE_LLC, 2515 use_scratch); 2516 for (i = 0; i < num_entries; i++) 2517 gen8_set_pte(&gtt_base[i], scratch_pte); 2518 readl(gtt_base); 2519 2520 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2521 } 2522 2523 static void gen6_ggtt_clear_range(struct i915_address_space *vm, 2524 uint64_t start, 2525 uint64_t length, 2526 bool use_scratch) 2527 { 2528 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2529 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2530 unsigned first_entry = start >> PAGE_SHIFT; 2531 unsigned num_entries = length >> PAGE_SHIFT; 2532 gen6_pte_t scratch_pte, __iomem *gtt_base = 2533 (gen6_pte_t __iomem *)ggtt->gsm + first_entry; 2534 const int max_entries = ggtt_total_entries(ggtt) - first_entry; 2535 int i; 2536 int rpm_atomic_seq; 2537 2538 rpm_atomic_seq =
assert_rpm_atomic_begin(dev_priv); 2539 2540 if (WARN(num_entries > max_entries, 2541 "First entry = %d; Num entries = %d (max=%d)\n", 2542 first_entry, num_entries, max_entries)) 2543 num_entries = max_entries; 2544 2545 scratch_pte = vm->pte_encode(px_dma(vm->scratch_page), 2546 I915_CACHE_LLC, use_scratch, 0); 2547 2548 for (i = 0; i < num_entries; i++) 2549 iowrite32(scratch_pte, &gtt_base[i]); 2550 readl(gtt_base); 2551 2552 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2553 } 2554 2555 static void i915_ggtt_insert_entries(struct i915_address_space *vm, 2556 struct sg_table *pages, 2557 uint64_t start, 2558 enum i915_cache_level cache_level, u32 unused) 2559 { 2560 struct drm_i915_private *dev_priv = vm->dev->dev_private; 2561 unsigned int flags = (cache_level == I915_CACHE_NONE) ? 2562 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; 2563 int rpm_atomic_seq; 2564 2565 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2566 2567 intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags); 2568 2569 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2570 2571 } 2572 2573 static void i915_ggtt_clear_range(struct i915_address_space *vm, 2574 uint64_t start, 2575 uint64_t length, 2576 bool unused) 2577 { 2578 struct drm_i915_private *dev_priv = vm->dev->dev_private; 2579 unsigned first_entry = start >> PAGE_SHIFT; 2580 unsigned num_entries = length >> PAGE_SHIFT; 2581 int rpm_atomic_seq; 2582 2583 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2584 2585 intel_gtt_clear_range(first_entry, num_entries); 2586 2587 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2588 } 2589 2590 static int ggtt_bind_vma(struct i915_vma *vma, 2591 enum i915_cache_level cache_level, 2592 u32 flags) 2593 { 2594 struct drm_i915_gem_object *obj = vma->obj; 2595 u32 pte_flags = 0; 2596 int ret; 2597 2598 ret = i915_get_ggtt_vma_pages(vma); 2599 if (ret) 2600 return ret; 2601 2602 /* Currently applicable only to VLV */ 2603 if (obj->gt_ro) 2604 pte_flags |= PTE_READ_ONLY; 2605 2606 vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages, 2607 vma->node.start, 2608 cache_level, pte_flags); 2609 2610 /* 2611 * Without aliasing PPGTT there's no difference between 2612 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally 2613 * upgrade to both bound if we bind either to avoid double-binding.
2614 */ 2615 vma->bound |= GLOBAL_BIND | LOCAL_BIND; 2616 2617 return 0; 2618 } 2619 2620 static int aliasing_gtt_bind_vma(struct i915_vma *vma, 2621 enum i915_cache_level cache_level, 2622 u32 flags) 2623 { 2624 u32 pte_flags; 2625 int ret; 2626 2627 ret = i915_get_ggtt_vma_pages(vma); 2628 if (ret) 2629 return ret; 2630 2631 /* Currently applicable only to VLV */ 2632 pte_flags = 0; 2633 if (vma->obj->gt_ro) 2634 pte_flags |= PTE_READ_ONLY; 2635 2636 2637 if (flags & GLOBAL_BIND) { 2638 vma->vm->insert_entries(vma->vm, 2639 vma->ggtt_view.pages, 2640 vma->node.start, 2641 cache_level, pte_flags); 2642 } 2643 2644 if (flags & LOCAL_BIND) { 2645 struct i915_hw_ppgtt *appgtt = 2646 to_i915(vma->vm->dev)->mm.aliasing_ppgtt; 2647 appgtt->base.insert_entries(&appgtt->base, 2648 vma->ggtt_view.pages, 2649 vma->node.start, 2650 cache_level, pte_flags); 2651 } 2652 2653 return 0; 2654 } 2655 2656 static void ggtt_unbind_vma(struct i915_vma *vma) 2657 { 2658 struct drm_device *dev = vma->vm->dev; 2659 struct drm_i915_private *dev_priv = dev->dev_private; 2660 struct drm_i915_gem_object *obj = vma->obj; 2661 const uint64_t size = min_t(uint64_t, 2662 obj->base.size, 2663 vma->node.size); 2664 2665 if (vma->bound & GLOBAL_BIND) { 2666 vma->vm->clear_range(vma->vm, 2667 vma->node.start, 2668 size, 2669 true); 2670 } 2671 2672 if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) { 2673 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt; 2674 2675 appgtt->base.clear_range(&appgtt->base, 2676 vma->node.start, 2677 size, 2678 true); 2679 } 2680 } 2681 2682 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj) 2683 { 2684 struct drm_device *dev = obj->base.dev; 2685 struct drm_i915_private *dev_priv = dev->dev_private; 2686 bool interruptible; 2687 2688 interruptible = do_idling(dev_priv); 2689 2690 dma_unmap_sg(&dev->pdev->dev, obj->pages->sgl, obj->pages->nents, 2691 PCI_DMA_BIDIRECTIONAL); 2692 2693 undo_idling(dev_priv, interruptible); 2694 } 2695 2696 static void i915_gtt_color_adjust(struct drm_mm_node *node, 2697 unsigned long color, 2698 u64 *start, 2699 u64 *end) 2700 { 2701 if (node->color != color) 2702 *start += 4096; 2703 2704 if (!list_empty(&node->node_list)) { 2705 node = list_entry(node->node_list.next, 2706 struct drm_mm_node, 2707 node_list); 2708 if (node->allocated && node->color != color) 2709 *end -= 4096; 2710 } 2711 } 2712 2713 static int i915_gem_setup_global_gtt(struct drm_device *dev, 2714 u64 start, 2715 u64 mappable_end, 2716 u64 end) 2717 { 2718 /* Let GEM Manage all of the aperture. 2719 * 2720 * However, leave one page at the end still bound to the scratch page. 2721 * There are a number of places where the hardware apparently prefetches 2722 * past the end of the object, and we've seen multiple hangs with the 2723 * GPU head pointer stuck in a batchbuffer bound at the last page of the 2724 * aperture. One page should be enough to keep any prefetching inside 2725 * of the aperture. 
2726 */ 2727 struct drm_i915_private *dev_priv = to_i915(dev); 2728 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2729 struct drm_mm_node *entry; 2730 struct drm_i915_gem_object *obj; 2731 unsigned long hole_start, hole_end; 2732 int ret; 2733 unsigned long mappable; 2734 int error; 2735 2736 mappable = min(end, mappable_end) - start; 2737 BUG_ON(mappable_end > end); 2738 2739 ggtt->base.start = start; 2740 2741 /* Subtract the guard page before address space initialization to 2742 * shrink the range used by drm_mm */ 2743 ggtt->base.total = end - start - PAGE_SIZE; 2744 i915_address_space_init(&ggtt->base, dev_priv); 2745 ggtt->base.total += PAGE_SIZE; 2746 2747 if (intel_vgpu_active(dev)) { 2748 ret = intel_vgt_balloon(dev); 2749 if (ret) 2750 return ret; 2751 } 2752 2753 if (!HAS_LLC(dev)) 2754 ggtt->base.mm.color_adjust = i915_gtt_color_adjust; 2755 2756 /* Mark any preallocated objects as occupied */ 2757 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 2758 struct i915_vma *vma = i915_gem_obj_to_vma(obj, &ggtt->base); 2759 2760 DRM_DEBUG_KMS("reserving preallocated space: %llx + %zx\n", 2761 i915_gem_obj_ggtt_offset(obj), obj->base.size); 2762 2763 WARN_ON(i915_gem_obj_ggtt_bound(obj)); 2764 ret = drm_mm_reserve_node(&ggtt->base.mm, &vma->node); 2765 if (ret) { 2766 DRM_DEBUG_KMS("Reservation failed: %i\n", ret); 2767 return ret; 2768 } 2769 vma->bound |= GLOBAL_BIND; 2770 __i915_vma_set_map_and_fenceable(vma); 2771 list_add_tail(&vma->vm_link, &ggtt->base.inactive_list); 2772 } 2773 2774 /* Clear any non-preallocated blocks */ 2775 drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) { 2776 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", 2777 hole_start, hole_end); 2778 ggtt->base.clear_range(&ggtt->base, hole_start, 2779 hole_end - hole_start, true); 2780 } 2781 2782 #ifdef __DragonFly__ 2783 device_printf(dev->dev->bsddev, 2784 "taking over the fictitious range 0x%llx-0x%llx\n", 2785 dev_priv->ggtt.mappable_base + start, dev_priv->ggtt.mappable_base + start + mappable); 2786 error = -vm_phys_fictitious_reg_range(dev_priv->ggtt.mappable_base + start, 2787 dev_priv->ggtt.mappable_base + start + mappable, VM_MEMATTR_WRITE_COMBINING); 2788 #endif 2789 2790 /* And finally clear the reserved guard page */ 2791 ggtt->base.clear_range(&ggtt->base, end - PAGE_SIZE, PAGE_SIZE, true); 2792 2793 if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) { 2794 struct i915_hw_ppgtt *ppgtt; 2795 2796 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 2797 if (!ppgtt) 2798 return -ENOMEM; 2799 2800 ret = __hw_ppgtt_init(dev, ppgtt); 2801 if (ret) { 2802 ppgtt->base.cleanup(&ppgtt->base); 2803 kfree(ppgtt); 2804 return ret; 2805 } 2806 2807 if (ppgtt->base.allocate_va_range) 2808 ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0, 2809 ppgtt->base.total); 2810 if (ret) { 2811 ppgtt->base.cleanup(&ppgtt->base); 2812 kfree(ppgtt); 2813 return ret; 2814 } 2815 2816 ppgtt->base.clear_range(&ppgtt->base, 2817 ppgtt->base.start, 2818 ppgtt->base.total, 2819 true); 2820 2821 dev_priv->mm.aliasing_ppgtt = ppgtt; 2822 WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma); 2823 ggtt->base.bind_vma = aliasing_gtt_bind_vma; 2824 } 2825 2826 return 0; 2827 } 2828 2829 /** 2830 * i915_gem_init_ggtt - Initialize GEM for Global GTT 2831 * @dev: DRM device 2832 */ 2833 void i915_gem_init_ggtt(struct drm_device *dev) 2834 { 2835 struct drm_i915_private *dev_priv = to_i915(dev); 2836 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2837 2838 i915_gem_setup_global_gtt(dev, 0, ggtt->mappable_end, ggtt->base.total); 
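/* i915_gem_setup_global_gtt() hands the whole range to GEM, keeps the last page as a scratch-backed guard page, and, when aliasing rather than full PPGTT is in use, installs aliasing_gtt_bind_vma over the default ggtt_bind_vma. */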
2839 } 2840 2841 /** 2842 * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization 2843 * @dev: DRM device 2844 */ 2845 void i915_ggtt_cleanup_hw(struct drm_device *dev) 2846 { 2847 struct drm_i915_private *dev_priv = to_i915(dev); 2848 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2849 2850 if (dev_priv->mm.aliasing_ppgtt) { 2851 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; 2852 2853 ppgtt->base.cleanup(&ppgtt->base); 2854 kfree(ppgtt); 2855 } 2856 2857 i915_gem_cleanup_stolen(dev); 2858 2859 if (drm_mm_initialized(&ggtt->base.mm)) { 2860 if (intel_vgpu_active(dev)) 2861 intel_vgt_deballoon(); 2862 2863 drm_mm_takedown(&ggtt->base.mm); 2864 list_del(&ggtt->base.global_link); 2865 } 2866 2867 ggtt->base.cleanup(&ggtt->base); 2868 } 2869 2870 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) 2871 { 2872 snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; 2873 snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; 2874 return snb_gmch_ctl << 20; 2875 } 2876 2877 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) 2878 { 2879 bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; 2880 bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; 2881 if (bdw_gmch_ctl) 2882 bdw_gmch_ctl = 1 << bdw_gmch_ctl; 2883 2884 #ifdef CONFIG_X86_32 2885 /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */ 2886 if (bdw_gmch_ctl > 4) 2887 bdw_gmch_ctl = 4; 2888 #endif 2889 2890 return bdw_gmch_ctl << 20; 2891 } 2892 2893 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) 2894 { 2895 gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; 2896 gmch_ctrl &= SNB_GMCH_GGMS_MASK; 2897 2898 if (gmch_ctrl) 2899 return 1 << (20 + gmch_ctrl); 2900 2901 return 0; 2902 } 2903 2904 static size_t gen6_get_stolen_size(u16 snb_gmch_ctl) 2905 { 2906 snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; 2907 snb_gmch_ctl &= SNB_GMCH_GMS_MASK; 2908 return snb_gmch_ctl << 25; /* 32 MB units */ 2909 } 2910 2911 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl) 2912 { 2913 bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2914 bdw_gmch_ctl &= BDW_GMCH_GMS_MASK; 2915 return bdw_gmch_ctl << 25; /* 32 MB units */ 2916 } 2917 2918 static size_t chv_get_stolen_size(u16 gmch_ctrl) 2919 { 2920 gmch_ctrl >>= SNB_GMCH_GMS_SHIFT; 2921 gmch_ctrl &= SNB_GMCH_GMS_MASK; 2922 2923 /* 2924 * 0x0 to 0x10: 32MB increments starting at 0MB 2925 * 0x11 to 0x16: 4MB increments starting at 8MB 2926 * 0x17 to 0x1d: 4MB increments start at 36MB 2927 */ 2928 if (gmch_ctrl < 0x11) 2929 return gmch_ctrl << 25; 2930 else if (gmch_ctrl < 0x17) 2931 return (gmch_ctrl - 0x11 + 2) << 22; 2932 else 2933 return (gmch_ctrl - 0x17 + 9) << 22; 2934 } 2935 2936 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) 2937 { 2938 gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2939 gen9_gmch_ctl &= BDW_GMCH_GMS_MASK; 2940 2941 if (gen9_gmch_ctl < 0xf0) 2942 return gen9_gmch_ctl << 25; /* 32 MB units */ 2943 else 2944 /* 4MB increments starting at 0xf0 for 4MB */ 2945 return (gen9_gmch_ctl - 0xf0 + 1) << 22; 2946 } 2947 2948 static int ggtt_probe_common(struct drm_device *dev, 2949 size_t gtt_size) 2950 { 2951 struct drm_i915_private *dev_priv = to_i915(dev); 2952 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2953 struct i915_page_scratch *scratch_page; 2954 phys_addr_t ggtt_phys_addr; 2955 2956 /* For Modern GENs the PTEs and register space are split in the BAR */ 2957 ggtt_phys_addr = pci_resource_start(dev->pdev, 0) + 2958 (pci_resource_len(dev->pdev, 0) / 2); 2959 2960 /* 2961 * On BXT writes larger than 64 bit to the GTT pagetable range will be 2962 * dropped. 
For WC mappings in general we have 64 byte burst writes 2963 * when the WC buffer is flushed, so we can't use it, but have to 2964 * resort to an uncached mapping. The WC issue is easily caught by the 2965 * readback check when writing GTT PTE entries. 2966 */ 2967 if (IS_BROXTON(dev)) 2968 ggtt->gsm = ioremap_nocache(ggtt_phys_addr, gtt_size); 2969 else 2970 ggtt->gsm = ioremap_wc(ggtt_phys_addr, gtt_size); 2971 if (!ggtt->gsm) { 2972 DRM_ERROR("Failed to map the gtt page table\n"); 2973 return -ENOMEM; 2974 } 2975 2976 scratch_page = alloc_scratch_page(dev); 2977 if (IS_ERR(scratch_page)) { 2978 DRM_ERROR("Scratch setup failed\n"); 2979 /* iounmap will also get called at remove, but meh */ 2980 iounmap(ggtt->gsm); 2981 return PTR_ERR(scratch_page); 2982 } 2983 2984 ggtt->base.scratch_page = scratch_page; 2985 2986 return 0; 2987 } 2988 2989 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability 2990 * bits. When using advanced contexts each context stores its own PAT, but 2991 * writing this data shouldn't be harmful even in those cases. */ 2992 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) 2993 { 2994 uint64_t pat; 2995 2996 pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ 2997 GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */ 2998 GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */ 2999 GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */ 3000 GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) | 3001 GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) | 3002 GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) | 3003 GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); 3004 3005 if (!USES_PPGTT(dev_priv)) 3006 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry, 3007 * so RTL will always use the value corresponding to 3008 * pat_sel = 000". 3009 * So let's disable cache for GGTT to avoid screen corruptions. 3010 * MOCS still can be used though. 3011 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work 3012 * before this patch, i.e. the same uncached + snooping access 3013 * like on gen6/7 seems to be in effect. 3014 * - So this just fixes blitter/render access. Again it looks 3015 * like it's not just uncached access, but uncached + snooping. 3016 * So we can still hold onto all our assumptions wrt cpu 3017 * clflushing on LLC machines. 3018 */ 3019 pat = GEN8_PPAT(0, GEN8_PPAT_UC); 3020 3021 /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b 3022 * write would work. */ 3023 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat); 3024 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); 3025 } 3026 3027 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) 3028 { 3029 uint64_t pat; 3030 3031 /* 3032 * Map WB on BDW to snooped on CHV. 3033 * 3034 * Only the snoop bit has meaning for CHV, the rest is 3035 * ignored. 3036 * 3037 * The hardware will never snoop for certain types of accesses: 3038 * - CPU GTT (GMADR->GGTT->no snoop->memory) 3039 * - PPGTT page tables 3040 * - some other special cycles 3041 * 3042 * As with BDW, we also need to consider the following for GT accesses: 3043 * "For GGTT, there is NO pat_sel[2:0] from the entry, 3044 * so RTL will always use the value corresponding to 3045 * pat_sel = 000". 3046 * Which means we must set the snoop bit in PAT entry 0 3047 * in order to keep the global status page working. 
3048 */ 3049 pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) | 3050 GEN8_PPAT(1, 0) | 3051 GEN8_PPAT(2, 0) | 3052 GEN8_PPAT(3, 0) | 3053 GEN8_PPAT(4, CHV_PPAT_SNOOP) | 3054 GEN8_PPAT(5, CHV_PPAT_SNOOP) | 3055 GEN8_PPAT(6, CHV_PPAT_SNOOP) | 3056 GEN8_PPAT(7, CHV_PPAT_SNOOP); 3057 3058 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat); 3059 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); 3060 } 3061 3062 static int gen8_gmch_probe(struct i915_ggtt *ggtt) 3063 { 3064 struct drm_device *dev = ggtt->base.dev; 3065 struct drm_i915_private *dev_priv = to_i915(dev); 3066 u16 snb_gmch_ctl; 3067 int ret; 3068 3069 /* TODO: We're not aware of mappable constraints on gen8 yet */ 3070 ggtt->mappable_base = pci_resource_start(dev->pdev, 2); 3071 ggtt->mappable_end = pci_resource_len(dev->pdev, 2); 3072 3073 #if 0 3074 if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39))) 3075 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39)); 3076 #endif 3077 3078 pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 3079 3080 if (INTEL_INFO(dev)->gen >= 9) { 3081 ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl); 3082 ggtt->size = gen8_get_total_gtt_size(snb_gmch_ctl); 3083 } else if (IS_CHERRYVIEW(dev)) { 3084 ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl); 3085 ggtt->size = chv_get_total_gtt_size(snb_gmch_ctl); 3086 } else { 3087 ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl); 3088 ggtt->size = gen8_get_total_gtt_size(snb_gmch_ctl); 3089 } 3090 3091 ggtt->base.total = (ggtt->size / sizeof(gen8_pte_t)) << PAGE_SHIFT; 3092 3093 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) 3094 chv_setup_private_ppat(dev_priv); 3095 else 3096 bdw_setup_private_ppat(dev_priv); 3097 3098 ret = ggtt_probe_common(dev, ggtt->size); 3099 3100 ggtt->base.clear_range = gen8_ggtt_clear_range; 3101 if (IS_CHERRYVIEW(dev_priv)) 3102 ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL; 3103 else 3104 ggtt->base.insert_entries = gen8_ggtt_insert_entries; 3105 ggtt->base.bind_vma = ggtt_bind_vma; 3106 ggtt->base.unbind_vma = ggtt_unbind_vma; 3107 3108 return ret; 3109 } 3110 3111 static int gen6_gmch_probe(struct i915_ggtt *ggtt) 3112 { 3113 struct drm_device *dev = ggtt->base.dev; 3114 u16 snb_gmch_ctl; 3115 int ret; 3116 3117 ggtt->mappable_base = pci_resource_start(dev->pdev, 2); 3118 ggtt->mappable_end = pci_resource_len(dev->pdev, 2); 3119 3120 /* 64/512MB is the current min/max we actually know of, but this is just 3121 * a coarse sanity check. 
3122 */ 3123 if ((ggtt->mappable_end < (64<<20) || (ggtt->mappable_end > (512<<20)))) { 3124 DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end); 3125 return -ENXIO; 3126 } 3127 3128 #if 0 3129 if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40))) 3130 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40)); 3131 #endif 3132 pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 3133 3134 ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl); 3135 ggtt->size = gen6_get_total_gtt_size(snb_gmch_ctl); 3136 ggtt->base.total = (ggtt->size / sizeof(gen6_pte_t)) << PAGE_SHIFT; 3137 3138 ret = ggtt_probe_common(dev, ggtt->size); 3139 3140 ggtt->base.clear_range = gen6_ggtt_clear_range; 3141 ggtt->base.insert_entries = gen6_ggtt_insert_entries; 3142 ggtt->base.bind_vma = ggtt_bind_vma; 3143 ggtt->base.unbind_vma = ggtt_unbind_vma; 3144 3145 return ret; 3146 } 3147 3148 static void gen6_gmch_remove(struct i915_address_space *vm) 3149 { 3150 struct i915_ggtt *ggtt = container_of(vm, struct i915_ggtt, base); 3151 3152 iounmap(ggtt->gsm); 3153 free_scratch_page(vm->dev, vm->scratch_page); 3154 } 3155 3156 static int i915_gmch_probe(struct i915_ggtt *ggtt) 3157 { 3158 struct drm_device *dev = ggtt->base.dev; 3159 struct drm_i915_private *dev_priv = to_i915(dev); 3160 #if 0 3161 int ret; 3162 3163 ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL); 3164 if (!ret) { 3165 DRM_ERROR("failed to set up gmch\n"); 3166 return -EIO; 3167 } 3168 #endif 3169 3170 intel_gtt_get(&ggtt->base.total, &ggtt->stolen_size, 3171 &ggtt->mappable_base, &ggtt->mappable_end); 3172 3173 ggtt->do_idle_maps = needs_idle_maps(dev_priv->dev); 3174 ggtt->base.insert_entries = i915_ggtt_insert_entries; 3175 ggtt->base.clear_range = i915_ggtt_clear_range; 3176 ggtt->base.bind_vma = ggtt_bind_vma; 3177 ggtt->base.unbind_vma = ggtt_unbind_vma; 3178 3179 if (unlikely(ggtt->do_idle_maps)) 3180 DRM_INFO("applying Ironlake quirks for intel_iommu\n"); 3181 3182 return 0; 3183 } 3184 3185 static void i915_gmch_remove(struct i915_address_space *vm) 3186 { 3187 intel_gmch_remove(); 3188 } 3189 3190 /** 3191 * i915_ggtt_init_hw - Initialize GGTT hardware 3192 * @dev: DRM device 3193 */ 3194 int i915_ggtt_init_hw(struct drm_device *dev) 3195 { 3196 struct drm_i915_private *dev_priv = to_i915(dev); 3197 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3198 int ret; 3199 3200 if (INTEL_INFO(dev)->gen <= 5) { 3201 ggtt->probe = i915_gmch_probe; 3202 ggtt->base.cleanup = i915_gmch_remove; 3203 } else if (INTEL_INFO(dev)->gen < 8) { 3204 ggtt->probe = gen6_gmch_probe; 3205 ggtt->base.cleanup = gen6_gmch_remove; 3206 3207 if (HAS_EDRAM(dev)) 3208 ggtt->base.pte_encode = iris_pte_encode; 3209 else if (IS_HASWELL(dev)) 3210 ggtt->base.pte_encode = hsw_pte_encode; 3211 else if (IS_VALLEYVIEW(dev)) 3212 ggtt->base.pte_encode = byt_pte_encode; 3213 else if (INTEL_INFO(dev)->gen >= 7) 3214 ggtt->base.pte_encode = ivb_pte_encode; 3215 else 3216 ggtt->base.pte_encode = snb_pte_encode; 3217 } else { 3218 ggtt->probe = gen8_gmch_probe; 3219 ggtt->base.cleanup = gen6_gmch_remove; 3220 } 3221 3222 ggtt->base.dev = dev; 3223 ggtt->base.is_ggtt = true; 3224 3225 ret = ggtt->probe(ggtt); 3226 if (ret) 3227 return ret; 3228 3229 if ((ggtt->base.total - 1) >> 32) { 3230 DRM_ERROR("We never expected a Global GTT with more than 32bits " 3231 "of address space!
Found %lldM!\n", 3232 ggtt->base.total >> 20); 3233 ggtt->base.total = 1ULL << 32; 3234 ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total); 3235 } 3236 3237 /* 3238 * Initialise stolen early so that we may reserve preallocated 3239 * objects for the BIOS to KMS transition. 3240 */ 3241 ret = i915_gem_init_stolen(dev); 3242 if (ret) 3243 goto out_gtt_cleanup; 3244 3245 /* GMADR is the PCI mmio aperture into the global GTT. */ 3246 DRM_INFO("Memory usable by graphics device = %lluM\n", 3247 ggtt->base.total >> 20); 3248 DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20); 3249 DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", ggtt->stolen_size >> 20); 3250 #ifdef CONFIG_INTEL_IOMMU 3251 if (intel_iommu_gfx_mapped) 3252 DRM_INFO("VT-d active for gfx access\n"); 3253 #endif 3254 /* 3255 * i915.enable_ppgtt is read-only, so do an early pass to validate the 3256 * user's requested state against the hardware/driver capabilities. We 3257 * do this now so that we can print out any log messages once rather 3258 * than every time we check intel_enable_ppgtt(). 3259 */ 3260 i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt); 3261 DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt); 3262 3263 return 0; 3264 3265 out_gtt_cleanup: 3266 ggtt->base.cleanup(&ggtt->base); 3267 3268 return ret; 3269 } 3270 3271 int i915_ggtt_enable_hw(struct drm_device *dev) 3272 { 3273 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) 3274 return -EIO; 3275 3276 return 0; 3277 } 3278 3279 void i915_gem_restore_gtt_mappings(struct drm_device *dev) 3280 { 3281 struct drm_i915_private *dev_priv = to_i915(dev); 3282 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3283 struct drm_i915_gem_object *obj; 3284 struct i915_vma *vma; 3285 bool flush; 3286 3287 i915_check_and_clear_faults(dev); 3288 3289 /* First fill our portion of the GTT with scratch pages */ 3290 ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total, 3291 true); 3292 3293 /* Cache flush objects bound into GGTT and rebind them. 
*/ 3294 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 3295 flush = false; 3296 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3297 if (vma->vm != &ggtt->base) 3298 continue; 3299 3300 WARN_ON(i915_vma_bind(vma, obj->cache_level, 3301 PIN_UPDATE)); 3302 3303 flush = true; 3304 } 3305 3306 if (flush) 3307 i915_gem_clflush_object(obj, obj->pin_display); 3308 } 3309 3310 if (INTEL_INFO(dev)->gen >= 8) { 3311 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) 3312 chv_setup_private_ppat(dev_priv); 3313 else 3314 bdw_setup_private_ppat(dev_priv); 3315 3316 return; 3317 } 3318 3319 if (USES_PPGTT(dev)) { 3320 struct i915_address_space *vm; 3321 3322 list_for_each_entry(vm, &dev_priv->vm_list, global_link) { 3323 /* TODO: Perhaps it shouldn't be gen6 specific */ 3324 3325 struct i915_hw_ppgtt *ppgtt; 3326 3327 if (vm->is_ggtt) 3328 ppgtt = dev_priv->mm.aliasing_ppgtt; 3329 else 3330 ppgtt = i915_vm_to_ppgtt(vm); 3331 3332 gen6_write_page_range(dev_priv, &ppgtt->pd, 3333 0, ppgtt->base.total); 3334 } 3335 } 3336 3337 i915_ggtt_flush(dev_priv); 3338 } 3339 3340 static struct i915_vma * 3341 __i915_gem_vma_create(struct drm_i915_gem_object *obj, 3342 struct i915_address_space *vm, 3343 const struct i915_ggtt_view *ggtt_view) 3344 { 3345 struct i915_vma *vma; 3346 3347 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) 3348 return ERR_PTR(-EINVAL); 3349 3350 vma = kzalloc(sizeof(*vma), GFP_KERNEL); 3351 if (vma == NULL) 3352 return ERR_PTR(-ENOMEM); 3353 3354 INIT_LIST_HEAD(&vma->vm_link); 3355 INIT_LIST_HEAD(&vma->obj_link); 3356 INIT_LIST_HEAD(&vma->exec_list); 3357 vma->vm = vm; 3358 vma->obj = obj; 3359 vma->is_ggtt = i915_is_ggtt(vm); 3360 3361 if (i915_is_ggtt(vm)) 3362 vma->ggtt_view = *ggtt_view; 3363 else 3364 i915_ppgtt_get(i915_vm_to_ppgtt(vm)); 3365 3366 list_add_tail(&vma->obj_link, &obj->vma_list); 3367 3368 return vma; 3369 } 3370 3371 struct i915_vma * 3372 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, 3373 struct i915_address_space *vm) 3374 { 3375 struct i915_vma *vma; 3376 3377 vma = i915_gem_obj_to_vma(obj, vm); 3378 if (!vma) 3379 vma = __i915_gem_vma_create(obj, vm, 3380 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL); 3381 3382 return vma; 3383 } 3384 3385 struct i915_vma * 3386 i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj, 3387 const struct i915_ggtt_view *view) 3388 { 3389 struct drm_device *dev = obj->base.dev; 3390 struct drm_i915_private *dev_priv = to_i915(dev); 3391 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3392 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); 3393 3394 if (!vma) 3395 vma = __i915_gem_vma_create(obj, &ggtt->base, view); 3396 3397 return vma; 3398 3399 } 3400 3401 static struct scatterlist * 3402 rotate_pages(const dma_addr_t *in, unsigned int offset, 3403 unsigned int width, unsigned int height, 3404 unsigned int stride, 3405 struct sg_table *st, struct scatterlist *sg) 3406 { 3407 unsigned int column, row; 3408 unsigned int src_idx; 3409 3410 for (column = 0; column < width; column++) { 3411 src_idx = stride * (height - 1) + column; 3412 for (row = 0; row < height; row++) { 3413 st->nents++; 3414 /* We don't need the pages, but need to initialize 3415 * the entries so the sg list can be happily traversed. 3416 * The only thing we need are DMA addresses. 
3417 */ 3418 sg_set_page(sg, NULL, PAGE_SIZE, 0); 3419 sg_dma_address(sg) = in[offset + src_idx]; 3420 sg_dma_len(sg) = PAGE_SIZE; 3421 sg = sg_next(sg); 3422 src_idx -= stride; 3423 } 3424 } 3425 3426 return sg; 3427 } 3428 3429 static struct sg_table * 3430 intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info, 3431 struct drm_i915_gem_object *obj) 3432 { 3433 unsigned int size_pages = rot_info->plane[0].width * rot_info->plane[0].height; 3434 unsigned int size_pages_uv; 3435 struct sg_page_iter sg_iter; 3436 unsigned long i; 3437 dma_addr_t *page_addr_list; 3438 struct sg_table *st; 3439 unsigned int uv_start_page; 3440 struct scatterlist *sg; 3441 int ret = -ENOMEM; 3442 3443 /* Allocate a temporary list of source pages for random access. */ 3444 page_addr_list = drm_malloc_ab(obj->base.size / PAGE_SIZE, 3445 sizeof(dma_addr_t)); 3446 if (!page_addr_list) 3447 return ERR_PTR(ret); 3448 3449 /* Account for UV plane with NV12. */ 3450 if (rot_info->pixel_format == DRM_FORMAT_NV12) 3451 size_pages_uv = rot_info->plane[1].width * rot_info->plane[1].height; 3452 else 3453 size_pages_uv = 0; 3454 3455 /* Allocate target SG list. */ 3456 st = kmalloc(sizeof(*st), M_DRM, M_WAITOK); 3457 if (!st) 3458 goto err_st_alloc; 3459 3460 ret = sg_alloc_table(st, size_pages + size_pages_uv, GFP_KERNEL); 3461 if (ret) 3462 goto err_sg_alloc; 3463 3464 /* Populate source page list from the object. */ 3465 i = 0; 3466 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { 3467 page_addr_list[i] = sg_page_iter_dma_address(&sg_iter); 3468 i++; 3469 } 3470 3471 st->nents = 0; 3472 sg = st->sgl; 3473 3474 /* Rotate the pages. */ 3475 sg = rotate_pages(page_addr_list, 0, 3476 rot_info->plane[0].width, rot_info->plane[0].height, 3477 rot_info->plane[0].width, 3478 st, sg); 3479 3480 /* Append the UV plane if NV12. */ 3481 if (rot_info->pixel_format == DRM_FORMAT_NV12) { 3482 uv_start_page = size_pages; 3483 3484 /* Check for tile-row un-alignment. */ 3485 if (offset_in_page(rot_info->uv_offset)) 3486 uv_start_page--; 3487 3488 rot_info->uv_start_page = uv_start_page; 3489 3490 sg = rotate_pages(page_addr_list, rot_info->uv_start_page, 3491 rot_info->plane[1].width, rot_info->plane[1].height, 3492 rot_info->plane[1].width, 3493 st, sg); 3494 } 3495 3496 DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages (%u plane 0)).\n", 3497 obj->base.size, rot_info->plane[0].width, 3498 rot_info->plane[0].height, size_pages + size_pages_uv, 3499 size_pages); 3500 3501 drm_free_large(page_addr_list); 3502 3503 return st; 3504 3505 err_sg_alloc: 3506 kfree(st); 3507 err_st_alloc: 3508 drm_free_large(page_addr_list); 3509 3510 DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! 
(%d) (%ux%u tiles, %u pages (%u plane 0))\n", 3511 obj->base.size, ret, rot_info->plane[0].width, 3512 rot_info->plane[0].height, size_pages + size_pages_uv, 3513 size_pages); 3514 return ERR_PTR(ret); 3515 } 3516 3517 static struct sg_table * 3518 intel_partial_pages(const struct i915_ggtt_view *view, 3519 struct drm_i915_gem_object *obj) 3520 { 3521 struct sg_table *st; 3522 struct scatterlist *sg; 3523 struct sg_page_iter obj_sg_iter; 3524 int ret = -ENOMEM; 3525 3526 st = kmalloc(sizeof(*st), M_DRM, M_WAITOK); 3527 if (!st) 3528 goto err_st_alloc; 3529 3530 ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL); 3531 if (ret) 3532 goto err_sg_alloc; 3533 3534 sg = st->sgl; 3535 st->nents = 0; 3536 for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents, 3537 view->params.partial.offset) 3538 { 3539 if (st->nents >= view->params.partial.size) 3540 break; 3541 3542 sg_set_page(sg, NULL, PAGE_SIZE, 0); 3543 sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter); 3544 sg_dma_len(sg) = PAGE_SIZE; 3545 3546 sg = sg_next(sg); 3547 st->nents++; 3548 } 3549 3550 return st; 3551 3552 err_sg_alloc: 3553 kfree(st); 3554 err_st_alloc: 3555 return ERR_PTR(ret); 3556 } 3557 3558 static int 3559 i915_get_ggtt_vma_pages(struct i915_vma *vma) 3560 { 3561 int ret = 0; 3562 3563 if (vma->ggtt_view.pages) 3564 return 0; 3565 3566 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) 3567 vma->ggtt_view.pages = vma->obj->pages; 3568 else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED) 3569 vma->ggtt_view.pages = 3570 intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj); 3571 else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL) 3572 vma->ggtt_view.pages = 3573 intel_partial_pages(&vma->ggtt_view, vma->obj); 3574 else 3575 WARN_ONCE(1, "GGTT view %u not implemented!\n", 3576 vma->ggtt_view.type); 3577 3578 if (!vma->ggtt_view.pages) { 3579 DRM_ERROR("Failed to get pages for GGTT view type %u!\n", 3580 vma->ggtt_view.type); 3581 ret = -EINVAL; 3582 } else if (IS_ERR(vma->ggtt_view.pages)) { 3583 ret = PTR_ERR(vma->ggtt_view.pages); 3584 vma->ggtt_view.pages = NULL; 3585 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n", 3586 vma->ggtt_view.type, ret); 3587 } 3588 3589 return ret; 3590 } 3591 3592 /** 3593 * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space. 3594 * @vma: VMA to map 3595 * @cache_level: mapping cache level 3596 * @flags: flags like global or local mapping 3597 * 3598 * DMA addresses are taken from the scatter-gather table of this object (or of 3599 * this VMA in case of non-default GGTT views) and PTE entries set up. 3600 * Note that DMA addresses are also the only part of the SG table we care about.
3601 */ 3602 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, 3603 u32 flags) 3604 { 3605 int ret; 3606 u32 bind_flags; 3607 3608 if (WARN_ON(flags == 0)) 3609 return -EINVAL; 3610 3611 bind_flags = 0; 3612 if (flags & PIN_GLOBAL) 3613 bind_flags |= GLOBAL_BIND; 3614 if (flags & PIN_USER) 3615 bind_flags |= LOCAL_BIND; 3616 3617 if (flags & PIN_UPDATE) 3618 bind_flags |= vma->bound; 3619 else 3620 bind_flags &= ~vma->bound; 3621 3622 if (bind_flags == 0) 3623 return 0; 3624 3625 if (vma->bound == 0 && vma->vm->allocate_va_range) { 3626 /* XXX: i915_vma_pin() will fix this +- hack */ 3627 vma->pin_count++; 3628 trace_i915_va_alloc(vma); 3629 ret = vma->vm->allocate_va_range(vma->vm, 3630 vma->node.start, 3631 vma->node.size); 3632 vma->pin_count--; 3633 if (ret) 3634 return ret; 3635 } 3636 3637 ret = vma->vm->bind_vma(vma, cache_level, bind_flags); 3638 if (ret) 3639 return ret; 3640 3641 vma->bound |= bind_flags; 3642 3643 return 0; 3644 } 3645 3646 /** 3647 * i915_ggtt_view_size - Get the size of a GGTT view. 3648 * @obj: Object the view is of. 3649 * @view: The view in question. 3650 * 3651 * @return The size of the GGTT view in bytes. 3652 */ 3653 size_t 3654 i915_ggtt_view_size(struct drm_i915_gem_object *obj, 3655 const struct i915_ggtt_view *view) 3656 { 3657 if (view->type == I915_GGTT_VIEW_NORMAL) { 3658 return obj->base.size; 3659 } else if (view->type == I915_GGTT_VIEW_ROTATED) { 3660 return intel_rotation_info_size(&view->params.rotated) << PAGE_SHIFT; 3661 } else if (view->type == I915_GGTT_VIEW_PARTIAL) { 3662 return view->params.partial.size << PAGE_SHIFT; 3663 } else { 3664 WARN_ONCE(1, "GGTT view %u not implemented!\n", view->type); 3665 return obj->base.size; 3666 } 3667 } 3668