/*
 * Copyright © 2010 Daniel Vetter
 * Copyright © 2011-2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/seq_file.h>
#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "intel_drv.h"

#include <linux/highmem.h>

static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv);
static void chv_setup_private_ppat(struct drm_i915_private *dev_priv);

bool intel_enable_ppgtt(struct drm_device *dev, bool full)
{
	if (i915.enable_ppgtt == 0 || !HAS_ALIASING_PPGTT(dev))
		return false;

	if (i915.enable_ppgtt == 1 && full)
		return false;

#ifdef CONFIG_INTEL_IOMMU
	/* Disable ppgtt on SNB if VT-d is on. */
	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
		DRM_INFO("Disabling PPGTT because VT-d is on\n");
		return false;
	}
#endif

	/* Full ppgtt disabled by default for now due to issues. */
	if (full)
		return HAS_PPGTT(dev) && (i915.enable_ppgtt == 2);
	else
		return HAS_ALIASING_PPGTT(dev);
}

static void ppgtt_bind_vma(struct i915_vma *vma,
			   enum i915_cache_level cache_level,
			   u32 flags);
static void ppgtt_unbind_vma(struct i915_vma *vma);
static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt);

static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
					     enum i915_cache_level level,
					     bool valid)
{
	gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
	pte |= addr;

	switch (level) {
	case I915_CACHE_NONE:
		pte |= PPAT_UNCACHED_INDEX;
		break;
	case I915_CACHE_WT:
		pte |= PPAT_DISPLAY_ELLC_INDEX;
		break;
	default:
		pte |= PPAT_CACHED_INDEX;
		break;
	}

	return pte;
}

static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev,
					       dma_addr_t addr,
					       enum i915_cache_level level)
{
	gen8_ppgtt_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
	pde |= addr;
	if (level != I915_CACHE_NONE)
		pde |= PPAT_CACHED_PDE_INDEX;
	else
		pde |= PPAT_UNCACHED_INDEX;
	return pde;
}
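
/*
 * Illustrative sketch (not built): the gen8 encode helpers above only OR a
 * PPAT *index* into the entry; the cacheability that index resolves to is
 * programmed separately by bdw_setup_private_ppat()/chv_setup_private_ppat()
 * further down. A caller producing a valid, writable, LLC-cached PTE for the
 * shared scratch page, mirroring gen8_ppgtt_clear_range() below, would do:
 */
#if 0
static gen8_gtt_pte_t example_scratch_pte(struct i915_address_space *vm)
{
	/* valid + writable + scratch page address + WB-cached PPAT index */
	return gen8_pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
}
#endif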

static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		WARN_ON(1);
	}

	return pte;
}

static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
		pte |= GEN7_PTE_CACHE_L3_LLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		WARN_ON(1);
	}

	return pte;
}

static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	/* Mark the page as writeable. Other platforms don't have a
	 * setting for read-only/writable, so this matches that behavior.
	 */
	pte |= BYT_PTE_WRITEABLE;

	if (level != I915_CACHE_NONE)
		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;

	return pte;
}

static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	if (level != I915_CACHE_NONE)
		pte |= HSW_WB_LLC_AGE3;

	return pte;
}

static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
				      enum i915_cache_level level,
				      bool valid)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_NONE:
		break;
	case I915_CACHE_WT:
		pte |= HSW_WT_ELLC_LLC_AGE3;
		break;
	default:
		pte |= HSW_WB_ELLC_LLC_AGE3;
		break;
	}

	return pte;
}

/* Broadwell Page Directory Pointer Descriptors */
static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
			  uint64_t val, bool synchronous)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	int ret;

	BUG_ON(entry >= 4);

	if (synchronous) {
		I915_WRITE(GEN8_RING_PDP_UDW(ring, entry), val >> 32);
		I915_WRITE(GEN8_RING_PDP_LDW(ring, entry), (u32)val);
		return 0;
	}

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
	intel_ring_emit(ring, (u32)(val >> 32));
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
	intel_ring_emit(ring, (u32)(val));
	intel_ring_advance(ring);

	return 0;
}

static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct intel_engine_cs *ring,
			  bool synchronous)
{
	int i, ret;

	/* bit of a hack to find the actual last used pd */
	int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;

	for (i = used_pd - 1; i >= 0; i--) {
		dma_addr_t addr = ppgtt->pd_dma_addr[i];
		ret = gen8_write_pdp(ring, i, addr, synchronous);
		if (ret)
			return ret;
	}

	return 0;
}
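
/*
 * The gen8 table walkers below split a GTT offset into pdpe/pde/pte indices.
 * A minimal worked example, assuming the GEN8_*_SHIFT/GEN8_*_MASK values from
 * i915_gem_gtt.h (9 index bits each for pde and pte, 2 bits for the legacy
 * 4-entry PDP):
 */
#if 0
static void example_gen8_decompose(uint64_t offset)
{
	/* offset = (2ULL << GEN8_PDPE_SHIFT) | (5 << GEN8_PDE_SHIFT) |
	 *	    (9 << GEN8_PTE_SHIFT)  ->  pdpe = 2, pde = 5, pte = 9 */
	unsigned pdpe = (offset >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK;
	unsigned pde = (offset >> GEN8_PDE_SHIFT) & GEN8_PDE_MASK;
	unsigned pte = (offset >> GEN8_PTE_SHIFT) & GEN8_PTE_MASK;
}
#endif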

static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
				   uint64_t start,
				   uint64_t length,
				   bool use_scratch)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen8_gtt_pte_t *pt_vaddr, scratch_pte;
	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
	unsigned num_entries = length >> PAGE_SHIFT;
	unsigned last_pte, i;

	scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
				      I915_CACHE_LLC, use_scratch);

	while (num_entries) {
		struct vm_page *page_table = ppgtt->gen8_pt_pages[pdpe][pde];

		last_pte = pte + num_entries;
		if (last_pte > GEN8_PTES_PER_PAGE)
			last_pte = GEN8_PTES_PER_PAGE;

		pt_vaddr = kmap_atomic(page_table);

		for (i = pte; i < last_pte; i++) {
			pt_vaddr[i] = scratch_pte;
			num_entries--;
		}

		if (!HAS_LLC(ppgtt->base.dev))
			drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
		kunmap_atomic(pt_vaddr);

		pte = 0;
		if (++pde == GEN8_PDES_PER_PAGE) {
			pdpe++;
			pde = 0;
		}
	}
}

static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
				      vm_page_t *pages,
				      uint64_t start,
				      unsigned int num_entries,
				      enum i915_cache_level cache_level)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen8_gtt_pte_t *pt_vaddr;
	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
	int i;

	pt_vaddr = NULL;

	for (i = 0; i < num_entries; i++) {
		if (WARN_ON(pdpe >= GEN8_LEGACY_PDPS))
			break;

		if (pt_vaddr == NULL)
			pt_vaddr = kmap_atomic(ppgtt->gen8_pt_pages[pdpe][pde]);

		pt_vaddr[pte] =
			gen8_pte_encode(VM_PAGE_TO_PHYS(pages[i]),
					cache_level, true);
		if (++pte == GEN8_PTES_PER_PAGE) {
			if (!HAS_LLC(ppgtt->base.dev))
				drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
			kunmap_atomic(pt_vaddr);
			pt_vaddr = NULL;
			if (++pde == GEN8_PDES_PER_PAGE) {
				pdpe++;
				pde = 0;
			}
			pte = 0;
		}
	}
	if (pt_vaddr) {
		if (!HAS_LLC(ppgtt->base.dev))
			drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
		kunmap_atomic(pt_vaddr);
	}
}

static void gen8_free_page_tables(struct vm_page **pt_pages)
{
	int i;

	if (pt_pages == NULL)
		return;

	for (i = 0; i < GEN8_PDES_PER_PAGE; i++)
		if (pt_pages[i])
			__free_pages(pt_pages[i], 0);
}

static void gen8_ppgtt_free(const struct i915_hw_ppgtt *ppgtt)
{
	int i;

	for (i = 0; i < ppgtt->num_pd_pages; i++) {
		gen8_free_page_tables(ppgtt->gen8_pt_pages[i]);
		kfree(ppgtt->gen8_pt_pages[i]);
		kfree(ppgtt->gen8_pt_dma_addr[i]);
	}

	__free_pages(ppgtt->pd_pages, get_order(ppgtt->num_pd_pages << PAGE_SHIFT));
}

static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
{
	struct pci_dev *hwdev = ppgtt->base.dev->pdev;
	int i, j;

	for (i = 0; i < ppgtt->num_pd_pages; i++) {
		/* TODO: In the future we'll support sparse mappings, so this
		 * will have to change. */
		if (!ppgtt->pd_dma_addr[i])
			continue;

		pci_unmap_page(hwdev, ppgtt->pd_dma_addr[i], PAGE_SIZE,
			       PCI_DMA_BIDIRECTIONAL);

		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
			dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
			if (addr)
				pci_unmap_page(hwdev, addr, PAGE_SIZE,
					       PCI_DMA_BIDIRECTIONAL);
		}
	}
}

static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);

	list_del(&vm->global_link);
	drm_mm_takedown(&vm->mm);

	gen8_ppgtt_unmap_pages(ppgtt);
	gen8_ppgtt_free(ppgtt);
}

static struct vm_page **__gen8_alloc_page_tables(void)
{
	struct vm_page **pt_pages;
	int i;

	pt_pages = kcalloc(GEN8_PDES_PER_PAGE, sizeof(struct vm_page *), GFP_KERNEL);
	if (!pt_pages)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < GEN8_PDES_PER_PAGE; i++) {
		pt_pages[i] = alloc_page(GFP_KERNEL);
		if (!pt_pages[i])
			goto bail;
	}

	return pt_pages;

bail:
	gen8_free_page_tables(pt_pages);
	kfree(pt_pages);
	return ERR_PTR(-ENOMEM);
}

static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt,
					   const int max_pdp)
{
	struct vm_page **pt_pages[GEN8_LEGACY_PDPS];
	int i, ret;

	for (i = 0; i < max_pdp; i++) {
		pt_pages[i] = __gen8_alloc_page_tables();
		if (IS_ERR(pt_pages[i])) {
			ret = PTR_ERR(pt_pages[i]);
			goto unwind_out;
		}
	}

	/* NB: Avoid touching gen8_pt_pages until last to keep the allocation
	 * "atomic" - for cleanup purposes.
	 */
	for (i = 0; i < max_pdp; i++)
		ppgtt->gen8_pt_pages[i] = pt_pages[i];

	return 0;

unwind_out:
	while (i--) {
		gen8_free_page_tables(pt_pages[i]);
		kfree(pt_pages[i]);
	}

	return ret;
}

static int gen8_ppgtt_allocate_dma(struct i915_hw_ppgtt *ppgtt)
{
	int i;

	for (i = 0; i < ppgtt->num_pd_pages; i++) {
		ppgtt->gen8_pt_dma_addr[i] = kcalloc(GEN8_PDES_PER_PAGE,
						     sizeof(dma_addr_t),
						     GFP_KERNEL);
		if (!ppgtt->gen8_pt_dma_addr[i])
			return -ENOMEM;
	}

	return 0;
}

static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt,
						const int max_pdp)
{
	ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT));
	if (!ppgtt->pd_pages)
		return -ENOMEM;

	ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT);
	BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS);

	return 0;
}

static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt,
			    const int max_pdp)
{
	int ret;

	ret = gen8_ppgtt_allocate_page_directories(ppgtt, max_pdp);
	if (ret)
		return ret;

	ret = gen8_ppgtt_allocate_page_tables(ppgtt, max_pdp);
	if (ret) {
		__free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT));
		return ret;
	}

	ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE;

	ret = gen8_ppgtt_allocate_dma(ppgtt);
	if (ret)
		gen8_ppgtt_free(ppgtt);

	return ret;
}
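
/*
 * Worked example of the allocation math above, assuming the usual
 * GEN8_PDES_PER_PAGE == GEN8_PTES_PER_PAGE == 512: for a 4GiB address space,
 * gen8_ppgtt_alloc() ends up with 4 page-directory pages and 2048 page
 * tables, which exactly covers the legacy 32b space:
 */
#if 0
const uint64_t size = 4ULL << 30;				/* 4GiB	      */
const int max_pdp = DIV_ROUND_UP(size, 1 << 30);		/* 4 PDPs     */
const int num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE;	/* 2048 PDEs  */
const uint64_t total =						/* 4GiB again */
	(uint64_t)num_pd_entries * GEN8_PTES_PER_PAGE * PAGE_SIZE;
#endif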

static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt,
					     const int pd)
{
	dma_addr_t pd_addr;
	int ret;

	pd_addr = pci_map_page(ppgtt->base.dev->pdev,
			       &ppgtt->pd_pages[pd], 0,
			       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);

	ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr);
	if (ret)
		return ret;

	ppgtt->pd_dma_addr[pd] = pd_addr;

	return 0;
}

static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt,
					const int pd,
					const int pt)
{
	dma_addr_t pt_addr;
	struct vm_page *p;
	int ret;

	p = ppgtt->gen8_pt_pages[pd][pt];
	pt_addr = pci_map_page(ppgtt->base.dev->pdev,
			       p, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pt_addr);
	if (ret)
		return ret;

	ppgtt->gen8_pt_dma_addr[pd][pt] = pt_addr;

	return 0;
}

/**
 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
 * with a net effect resembling a 2-level page table in normal x86 terms. Each
 * PDP represents 1GB of memory; 4 * 512 * 512 * 4096 = 4GB legacy 32b address
 * space.
 *
 * FIXME: split allocation into smaller pieces. For now we only ever do this
 * once, but with full PPGTT, the multiple contiguous allocations will be bad.
 * TODO: Do something with the size parameter
 */
static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
{
	const int max_pdp = DIV_ROUND_UP(size, 1 << 30);
	const int min_pt_pages = GEN8_PDES_PER_PAGE * max_pdp;
	int i, j, ret;

	if (size % (1 << 30))
		DRM_INFO("Pages will be wasted unless GTT size (%lu) is divisible by 1GB\n", size);

	/* 1. Do all our allocations for page directories and page tables. */
	ret = gen8_ppgtt_alloc(ppgtt, max_pdp);
	if (ret)
		return ret;

	/*
	 * 2. Create DMA mappings for the page directories and page tables.
	 */
	for (i = 0; i < max_pdp; i++) {
		ret = gen8_ppgtt_setup_page_directories(ppgtt, i);
		if (ret)
			goto bail;

		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
			ret = gen8_ppgtt_setup_page_tables(ppgtt, i, j);
			if (ret)
				goto bail;
		}
	}

	/*
	 * 3. Map all the page directory entries to point to the page tables
	 * we've allocated.
	 *
	 * For now, the PPGTT helper functions all require that the PDEs are
	 * plugged in correctly. So we do that now/here. For aliasing PPGTT, we
	 * will never need to touch the PDEs again.
	 */
	for (i = 0; i < max_pdp; i++) {
		gen8_ppgtt_pde_t *pd_vaddr;
		pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]);
		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
			dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
			pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
						      I915_CACHE_LLC);
		}
		if (!HAS_LLC(ppgtt->base.dev))
			drm_clflush_virt_range(pd_vaddr, PAGE_SIZE);
		kunmap_atomic(pd_vaddr);
	}

	ppgtt->enable = gen8_ppgtt_enable;
	ppgtt->switch_mm = gen8_mm_switch;
	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
	ppgtt->base.start = 0;
	ppgtt->base.total = ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE * PAGE_SIZE;

	ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);

	DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
			 ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp);
	DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%ld wasted)\n",
			 ppgtt->num_pd_entries,
			 (ppgtt->num_pd_entries - min_pt_pages) + size % (1 << 30));
	return 0;

bail:
	gen8_ppgtt_unmap_pages(ppgtt);
	gen8_ppgtt_free(ppgtt);
	return ret;
}

static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
{
	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
	struct i915_address_space *vm = &ppgtt->base;
	gen6_gtt_pte_t __iomem *pd_addr;
	gen6_gtt_pte_t scratch_pte;
	uint32_t pd_entry;
	int pte, pde;

	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);

	pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm +
		ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);

	seq_printf(m, " VM %p (pd_offset %x-%x):\n", vm,
		   ppgtt->pd_offset, ppgtt->pd_offset + ppgtt->num_pd_entries);
	for (pde = 0; pde < ppgtt->num_pd_entries; pde++) {
		u32 expected;
		gen6_gtt_pte_t *pt_vaddr;
		dma_addr_t pt_addr = ppgtt->pt_dma_addr[pde];
		pd_entry = readl(pd_addr + pde);
		expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);

		if (pd_entry != expected)
			seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
				   pde,
				   pd_entry,
				   expected);
		seq_printf(m, "\tPDE: %x\n", pd_entry);

		pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
		for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte += 4) {
			unsigned long va =
				(pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) +
				(pte * PAGE_SIZE);
			int i;
			bool found = false;
			for (i = 0; i < 4; i++)
				if (pt_vaddr[pte + i] != scratch_pte)
					found = true;
			if (!found)
				continue;

			seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
			for (i = 0; i < 4; i++) {
				if (pt_vaddr[pte + i] != scratch_pte)
					seq_printf(m, " %08x", pt_vaddr[pte + i]);
				else
					seq_printf(m, " SCRATCH ");
			}
			seq_printf(m, "\n");
		}
		kunmap_atomic(pt_vaddr);
	}
}

static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
	gen6_gtt_pte_t __iomem *pd_addr;
	uint32_t pd_entry;
	int i;

	WARN_ON(ppgtt->pd_offset & 0x3f);
	pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm +
		ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		dma_addr_t pt_addr;

		pt_addr = ppgtt->pt_dma_addr[i];
		pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
		pd_entry |= GEN6_PDE_VALID;

		writel(pd_entry, pd_addr + i);
	}
	readl(pd_addr);
}

static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
{
	BUG_ON(ppgtt->pd_offset & 0x3f);

	return (ppgtt->pd_offset / 64) << 16;
}
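
/*
 * PP_DIR_BASE takes the page-directory offset as a GGTT cacheline index in
 * the upper half of the register, hence the divide-by-64 and shift above.
 * A worked example (the 0x3f alignment check guarantees the divide is exact):
 */
#if 0
uint32_t pd_offset = 0x1000;		/* 64-byte aligned		  */
uint32_t reg = (pd_offset / 64) << 16;	/* 0x40 << 16 = 0x400000	  */
#endif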

static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
			 struct intel_engine_cs *ring,
			 bool synchronous)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	/* If we're in reset, we can assume the GPU is sufficiently idle to
	 * manually frob these bits. Ideally we could use the ring functions,
	 * except our error handling makes it quite difficult (can't use
	 * intel_ring_begin, ring->flush, or intel_ring_advance)
	 *
	 * FIXME: We should try not to special case reset
	 */
	if (synchronous ||
	    i915_reset_in_progress(&dev_priv->gpu_error)) {
		WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
		I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
		POSTING_READ(RING_PP_DIR_BASE(ring));
		return 0;
	}

	/* NB: TLBs must be flushed and invalidated before a switch */
	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
	if (ret)
		return ret;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
	intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
	intel_ring_emit(ring, PP_DIR_DCLV_2G);
	intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
	intel_ring_emit(ring, get_pd_offset(ppgtt));
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct intel_engine_cs *ring,
			  bool synchronous)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	/* If we're in reset, we can assume the GPU is sufficiently idle to
	 * manually frob these bits. Ideally we could use the ring functions,
	 * except our error handling makes it quite difficult (can't use
	 * intel_ring_begin, ring->flush, or intel_ring_advance)
	 *
	 * FIXME: We should try not to special case reset
	 */
	if (synchronous ||
	    i915_reset_in_progress(&dev_priv->gpu_error)) {
		WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
		I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
		POSTING_READ(RING_PP_DIR_BASE(ring));
		return 0;
	}

	/* NB: TLBs must be flushed and invalidated before a switch */
	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
	if (ret)
		return ret;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
	intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
	intel_ring_emit(ring, PP_DIR_DCLV_2G);
	intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
	intel_ring_emit(ring, get_pd_offset(ppgtt));
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	/* XXX: RCS is the only one to auto invalidate the TLBs? */
	if (ring->id != RCS) {
		ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
		if (ret)
			return ret;
	}

	return 0;
}

static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct intel_engine_cs *ring,
			  bool synchronous)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (!synchronous)
		return 0;

	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));

	POSTING_READ(RING_PP_DIR_DCLV(ring));

	return 0;
}

static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int j, ret;

	for_each_ring(ring, dev_priv, j) {
		I915_WRITE(RING_MODE_GEN7(ring),
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));

		/* We promise to do a switch later with FULL PPGTT. If this is
		 * aliasing, this is the one and only switch we'll do */
		if (USES_FULL_PPGTT(dev))
			continue;

		ret = ppgtt->switch_mm(ppgtt, ring, true);
		if (ret)
			goto err_out;
	}

	return 0;

err_out:
	for_each_ring(ring, dev_priv, j)
		I915_WRITE(RING_MODE_GEN7(ring),
			   _MASKED_BIT_DISABLE(GFX_PPGTT_ENABLE));
	return ret;
}

static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	uint32_t ecochk, ecobits;
	int i;

	ecobits = I915_READ(GAC_ECO_BITS);
	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);

	ecochk = I915_READ(GAM_ECOCHK);
	if (IS_HASWELL(dev)) {
		ecochk |= ECOCHK_PPGTT_WB_HSW;
	} else {
		ecochk |= ECOCHK_PPGTT_LLC_IVB;
		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
	}
	I915_WRITE(GAM_ECOCHK, ecochk);

	for_each_ring(ring, dev_priv, i) {
		int ret;
		/* GFX_MODE is per-ring on gen7+ */
		I915_WRITE(RING_MODE_GEN7(ring),
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));

		/* We promise to do a switch later with FULL PPGTT. If this is
		 * aliasing, this is the one and only switch we'll do */
		if (USES_FULL_PPGTT(dev))
			continue;

		ret = ppgtt->switch_mm(ppgtt, ring, true);
		if (ret)
			return ret;
	}

	return 0;
}

static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	uint32_t ecochk, gab_ctl, ecobits;
	int i;

	ecobits = I915_READ(GAC_ECO_BITS);
	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
		   ECOBITS_PPGTT_CACHE64B);

	gab_ctl = I915_READ(GAB_CTL);
	I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);

	ecochk = I915_READ(GAM_ECOCHK);
	I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);

	I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));

	for_each_ring(ring, dev_priv, i) {
		int ret = ppgtt->switch_mm(ppgtt, ring, true);
		if (ret)
			return ret;
	}

	return 0;
}

/* PPGTT support for Sandybridge/Gen6 and later */
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
				   uint64_t start,
				   uint64_t length,
				   bool use_scratch)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen6_gtt_pte_t *pt_vaddr, scratch_pte;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	unsigned last_pte, i;

	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);

	while (num_entries) {
		last_pte = first_pte + num_entries;
		if (last_pte > I915_PPGTT_PT_ENTRIES)
			last_pte = I915_PPGTT_PT_ENTRIES;

		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);

		for (i = first_pte; i < last_pte; i++)
			pt_vaddr[i] = scratch_pte;

		kunmap_atomic(pt_vaddr);

		num_entries -= last_pte - first_pte;
		first_pte = 0;
		act_pt++;
	}
}

static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
				      vm_page_t *pages,
				      uint64_t start,
				      unsigned num_entries,
				      enum i915_cache_level cache_level)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen6_gtt_pte_t *pt_vaddr;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	unsigned i;

	pt_vaddr = NULL;
	for (i = 0; i < num_entries; i++) {
		if (pt_vaddr == NULL)
			pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);

		pt_vaddr[act_pte] =
			vm->pte_encode(VM_PAGE_TO_PHYS(pages[i]),
				       cache_level, true);
		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
			kunmap_atomic(pt_vaddr);
			pt_vaddr = NULL;
			act_pt++;
			act_pte = 0;
		}
	}
	if (pt_vaddr)
		kunmap_atomic(pt_vaddr);
}
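
/*
 * Gen6 uses a single-level split: the bits above PAGE_SHIFT select the page
 * table (act_pt) and the entry within it (act_pte). A minimal worked example,
 * assuming I915_PPGTT_PT_ENTRIES == 1024 (so one 4KiB table maps 4MiB):
 */
#if 0
uint64_t start = (3ULL << 22) | (7 << PAGE_SHIFT);	/* 3rd table, 8th PTE */
unsigned first_entry = start >> PAGE_SHIFT;
unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;	/* 3 */
unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;	/* 7 */
#endif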

static void gen6_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
{
	int i;

	if (ppgtt->pt_dma_addr) {
		for (i = 0; i < ppgtt->num_pd_entries; i++)
			pci_unmap_page(ppgtt->base.dev->pdev,
				       ppgtt->pt_dma_addr[i],
				       4096, PCI_DMA_BIDIRECTIONAL);
	}
}

static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
{
	int i;

	kfree(ppgtt->pt_dma_addr);
	for (i = 0; i < ppgtt->num_pd_entries; i++)
		__free_page(ppgtt->pt_pages[i]);
	kfree(ppgtt->pt_pages);
}

static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);

	list_del(&vm->global_link);
	drm_mm_takedown(&ppgtt->base.mm);
	drm_mm_remove_node(&ppgtt->node);

	gen6_ppgtt_unmap_pages(ppgtt);
	gen6_ppgtt_free(ppgtt);
}

static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool retried = false;
	int ret;

	/* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
	 * allocator works in address space sizes, so it's multiplied by page
	 * size. We allocate at the top of the GTT to avoid fragmentation.
	 */
	BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm));
alloc:
	ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
						  &ppgtt->node, GEN6_PD_SIZE,
						  GEN6_PD_ALIGN, 0,
						  0, dev_priv->gtt.base.total,
						  DRM_MM_TOPDOWN);
	if (ret == -ENOSPC && !retried) {
		ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
					       I915_CACHE_NONE,
					       0, dev_priv->gtt.base.total,
					       0);
		if (ret)
			return ret;

		retried = true;
		goto alloc;
	}

	if (ppgtt->node.start < dev_priv->gtt.mappable_end)
		DRM_DEBUG("Forced to use aperture for PDEs\n");

	ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
	return ret;
}

static int gen6_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt)
{
	int i;

	ppgtt->pt_pages = kcalloc(ppgtt->num_pd_entries, sizeof(struct vm_page *),
				  GFP_KERNEL);

	if (!ppgtt->pt_pages)
		return -ENOMEM;

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL);
		if (!ppgtt->pt_pages[i]) {
			gen6_ppgtt_free(ppgtt);
			return -ENOMEM;
		}
	}

	return 0;
}

static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
{
	int ret;

	ret = gen6_ppgtt_allocate_page_directories(ppgtt);
	if (ret)
		return ret;

	ret = gen6_ppgtt_allocate_page_tables(ppgtt);
	if (ret) {
		drm_mm_remove_node(&ppgtt->node);
		return ret;
	}

	ppgtt->pt_dma_addr = kcalloc(ppgtt->num_pd_entries, sizeof(dma_addr_t),
				     GFP_KERNEL);
	if (!ppgtt->pt_dma_addr) {
		drm_mm_remove_node(&ppgtt->node);
		gen6_ppgtt_free(ppgtt);
		return -ENOMEM;
	}

	return 0;
}

static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	int i;

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		dma_addr_t pt_addr;

		pt_addr = pci_map_page(dev->pdev, ppgtt->pt_pages[i], 0, 4096,
				       PCI_DMA_BIDIRECTIONAL);

		if (pci_dma_mapping_error(dev->pdev, pt_addr)) {
			gen6_ppgtt_unmap_pages(ppgtt);
			return -EIO;
		}

		ppgtt->pt_dma_addr[i] = pt_addr;
	}

	return 0;
}
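
/*
 * Worked example of where the gen6 PD lands, under the assumptions stated in
 * gen6_ppgtt_allocate_page_directories() above: the drm_mm allocator works in
 * address-space bytes, so the 512-entry PD reserves GEN6_PD_SIZE of GGTT
 * address space, and the GGTT PTE slots covering that range are reused as
 * PDEs. gen6_ppgtt_init() below derives pd_offset the same way:
 */
#if 0
/* Suppose drm_mm placed ppgtt->node at GGTT address 0x7fe00000: */
uint32_t pd_offset = 0x7fe00000 / PAGE_SIZE * sizeof(gen6_gtt_pte_t);
/* = 0x7fe00 * 4 = 0x1ff800: byte offset of the first PDE inside gsm */
#endif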

static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
	if (IS_GEN6(dev)) {
		ppgtt->enable = gen6_ppgtt_enable;
		ppgtt->switch_mm = gen6_mm_switch;
	} else if (IS_HASWELL(dev)) {
		ppgtt->enable = gen7_ppgtt_enable;
		ppgtt->switch_mm = hsw_mm_switch;
	} else if (IS_GEN7(dev)) {
		ppgtt->enable = gen7_ppgtt_enable;
		ppgtt->switch_mm = gen7_mm_switch;
	} else
		BUG();

	ret = gen6_ppgtt_alloc(ppgtt);
	if (ret)
		return ret;

	ret = gen6_ppgtt_setup_page_tables(ppgtt);
	if (ret) {
		gen6_ppgtt_free(ppgtt);
		return ret;
	}

	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
	ppgtt->base.start = 0;
	ppgtt->base.total = ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES * PAGE_SIZE;
	ppgtt->debug_dump = gen6_dump_ppgtt;

	ppgtt->pd_offset =
		ppgtt->node.start / PAGE_SIZE * sizeof(gen6_gtt_pte_t);

	ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);

	DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n",
			 ppgtt->node.size >> 20,
			 ppgtt->node.start / PAGE_SIZE);

	return 0;
}

int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret = 0;

	ppgtt->base.dev = dev;
	ppgtt->base.scratch = dev_priv->gtt.base.scratch;

	if (INTEL_INFO(dev)->gen < 8)
		ret = gen6_ppgtt_init(ppgtt);
	else if (IS_GEN8(dev))
		ret = gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
	else
		BUG();

	if (!ret) {
		kref_init(&ppgtt->ref);
		drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
			    ppgtt->base.total);
		i915_init_vm(dev_priv, &ppgtt->base);
		if (INTEL_INFO(dev)->gen < 8) {
			gen6_write_pdes(ppgtt);
			DRM_DEBUG("Adding PPGTT at offset %x\n",
				  ppgtt->pd_offset << 10);
		}
	}

	return ret;
}

static void
ppgtt_bind_vma(struct i915_vma *vma,
	       enum i915_cache_level cache_level,
	       u32 flags)
{
	const unsigned int num_entries = vma->obj->base.size >> PAGE_SHIFT;

	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
				num_entries,
				cache_level);
}

static void ppgtt_unbind_vma(struct i915_vma *vma)
{
	vma->vm->clear_range(vma->vm,
			     vma->node.start,
			     vma->obj->base.size,
			     true);
}

extern int intel_iommu_gfx_mapped;
/* Certain Gen5 chipsets require idling the GPU before
 * unmapping anything from the GTT when VT-d is enabled.
 */
static inline bool needs_idle_maps(struct drm_device *dev)
{
#ifdef CONFIG_INTEL_IOMMU
	/* Query intel_iommu to see if we need the workaround. Presumably that
	 * was loaded first.
	 */
	if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
		return true;
#endif
	return false;
}

static bool do_idling(struct drm_i915_private *dev_priv)
{
	bool ret = dev_priv->mm.interruptible;

	if (unlikely(dev_priv->gtt.do_idle_maps)) {
		dev_priv->mm.interruptible = false;
		if (i915_gpu_idle(dev_priv->dev)) {
			DRM_ERROR("Couldn't idle GPU\n");
			/* Wait a bit, in hopes it avoids the hang */
			udelay(10);
		}
	}

	return ret;
}

static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
{
	if (unlikely(dev_priv->gtt.do_idle_maps))
		dev_priv->mm.interruptible = interruptible;
}

void i915_check_and_clear_faults(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int i;

	if (INTEL_INFO(dev)->gen < 6)
		return;

	for_each_ring(ring, dev_priv, i) {
		u32 fault_reg;
		fault_reg = I915_READ(RING_FAULT_REG(ring));
		if (fault_reg & RING_FAULT_VALID) {
#if 0
			DRM_DEBUG_DRIVER("Unexpected fault\n"
					 "\tAddr: 0x%08lx\n"
					 "\tAddress space: %s\n"
					 "\tSource ID: %d\n"
					 "\tType: %d\n",
					 fault_reg & PAGE_MASK,
					 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
					 RING_FAULT_SRCID(fault_reg),
					 RING_FAULT_FAULT_TYPE(fault_reg));
#endif
			I915_WRITE(RING_FAULT_REG(ring),
				   fault_reg & ~RING_FAULT_VALID);
		}
	}
	POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
}

void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	/* Don't bother messing with faults pre GEN6 as we have little
	 * documentation supporting that it's a good idea.
	 */
	if (INTEL_INFO(dev)->gen < 6)
		return;

	i915_check_and_clear_faults(dev);

	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
				       dev_priv->gtt.base.start,
				       dev_priv->gtt.base.total,
				       true);
}

void i915_gem_restore_gtt_mappings(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	struct i915_address_space *vm;

	i915_check_and_clear_faults(dev);

	/* First fill our portion of the GTT with scratch pages */
	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
				       dev_priv->gtt.base.start,
				       dev_priv->gtt.base.total,
				       true);

	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
		struct i915_vma *vma = i915_gem_obj_to_vma(obj,
							   &dev_priv->gtt.base);
		if (!vma)
			continue;

		i915_gem_clflush_object(obj, obj->pin_display);
		/* The bind_vma code tries to be smart about tracking mappings.
		 * Unfortunately above, we've just wiped out the mappings
		 * without telling our object about it. So we need to fake it.
		 */
		obj->has_global_gtt_mapping = 0;
		vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
	}

	if (INTEL_INFO(dev)->gen >= 8) {
		if (IS_CHERRYVIEW(dev))
			chv_setup_private_ppat(dev_priv);
		else
			bdw_setup_private_ppat(dev_priv);

		return;
	}

	list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
		/* TODO: Perhaps it shouldn't be gen6 specific */
		if (i915_is_ggtt(vm)) {
			if (dev_priv->mm.aliasing_ppgtt)
				gen6_write_pdes(dev_priv->mm.aliasing_ppgtt);
			continue;
		}

		gen6_write_pdes(container_of(vm, struct i915_hw_ppgtt, base));
	}

	i915_gem_chipset_flush(dev);
}

int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
{
	if (obj->has_dma_mapping)
		return 0;

#if 0
	if (!dma_map_sg(&obj->base.dev->pdev->dev,
			obj->pages->sgl, obj->pages->nents,
			PCI_DMA_BIDIRECTIONAL))
		return -ENOSPC;
#endif

	return 0;
}

static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
{
#if 0
	writeq(pte, addr);
#else
	iowrite32((u32)pte, addr);
	iowrite32(pte >> 32, addr + 4);
#endif
}

static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
				     vm_page_t *pages,
				     uint64_t start,
				     unsigned int num_entries,
				     enum i915_cache_level level)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	gen8_gtt_pte_t __iomem *gtt_entries =
		(gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	int i = 0;
	dma_addr_t addr = 0;

	for (i = 0; i < num_entries; i++) {
		addr = VM_PAGE_TO_PHYS(pages[i]);
		gen8_set_pte(&gtt_entries[i],
			     gen8_pte_encode(addr, level, true));
	}

	/*
	 * XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR that they are potentially
	 * of NUMA access patterns. Therefore, even with the way we assume
	 * hardware should work, we must keep this posting read for paranoia.
	 */
	if (i != 0)
		WARN_ON(readq(&gtt_entries[i-1])
			!= gen8_pte_encode(addr, level, true));

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}
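
/*
 * A worked sketch of the GGTT indexing used above and below: the GGTT is a
 * flat array of PTEs living behind dev_priv->gtt.gsm, so selecting an entry
 * is just a page-sized shift (gen8 PTEs are 8 bytes, gen6 PTEs 4 bytes):
 */
#if 0
uint64_t start = 0x200000;			/* GTT offset 2MiB */
unsigned first_entry = start >> PAGE_SHIFT;	/* PTE index 512   */
gen8_gtt_pte_t __iomem *slot =
	(gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
#endif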

/*
 * Binds an object into the global gtt with the specified cache level. The object
 * will be accessible to the GPU via commands whose operands reference offsets
 * within the global GTT as well as accessible by the GPU through the GMADR
 * mapped BAR (dev_priv->mm.gtt->gtt).
 */
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
				     vm_page_t *pages,
				     uint64_t start,
				     unsigned int num_entries,
				     enum i915_cache_level level)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	gen6_gtt_pte_t __iomem *gtt_entries =
		(gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	int i = 0;
	dma_addr_t addr;

	for (i = 0; i < num_entries; i++) {
		addr = VM_PAGE_TO_PHYS(pages[i]);
		iowrite32(vm->pte_encode(addr, level, true), &gtt_entries[i]);
	}

	/* XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR that they are potentially
	 * of NUMA access patterns. Therefore, even with the way we assume
	 * hardware should work, we must keep this posting read for paranoia.
	 */
	if (i != 0)
		WARN_ON(readl(&gtt_entries[i-1]) !=
			vm->pte_encode(addr, level, true));

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}

static void gen8_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length,
				  bool use_scratch)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	gen8_gtt_pte_t scratch_pte, __iomem *gtt_base =
		(gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = gen8_pte_encode(vm->scratch.addr,
				      I915_CACHE_LLC,
				      use_scratch);
	for (i = 0; i < num_entries; i++)
		gen8_set_pte(&gtt_base[i], scratch_pte);
	readl(gtt_base);
}

static void gen6_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length,
				  bool use_scratch)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch);

	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
	readl(gtt_base);
}

static void i915_ggtt_bind_vma(struct i915_vma *vma,
			       enum i915_cache_level cache_level,
			       u32 unused)
{
	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
	const unsigned int num_entries = vma->obj->base.size >> PAGE_SHIFT;
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

	BUG_ON(!i915_is_ggtt(vma->vm));
	intel_gtt_insert_pages(entry, num_entries, vma->obj->pages, flags);
	vma->obj->has_global_gtt_mapping = 1;
}

static void i915_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length,
				  bool unused)
{
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	intel_gtt_clear_range(first_entry, num_entries);
}

static void i915_ggtt_unbind_vma(struct i915_vma *vma)
{
	const unsigned int first = vma->node.start >> PAGE_SHIFT;
	const unsigned int size = vma->obj->base.size >> PAGE_SHIFT;

	BUG_ON(!i915_is_ggtt(vma->vm));
	vma->obj->has_global_gtt_mapping = 0;
	intel_gtt_clear_range(first, size);
}

static void ggtt_bind_vma(struct i915_vma *vma,
			  enum i915_cache_level cache_level,
			  u32 flags)
{
	struct drm_device *dev = vma->vm->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj = vma->obj;

	/* If there is no aliasing PPGTT, or the caller needs a global mapping,
	 * or we have a global mapping already but the cacheability flags have
	 * changed, set the global PTEs.
	 *
	 * If there is an aliasing PPGTT it is anecdotally faster, so use that
	 * instead if none of the above hold true.
	 *
	 * NB: A global mapping should only be needed for special regions like
	 * "gtt mappable", SNB errata, or if specified via special execbuf
	 * flags. At all other times, the GPU will use the aliasing PPGTT.
	 */
	if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
		if (!obj->has_global_gtt_mapping ||
		    (cache_level != obj->cache_level)) {
			vma->vm->insert_entries(vma->vm, obj->pages,
						vma->node.start,
						obj->base.size >> PAGE_SHIFT,
						cache_level);
			obj->has_global_gtt_mapping = 1;
		}
	}

	if (dev_priv->mm.aliasing_ppgtt &&
	    (!obj->has_aliasing_ppgtt_mapping ||
	     (cache_level != obj->cache_level))) {
		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
		appgtt->base.insert_entries(&appgtt->base,
					    vma->obj->pages,
					    vma->node.start,
					    obj->base.size >> PAGE_SHIFT,
					    cache_level);
		vma->obj->has_aliasing_ppgtt_mapping = 1;
	}
}

static void ggtt_unbind_vma(struct i915_vma *vma)
{
	struct drm_device *dev = vma->vm->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj = vma->obj;

	if (obj->has_global_gtt_mapping) {
		vma->vm->clear_range(vma->vm,
				     vma->node.start,
				     obj->base.size,
				     true);
		obj->has_global_gtt_mapping = 0;
	}

	if (obj->has_aliasing_ppgtt_mapping) {
		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
		appgtt->base.clear_range(&appgtt->base,
					 vma->node.start,
					 obj->base.size,
					 true);
		obj->has_aliasing_ppgtt_mapping = 0;
	}
}
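
/*
 * A condensed restatement of the binding policy in ggtt_bind_vma() above
 * (sketch only, not built): with an aliasing PPGTT present, GGTT PTEs are
 * only (re)written when explicitly requested via GLOBAL_BIND, or when a
 * global mapping exists but carries stale cacheability:
 */
#if 0
bool consider_global = !dev_priv->mm.aliasing_ppgtt || (flags & GLOBAL_BIND);
bool global_stale = !obj->has_global_gtt_mapping ||
		    cache_level != obj->cache_level;
/* GGTT PTEs are written iff consider_global && global_stale; otherwise only
 * the aliasing PPGTT entries are brought up to date. */
#endif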

void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible;

	interruptible = do_idling(dev_priv);

#if 0
	if (!obj->has_dma_mapping)
		dma_unmap_sg(&dev->pdev->dev,
			     obj->pages->sgl, obj->pages->nents,
			     PCI_DMA_BIDIRECTIONAL);
#endif

	undo_idling(dev_priv, interruptible);
}

static void i915_gtt_color_adjust(struct drm_mm_node *node,
				  unsigned long color,
				  unsigned long *start,
				  unsigned long *end)
{
	if (node->color != color)
		*start += 4096;

	if (!list_empty(&node->node_list)) {
		node = list_entry(node->node_list.next,
				  struct drm_mm_node,
				  node_list);
		if (node->allocated && node->color != color)
			*end -= 4096;
	}
}

void i915_gem_setup_global_gtt(struct drm_device *dev,
			       unsigned long start,
			       unsigned long mappable_end,
			       unsigned long end)
{
	/* Let GEM Manage all of the aperture.
	 *
	 * However, leave one page at the end still bound to the scratch page.
	 * There are a number of places where the hardware apparently prefetches
	 * past the end of the object, and we've seen multiple hangs with the
	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
	 * aperture. One page should be enough to keep any prefetching inside
	 * of the aperture.
	 */
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
	unsigned long mappable;
	int error;
	struct drm_mm_node *entry;
	struct drm_i915_gem_object *obj;
	unsigned long hole_start, hole_end;

	kprintf("MAPPABLE_END VS END %016jx %016jx\n", mappable_end, end);
	tsleep(&mappable_end, 0, "DELAY", hz); /* for kprintf */
	/*BUG_ON(mappable_end > end);*/

	mappable = min(end, mappable_end) - start;

	/* Subtract the guard page ... */
	drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE);
	if (!HAS_LLC(dev))
		dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;

	/* Mark any preallocated objects as occupied */
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
		struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
		int ret;
		DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
			      i915_gem_obj_ggtt_offset(obj), obj->base.size);

		WARN_ON(i915_gem_obj_ggtt_bound(obj));
		ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
		if (ret)
			DRM_DEBUG_KMS("Reservation failed\n");
		obj->has_global_gtt_mapping = 1;
	}

	dev_priv->gtt.base.start = start;
	dev_priv->gtt.base.total = end - start;

	/* Clear any non-preallocated blocks */
	drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
			      hole_start, hole_end);
		ggtt_vm->clear_range(ggtt_vm, hole_start,
				     hole_end - hole_start, true);
	}

	/* ... but ensure that we clear the entire range. */
	intel_gtt_clear_range(start / PAGE_SIZE, (end - start) / PAGE_SIZE);

	device_printf(dev->dev,
	    "taking over the fictitious range 0x%lx-0x%lx\n",
	    dev->agp->base + start, dev->agp->base + start + mappable);
	error = -vm_phys_fictitious_reg_range(dev->agp->base + start,
	    dev->agp->base + start + mappable, VM_MEMATTR_WRITE_COMBINING);

	/* And finally clear the reserved guard page */
	ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);
}
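
/*
 * Worked example of the layout established above for a hypothetical 2GiB
 * GGTT with a 256MiB aperture (the numbers are illustrative only):
 */
#if 0
unsigned long start = 0, end = 2UL << 30, mappable_end = 256UL << 20;
unsigned long mappable = min(end, mappable_end) - start; /* CPU-visible via GMADR */
/* drm_mm manages [start, end - PAGE_SIZE); the final page stays bound to the
 * scratch page as a guard against command-streamer prefetch past the end. */
#endif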

void i915_gem_init_global_gtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long gtt_size, mappable_size;

	gtt_size = dev_priv->gtt.base.total;
	mappable_size = dev_priv->gtt.mappable_end;

	i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
}

static int setup_scratch_page(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct vm_page *page;
	dma_addr_t dma_addr;

	page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
	if (page == NULL)
		return -ENOMEM;
	get_page(page);
	set_pages_uc(page, 1);

#ifdef CONFIG_INTEL_IOMMU
	dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE,
				PCI_DMA_BIDIRECTIONAL);
	if (pci_dma_mapping_error(dev->pdev, dma_addr))
		return -EINVAL;
#else
	dma_addr = page_to_phys(page);
#endif
	dev_priv->gtt.base.scratch.page = page;
	dev_priv->gtt.base.scratch.addr = dma_addr;

	return 0;
}

#if 0
static void teardown_scratch_page(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct vm_page *page = dev_priv->gtt.base.scratch.page;

	set_pages_wb(page, 1);
	pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr,
		       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	put_page(page);
	__free_page(page);
}
#endif

static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
	return snb_gmch_ctl << 20;
}

static inline unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
	if (bdw_gmch_ctl)
		bdw_gmch_ctl = 1 << bdw_gmch_ctl;

#ifdef CONFIG_X86_32
	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
	if (bdw_gmch_ctl > 4)
		bdw_gmch_ctl = 4;
#endif

	return bdw_gmch_ctl << 20;
}

static inline unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
{
	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
	gmch_ctrl &= SNB_GMCH_GGMS_MASK;

	if (gmch_ctrl)
		return 1 << (20 + gmch_ctrl);

	return 0;
}

static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
	return snb_gmch_ctl << 25; /* 32 MB units */
}

static inline size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
	return bdw_gmch_ctl << 25; /* 32 MB units */
}
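
/*
 * Worked decode of SNB_GMCH_CTRL, assuming the field layout implied by the
 * helpers above: a GGMS field of 2 and a GMS field of 3 give a 2MiB GTT and
 * 96MiB of stolen memory; gen6_gmch_probe() below then converts the GTT size
 * into an addressable range:
 */
#if 0
unsigned int gtt_size = 2 << 20;	/* GGMS = 2 -> 2MiB of PTEs	   */
size_t stolen = (size_t)3 << 25;	/* GMS = 3 -> 96MiB stolen	   */
size_t gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT;
/* 2MiB / 4-byte PTEs = 512Ki entries -> 2GiB of GTT address space */
#endif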

static size_t chv_get_stolen_size(u16 gmch_ctrl)
{
	gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
	gmch_ctrl &= SNB_GMCH_GMS_MASK;

	/*
	 * 0x0  to 0x10: 32MB increments starting at 0MB
	 * 0x11 to 0x16: 4MB increments starting at 8MB
	 * 0x17 to 0x1d: 4MB increments starting at 36MB
	 */
	if (gmch_ctrl < 0x11)
		return gmch_ctrl << 25;
	else if (gmch_ctrl < 0x17)
		return (gmch_ctrl - 0x11 + 2) << 22;
	else
		return (gmch_ctrl - 0x17 + 9) << 22;
}

static int ggtt_probe_common(struct drm_device *dev,
			     size_t gtt_size)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	phys_addr_t gtt_phys_addr;
	int ret;

	/* For Modern GENs the PTEs and register space are split in the BAR */
	gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
		(pci_resource_len(dev->pdev, 0) / 2);

	kprintf("gtt_probe_common: gtt_phys_addr=0x%lx\n", gtt_phys_addr);
	dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
	if (!dev_priv->gtt.gsm) {
		DRM_ERROR("Failed to map the gtt page table\n");
		return -ENOMEM;
	}

	ret = setup_scratch_page(dev);
	if (ret) {
		DRM_ERROR("Scratch setup failed\n");
		/* iounmap will also get called at remove, but meh */
#if 0
		iounmap(dev_priv->gtt.gsm);
#endif
	}

	return ret;
}

/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases. */
static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
{
	uint64_t pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
	      GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	/* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
	 * write would work. */
	I915_WRITE(GEN8_PRIVATE_PAT, pat);
	I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
}
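
/*
 * How the PPAT table above connects back to gen8_pte_encode(): GEN8_PPAT(i, x)
 * places entry i at bits 8i..8i+7, and the PTE's PAT/PCD/PWT bits form the
 * index into it (assuming the usual x86 bit layout, index = PAT<<2|PCD<<1|PWT).
 * Sketch, for illustration only:
 */
#if 0
/* PPAT_UNCACHED_INDEX  (PCD|PWT) selects entry 3: GEN8_PPAT_UC
 * PPAT_CACHED_INDEX    (PAT)     selects entry 4: WB LLC+eLLC, age 0 */
uint64_t entry3 = GEN8_PPAT(3, GEN8_PPAT_UC);	/* GEN8_PPAT_UC << 24 */
#endif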

static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
{
	uint64_t pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV, the rest is
	 * ignored.
	 *
	 * Note that the hardware enforces snooping for all page
	 * table accesses. The snoop bit is actually ignored for
	 * PDEs.
	 */
	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	I915_WRITE(GEN8_PRIVATE_PAT, pat);
	I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
}

static int gen8_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned int gtt_size;
	u16 snb_gmch_ctl;
	int ret;

	/* TODO: We're not aware of mappable constraints on gen8 yet */
	*mappable_base = pci_resource_start(dev->pdev, 2);
	*mappable_end = pci_resource_len(dev->pdev, 2);

#if 0
	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));
#endif

	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	if (IS_CHERRYVIEW(dev)) {
		*stolen = chv_get_stolen_size(snb_gmch_ctl);
		gtt_size = chv_get_total_gtt_size(snb_gmch_ctl);
	} else {
		*stolen = gen8_get_stolen_size(snb_gmch_ctl);
		gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
	}

	*gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT;

	if (IS_CHERRYVIEW(dev))
		chv_setup_private_ppat(dev_priv);
	else
		bdw_setup_private_ppat(dev_priv);

	ret = ggtt_probe_common(dev, gtt_size);

	dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
	dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;

	return ret;
}
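
/*
 * Worked example of the gen8 sizing above, assuming the BDW GGMS encoding
 * decoded by gen8_get_total_gtt_size() (field value n -> 1 << n MiB of PTEs)
 * and 8-byte gen8 PTEs:
 */
#if 0
u16 ggms = 3;					/* from SNB_GMCH_CTRL	   */
unsigned int gtt_size = (1 << ggms) << 20;	/* 8MiB of PTEs		   */
size_t gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT;
/* 8MiB / 8 = 1Mi entries -> 4GiB of GTT address space */
#endif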

static int gen6_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned int gtt_size;
	u16 snb_gmch_ctl;
	int ret;

	*mappable_base = pci_resource_start(dev->pdev, 2);
	*mappable_end = pci_resource_len(dev->pdev, 2);

	/* 64/512MB is the current min/max we actually know of, but this is just
	 * a coarse sanity check.
	 */
	if (*mappable_end < (64 << 20) || *mappable_end > (512 << 20)) {
		DRM_ERROR("Unknown GMADR size (%lx)\n",
			  dev_priv->gtt.mappable_end);
		return -ENXIO;
	}

#if 0
	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
#endif
	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	*stolen = gen6_get_stolen_size(snb_gmch_ctl);

	gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
	*gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT;

	ret = ggtt_probe_common(dev, gtt_size);

	dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
	dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;

	return ret;
}

static void gen6_gmch_remove(struct i915_address_space *vm)
{
#if 0
	struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);

	if (drm_mm_initialized(&vm->mm)) {
		drm_mm_takedown(&vm->mm);
		list_del(&vm->global_link);
	}
	iounmap(gtt->gsm);
	teardown_scratch_page(vm->dev);
#endif
}

static int i915_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
#if 0
	int ret;

	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
	if (!ret) {
		DRM_ERROR("failed to set up gmch\n");
		return -EIO;
	}
#endif

	intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);

	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;

	if (unlikely(dev_priv->gtt.do_idle_maps))
		DRM_INFO("applying Ironlake quirks for intel_iommu\n");

	return 0;
}

static void i915_gmch_remove(struct i915_address_space *vm)
{
	if (drm_mm_initialized(&vm->mm)) {
		drm_mm_takedown(&vm->mm);
		list_del(&vm->global_link);
	}
}

int i915_gem_gtt_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_gtt *gtt = &dev_priv->gtt;
	int ret;

	if (INTEL_INFO(dev)->gen <= 5) {
		gtt->gtt_probe = i915_gmch_probe;
		gtt->base.cleanup = i915_gmch_remove;
	} else if (INTEL_INFO(dev)->gen < 8) {
		gtt->gtt_probe = gen6_gmch_probe;
		gtt->base.cleanup = gen6_gmch_remove;
		if (IS_HASWELL(dev) && dev_priv->ellc_size)
			gtt->base.pte_encode = iris_pte_encode;
		else if (IS_HASWELL(dev))
			gtt->base.pte_encode = hsw_pte_encode;
		else if (IS_VALLEYVIEW(dev))
			gtt->base.pte_encode = byt_pte_encode;
		else if (INTEL_INFO(dev)->gen >= 7)
			gtt->base.pte_encode = ivb_pte_encode;
		else
			gtt->base.pte_encode = snb_pte_encode;
	} else {
		dev_priv->gtt.gtt_probe = gen8_gmch_probe;
		dev_priv->gtt.base.cleanup = gen6_gmch_remove;
	}

	ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
			     &gtt->mappable_base, &gtt->mappable_end);
	if (ret)
		return ret;

	gtt->base.dev = dev;

	/* GMADR is the PCI mmio aperture into the global GTT. */
	DRM_INFO("Memory usable by graphics device = %zdM\n",
		 gtt->base.total >> 20);
	DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
#ifdef CONFIG_INTEL_IOMMU
	if (intel_iommu_gfx_mapped)
		DRM_INFO("VT-d active for gfx access\n");
#endif

	return 0;
}

static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj,
					      struct i915_address_space *vm)
{
	struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
	if (vma == NULL)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&vma->vma_link);
	INIT_LIST_HEAD(&vma->mm_list);
	INIT_LIST_HEAD(&vma->exec_list);
	vma->vm = vm;
	vma->obj = obj;

	switch (INTEL_INFO(vm->dev)->gen) {
	case 8:
	case 7:
	case 6:
		if (i915_is_ggtt(vm)) {
			vma->unbind_vma = ggtt_unbind_vma;
			vma->bind_vma = ggtt_bind_vma;
		} else {
			vma->unbind_vma = ppgtt_unbind_vma;
			vma->bind_vma = ppgtt_bind_vma;
		}
		break;
	case 5:
	case 4:
	case 3:
	case 2:
		BUG_ON(!i915_is_ggtt(vm));
		vma->unbind_vma = i915_ggtt_unbind_vma;
		vma->bind_vma = i915_ggtt_bind_vma;
		break;
	default:
		BUG();
	}

	/* Keep GGTT vmas first to make debug easier */
	if (i915_is_ggtt(vm))
		list_add(&vma->vma_link, &obj->vma_list);
	else
		list_add_tail(&vma->vma_link, &obj->vma_list);

	return vma;
}

struct i915_vma *
i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
				  struct i915_address_space *vm)
{
	struct i915_vma *vma;

	vma = i915_gem_obj_to_vma(obj, vm);
	if (!vma)
		vma = __i915_gem_vma_create(obj, vm);

	return vma;
}
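
/*
 * Usage sketch (mirrors i915_gem_restore_gtt_mappings() above): callers get a
 * VMA for an (object, address space) pair and bind through the vfuncs the
 * factory installed:
 */
#if 0
struct i915_vma *vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
if (!IS_ERR(vma))
	vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
#endif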