/*
 * Copyright © 2010 Daniel Vetter
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "intel_drv.h"

#include <linux/highmem.h>

#define GEN6_PPGTT_PD_ENTRIES 512
#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
typedef uint64_t gen8_gtt_pte_t;
typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;

/* PPGTT stuff */
#define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))
#define HSW_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0x7f0))

#define GEN6_PDE_VALID			(1 << 0)
/* gen6+ has bit 11-4 for physical addr bit 39-32 */
#define GEN6_PDE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)

#define GEN6_PTE_VALID			(1 << 0)
#define GEN6_PTE_UNCACHED		(1 << 1)
#define HSW_PTE_UNCACHED		(0)
#define GEN6_PTE_CACHE_LLC		(2 << 1)
#define GEN7_PTE_CACHE_L3_LLC		(3 << 1)
#define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
#define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)

/* Cacheability Control is a 4-bit value. The low three bits are stored in
 * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
 */
#define HSW_CACHEABILITY_CONTROL(bits)	((((bits) & 0x7) << 1) | \
					 (((bits) & 0x8) << (11 - 3)))
#define HSW_WB_LLC_AGE3			HSW_CACHEABILITY_CONTROL(0x2)
#define HSW_WB_LLC_AGE0			HSW_CACHEABILITY_CONTROL(0x3)
#define HSW_WB_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0xb)
#define HSW_WB_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x8)
#define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
#define HSW_WT_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x7)
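
/*
 * For example, HSW_WB_ELLC_LLC_AGE0 packs the 4-bit cacheability value 0xb
 * as ((0xb & 0x7) << 1) | ((0xb & 0x8) << 8) = 0x6 | 0x800 = 0x806: the low
 * three bits land in PTE bits 3:1 and the top bit in PTE bit 11, as described
 * above.
 */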

#define GEN8_PTES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_gtt_pte_t))
#define GEN8_PDES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
#define GEN8_LEGACY_PDPS		4

#define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
#define PPAT_CACHED_PDE_INDEX		0 /* WB LLC */
#define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLCeLLC */
#define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */

static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
					     enum i915_cache_level level,
					     bool valid)
{
	gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
	pte |= addr;
	if (level != I915_CACHE_NONE)
		pte |= PPAT_CACHED_INDEX;
	else
		pte |= PPAT_UNCACHED_INDEX;
	return pte;
}

static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev,
					       dma_addr_t addr,
					       enum i915_cache_level level)
{
	gen8_ppgtt_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
	pde |= addr;
	if (level != I915_CACHE_NONE)
		pde |= PPAT_CACHED_PDE_INDEX;
	else
		pde |= PPAT_UNCACHED_INDEX;
	return pde;
}

static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		WARN_ON(1);
	}

	return pte;
}

static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
		pte |= GEN7_PTE_CACHE_L3_LLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		WARN_ON(1);
	}

	return pte;
}

#define BYT_PTE_WRITEABLE		(1 << 1)
#define BYT_PTE_SNOOPED_BY_CPU_CACHES	(1 << 2)

static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	/* Mark the page as writeable.  Other platforms don't have a
	 * setting for read-only/writable, so this matches that behavior.
	 */
	pte |= BYT_PTE_WRITEABLE;

	if (level != I915_CACHE_NONE)
		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;

	return pte;
}

static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	if (level != I915_CACHE_NONE)
		pte |= HSW_WB_LLC_AGE3;

	return pte;
}

static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
				      enum i915_cache_level level,
				      bool valid)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_NONE:
		break;
	case I915_CACHE_WT:
		pte |= HSW_WT_ELLC_LLC_AGE3;
		break;
	default:
		pte |= HSW_WB_ELLC_LLC_AGE3;
		break;
	}

	return pte;
}
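
/*
 * None of the snb/ivb/byt/hsw/iris helpers above are called directly from
 * this file; i915_gem_gtt_init() at the bottom picks one per platform and
 * installs it, roughly:
 *
 *	if (IS_HASWELL(dev) && dev_priv->ellc_size)
 *		gtt->base.pte_encode = iris_pte_encode;
 *	else if (IS_HASWELL(dev))
 *		gtt->base.pte_encode = hsw_pte_encode;
 *	...
 *
 * after which callers go through vm->pte_encode().
 */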

/* Broadwell Page Directory Pointer Descriptors */
static int gen8_write_pdp(struct intel_ring_buffer *ring, unsigned entry,
			  uint64_t val)
{
	int ret;

	BUG_ON(entry >= 4);

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
	intel_ring_emit(ring, (u32)(val >> 32));
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
	intel_ring_emit(ring, (u32)(val));
	intel_ring_advance(ring);

	return 0;
}

static int gen8_ppgtt_enable(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring;
	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
	int i, j, ret;

	/* bit of a hack to find the actual last used pd */
	int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;

	for_each_ring(ring, dev_priv, j) {
		I915_WRITE(RING_MODE_GEN7(ring),
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	}

	for (i = used_pd - 1; i >= 0; i--) {
		dma_addr_t addr = ppgtt->pd_dma_addr[i];
		for_each_ring(ring, dev_priv, j) {
			ret = gen8_write_pdp(ring, i, addr);
			if (ret)
				goto err_out;
		}
	}
	return 0;

err_out:
	for_each_ring(ring, dev_priv, j)
		I915_WRITE(RING_MODE_GEN7(ring),
			   _MASKED_BIT_DISABLE(GFX_PPGTT_ENABLE));
	return ret;
}

static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
				   unsigned first_entry,
				   unsigned num_entries,
				   bool use_scratch)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen8_gtt_pte_t *pt_vaddr, scratch_pte;
	unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE;
	unsigned first_pte = first_entry % GEN8_PTES_PER_PAGE;
	unsigned last_pte, i;

	scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
				      I915_CACHE_LLC, use_scratch);

	while (num_entries) {
		struct vm_page *page_table = &ppgtt->gen8_pt_pages[act_pt];

		last_pte = first_pte + num_entries;
		if (last_pte > GEN8_PTES_PER_PAGE)
			last_pte = GEN8_PTES_PER_PAGE;

		pt_vaddr = kmap_atomic(page_table);

		for (i = first_pte; i < last_pte; i++)
			pt_vaddr[i] = scratch_pte;

		kunmap_atomic(pt_vaddr);

		num_entries -= last_pte - first_pte;
		first_pte = 0;
		act_pt++;
	}
}
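
/*
 * The clear path above and the insert path below decompose a linear GTT
 * entry index the same way: with GEN8_PTES_PER_PAGE = 4096 / 8 = 512,
 * entry 1025, for example, falls in page table 2 at offset 1.
 */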

static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
				      vm_page_t *pages,
				      unsigned int first_entry,
				      unsigned int num_entries,
				      enum i915_cache_level cache_level)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen8_gtt_pte_t *pt_vaddr;
	unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE;
	unsigned act_pte = first_entry % GEN8_PTES_PER_PAGE;
	int i;

	pt_vaddr = NULL;
	for (i = 0; i < num_entries; i++) {
		if (pt_vaddr == NULL)
			pt_vaddr = kmap_atomic(&ppgtt->gen8_pt_pages[act_pt]);

		pt_vaddr[act_pte] =
			gen8_pte_encode(VM_PAGE_TO_PHYS(pages[i]),
					cache_level, true);
		if (++act_pte == GEN8_PTES_PER_PAGE) {
			kunmap_atomic(pt_vaddr);
			pt_vaddr = NULL;
			act_pt++;
			act_pte = 0;
		}
	}
	if (pt_vaddr)
		kunmap_atomic(pt_vaddr);
}

static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	int i, j;

	drm_mm_takedown(&vm->mm);

	for (i = 0; i < ppgtt->num_pd_pages; i++) {
		if (ppgtt->pd_dma_addr[i]) {
			pci_unmap_page(ppgtt->base.dev->pdev,
				       ppgtt->pd_dma_addr[i],
				       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);

			for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
				dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
				if (addr)
					pci_unmap_page(ppgtt->base.dev->pdev,
						       addr,
						       PAGE_SIZE,
						       PCI_DMA_BIDIRECTIONAL);
			}
		}
		kfree(ppgtt->gen8_pt_dma_addr[i]);
	}

	__free_pages(ppgtt->gen8_pt_pages, get_order(ppgtt->num_pt_pages << PAGE_SHIFT));
	__free_pages(ppgtt->pd_pages, get_order(ppgtt->num_pd_pages << PAGE_SHIFT));
}

/**
 * GEN8 legacy ppgtt programming is accomplished through 4 PDP registers with a
 * net effect resembling a 2-level page table in normal x86 terms. Each PDP
 * represents 1GB of memory
 * 4 * 512 * 512 * 4096 = 4GB legacy 32b address space.
 *
 * TODO: Do something with the size parameter
 **/
static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
{
	struct vm_page *pt_pages;
	int i, j, ret = -ENOMEM;
	const int max_pdp = DIV_ROUND_UP(size, 1 << 30);
	const int num_pt_pages = GEN8_PDES_PER_PAGE * max_pdp;

	if (size % (1<<30))
		DRM_INFO("Pages will be wasted unless GTT size (%lu) is divisible by 1GB\n", size);

	/* FIXME: split allocation into smaller pieces. For now we only ever do
	 * this once, but with full PPGTT, the multiple contiguous allocations
	 * will be bad.
	 */
	ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT));
	if (!ppgtt->pd_pages)
		return -ENOMEM;

	pt_pages = alloc_pages(GFP_KERNEL, get_order(num_pt_pages << PAGE_SHIFT));
	if (!pt_pages) {
		__free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT));
		return -ENOMEM;
	}

	ppgtt->gen8_pt_pages = pt_pages;
	ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT);
	ppgtt->num_pt_pages = 1 << get_order(num_pt_pages << PAGE_SHIFT);
	ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE;
	ppgtt->enable = gen8_ppgtt_enable;
	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
	ppgtt->base.start = 0;
	ppgtt->base.total = ppgtt->num_pt_pages * GEN8_PTES_PER_PAGE * PAGE_SIZE;

	BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS);

	/*
	 * - Create a mapping for the page directories.
	 * - For each page directory:
	 *	allocate space for page table mappings.
	 *	map each page table
	 */
	for (i = 0; i < max_pdp; i++) {
		dma_addr_t temp;
		temp = pci_map_page(ppgtt->base.dev->pdev,
				    &ppgtt->pd_pages[i], 0,
				    PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
		if (pci_dma_mapping_error(ppgtt->base.dev->pdev, temp))
			goto err_out;

		ppgtt->pd_dma_addr[i] = temp;

		ppgtt->gen8_pt_dma_addr[i] = kmalloc(sizeof(dma_addr_t) * GEN8_PDES_PER_PAGE, M_DRM, M_WAITOK);
		if (!ppgtt->gen8_pt_dma_addr[i])
			goto err_out;

		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
			struct vm_page *p = &pt_pages[i * GEN8_PDES_PER_PAGE + j];
			temp = pci_map_page(ppgtt->base.dev->pdev,
					    p, 0, PAGE_SIZE,
					    PCI_DMA_BIDIRECTIONAL);

			if (pci_dma_mapping_error(ppgtt->base.dev->pdev, temp))
				goto err_out;

			ppgtt->gen8_pt_dma_addr[i][j] = temp;
		}
	}

	/* For now, the PPGTT helper functions all require that the PDEs are
	 * plugged in correctly. So we do that now/here. For aliasing PPGTT, we
	 * will never need to touch the PDEs again */
	for (i = 0; i < max_pdp; i++) {
		gen8_ppgtt_pde_t *pd_vaddr;
		pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]);
		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
			dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
			pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
						      I915_CACHE_LLC);
		}
		kunmap_atomic(pd_vaddr);
	}

	ppgtt->base.clear_range(&ppgtt->base, 0,
				ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE,
				true);

	DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
			 ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp);
	DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%ld wasted)\n",
			 ppgtt->num_pt_pages,
			 (ppgtt->num_pt_pages - num_pt_pages) +
			 size % (1<<30));
	return 0;

err_out:
	ppgtt->base.cleanup(&ppgtt->base);
	return ret;
}

static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
	gen6_gtt_pte_t __iomem *pd_addr;
	uint32_t pd_entry;
	int i;

	WARN_ON(ppgtt->pd_offset & 0x3f);
	pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm +
		ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		dma_addr_t pt_addr;

		pt_addr = ppgtt->pt_dma_addr[i];
		pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
		pd_entry |= GEN6_PDE_VALID;

		writel(pd_entry, pd_addr + i);
	}
	readl(pd_addr);
}
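
/*
 * Gen6/7 PPGTT geometry, for reference: GEN6_PPGTT_PD_ENTRIES (512) page
 * directory entries, each pointing at a page table of I915_PPGTT_PT_ENTRIES
 * (4096 / 4 = 1024 with 4-byte PTEs) entries, cover 512 * 1024 * 4096 bytes
 * = 2GB of GPU virtual address space, which is what PP_DIR_DCLV_2G below
 * advertises.  The PDEs themselves live inside the global GTT at
 * ppgtt->pd_offset and are written through the gsm mapping by
 * gen6_write_pdes() above.
 */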

static int gen6_ppgtt_enable(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t pd_offset;
	struct intel_ring_buffer *ring;
	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
	int i;

	BUG_ON(ppgtt->pd_offset & 0x3f);

	gen6_write_pdes(ppgtt);

	pd_offset = ppgtt->pd_offset;
	pd_offset /= 64; /* in cachelines, */
	pd_offset <<= 16;

	if (INTEL_INFO(dev)->gen == 6) {
		uint32_t ecochk, gab_ctl, ecobits;

		ecobits = I915_READ(GAC_ECO_BITS);
		I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
					 ECOBITS_PPGTT_CACHE64B);

		gab_ctl = I915_READ(GAB_CTL);
		I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);

		ecochk = I915_READ(GAM_ECOCHK);
		I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
				       ECOCHK_PPGTT_CACHE64B);
		I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	} else if (INTEL_INFO(dev)->gen >= 7) {
		uint32_t ecochk, ecobits;

		ecobits = I915_READ(GAC_ECO_BITS);
		I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);

		ecochk = I915_READ(GAM_ECOCHK);
		if (IS_HASWELL(dev)) {
			ecochk |= ECOCHK_PPGTT_WB_HSW;
		} else {
			ecochk |= ECOCHK_PPGTT_LLC_IVB;
			ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
		}
		I915_WRITE(GAM_ECOCHK, ecochk);
		/* GFX_MODE is per-ring on gen7+ */
	}

	for_each_ring(ring, dev_priv, i) {
		if (INTEL_INFO(dev)->gen >= 7)
			I915_WRITE(RING_MODE_GEN7(ring),
				   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));

		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
		I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
	}
	return 0;
}

/* PPGTT support for Sandybridge/Gen6 and later */
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
				   unsigned first_entry,
				   unsigned num_entries,
				   bool use_scratch)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen6_gtt_pte_t *pt_vaddr, scratch_pte;
	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	unsigned last_pte, i;

	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);

	while (num_entries) {
		last_pte = first_pte + num_entries;
		if (last_pte > I915_PPGTT_PT_ENTRIES)
			last_pte = I915_PPGTT_PT_ENTRIES;

		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);

		for (i = first_pte; i < last_pte; i++)
			pt_vaddr[i] = scratch_pte;

		kunmap_atomic(pt_vaddr);

		num_entries -= last_pte - first_pte;
		first_pte = 0;
		act_pt++;
	}
}

static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
				      vm_page_t *pages,
				      unsigned first_entry,
				      unsigned num_entries,
				      enum i915_cache_level cache_level)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen6_gtt_pte_t *pt_vaddr;
	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	unsigned i;

	pt_vaddr = NULL;
	for (i = 0; i < num_entries; i++) {
		if (pt_vaddr == NULL)
			pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);

		pt_vaddr[act_pte] =
			vm->pte_encode(VM_PAGE_TO_PHYS(pages[i]),
				       cache_level, true);
		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
			kunmap_atomic(pt_vaddr);
			pt_vaddr = NULL;
			act_pt++;
			act_pte = 0;
		}
	}
	if (pt_vaddr)
		kunmap_atomic(pt_vaddr);
}

static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	int i;

	drm_mm_takedown(&ppgtt->base.mm);

	if (ppgtt->pt_dma_addr) {
		for (i = 0; i < ppgtt->num_pd_entries; i++)
			pci_unmap_page(ppgtt->base.dev->pdev,
				       ppgtt->pt_dma_addr[i],
				       4096, PCI_DMA_BIDIRECTIONAL);
	}

	kfree(ppgtt->pt_dma_addr);
	for (i = 0; i < ppgtt->num_pd_entries; i++)
		__free_page(ppgtt->pt_pages[i]);
	kfree(ppgtt->pt_pages);
	kfree(ppgtt);
}

static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned first_pd_entry_in_global_pt;
	int i;
	int ret = -ENOMEM;

	/* ppgtt PDEs reside in the global gtt pagetable, which has 512*1024
	 * entries. For aliasing ppgtt support we just steal them at the end for
	 * now.
	 */
	first_pd_entry_in_global_pt = gtt_total_entries(dev_priv->gtt);

	ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
	ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
	ppgtt->enable = gen6_ppgtt_enable;
	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
	ppgtt->base.scratch = dev_priv->gtt.base.scratch;
	ppgtt->base.start = 0;
	ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * I915_PPGTT_PT_ENTRIES * PAGE_SIZE;
	ppgtt->pt_pages = kcalloc(ppgtt->num_pd_entries, sizeof(struct page *),
				  GFP_KERNEL);
	if (!ppgtt->pt_pages)
		return -ENOMEM;

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		ppgtt->pt_pages[i] = vm_page_alloc(NULL, 0,
		    VM_ALLOC_NORMAL | VM_ALLOC_ZERO);
		if (!ppgtt->pt_pages[i])
			goto err_pt_alloc;
	}

	ppgtt->pt_dma_addr = kcalloc(ppgtt->num_pd_entries, sizeof(dma_addr_t),
				     GFP_KERNEL);
	if (!ppgtt->pt_dma_addr)
		goto err_pt_alloc;

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		dma_addr_t pt_addr;

		pt_addr = pci_map_page(dev->pdev, ppgtt->pt_pages[i], 0, 4096,
				       PCI_DMA_BIDIRECTIONAL);

#if 0
		if (pci_dma_mapping_error(dev->pdev, pt_addr)) {
			ret = -EIO;
			goto err_pd_pin;	/* XXX where is label? */
		}
#endif
		ppgtt->pt_dma_addr[i] = pt_addr;
	}

	ppgtt->base.clear_range(&ppgtt->base, 0,
				ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES, true);

	ppgtt->pd_offset = first_pd_entry_in_global_pt * sizeof(gen6_gtt_pte_t);

	return 0;

err_pt_alloc:
	kfree(ppgtt->pt_dma_addr);
	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		if (ppgtt->pt_pages[i])
			__free_page(ppgtt->pt_pages[i]);
	}
	kfree(ppgtt->pt_pages);

	return ret;
}

static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_hw_ppgtt *ppgtt;
	int ret;

	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
	if (!ppgtt)
		return -ENOMEM;

	ppgtt->base.dev = dev;

	if (INTEL_INFO(dev)->gen < 8)
		ret = gen6_ppgtt_init(ppgtt);
	else if (IS_GEN8(dev))
		ret = gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
	else
		BUG();

	if (ret)
		kfree(ppgtt);
	else {
		dev_priv->mm.aliasing_ppgtt = ppgtt;
		drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
			    ppgtt->base.total);
	}

	return ret;
}

void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;

	if (!ppgtt)
		return;

	ppgtt->base.cleanup(&ppgtt->base);
	dev_priv->mm.aliasing_ppgtt = NULL;
}
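
/*
 * The #if 0 helper below is an older insert path that predates the
 * per-platform insert_entries hooks; the live path is
 * ppgtt->base.insert_entries(), used by i915_ppgtt_bind_object() further
 * down, which is what the XXX comment inside it is asking for.
 */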

#if 0
static void
i915_ppgtt_insert_pages(struct i915_address_space *vm, unsigned first_entry,
    unsigned num_entries, vm_page_t *pages, enum i915_cache_level cache_level)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	uint32_t *pt_vaddr4;
	uint64_t *pt_vaddr8;
	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	unsigned last_pte, i;
	dma_addr_t page_addr;

	while (num_entries) {
		last_pte = first_pte + num_entries;
		if (last_pte > I915_PPGTT_PT_ENTRIES)
			last_pte = I915_PPGTT_PT_ENTRIES;

		/*
		 * XXX severe hack. The insert_entries() function pointer
		 * needs to be setup and used instead of this mess.
		 */
		if (IS_GEN8(ppgtt->base.dev)) {
			pt_vaddr4 = NULL;
			pt_vaddr8 = kmap_atomic(&ppgtt->gen8_pt_pages[act_pd]);
		} else {
			pt_vaddr4 = kmap_atomic(ppgtt->pt_pages[act_pd]);
			pt_vaddr8 = NULL;
		}

		for (i = first_pte; i < last_pte; i++) {
			page_addr = VM_PAGE_TO_PHYS(*pages);
			if (IS_GEN8(ppgtt->base.dev)) {
				pt_vaddr8[i] = gen8_pte_encode(page_addr, cache_level, true);
			} else {
				pt_vaddr4[i] = vm->pte_encode(page_addr, cache_level, true);
			}

			pages++;
		}

		if (IS_GEN8(ppgtt->base.dev))
			kunmap_atomic(pt_vaddr8);
		else
			kunmap_atomic(pt_vaddr4);

		num_entries -= last_pte - first_pte;
		first_pte = 0;
		act_pd++;
	}
}
#endif

void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
			    struct drm_i915_gem_object *obj,
			    enum i915_cache_level cache_level)
{
#if 0
	i915_ppgtt_insert_pages(&ppgtt->base,
	    i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
	    obj->base.size >> PAGE_SHIFT, obj->pages, cache_level);
#endif
	ppgtt->base.insert_entries(&ppgtt->base, obj->pages,
				   i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
				   obj->base.size >> PAGE_SHIFT,
				   cache_level);
}

void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
			      struct drm_i915_gem_object *obj)
{
	ppgtt->base.clear_range(&ppgtt->base,
				i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
				obj->base.size >> PAGE_SHIFT,
				true);
}

extern int intel_iommu_gfx_mapped;
/* Certain Gen5 chipsets require idling the GPU before
 * unmapping anything from the GTT when VT-d is enabled.
 */
static inline bool needs_idle_maps(struct drm_device *dev)
{
#ifdef CONFIG_INTEL_IOMMU
	/* Query intel_iommu to see if we need the workaround. Presumably that
	 * was loaded first.
	 */
	if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
		return true;
#endif
	return false;
}

static bool do_idling(struct drm_i915_private *dev_priv)
{
	bool ret = dev_priv->mm.interruptible;

	if (unlikely(dev_priv->gtt.do_idle_maps)) {
		dev_priv->mm.interruptible = false;
		if (i915_gpu_idle(dev_priv->dev)) {
			DRM_ERROR("Couldn't idle GPU\n");
			/* Wait a bit, in hopes it avoids the hang */
			udelay(10);
		}
	}

	return ret;
}

static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
{
	if (unlikely(dev_priv->gtt.do_idle_maps))
		dev_priv->mm.interruptible = interruptible;
}

void i915_check_and_clear_faults(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring;
	int i;

	if (INTEL_INFO(dev)->gen < 6)
		return;

	for_each_ring(ring, dev_priv, i) {
		u32 fault_reg;
		fault_reg = I915_READ(RING_FAULT_REG(ring));
		if (fault_reg & RING_FAULT_VALID) {
#if 0
			DRM_DEBUG_DRIVER("Unexpected fault\n"
					 "\tAddr: 0x%08lx\n"
					 "\tAddress space: %s\n"
					 "\tSource ID: %d\n"
					 "\tType: %d\n",
					 fault_reg & PAGE_MASK,
					 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
					 RING_FAULT_SRCID(fault_reg),
					 RING_FAULT_FAULT_TYPE(fault_reg));
#endif
			I915_WRITE(RING_FAULT_REG(ring),
				   fault_reg & ~RING_FAULT_VALID);
		}
	}
	POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
}

void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	/* Don't bother messing with faults pre GEN6 as we have little
	 * documentation supporting that it's a good idea.
	 */
	if (INTEL_INFO(dev)->gen < 6)
		return;

	i915_check_and_clear_faults(dev);

	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
				       dev_priv->gtt.base.start / PAGE_SIZE,
				       dev_priv->gtt.base.total / PAGE_SIZE,
				       true);
}

void i915_gem_restore_gtt_mappings(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;

	i915_check_and_clear_faults(dev);

	/* First fill our portion of the GTT with scratch pages */
	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
				       dev_priv->gtt.base.start / PAGE_SIZE,
				       dev_priv->gtt.base.total / PAGE_SIZE,
				       true);

	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
		i915_gem_clflush_object(obj, obj->pin_display);
		i915_gem_gtt_bind_object(obj, obj->cache_level);
	}

	i915_gem_chipset_flush(dev);
}

int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
{
	if (obj->has_dma_mapping)
		return 0;

#if 0
	if (!dma_map_sg(&obj->base.dev->pdev->dev,
			obj->pages->sgl, obj->pages->nents,
			PCI_DMA_BIDIRECTIONAL))
		return -ENOSPC;
#endif

	return 0;
}

static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
{
#if 0
	writeq(pte, addr);
#else
	iowrite32((u32)pte, addr);
	iowrite32(pte >> 32, addr + 4);
#endif
}

static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
				     vm_page_t *pages,
				     unsigned int first_entry,
				     unsigned int num_entries,
				     enum i915_cache_level level)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	gen8_gtt_pte_t __iomem *gtt_entries =
		(gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	int i = 0;
	dma_addr_t addr;

	for (i = 0; i < num_entries; i++) {
		addr = VM_PAGE_TO_PHYS(pages[i]);
		gen8_set_pte(&gtt_entries[i],
			     gen8_pte_encode(addr, level, true));
	}

	/*
	 * XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR that they are potentially
	 * of NUMA access patterns. Therefore, even with the way we assume
	 * hardware should work, we must keep this posting read for paranoia.
	 */
	if (i != 0)
		WARN_ON(readq(&gtt_entries[i-1])
			!= gen8_pte_encode(addr, level, true));

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}

/*
 * Binds an object into the global gtt with the specified cache level. The object
 * will be accessible to the GPU via commands whose operands reference offsets
 * within the global GTT as well as accessible by the GPU through the GMADR
 * mapped BAR (dev_priv->mm.gtt->gtt).
 */
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
				     vm_page_t *pages,
				     unsigned int first_entry,
				     unsigned int num_entries,
				     enum i915_cache_level level)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	gen6_gtt_pte_t __iomem *gtt_entries =
		(gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	int i = 0;
	dma_addr_t addr;

	for (i = 0; i < num_entries; i++) {
		addr = VM_PAGE_TO_PHYS(pages[i]);
		iowrite32(vm->pte_encode(addr, level, true), &gtt_entries[i]);
	}

	/* XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR that they are potentially
	 * of NUMA access patterns. Therefore, even with the way we assume
	 * hardware should work, we must keep this posting read for paranoia.
	 */
	if (i != 0)
		WARN_ON(readl(&gtt_entries[i-1]) !=
			vm->pte_encode(addr, level, true));

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}

static void gen8_ggtt_clear_range(struct i915_address_space *vm,
				  unsigned int first_entry,
				  unsigned int num_entries,
				  bool use_scratch)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	gen8_gtt_pte_t scratch_pte, __iomem *gtt_base =
		(gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = gen8_pte_encode(vm->scratch.addr,
				      I915_CACHE_LLC,
				      use_scratch);
	for (i = 0; i < num_entries; i++)
		gen8_set_pte(&gtt_base[i], scratch_pte);
	readl(gtt_base);
}

static void gen6_ggtt_clear_range(struct i915_address_space *vm,
				  unsigned int first_entry,
				  unsigned int num_entries,
				  bool use_scratch)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch);

	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
	readl(gtt_base);
}

static void i915_ggtt_insert_entries(struct i915_address_space *vm,
				     vm_page_t *pages,
				     unsigned int pg_start,
				     unsigned int num_entries,
				     enum i915_cache_level cache_level)
{
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

	intel_gtt_insert_pages(pg_start, num_entries, pages, flags);
}

static void i915_ggtt_clear_range(struct i915_address_space *vm,
				  unsigned int first_entry,
				  unsigned int num_entries,
				  bool unused)
{
	intel_gtt_clear_range(first_entry, num_entries);
}

void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
			      enum i915_cache_level cache_level)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT;

	dev_priv->gtt.base.insert_entries(&dev_priv->gtt.base, obj->pages,
					  entry,
					  obj->base.size >> PAGE_SHIFT,
					  cache_level);

	obj->has_global_gtt_mapping = 1;
}

void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT;

	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
				       entry,
				       obj->base.size >> PAGE_SHIFT,
				       true);

	obj->has_global_gtt_mapping = 0;
}

void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible;

	interruptible = do_idling(dev_priv);

#if 0
	if (!obj->has_dma_mapping)
		dma_unmap_sg(&dev->pdev->dev,
			     obj->pages->sgl, obj->pages->nents,
			     PCI_DMA_BIDIRECTIONAL);
#endif

	undo_idling(dev_priv, interruptible);
}

static void i915_gtt_color_adjust(struct drm_mm_node *node,
				  unsigned long color,
				  unsigned long *start,
				  unsigned long *end)
{
	if (node->color != color)
		*start += 4096;

	if (!list_empty(&node->node_list)) {
		node = list_entry(node->node_list.next,
				  struct drm_mm_node,
				  node_list);
		if (node->allocated && node->color != color)
			*end -= 4096;
	}
}
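
/*
 * i915_gtt_color_adjust() above trims a page off either end of a candidate
 * hole whenever a neighbouring node has a different color (i915 uses the
 * object's cache level as the node color), so objects with different cache
 * attributes never occupy adjacent GTT entries.  It is only installed below
 * when the platform has no LLC.
 */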

void i915_gem_setup_global_gtt(struct drm_device *dev,
			       unsigned long start,
			       unsigned long mappable_end,
			       unsigned long end)
{
	/* Let GEM Manage all of the aperture.
	 *
	 * However, leave one page at the end still bound to the scratch page.
	 * There are a number of places where the hardware apparently prefetches
	 * past the end of the object, and we've seen multiple hangs with the
	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
	 * aperture. One page should be enough to keep any prefetching inside
	 * of the aperture.
	 */
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
	unsigned long mappable;
	int error;
	struct drm_mm_node *entry;
	struct drm_i915_gem_object *obj;
	unsigned long hole_start, hole_end;

	kprintf("MAPPABLE_END VS END %016jx %016jx\n", mappable_end, end);
	tsleep(&mappable_end, 0, "DELAY", hz); /* for kprintf */
	/*BUG_ON(mappable_end > end);*/

	mappable = min(end, mappable_end) - start;

	/* Subtract the guard page ... */
	drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE);
	if (!HAS_LLC(dev))
		dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;

	/* Mark any preallocated objects as occupied */
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
		struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
		int ret;
		DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
			      i915_gem_obj_ggtt_offset(obj), obj->base.size);

		WARN_ON(i915_gem_obj_ggtt_bound(obj));
		ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
		if (ret)
			DRM_DEBUG_KMS("Reservation failed\n");
		obj->has_global_gtt_mapping = 1;
	}

	dev_priv->gtt.base.start = start;
	dev_priv->gtt.base.total = end - start;

	/* Clear any non-preallocated blocks */
	drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
		const unsigned long count = (hole_end - hole_start) / PAGE_SIZE;
		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
			      hole_start, hole_end);
		ggtt_vm->clear_range(ggtt_vm, hole_start / PAGE_SIZE, count, true);
	}
	/* ... but ensure that we clear the entire range. */
	intel_gtt_clear_range(start / PAGE_SIZE, (end-start) / PAGE_SIZE);
	device_printf(dev->dev,
	    "taking over the fictitious range 0x%lx-0x%lx\n",
	    dev->agp->base + start, dev->agp->base + start + mappable);
	error = -vm_phys_fictitious_reg_range(dev->agp->base + start,
	    dev->agp->base + start + mappable, VM_MEMATTR_WRITE_COMBINING);

	/* And finally clear the reserved guard page */
	ggtt_vm->clear_range(ggtt_vm, end / PAGE_SIZE - 1, 1, true);
}

static bool
intel_enable_ppgtt(struct drm_device *dev)
{
	if (i915_enable_ppgtt >= 0)
		return i915_enable_ppgtt;

#ifdef CONFIG_INTEL_IOMMU
	/* Disable ppgtt on SNB if VT-d is on. */
	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped)
		return false;
#endif

	return true;
}

void i915_gem_init_global_gtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long gtt_size, mappable_size;

	gtt_size = dev_priv->gtt.base.total;
	mappable_size = dev_priv->gtt.mappable_end;

	if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
		int ret;

		if (INTEL_INFO(dev)->gen <= 7) {
			/* PPGTT pdes are stolen from global gtt ptes, so shrink the
			 * aperture accordingly when using aliasing ppgtt.
			 */
			gtt_size -= GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE;
		}

		i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);

		ret = i915_gem_init_aliasing_ppgtt(dev);
		if (!ret)
			return;

		DRM_ERROR("Aliased PPGTT setup failed %d\n", ret);
		drm_mm_takedown(&dev_priv->gtt.base.mm);
		if (INTEL_INFO(dev)->gen < 8)
			gtt_size += GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE;
	}
	i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
}

static int setup_scratch_page(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct vm_page *page;
	dma_addr_t dma_addr;

	page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
	if (page == NULL)
		return -ENOMEM;
#if 0
	get_page(page);
	set_pages_uc(page, 1);
#endif

#ifdef CONFIG_INTEL_IOMMU
	dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE,
				PCI_DMA_BIDIRECTIONAL);
	if (pci_dma_mapping_error(dev->pdev, dma_addr))
		return -EINVAL;
#else
	dma_addr = page_to_phys(page);
#endif
	dev_priv->gtt.base.scratch.page = page;
	dev_priv->gtt.base.scratch.addr = dma_addr;

	return 0;
}

#if 0
static void teardown_scratch_page(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct page *page = dev_priv->gtt.base.scratch.page;

	set_pages_wb(page, 1);
	pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr,
		       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	put_page(page);
	__free_page(page);
}
#endif

static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
	return snb_gmch_ctl << 20;
}

static inline unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
	if (bdw_gmch_ctl)
		bdw_gmch_ctl = 1 << bdw_gmch_ctl;
	if (bdw_gmch_ctl > 4) {
		WARN_ON(!i915_preliminary_hw_support);
		return 4<<20;
	}

	return bdw_gmch_ctl << 20;
}

static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
	return snb_gmch_ctl << 25; /* 32 MB units */
}

static inline size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
	return bdw_gmch_ctl << 25; /* 32 MB units */
}
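
/*
 * These helpers decode the GMCH control word.  On gen6, for example, a GGMS
 * field of 2 means a 2MB GTT, i.e. 2MB / 4 bytes per PTE = 512K entries or
 * 2GB of mappable space; the stolen-size fields count in 32MB units, hence
 * the << 25.
 */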

static int ggtt_probe_common(struct drm_device *dev,
			     size_t gtt_size)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	phys_addr_t gtt_phys_addr;
	int ret;

	/* For Modern GENs the PTEs and register space are split in the BAR */
	gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
		(pci_resource_len(dev->pdev, 0) / 2);

	kprintf("gtt_probe_common: gtt_phys_addr=0x%lx\n", gtt_phys_addr);
	dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
	if (!dev_priv->gtt.gsm) {
		DRM_ERROR("Failed to map the gtt page table\n");
		return -ENOMEM;
	}

	ret = setup_scratch_page(dev);
	if (ret) {
		DRM_ERROR("Scratch setup failed\n");
		/* iounmap will also get called at remove, but meh */
#if 0
		iounmap(dev_priv->gtt.gsm);
#endif
	}

	return ret;
}

/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases. */
static void gen8_setup_private_ppat(struct drm_i915_private *dev_priv)
{
#define GEN8_PPAT_UC		(0<<0)
#define GEN8_PPAT_WC		(1<<0)
#define GEN8_PPAT_WT		(2<<0)
#define GEN8_PPAT_WB		(3<<0)
#define GEN8_PPAT_ELLC_OVERRIDE	(0<<2)
/* FIXME(BDW): Bspec is completely confused about cache control bits. */
#define GEN8_PPAT_LLC		(1<<2)
#define GEN8_PPAT_LLCELLC	(2<<2)
#define GEN8_PPAT_LLCeLLC	(3<<2)
#define GEN8_PPAT_AGE(x)	(x<<4)
#define GEN8_PPAT(i, x)		((uint64_t) (x) << ((i) * 8))
	uint64_t pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
	      GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	/* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
	 * write would work. */
	I915_WRITE(GEN8_PRIVATE_PAT, pat);
	I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
}
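
/*
 * The PPAT_*_INDEX macros near the top of the file select entries from the
 * table programmed above: index 0 (no PAT/PCD/PWT bits set) is the WB LLC
 * entry used for PDEs, index 2 (_PAGE_PCD) the WT entry meant for eLLC
 * display surfaces, index 3 (_PAGE_PCD | _PAGE_PWT) the uncached entry, and
 * index 4 (_PAGE_PAT) the WB LLC+eLLC entry used for cacheable PTEs.
 */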

static int gen8_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned int gtt_size;
	u16 snb_gmch_ctl;
	int ret;

	/* TODO: We're not aware of mappable constraints on gen8 yet */
	*mappable_base = pci_resource_start(dev->pdev, 2);
	*mappable_end = pci_resource_len(dev->pdev, 2);

#if 0
	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));
#endif

	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	*stolen = gen8_get_stolen_size(snb_gmch_ctl);

	gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
	*gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT;

	gen8_setup_private_ppat(dev_priv);

	ret = ggtt_probe_common(dev, gtt_size);

	dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
	dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;

	return ret;
}

static int gen6_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned int gtt_size;
	u16 snb_gmch_ctl;
	int ret;

	*mappable_base = pci_resource_start(dev->pdev, 2);
	*mappable_end = pci_resource_len(dev->pdev, 2);

	/* 64/512MB is the current min/max we actually know of, but this is just
	 * a coarse sanity check.
	 */
	if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
		DRM_ERROR("Unknown GMADR size (%lx)\n",
			  dev_priv->gtt.mappable_end);
		return -ENXIO;
	}

#if 0
	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
#endif
	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	*stolen = gen6_get_stolen_size(snb_gmch_ctl);

	gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
	*gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT;

	ret = ggtt_probe_common(dev, gtt_size);

	dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
	dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;

	return ret;
}

static void gen6_gmch_remove(struct i915_address_space *vm)
{
#if 0
	struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);

	drm_mm_takedown(&vm->mm);
	iounmap(gtt->gsm);
	teardown_scratch_page(vm->dev);
#endif
}

static int i915_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
#if 0
	int ret;

	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
	if (!ret) {
		DRM_ERROR("failed to set up gmch\n");
		return -EIO;
	}
#endif

	intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);

	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
	dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;

	if (unlikely(dev_priv->gtt.do_idle_maps))
		DRM_INFO("applying Ironlake quirks for intel_iommu\n");

	return 0;
}

static void i915_gmch_remove(struct i915_address_space *vm)
{
}

int i915_gem_gtt_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_gtt *gtt = &dev_priv->gtt;
	int ret;

	if (INTEL_INFO(dev)->gen <= 5) {
		gtt->gtt_probe = i915_gmch_probe;
		gtt->base.cleanup = i915_gmch_remove;
	} else if (INTEL_INFO(dev)->gen < 8) {
		gtt->gtt_probe = gen6_gmch_probe;
		gtt->base.cleanup = gen6_gmch_remove;
		if (IS_HASWELL(dev) && dev_priv->ellc_size)
			gtt->base.pte_encode = iris_pte_encode;
		else if (IS_HASWELL(dev))
			gtt->base.pte_encode = hsw_pte_encode;
		else if (IS_VALLEYVIEW(dev))
			gtt->base.pte_encode = byt_pte_encode;
		else if (INTEL_INFO(dev)->gen >= 7)
			gtt->base.pte_encode = ivb_pte_encode;
		else
			gtt->base.pte_encode = snb_pte_encode;
	} else {
		dev_priv->gtt.gtt_probe = gen8_gmch_probe;
		dev_priv->gtt.base.cleanup = gen6_gmch_remove;
	}

	ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
			     &gtt->mappable_base, &gtt->mappable_end);
	if (ret)
		return ret;

	gtt->base.dev = dev;

	/* GMADR is the PCI mmio aperture into the global GTT. */
	DRM_INFO("Memory usable by graphics device = %zdM\n",
		 gtt->base.total >> 20);
	DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);

	return 0;
}