1 /*
2 * Copyright © 2010 Daniel Vetter
3 * Copyright © 2011-2014 Intel Corporation
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 *
24 */
25
26 #include <linux/err.h>
27 #include <linux/seq_file.h>
28 #include <drm/drmP.h>
29 #include <drm/i915_drm.h>
30 #include "i915_drv.h"
31 #include "i915_trace.h"
32 #include "intel_drv.h"
33
34 #ifdef __NetBSD__
35 #include <x86/machdep.h>
36 #include <x86/pte.h>
37 #define _PAGE_PRESENT PG_V /* 0x01 PTE is present / valid */
38 #define _PAGE_RW PG_RW /* 0x02 read/write */
39 #define _PAGE_PWT PG_WT /* 0x08 write-through */
40 #define _PAGE_PCD PG_N /* 0x10 page cache disabled / non-cacheable */
41 #define _PAGE_PAT PG_PAT /* 0x80 page attribute table on PTE */
42 #endif
43
44 static void gen8_setup_private_ppat(struct drm_i915_private *dev_priv);
45
46 bool intel_enable_ppgtt(struct drm_device *dev, bool full)
47 {
48 if (i915.enable_ppgtt == 0)
49 return false;
50
51 if (i915.enable_ppgtt == 1 && full)
52 return false;
53
54 return true;
55 }
56
57 static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
58 {
59 if (enable_ppgtt == 0 || !HAS_ALIASING_PPGTT(dev))
60 return 0;
61
62 if (enable_ppgtt == 1)
63 return 1;
64
65 if (enable_ppgtt == 2 && HAS_PPGTT(dev))
66 return 2;
67
68 #ifdef CONFIG_INTEL_IOMMU
69 /* Disable ppgtt on SNB if VT-d is on. */
70 if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
71 DRM_INFO("Disabling PPGTT because VT-d is on\n");
72 return 0;
73 }
74 #endif
75
76 return HAS_ALIASING_PPGTT(dev) ? 1 : 0;
77 }
78
79 #define GEN6_PPGTT_PD_ENTRIES 512
80 #define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
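/*
 * Note: gen6 PTEs are 4 bytes (see the CTASSERT in gen6_ppgtt_init), so each
 * 4 KiB page table holds 1024 entries; 512 page directory entries times
 * 1024 PTEs times 4 KiB covers the 2 GiB gen6 PPGTT address space.
 */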
81 typedef uint64_t gen8_gtt_pte_t;
82 typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
83
84 /* PPGTT stuff */
85 #define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0))
86 #define HSW_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0x7f0))
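/*
 * Example of the address folding above: with GEN6_GTT_ADDR_ENCODE, a physical
 * address of 0x1_0000_0000 (bit 32 set) folds down to entry bit 4, since
 * ((addr >> 28) & 0xff0) moves address bits 39:32 into entry bits 11:4.
 */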
87
88 #define GEN6_PDE_VALID (1 << 0)
89 /* gen6+ has bits 11:4 for physical addr bits 39:32 */
90 #define GEN6_PDE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr)
91
92 #define GEN6_PTE_VALID (1 << 0)
93 #define GEN6_PTE_UNCACHED (1 << 1)
94 #define HSW_PTE_UNCACHED (0)
95 #define GEN6_PTE_CACHE_LLC (2 << 1)
96 #define GEN7_PTE_CACHE_L3_LLC (3 << 1)
97 #define GEN6_PTE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr)
98 #define HSW_PTE_ADDR_ENCODE(addr) HSW_GTT_ADDR_ENCODE(addr)
99
100 /* Cacheability Control is a 4-bit value. The low three bits are stored in
101 * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
102 */
103 #define HSW_CACHEABILITY_CONTROL(bits) ((((bits) & 0x7) << 1) | \
104 (((bits) & 0x8) << (11 - 3)))
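/*
 * For example, HSW_CACHEABILITY_CONTROL(0xb) yields 0x806: the low bits 0x3
 * land in PTE bits 3:1 (0x6) and the high bit 0x8 lands in PTE bit 11 (0x800).
 */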
105 #define HSW_WB_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x2)
106 #define HSW_WB_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x3)
107 #define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb)
108 #define HSW_WB_ELLC_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x8)
109 #define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6)
110 #define HSW_WT_ELLC_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x7)
111
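/* Each gen8 PTE/PDE is 8 bytes, so a 4 KiB page holds 512 of them. */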
112 #define GEN8_PTES_PER_PAGE (PAGE_SIZE / sizeof(gen8_gtt_pte_t))
113 #define GEN8_PDES_PER_PAGE (PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
114
115 /* GEN8 legacy style address is defined as a 3 level page table:
116 * 31:30 | 29:21 | 20:12 | 11:0
117 * PDPE | PDE | PTE | offset
118 * The difference compared to a normal x86 3-level page table is that the
119 * PDPEs are programmed via register.
120 */
121 #define GEN8_PDPE_SHIFT 30
122 #define GEN8_PDPE_MASK 0x3
123 #define GEN8_PDE_SHIFT 21
124 #define GEN8_PDE_MASK 0x1ff
125 #define GEN8_PTE_SHIFT 12
126 #define GEN8_PTE_MASK 0x1ff
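/*
 * For example, GPU address 0x80403025 decomposes (per the shifts and masks
 * above) into PDPE 2, PDE 2, PTE 3, and page offset 0x025.
 */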
127
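/*
 * These indices reuse the x86 PTE bit positions (PWT/PCD/PAT) to select an
 * entry in the private PPAT that gen8_setup_private_ppat() programs elsewhere
 * in this file.
 */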
128 #define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD)
129 #define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */
130 #define PPAT_CACHED_INDEX _PAGE_PAT /* WB LLC/eLLC */
131 #define PPAT_DISPLAY_ELLC_INDEX _PAGE_PCD /* WT eLLC */
132
133 static void ppgtt_bind_vma(struct i915_vma *vma,
134 enum i915_cache_level cache_level,
135 u32 flags);
136 static void ppgtt_unbind_vma(struct i915_vma *vma);
137 static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt);
138
139 static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
140 enum i915_cache_level level,
141 bool valid)
142 {
143 gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
144 pte |= addr;
145 if (level != I915_CACHE_NONE)
146 pte |= PPAT_CACHED_INDEX;
147 else
148 pte |= PPAT_UNCACHED_INDEX;
149 return pte;
150 }
151
152 static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev,
153 dma_addr_t addr,
154 enum i915_cache_level level)
155 {
156 gen8_ppgtt_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
157 pde |= addr;
158 if (level != I915_CACHE_NONE)
159 pde |= PPAT_CACHED_PDE_INDEX;
160 else
161 pde |= PPAT_UNCACHED_INDEX;
162 return pde;
163 }
164
165 static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
166 enum i915_cache_level level,
167 bool valid)
168 {
169 gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
170 pte |= GEN6_PTE_ADDR_ENCODE(addr);
171
172 switch (level) {
173 case I915_CACHE_L3_LLC:
174 case I915_CACHE_LLC:
175 pte |= GEN6_PTE_CACHE_LLC;
176 break;
177 case I915_CACHE_NONE:
178 pte |= GEN6_PTE_UNCACHED;
179 break;
180 default:
181 WARN_ON(1);
182 }
183
184 return pte;
185 }
186
187 static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
188 enum i915_cache_level level,
189 bool valid)
190 {
191 gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
192 pte |= GEN6_PTE_ADDR_ENCODE(addr);
193
194 switch (level) {
195 case I915_CACHE_L3_LLC:
196 pte |= GEN7_PTE_CACHE_L3_LLC;
197 break;
198 case I915_CACHE_LLC:
199 pte |= GEN6_PTE_CACHE_LLC;
200 break;
201 case I915_CACHE_NONE:
202 pte |= GEN6_PTE_UNCACHED;
203 break;
204 default:
205 WARN_ON(1);
206 }
207
208 return pte;
209 }
210
211 #define BYT_PTE_WRITEABLE (1 << 1)
212 #define BYT_PTE_SNOOPED_BY_CPU_CACHES (1 << 2)
213
214 static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
215 enum i915_cache_level level,
216 bool valid)
217 {
218 gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
219 pte |= GEN6_PTE_ADDR_ENCODE(addr);
220
221 /* Mark the page as writeable. Other platforms don't have a
222 * setting for read-only/writable, so this matches that behavior.
223 */
224 pte |= BYT_PTE_WRITEABLE;
225
226 if (level != I915_CACHE_NONE)
227 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
228
229 return pte;
230 }
231
232 static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
233 enum i915_cache_level level,
234 bool valid)
235 {
236 gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
237 pte |= HSW_PTE_ADDR_ENCODE(addr);
238
239 if (level != I915_CACHE_NONE)
240 pte |= HSW_WB_LLC_AGE3;
241
242 return pte;
243 }
244
245 static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
246 enum i915_cache_level level,
247 bool valid)
248 {
249 gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
250 pte |= HSW_PTE_ADDR_ENCODE(addr);
251
252 switch (level) {
253 case I915_CACHE_NONE:
254 break;
255 case I915_CACHE_WT:
256 pte |= HSW_WT_ELLC_LLC_AGE3;
257 break;
258 default:
259 pte |= HSW_WB_ELLC_LLC_AGE3;
260 break;
261 }
262
263 return pte;
264 }
265
266 /* Broadwell Page Directory Pointer Descriptors */
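/*
 * Write one PDP register pair for @ring, either directly via MMIO when
 * synchronous or by emitting MI_LOAD_REGISTER_IMM commands on the ring.
 */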
267 static int gen8_write_pdp(struct intel_ring_buffer *ring, unsigned entry,
268 uint64_t val, bool synchronous)
269 {
270 struct drm_i915_private *dev_priv = ring->dev->dev_private;
271 int ret;
272
273 BUG_ON(entry >= 4);
274
275 if (synchronous) {
276 I915_WRITE(GEN8_RING_PDP_UDW(ring, entry), val >> 32);
277 I915_WRITE(GEN8_RING_PDP_LDW(ring, entry), (u32)val);
278 return 0;
279 }
280
281 ret = intel_ring_begin(ring, 6);
282 if (ret)
283 return ret;
284
285 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
286 intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
287 intel_ring_emit(ring, (u32)(val >> 32));
288 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
289 intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
290 intel_ring_emit(ring, (u32)(val));
291 intel_ring_advance(ring);
292
293 return 0;
294 }
295
296 static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
297 struct intel_ring_buffer *ring,
298 bool synchronous)
299 {
300 int i, ret;
301
302 /* bit of a hack to find the actual last used pd */
303 int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;
304
305 for (i = used_pd - 1; i >= 0; i--) {
306 #ifdef __NetBSD__
307 const bus_addr_t addr =
308 ppgtt->u.gen8->pd_map->dm_segs[i].ds_addr;
309 #else
310 dma_addr_t addr = ppgtt->pd_dma_addr[i];
311 #endif
312 ret = gen8_write_pdp(ring, i, addr, synchronous);
313 if (ret)
314 return ret;
315 }
316
317 return 0;
318 }
319
320 static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
321 uint64_t start,
322 uint64_t length,
323 bool use_scratch)
324 {
325 struct i915_hw_ppgtt *ppgtt =
326 container_of(vm, struct i915_hw_ppgtt, base);
327 gen8_gtt_pte_t *pt_vaddr, scratch_pte;
328 unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
329 unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
330 unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
331 unsigned num_entries = length >> PAGE_SHIFT;
332 unsigned last_pte, i;
333 #ifdef __NetBSD__
334 void *kva;
335 int ret;
336 #endif
337
338 scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
339 I915_CACHE_LLC, use_scratch);
340
341 while (num_entries) {
342 #ifndef __NetBSD__
343 struct page *page_table = ppgtt->gen8_pt_pages[pdpe][pde];
344 #endif
345
346 last_pte = pte + num_entries;
347 if (last_pte > GEN8_PTES_PER_PAGE)
348 last_pte = GEN8_PTES_PER_PAGE;
349
350 #ifdef __NetBSD__
351 /* XXX errno NetBSD->Linux */
352 ret = -bus_dmamem_map(ppgtt->base.dev->dmat,
353 &ppgtt->u.gen8->pd[pdpe].pt_segs[pde], 1, PAGE_SIZE, &kva,
354 BUS_DMA_NOWAIT);
355 if (ret) {
356 /*
357 * XXX Should guarantee mapping earlier with
358 * uvm_emap(9) or something.
359 */
360 device_printf(ppgtt->base.dev->dev,
361 "failed to map page table: %d\n", -ret);
362 goto skip;
363 }
364 pt_vaddr = kva;
365 #else
366 pt_vaddr = kmap_atomic(page_table);
367 #endif
368
369 for (i = pte; i < last_pte; i++) {
370 pt_vaddr[i] = scratch_pte;
371 num_entries--;
372 }
373
374 #ifdef __NetBSD__
375 bus_dmamem_unmap(ppgtt->base.dev->dmat, kva, PAGE_SIZE);
376 skip:;
377 #else
378 kunmap_atomic(pt_vaddr);
379 #endif
380
381 pte = 0;
382 if (++pde == GEN8_PDES_PER_PAGE) {
383 pdpe++;
384 pde = 0;
385 }
386 }
387 }
388
389 #ifdef __NetBSD__
390 static void
391 gen8_ppgtt_insert_entries(struct i915_address_space *vm, bus_dmamap_t dmamap,
392 uint64_t start, enum i915_cache_level cache_level)
393 {
394 struct i915_hw_ppgtt *ppgtt =
395 container_of(vm, struct i915_hw_ppgtt, base);
396 gen8_gtt_pte_t *pt_vaddr;
397 unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
398 unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
399 unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
400 unsigned seg;
401 void *kva;
402 int ret;
403
404 pt_vaddr = NULL;
405 KASSERT(0 < dmamap->dm_nsegs);
406 for (seg = 0; seg < dmamap->dm_nsegs; seg++) {
407 KASSERT(dmamap->dm_segs[seg].ds_len == PAGE_SIZE);
408 if (WARN_ON(pdpe >= GEN8_LEGACY_PDPS))
409 break;
410 if (pt_vaddr == NULL) {
411 /* XXX errno NetBSD->Linux */
412 ret = -bus_dmamem_map(ppgtt->base.dev->dmat,
413 &ppgtt->u.gen8->pd[pdpe].pt_segs[pde], 1,
414 PAGE_SIZE, &kva, BUS_DMA_NOWAIT);
415 if (ret) {
416 /*
417 * XXX Should guarantee mapping earlier
418 * with uvm_emap(9) or something.
419 */
420 device_printf(ppgtt->base.dev->dev,
421 "failed to map page table: %d\n", -ret);
422 goto skip;
423 }
424 pt_vaddr = kva;
425 }
426 pt_vaddr[pte] = gen8_pte_encode(dmamap->dm_segs[seg].ds_addr,
427 cache_level, true);
428 skip: if (++pte == GEN8_PTES_PER_PAGE) {
429 bus_dmamem_unmap(ppgtt->base.dev->dmat, kva,
430 PAGE_SIZE);
431 pt_vaddr = NULL;
432 if (++pde == GEN8_PDES_PER_PAGE) {
433 pdpe++;
434 pde = 0;
435 }
436 pte = 0;
437 }
438 }
439 if (pt_vaddr)
440 bus_dmamem_unmap(ppgtt->base.dev->dmat, kva, PAGE_SIZE);
441 }
442
443 static void gen8_ppgtt_cleanup(struct i915_address_space *);
444 static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *, unsigned);
445 static void gen8_ppgtt_free(struct i915_hw_ppgtt *);
446 static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *,
447 unsigned);
448 static void gen8_ppgtt_free_page_directories(struct i915_hw_ppgtt *);
449 static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *,
450 unsigned);
451 static void gen8_ppgtt_free_page_tables(struct i915_hw_ppgtt *);
452
453 static int
454 gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
455 {
456 const unsigned npdp = DIV_ROUND_UP(size, (1U << 30));
457 const unsigned min_pt_pages = (GEN8_PDES_PER_PAGE * npdp);
458 unsigned i, j;
459 int ret;
460
461 /* Allocate the PPGTT structures. */
462 ret = gen8_ppgtt_alloc(ppgtt, npdp);
463 if (ret)
464 goto fail0;
465
466 /* Fill the page directory entries. */
467 for (i = 0; i < npdp; i++) {
468 void *kva;
469
470 /* XXX errno NetBSD->Linux */
471 ret = -bus_dmamem_map(ppgtt->base.dev->dmat,
472 &ppgtt->u.gen8->pd_segs[i], 1, PAGE_SIZE, &kva,
473 BUS_DMA_WAITOK);
474 if (ret)
475 goto fail1;
476
477 gen8_ppgtt_pde_t *const pd = kva;
478 for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
479 const bus_dma_segment_t *const seg =
480 &ppgtt->u.gen8->pd[i].pt_segs[j];
481 KASSERT(seg->ds_len == PAGE_SIZE);
482 pd[j] = gen8_pde_encode(ppgtt->base.dev, seg->ds_addr,
483 I915_CACHE_LLC);
484 }
485 bus_dmamem_unmap(ppgtt->base.dev->dmat, kva, PAGE_SIZE);
486 }
487
488 ppgtt->enable = gen8_ppgtt_enable;
489 ppgtt->switch_mm = gen8_mm_switch;
490 ppgtt->base.clear_range = gen8_ppgtt_clear_range;
491 ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
492 ppgtt->base.cleanup = gen8_ppgtt_cleanup;
493 ppgtt->base.start = 0;
494 ppgtt->base.total = ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE * PAGE_SIZE;
495
496 ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
497
498 DRM_DEBUG_DRIVER("Allocated %u pages for page directories\n", npdp);
499 DRM_DEBUG_DRIVER("Allocated %u pages for page tables"
500 " (%"PRIxMAX" wasted)\n",
501 ppgtt->num_pd_entries,
502 ((uintmax_t)(ppgtt->num_pd_entries - min_pt_pages) +
503 (size % (1<<30))));
504
505 /* Success! */
506 return 0;
507
508 fail1: gen8_ppgtt_free(ppgtt);
509 fail0: KASSERT(ret);
510 return ret;
511 }
512
513 static void
514 gen8_ppgtt_cleanup(struct i915_address_space *vm)
515 {
516 struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt,
517 base);
518
519 list_del(&vm->global_link);
520 drm_mm_takedown(&vm->mm);
521
522 gen8_ppgtt_free(ppgtt);
523 }
524
525 static int
526 gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt, unsigned npdp)
527 {
528 int ret;
529
530 /*
531 * XXX This is a very large (48 MB) allocation! However, it
532 * can't really be made smaller than 8 MB, since we need a
533 * contiguous array of DMA segments for the page tables. I
534 * expect this to be used mainly on machines with lots of
535 * memory, so...
536 */
537 ppgtt->u.gen8 = kmem_alloc(sizeof(*ppgtt->u.gen8), KM_SLEEP);
538
539 ret = gen8_ppgtt_allocate_page_directories(ppgtt, npdp);
540 if (ret)
541 goto fail0;
542 ppgtt->num_pd_entries = (npdp * GEN8_PDES_PER_PAGE);
543 ret = gen8_ppgtt_allocate_page_tables(ppgtt, npdp);
544 if (ret)
545 goto fail1;
546
547 /* Success! */
548 return 0;
549
550 fail2: __unused
551 gen8_ppgtt_free_page_tables(ppgtt);
552 fail1: gen8_ppgtt_free_page_directories(ppgtt);
553 fail0: KASSERT(ret);
554 kmem_free(ppgtt->u.gen8, sizeof(*ppgtt->u.gen8));
555 return ret;
556 }
557
558 static void
559 gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
560 {
561
562 gen8_ppgtt_free_page_tables(ppgtt);
563 gen8_ppgtt_free_page_directories(ppgtt);
564 kmem_free(ppgtt->u.gen8, sizeof(*ppgtt->u.gen8));
565 }
566
567 static int
568 gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt,
569 unsigned npdp)
570 {
571 const bus_size_t pd_bytes = (npdp << PAGE_SHIFT);
572 const int nsegs = npdp;
573 int rsegs;
574 int ret;
575
576 ppgtt->u.gen8->npdp = npdp;
577
578 KASSERT(nsegs <= GEN8_LEGACY_PDPS);
579 CTASSERT(GEN8_LEGACY_PDPS == __arraycount(ppgtt->u.gen8->pd_segs));
580
581 /* XXX errno NetBSD->Linux */
582 ret = -bus_dmamem_alloc(ppgtt->base.dev->dmat, pd_bytes, PAGE_SIZE,
583 PAGE_SIZE, ppgtt->u.gen8->pd_segs, nsegs, &rsegs, BUS_DMA_WAITOK);
584 if (ret)
585 goto fail0;
586 KASSERT(rsegs == nsegs);
587
588 /* XXX errno NetBSD->Linux */
589 ret = -bus_dmamap_create(ppgtt->base.dev->dmat, pd_bytes, nsegs,
590 PAGE_SIZE, 0, BUS_DMA_WAITOK, &ppgtt->u.gen8->pd_map);
591 if (ret)
592 goto fail1;
593
594 /* XXX errno NetBSD->Linux */
595 ret = -bus_dmamap_load_raw(ppgtt->base.dev->dmat,
596 ppgtt->u.gen8->pd_map, ppgtt->u.gen8->pd_segs, nsegs, pd_bytes,
597 BUS_DMA_WAITOK);
598 if (ret)
599 goto fail2;
600
601 /* Success! */
602 return 0;
603
604 fail3: __unused
605 bus_dmamap_unload(ppgtt->base.dev->dmat, ppgtt->u.gen8->pd_map);
606 fail2: bus_dmamap_destroy(ppgtt->base.dev->dmat, ppgtt->u.gen8->pd_map);
607 fail1: bus_dmamem_free(ppgtt->base.dev->dmat, ppgtt->u.gen8->pd_segs,
608 ppgtt->u.gen8->npdp);
609 fail0: KASSERT(ret);
610 return ret;
611 }
612
613 static void
614 gen8_ppgtt_free_page_directories(struct i915_hw_ppgtt *ppgtt)
615 {
616
617 bus_dmamap_unload(ppgtt->base.dev->dmat, ppgtt->u.gen8->pd_map);
618 bus_dmamap_destroy(ppgtt->base.dev->dmat, ppgtt->u.gen8->pd_map);
619 bus_dmamem_free(ppgtt->base.dev->dmat, ppgtt->u.gen8->pd_segs,
620 ppgtt->u.gen8->npdp);
621 }
622
623 static int
624 gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt, unsigned npdp)
625 {
626 unsigned i, j;
627 int rsegs;
628 int ret;
629
630 for (i = 0; i < npdp; i++) {
631 CTASSERT(__arraycount(ppgtt->u.gen8->pd[i].pt_segs) ==
632 GEN8_PDES_PER_PAGE);
633 /* XXX errno NetBSD->Linux */
634 ret = -bus_dmamem_alloc(ppgtt->base.dev->dmat,
635 (PAGE_SIZE * GEN8_PDES_PER_PAGE), PAGE_SIZE, PAGE_SIZE,
636 ppgtt->u.gen8->pd[i].pt_segs, GEN8_PDES_PER_PAGE, &rsegs,
637 BUS_DMA_WAITOK);
638 if (ret)
639 goto fail0;
640 KASSERT(rsegs == GEN8_PDES_PER_PAGE);
641 /* XXX errno NetBSD->Linux */
642 ret = -bus_dmamap_create(ppgtt->base.dev->dmat,
643 (PAGE_SIZE * GEN8_PDES_PER_PAGE), GEN8_PDES_PER_PAGE,
644 PAGE_SIZE, 0, BUS_DMA_WAITOK,
645 &ppgtt->u.gen8->pd[i].pt_map);
646 if (ret)
647 goto fail1;
648 /* XXX errno NetBSD->Linux */
649 ret = -bus_dmamap_load_raw(ppgtt->base.dev->dmat,
650 ppgtt->u.gen8->pd[i].pt_map, ppgtt->u.gen8->pd[i].pt_segs,
651 GEN8_PDES_PER_PAGE, PAGE_SIZE, BUS_DMA_WAITOK);
652 if (ret)
653 goto fail2;
654 continue;
655
656 fail3: __unused
657 bus_dmamap_unload(ppgtt->base.dev->dmat,
658 ppgtt->u.gen8->pd[i].pt_map);
659 fail2: bus_dmamap_destroy(ppgtt->base.dev->dmat,
660 ppgtt->u.gen8->pd[i].pt_map);
661 fail1: bus_dmamem_free(ppgtt->base.dev->dmat,
662 ppgtt->u.gen8->pd[i].pt_segs, GEN8_PDES_PER_PAGE);
663 fail0: goto fail;
664 }
665
666 /* Success! */
667 return 0;
668
669 fail: KASSERT(ret);
670 for (j = 0; j < i; j++) {
671 bus_dmamap_unload(ppgtt->base.dev->dmat,
672 ppgtt->u.gen8->pd[j].pt_map);
673 bus_dmamap_destroy(ppgtt->base.dev->dmat,
674 ppgtt->u.gen8->pd[j].pt_map);
675 bus_dmamem_free(ppgtt->base.dev->dmat,
676 ppgtt->u.gen8->pd[j].pt_segs, GEN8_PDES_PER_PAGE);
677 }
678 return ret;
679 }
680
681 static void
682 gen8_ppgtt_free_page_tables(struct i915_hw_ppgtt *ppgtt)
683 {
684 unsigned i;
685
686 for (i = 0; i < ppgtt->u.gen8->npdp; i++) {
687 bus_dmamap_unload(ppgtt->base.dev->dmat,
688 ppgtt->u.gen8->pd[i].pt_map);
689 bus_dmamap_destroy(ppgtt->base.dev->dmat,
690 ppgtt->u.gen8->pd[i].pt_map);
691 bus_dmamem_free(ppgtt->base.dev->dmat,
692 ppgtt->u.gen8->pd[i].pt_segs, GEN8_PDES_PER_PAGE);
693 }
694 }
695 #else
696 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
697 struct sg_table *pages,
698 uint64_t start,
699 enum i915_cache_level cache_level)
700 {
701 struct i915_hw_ppgtt *ppgtt =
702 container_of(vm, struct i915_hw_ppgtt, base);
703 gen8_gtt_pte_t *pt_vaddr;
704 unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
705 unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
706 unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
707 struct sg_page_iter sg_iter;
708
709 pt_vaddr = NULL;
710
711 for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
712 if (WARN_ON(pdpe >= GEN8_LEGACY_PDPS))
713 break;
714
715 if (pt_vaddr == NULL)
716 pt_vaddr = kmap_atomic(ppgtt->gen8_pt_pages[pdpe][pde]);
717
718 pt_vaddr[pte] =
719 gen8_pte_encode(sg_page_iter_dma_address(&sg_iter),
720 cache_level, true);
721 if (++pte == GEN8_PTES_PER_PAGE) {
722 kunmap_atomic(pt_vaddr);
723 pt_vaddr = NULL;
724 if (++pde == GEN8_PDES_PER_PAGE) {
725 pdpe++;
726 pde = 0;
727 }
728 pte = 0;
729 }
730 }
731 if (pt_vaddr)
732 kunmap_atomic(pt_vaddr);
733 }
734
735 static void gen8_free_page_tables(struct page **pt_pages)
736 {
737 int i;
738
739 if (pt_pages == NULL)
740 return;
741
742 for (i = 0; i < GEN8_PDES_PER_PAGE; i++)
743 if (pt_pages[i])
744 __free_pages(pt_pages[i], 0);
745 }
746
747 static void gen8_ppgtt_free(const struct i915_hw_ppgtt *ppgtt)
748 {
749 int i;
750
751 for (i = 0; i < ppgtt->num_pd_pages; i++) {
752 gen8_free_page_tables(ppgtt->gen8_pt_pages[i]);
753 kfree(ppgtt->gen8_pt_pages[i]);
754 kfree(ppgtt->gen8_pt_dma_addr[i]);
755 }
756
757 __free_pages(ppgtt->pd_pages, get_order(ppgtt->num_pd_pages << PAGE_SHIFT));
758 }
759
760 static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
761 {
762 struct pci_dev *hwdev = ppgtt->base.dev->pdev;
763 int i, j;
764
765 for (i = 0; i < ppgtt->num_pd_pages; i++) {
766 /* TODO: In the future we'll support sparse mappings, so this
767 * will have to change. */
768 if (!ppgtt->pd_dma_addr[i])
769 continue;
770
771 pci_unmap_page(hwdev, ppgtt->pd_dma_addr[i], PAGE_SIZE,
772 PCI_DMA_BIDIRECTIONAL);
773
774 for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
775 dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
776 if (addr)
777 pci_unmap_page(hwdev, addr, PAGE_SIZE,
778 PCI_DMA_BIDIRECTIONAL);
779 }
780 }
781 }
782
783 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
784 {
785 struct i915_hw_ppgtt *ppgtt =
786 container_of(vm, struct i915_hw_ppgtt, base);
787
788 list_del(&vm->global_link);
789 drm_mm_takedown(&vm->mm);
790
791 gen8_ppgtt_unmap_pages(ppgtt);
792 gen8_ppgtt_free(ppgtt);
793 }
794
795 static struct page **__gen8_alloc_page_tables(void)
796 {
797 struct page **pt_pages;
798 int i;
799
800 pt_pages = kcalloc(GEN8_PDES_PER_PAGE, sizeof(struct page *), GFP_KERNEL);
801 if (!pt_pages)
802 return ERR_PTR(-ENOMEM);
803
804 for (i = 0; i < GEN8_PDES_PER_PAGE; i++) {
805 pt_pages[i] = alloc_page(GFP_KERNEL);
806 if (!pt_pages[i])
807 goto bail;
808 }
809
810 return pt_pages;
811
812 bail:
813 gen8_free_page_tables(pt_pages);
814 kfree(pt_pages);
815 return ERR_PTR(-ENOMEM);
816 }
817
818 static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt,
819 const int max_pdp)
820 {
821 struct page **pt_pages[GEN8_LEGACY_PDPS];
822 int i, ret;
823
824 for (i = 0; i < max_pdp; i++) {
825 pt_pages[i] = __gen8_alloc_page_tables();
826 if (IS_ERR(pt_pages[i])) {
827 ret = PTR_ERR(pt_pages[i]);
828 goto unwind_out;
829 }
830 }
831
832 /* NB: Avoid touching gen8_pt_pages until last to keep the allocation
833 * "atomic" for cleanup purposes.
834 */
835 for (i = 0; i < max_pdp; i++)
836 ppgtt->gen8_pt_pages[i] = pt_pages[i];
837
838 return 0;
839
840 unwind_out:
841 while (i--) {
842 gen8_free_page_tables(pt_pages[i]);
843 kfree(pt_pages[i]);
844 }
845
846 return ret;
847 }
848
849 static int gen8_ppgtt_allocate_dma(struct i915_hw_ppgtt *ppgtt)
850 {
851 int i;
852
853 for (i = 0; i < ppgtt->num_pd_pages; i++) {
854 ppgtt->gen8_pt_dma_addr[i] = kcalloc(GEN8_PDES_PER_PAGE,
855 sizeof(dma_addr_t),
856 GFP_KERNEL);
857 if (!ppgtt->gen8_pt_dma_addr[i])
858 return -ENOMEM;
859 }
860
861 return 0;
862 }
863
864 static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt,
865 const int max_pdp)
866 {
867 ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT));
868 if (!ppgtt->pd_pages)
869 return -ENOMEM;
870
871 ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT);
872 BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS);
873
874 return 0;
875 }
876
877 static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt,
878 const int max_pdp)
879 {
880 int ret;
881
882 ret = gen8_ppgtt_allocate_page_directories(ppgtt, max_pdp);
883 if (ret)
884 return ret;
885
886 ret = gen8_ppgtt_allocate_page_tables(ppgtt, max_pdp);
887 if (ret) {
888 __free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT));
889 return ret;
890 }
891
892 ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE;
893
894 ret = gen8_ppgtt_allocate_dma(ppgtt);
895 if (ret)
896 gen8_ppgtt_free(ppgtt);
897
898 return ret;
899 }
900
901 static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt,
902 const int pd)
903 {
904 dma_addr_t pd_addr;
905 int ret;
906
907 pd_addr = pci_map_page(ppgtt->base.dev->pdev,
908 &ppgtt->pd_pages[pd], 0,
909 PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
910
911 ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr);
912 if (ret)
913 return ret;
914
915 ppgtt->pd_dma_addr[pd] = pd_addr;
916
917 return 0;
918 }
919
920 static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt,
921 const int pd,
922 const int pt)
923 {
924 dma_addr_t pt_addr;
925 struct page *p;
926 int ret;
927
928 p = ppgtt->gen8_pt_pages[pd][pt];
929 pt_addr = pci_map_page(ppgtt->base.dev->pdev,
930 p, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
931 ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pt_addr);
932 if (ret)
933 return ret;
934
935 ppgtt->gen8_pt_dma_addr[pd][pt] = pt_addr;
936
937 return 0;
938 }
939
940 /**
941 * GEN8 legacy ppgtt programming is accomplished through at most 4 PDP
942 * registers, with a net effect resembling a 2-level page table in normal x86
943 * terms. Each PDP represents 1GB of memory; 4 * 512 * 512 * 4096 = 4GB of
944 * legacy 32b address space.
945 *
946 * FIXME: split allocation into smaller pieces. For now we only ever do this
947 * once, but with full PPGTT, the multiple contiguous allocations will be bad.
948 * TODO: Do something with the size parameter
949 */
950 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
951 {
952 const int max_pdp = DIV_ROUND_UP(size, 1 << 30);
953 const int min_pt_pages = GEN8_PDES_PER_PAGE * max_pdp;
954 int i, j, ret;
955
956 if (size % (1<<30))
957 DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size);
958
959 /* 1. Do all our allocations for page directories and page tables. */
960 ret = gen8_ppgtt_alloc(ppgtt, max_pdp);
961 if (ret)
962 return ret;
963
964 /*
965 * 2. Create DMA mappings for the page directories and page tables.
966 */
967 for (i = 0; i < max_pdp; i++) {
968 ret = gen8_ppgtt_setup_page_directories(ppgtt, i);
969 if (ret)
970 goto bail;
971
972 for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
973 ret = gen8_ppgtt_setup_page_tables(ppgtt, i, j);
974 if (ret)
975 goto bail;
976 }
977 }
978
979 /*
980 * 3. Map all the page directory entries to point to the page tables
981 * we've allocated.
982 *
983 * For now, the PPGTT helper functions all require that the PDEs are
984 * plugged in correctly. So we do that now/here. For aliasing PPGTT, we
985 * will never need to touch the PDEs again.
986 */
987 for (i = 0; i < max_pdp; i++) {
988 gen8_ppgtt_pde_t *pd_vaddr;
989 pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]);
990 for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
991 dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
992 pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
993 I915_CACHE_LLC);
994 }
995 kunmap_atomic(pd_vaddr);
996 }
997
998 ppgtt->enable = gen8_ppgtt_enable;
999 ppgtt->switch_mm = gen8_mm_switch;
1000 ppgtt->base.clear_range = gen8_ppgtt_clear_range;
1001 ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
1002 ppgtt->base.cleanup = gen8_ppgtt_cleanup;
1003 ppgtt->base.start = 0;
1004 ppgtt->base.total = ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE * PAGE_SIZE;
1005
1006 ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
1007
1008 DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
1009 ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp);
1010 DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n",
1011 ppgtt->num_pd_entries,
1012 (ppgtt->num_pd_entries - min_pt_pages) + size % (1<<30));
1013 return 0;
1014
1015 bail:
1016 gen8_ppgtt_unmap_pages(ppgtt);
1017 gen8_ppgtt_free(ppgtt);
1018 return ret;
1019 }
1020 #endif
1021
1022 #ifndef __NetBSD__
1023 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1024 {
1025 struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
1026 struct i915_address_space *vm = &ppgtt->base;
1027 gen6_gtt_pte_t __iomem *pd_addr;
1028 gen6_gtt_pte_t scratch_pte;
1029 uint32_t pd_entry;
1030 int pte, pde;
1031
1032 scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
1033
1034 pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm +
1035 ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
1036
1037 seq_printf(m, " VM %p (pd_offset %x-%x):\n", vm,
1038 ppgtt->pd_offset, ppgtt->pd_offset + ppgtt->num_pd_entries);
1039 for (pde = 0; pde < ppgtt->num_pd_entries; pde++) {
1040 u32 expected;
1041 gen6_gtt_pte_t *pt_vaddr;
1042 dma_addr_t pt_addr = ppgtt->pt_dma_addr[pde];
1043 pd_entry = readl(pd_addr + pde);
1044 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
1045
1046 if (pd_entry != expected)
1047 seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
1048 pde,
1049 pd_entry,
1050 expected);
1051 seq_printf(m, "\tPDE: %x\n", pd_entry);
1052
1053 pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
1054 for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) {
1055 unsigned long va =
1056 (pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) +
1057 (pte * PAGE_SIZE);
1058 int i;
1059 bool found = false;
1060 for (i = 0; i < 4; i++)
1061 if (pt_vaddr[pte + i] != scratch_pte)
1062 found = true;
1063 if (!found)
1064 continue;
1065
1066 seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
1067 for (i = 0; i < 4; i++) {
1068 if (pt_vaddr[pte + i] != scratch_pte)
1069 seq_printf(m, " %08x", pt_vaddr[pte + i]);
1070 else
1071 seq_puts(m, " SCRATCH ");
1072 }
1073 seq_puts(m, "\n");
1074 }
1075 kunmap_atomic(pt_vaddr);
1076 }
1077 }
1078 #endif
1079
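/*
 * Write this PPGTT's page directory entries into the global GTT, one PDE per
 * page table, at the GGTT offset reserved for the page directory.
 */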
1080 static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
1081 {
1082 #ifdef __NetBSD__
1083 struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
1084 const bus_space_tag_t bst = dev_priv->gtt.bst;
1085 const bus_space_handle_t bsh = dev_priv->gtt.bsh;
1086 const bus_size_t pd_base = ppgtt->u.gen6->pd_base;
1087 unsigned i;
1088
1089 for (i = 0; i < ppgtt->num_pd_entries; i++) {
1090 const bus_addr_t pt_addr = ppgtt->u.gen6->pt_segs[i].ds_addr;
1091 uint32_t pd_entry;
1092
1093 KASSERT(ppgtt->u.gen6->pt_segs[i].ds_len == PAGE_SIZE);
1094
1095 pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
1096 pd_entry |= GEN6_PDE_VALID;
1097
1098 bus_space_write_4(bst, bsh, pd_base + (4*i), pd_entry);
1099 }
1100 (void)bus_space_read_4(bst, bsh, pd_base);
1101 #else
1102 struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
1103 gen6_gtt_pte_t __iomem *pd_addr;
1104 uint32_t pd_entry;
1105 int i;
1106
1107 WARN_ON(ppgtt->pd_offset & 0x3f);
1108 pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm +
1109 ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
1110 for (i = 0; i < ppgtt->num_pd_entries; i++) {
1111 dma_addr_t pt_addr;
1112
1113 pt_addr = ppgtt->pt_dma_addr[i];
1114 pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
1115 pd_entry |= GEN6_PDE_VALID;
1116
1117 writel(pd_entry, pd_addr + i);
1118 }
1119 readl(pd_addr);
1120 #endif
1121 }
1122
1123 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
1124 {
1125 #ifdef __NetBSD__
1126 KASSERT(!ISSET(ppgtt->u.gen6->pd_base, 0x3f));
1127
1128 /* XXX 64? 16? */
1129 return (ppgtt->u.gen6->pd_base / 64) << 16;
1130 #else
1131 BUG_ON(ppgtt->pd_offset & 0x3f);
1132
1133 return (ppgtt->pd_offset / 64) << 16;
1134 #endif
1135 }
1136
1137 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
1138 struct intel_ring_buffer *ring,
1139 bool synchronous)
1140 {
1141 struct drm_device *dev = ppgtt->base.dev;
1142 struct drm_i915_private *dev_priv = dev->dev_private;
1143 int ret;
1144
1145 /* If we're in reset, we can assume the GPU is sufficiently idle to
1146 * manually frob these bits. Ideally we could use the ring functions,
1147 * except our error handling makes it quite difficult (can't use
1148 * intel_ring_begin, ring->flush, or intel_ring_advance)
1149 *
1150 * FIXME: We should try not to special case reset
1151 */
1152 if (synchronous ||
1153 i915_reset_in_progress(&dev_priv->gpu_error)) {
1154 WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
1155 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
1156 I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
1157 POSTING_READ(RING_PP_DIR_BASE(ring));
1158 return 0;
1159 }
1160
1161 /* NB: TLBs must be flushed and invalidated before a switch */
1162 ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1163 if (ret)
1164 return ret;
1165
1166 ret = intel_ring_begin(ring, 6);
1167 if (ret)
1168 return ret;
1169
1170 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
1171 intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
1172 intel_ring_emit(ring, PP_DIR_DCLV_2G);
1173 intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
1174 intel_ring_emit(ring, get_pd_offset(ppgtt));
1175 intel_ring_emit(ring, MI_NOOP);
1176 intel_ring_advance(ring);
1177
1178 return 0;
1179 }
1180
1181 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
1182 struct intel_ring_buffer *ring,
1183 bool synchronous)
1184 {
1185 struct drm_device *dev = ppgtt->base.dev;
1186 struct drm_i915_private *dev_priv = dev->dev_private;
1187 int ret;
1188
1189 /* If we're in reset, we can assume the GPU is sufficiently idle to
1190 * manually frob these bits. Ideally we could use the ring functions,
1191 * except our error handling makes it quite difficult (can't use
1192 * intel_ring_begin, ring->flush, or intel_ring_advance)
1193 *
1194 * FIXME: We should try not to special case reset
1195 */
1196 if (synchronous ||
1197 i915_reset_in_progress(&dev_priv->gpu_error)) {
1198 WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
1199 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
1200 I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
1201 POSTING_READ(RING_PP_DIR_BASE(ring));
1202 return 0;
1203 }
1204
1205 /* NB: TLBs must be flushed and invalidated before a switch */
1206 ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1207 if (ret)
1208 return ret;
1209
1210 ret = intel_ring_begin(ring, 6);
1211 if (ret)
1212 return ret;
1213
1214 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
1215 intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
1216 intel_ring_emit(ring, PP_DIR_DCLV_2G);
1217 intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
1218 intel_ring_emit(ring, get_pd_offset(ppgtt));
1219 intel_ring_emit(ring, MI_NOOP);
1220 intel_ring_advance(ring);
1221
1222 /* XXX: RCS is the only one to auto invalidate the TLBs? */
1223 if (ring->id != RCS) {
1224 ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1225 if (ret)
1226 return ret;
1227 }
1228
1229 return 0;
1230 }
1231
1232 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
1233 struct intel_ring_buffer *ring,
1234 bool synchronous)
1235 {
1236 struct drm_device *dev = ppgtt->base.dev;
1237 struct drm_i915_private *dev_priv = dev->dev_private;
1238
1239 if (!synchronous)
1240 return 0;
1241
1242 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
1243 I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
1244
1245 POSTING_READ(RING_PP_DIR_DCLV(ring));
1246
1247 return 0;
1248 }
1249
1250 static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
1251 {
1252 struct drm_device *dev = ppgtt->base.dev;
1253 struct drm_i915_private *dev_priv = dev->dev_private;
1254 struct intel_ring_buffer *ring;
1255 int j, ret;
1256
1257 for_each_ring(ring, dev_priv, j) {
1258 I915_WRITE(RING_MODE_GEN7(ring),
1259 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1260
1261 /* We promise to do a switch later with FULL PPGTT. If this is
1262 * aliasing, this is the one and only switch we'll do */
1263 if (USES_FULL_PPGTT(dev))
1264 continue;
1265
1266 ret = ppgtt->switch_mm(ppgtt, ring, true);
1267 if (ret)
1268 goto err_out;
1269 }
1270
1271 return 0;
1272
1273 err_out:
1274 for_each_ring(ring, dev_priv, j)
1275 I915_WRITE(RING_MODE_GEN7(ring),
1276 _MASKED_BIT_DISABLE(GFX_PPGTT_ENABLE));
1277 return ret;
1278 }
1279
1280 static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
1281 {
1282 struct drm_device *dev = ppgtt->base.dev;
1283 struct drm_i915_private *dev_priv = dev->dev_private;
1284 struct intel_ring_buffer *ring;
1285 uint32_t ecochk, ecobits;
1286 int i;
1287
1288 ecobits = I915_READ(GAC_ECO_BITS);
1289 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
1290
1291 ecochk = I915_READ(GAM_ECOCHK);
1292 if (IS_HASWELL(dev)) {
1293 ecochk |= ECOCHK_PPGTT_WB_HSW;
1294 } else {
1295 ecochk |= ECOCHK_PPGTT_LLC_IVB;
1296 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
1297 }
1298 I915_WRITE(GAM_ECOCHK, ecochk);
1299
1300 for_each_ring(ring, dev_priv, i) {
1301 int ret;
1302 /* GFX_MODE is per-ring on gen7+ */
1303 I915_WRITE(RING_MODE_GEN7(ring),
1304 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1305
1306 /* We promise to do a switch later with FULL PPGTT. If this is
1307 * aliasing, this is the one and only switch we'll do */
1308 if (USES_FULL_PPGTT(dev))
1309 continue;
1310
1311 ret = ppgtt->switch_mm(ppgtt, ring, true);
1312 if (ret)
1313 return ret;
1314 }
1315
1316 return 0;
1317 }
1318
1319 static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
1320 {
1321 struct drm_device *dev = ppgtt->base.dev;
1322 struct drm_i915_private *dev_priv = dev->dev_private;
1323 struct intel_ring_buffer *ring;
1324 uint32_t ecochk, gab_ctl, ecobits;
1325 int i;
1326
1327 ecobits = I915_READ(GAC_ECO_BITS);
1328 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
1329 ECOBITS_PPGTT_CACHE64B);
1330
1331 gab_ctl = I915_READ(GAB_CTL);
1332 I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
1333
1334 ecochk = I915_READ(GAM_ECOCHK);
1335 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
1336
1337 I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1338
1339 for_each_ring(ring, dev_priv, i) {
1340 int ret = ppgtt->switch_mm(ppgtt, ring, true);
1341 if (ret)
1342 return ret;
1343 }
1344
1345 return 0;
1346 }
1347
1348 /* PPGTT support for Sandybridge/Gen6 and later */
1349 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
1350 uint64_t start,
1351 uint64_t length,
1352 bool use_scratch)
1353 {
1354 struct i915_hw_ppgtt *ppgtt =
1355 container_of(vm, struct i915_hw_ppgtt, base);
1356 gen6_gtt_pte_t *pt_vaddr, scratch_pte;
1357 unsigned first_entry = start >> PAGE_SHIFT;
1358 unsigned num_entries = length >> PAGE_SHIFT;
1359 unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
1360 unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
1361 unsigned last_pte, i;
1362 #ifdef __NetBSD__
1363 void *kva;
1364 int ret;
1365 #endif
1366
1367 scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
1368
1369 while (num_entries) {
1370 last_pte = first_pte + num_entries;
1371 if (last_pte > I915_PPGTT_PT_ENTRIES)
1372 last_pte = I915_PPGTT_PT_ENTRIES;
1373
1374 #ifdef __NetBSD__
1375 /* XXX errno NetBSD->Linux */
1376 ret = -bus_dmamem_map(ppgtt->base.dev->dmat,
1377 &ppgtt->u.gen6->pt_segs[act_pt], 1, PAGE_SIZE, &kva,
1378 BUS_DMA_NOWAIT);
1379 if (ret) {
1380 /*
1381 * XXX Should guarantee mapping earlier with
1382 * uvm_emap(9) or something.
1383 */
1384 device_printf(ppgtt->base.dev->dev,
1385 "failed to map page table: %d\n", -ret);
1386 goto skip;
1387 }
1388 pt_vaddr = kva;
1389 #else
1390 pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
1391 #endif
1392
1393 for (i = first_pte; i < last_pte; i++)
1394 pt_vaddr[i] = scratch_pte;
1395
1396 #ifdef __NetBSD__
1397 bus_dmamem_unmap(ppgtt->base.dev->dmat, kva, PAGE_SIZE);
1398 skip:
1399 #else
1400 kunmap_atomic(pt_vaddr);
1401 #endif
1402
1403 num_entries -= last_pte - first_pte;
1404 first_pte = 0;
1405 act_pt++;
1406 }
1407 }
1408
1409 #ifdef __NetBSD__
1410 static void
1411 gen6_ppgtt_insert_entries(struct i915_address_space *vm, bus_dmamap_t dmamap,
1412 uint64_t start, enum i915_cache_level cache_level)
1413 {
1414 struct i915_hw_ppgtt *ppgtt =
1415 container_of(vm, struct i915_hw_ppgtt, base);
1416 gen6_gtt_pte_t *pt_vaddr;
1417 unsigned first_entry = start >> PAGE_SHIFT;
1418 unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
1419 unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
1420 unsigned seg;
1421 void *kva;
1422 int ret;
1423
1424 pt_vaddr = NULL;
1425 KASSERT(0 < dmamap->dm_nsegs);
1426 for (seg = 0; seg < dmamap->dm_nsegs; seg++) {
1427 KASSERT(dmamap->dm_segs[seg].ds_len == PAGE_SIZE);
1428 if (pt_vaddr == NULL) {
1429 /* XXX errno NetBSD->Linux */
1430 ret = -bus_dmamem_map(ppgtt->base.dev->dmat,
1431 &ppgtt->u.gen6->pt_segs[act_pt], 1,
1432 PAGE_SIZE, &kva, BUS_DMA_NOWAIT);
1433 if (ret) {
1434 /*
1435 * XXX Should guarantee mapping earlier
1436 * with uvm_emap(9) or something.
1437 */
1438 device_printf(ppgtt->base.dev->dev,
1439 "failed to map page table: %d\n", -ret);
1440 goto skip;
1441 }
1442 pt_vaddr = kva;
1443 }
1444 pt_vaddr[act_pte] =
1445 vm->pte_encode(dmamap->dm_segs[seg].ds_addr, cache_level,
1446 true);
1447 skip:
1448 if (++act_pte == I915_PPGTT_PT_ENTRIES) {
1449 bus_dmamem_unmap(ppgtt->base.dev->dmat, kva,
1450 PAGE_SIZE);
1451 pt_vaddr = NULL;
1452 act_pt++;
1453 act_pte = 0;
1454 }
1455 }
1456 if (pt_vaddr)
1457 bus_dmamem_unmap(ppgtt->base.dev->dmat, kva, PAGE_SIZE);
1458 }
1459 #else
1460 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
1461 struct sg_table *pages,
1462 uint64_t start,
1463 enum i915_cache_level cache_level)
1464 {
1465 struct i915_hw_ppgtt *ppgtt =
1466 container_of(vm, struct i915_hw_ppgtt, base);
1467 gen6_gtt_pte_t *pt_vaddr;
1468 unsigned first_entry = start >> PAGE_SHIFT;
1469 unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
1470 unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
1471 struct sg_page_iter sg_iter;
1472
1473 pt_vaddr = NULL;
1474 for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
1475 if (pt_vaddr == NULL)
1476 pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
1477
1478 pt_vaddr[act_pte] =
1479 vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
1480 cache_level, true);
1481 if (++act_pte == I915_PPGTT_PT_ENTRIES) {
1482 kunmap_atomic(pt_vaddr);
1483 pt_vaddr = NULL;
1484 act_pt++;
1485 act_pte = 0;
1486 }
1487 }
1488 if (pt_vaddr)
1489 kunmap_atomic(pt_vaddr);
1490 }
1491 #endif
1492
1493 #ifndef __NetBSD__
1494 static void gen6_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
1495 {
1496 int i;
1497
1498 if (ppgtt->pt_dma_addr) {
1499 for (i = 0; i < ppgtt->num_pd_entries; i++)
1500 pci_unmap_page(ppgtt->base.dev->pdev,
1501 ppgtt->pt_dma_addr[i],
1502 4096, PCI_DMA_BIDIRECTIONAL);
1503 }
1504 }
1505
1506 static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
1507 {
1508 int i;
1509
1510 kfree(ppgtt->pt_dma_addr);
1511 for (i = 0; i < ppgtt->num_pd_entries; i++)
1512 __free_page(ppgtt->pt_pages[i]);
1513 kfree(ppgtt->pt_pages);
1514 }
1515
1516 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
1517 {
1518 struct i915_hw_ppgtt *ppgtt =
1519 container_of(vm, struct i915_hw_ppgtt, base);
1520
1521 list_del(&vm->global_link);
1522 drm_mm_takedown(&ppgtt->base.mm);
1523 drm_mm_remove_node(&ppgtt->node);
1524
1525 gen6_ppgtt_unmap_pages(ppgtt);
1526 gen6_ppgtt_free(ppgtt);
1527 }
1528 #endif
1529
1530 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
1531 {
1532 #define GEN6_PD_ALIGN (PAGE_SIZE * 16)
1533 #define GEN6_PD_SIZE (GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE)
1534 struct drm_device *dev = ppgtt->base.dev;
1535 struct drm_i915_private *dev_priv = dev->dev_private;
1536 bool retried = false;
1537 int ret;
1538
1539 /* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
1540 * allocator works in address space sizes, so it's multiplied by page
1541 * size. We allocate at the top of the GTT to avoid fragmentation.
1542 */
1543 BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm));
1544 alloc:
1545 ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
1546 &ppgtt->node, GEN6_PD_SIZE,
1547 GEN6_PD_ALIGN, 0,
1548 0, dev_priv->gtt.base.total,
1549 DRM_MM_SEARCH_DEFAULT,
1550 DRM_MM_CREATE_DEFAULT);
1551 if (ret == -ENOSPC && !retried) {
1552 ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
1553 GEN6_PD_SIZE, GEN6_PD_ALIGN,
1554 I915_CACHE_NONE,
1555 0, dev_priv->gtt.base.total,
1556 0);
1557 if (ret)
1558 return ret;
1559
1560 retried = true;
1561 goto alloc;
1562 }
1563
1564 if (ppgtt->node.start < dev_priv->gtt.mappable_end)
1565 DRM_DEBUG("Forced to use aperture for PDEs\n");
1566
1567 ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
1568 return ret;
1569 }
1570
1571 #ifdef __NetBSD__
1572 static void gen6_ppgtt_cleanup(struct i915_address_space *);
1573 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *);
1574 static void gen6_ppgtt_free(struct i915_hw_ppgtt *);
1575 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *);
1576 static void gen6_ppgtt_free_page_directories(struct i915_hw_ppgtt *);
1577 static int gen6_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *);
1578 static void gen6_ppgtt_free_page_tables(struct i915_hw_ppgtt *);
1579
1580 static void
1581 gen6_ppgtt_cleanup(struct i915_address_space *vm)
1582 {
1583 struct i915_hw_ppgtt *ppgtt =
1584 container_of(vm, struct i915_hw_ppgtt, base);
1585
1586 list_del(&vm->global_link);
1587 drm_mm_takedown(&ppgtt->base.mm);
1588
1589 gen6_ppgtt_free(ppgtt);
1590 }
1591
1592 static int
1593 gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
1594 {
1595 int ret;
1596
1597 ppgtt->u.gen6 = kmem_alloc(sizeof(*ppgtt->u.gen6), KM_SLEEP);
1598
1599 ret = gen6_ppgtt_allocate_page_directories(ppgtt);
1600 if (ret)
1601 goto fail0;
1602 ret = gen6_ppgtt_allocate_page_tables(ppgtt);
1603 if (ret)
1604 goto fail1;
1605
1606 /* Success! */
1607 return 0;
1608
1609 fail2: __unused
1610 gen6_ppgtt_free_page_tables(ppgtt);
1611 fail1: gen6_ppgtt_free_page_directories(ppgtt);
1612 fail0: KASSERT(ret);
1613 kmem_free(ppgtt->u.gen6, sizeof(*ppgtt->u.gen6));
1614 return ret;
1615 }
1616
1617 static void
1618 gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
1619 {
1620
1621 gen6_ppgtt_free_page_tables(ppgtt);
1622 gen6_ppgtt_free_page_directories(ppgtt);
1623 kmem_free(ppgtt->u.gen6, sizeof(*ppgtt->u.gen6));
1624 }
1625
1626 static void
1627 gen6_ppgtt_free_page_directories(struct i915_hw_ppgtt *ppgtt)
1628 {
1629
1630 drm_mm_remove_node(&ppgtt->node);
1631 }
1632
1633 static int
1634 gen6_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt)
1635 {
1636 int rsegs;
1637 int ret;
1638
1639 KASSERT(ppgtt->num_pd_entries <= INT_MAX);
1640 #if UINT_MAX == SIZE_MAX /* XXX ugh */
1641 KASSERT(ppgtt->num_pd_entries < (SIZE_MAX /
1642 sizeof(ppgtt->u.gen6->pt_segs[0])));
1643 KASSERT(ppgtt->num_pd_entries < (__type_max(bus_size_t) / PAGE_SIZE));
1644 #endif
1645
1646 ppgtt->u.gen6->pt_segs = kmem_alloc((ppgtt->num_pd_entries *
1647 sizeof(ppgtt->u.gen6->pt_segs[0])), KM_SLEEP);
1648
1649 /* XXX errno NetBSD->Linux */
1650 ret = -bus_dmamem_alloc(ppgtt->base.dev->dmat,
1651 (PAGE_SIZE * ppgtt->num_pd_entries), PAGE_SIZE, PAGE_SIZE,
1652 ppgtt->u.gen6->pt_segs, ppgtt->num_pd_entries, &rsegs,
1653 BUS_DMA_WAITOK);
1654 if (ret)
1655 goto fail0;
1656 KASSERT(rsegs == ppgtt->num_pd_entries);
1657
1658 /* XXX errno NetBSD->Linux */
1659 ret = -bus_dmamap_create(ppgtt->base.dev->dmat,
1660 (PAGE_SIZE * ppgtt->num_pd_entries), ppgtt->num_pd_entries,
1661 PAGE_SIZE, 0, BUS_DMA_WAITOK, &ppgtt->u.gen6->pt_map);
1662 if (ret)
1663 goto fail1;
1664
1665 /* XXX errno NetBSD->Linux */
1666 ret = -bus_dmamap_load_raw(ppgtt->base.dev->dmat,
1667 ppgtt->u.gen6->pt_map, ppgtt->u.gen6->pt_segs,
1668 ppgtt->num_pd_entries, (PAGE_SIZE * ppgtt->num_pd_entries),
1669 BUS_DMA_WAITOK);
1670 if (ret)
1671 goto fail2;
1672
1673 /* Success! */
1674 return 0;
1675
1676 fail3: __unused
1677 bus_dmamap_unload(ppgtt->base.dev->dmat, ppgtt->u.gen6->pt_map);
1678 fail2: bus_dmamap_destroy(ppgtt->base.dev->dmat, ppgtt->u.gen6->pt_map);
1679 fail1: bus_dmamem_free(ppgtt->base.dev->dmat, ppgtt->u.gen6->pt_segs,
1680 ppgtt->num_pd_entries);
1681 fail0: KASSERT(ret);
1682 return ret;
1683 }
1684
1685 static void
1686 gen6_ppgtt_free_page_tables(struct i915_hw_ppgtt *ppgtt)
1687 {
1688
1689 bus_dmamap_unload(ppgtt->base.dev->dmat, ppgtt->u.gen6->pt_map);
1690 bus_dmamap_destroy(ppgtt->base.dev->dmat, ppgtt->u.gen6->pt_map);
1691 bus_dmamem_free(ppgtt->base.dev->dmat, ppgtt->u.gen6->pt_segs,
1692 ppgtt->num_pd_entries);
1693 }
1694 #else
1695 static int gen6_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt)
1696 {
1697 int i;
1698
1699 ppgtt->pt_pages = kcalloc(ppgtt->num_pd_entries, sizeof(struct page *),
1700 GFP_KERNEL);
1701
1702 if (!ppgtt->pt_pages)
1703 return -ENOMEM;
1704
1705 for (i = 0; i < ppgtt->num_pd_entries; i++) {
1706 ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL);
1707 if (!ppgtt->pt_pages[i]) {
1708 gen6_ppgtt_free(ppgtt);
1709 return -ENOMEM;
1710 }
1711 }
1712
1713 return 0;
1714 }
1715
1716 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
1717 {
1718 int ret;
1719
1720 ret = gen6_ppgtt_allocate_page_directories(ppgtt);
1721 if (ret)
1722 return ret;
1723
1724 ret = gen6_ppgtt_allocate_page_tables(ppgtt);
1725 if (ret) {
1726 drm_mm_remove_node(&ppgtt->node);
1727 return ret;
1728 }
1729
1730 ppgtt->pt_dma_addr = kcalloc(ppgtt->num_pd_entries, sizeof(dma_addr_t),
1731 GFP_KERNEL);
1732 if (!ppgtt->pt_dma_addr) {
1733 drm_mm_remove_node(&ppgtt->node);
1734 gen6_ppgtt_free(ppgtt);
1735 return -ENOMEM;
1736 }
1737
1738 return 0;
1739 }
1740
1741 static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt)
1742 {
1743 struct drm_device *dev = ppgtt->base.dev;
1744 int i;
1745
1746 for (i = 0; i < ppgtt->num_pd_entries; i++) {
1747 dma_addr_t pt_addr;
1748
1749 pt_addr = pci_map_page(dev->pdev, ppgtt->pt_pages[i], 0, 4096,
1750 PCI_DMA_BIDIRECTIONAL);
1751
1752 if (pci_dma_mapping_error(dev->pdev, pt_addr)) {
1753 gen6_ppgtt_unmap_pages(ppgtt);
1754 return -EIO;
1755 }
1756
1757 ppgtt->pt_dma_addr[i] = pt_addr;
1758 }
1759
1760 return 0;
1761 }
1762 #endif
1763
1764 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
1765 {
1766 struct drm_device *dev = ppgtt->base.dev;
1767 struct drm_i915_private *dev_priv = dev->dev_private;
1768 int ret;
1769
1770 ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
1771 if (IS_GEN6(dev)) {
1772 ppgtt->enable = gen6_ppgtt_enable;
1773 ppgtt->switch_mm = gen6_mm_switch;
1774 } else if (IS_HASWELL(dev)) {
1775 ppgtt->enable = gen7_ppgtt_enable;
1776 ppgtt->switch_mm = hsw_mm_switch;
1777 } else if (IS_GEN7(dev)) {
1778 ppgtt->enable = gen7_ppgtt_enable;
1779 ppgtt->switch_mm = gen7_mm_switch;
1780 } else
1781 BUG();
1782
1783 ret = gen6_ppgtt_alloc(ppgtt);
1784 if (ret)
1785 return ret;
1786
1787 #ifndef __NetBSD__
1788 ret = gen6_ppgtt_setup_page_tables(ppgtt);
1789 if (ret) {
1790 gen6_ppgtt_free(ppgtt);
1791 return ret;
1792 }
1793 #endif
1794
1795 ppgtt->base.clear_range = gen6_ppgtt_clear_range;
1796 ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
1797 ppgtt->base.cleanup = gen6_ppgtt_cleanup;
1798 ppgtt->base.start = 0;
1799 ppgtt->base.total = ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES * PAGE_SIZE;
1800 #ifndef __NetBSD__
1801 ppgtt->debug_dump = gen6_dump_ppgtt;
1802 #endif
1803
1804 #ifdef __NetBSD__
1805 CTASSERT(sizeof(gen6_gtt_pte_t) == 4);
1806 ppgtt->u.gen6->pd_base = 4*(ppgtt->node.start / PAGE_SIZE);
1807 #else
1808 ppgtt->pd_offset =
1809 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_gtt_pte_t);
1810 #endif
1811
1812 ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
1813
1814 DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n",
1815 ppgtt->node.size >> 20,
1816 ppgtt->node.start / PAGE_SIZE);
1817
1818 return 0;
1819 }
1820
1821 int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
1822 {
1823 struct drm_i915_private *dev_priv = dev->dev_private;
1824 int ret = 0;
1825
1826 ppgtt->base.dev = dev;
1827 ppgtt->base.scratch = dev_priv->gtt.base.scratch;
1828
1829 if (INTEL_INFO(dev)->gen < 8)
1830 ret = gen6_ppgtt_init(ppgtt);
1831 else if (IS_GEN8(dev))
1832 ret = gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
1833 else
1834 BUG();
1835
1836 if (!ret) {
1837 kref_init(&ppgtt->ref);
1838 drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
1839 ppgtt->base.total);
1840 i915_init_vm(dev_priv, &ppgtt->base);
1841 if (INTEL_INFO(dev)->gen < 8) {
1842 gen6_write_pdes(ppgtt);
1843 #ifdef __NetBSD__
1844 DRM_DEBUG("Adding PPGTT at offset %"PRIxMAX"\n",
1845 (uintmax_t)ppgtt->u.gen6->pd_base << 10);
1846 #else
1847 DRM_DEBUG("Adding PPGTT at offset %x\n",
1848 ppgtt->pd_offset << 10);
1849 #endif
1850 }
1851 }
1852
1853 return ret;
1854 }
1855
1856 static void
1857 ppgtt_bind_vma(struct i915_vma *vma,
1858 enum i915_cache_level cache_level,
1859 u32 flags)
1860 {
1861 #ifdef __NetBSD__
1862 vma->vm->insert_entries(vma->vm, vma->obj->igo_dmamap, vma->node.start,
1863 cache_level);
1864 #else
1865 vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
1866 cache_level);
1867 #endif
1868 }
1869
1870 static void ppgtt_unbind_vma(struct i915_vma *vma)
1871 {
1872 vma->vm->clear_range(vma->vm,
1873 vma->node.start,
1874 vma->obj->base.size,
1875 true);
1876 }
1877
1878 extern int intel_iommu_gfx_mapped;
1879 /* Certain Gen5 chipsets require idling the GPU before
1880 * unmapping anything from the GTT when VT-d is enabled.
1881 */
1882 static inline bool needs_idle_maps(struct drm_device *dev)
1883 {
1884 #ifdef CONFIG_INTEL_IOMMU
1885 /* Query intel_iommu to see if we need the workaround. Presumably that
1886 * was loaded first.
1887 */
1888 if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
1889 return true;
1890 #endif
1891 return false;
1892 }
1893
1894 static bool do_idling(struct drm_i915_private *dev_priv)
1895 {
1896 bool ret = dev_priv->mm.interruptible;
1897
1898 if (unlikely(dev_priv->gtt.do_idle_maps)) {
1899 dev_priv->mm.interruptible = false;
1900 if (i915_gpu_idle(dev_priv->dev)) {
1901 DRM_ERROR("Couldn't idle GPU\n");
1902 /* Wait a bit, in hopes it avoids the hang */
1903 udelay(10);
1904 }
1905 }
1906
1907 return ret;
1908 }
1909
1910 static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
1911 {
1912 if (unlikely(dev_priv->gtt.do_idle_maps))
1913 dev_priv->mm.interruptible = interruptible;
1914 }
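/*
 * Callers use the pair above by stashing the value returned from
 * do_idling() and handing it back to undo_idling() once the unmap has
 * completed; see i915_gem_gtt_finish_object() below for the canonical
 * sequence.
 */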
1915
1916 void i915_check_and_clear_faults(struct drm_device *dev)
1917 {
1918 struct drm_i915_private *dev_priv = dev->dev_private;
1919 struct intel_ring_buffer *ring;
1920 int i;
1921
1922 if (INTEL_INFO(dev)->gen < 6)
1923 return;
1924
1925 for_each_ring(ring, dev_priv, i) {
1926 u32 fault_reg;
1927 fault_reg = I915_READ(RING_FAULT_REG(ring));
1928 if (fault_reg & RING_FAULT_VALID) {
1929 DRM_DEBUG_DRIVER("Unexpected fault\n"
1930 "\tAddr: 0x%08"PRIx32"\\n"
1931 "\tAddress space: %s\n"
1932 "\tSource ID: %d\n"
1933 "\tType: %d\n",
1934 fault_reg & PAGE_MASK,
1935 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
1936 RING_FAULT_SRCID(fault_reg),
1937 RING_FAULT_FAULT_TYPE(fault_reg));
1938 I915_WRITE(RING_FAULT_REG(ring),
1939 fault_reg & ~RING_FAULT_VALID);
1940 }
1941 }
1942 POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
1943 }
1944
1945 void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
1946 {
1947 struct drm_i915_private *dev_priv = dev->dev_private;
1948
1949 /* Don't bother messing with faults pre GEN6 as we have little
1950 * documentation supporting that it's a good idea.
1951 */
1952 if (INTEL_INFO(dev)->gen < 6)
1953 return;
1954
1955 i915_check_and_clear_faults(dev);
1956
1957 dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
1958 dev_priv->gtt.base.start,
1959 dev_priv->gtt.base.total,
1960 true);
1961 }
1962
1963 void i915_gem_restore_gtt_mappings(struct drm_device *dev)
1964 {
1965 struct drm_i915_private *dev_priv = dev->dev_private;
1966 struct drm_i915_gem_object *obj;
1967 struct i915_address_space *vm;
1968
1969 i915_check_and_clear_faults(dev);
1970
1971 /* First fill our portion of the GTT with scratch pages */
1972 dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
1973 dev_priv->gtt.base.start,
1974 dev_priv->gtt.base.total,
1975 true);
1976
1977 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
1978 struct i915_vma *vma = i915_gem_obj_to_vma(obj,
1979 &dev_priv->gtt.base);
1980 if (!vma)
1981 continue;
1982
1983 i915_gem_clflush_object(obj, obj->pin_display);
1984 /* The bind_vma code tries to be smart about tracking mappings.
1985 * Unfortunately above, we've just wiped out the mappings
1986 * without telling our object about it. So we need to fake it.
1987 */
1988 obj->has_global_gtt_mapping = 0;
1989 vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
1990 }
1991
1992
1993 if (INTEL_INFO(dev)->gen >= 8) {
1994 gen8_setup_private_ppat(dev_priv);
1995 return;
1996 }
1997
1998 list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
1999 /* TODO: Perhaps it shouldn't be gen6 specific */
2000 if (i915_is_ggtt(vm)) {
2001 if (dev_priv->mm.aliasing_ppgtt)
2002 gen6_write_pdes(dev_priv->mm.aliasing_ppgtt);
2003 continue;
2004 }
2005
2006 gen6_write_pdes(container_of(vm, struct i915_hw_ppgtt, base));
2007 }
2008
2009 i915_gem_chipset_flush(dev);
2010 }
2011
2012 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
2013 {
2014 if (obj->has_dma_mapping)
2015 return 0;
2016
2017 #ifdef __NetBSD__
2018 KASSERT(0 < obj->base.size);
2019 /* XXX errno NetBSD->Linux */
2020 return -bus_dmamap_load_raw(obj->base.dev->dmat, obj->igo_dmamap,
2021 obj->pages, obj->igo_nsegs, obj->base.size, BUS_DMA_NOWAIT);
2022 #else
2023 if (!dma_map_sg(&obj->base.dev->pdev->dev,
2024 obj->pages->sgl, obj->pages->nents,
2025 PCI_DMA_BIDIRECTIONAL))
2026 return -ENOSPC;
2027
2028 return 0;
2029 #endif
2030 }
2031
2032 #ifdef __NetBSD__
2033 static inline uint64_t
2034 gen8_get_pte(bus_space_tag_t bst, bus_space_handle_t bsh, unsigned i)
2035 {
2036 CTASSERT(_BYTE_ORDER == _LITTLE_ENDIAN); /* x86 */
2037 CTASSERT(sizeof(gen8_gtt_pte_t) == 8);
2038 #ifdef _LP64 /* XXX How to detect bus_space_read_8? */
2039 return bus_space_read_8(bst, bsh, 8*i);
2040 #else
2041 /*
2042 * XXX I'm not sure this case can actually happen in practice:
2043 * 32-bit gen8 chipsets?
2044 */
2045 return bus_space_read_4(bst, bsh, 8*i) |
2046 ((uint64_t)bus_space_read_4(bst, bsh, 8*i + 4) << 32);
2047 #endif
2048 }
2049
2050 static inline void
2051 gen8_set_pte(bus_space_tag_t bst, bus_space_handle_t bsh, unsigned i,
2052 gen8_gtt_pte_t pte)
2053 {
2054 CTASSERT(_BYTE_ORDER == _LITTLE_ENDIAN); /* x86 */
2055 CTASSERT(sizeof(gen8_gtt_pte_t) == 8);
2056 #ifdef _LP64 /* XXX How to detect bus_space_write_8? */
2057 bus_space_write_8(bst, bsh, 8*i, pte);
2058 #else
2059 bus_space_write_4(bst, bsh, 8*i, (uint32_t)pte);
2060 bus_space_write_4(bst, bsh, 8*i + 4, (uint32_t)(pte >> 32));
2061 #endif
2062 }
2063 #else
2064 static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
2065 {
2066 #ifdef writeq
2067 writeq(pte, addr);
2068 #else
2069 iowrite32((u32)pte, addr);
2070 iowrite32(pte >> 32, addr + 4);
2071 #endif
2072 }
2073 #endif
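/*
 * Note: when no 64-bit MMIO write primitive is available, the PTE above is
 * written as two 32-bit halves, which is not atomic.  The callers below
 * follow a whole batch of updates with at least a posting read (and, in
 * the insert_entries paths, a GFX_FLSH_CNTL flush) before the GPU is
 * expected to observe the new entries.
 */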
2074
2075 #ifdef __NetBSD__
2076 static void
2077 gen8_ggtt_insert_entries(struct i915_address_space *vm, bus_dmamap_t dmamap,
2078 uint64_t start, enum i915_cache_level level)
2079 {
2080 struct drm_i915_private *dev_priv = vm->dev->dev_private;
2081 unsigned first_entry = start >> PAGE_SHIFT;
2082 const bus_space_tag_t bst = dev_priv->gtt.bst;
2083 const bus_space_handle_t bsh = dev_priv->gtt.bsh;
2084 unsigned i;
2085
2086 KASSERT(0 < dmamap->dm_nsegs);
2087 for (i = 0; i < dmamap->dm_nsegs; i++) {
2088 KASSERT(dmamap->dm_segs[i].ds_len == PAGE_SIZE);
2089 gen8_set_pte(bst, bsh, first_entry + i,
2090 gen8_pte_encode(dmamap->dm_segs[i].ds_addr, level, true));
2091 }
2092 if (0 < i) {
2093 /* Posting read. */
2094 WARN_ON(gen8_get_pte(bst, bsh, (first_entry + i - 1))
2095 != gen8_pte_encode(dmamap->dm_segs[i - 1].ds_addr, level,
2096 true));
2097 }
2098 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2099 POSTING_READ(GFX_FLSH_CNTL_GEN6);
2100 }
2101 #else
2102 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
2103 struct sg_table *st,
2104 uint64_t start,
2105 enum i915_cache_level level)
2106 {
2107 struct drm_i915_private *dev_priv = vm->dev->dev_private;
2108 unsigned first_entry = start >> PAGE_SHIFT;
2109 gen8_gtt_pte_t __iomem *gtt_entries =
2110 (gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
2111 int i = 0;
2112 struct sg_page_iter sg_iter;
2113 dma_addr_t addr;
2114
2115 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
2116 addr = sg_dma_address(sg_iter.sg) +
2117 (sg_iter.sg_pgoffset << PAGE_SHIFT);
2118 gen8_set_pte(&gtt_entries[i],
2119 gen8_pte_encode(addr, level, true));
2120 i++;
2121 }
2122
2123 /*
2124 * XXX: This serves as a posting read to make sure that the PTE has
2125 * actually been updated. There is some concern that even though
2126 * registers and PTEs are within the same BAR that they are potentially
2127 * of NUMA access patterns. Therefore, even with the way we assume
2128 * hardware should work, we must keep this posting read for paranoia.
2129 */
2130 if (i != 0)
2131 WARN_ON(readq(&gtt_entries[i-1])
2132 != gen8_pte_encode(addr, level, true));
2133
2134 /* This next bit makes the above posting read even more important. We
2135 * want to flush the TLBs only after we're certain all the PTE updates
2136 * have finished.
2137 */
2138 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2139 POSTING_READ(GFX_FLSH_CNTL_GEN6);
2140 }
2141 #endif
2142
2143 /*
2144 * Binds an object into the global gtt with the specified cache level. The object
2145 * will be accessible to the GPU via commands whose operands reference offsets
2146 * within the global GTT as well as accessible by the GPU through the GMADR
2147 * mapped BAR (dev_priv->mm.gtt->gtt).
2148 */
2149 #ifdef __NetBSD__
2150 static void
2151 gen6_ggtt_insert_entries(struct i915_address_space *vm, bus_dmamap_t dmamap,
2152 uint64_t start, enum i915_cache_level level)
2153 {
2154 struct drm_i915_private *dev_priv = vm->dev->dev_private;
2155 unsigned first_entry = start >> PAGE_SHIFT;
2156 const bus_space_tag_t bst = dev_priv->gtt.bst;
2157 const bus_space_handle_t bsh = dev_priv->gtt.bsh;
2158 unsigned i;
2159
2160 KASSERT(0 < dmamap->dm_nsegs);
2161 for (i = 0; i < dmamap->dm_nsegs; i++) {
2162 KASSERT(dmamap->dm_segs[i].ds_len == PAGE_SIZE);
2163 CTASSERT(sizeof(gen6_gtt_pte_t) == 4);
2164 bus_space_write_4(bst, bsh, 4*(first_entry + i),
2165 vm->pte_encode(dmamap->dm_segs[i].ds_addr, level, true));
2166 }
2167 if (0 < i) {
2168 /* Posting read. */
2169 WARN_ON(bus_space_read_4(bst, bsh, 4*(first_entry + i - 1))
2170 != vm->pte_encode(dmamap->dm_segs[i - 1].ds_addr, level,
2171 true));
2172 }
2173 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2174 POSTING_READ(GFX_FLSH_CNTL_GEN6);
2175 }
2176 #else
2177 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
2178 struct sg_table *st,
2179 uint64_t start,
2180 enum i915_cache_level level)
2181 {
2182 struct drm_i915_private *dev_priv = vm->dev->dev_private;
2183 unsigned first_entry = start >> PAGE_SHIFT;
2184 gen6_gtt_pte_t __iomem *gtt_entries =
2185 (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
2186 int i = 0;
2187 struct sg_page_iter sg_iter;
2188 dma_addr_t addr;
2189
2190 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
2191 addr = sg_page_iter_dma_address(&sg_iter);
2192 iowrite32(vm->pte_encode(addr, level, true), &gtt_entries[i]);
2193 i++;
2194 }
2195
2196 /* XXX: This serves as a posting read to make sure that the PTE has
2197 * actually been updated. There is some concern that even though
2198 * registers and PTEs are within the same BAR that they are potentially
2199 * of NUMA access patterns. Therefore, even with the way we assume
2200 * hardware should work, we must keep this posting read for paranoia.
2201 */
2202 if (i != 0)
2203 WARN_ON(readl(&gtt_entries[i-1]) !=
2204 vm->pte_encode(addr, level, true));
2205
2206 /* This next bit makes the above posting read even more important. We
2207 * want to flush the TLBs only after we're certain all the PTE updates
2208 * have finished.
2209 */
2210 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2211 POSTING_READ(GFX_FLSH_CNTL_GEN6);
2212 }
2213 #endif
2214
2215 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
2216 uint64_t start,
2217 uint64_t length,
2218 bool use_scratch)
2219 {
2220 struct drm_i915_private *dev_priv = vm->dev->dev_private;
2221 unsigned first_entry = start >> PAGE_SHIFT;
2222 unsigned num_entries = length >> PAGE_SHIFT;
2223 #ifdef __NetBSD__
2224 const bus_space_tag_t bst = dev_priv->gtt.bst;
2225 const bus_space_handle_t bsh = dev_priv->gtt.bsh;
2226 gen8_gtt_pte_t scratch_pte;
2227 #else
2228 gen8_gtt_pte_t scratch_pte, __iomem *gtt_base =
2229 (gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
2230 #endif
2231 const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
2232 int i;
2233
2234 if (WARN(num_entries > max_entries,
2235 "First entry = %d; Num entries = %d (max=%d)\n",
2236 first_entry, num_entries, max_entries))
2237 num_entries = max_entries;
2238
2239 scratch_pte = gen8_pte_encode(vm->scratch.addr,
2240 I915_CACHE_LLC,
2241 use_scratch);
2242 #ifdef __NetBSD__
2243 CTASSERT(sizeof(gen8_gtt_pte_t) == 8);
2244 for (i = 0; i < num_entries; i++)
2245 gen8_set_pte(bst, bsh, first_entry + i, scratch_pte);
2246 (void)gen8_get_pte(bst, bsh, first_entry);
2247 #else
2248 for (i = 0; i < num_entries; i++)
2249 gen8_set_pte(&gtt_base[i], scratch_pte);
2250 readl(gtt_base);
2251 #endif
2252 }
2253
2254 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
2255 uint64_t start,
2256 uint64_t length,
2257 bool use_scratch)
2258 {
2259 struct drm_i915_private *dev_priv = vm->dev->dev_private;
2260 unsigned first_entry = start >> PAGE_SHIFT;
2261 unsigned num_entries = length >> PAGE_SHIFT;
2262 #ifdef __NetBSD__
2263 const bus_space_tag_t bst = dev_priv->gtt.bst;
2264 const bus_space_handle_t bsh = dev_priv->gtt.bsh;
2265 gen6_gtt_pte_t scratch_pte;
2266 #else
2267 gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
2268 (gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
2269 #endif
2270 const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
2271 int i;
2272
2273 if (WARN(num_entries > max_entries,
2274 "First entry = %d; Num entries = %d (max=%d)\n",
2275 first_entry, num_entries, max_entries))
2276 num_entries = max_entries;
2277
2278 scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch);
2279
2280 #ifdef __NetBSD__
2281 CTASSERT(sizeof(gen6_gtt_pte_t) == 4);
2282 for (i = 0; i < num_entries; i++)
2283 bus_space_write_4(bst, bsh, 4*(first_entry + i), scratch_pte);
2284 (void)bus_space_read_4(bst, bsh, 4*first_entry);
2285 #else
2286 for (i = 0; i < num_entries; i++)
2287 iowrite32(scratch_pte, &gtt_base[i]);
2288 readl(gtt_base);
2289 #endif
2290 }
2291
2292
2293 static void i915_ggtt_bind_vma(struct i915_vma *vma,
2294 enum i915_cache_level cache_level,
2295 u32 unused)
2296 {
2297 const unsigned long entry = vma->node.start >> PAGE_SHIFT;
2298 unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2299 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2300
2301 BUG_ON(!i915_is_ggtt(vma->vm));
2302 #ifdef __NetBSD__
2303 intel_gtt_insert_entries(vma->obj->igo_dmamap, entry, flags);
2304 #else
2305 intel_gtt_insert_sg_entries(vma->obj->pages, entry, flags);
2306 #endif
2307 vma->obj->has_global_gtt_mapping = 1;
2308 }
2309
2310 static void i915_ggtt_clear_range(struct i915_address_space *vm,
2311 uint64_t start,
2312 uint64_t length,
2313 bool unused)
2314 {
2315 unsigned first_entry = start >> PAGE_SHIFT;
2316 unsigned num_entries = length >> PAGE_SHIFT;
2317 intel_gtt_clear_range(first_entry, num_entries);
2318 }
2319
2320 static void i915_ggtt_unbind_vma(struct i915_vma *vma)
2321 {
2322 const unsigned int first = vma->node.start >> PAGE_SHIFT;
2323 const unsigned int size = vma->obj->base.size >> PAGE_SHIFT;
2324
2325 BUG_ON(!i915_is_ggtt(vma->vm));
2326 vma->obj->has_global_gtt_mapping = 0;
2327 intel_gtt_clear_range(first, size);
2328 }
2329
2330 static void ggtt_bind_vma(struct i915_vma *vma,
2331 enum i915_cache_level cache_level,
2332 u32 flags)
2333 {
2334 struct drm_device *dev = vma->vm->dev;
2335 struct drm_i915_private *dev_priv = dev->dev_private;
2336 struct drm_i915_gem_object *obj = vma->obj;
2337
2338 /* If there is no aliasing PPGTT, or the caller needs a global mapping,
2339 * or we have a global mapping already but the cacheability flags have
2340 * changed, set the global PTEs.
2341 *
2342 * If there is an aliasing PPGTT it is anecdotally faster, so use that
2343 * instead if none of the above hold true.
2344 *
2345 * NB: A global mapping should only be needed for special regions like
2346 * "gtt mappable", SNB errata, or if specified via special execbuf
2347 * flags. At all other times, the GPU will use the aliasing PPGTT.
2348 */
2349 if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
2350 if (!obj->has_global_gtt_mapping ||
2351 (cache_level != obj->cache_level)) {
2352 vma->vm->insert_entries(vma->vm,
2353 #ifdef __NetBSD__
2354 obj->igo_dmamap,
2355 #else
2356 obj->pages,
2357 #endif
2358 vma->node.start,
2359 cache_level);
2360 obj->has_global_gtt_mapping = 1;
2361 }
2362 }
2363
2364 if (dev_priv->mm.aliasing_ppgtt &&
2365 (!obj->has_aliasing_ppgtt_mapping ||
2366 (cache_level != obj->cache_level))) {
2367 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
2368 appgtt->base.insert_entries(&appgtt->base,
2369 #ifdef __NetBSD__
2370 vma->obj->igo_dmamap,
2371 #else
2372 vma->obj->pages,
2373 #endif
2374 vma->node.start,
2375 cache_level);
2376 vma->obj->has_aliasing_ppgtt_mapping = 1;
2377 }
2378 }
2379
2380 static void ggtt_unbind_vma(struct i915_vma *vma)
2381 {
2382 struct drm_device *dev = vma->vm->dev;
2383 struct drm_i915_private *dev_priv = dev->dev_private;
2384 struct drm_i915_gem_object *obj = vma->obj;
2385
2386 if (obj->has_global_gtt_mapping) {
2387 vma->vm->clear_range(vma->vm,
2388 vma->node.start,
2389 obj->base.size,
2390 true);
2391 obj->has_global_gtt_mapping = 0;
2392 }
2393
2394 if (obj->has_aliasing_ppgtt_mapping) {
2395 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
2396 appgtt->base.clear_range(&appgtt->base,
2397 vma->node.start,
2398 obj->base.size,
2399 true);
2400 obj->has_aliasing_ppgtt_mapping = 0;
2401 }
2402 }
2403
2404 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
2405 {
2406 struct drm_device *dev = obj->base.dev;
2407 struct drm_i915_private *dev_priv = dev->dev_private;
2408 bool interruptible;
2409
2410 interruptible = do_idling(dev_priv);
2411
2412 #ifdef __NetBSD__
2413 bus_dmamap_unload(dev->dmat, obj->igo_dmamap);
2414 #else
2415 if (!obj->has_dma_mapping)
2416 dma_unmap_sg(&dev->pdev->dev,
2417 obj->pages->sgl, obj->pages->nents,
2418 PCI_DMA_BIDIRECTIONAL);
2419 #endif
2420
2421 undo_idling(dev_priv, interruptible);
2422 }
2423
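/*
 * drm_mm node "color" here appears to be the object's cache level; the
 * hook below is only installed when !HAS_LLC(dev) (see
 * i915_gem_setup_global_gtt() further down).  Whenever two neighbouring
 * nodes differ in color, the allocator is asked to leave a one-page
 * (4096-byte) gap between them, presumably so that objects with
 * different cacheability never share a GPU prefetch boundary.
 */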
2424 static void i915_gtt_color_adjust(struct drm_mm_node *node,
2425 unsigned long color,
2426 unsigned long *start,
2427 unsigned long *end)
2428 {
2429 if (node->color != color)
2430 *start += 4096;
2431
2432 if (!list_empty(&node->node_list)) {
2433 node = list_entry(node->node_list.next,
2434 struct drm_mm_node,
2435 node_list);
2436 if (node->allocated && node->color != color)
2437 *end -= 4096;
2438 }
2439 }
2440
2441 void i915_gem_setup_global_gtt(struct drm_device *dev,
2442 unsigned long start,
2443 unsigned long mappable_end,
2444 unsigned long end)
2445 {
2446 /* Let GEM Manage all of the aperture.
2447 *
2448 * However, leave one page at the end still bound to the scratch page.
2449 * There are a number of places where the hardware apparently prefetches
2450 * past the end of the object, and we've seen multiple hangs with the
2451 * GPU head pointer stuck in a batchbuffer bound at the last page of the
2452 * aperture. One page should be enough to keep any prefetching inside
2453 * of the aperture.
2454 */
2455 struct drm_i915_private *dev_priv = dev->dev_private;
2456 struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
2457 struct drm_mm_node *entry;
2458 struct drm_i915_gem_object *obj;
2459 unsigned long hole_start, hole_end;
2460
2461 BUG_ON(mappable_end > end);
2462
2463 /* Subtract the guard page ... */
2464 drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE);
2465 if (!HAS_LLC(dev))
2466 dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;
2467
2468 /* Mark any preallocated objects as occupied */
2469 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
2470 struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
2471 int ret;
2472 DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
2473 i915_gem_obj_ggtt_offset(obj), obj->base.size);
2474
2475 WARN_ON(i915_gem_obj_ggtt_bound(obj));
2476 ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
2477 if (ret)
2478 DRM_DEBUG_KMS("Reservation failed\n");
2479 obj->has_global_gtt_mapping = 1;
2480 }
2481
2482 dev_priv->gtt.base.start = start;
2483 dev_priv->gtt.base.total = end - start;
2484
2485 /* Clear any non-preallocated blocks */
2486 drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
2487 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
2488 hole_start, hole_end);
2489 ggtt_vm->clear_range(ggtt_vm, hole_start,
2490 hole_end - hole_start, true);
2491 }
2492
2493 /* And finally clear the reserved guard page */
2494 ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);
2495 }
2496
2497 void i915_gem_init_global_gtt(struct drm_device *dev)
2498 {
2499 struct drm_i915_private *dev_priv = dev->dev_private;
2500 unsigned long gtt_size, mappable_size;
2501
2502 gtt_size = dev_priv->gtt.base.total;
2503 mappable_size = dev_priv->gtt.mappable_end;
2504
2505 i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
2506 }
2507
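/*
 * The scratch page backs every GTT/PPGTT entry that is not bound to a
 * real object: the clear_range() implementations above encode
 * vm->scratch.addr into the PTEs they wipe (when use_scratch is set), so
 * stray GPU accesses to unbound ranges hit this single harmless page
 * rather than arbitrary memory.
 */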
2508 static int setup_scratch_page(struct drm_device *dev)
2509 {
2510 struct drm_i915_private *dev_priv = dev->dev_private;
2511 #ifdef __NetBSD__
2512 int nsegs;
2513 int error;
2514
2515 error = bus_dmamem_alloc(dev->dmat, PAGE_SIZE, PAGE_SIZE, 0,
2516 &dev_priv->gtt.base.scratch.seg, 1, &nsegs, BUS_DMA_WAITOK);
2517 if (error)
2518 goto fail0;
2519 KASSERT(nsegs == 1);
2520
2521 error = bus_dmamap_create(dev->dmat, PAGE_SIZE, 1, PAGE_SIZE, 0,
2522 BUS_DMA_WAITOK, &dev_priv->gtt.base.scratch.map);
2523 if (error)
2524 goto fail1;
2525
2526 error = bus_dmamap_load_raw(dev->dmat, dev_priv->gtt.base.scratch.map,
2527 &dev_priv->gtt.base.scratch.seg, 1, PAGE_SIZE, BUS_DMA_WAITOK);
2528 if (error)
2529 goto fail2;
2530
2531 /* Success! */
2532 dev_priv->gtt.base.scratch.addr =
2533 dev_priv->gtt.base.scratch.map->dm_segs[0].ds_addr;
2534 return 0;
2535
2536 fail3: __unused
2537 dev_priv->gtt.base.scratch.addr = 0;
2538 bus_dmamap_unload(dev->dmat, dev_priv->gtt.base.scratch.map);
2539 fail2: bus_dmamap_destroy(dev->dmat, dev_priv->gtt.base.scratch.map);
2540 fail1: bus_dmamem_free(dev->dmat, &dev_priv->gtt.base.scratch.seg, 1);
2541 fail0: KASSERT(error);
2542 /* XXX errno NetBSD->Linux */
2543 return -error;
2544 #else
2545 struct page *page;
2546 dma_addr_t dma_addr;
2547
2548 page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
2549 if (page == NULL)
2550 return -ENOMEM;
2551 get_page(page);
2552 set_pages_uc(page, 1);
2553
2554 #ifdef CONFIG_INTEL_IOMMU
2555 dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE,
2556 PCI_DMA_BIDIRECTIONAL);
2557 if (pci_dma_mapping_error(dev->pdev, dma_addr))
2558 return -EINVAL;
2559 #else
2560 dma_addr = page_to_phys(page);
2561 #endif
2562 dev_priv->gtt.base.scratch.page = page;
2563 dev_priv->gtt.base.scratch.addr = dma_addr;
2564
2565 return 0;
2566 #endif
2567 }
2568
2569 static void teardown_scratch_page(struct drm_device *dev)
2570 {
2571 struct drm_i915_private *dev_priv = dev->dev_private;
2572 #ifdef __NetBSD__
2573
2574 dev_priv->gtt.base.scratch.addr = 0;
2575 bus_dmamap_unload(dev->dmat, dev_priv->gtt.base.scratch.map);
2576 bus_dmamap_destroy(dev->dmat, dev_priv->gtt.base.scratch.map);
2577 bus_dmamem_free(dev->dmat, &dev_priv->gtt.base.scratch.seg, 1);
2578 #else
2579 struct page *page = dev_priv->gtt.base.scratch.page;
2580
2581 set_pages_wb(page, 1);
2582 pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr,
2583 PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
2584 put_page(page);
2585 __free_page(page);
2586 #endif
2587 }
2588
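/*
 * The helpers below decode the GMCH graphics control word.  Rough worked
 * examples: on gen6 the GGMS field is the GTT size in MB (value << 20);
 * on gen8 it is log2-encoded, so a field value of 3 means 1 << 3 = 8 MB
 * of PTEs.  The GMS stolen-memory fields are in 32 MB units on both, so
 * a value of 2 means 64 MB of stolen memory.
 */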
2589 static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
2590 {
2591 snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
2592 snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
2593 return snb_gmch_ctl << 20;
2594 }
2595
2596 static inline unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
2597 {
2598 bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
2599 bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
2600 if (bdw_gmch_ctl)
2601 bdw_gmch_ctl = 1 << bdw_gmch_ctl;
2602 return bdw_gmch_ctl << 20;
2603 }
2604
2605 static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
2606 {
2607 snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
2608 snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
2609 return snb_gmch_ctl << 25; /* 32 MB units */
2610 }
2611
2612 static inline size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
2613 {
2614 bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2615 bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
2616 return bdw_gmch_ctl << 25; /* 32 MB units */
2617 }
2618
2619 static int ggtt_probe_common(struct drm_device *dev,
2620 size_t gtt_size)
2621 {
2622 struct drm_i915_private *dev_priv = dev->dev_private;
2623 phys_addr_t gtt_phys_addr;
2624 int ret;
2625
2626 /* For Modern GENs the PTEs and register space are split in the BAR */
2627 gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
2628 (pci_resource_len(dev->pdev, 0) / 2);
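/*
 * For example, a 4 MB BAR 0 would give 2 MB of register space in the
 * lower half and 2 MB of GTT entries (the "GSM") in the upper half,
 * which is the region mapped just below.
 */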
2629
2630 #ifdef __NetBSD__
2631 dev_priv->gtt.bst = dev->pdev->pd_pa.pa_memt;
2632 /* XXX errno NetBSD->Linux */
2633 ret = -bus_space_map(dev_priv->gtt.bst, gtt_phys_addr, gtt_size,
2634 BUS_SPACE_MAP_PREFETCHABLE, &dev_priv->gtt.bsh);
2635 if (ret) {
2636 DRM_ERROR("Failed to map the graphics translation table: %d\n",
2637 ret);
2638 return ret;
2639 }
2640 dev_priv->gtt.size = gtt_size;
2641 #else
2642 dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
2643 if (!dev_priv->gtt.gsm) {
2644 DRM_ERROR("Failed to map the gtt page table\n");
2645 return -ENOMEM;
2646 }
2647 #endif
2648
2649 ret = setup_scratch_page(dev);
2650 if (ret) {
2651 DRM_ERROR("Scratch setup failed\n");
2652 /* iounmap will also get called at remove, but meh */
2653 #ifdef __NetBSD__
2654 bus_space_unmap(dev_priv->gtt.bst, dev_priv->gtt.bsh,
2655 dev_priv->gtt.size);
2656 #else
2657 iounmap(dev_priv->gtt.gsm);
2658 #endif
2659 }
2660
2661 return ret;
2662 }
2663
2664 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
2665 * bits. When using advanced contexts each context stores its own PAT, but
2666 * writing this data shouldn't be harmful even in those cases. */
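/*
 * The value built below packs eight 8-bit PPAT entries into a single
 * 64-bit register (see the GEN8_PPAT(i, x) macro); each gen8 PTE then
 * selects one of the eight entries via a small per-PTE index, roughly
 * analogous to the CPU's PAT/PCD/PWT bits.
 */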
2667 static void gen8_setup_private_ppat(struct drm_i915_private *dev_priv)
2668 {
2669 #define GEN8_PPAT_UC (0<<0)
2670 #define GEN8_PPAT_WC (1<<0)
2671 #define GEN8_PPAT_WT (2<<0)
2672 #define GEN8_PPAT_WB (3<<0)
2673 #define GEN8_PPAT_ELLC_OVERRIDE (0<<2)
2674 /* FIXME(BDW): Bspec is completely confused about cache control bits. */
2675 #define GEN8_PPAT_LLC (1<<2)
2676 #define GEN8_PPAT_LLCELLC (2<<2)
2677 #define GEN8_PPAT_LLCeLLC (3<<2)
2678 #define GEN8_PPAT_AGE(x) (x<<4)
2679 #define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8))
2680 uint64_t pat;
2681
2682 pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */
2683 GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
2684 GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
2685 GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */
2686 GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
2687 GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
2688 GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
2689 GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
2690
2691 /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
2692 * write would work. */
2693 I915_WRITE(GEN8_PRIVATE_PAT, pat);
2694 I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
2695 }
2696
2697 static int gen8_gmch_probe(struct drm_device *dev,
2698 size_t *gtt_total,
2699 size_t *stolen,
2700 phys_addr_t *mappable_base,
2701 unsigned long *mappable_end)
2702 {
2703 struct drm_i915_private *dev_priv = dev->dev_private;
2704 unsigned int gtt_size;
2705 u16 snb_gmch_ctl;
2706 int ret;
2707
2708 /* TODO: We're not aware of mappable constraints on gen8 yet */
2709 *mappable_base = pci_resource_start(dev->pdev, 2);
2710 *mappable_end = pci_resource_len(dev->pdev, 2);
2711
2712 #ifndef __NetBSD__
2713 if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
2714 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));
2715 #endif
2716
2717 pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
2718
2719 *stolen = gen8_get_stolen_size(snb_gmch_ctl);
2720
2721 gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
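/* E.g. 8 MB of PTE space / 8-byte PTEs = 1M entries, i.e. a 4 GB GGTT. */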
2722 *gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT;
2723
2724 gen8_setup_private_ppat(dev_priv);
2725
2726 ret = ggtt_probe_common(dev, gtt_size);
2727
2728 dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
2729 dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
2730
2731 /* XXX 39-bit addresses? Really? See pci_set_dma_mask above... */
2732 dev_priv->gtt.max_paddr = __BITS(38, 0);
2733
2734 return ret;
2735 }
2736
2737 static int gen6_gmch_probe(struct drm_device *dev,
2738 size_t *gtt_total,
2739 size_t *stolen,
2740 phys_addr_t *mappable_base,
2741 unsigned long *mappable_end)
2742 {
2743 struct drm_i915_private *dev_priv = dev->dev_private;
2744 unsigned int gtt_size;
2745 u16 snb_gmch_ctl;
2746 int ret;
2747
2748 *mappable_base = pci_resource_start(dev->pdev, 2);
2749 *mappable_end = pci_resource_len(dev->pdev, 2);
2750
2751 /* 64/512MB is the current min/max we actually know of, but this is just
2752 * a coarse sanity check.
2753 */
2754 if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
2755 DRM_ERROR("Unknown GMADR size (%lx)\n",
2756 dev_priv->gtt.mappable_end);
2757 return -ENXIO;
2758 }
2759
2760 #ifndef __NetBSD__
2761 if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
2762 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
2763 #endif
2764 pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
2765
2766 *stolen = gen6_get_stolen_size(snb_gmch_ctl);
2767
2768 gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
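/* E.g. 2 MB of PTE space / 4-byte PTEs = 512K entries, i.e. a 2 GB GGTT. */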
2769 *gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT;
2770
2771 ret = ggtt_probe_common(dev, gtt_size);
2772
2773 dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
2774 dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
2775
2776 dev_priv->gtt.max_paddr = __BITS(39, 0);
2777
2778 return ret;
2779 }
2780
2781 static void gen6_gmch_remove(struct i915_address_space *vm)
2782 {
2783
2784 struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);
2785
2786 drm_mm_takedown(&vm->mm);
2787 #ifdef __NetBSD__
2788 bus_space_unmap(gtt->bst, gtt->bsh, gtt->size);
2789 #else
2790 iounmap(gtt->gsm);
2791 #endif
2792 teardown_scratch_page(vm->dev);
2793 }
2794
2795 static int i915_gmch_probe(struct drm_device *dev,
2796 size_t *gtt_total,
2797 size_t *stolen,
2798 phys_addr_t *mappable_base,
2799 unsigned long *mappable_end)
2800 {
2801 struct drm_i915_private *dev_priv = dev->dev_private;
2802 int ret;
2803
2804 ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
2805 if (!ret) {
2806 DRM_ERROR("failed to set up gmch\n");
2807 return -EIO;
2808 }
2809
2810 intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
2811
2812 dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
2813 dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
2814
2815 if (unlikely(dev_priv->gtt.do_idle_maps))
2816 DRM_INFO("applying Ironlake quirks for intel_iommu\n");
2817
2818 if (INTEL_INFO(dev)->gen <= 2)
2819 dev_priv->gtt.max_paddr = __BITS(29, 0);
2820 else if ((INTEL_INFO(dev)->gen <= 3) ||
2821 IS_BROADWATER(dev) || IS_CRESTLINE(dev))
2822 dev_priv->gtt.max_paddr = __BITS(31, 0);
2823 else if (INTEL_INFO(dev)->gen <= 5)
2824 dev_priv->gtt.max_paddr = __BITS(35, 0);
2825 else
2826 dev_priv->gtt.max_paddr = __BITS(39, 0);
2827
2828 return 0;
2829 }
2830
2831 static void i915_gmch_remove(struct i915_address_space *vm)
2832 {
2833 intel_gmch_remove();
2834 }
2835
2836 int i915_gem_gtt_init(struct drm_device *dev)
2837 {
2838 struct drm_i915_private *dev_priv = dev->dev_private;
2839 struct i915_gtt *gtt = &dev_priv->gtt;
2840 int ret;
2841
2842 if (INTEL_INFO(dev)->gen <= 5) {
2843 gtt->gtt_probe = i915_gmch_probe;
2844 gtt->base.cleanup = i915_gmch_remove;
2845 } else if (INTEL_INFO(dev)->gen < 8) {
2846 gtt->gtt_probe = gen6_gmch_probe;
2847 gtt->base.cleanup = gen6_gmch_remove;
2848 if (IS_HASWELL(dev) && dev_priv->ellc_size)
2849 gtt->base.pte_encode = iris_pte_encode;
2850 else if (IS_HASWELL(dev))
2851 gtt->base.pte_encode = hsw_pte_encode;
2852 else if (IS_VALLEYVIEW(dev))
2853 gtt->base.pte_encode = byt_pte_encode;
2854 else if (INTEL_INFO(dev)->gen >= 7)
2855 gtt->base.pte_encode = ivb_pte_encode;
2856 else
2857 gtt->base.pte_encode = snb_pte_encode;
2858 } else {
2859 dev_priv->gtt.gtt_probe = gen8_gmch_probe;
2860 dev_priv->gtt.base.cleanup = gen6_gmch_remove;
2861 }
2862
2863 ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
2864 &gtt->mappable_base, &gtt->mappable_end);
2865 if (ret)
2866 return ret;
2867
2868 #ifdef __NetBSD__
2869 dev_priv->gtt.pgfl = x86_select_freelist(dev_priv->gtt.max_paddr);
2870 ret = drm_limit_dma_space(dev, 0, dev_priv->gtt.max_paddr);
2871 if (ret) {
2872 DRM_ERROR("Unable to limit DMA paddr allocations: %d!\n", ret);
2873 gtt->base.cleanup(&gtt->base);
2874 return ret;
2875 }
2876 #endif
2877
2878 gtt->base.dev = dev;
2879
2880 /* GMADR is the PCI mmio aperture into the global GTT. */
2881 DRM_INFO("Memory usable by graphics device = %zdM\n",
2882 gtt->base.total >> 20);
2883 DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
2884 DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
2885 /*
2886 * i915.enable_ppgtt is read-only, so do an early pass to validate the
2887 * user's requested state against the hardware/driver capabilities. We
2888 * do this now so that we can print out any log messages once rather
2889 * than every time we check intel_enable_ppgtt().
2890 */
2891 i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt);
2892 DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);
2893
2894 return 0;
2895 }
2896
2897 static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj,
2898 struct i915_address_space *vm)
2899 {
2900 struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
2901 if (vma == NULL)
2902 return ERR_PTR(-ENOMEM);
2903
2904 INIT_LIST_HEAD(&vma->vma_link);
2905 INIT_LIST_HEAD(&vma->mm_list);
2906 INIT_LIST_HEAD(&vma->exec_list);
2907 vma->vm = vm;
2908 vma->obj = obj;
2909
2910 switch (INTEL_INFO(vm->dev)->gen) {
2911 case 8:
2912 case 7:
2913 case 6:
2914 if (i915_is_ggtt(vm)) {
2915 vma->unbind_vma = ggtt_unbind_vma;
2916 vma->bind_vma = ggtt_bind_vma;
2917 } else {
2918 vma->unbind_vma = ppgtt_unbind_vma;
2919 vma->bind_vma = ppgtt_bind_vma;
2920 }
2921 break;
2922 case 5:
2923 case 4:
2924 case 3:
2925 case 2:
2926 BUG_ON(!i915_is_ggtt(vm));
2927 vma->unbind_vma = i915_ggtt_unbind_vma;
2928 vma->bind_vma = i915_ggtt_bind_vma;
2929 break;
2930 default:
2931 BUG();
2932 }
2933
2934 /* Keep GGTT vmas first to make debug easier */
2935 if (i915_is_ggtt(vm))
2936 list_add(&vma->vma_link, &obj->vma_list);
2937 else
2938 list_add_tail(&vma->vma_link, &obj->vma_list);
2939
2940 return vma;
2941 }
2942
2943 struct i915_vma *
2944 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
2945 struct i915_address_space *vm)
2946 {
2947 struct i915_vma *vma;
2948
2949 vma = i915_gem_obj_to_vma(obj, vm);
2950 if (!vma)
2951 vma = __i915_gem_vma_create(obj, vm);
2952
2953 return vma;
2954 }
2955