1 /*
2  * Copyright © 2010 Daniel Vetter
3  * Copyright © 2011-2014 Intel Corporation
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  *
24  */
25 
26 #include <linux/err.h>
27 #include <linux/seq_file.h>
28 #include <drm/drmP.h>
29 #include <drm/i915_drm.h>
30 #include "i915_drv.h"
31 #include "i915_trace.h"
32 #include "intel_drv.h"
33 
34 #ifdef __NetBSD__
35 #include <x86/machdep.h>
36 #include <x86/pte.h>
37 #define	_PAGE_PRESENT	PG_V	/* 0x01 PTE is present / valid */
38 #define	_PAGE_RW	PG_RW	/* 0x02 read/write */
39 #define	_PAGE_PWT	PG_WT	/* 0x08 write-through */
40 #define	_PAGE_PCD	PG_N	/* 0x10 page cache disabled / non-cacheable */
41 #define	_PAGE_PAT	PG_PAT	/* 0x80 page attribute table on PTE */
42 #endif
43 
44 static void gen8_setup_private_ppat(struct drm_i915_private *dev_priv);
45 
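/*
 * As the checks in intel_enable_ppgtt() and sanitize_enable_ppgtt() below
 * imply, the i915.enable_ppgtt module parameter is treated as: 0 = PPGTT
 * disabled, 1 = aliasing PPGTT only, 2 = full PPGTT also allowed where the
 * hardware supports it.
 */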
46 bool intel_enable_ppgtt(struct drm_device *dev, bool full)
47 {
48 	if (i915.enable_ppgtt == 0)
49 		return false;
50 
51 	if (i915.enable_ppgtt == 1 && full)
52 		return false;
53 
54 	return true;
55 }
56 
57 static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
58 {
59 	if (enable_ppgtt == 0 || !HAS_ALIASING_PPGTT(dev))
60 		return 0;
61 
62 	if (enable_ppgtt == 1)
63 		return 1;
64 
65 	if (enable_ppgtt == 2 && HAS_PPGTT(dev))
66 		return 2;
67 
68 #ifdef CONFIG_INTEL_IOMMU
69 	/* Disable ppgtt on SNB if VT-d is on. */
70 	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
71 		DRM_INFO("Disabling PPGTT because VT-d is on\n");
72 		return 0;
73 	}
74 #endif
75 
76 	return HAS_ALIASING_PPGTT(dev) ? 1 : 0;
77 }
78 
79 #define GEN6_PPGTT_PD_ENTRIES 512
80 #define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
81 typedef uint64_t gen8_gtt_pte_t;
82 typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
83 
84 /* PPGTT stuff */
85 #define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))
86 #define HSW_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0x7f0))
87 
88 #define GEN6_PDE_VALID			(1 << 0)
89 /* gen6+ uses bits 11:4 for physical addr bits 39:32 */
90 #define GEN6_PDE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
91 
92 #define GEN6_PTE_VALID			(1 << 0)
93 #define GEN6_PTE_UNCACHED		(1 << 1)
94 #define HSW_PTE_UNCACHED		(0)
95 #define GEN6_PTE_CACHE_LLC		(2 << 1)
96 #define GEN7_PTE_CACHE_L3_LLC		(3 << 1)
97 #define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
98 #define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
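/*
 * Worked example for the encodings above, assuming a page-aligned 40-bit
 * address: for addr = 0x1_2345_6000, GEN6_GTT_ADDR_ENCODE() ORs in
 * ((addr >> 28) & 0xff0) == 0x10, i.e. address bits 39:32 (0x01) land in
 * PTE bits 11:4, and after truncation to the 32-bit gen6 PTE the address
 * field reads 0x23456010.
 */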
99 
100 /* Cacheability Control is a 4-bit value. The low three bits are stored in
101  * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
102  */
103 #define HSW_CACHEABILITY_CONTROL(bits)	((((bits) & 0x7) << 1) | \
104 					 (((bits) & 0x8) << (11 - 3)))
105 #define HSW_WB_LLC_AGE3			HSW_CACHEABILITY_CONTROL(0x2)
106 #define HSW_WB_LLC_AGE0			HSW_CACHEABILITY_CONTROL(0x3)
107 #define HSW_WB_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0xb)
108 #define HSW_WB_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x8)
109 #define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
110 #define HSW_WT_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x7)
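/*
 * For example, HSW_CACHEABILITY_CONTROL(0xb) == 0x806: the low index bits
 * 0b011 land in PTE bits 3:1 (0x6) and the high index bit lands in PTE
 * bit 11 (0x800).
 */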
111 
112 #define GEN8_PTES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_gtt_pte_t))
113 #define GEN8_PDES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
114 
115 /* GEN8 legacy style address is defined as a 3 level page table:
116  * 31:30 | 29:21 | 20:12 |  11:0
117  * PDPE  |  PDE  |  PTE  | offset
118  * The difference compared to a normal x86 3-level page table is that the
119  * PDPEs are programmed via register.
120  */
121 #define GEN8_PDPE_SHIFT			30
122 #define GEN8_PDPE_MASK			0x3
123 #define GEN8_PDE_SHIFT			21
124 #define GEN8_PDE_MASK			0x1ff
125 #define GEN8_PTE_SHIFT			12
126 #define GEN8_PTE_MASK			0x1ff
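/*
 * Example decode with the shifts/masks above: GPU address 0x40201000 yields
 * PDPE 1, PDE 1, PTE 1 and page offset 0.
 */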
127 
128 #define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
129 #define PPAT_CACHED_PDE_INDEX		0 /* WB LLC */
130 #define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLC/eLLC */
131 #define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */
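/*
 * Note: these reuse the x86 PWT/PCD/PAT PTE bits as the index into the
 * private PPAT, which gen8_setup_private_ppat() is presumed (here) to
 * program with matching cacheability entries.
 */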
132 
133 static void ppgtt_bind_vma(struct i915_vma *vma,
134 			   enum i915_cache_level cache_level,
135 			   u32 flags);
136 static void ppgtt_unbind_vma(struct i915_vma *vma);
137 static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt);
138 
139 static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
140 					     enum i915_cache_level level,
141 					     bool valid)
142 {
143 	gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
144 	pte |= addr;
145 	if (level != I915_CACHE_NONE)
146 		pte |= PPAT_CACHED_INDEX;
147 	else
148 		pte |= PPAT_UNCACHED_INDEX;
149 	return pte;
150 }
151 
152 static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev,
153 					     dma_addr_t addr,
154 					     enum i915_cache_level level)
155 {
156 	gen8_ppgtt_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
157 	pde |= addr;
158 	if (level != I915_CACHE_NONE)
159 		pde |= PPAT_CACHED_PDE_INDEX;
160 	else
161 		pde |= PPAT_UNCACHED_INDEX;
162 	return pde;
163 }
164 
165 static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
166 				     enum i915_cache_level level,
167 				     bool valid)
168 {
169 	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
170 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
171 
172 	switch (level) {
173 	case I915_CACHE_L3_LLC:
174 	case I915_CACHE_LLC:
175 		pte |= GEN6_PTE_CACHE_LLC;
176 		break;
177 	case I915_CACHE_NONE:
178 		pte |= GEN6_PTE_UNCACHED;
179 		break;
180 	default:
181 		WARN_ON(1);
182 	}
183 
184 	return pte;
185 }
186 
187 static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
188 				     enum i915_cache_level level,
189 				     bool valid)
190 {
191 	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
192 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
193 
194 	switch (level) {
195 	case I915_CACHE_L3_LLC:
196 		pte |= GEN7_PTE_CACHE_L3_LLC;
197 		break;
198 	case I915_CACHE_LLC:
199 		pte |= GEN6_PTE_CACHE_LLC;
200 		break;
201 	case I915_CACHE_NONE:
202 		pte |= GEN6_PTE_UNCACHED;
203 		break;
204 	default:
205 		WARN_ON(1);
206 	}
207 
208 	return pte;
209 }
210 
211 #define BYT_PTE_WRITEABLE		(1 << 1)
212 #define BYT_PTE_SNOOPED_BY_CPU_CACHES	(1 << 2)
213 
214 static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
215 				     enum i915_cache_level level,
216 				     bool valid)
217 {
218 	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
219 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
220 
221 	/* Mark the page as writeable.  Other platforms don't have a
222 	 * setting for read-only/writable, so this matches that behavior.
223 	 */
224 	pte |= BYT_PTE_WRITEABLE;
225 
226 	if (level != I915_CACHE_NONE)
227 		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
228 
229 	return pte;
230 }
231 
232 static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
233 				     enum i915_cache_level level,
234 				     bool valid)
235 {
236 	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
237 	pte |= HSW_PTE_ADDR_ENCODE(addr);
238 
239 	if (level != I915_CACHE_NONE)
240 		pte |= HSW_WB_LLC_AGE3;
241 
242 	return pte;
243 }
244 
245 static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
246 				      enum i915_cache_level level,
247 				      bool valid)
248 {
249 	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
250 	pte |= HSW_PTE_ADDR_ENCODE(addr);
251 
252 	switch (level) {
253 	case I915_CACHE_NONE:
254 		break;
255 	case I915_CACHE_WT:
256 		pte |= HSW_WT_ELLC_LLC_AGE3;
257 		break;
258 	default:
259 		pte |= HSW_WB_ELLC_LLC_AGE3;
260 		break;
261 	}
262 
263 	return pte;
264 }
265 
266 /* Broadwell Page Directory Pointer Descriptors */
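/*
 * Each of the four PDP entries is a 64-bit value split across an upper and
 * lower dword register (UDW/LDW); they are written either directly via MMIO
 * (synchronous) or with MI_LOAD_REGISTER_IMM commands on the ring.
 */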
267 static int gen8_write_pdp(struct intel_ring_buffer *ring, unsigned entry,
268 			   uint64_t val, bool synchronous)
269 {
270 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
271 	int ret;
272 
273 	BUG_ON(entry >= 4);
274 
275 	if (synchronous) {
276 		I915_WRITE(GEN8_RING_PDP_UDW(ring, entry), val >> 32);
277 		I915_WRITE(GEN8_RING_PDP_LDW(ring, entry), (u32)val);
278 		return 0;
279 	}
280 
281 	ret = intel_ring_begin(ring, 6);
282 	if (ret)
283 		return ret;
284 
285 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
286 	intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
287 	intel_ring_emit(ring, (u32)(val >> 32));
288 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
289 	intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
290 	intel_ring_emit(ring, (u32)(val));
291 	intel_ring_advance(ring);
292 
293 	return 0;
294 }
295 
296 static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
297 			  struct intel_ring_buffer *ring,
298 			  bool synchronous)
299 {
300 	int i, ret;
301 
302 	/* bit of a hack to find the actual last used pd */
303 	int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;
304 
305 	for (i = used_pd - 1; i >= 0; i--) {
306 #ifdef __NetBSD__
307 		const bus_addr_t addr =
308 		    ppgtt->u.gen8->pd_map->dm_segs[i].ds_addr;
309 #else
310 		dma_addr_t addr = ppgtt->pd_dma_addr[i];
311 #endif
312 		ret = gen8_write_pdp(ring, i, addr, synchronous);
313 		if (ret)
314 			return ret;
315 	}
316 
317 	return 0;
318 }
319 
320 static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
321 				   uint64_t start,
322 				   uint64_t length,
323 				   bool use_scratch)
324 {
325 	struct i915_hw_ppgtt *ppgtt =
326 		container_of(vm, struct i915_hw_ppgtt, base);
327 	gen8_gtt_pte_t *pt_vaddr, scratch_pte;
328 	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
329 	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
330 	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
331 	unsigned num_entries = length >> PAGE_SHIFT;
332 	unsigned last_pte, i;
333 #ifdef __NetBSD__
334 	void *kva;
335 	int ret;
336 #endif
337 
338 	scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
339 				      I915_CACHE_LLC, use_scratch);
340 
341 	while (num_entries) {
342 #ifndef __NetBSD__
343 		struct page *page_table = ppgtt->gen8_pt_pages[pdpe][pde];
344 #endif
345 
346 		last_pte = pte + num_entries;
347 		if (last_pte > GEN8_PTES_PER_PAGE)
348 			last_pte = GEN8_PTES_PER_PAGE;
349 
350 #ifdef __NetBSD__
351 		/* XXX errno NetBSD->Linux */
352 		ret = -bus_dmamem_map(ppgtt->base.dev->dmat,
353 		    &ppgtt->u.gen8->pd[pdpe].pt_segs[pde], 1, PAGE_SIZE, &kva,
354 		    BUS_DMA_NOWAIT);
355 		if (ret) {
356 			/*
357 			 * XXX Should guarantee mapping earlier with
358 			 * uvm_emap(9) or something.
359 			 */
360 			device_printf(ppgtt->base.dev->dev,
361 			    "failed to map page table: %d\n", -ret);
362 			goto skip;
363 		}
364 		pt_vaddr = kva;
365 #else
366 		pt_vaddr = kmap_atomic(page_table);
367 #endif
368 
369 		for (i = pte; i < last_pte; i++) {
370 			pt_vaddr[i] = scratch_pte;
371 			num_entries--;
372 		}
373 
374 #ifdef __NetBSD__
375 		bus_dmamem_unmap(ppgtt->base.dev->dmat, kva, PAGE_SIZE);
376 skip:;
377 #else
378 		kunmap_atomic(pt_vaddr);
379 #endif
380 
381 		pte = 0;
382 		if (++pde == GEN8_PDES_PER_PAGE) {
383 			pdpe++;
384 			pde = 0;
385 		}
386 	}
387 }
388 
389 #ifdef __NetBSD__
390 static void
391 gen8_ppgtt_insert_entries(struct i915_address_space *vm, bus_dmamap_t dmamap,
392     uint64_t start, enum i915_cache_level cache_level)
393 {
394 	struct i915_hw_ppgtt *ppgtt =
395 		container_of(vm, struct i915_hw_ppgtt, base);
396 	gen8_gtt_pte_t *pt_vaddr;
397 	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
398 	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
399 	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
400 	unsigned seg;
401 	void *kva;
402 	int ret;
403 
404 	pt_vaddr = NULL;
405 	KASSERT(0 < dmamap->dm_nsegs);
406 	for (seg = 0; seg < dmamap->dm_nsegs; seg++) {
407 		KASSERT(dmamap->dm_segs[seg].ds_len == PAGE_SIZE);
408 		if (WARN_ON(pdpe >= GEN8_LEGACY_PDPS))
409 			break;
410 		if (pt_vaddr == NULL) {
411 			/* XXX errno NetBSD->Linux */
412 			ret = -bus_dmamem_map(ppgtt->base.dev->dmat,
413 			    &ppgtt->u.gen8->pd[pdpe].pt_segs[pde], 1,
414 			    PAGE_SIZE, &kva, BUS_DMA_NOWAIT);
415 			if (ret) {
416 				/*
417 				 * XXX Should guarantee mapping earlier
418 				 * with uvm_emap(9) or something.
419 				 */
420 				device_printf(ppgtt->base.dev->dev,
421 				    "failed to map page table: %d\n", -ret);
422 				goto skip;
423 			}
424 			pt_vaddr = kva;
425 		}
426 		pt_vaddr[pte] = gen8_pte_encode(dmamap->dm_segs[seg].ds_addr,
427 		    cache_level, true);
428 skip:		if (++pte == GEN8_PTES_PER_PAGE) {
429 			bus_dmamem_unmap(ppgtt->base.dev->dmat, kva,
430 			    PAGE_SIZE);
431 			pt_vaddr = NULL;
432 			if (++pde == GEN8_PDES_PER_PAGE) {
433 				pdpe++;
434 				pde = 0;
435 			}
436 			pte = 0;
437 		}
438 	}
439 	if (pt_vaddr)
440 		bus_dmamem_unmap(ppgtt->base.dev->dmat, kva, PAGE_SIZE);
441 }
442 
443 static void	gen8_ppgtt_cleanup(struct i915_address_space *);
444 static int	gen8_ppgtt_alloc(struct i915_hw_ppgtt *, unsigned);
445 static void	gen8_ppgtt_free(struct i915_hw_ppgtt *);
446 static int	gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *,
447 		    unsigned);
448 static void	gen8_ppgtt_free_page_directories(struct i915_hw_ppgtt *);
449 static int	gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *,
450 		    unsigned);
451 static void	gen8_ppgtt_free_page_tables(struct i915_hw_ppgtt *);
452 
453 static int
454 gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
455 {
456 	const unsigned npdp = DIV_ROUND_UP(size, (1U << 30));
457 	const unsigned min_pt_pages = (GEN8_PDES_PER_PAGE * npdp);
458 	unsigned i, j;
459 	int ret;
460 
461 	/* Allocate the PPGTT structures.  */
462 	ret = gen8_ppgtt_alloc(ppgtt, npdp);
463 	if (ret)
464 		goto fail0;
465 
466 	/* Fill the page directory entries.  */
467 	for (i = 0; i < npdp; i++) {
468 		void *kva;
469 
470 		/* XXX errno NetBSD->Linux */
471 		ret = -bus_dmamem_map(ppgtt->base.dev->dmat,
472 		    &ppgtt->u.gen8->pd_segs[i], 1, PAGE_SIZE, &kva,
473 		    BUS_DMA_WAITOK);
474 		if (ret)
475 			goto fail1;
476 
477 		gen8_ppgtt_pde_t *const pd = kva;
478 		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
479 			const bus_dma_segment_t *const seg =
480 			    &ppgtt->u.gen8->pd[i].pt_segs[j];
481 			KASSERT(seg->ds_len == PAGE_SIZE);
482 			pd[j] = gen8_pde_encode(ppgtt->base.dev, seg->ds_addr,
483 			    I915_CACHE_LLC);
484 		}
485 		bus_dmamem_unmap(ppgtt->base.dev->dmat, kva, PAGE_SIZE);
486 	}
487 
488 	ppgtt->enable = gen8_ppgtt_enable;
489 	ppgtt->switch_mm = gen8_mm_switch;
490 	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
491 	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
492 	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
493 	ppgtt->base.start = 0;
494 	ppgtt->base.total = ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE * PAGE_SIZE;
495 
496 	ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
497 
498 	DRM_DEBUG_DRIVER("Allocated %u pages for page directories\n", npdp);
499 	DRM_DEBUG_DRIVER("Allocated %u pages for page tables"
500 	    " (%"PRIxMAX" wasted)\n",
501 	    ppgtt->num_pd_entries,
502 	    ((uintmax_t)(ppgtt->num_pd_entries - min_pt_pages) +
503 		(size % (1<<30))));
504 
505 	/* Success!  */
506 	return 0;
507 
508 fail1:	gen8_ppgtt_free(ppgtt);
509 fail0:	KASSERT(ret);
510 	return ret;
511 }
512 
513 static void
514 gen8_ppgtt_cleanup(struct i915_address_space *vm)
515 {
516 	struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt,
517 	    base);
518 
519 	list_del(&vm->global_link);
520 	drm_mm_takedown(&vm->mm);
521 
522 	gen8_ppgtt_free(ppgtt);
523 }
524 
525 static int
526 gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt, unsigned npdp)
527 {
528 	int ret;
529 
530 	/*
531 	 * XXX This is a very large (48 MB) allocation!  However, it
532 	 * can't really be made smaller than 8 MB, since we need a
533 	 * contiguous array of DMA segments for the page tables.  I
534 	 * expect this to be used mainly on machines with lots of
535 	 * memory, so...
536 	 */
537 	ppgtt->u.gen8 = kmem_alloc(sizeof(*ppgtt->u.gen8), KM_SLEEP);
538 
539 	ret = gen8_ppgtt_allocate_page_directories(ppgtt, npdp);
540 	if (ret)
541 		goto fail0;
542 	ppgtt->num_pd_entries = (npdp * GEN8_PDES_PER_PAGE);
543 	ret = gen8_ppgtt_allocate_page_tables(ppgtt, npdp);
544 	if (ret)
545 		goto fail1;
546 
547 	/* Success!  */
548 	return 0;
549 
550 fail2: __unused
551 	gen8_ppgtt_free_page_tables(ppgtt);
552 fail1:	gen8_ppgtt_free_page_directories(ppgtt);
553 fail0:	KASSERT(ret);
554 	kmem_free(ppgtt->u.gen8, sizeof(*ppgtt->u.gen8));
555 	return ret;
556 }
557 
558 static void
559 gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
560 {
561 
562 	gen8_ppgtt_free_page_tables(ppgtt);
563 	gen8_ppgtt_free_page_directories(ppgtt);
564 	kmem_free(ppgtt->u.gen8, sizeof(*ppgtt->u.gen8));
565 }
566 
567 static int
568 gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt,
569     unsigned npdp)
570 {
571 	const bus_size_t pd_bytes = (npdp << PAGE_SHIFT);
572 	const int nsegs = npdp;
573 	int rsegs;
574 	int ret;
575 
576 	ppgtt->u.gen8->npdp = npdp;
577 
578 	KASSERT(nsegs <= GEN8_LEGACY_PDPS);
579 	CTASSERT(GEN8_LEGACY_PDPS == __arraycount(ppgtt->u.gen8->pd_segs));
580 
581 	/* XXX errno NetBSD->Linux */
582 	ret = -bus_dmamem_alloc(ppgtt->base.dev->dmat, pd_bytes, PAGE_SIZE,
583 	    PAGE_SIZE, ppgtt->u.gen8->pd_segs, nsegs, &rsegs, BUS_DMA_WAITOK);
584 	if (ret)
585 		goto fail0;
586 	KASSERT(rsegs == nsegs);
587 
588 	/* XXX errno NetBSD->Linux */
589 	ret = -bus_dmamap_create(ppgtt->base.dev->dmat, pd_bytes, nsegs,
590 	    PAGE_SIZE, 0, BUS_DMA_WAITOK, &ppgtt->u.gen8->pd_map);
591 	if (ret)
592 		goto fail1;
593 
594 	/* XXX errno NetBSD->Linux */
595 	ret = -bus_dmamap_load_raw(ppgtt->base.dev->dmat,
596 	    ppgtt->u.gen8->pd_map, ppgtt->u.gen8->pd_segs, nsegs, pd_bytes,
597 	    BUS_DMA_WAITOK);
598 	if (ret)
599 		goto fail2;
600 
601 	/* Success!  */
602 	return 0;
603 
604 fail3: __unused
605 	bus_dmamap_unload(ppgtt->base.dev->dmat, ppgtt->u.gen8->pd_map);
606 fail2:	bus_dmamap_destroy(ppgtt->base.dev->dmat, ppgtt->u.gen8->pd_map);
607 fail1:	bus_dmamem_free(ppgtt->base.dev->dmat, ppgtt->u.gen8->pd_segs,
608 	    ppgtt->u.gen8->npdp);
609 fail0:	KASSERT(ret);
610 	return ret;
611 }
612 
613 static void
614 gen8_ppgtt_free_page_directories(struct i915_hw_ppgtt *ppgtt)
615 {
616 
617 	bus_dmamap_unload(ppgtt->base.dev->dmat, ppgtt->u.gen8->pd_map);
618 	bus_dmamap_destroy(ppgtt->base.dev->dmat, ppgtt->u.gen8->pd_map);
619 	bus_dmamem_free(ppgtt->base.dev->dmat, ppgtt->u.gen8->pd_segs,
620 	    ppgtt->u.gen8->npdp);
621 }
622 
623 static int
624 gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt, unsigned npdp)
625 {
626 	unsigned i, j;
627 	int rsegs;
628 	int ret;
629 
630 	for (i = 0; i < npdp; i++) {
631 		CTASSERT(__arraycount(ppgtt->u.gen8->pd[i].pt_segs) ==
632 		    GEN8_PDES_PER_PAGE);
633 		/* XXX errno NetBSD->Linux */
634 		ret = -bus_dmamem_alloc(ppgtt->base.dev->dmat,
635 		    (PAGE_SIZE * GEN8_PDES_PER_PAGE), PAGE_SIZE, PAGE_SIZE,
636 		    ppgtt->u.gen8->pd[i].pt_segs, GEN8_PDES_PER_PAGE, &rsegs,
637 		    BUS_DMA_WAITOK);
638 		if (ret)
639 			goto fail0;
640 		KASSERT(rsegs == GEN8_PDES_PER_PAGE);
641 		/* XXX errno NetBSD->Linux */
642 		ret = -bus_dmamap_create(ppgtt->base.dev->dmat,
643 		    (PAGE_SIZE * GEN8_PDES_PER_PAGE), GEN8_PDES_PER_PAGE,
644 		    PAGE_SIZE, 0, BUS_DMA_WAITOK,
645 		    &ppgtt->u.gen8->pd[i].pt_map);
646 		if (ret)
647 			goto fail1;
648 		/* XXX errno NetBSD->Linux */
649 		ret = -bus_dmamap_load_raw(ppgtt->base.dev->dmat,
650 		    ppgtt->u.gen8->pd[i].pt_map, ppgtt->u.gen8->pd[i].pt_segs,
651 		    GEN8_PDES_PER_PAGE, PAGE_SIZE, BUS_DMA_WAITOK);
652 		if (ret)
653 			goto fail2;
654 		continue;
655 
656 fail3: __unused
657 		bus_dmamap_unload(ppgtt->base.dev->dmat,
658 		    ppgtt->u.gen8->pd[i].pt_map);
659 fail2:		bus_dmamap_destroy(ppgtt->base.dev->dmat,
660 		    ppgtt->u.gen8->pd[i].pt_map);
661 fail1:		bus_dmamem_free(ppgtt->base.dev->dmat,
662 		    ppgtt->u.gen8->pd[i].pt_segs, GEN8_PDES_PER_PAGE);
663 fail0:		goto fail;
664 	}
665 
666 	/* Success!  */
667 	return 0;
668 
669 fail:	KASSERT(ret);
670 	for (j = 0; j < i; j++) {
671 		bus_dmamap_unload(ppgtt->base.dev->dmat,
672 		    ppgtt->u.gen8->pd[j].pt_map);
673 		bus_dmamap_destroy(ppgtt->base.dev->dmat,
674 		    ppgtt->u.gen8->pd[j].pt_map);
675 		bus_dmamem_free(ppgtt->base.dev->dmat,
676 		    ppgtt->u.gen8->pd[j].pt_segs, GEN8_PDES_PER_PAGE);
677 	}
678 	return ret;
679 }
680 
681 static void
682 gen8_ppgtt_free_page_tables(struct i915_hw_ppgtt *ppgtt)
683 {
684 	unsigned i;
685 
686 	for (i = 0; i < ppgtt->u.gen8->npdp; i++) {
687 		bus_dmamap_unload(ppgtt->base.dev->dmat,
688 		    ppgtt->u.gen8->pd[i].pt_map);
689 		bus_dmamap_destroy(ppgtt->base.dev->dmat,
690 		    ppgtt->u.gen8->pd[i].pt_map);
691 		bus_dmamem_free(ppgtt->base.dev->dmat,
692 		    ppgtt->u.gen8->pd[i].pt_segs, GEN8_PDES_PER_PAGE);
693 	}
694 }
695 #else
696 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
697 				      struct sg_table *pages,
698 				      uint64_t start,
699 				      enum i915_cache_level cache_level)
700 {
701 	struct i915_hw_ppgtt *ppgtt =
702 		container_of(vm, struct i915_hw_ppgtt, base);
703 	gen8_gtt_pte_t *pt_vaddr;
704 	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
705 	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
706 	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
707 	struct sg_page_iter sg_iter;
708 
709 	pt_vaddr = NULL;
710 
711 	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
712 		if (WARN_ON(pdpe >= GEN8_LEGACY_PDPS))
713 			break;
714 
715 		if (pt_vaddr == NULL)
716 			pt_vaddr = kmap_atomic(ppgtt->gen8_pt_pages[pdpe][pde]);
717 
718 		pt_vaddr[pte] =
719 			gen8_pte_encode(sg_page_iter_dma_address(&sg_iter),
720 					cache_level, true);
721 		if (++pte == GEN8_PTES_PER_PAGE) {
722 			kunmap_atomic(pt_vaddr);
723 			pt_vaddr = NULL;
724 			if (++pde == GEN8_PDES_PER_PAGE) {
725 				pdpe++;
726 				pde = 0;
727 			}
728 			pte = 0;
729 		}
730 	}
731 	if (pt_vaddr)
732 		kunmap_atomic(pt_vaddr);
733 }
734 
735 static void gen8_free_page_tables(struct page **pt_pages)
736 {
737 	int i;
738 
739 	if (pt_pages == NULL)
740 		return;
741 
742 	for (i = 0; i < GEN8_PDES_PER_PAGE; i++)
743 		if (pt_pages[i])
744 			__free_pages(pt_pages[i], 0);
745 }
746 
747 static void gen8_ppgtt_free(const struct i915_hw_ppgtt *ppgtt)
748 {
749 	int i;
750 
751 	for (i = 0; i < ppgtt->num_pd_pages; i++) {
752 		gen8_free_page_tables(ppgtt->gen8_pt_pages[i]);
753 		kfree(ppgtt->gen8_pt_pages[i]);
754 		kfree(ppgtt->gen8_pt_dma_addr[i]);
755 	}
756 
757 	__free_pages(ppgtt->pd_pages, get_order(ppgtt->num_pd_pages << PAGE_SHIFT));
758 }
759 
760 static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
761 {
762 	struct pci_dev *hwdev = ppgtt->base.dev->pdev;
763 	int i, j;
764 
765 	for (i = 0; i < ppgtt->num_pd_pages; i++) {
766 		/* TODO: In the future we'll support sparse mappings, so this
767 		 * will have to change. */
768 		if (!ppgtt->pd_dma_addr[i])
769 			continue;
770 
771 		pci_unmap_page(hwdev, ppgtt->pd_dma_addr[i], PAGE_SIZE,
772 			       PCI_DMA_BIDIRECTIONAL);
773 
774 		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
775 			dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
776 			if (addr)
777 				pci_unmap_page(hwdev, addr, PAGE_SIZE,
778 					       PCI_DMA_BIDIRECTIONAL);
779 		}
780 	}
781 }
782 
783 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
784 {
785 	struct i915_hw_ppgtt *ppgtt =
786 		container_of(vm, struct i915_hw_ppgtt, base);
787 
788 	list_del(&vm->global_link);
789 	drm_mm_takedown(&vm->mm);
790 
791 	gen8_ppgtt_unmap_pages(ppgtt);
792 	gen8_ppgtt_free(ppgtt);
793 }
794 
795 static struct page **__gen8_alloc_page_tables(void)
796 {
797 	struct page **pt_pages;
798 	int i;
799 
800 	pt_pages = kcalloc(GEN8_PDES_PER_PAGE, sizeof(struct page *), GFP_KERNEL);
801 	if (!pt_pages)
802 		return ERR_PTR(-ENOMEM);
803 
804 	for (i = 0; i < GEN8_PDES_PER_PAGE; i++) {
805 		pt_pages[i] = alloc_page(GFP_KERNEL);
806 		if (!pt_pages[i])
807 			goto bail;
808 	}
809 
810 	return pt_pages;
811 
812 bail:
813 	gen8_free_page_tables(pt_pages);
814 	kfree(pt_pages);
815 	return ERR_PTR(-ENOMEM);
816 }
817 
818 static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt,
819 					   const int max_pdp)
820 {
821 	struct page **pt_pages[GEN8_LEGACY_PDPS];
822 	int i, ret;
823 
824 	for (i = 0; i < max_pdp; i++) {
825 		pt_pages[i] = __gen8_alloc_page_tables();
826 		if (IS_ERR(pt_pages[i])) {
827 			ret = PTR_ERR(pt_pages[i]);
828 			goto unwind_out;
829 		}
830 	}
831 
832 	/* NB: Avoid touching gen8_pt_pages until the end to keep the allocation
833 	 * "atomic" for cleanup purposes.
834 	 */
835 	for (i = 0; i < max_pdp; i++)
836 		ppgtt->gen8_pt_pages[i] = pt_pages[i];
837 
838 	return 0;
839 
840 unwind_out:
841 	while (i--) {
842 		gen8_free_page_tables(pt_pages[i]);
843 		kfree(pt_pages[i]);
844 	}
845 
846 	return ret;
847 }
848 
849 static int gen8_ppgtt_allocate_dma(struct i915_hw_ppgtt *ppgtt)
850 {
851 	int i;
852 
853 	for (i = 0; i < ppgtt->num_pd_pages; i++) {
854 		ppgtt->gen8_pt_dma_addr[i] = kcalloc(GEN8_PDES_PER_PAGE,
855 						     sizeof(dma_addr_t),
856 						     GFP_KERNEL);
857 		if (!ppgtt->gen8_pt_dma_addr[i])
858 			return -ENOMEM;
859 	}
860 
861 	return 0;
862 }
863 
864 static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt,
865 						const int max_pdp)
866 {
867 	ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT));
868 	if (!ppgtt->pd_pages)
869 		return -ENOMEM;
870 
871 	ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT);
872 	BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS);
873 
874 	return 0;
875 }
876 
877 static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt,
878 			    const int max_pdp)
879 {
880 	int ret;
881 
882 	ret = gen8_ppgtt_allocate_page_directories(ppgtt, max_pdp);
883 	if (ret)
884 		return ret;
885 
886 	ret = gen8_ppgtt_allocate_page_tables(ppgtt, max_pdp);
887 	if (ret) {
888 		__free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT));
889 		return ret;
890 	}
891 
892 	ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE;
893 
894 	ret = gen8_ppgtt_allocate_dma(ppgtt);
895 	if (ret)
896 		gen8_ppgtt_free(ppgtt);
897 
898 	return ret;
899 }
900 
901 static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt,
902 					     const int pd)
903 {
904 	dma_addr_t pd_addr;
905 	int ret;
906 
907 	pd_addr = pci_map_page(ppgtt->base.dev->pdev,
908 			       &ppgtt->pd_pages[pd], 0,
909 			       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
910 
911 	ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr);
912 	if (ret)
913 		return ret;
914 
915 	ppgtt->pd_dma_addr[pd] = pd_addr;
916 
917 	return 0;
918 }
919 
920 static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt,
921 					const int pd,
922 					const int pt)
923 {
924 	dma_addr_t pt_addr;
925 	struct page *p;
926 	int ret;
927 
928 	p = ppgtt->gen8_pt_pages[pd][pt];
929 	pt_addr = pci_map_page(ppgtt->base.dev->pdev,
930 			       p, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
931 	ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pt_addr);
932 	if (ret)
933 		return ret;
934 
935 	ppgtt->gen8_pt_dma_addr[pd][pt] = pt_addr;
936 
937 	return 0;
938 }
939 
940 /**
941  * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
942  * GEN8 legacy ppgtt programming is accomplished through at most 4 PDP
943  * registers, with a net effect resembling a 2-level page table in normal x86
944  * terms. Each PDP covers 1GB of memory: 4 * 512 * 512 * 4096 = 4GB of legacy
945  * 32b address space.
946  * FIXME: split allocation into smaller pieces. For now we only ever do this
947  * once, but with full PPGTT, the multiple contiguous allocations will be bad.
948  * TODO: Do something with the size parameter
949  */
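/*
 * For example, a 4GB request below gives max_pdp = 4 page-directory pages
 * and 4 * 512 = 2048 page tables, exactly enough 4KB PTE pages to map the
 * full 4GB.
 */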
950 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
951 {
952 	const int max_pdp = DIV_ROUND_UP(size, 1 << 30);
953 	const int min_pt_pages = GEN8_PDES_PER_PAGE * max_pdp;
954 	int i, j, ret;
955 
956 	if (size % (1<<30))
957 		DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size);
958 
959 	/* 1. Do all our allocations for page directories and page tables. */
960 	ret = gen8_ppgtt_alloc(ppgtt, max_pdp);
961 	if (ret)
962 		return ret;
963 
964 	/*
965 	 * 2. Create DMA mappings for the page directories and page tables.
966 	 */
967 	for (i = 0; i < max_pdp; i++) {
968 		ret = gen8_ppgtt_setup_page_directories(ppgtt, i);
969 		if (ret)
970 			goto bail;
971 
972 		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
973 			ret = gen8_ppgtt_setup_page_tables(ppgtt, i, j);
974 			if (ret)
975 				goto bail;
976 		}
977 	}
978 
979 	/*
980 	 * 3. Map all the page directory entries to point to the page tables
981 	 * we've allocated.
982 	 *
983 	 * For now, the PPGTT helper functions all require that the PDEs are
984 	 * plugged in correctly. So we do that now/here. For aliasing PPGTT, we
985 	 * will never need to touch the PDEs again.
986 	 */
987 	for (i = 0; i < max_pdp; i++) {
988 		gen8_ppgtt_pde_t *pd_vaddr;
989 		pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]);
990 		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
991 			dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
992 			pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
993 						      I915_CACHE_LLC);
994 		}
995 		kunmap_atomic(pd_vaddr);
996 	}
997 
998 	ppgtt->enable = gen8_ppgtt_enable;
999 	ppgtt->switch_mm = gen8_mm_switch;
1000 	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
1001 	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
1002 	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
1003 	ppgtt->base.start = 0;
1004 	ppgtt->base.total = ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE * PAGE_SIZE;
1005 
1006 	ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
1007 
1008 	DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
1009 			 ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp);
1010 	DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n",
1011 			 ppgtt->num_pd_entries,
1012 			 (ppgtt->num_pd_entries - min_pt_pages) + size % (1<<30));
1013 	return 0;
1014 
1015 bail:
1016 	gen8_ppgtt_unmap_pages(ppgtt);
1017 	gen8_ppgtt_free(ppgtt);
1018 	return ret;
1019 }
1020 #endif
1021 
1022 #ifndef __NetBSD__
1023 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1024 {
1025 	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
1026 	struct i915_address_space *vm = &ppgtt->base;
1027 	gen6_gtt_pte_t __iomem *pd_addr;
1028 	gen6_gtt_pte_t scratch_pte;
1029 	uint32_t pd_entry;
1030 	int pte, pde;
1031 
1032 	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
1033 
1034 	pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm +
1035 		ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
1036 
1037 	seq_printf(m, "  VM %p (pd_offset %x-%x):\n", vm,
1038 		   ppgtt->pd_offset, ppgtt->pd_offset + ppgtt->num_pd_entries);
1039 	for (pde = 0; pde < ppgtt->num_pd_entries; pde++) {
1040 		u32 expected;
1041 		gen6_gtt_pte_t *pt_vaddr;
1042 		dma_addr_t pt_addr = ppgtt->pt_dma_addr[pde];
1043 		pd_entry = readl(pd_addr + pde);
1044 		expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
1045 
1046 		if (pd_entry != expected)
1047 			seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
1048 				   pde,
1049 				   pd_entry,
1050 				   expected);
1051 		seq_printf(m, "\tPDE: %x\n", pd_entry);
1052 
1053 		pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
1054 		for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) {
1055 			unsigned long va =
1056 				(pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) +
1057 				(pte * PAGE_SIZE);
1058 			int i;
1059 			bool found = false;
1060 			for (i = 0; i < 4; i++)
1061 				if (pt_vaddr[pte + i] != scratch_pte)
1062 					found = true;
1063 			if (!found)
1064 				continue;
1065 
1066 			seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
1067 			for (i = 0; i < 4; i++) {
1068 				if (pt_vaddr[pte + i] != scratch_pte)
1069 					seq_printf(m, " %08x", pt_vaddr[pte + i]);
1070 				else
1071 					seq_puts(m, "  SCRATCH ");
1072 			}
1073 			seq_puts(m, "\n");
1074 		}
1075 		kunmap_atomic(pt_vaddr);
1076 	}
1077 }
1078 #endif
1079 
1080 static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
1081 {
1082 #ifdef __NetBSD__
1083 	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
1084 	const bus_space_tag_t bst = dev_priv->gtt.bst;
1085 	const bus_space_handle_t bsh = dev_priv->gtt.bsh;
1086 	const bus_size_t pd_base = ppgtt->u.gen6->pd_base;
1087 	unsigned i;
1088 
1089 	for (i = 0; i < ppgtt->num_pd_entries; i++) {
1090 		const bus_addr_t pt_addr = ppgtt->u.gen6->pt_segs[i].ds_addr;
1091 		uint32_t pd_entry;
1092 
1093 		KASSERT(ppgtt->u.gen6->pt_segs[i].ds_len == PAGE_SIZE);
1094 
1095 		pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
1096 		pd_entry |= GEN6_PDE_VALID;
1097 
1098 		bus_space_write_4(bst, bsh, pd_base + (4*i), pd_entry);
1099 	}
1100 	(void)bus_space_read_4(bst, bsh, pd_base);
1101 #else
1102 	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
1103 	gen6_gtt_pte_t __iomem *pd_addr;
1104 	uint32_t pd_entry;
1105 	int i;
1106 
1107 	WARN_ON(ppgtt->pd_offset & 0x3f);
1108 	pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm +
1109 		ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
1110 	for (i = 0; i < ppgtt->num_pd_entries; i++) {
1111 		dma_addr_t pt_addr;
1112 
1113 		pt_addr = ppgtt->pt_dma_addr[i];
1114 		pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
1115 		pd_entry |= GEN6_PDE_VALID;
1116 
1117 		writel(pd_entry, pd_addr + i);
1118 	}
1119 	readl(pd_addr);
1120 #endif
1121 }
1122 
1123 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
1124 {
1125 #ifdef __NetBSD__
1126 	KASSERT(!ISSET(ppgtt->u.gen6->pd_base, 0x3f));
1127 
1128 	/* XXX 64? 16?  */
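	/*
	 * (Presumably: pd_base is a byte offset into the GGTT PTE array and
	 * the hardware takes the page-directory base in 64-byte units in the
	 * upper 16 bits of PP_DIR_BASE, hence the /64 << 16; compare the
	 * Linux pd_offset variant below.)
	 */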
1129 	return (ppgtt->u.gen6->pd_base / 64) << 16;
1130 #else
1131 	BUG_ON(ppgtt->pd_offset & 0x3f);
1132 
1133 	return (ppgtt->pd_offset / 64) << 16;
1134 #endif
1135 }
1136 
1137 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
1138 			 struct intel_ring_buffer *ring,
1139 			 bool synchronous)
1140 {
1141 	struct drm_device *dev = ppgtt->base.dev;
1142 	struct drm_i915_private *dev_priv = dev->dev_private;
1143 	int ret;
1144 
1145 	/* If we're in reset, we can assume the GPU is sufficiently idle to
1146 	 * manually frob these bits. Ideally we could use the ring functions,
1147 	 * except our error handling makes it quite difficult (can't use
1148 	 * intel_ring_begin, ring->flush, or intel_ring_advance)
1149 	 *
1150 	 * FIXME: We should try not to special case reset
1151 	 */
1152 	if (synchronous ||
1153 	    i915_reset_in_progress(&dev_priv->gpu_error)) {
1154 		WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
1155 		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
1156 		I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
1157 		POSTING_READ(RING_PP_DIR_BASE(ring));
1158 		return 0;
1159 	}
1160 
1161 	/* NB: TLBs must be flushed and invalidated before a switch */
1162 	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1163 	if (ret)
1164 		return ret;
1165 
1166 	ret = intel_ring_begin(ring, 6);
1167 	if (ret)
1168 		return ret;
1169 
1170 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
1171 	intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
1172 	intel_ring_emit(ring, PP_DIR_DCLV_2G);
1173 	intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
1174 	intel_ring_emit(ring, get_pd_offset(ppgtt));
1175 	intel_ring_emit(ring, MI_NOOP);
1176 	intel_ring_advance(ring);
1177 
1178 	return 0;
1179 }
1180 
1181 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
1182 			  struct intel_ring_buffer *ring,
1183 			  bool synchronous)
1184 {
1185 	struct drm_device *dev = ppgtt->base.dev;
1186 	struct drm_i915_private *dev_priv = dev->dev_private;
1187 	int ret;
1188 
1189 	/* If we're in reset, we can assume the GPU is sufficiently idle to
1190 	 * manually frob these bits. Ideally we could use the ring functions,
1191 	 * except our error handling makes it quite difficult (can't use
1192 	 * intel_ring_begin, ring->flush, or intel_ring_advance)
1193 	 *
1194 	 * FIXME: We should try not to special case reset
1195 	 */
1196 	if (synchronous ||
1197 	    i915_reset_in_progress(&dev_priv->gpu_error)) {
1198 		WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
1199 		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
1200 		I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
1201 		POSTING_READ(RING_PP_DIR_BASE(ring));
1202 		return 0;
1203 	}
1204 
1205 	/* NB: TLBs must be flushed and invalidated before a switch */
1206 	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1207 	if (ret)
1208 		return ret;
1209 
1210 	ret = intel_ring_begin(ring, 6);
1211 	if (ret)
1212 		return ret;
1213 
1214 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
1215 	intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
1216 	intel_ring_emit(ring, PP_DIR_DCLV_2G);
1217 	intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
1218 	intel_ring_emit(ring, get_pd_offset(ppgtt));
1219 	intel_ring_emit(ring, MI_NOOP);
1220 	intel_ring_advance(ring);
1221 
1222 	/* XXX: RCS is the only one to auto invalidate the TLBs? */
1223 	if (ring->id != RCS) {
1224 		ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
1225 		if (ret)
1226 			return ret;
1227 	}
1228 
1229 	return 0;
1230 }
1231 
1232 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
1233 			  struct intel_ring_buffer *ring,
1234 			  bool synchronous)
1235 {
1236 	struct drm_device *dev = ppgtt->base.dev;
1237 	struct drm_i915_private *dev_priv = dev->dev_private;
1238 
1239 	if (!synchronous)
1240 		return 0;
1241 
1242 	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
1243 	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
1244 
1245 	POSTING_READ(RING_PP_DIR_DCLV(ring));
1246 
1247 	return 0;
1248 }
1249 
1250 static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
1251 {
1252 	struct drm_device *dev = ppgtt->base.dev;
1253 	struct drm_i915_private *dev_priv = dev->dev_private;
1254 	struct intel_ring_buffer *ring;
1255 	int j, ret;
1256 
1257 	for_each_ring(ring, dev_priv, j) {
1258 		I915_WRITE(RING_MODE_GEN7(ring),
1259 			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1260 
1261 		/* We promise to do a switch later with FULL PPGTT. If this is
1262 		 * aliasing, this is the one and only switch we'll do */
1263 		if (USES_FULL_PPGTT(dev))
1264 			continue;
1265 
1266 		ret = ppgtt->switch_mm(ppgtt, ring, true);
1267 		if (ret)
1268 			goto err_out;
1269 	}
1270 
1271 	return 0;
1272 
1273 err_out:
1274 	for_each_ring(ring, dev_priv, j)
1275 		I915_WRITE(RING_MODE_GEN7(ring),
1276 			   _MASKED_BIT_DISABLE(GFX_PPGTT_ENABLE));
1277 	return ret;
1278 }
1279 
1280 static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
1281 {
1282 	struct drm_device *dev = ppgtt->base.dev;
1283 	struct drm_i915_private *dev_priv = dev->dev_private;
1284 	struct intel_ring_buffer *ring;
1285 	uint32_t ecochk, ecobits;
1286 	int i;
1287 
1288 	ecobits = I915_READ(GAC_ECO_BITS);
1289 	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
1290 
1291 	ecochk = I915_READ(GAM_ECOCHK);
1292 	if (IS_HASWELL(dev)) {
1293 		ecochk |= ECOCHK_PPGTT_WB_HSW;
1294 	} else {
1295 		ecochk |= ECOCHK_PPGTT_LLC_IVB;
1296 		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
1297 	}
1298 	I915_WRITE(GAM_ECOCHK, ecochk);
1299 
1300 	for_each_ring(ring, dev_priv, i) {
1301 		int ret;
1302 		/* GFX_MODE is per-ring on gen7+ */
1303 		I915_WRITE(RING_MODE_GEN7(ring),
1304 			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1305 
1306 		/* We promise to do a switch later with FULL PPGTT. If this is
1307 		 * aliasing, this is the one and only switch we'll do */
1308 		if (USES_FULL_PPGTT(dev))
1309 			continue;
1310 
1311 		ret = ppgtt->switch_mm(ppgtt, ring, true);
1312 		if (ret)
1313 			return ret;
1314 	}
1315 
1316 	return 0;
1317 }
1318 
1319 static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
1320 {
1321 	struct drm_device *dev = ppgtt->base.dev;
1322 	struct drm_i915_private *dev_priv = dev->dev_private;
1323 	struct intel_ring_buffer *ring;
1324 	uint32_t ecochk, gab_ctl, ecobits;
1325 	int i;
1326 
1327 	ecobits = I915_READ(GAC_ECO_BITS);
1328 	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
1329 		   ECOBITS_PPGTT_CACHE64B);
1330 
1331 	gab_ctl = I915_READ(GAB_CTL);
1332 	I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
1333 
1334 	ecochk = I915_READ(GAM_ECOCHK);
1335 	I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
1336 
1337 	I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1338 
1339 	for_each_ring(ring, dev_priv, i) {
1340 		int ret = ppgtt->switch_mm(ppgtt, ring, true);
1341 		if (ret)
1342 			return ret;
1343 	}
1344 
1345 	return 0;
1346 }
1347 
1348 /* PPGTT support for Sandybridge/Gen6 and later */
1349 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
1350 				   uint64_t start,
1351 				   uint64_t length,
1352 				   bool use_scratch)
1353 {
1354 	struct i915_hw_ppgtt *ppgtt =
1355 		container_of(vm, struct i915_hw_ppgtt, base);
1356 	gen6_gtt_pte_t *pt_vaddr, scratch_pte;
1357 	unsigned first_entry = start >> PAGE_SHIFT;
1358 	unsigned num_entries = length >> PAGE_SHIFT;
1359 	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
1360 	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
1361 	unsigned last_pte, i;
1362 #ifdef __NetBSD__
1363 	void *kva;
1364 	int ret;
1365 #endif
1366 
1367 	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
1368 
1369 	while (num_entries) {
1370 		last_pte = first_pte + num_entries;
1371 		if (last_pte > I915_PPGTT_PT_ENTRIES)
1372 			last_pte = I915_PPGTT_PT_ENTRIES;
1373 
1374 #ifdef __NetBSD__
1375 		/* XXX errno NetBSD->Linux */
1376 		ret = -bus_dmamem_map(ppgtt->base.dev->dmat,
1377 		    &ppgtt->u.gen6->pt_segs[act_pt], 1, PAGE_SIZE, &kva,
1378 		    BUS_DMA_NOWAIT);
1379 		if (ret) {
1380 			/*
1381 			 * XXX Should guarantee mapping earlier with
1382 			 * uvm_emap(9) or something.
1383 			 */
1384 			device_printf(ppgtt->base.dev->dev,
1385 			    "failed to map page table: %d\n", -ret);
1386 			goto skip;
1387 		}
1388 		pt_vaddr = kva;
1389 #else
1390 		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
1391 #endif
1392 
1393 		for (i = first_pte; i < last_pte; i++)
1394 			pt_vaddr[i] = scratch_pte;
1395 
1396 #ifdef __NetBSD__
1397 		bus_dmamem_unmap(ppgtt->base.dev->dmat, kva, PAGE_SIZE);
1398 skip:
1399 #else
1400 		kunmap_atomic(pt_vaddr);
1401 #endif
1402 
1403 		num_entries -= last_pte - first_pte;
1404 		first_pte = 0;
1405 		act_pt++;
1406 	}
1407 }
1408 
1409 #ifdef __NetBSD__
1410 static void
1411 gen6_ppgtt_insert_entries(struct i915_address_space *vm, bus_dmamap_t dmamap,
1412     uint64_t start, enum i915_cache_level cache_level)
1413 {
1414 	struct i915_hw_ppgtt *ppgtt =
1415 		container_of(vm, struct i915_hw_ppgtt, base);
1416 	gen6_gtt_pte_t *pt_vaddr;
1417 	unsigned first_entry = start >> PAGE_SHIFT;
1418 	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
1419 	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
1420 	unsigned seg;
1421 	void *kva;
1422 	int ret;
1423 
1424 	pt_vaddr = NULL;
1425 	KASSERT(0 < dmamap->dm_nsegs);
1426 	for (seg = 0; seg < dmamap->dm_nsegs; seg++) {
1427 		KASSERT(dmamap->dm_segs[seg].ds_len == PAGE_SIZE);
1428 		if (pt_vaddr == NULL) {
1429 			/* XXX errno NetBSD->Linux */
1430 			ret = -bus_dmamem_map(ppgtt->base.dev->dmat,
1431 			    &ppgtt->u.gen6->pt_segs[act_pt], 1,
1432 			    PAGE_SIZE, &kva, BUS_DMA_NOWAIT);
1433 			if (ret) {
1434 				/*
1435 				 * XXX Should guarantee mapping earlier
1436 				 * with uvm_emap(9) or something.
1437 				 */
1438 				device_printf(ppgtt->base.dev->dev,
1439 				    "failed to map page table: %d\n", -ret);
1440 				goto skip;
1441 			}
1442 			pt_vaddr = kva;
1443 		}
1444 		pt_vaddr[act_pte] =
1445 		    vm->pte_encode(dmamap->dm_segs[seg].ds_addr, cache_level,
1446 			true);
1447 skip:
1448 		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
1449 			bus_dmamem_unmap(ppgtt->base.dev->dmat, kva,
1450 			    PAGE_SIZE);
1451 			pt_vaddr = NULL;
1452 			act_pt++;
1453 			act_pte = 0;
1454 		}
1455 	}
1456 	if (pt_vaddr)
1457 		bus_dmamem_unmap(ppgtt->base.dev->dmat, kva, PAGE_SIZE);
1458 }
1459 #else
1460 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
1461 				      struct sg_table *pages,
1462 				      uint64_t start,
1463 				      enum i915_cache_level cache_level)
1464 {
1465 	struct i915_hw_ppgtt *ppgtt =
1466 		container_of(vm, struct i915_hw_ppgtt, base);
1467 	gen6_gtt_pte_t *pt_vaddr;
1468 	unsigned first_entry = start >> PAGE_SHIFT;
1469 	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
1470 	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
1471 	struct sg_page_iter sg_iter;
1472 
1473 	pt_vaddr = NULL;
1474 	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
1475 		if (pt_vaddr == NULL)
1476 			pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
1477 
1478 		pt_vaddr[act_pte] =
1479 			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
1480 				       cache_level, true);
1481 		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
1482 			kunmap_atomic(pt_vaddr);
1483 			pt_vaddr = NULL;
1484 			act_pt++;
1485 			act_pte = 0;
1486 		}
1487 	}
1488 	if (pt_vaddr)
1489 		kunmap_atomic(pt_vaddr);
1490 }
1491 #endif
1492 
1493 #ifndef __NetBSD__
1494 static void gen6_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
1495 {
1496 	int i;
1497 
1498 	if (ppgtt->pt_dma_addr) {
1499 		for (i = 0; i < ppgtt->num_pd_entries; i++)
1500 			pci_unmap_page(ppgtt->base.dev->pdev,
1501 				       ppgtt->pt_dma_addr[i],
1502 				       4096, PCI_DMA_BIDIRECTIONAL);
1503 	}
1504 }
1505 
1506 static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
1507 {
1508 	int i;
1509 
1510 	kfree(ppgtt->pt_dma_addr);
1511 	for (i = 0; i < ppgtt->num_pd_entries; i++)
1512 		__free_page(ppgtt->pt_pages[i]);
1513 	kfree(ppgtt->pt_pages);
1514 }
1515 
1516 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
1517 {
1518 	struct i915_hw_ppgtt *ppgtt =
1519 		container_of(vm, struct i915_hw_ppgtt, base);
1520 
1521 	list_del(&vm->global_link);
1522 	drm_mm_takedown(&ppgtt->base.mm);
1523 	drm_mm_remove_node(&ppgtt->node);
1524 
1525 	gen6_ppgtt_unmap_pages(ppgtt);
1526 	gen6_ppgtt_free(ppgtt);
1527 }
1528 #endif
1529 
1530 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
1531 {
1532 #define GEN6_PD_ALIGN (PAGE_SIZE * 16)
1533 #define GEN6_PD_SIZE (GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE)
1534 	struct drm_device *dev = ppgtt->base.dev;
1535 	struct drm_i915_private *dev_priv = dev->dev_private;
1536 	bool retried = false;
1537 	int ret;
1538 
1539 	/* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
1540 	 * allocator works in address space sizes, so it's multiplied by page
1541 	 * size. We allocate at the top of the GTT to avoid fragmentation.
1542 	 */
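	/*
	 * GEN6_PD_SIZE works out to 512 * PAGE_SIZE = 2 MiB of GGTT address
	 * space, i.e. one page of address space per PDE.
	 */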
1543 	BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm));
1544 alloc:
1545 	ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
1546 						  &ppgtt->node, GEN6_PD_SIZE,
1547 						  GEN6_PD_ALIGN, 0,
1548 						  0, dev_priv->gtt.base.total,
1549 						  DRM_MM_SEARCH_DEFAULT,
1550 						  DRM_MM_CREATE_DEFAULT);
1551 	if (ret == -ENOSPC && !retried) {
1552 		ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
1553 					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
1554 					       I915_CACHE_NONE,
1555 					       0, dev_priv->gtt.base.total,
1556 					       0);
1557 		if (ret)
1558 			return ret;
1559 
1560 		retried = true;
1561 		goto alloc;
1562 	}
1563 
1564 	if (ppgtt->node.start < dev_priv->gtt.mappable_end)
1565 		DRM_DEBUG("Forced to use aperture for PDEs\n");
1566 
1567 	ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
1568 	return ret;
1569 }
1570 
1571 #ifdef __NetBSD__
1572 static void	gen6_ppgtt_cleanup(struct i915_address_space *);
1573 static int	gen6_ppgtt_alloc(struct i915_hw_ppgtt *);
1574 static void	gen6_ppgtt_free(struct i915_hw_ppgtt *);
1575 static int	gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *);
1576 static void	gen6_ppgtt_free_page_directories(struct i915_hw_ppgtt *);
1577 static int	gen6_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *);
1578 static void	gen6_ppgtt_free_page_tables(struct i915_hw_ppgtt *);
1579 
1580 static void
1581 gen6_ppgtt_cleanup(struct i915_address_space *vm)
1582 {
1583 	struct i915_hw_ppgtt *ppgtt =
1584 		container_of(vm, struct i915_hw_ppgtt, base);
1585 
1586 	list_del(&vm->global_link);
1587 	drm_mm_takedown(&ppgtt->base.mm);
1588 
1589 	gen6_ppgtt_free(ppgtt);
1590 }
1591 
1592 static int
1593 gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
1594 {
1595 	int ret;
1596 
1597 	ppgtt->u.gen6 = kmem_alloc(sizeof(*ppgtt->u.gen6), KM_SLEEP);
1598 
1599 	ret = gen6_ppgtt_allocate_page_directories(ppgtt);
1600 	if (ret)
1601 		goto fail0;
1602 	ret = gen6_ppgtt_allocate_page_tables(ppgtt);
1603 	if (ret)
1604 		goto fail1;
1605 
1606 	/* Success!  */
1607 	return 0;
1608 
1609 fail2: __unused
1610 	gen6_ppgtt_free_page_tables(ppgtt);
1611 fail1:	gen6_ppgtt_free_page_directories(ppgtt);
1612 fail0:	KASSERT(ret);
1613 	kmem_free(ppgtt->u.gen6, sizeof(*ppgtt->u.gen6));
1614 	return ret;
1615 }
1616 
1617 static void
1618 gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
1619 {
1620 
1621 	gen6_ppgtt_free_page_tables(ppgtt);
1622 	gen6_ppgtt_free_page_directories(ppgtt);
1623 	kmem_free(ppgtt->u.gen6, sizeof(*ppgtt->u.gen6));
1624 }
1625 
1626 static void
1627 gen6_ppgtt_free_page_directories(struct i915_hw_ppgtt *ppgtt)
1628 {
1629 
1630 	drm_mm_remove_node(&ppgtt->node);
1631 }
1632 
1633 static int
1634 gen6_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt)
1635 {
1636 	int rsegs;
1637 	int ret;
1638 
1639 	KASSERT(ppgtt->num_pd_entries <= INT_MAX);
1640 #if UINT_MAX == SIZE_MAX	/* XXX ugh */
1641 	KASSERT(ppgtt->num_pd_entries < (SIZE_MAX /
1642 		sizeof(ppgtt->u.gen6->pt_segs[0])));
1643 	KASSERT(ppgtt->num_pd_entries < (__type_max(bus_size_t) / PAGE_SIZE));
1644 #endif
1645 
1646 	ppgtt->u.gen6->pt_segs = kmem_alloc((ppgtt->num_pd_entries *
1647 		sizeof(ppgtt->u.gen6->pt_segs[0])), KM_SLEEP);
1648 
1649 	/* XXX errno NetBSD->Linux */
1650 	ret = -bus_dmamem_alloc(ppgtt->base.dev->dmat,
1651 	    (PAGE_SIZE * ppgtt->num_pd_entries), PAGE_SIZE, PAGE_SIZE,
1652 	    ppgtt->u.gen6->pt_segs, ppgtt->num_pd_entries, &rsegs,
1653 	    BUS_DMA_WAITOK);
1654 	if (ret)
1655 		goto fail0;
1656 	KASSERT(rsegs == ppgtt->num_pd_entries);
1657 
1658 	/* XXX errno NetBSD->Linux */
1659 	ret = -bus_dmamap_create(ppgtt->base.dev->dmat,
1660 	    (PAGE_SIZE * ppgtt->num_pd_entries), ppgtt->num_pd_entries,
1661 	    PAGE_SIZE, 0, BUS_DMA_WAITOK, &ppgtt->u.gen6->pt_map);
1662 	if (ret)
1663 		goto fail1;
1664 
1665 	/* XXX errno NetBSD->Linux */
1666 	ret = -bus_dmamap_load_raw(ppgtt->base.dev->dmat,
1667 	    ppgtt->u.gen6->pt_map, ppgtt->u.gen6->pt_segs,
1668 	    ppgtt->num_pd_entries, (PAGE_SIZE * ppgtt->num_pd_entries),
1669 	    BUS_DMA_WAITOK);
1670 	if (ret)
1671 		goto fail2;
1672 
1673 	/* Success!  */
1674 	return 0;
1675 
1676 fail3: __unused
1677 	bus_dmamap_unload(ppgtt->base.dev->dmat, ppgtt->u.gen6->pt_map);
1678 fail2:	bus_dmamap_destroy(ppgtt->base.dev->dmat, ppgtt->u.gen6->pt_map);
1679 fail1:	bus_dmamem_free(ppgtt->base.dev->dmat, ppgtt->u.gen6->pt_segs,
1680 	    ppgtt->num_pd_entries);
1681 fail0:	KASSERT(ret);
1682 	return ret;
1683 }
1684 
1685 static void
1686 gen6_ppgtt_free_page_tables(struct i915_hw_ppgtt *ppgtt)
1687 {
1688 
1689 	bus_dmamap_unload(ppgtt->base.dev->dmat, ppgtt->u.gen6->pt_map);
1690 	bus_dmamap_destroy(ppgtt->base.dev->dmat, ppgtt->u.gen6->pt_map);
1691 	bus_dmamem_free(ppgtt->base.dev->dmat, ppgtt->u.gen6->pt_segs,
1692 	    ppgtt->num_pd_entries);
1693 }
1694 #else
1695 static int gen6_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt)
1696 {
1697 	int i;
1698 
1699 	ppgtt->pt_pages = kcalloc(ppgtt->num_pd_entries, sizeof(struct page *),
1700 				  GFP_KERNEL);
1701 
1702 	if (!ppgtt->pt_pages)
1703 		return -ENOMEM;
1704 
1705 	for (i = 0; i < ppgtt->num_pd_entries; i++) {
1706 		ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL);
1707 		if (!ppgtt->pt_pages[i]) {
1708 			gen6_ppgtt_free(ppgtt);
1709 			return -ENOMEM;
1710 		}
1711 	}
1712 
1713 	return 0;
1714 }
1715 
1716 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
1717 {
1718 	int ret;
1719 
1720 	ret = gen6_ppgtt_allocate_page_directories(ppgtt);
1721 	if (ret)
1722 		return ret;
1723 
1724 	ret = gen6_ppgtt_allocate_page_tables(ppgtt);
1725 	if (ret) {
1726 		drm_mm_remove_node(&ppgtt->node);
1727 		return ret;
1728 	}
1729 
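	/*
	 * pt_dma_addr only reserves the array here; the actual DMA
	 * addresses are filled in later by gen6_ppgtt_setup_page_tables().
	 */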
1730 	ppgtt->pt_dma_addr = kcalloc(ppgtt->num_pd_entries, sizeof(dma_addr_t),
1731 				     GFP_KERNEL);
1732 	if (!ppgtt->pt_dma_addr) {
1733 		drm_mm_remove_node(&ppgtt->node);
1734 		gen6_ppgtt_free(ppgtt);
1735 		return -ENOMEM;
1736 	}
1737 
1738 	return 0;
1739 }
1740 
1741 static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt)
1742 {
1743 	struct drm_device *dev = ppgtt->base.dev;
1744 	int i;
1745 
1746 	for (i = 0; i < ppgtt->num_pd_entries; i++) {
1747 		dma_addr_t pt_addr;
1748 
1749 		pt_addr = pci_map_page(dev->pdev, ppgtt->pt_pages[i], 0, 4096,
1750 				       PCI_DMA_BIDIRECTIONAL);
1751 
1752 		if (pci_dma_mapping_error(dev->pdev, pt_addr)) {
1753 			gen6_ppgtt_unmap_pages(ppgtt);
1754 			return -EIO;
1755 		}
1756 
1757 		ppgtt->pt_dma_addr[i] = pt_addr;
1758 	}
1759 
1760 	return 0;
1761 }
1762 #endif
1763 
1764 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
1765 {
1766 	struct drm_device *dev = ppgtt->base.dev;
1767 	struct drm_i915_private *dev_priv = dev->dev_private;
1768 	int ret;
1769 
1770 	ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
1771 	if (IS_GEN6(dev)) {
1772 		ppgtt->enable = gen6_ppgtt_enable;
1773 		ppgtt->switch_mm = gen6_mm_switch;
1774 	} else if (IS_HASWELL(dev)) {
1775 		ppgtt->enable = gen7_ppgtt_enable;
1776 		ppgtt->switch_mm = hsw_mm_switch;
1777 	} else if (IS_GEN7(dev)) {
1778 		ppgtt->enable = gen7_ppgtt_enable;
1779 		ppgtt->switch_mm = gen7_mm_switch;
1780 	} else
1781 		BUG();
1782 
1783 	ret = gen6_ppgtt_alloc(ppgtt);
1784 	if (ret)
1785 		return ret;
1786 
1787 #ifndef __NetBSD__
1788 	ret = gen6_ppgtt_setup_page_tables(ppgtt);
1789 	if (ret) {
1790 		gen6_ppgtt_free(ppgtt);
1791 		return ret;
1792 	}
1793 #endif
1794 
1795 	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
1796 	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
1797 	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
1798 	ppgtt->base.start = 0;
1799 	ppgtt->base.total = ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES * PAGE_SIZE;
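	/*
	 * With GEN6_PPGTT_PD_ENTRIES (512) page tables of 1024 PTEs each
	 * (PAGE_SIZE / 4-byte PTEs) and 4 KiB pages, this works out to a
	 * 2 GiB per-process address space on gen6/gen7.
	 */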
1800 #ifndef __NetBSD__
1801 	ppgtt->debug_dump = gen6_dump_ppgtt;
1802 #endif
1803 
1804 #ifdef __NetBSD__
1805 	CTASSERT(sizeof(gen6_gtt_pte_t) == 4);
1806 	ppgtt->u.gen6->pd_base = 4*(ppgtt->node.start / PAGE_SIZE);
1807 #else
1808 	ppgtt->pd_offset =
1809 		ppgtt->node.start / PAGE_SIZE * sizeof(gen6_gtt_pte_t);
1810 #endif
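	/*
	 * Both branches compute the same value: the page directory lives in
	 * the GGTT at node.start, and each gen6 PDE is 4 bytes, so the PD
	 * base is (node.start / PAGE_SIZE) * 4.  E.g. (hypothetical numbers)
	 * a node at 2 MiB would give 0x200000 >> 12 << 2 = 0x800.
	 */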
1811 
1812 	ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
1813 
1814 	DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n",
1815 			 ppgtt->node.size >> 20,
1816 			 ppgtt->node.start / PAGE_SIZE);
1817 
1818 	return 0;
1819 }
1820 
1821 int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
1822 {
1823 	struct drm_i915_private *dev_priv = dev->dev_private;
1824 	int ret = 0;
1825 
1826 	ppgtt->base.dev = dev;
1827 	ppgtt->base.scratch = dev_priv->gtt.base.scratch;
1828 
1829 	if (INTEL_INFO(dev)->gen < 8)
1830 		ret = gen6_ppgtt_init(ppgtt);
1831 	else if (IS_GEN8(dev))
1832 		ret = gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
1833 	else
1834 		BUG();
1835 
1836 	if (!ret) {
1837 		kref_init(&ppgtt->ref);
1838 		drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
1839 			    ppgtt->base.total);
1840 		i915_init_vm(dev_priv, &ppgtt->base);
1841 		if (INTEL_INFO(dev)->gen < 8) {
1842 			gen6_write_pdes(ppgtt);
1843 #ifdef __NetBSD__
1844 			DRM_DEBUG("Adding PPGTT at offset %"PRIxMAX"\n",
1845 			    (uintmax_t)ppgtt->u.gen6->pd_base << 10);
1846 #else
1847 			DRM_DEBUG("Adding PPGTT at offset %x\n",
1848 				  ppgtt->pd_offset << 10);
1849 #endif
1850 		}
1851 	}
1852 
1853 	return ret;
1854 }
1855 
1856 static void
1857 ppgtt_bind_vma(struct i915_vma *vma,
1858 	       enum i915_cache_level cache_level,
1859 	       u32 flags)
1860 {
1861 #ifdef __NetBSD__
1862 	vma->vm->insert_entries(vma->vm, vma->obj->igo_dmamap, vma->node.start,
1863 				cache_level);
1864 #else
1865 	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
1866 				cache_level);
1867 #endif
1868 }
1869 
1870 static void ppgtt_unbind_vma(struct i915_vma *vma)
1871 {
1872 	vma->vm->clear_range(vma->vm,
1873 			     vma->node.start,
1874 			     vma->obj->base.size,
1875 			     true);
1876 }
1877 
1878 extern int intel_iommu_gfx_mapped;
1879 /* Certain Gen5 chipsets require idling the GPU before
1880  * unmapping anything from the GTT when VT-d is enabled.
1881  */
1882 static inline bool needs_idle_maps(struct drm_device *dev)
1883 {
1884 #ifdef CONFIG_INTEL_IOMMU
1885 	/* Query intel_iommu to see if we need the workaround. Presumably that
1886 	 * was loaded first.
1887 	 */
1888 	if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
1889 		return true;
1890 #endif
1891 	return false;
1892 }
1893 
1894 static bool do_idling(struct drm_i915_private *dev_priv)
1895 {
1896 	bool ret = dev_priv->mm.interruptible;
1897 
1898 	if (unlikely(dev_priv->gtt.do_idle_maps)) {
1899 		dev_priv->mm.interruptible = false;
1900 		if (i915_gpu_idle(dev_priv->dev)) {
1901 			DRM_ERROR("Couldn't idle GPU\n");
1902 			/* Wait a bit, in hopes it avoids the hang */
1903 			udelay(10);
1904 		}
1905 	}
1906 
1907 	return ret;
1908 }
1909 
1910 static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
1911 {
1912 	if (unlikely(dev_priv->gtt.do_idle_maps))
1913 		dev_priv->mm.interruptible = interruptible;
1914 }
1915 
1916 void i915_check_and_clear_faults(struct drm_device *dev)
1917 {
1918 	struct drm_i915_private *dev_priv = dev->dev_private;
1919 	struct intel_ring_buffer *ring;
1920 	int i;
1921 
1922 	if (INTEL_INFO(dev)->gen < 6)
1923 		return;
1924 
1925 	for_each_ring(ring, dev_priv, i) {
1926 		u32 fault_reg;
1927 		fault_reg = I915_READ(RING_FAULT_REG(ring));
1928 		if (fault_reg & RING_FAULT_VALID) {
1929 			DRM_DEBUG_DRIVER("Unexpected fault\n"
1930 					 "\tAddr: 0x%08"PRIx32"\n"
1931 					 "\tAddress space: %s\n"
1932 					 "\tSource ID: %d\n"
1933 					 "\tType: %d\n",
1934 					 fault_reg & PAGE_MASK,
1935 					 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
1936 					 RING_FAULT_SRCID(fault_reg),
1937 					 RING_FAULT_FAULT_TYPE(fault_reg));
1938 			I915_WRITE(RING_FAULT_REG(ring),
1939 				   fault_reg & ~RING_FAULT_VALID);
1940 		}
1941 	}
1942 	POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
1943 }
1944 
1945 void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
1946 {
1947 	struct drm_i915_private *dev_priv = dev->dev_private;
1948 
1949 	/* Don't bother messing with faults pre GEN6 as we have little
1950 	 * documentation supporting that it's a good idea.
1951 	 */
1952 	if (INTEL_INFO(dev)->gen < 6)
1953 		return;
1954 
1955 	i915_check_and_clear_faults(dev);
1956 
1957 	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
1958 				       dev_priv->gtt.base.start,
1959 				       dev_priv->gtt.base.total,
1960 				       true);
1961 }
1962 
1963 void i915_gem_restore_gtt_mappings(struct drm_device *dev)
1964 {
1965 	struct drm_i915_private *dev_priv = dev->dev_private;
1966 	struct drm_i915_gem_object *obj;
1967 	struct i915_address_space *vm;
1968 
1969 	i915_check_and_clear_faults(dev);
1970 
1971 	/* First fill our portion of the GTT with scratch pages */
1972 	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
1973 				       dev_priv->gtt.base.start,
1974 				       dev_priv->gtt.base.total,
1975 				       true);
1976 
1977 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
1978 		struct i915_vma *vma = i915_gem_obj_to_vma(obj,
1979 							   &dev_priv->gtt.base);
1980 		if (!vma)
1981 			continue;
1982 
1983 		i915_gem_clflush_object(obj, obj->pin_display);
1984 		/* The bind_vma code tries to be smart about tracking mappings.
1985 		 * Unfortunately above, we've just wiped out the mappings
1986 		 * without telling our object about it. So we need to fake it.
1987 		 */
1988 		obj->has_global_gtt_mapping = 0;
1989 		vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
1990 	}
1991 
1992 
1993 	if (INTEL_INFO(dev)->gen >= 8) {
1994 		gen8_setup_private_ppat(dev_priv);
1995 		return;
1996 	}
1997 
1998 	list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
1999 		/* TODO: Perhaps it shouldn't be gen6 specific */
2000 		if (i915_is_ggtt(vm)) {
2001 			if (dev_priv->mm.aliasing_ppgtt)
2002 				gen6_write_pdes(dev_priv->mm.aliasing_ppgtt);
2003 			continue;
2004 		}
2005 
2006 		gen6_write_pdes(container_of(vm, struct i915_hw_ppgtt, base));
2007 	}
2008 
2009 	i915_gem_chipset_flush(dev);
2010 }
2011 
2012 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
2013 {
2014 	if (obj->has_dma_mapping)
2015 		return 0;
2016 
2017 #ifdef __NetBSD__
2018 	KASSERT(0 < obj->base.size);
2019 	/* XXX errno NetBSD->Linux */
2020 	return -bus_dmamap_load_raw(obj->base.dev->dmat, obj->igo_dmamap,
2021 	    obj->pages, obj->igo_nsegs, obj->base.size, BUS_DMA_NOWAIT);
2022 #else
2023 	if (!dma_map_sg(&obj->base.dev->pdev->dev,
2024 			obj->pages->sgl, obj->pages->nents,
2025 			PCI_DMA_BIDIRECTIONAL))
2026 		return -ENOSPC;
2027 
2028 	return 0;
2029 #endif
2030 }
2031 
2032 #ifdef __NetBSD__
2033 static inline uint64_t
2034 gen8_get_pte(bus_space_tag_t bst, bus_space_handle_t bsh, unsigned i)
2035 {
2036 	CTASSERT(_BYTE_ORDER == _LITTLE_ENDIAN); /* x86 */
2037 	CTASSERT(sizeof(gen8_gtt_pte_t) == 8);
2038 #ifdef _LP64			/* XXX How to detect bus_space_read_8?  */
2039 	return bus_space_read_8(bst, bsh, 8*i);
2040 #else
2041 	/*
2042 	 * XXX I'm not sure this case can actually happen in practice:
2043 	 * 32-bit gen8 chipsets?
2044 	 */
2045 	return bus_space_read_4(bst, bsh, 8*i) |
2046 	    ((uint64_t)bus_space_read_4(bst, bsh, 8*i + 4) << 32);
2047 #endif
2048 }
2049 
2050 static inline void
2051 gen8_set_pte(bus_space_tag_t bst, bus_space_handle_t bsh, unsigned i,
2052     gen8_gtt_pte_t pte)
2053 {
2054 	CTASSERT(_BYTE_ORDER == _LITTLE_ENDIAN); /* x86 */
2055 	CTASSERT(sizeof(gen8_gtt_pte_t) == 8);
2056 #ifdef _LP64			/* XXX How to detect bus_space_write_8?  */
2057 	bus_space_write_8(bst, bsh, 8*i, pte);
2058 #else
2059 	bus_space_write_4(bst, bsh, 8*i, (uint32_t)pte);
2060 	bus_space_write_4(bst, bsh, 8*i + 4, (uint32_t)(pte >> 32));
2061 #endif
2062 }
2063 #else
2064 static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
2065 {
2066 #ifdef writeq
2067 	writeq(pte, addr);
2068 #else
2069 	iowrite32((u32)pte, addr);
2070 	iowrite32(pte >> 32, addr + 4);
2071 #endif
2072 }
2073 #endif
2074 
2075 #ifdef __NetBSD__
2076 static void
2077 gen8_ggtt_insert_entries(struct i915_address_space *vm, bus_dmamap_t dmamap,
2078     uint64_t start, enum i915_cache_level level)
2079 {
2080 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
2081 	unsigned first_entry = start >> PAGE_SHIFT;
2082 	const bus_space_tag_t bst = dev_priv->gtt.bst;
2083 	const bus_space_handle_t bsh = dev_priv->gtt.bsh;
2084 	unsigned i;
2085 
2086 	KASSERT(0 < dmamap->dm_nsegs);
2087 	for (i = 0; i < dmamap->dm_nsegs; i++) {
2088 		KASSERT(dmamap->dm_segs[i].ds_len == PAGE_SIZE);
2089 		gen8_set_pte(bst, bsh, first_entry + i,
2090 		    gen8_pte_encode(dmamap->dm_segs[i].ds_addr, level, true));
2091 	}
2092 	if (0 < i) {
2093 		/* Posting read.  */
2094 		WARN_ON(gen8_get_pte(bst, bsh, (first_entry + i - 1))
2095 		    != gen8_pte_encode(dmamap->dm_segs[i - 1].ds_addr, level,
2096 			true));
2097 	}
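	/*
	 * The read-back above doubles as a posting read; as in the Linux
	 * path below, the TLB flush must only be issued once the PTE
	 * writes are known to have landed.
	 */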
2098 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2099 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
2100 }
2101 #else
2102 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
2103 				     struct sg_table *st,
2104 				     uint64_t start,
2105 				     enum i915_cache_level level)
2106 {
2107 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
2108 	unsigned first_entry = start >> PAGE_SHIFT;
2109 	gen8_gtt_pte_t __iomem *gtt_entries =
2110 		(gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
2111 	int i = 0;
2112 	struct sg_page_iter sg_iter;
2113 	dma_addr_t addr;
2114 
2115 	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
2116 		addr = sg_dma_address(sg_iter.sg) +
2117 			(sg_iter.sg_pgoffset << PAGE_SHIFT);
2118 		gen8_set_pte(&gtt_entries[i],
2119 			     gen8_pte_encode(addr, level, true));
2120 		i++;
2121 	}
2122 
2123 	/*
2124 	 * XXX: This serves as a posting read to make sure that the PTE has
2125 	 * actually been updated. There is some concern that even though
2126 	 * registers and PTEs are within the same BAR, they may be subject to
2127 	 * NUMA-like access patterns. Therefore, even with the way we assume
2128 	 * hardware should work, we must keep this posting read for paranoia.
2129 	 */
2130 	if (i != 0)
2131 		WARN_ON(readq(&gtt_entries[i-1])
2132 			!= gen8_pte_encode(addr, level, true));
2133 
2134 	/* This next bit makes the above posting read even more important. We
2135 	 * want to flush the TLBs only after we're certain all the PTE updates
2136 	 * have finished.
2137 	 */
2138 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2139 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
2140 }
2141 #endif
2142 
2143 /*
2144  * Binds an object into the global gtt with the specified cache level. The object
2145  * will be accessible to the GPU via commands whose operands reference offsets
2146  * within the global GTT as well as accessible by the CPU through the GMADR
2147  * mapped BAR (dev_priv->mm.gtt->gtt).
2148  */
2149 #ifdef __NetBSD__
2150 static void
2151 gen6_ggtt_insert_entries(struct i915_address_space *vm, bus_dmamap_t dmamap,
2152     uint64_t start, enum i915_cache_level level)
2153 {
2154 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
2155 	unsigned first_entry = start >> PAGE_SHIFT;
2156 	const bus_space_tag_t bst = dev_priv->gtt.bst;
2157 	const bus_space_handle_t bsh = dev_priv->gtt.bsh;
2158 	unsigned i;
2159 
2160 	KASSERT(0 < dmamap->dm_nsegs);
2161 	for (i = 0; i < dmamap->dm_nsegs; i++) {
2162 		KASSERT(dmamap->dm_segs[i].ds_len == PAGE_SIZE);
2163 		CTASSERT(sizeof(gen6_gtt_pte_t) == 4);
2164 		bus_space_write_4(bst, bsh, 4*(first_entry + i),
2165 		    vm->pte_encode(dmamap->dm_segs[i].ds_addr, level, true));
2166 	}
2167 	if (0 < i) {
2168 		/* Posting read.  */
2169 		WARN_ON(bus_space_read_4(bst, bsh, 4*(first_entry + i - 1))
2170 		    != vm->pte_encode(dmamap->dm_segs[i - 1].ds_addr, level,
2171 			true));
2172 	}
2173 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2174 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
2175 }
2176 #else
2177 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
2178 				     struct sg_table *st,
2179 				     uint64_t start,
2180 				     enum i915_cache_level level)
2181 {
2182 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
2183 	unsigned first_entry = start >> PAGE_SHIFT;
2184 	gen6_gtt_pte_t __iomem *gtt_entries =
2185 		(gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
2186 	int i = 0;
2187 	struct sg_page_iter sg_iter;
2188 	dma_addr_t addr;
2189 
2190 	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
2191 		addr = sg_page_iter_dma_address(&sg_iter);
2192 		iowrite32(vm->pte_encode(addr, level, true), &gtt_entries[i]);
2193 		i++;
2194 	}
2195 
2196 	/* XXX: This serves as a posting read to make sure that the PTE has
2197 	 * actually been updated. There is some concern that even though
2198 	 * registers and PTEs are within the same BAR, they may be subject to
2199 	 * NUMA-like access patterns. Therefore, even with the way we assume
2200 	 * hardware should work, we must keep this posting read for paranoia.
2201 	 */
2202 	if (i != 0)
2203 		WARN_ON(readl(&gtt_entries[i-1]) !=
2204 			vm->pte_encode(addr, level, true));
2205 
2206 	/* This next bit makes the above posting read even more important. We
2207 	 * want to flush the TLBs only after we're certain all the PTE updates
2208 	 * have finished.
2209 	 */
2210 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2211 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
2212 }
2213 #endif
2214 
2215 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
2216 				  uint64_t start,
2217 				  uint64_t length,
2218 				  bool use_scratch)
2219 {
2220 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
2221 	unsigned first_entry = start >> PAGE_SHIFT;
2222 	unsigned num_entries = length >> PAGE_SHIFT;
2223 #ifdef __NetBSD__
2224 	const bus_space_tag_t bst = dev_priv->gtt.bst;
2225 	const bus_space_handle_t bsh = dev_priv->gtt.bsh;
2226 	gen8_gtt_pte_t scratch_pte;
2227 #else
2228 	gen8_gtt_pte_t scratch_pte, __iomem *gtt_base =
2229 		(gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
2230 #endif
2231 	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
2232 	int i;
2233 
2234 	if (WARN(num_entries > max_entries,
2235 		 "First entry = %d; Num entries = %d (max=%d)\n",
2236 		 first_entry, num_entries, max_entries))
2237 		num_entries = max_entries;
2238 
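	/*
	 * Every entry in the range is pointed at the shared scratch page
	 * rather than left with a stale address; use_scratch controls how
	 * the scratch PTE is encoded.
	 */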
2239 	scratch_pte = gen8_pte_encode(vm->scratch.addr,
2240 				      I915_CACHE_LLC,
2241 				      use_scratch);
2242 #ifdef __NetBSD__
2243 	CTASSERT(sizeof(gen8_gtt_pte_t) == 8);
2244 	for (i = 0; i < num_entries; i++)
2245 		gen8_set_pte(bst, bsh, first_entry + i, scratch_pte);
2246 	(void)gen8_get_pte(bst, bsh, first_entry);
2247 #else
2248 	for (i = 0; i < num_entries; i++)
2249 		gen8_set_pte(&gtt_base[i], scratch_pte);
2250 	readl(gtt_base);
2251 #endif
2252 }
2253 
2254 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
2255 				  uint64_t start,
2256 				  uint64_t length,
2257 				  bool use_scratch)
2258 {
2259 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
2260 	unsigned first_entry = start >> PAGE_SHIFT;
2261 	unsigned num_entries = length >> PAGE_SHIFT;
2262 #ifdef __NetBSD__
2263 	const bus_space_tag_t bst = dev_priv->gtt.bst;
2264 	const bus_space_handle_t bsh = dev_priv->gtt.bsh;
2265 	gen8_gtt_pte_t scratch_pte;
2266 #else
2267 	gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
2268 		(gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
2269 #endif
2270 	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
2271 	int i;
2272 
2273 	if (WARN(num_entries > max_entries,
2274 		 "First entry = %d; Num entries = %d (max=%d)\n",
2275 		 first_entry, num_entries, max_entries))
2276 		num_entries = max_entries;
2277 
2278 	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch);
2279 
2280 #ifdef __NetBSD__
2281 	CTASSERT(sizeof(gen6_gtt_pte_t) == 4);
2282 	for (i = 0; i < num_entries; i++)
2283 		bus_space_write_4(bst, bsh, 4*(first_entry + i), scratch_pte);
2284 	(void)bus_space_read_4(bst, bsh, 4*first_entry);
2285 #else
2286 	for (i = 0; i < num_entries; i++)
2287 		iowrite32(scratch_pte, &gtt_base[i]);
2288 	readl(gtt_base);
2289 #endif
2290 }
2291 
2292 
2293 static void i915_ggtt_bind_vma(struct i915_vma *vma,
2294 			       enum i915_cache_level cache_level,
2295 			       u32 unused)
2296 {
2297 	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
2298 	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2299 		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2300 
2301 	BUG_ON(!i915_is_ggtt(vma->vm));
2302 #ifdef __NetBSD__
2303 	intel_gtt_insert_entries(vma->obj->igo_dmamap, entry, flags);
2304 #else
2305 	intel_gtt_insert_sg_entries(vma->obj->pages, entry, flags);
2306 #endif
2307 	vma->obj->has_global_gtt_mapping = 1;
2308 }
2309 
2310 static void i915_ggtt_clear_range(struct i915_address_space *vm,
2311 				  uint64_t start,
2312 				  uint64_t length,
2313 				  bool unused)
2314 {
2315 	unsigned first_entry = start >> PAGE_SHIFT;
2316 	unsigned num_entries = length >> PAGE_SHIFT;
2317 	intel_gtt_clear_range(first_entry, num_entries);
2318 }
2319 
2320 static void i915_ggtt_unbind_vma(struct i915_vma *vma)
2321 {
2322 	const unsigned int first = vma->node.start >> PAGE_SHIFT;
2323 	const unsigned int size = vma->obj->base.size >> PAGE_SHIFT;
2324 
2325 	BUG_ON(!i915_is_ggtt(vma->vm));
2326 	vma->obj->has_global_gtt_mapping = 0;
2327 	intel_gtt_clear_range(first, size);
2328 }
2329 
2330 static void ggtt_bind_vma(struct i915_vma *vma,
2331 			  enum i915_cache_level cache_level,
2332 			  u32 flags)
2333 {
2334 	struct drm_device *dev = vma->vm->dev;
2335 	struct drm_i915_private *dev_priv = dev->dev_private;
2336 	struct drm_i915_gem_object *obj = vma->obj;
2337 
2338 	/* If there is no aliasing PPGTT, or the caller needs a global mapping,
2339 	 * or we have a global mapping already but the cacheability flags have
2340 	 * changed, set the global PTEs.
2341 	 *
2342 	 * If there is an aliasing PPGTT it is anecdotally faster, so use that
2343 	 * instead if none of the above hold true.
2344 	 *
2345 	 * NB: A global mapping should only be needed for special regions like
2346 	 * "gtt mappable", SNB errata, or if specified via special execbuf
2347 	 * flags. At all other times, the GPU will use the aliasing PPGTT.
2348 	 */
2349 	if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
2350 		if (!obj->has_global_gtt_mapping ||
2351 		    (cache_level != obj->cache_level)) {
2352 			vma->vm->insert_entries(vma->vm,
2353 #ifdef __NetBSD__
2354 						obj->igo_dmamap,
2355 #else
2356 						obj->pages,
2357 #endif
2358 						vma->node.start,
2359 						cache_level);
2360 			obj->has_global_gtt_mapping = 1;
2361 		}
2362 	}
2363 
2364 	if (dev_priv->mm.aliasing_ppgtt &&
2365 	    (!obj->has_aliasing_ppgtt_mapping ||
2366 	     (cache_level != obj->cache_level))) {
2367 		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
2368 		appgtt->base.insert_entries(&appgtt->base,
2369 #ifdef __NetBSD__
2370 					    vma->obj->igo_dmamap,
2371 #else
2372 					    vma->obj->pages,
2373 #endif
2374 					    vma->node.start,
2375 					    cache_level);
2376 		vma->obj->has_aliasing_ppgtt_mapping = 1;
2377 	}
2378 }
2379 
2380 static void ggtt_unbind_vma(struct i915_vma *vma)
2381 {
2382 	struct drm_device *dev = vma->vm->dev;
2383 	struct drm_i915_private *dev_priv = dev->dev_private;
2384 	struct drm_i915_gem_object *obj = vma->obj;
2385 
2386 	if (obj->has_global_gtt_mapping) {
2387 		vma->vm->clear_range(vma->vm,
2388 				     vma->node.start,
2389 				     obj->base.size,
2390 				     true);
2391 		obj->has_global_gtt_mapping = 0;
2392 	}
2393 
2394 	if (obj->has_aliasing_ppgtt_mapping) {
2395 		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
2396 		appgtt->base.clear_range(&appgtt->base,
2397 					 vma->node.start,
2398 					 obj->base.size,
2399 					 true);
2400 		obj->has_aliasing_ppgtt_mapping = 0;
2401 	}
2402 }
2403 
2404 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
2405 {
2406 	struct drm_device *dev = obj->base.dev;
2407 	struct drm_i915_private *dev_priv = dev->dev_private;
2408 	bool interruptible;
2409 
2410 	interruptible = do_idling(dev_priv);
2411 
2412 #ifdef __NetBSD__
2413 	bus_dmamap_unload(dev->dmat, obj->igo_dmamap);
2414 #else
2415 	if (!obj->has_dma_mapping)
2416 		dma_unmap_sg(&dev->pdev->dev,
2417 			     obj->pages->sgl, obj->pages->nents,
2418 			     PCI_DMA_BIDIRECTIONAL);
2419 #endif
2420 
2421 	undo_idling(dev_priv, interruptible);
2422 }
2423 
2424 static void i915_gtt_color_adjust(struct drm_mm_node *node,
2425 				  unsigned long color,
2426 				  unsigned long *start,
2427 				  unsigned long *end)
2428 {
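	/*
	 * Installed only on !HAS_LLC platforms (see i915_gem_setup_global_gtt):
	 * shrink the candidate hole so that a one-page gap separates
	 * neighbouring nodes whose cache "colors" differ, keeping cacheable
	 * and uncacheable PTEs from sitting directly next to each other.
	 */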
2429 	if (node->color != color)
2430 		*start += 4096;
2431 
2432 	if (!list_empty(&node->node_list)) {
2433 		node = list_entry(node->node_list.next,
2434 				  struct drm_mm_node,
2435 				  node_list);
2436 		if (node->allocated && node->color != color)
2437 			*end -= 4096;
2438 	}
2439 }
2440 
2441 void i915_gem_setup_global_gtt(struct drm_device *dev,
2442 			       unsigned long start,
2443 			       unsigned long mappable_end,
2444 			       unsigned long end)
2445 {
2446 	/* Let GEM Manage all of the aperture.
2447 	 *
2448 	 * However, leave one page at the end still bound to the scratch page.
2449 	 * There are a number of places where the hardware apparently prefetches
2450 	 * past the end of the object, and we've seen multiple hangs with the
2451 	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
2452 	 * aperture.  One page should be enough to keep any prefetching inside
2453 	 * of the aperture.
2454 	 */
2455 	struct drm_i915_private *dev_priv = dev->dev_private;
2456 	struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
2457 	struct drm_mm_node *entry;
2458 	struct drm_i915_gem_object *obj;
2459 	unsigned long hole_start, hole_end;
2460 
2461 	BUG_ON(mappable_end > end);
2462 
2463 	/* Subtract the guard page ... */
2464 	drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE);
2465 	if (!HAS_LLC(dev))
2466 		dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;
2467 
2468 	/* Mark any preallocated objects as occupied */
2469 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
2470 		struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
2471 		int ret;
2472 		DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
2473 			      i915_gem_obj_ggtt_offset(obj), obj->base.size);
2474 
2475 		WARN_ON(i915_gem_obj_ggtt_bound(obj));
2476 		ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
2477 		if (ret)
2478 			DRM_DEBUG_KMS("Reservation failed\n");
2479 		obj->has_global_gtt_mapping = 1;
2480 	}
2481 
2482 	dev_priv->gtt.base.start = start;
2483 	dev_priv->gtt.base.total = end - start;
2484 
2485 	/* Clear any non-preallocated blocks */
2486 	drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
2487 		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
2488 			      hole_start, hole_end);
2489 		ggtt_vm->clear_range(ggtt_vm, hole_start,
2490 				     hole_end - hole_start, true);
2491 	}
2492 
2493 	/* And finally clear the reserved guard page */
2494 	ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);
2495 }
2496 
2497 void i915_gem_init_global_gtt(struct drm_device *dev)
2498 {
2499 	struct drm_i915_private *dev_priv = dev->dev_private;
2500 	unsigned long gtt_size, mappable_size;
2501 
2502 	gtt_size = dev_priv->gtt.base.total;
2503 	mappable_size = dev_priv->gtt.mappable_end;
2504 
2505 	i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
2506 }
2507 
2508 static int setup_scratch_page(struct drm_device *dev)
2509 {
2510 	struct drm_i915_private *dev_priv = dev->dev_private;
2511 #ifdef __NetBSD__
2512 	int nsegs;
2513 	int error;
2514 
2515 	error = bus_dmamem_alloc(dev->dmat, PAGE_SIZE, PAGE_SIZE, 0,
2516 	    &dev_priv->gtt.base.scratch.seg, 1, &nsegs, BUS_DMA_WAITOK);
2517 	if (error)
2518 		goto fail0;
2519 	KASSERT(nsegs == 1);
2520 
2521 	error = bus_dmamap_create(dev->dmat, PAGE_SIZE, 1, PAGE_SIZE, 0,
2522 	    BUS_DMA_WAITOK, &dev_priv->gtt.base.scratch.map);
2523 	if (error)
2524 		goto fail1;
2525 
2526 	error = bus_dmamap_load_raw(dev->dmat, dev_priv->gtt.base.scratch.map,
2527 	    &dev_priv->gtt.base.scratch.seg, 1, PAGE_SIZE, BUS_DMA_WAITOK);
2528 	if (error)
2529 		goto fail2;
2530 
2531 	/* Success!  */
2532 	dev_priv->gtt.base.scratch.addr =
2533 	    dev_priv->gtt.base.scratch.map->dm_segs[0].ds_addr;
2534 	return 0;
2535 
2536 fail3: __unused
2537 	dev_priv->gtt.base.scratch.addr = 0;
2538 	bus_dmamap_unload(dev->dmat, dev_priv->gtt.base.scratch.map);
2539 fail2:	bus_dmamap_destroy(dev->dmat, dev_priv->gtt.base.scratch.map);
2540 fail1:	bus_dmamem_free(dev->dmat, &dev_priv->gtt.base.scratch.seg, 1);
2541 fail0:	KASSERT(error);
2542 	/* XXX errno NetBSD->Linux */
2543 	return -error;
2544 #else
2545 	struct page *page;
2546 	dma_addr_t dma_addr;
2547 
2548 	page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
2549 	if (page == NULL)
2550 		return -ENOMEM;
2551 	get_page(page);
2552 	set_pages_uc(page, 1);
2553 
2554 #ifdef CONFIG_INTEL_IOMMU
2555 	dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE,
2556 				PCI_DMA_BIDIRECTIONAL);
2557 	if (pci_dma_mapping_error(dev->pdev, dma_addr))
2558 		return -EINVAL;
2559 #else
2560 	dma_addr = page_to_phys(page);
2561 #endif
2562 	dev_priv->gtt.base.scratch.page = page;
2563 	dev_priv->gtt.base.scratch.addr = dma_addr;
2564 
2565 	return 0;
2566 #endif
2567 }
2568 
2569 static void teardown_scratch_page(struct drm_device *dev)
2570 {
2571 	struct drm_i915_private *dev_priv = dev->dev_private;
2572 #ifdef __NetBSD__
2573 
2574 	dev_priv->gtt.base.scratch.addr = 0;
2575 	bus_dmamap_unload(dev->dmat, dev_priv->gtt.base.scratch.map);
2576 	bus_dmamap_destroy(dev->dmat, dev_priv->gtt.base.scratch.map);
2577 	bus_dmamem_free(dev->dmat, &dev_priv->gtt.base.scratch.seg, 1);
2578 #else
2579 	struct page *page = dev_priv->gtt.base.scratch.page;
2580 
2581 	set_pages_wb(page, 1);
2582 	pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr,
2583 		       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
2584 	put_page(page);
2585 	__free_page(page);
2586 #endif
2587 }
2588 
2589 static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
2590 {
2591 	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
2592 	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
2593 	return snb_gmch_ctl << 20;
2594 }
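/*
 * Worked example for the decode above: a GGMS field of 2 yields a 2 MiB
 * GTT, i.e. 512K four-byte PTEs, which map a 2 GiB global GTT -- matching
 * the "(gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT" computation in
 * gen6_gmch_probe().
 */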
2595 
2596 static inline unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
2597 {
2598 	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
2599 	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
2600 	if (bdw_gmch_ctl)
2601 		bdw_gmch_ctl = 1 << bdw_gmch_ctl;
2602 	return bdw_gmch_ctl << 20;
2603 }
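/*
 * On gen8 the GGMS field is an exponent rather than a size: a raw value of
 * n selects (1 << n) MiB of GTT, e.g. 3 -> 8 MiB, which with 8-byte PTEs
 * covers a 4 GiB global GTT.
 */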
2604 
2605 static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
2606 {
2607 	snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
2608 	snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
2609 	return snb_gmch_ctl << 25; /* 32 MB units */
2610 }
2611 
2612 static inline size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
2613 {
2614 	bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2615 	bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
2616 	return bdw_gmch_ctl << 25; /* 32 MB units */
2617 }
2618 
2619 static int ggtt_probe_common(struct drm_device *dev,
2620 			     size_t gtt_size)
2621 {
2622 	struct drm_i915_private *dev_priv = dev->dev_private;
2623 	phys_addr_t gtt_phys_addr;
2624 	int ret;
2625 
2626 	/* For Modern GENs the PTEs and register space are split in the BAR */
2627 	gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
2628 		(pci_resource_len(dev->pdev, 0) / 2);
2629 
2630 #ifdef __NetBSD__
2631 	dev_priv->gtt.bst = dev->pdev->pd_pa.pa_memt;
2632 	/* XXX errno NetBSD->Linux */
2633 	ret = -bus_space_map(dev_priv->gtt.bst, gtt_phys_addr, gtt_size,
2634 	    BUS_SPACE_MAP_PREFETCHABLE, &dev_priv->gtt.bsh);
2635 	if (ret) {
2636 		DRM_ERROR("Failed to map the graphics translation table: %d\n",
2637 		    ret);
2638 		return ret;
2639 	}
2640 	dev_priv->gtt.size = gtt_size;
2641 #else
2642 	dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
2643 	if (!dev_priv->gtt.gsm) {
2644 		DRM_ERROR("Failed to map the gtt page table\n");
2645 		return -ENOMEM;
2646 	}
2647 #endif
2648 
2649 	ret = setup_scratch_page(dev);
2650 	if (ret) {
2651 		DRM_ERROR("Scratch setup failed\n");
2652 		/* iounmap will also get called at remove, but meh */
2653 #ifdef __NetBSD__
2654 		bus_space_unmap(dev_priv->gtt.bst, dev_priv->gtt.bsh,
2655 		    dev_priv->gtt.size);
2656 #else
2657 		iounmap(dev_priv->gtt.gsm);
2658 #endif
2659 	}
2660 
2661 	return ret;
2662 }
2663 
2664 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
2665  * bits. When using advanced contexts each context stores its own PAT, but
2666  * writing this data shouldn't be harmful even in those cases. */
2667 static void gen8_setup_private_ppat(struct drm_i915_private *dev_priv)
2668 {
2669 #define GEN8_PPAT_UC		(0<<0)
2670 #define GEN8_PPAT_WC		(1<<0)
2671 #define GEN8_PPAT_WT		(2<<0)
2672 #define GEN8_PPAT_WB		(3<<0)
2673 #define GEN8_PPAT_ELLC_OVERRIDE	(0<<2)
2674 /* FIXME(BDW): Bspec is completely confused about cache control bits. */
2675 #define GEN8_PPAT_LLC		(1<<2)
2676 #define GEN8_PPAT_LLCELLC	(2<<2)
2677 #define GEN8_PPAT_LLCeLLC	(3<<2)
2678 #define GEN8_PPAT_AGE(x)	(x<<4)
2679 #define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8))
2680 	uint64_t pat;
2681 
2682 	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
2683 	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
2684 	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
2685 	      GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
2686 	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
2687 	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
2688 	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
2689 	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
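	/*
	 * GEN8_PPAT(i, x) places entry i in byte i of the 64-bit PAT value,
	 * so index 0 (WB | LLC) occupies bits 7:0 and index 3 (UC) occupies
	 * bits 31:24.
	 */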
2690 
2691 	/* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
2692 	 * write would work. */
2693 	I915_WRITE(GEN8_PRIVATE_PAT, pat);
2694 	I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
2695 }
2696 
2697 static int gen8_gmch_probe(struct drm_device *dev,
2698 			   size_t *gtt_total,
2699 			   size_t *stolen,
2700 			   phys_addr_t *mappable_base,
2701 			   unsigned long *mappable_end)
2702 {
2703 	struct drm_i915_private *dev_priv = dev->dev_private;
2704 	unsigned int gtt_size;
2705 	u16 snb_gmch_ctl;
2706 	int ret;
2707 
2708 	/* TODO: We're not aware of mappable constraints on gen8 yet */
2709 	*mappable_base = pci_resource_start(dev->pdev, 2);
2710 	*mappable_end = pci_resource_len(dev->pdev, 2);
2711 
2712 #ifndef __NetBSD__
2713 	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
2714 		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));
2715 #endif
2716 
2717 	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
2718 
2719 	*stolen = gen8_get_stolen_size(snb_gmch_ctl);
2720 
2721 	gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
2722 	*gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT;
2723 
2724 	gen8_setup_private_ppat(dev_priv);
2725 
2726 	ret = ggtt_probe_common(dev, gtt_size);
2727 
2728 	dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
2729 	dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
2730 
2731 	/* XXX 39-bit addresses?  Really?  See pci_set_dma_mask above...  */
2732 	dev_priv->gtt.max_paddr = __BITS(38, 0);
2733 
2734 	return ret;
2735 }
2736 
2737 static int gen6_gmch_probe(struct drm_device *dev,
2738 			   size_t *gtt_total,
2739 			   size_t *stolen,
2740 			   phys_addr_t *mappable_base,
2741 			   unsigned long *mappable_end)
2742 {
2743 	struct drm_i915_private *dev_priv = dev->dev_private;
2744 	unsigned int gtt_size;
2745 	u16 snb_gmch_ctl;
2746 	int ret;
2747 
2748 	*mappable_base = pci_resource_start(dev->pdev, 2);
2749 	*mappable_end = pci_resource_len(dev->pdev, 2);
2750 
2751 	/* 64/512MB is the current min/max we actually know of, but this is just
2752 	 * a coarse sanity check.
2753 	 */
2754 	if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
2755 		DRM_ERROR("Unknown GMADR size (%lx)\n",
2756 			  dev_priv->gtt.mappable_end);
2757 		return -ENXIO;
2758 	}
2759 
2760 #ifndef __NetBSD__
2761 	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
2762 		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
2763 #endif
2764 	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
2765 
2766 	*stolen = gen6_get_stolen_size(snb_gmch_ctl);
2767 
2768 	gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
2769 	*gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT;
2770 
2771 	ret = ggtt_probe_common(dev, gtt_size);
2772 
2773 	dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
2774 	dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
2775 
2776 	dev_priv->gtt.max_paddr = __BITS(39, 0);
2777 
2778 	return ret;
2779 }
2780 
2781 static void gen6_gmch_remove(struct i915_address_space *vm)
2782 {
2783 
2784 	struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);
2785 
2786 	drm_mm_takedown(&vm->mm);
2787 #ifdef __NetBSD__
2788 	bus_space_unmap(gtt->bst, gtt->bsh, gtt->size);
2789 #else
2790 	iounmap(gtt->gsm);
2791 #endif
2792 	teardown_scratch_page(vm->dev);
2793 }
2794 
2795 static int i915_gmch_probe(struct drm_device *dev,
2796 			   size_t *gtt_total,
2797 			   size_t *stolen,
2798 			   phys_addr_t *mappable_base,
2799 			   unsigned long *mappable_end)
2800 {
2801 	struct drm_i915_private *dev_priv = dev->dev_private;
2802 	int ret;
2803 
2804 	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
2805 	if (!ret) {
2806 		DRM_ERROR("failed to set up gmch\n");
2807 		return -EIO;
2808 	}
2809 
2810 	intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
2811 
2812 	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
2813 	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
2814 
2815 	if (unlikely(dev_priv->gtt.do_idle_maps))
2816 		DRM_INFO("applying Ironlake quirks for intel_iommu\n");
2817 
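	/*
	 * Cap the physical addresses we will hand to the GTT: older
	 * chipsets decode progressively fewer address bits.  On NetBSD this
	 * limit is fed to drm_limit_dma_space() in i915_gem_gtt_init()
	 * below to restrict bus_dma allocations accordingly.
	 */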
2818 	if (INTEL_INFO(dev)->gen <= 2)
2819 		dev_priv->gtt.max_paddr = __BITS(29, 0);
2820 	else if ((INTEL_INFO(dev)->gen <= 3) ||
2821 	    IS_BROADWATER(dev) || IS_CRESTLINE(dev))
2822 		dev_priv->gtt.max_paddr = __BITS(31, 0);
2823 	else if (INTEL_INFO(dev)->gen <= 5)
2824 		dev_priv->gtt.max_paddr = __BITS(35, 0);
2825 	else
2826 		dev_priv->gtt.max_paddr = __BITS(39, 0);
2827 
2828 	return 0;
2829 }
2830 
2831 static void i915_gmch_remove(struct i915_address_space *vm)
2832 {
2833 	intel_gmch_remove();
2834 }
2835 
2836 int i915_gem_gtt_init(struct drm_device *dev)
2837 {
2838 	struct drm_i915_private *dev_priv = dev->dev_private;
2839 	struct i915_gtt *gtt = &dev_priv->gtt;
2840 	int ret;
2841 
2842 	if (INTEL_INFO(dev)->gen <= 5) {
2843 		gtt->gtt_probe = i915_gmch_probe;
2844 		gtt->base.cleanup = i915_gmch_remove;
2845 	} else if (INTEL_INFO(dev)->gen < 8) {
2846 		gtt->gtt_probe = gen6_gmch_probe;
2847 		gtt->base.cleanup = gen6_gmch_remove;
2848 		if (IS_HASWELL(dev) && dev_priv->ellc_size)
2849 			gtt->base.pte_encode = iris_pte_encode;
2850 		else if (IS_HASWELL(dev))
2851 			gtt->base.pte_encode = hsw_pte_encode;
2852 		else if (IS_VALLEYVIEW(dev))
2853 			gtt->base.pte_encode = byt_pte_encode;
2854 		else if (INTEL_INFO(dev)->gen >= 7)
2855 			gtt->base.pte_encode = ivb_pte_encode;
2856 		else
2857 			gtt->base.pte_encode = snb_pte_encode;
2858 	} else {
2859 		dev_priv->gtt.gtt_probe = gen8_gmch_probe;
2860 		dev_priv->gtt.base.cleanup = gen6_gmch_remove;
2861 	}
2862 
2863 	ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
2864 			     &gtt->mappable_base, &gtt->mappable_end);
2865 	if (ret)
2866 		return ret;
2867 
2868 #ifdef __NetBSD__
2869 	dev_priv->gtt.pgfl = x86_select_freelist(dev_priv->gtt.max_paddr);
2870 	ret = drm_limit_dma_space(dev, 0, dev_priv->gtt.max_paddr);
2871 	if (ret) {
2872 		DRM_ERROR("Unable to limit DMA paddr allocations: %d!\n", ret);
2873 		gtt->base.cleanup(&gtt->base);
2874 		return ret;
2875 	}
2876 #endif
2877 
2878 	gtt->base.dev = dev;
2879 
2880 	/* GMADR is the PCI mmio aperture into the global GTT. */
2881 	DRM_INFO("Memory usable by graphics device = %zdM\n",
2882 		 gtt->base.total >> 20);
2883 	DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
2884 	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
2885 	/*
2886 	 * i915.enable_ppgtt is read-only, so do an early pass to validate the
2887 	 * user's requested state against the hardware/driver capabilities.  We
2888 	 * do this now so that we can print out any log messages once rather
2889 	 * than every time we check intel_enable_ppgtt().
2890 	 */
2891 	i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt);
2892 	DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);
2893 
2894 	return 0;
2895 }
2896 
2897 static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj,
2898 					      struct i915_address_space *vm)
2899 {
2900 	struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
2901 	if (vma == NULL)
2902 		return ERR_PTR(-ENOMEM);
2903 
2904 	INIT_LIST_HEAD(&vma->vma_link);
2905 	INIT_LIST_HEAD(&vma->mm_list);
2906 	INIT_LIST_HEAD(&vma->exec_list);
2907 	vma->vm = vm;
2908 	vma->obj = obj;
2909 
2910 	switch (INTEL_INFO(vm->dev)->gen) {
2911 	case 8:
2912 	case 7:
2913 	case 6:
2914 		if (i915_is_ggtt(vm)) {
2915 			vma->unbind_vma = ggtt_unbind_vma;
2916 			vma->bind_vma = ggtt_bind_vma;
2917 		} else {
2918 			vma->unbind_vma = ppgtt_unbind_vma;
2919 			vma->bind_vma = ppgtt_bind_vma;
2920 		}
2921 		break;
2922 	case 5:
2923 	case 4:
2924 	case 3:
2925 	case 2:
2926 		BUG_ON(!i915_is_ggtt(vm));
2927 		vma->unbind_vma = i915_ggtt_unbind_vma;
2928 		vma->bind_vma = i915_ggtt_bind_vma;
2929 		break;
2930 	default:
2931 		BUG();
2932 	}
2933 
2934 	/* Keep GGTT vmas first to make debug easier */
2935 	if (i915_is_ggtt(vm))
2936 		list_add(&vma->vma_link, &obj->vma_list);
2937 	else
2938 		list_add_tail(&vma->vma_link, &obj->vma_list);
2939 
2940 	return vma;
2941 }
2942 
2943 struct i915_vma *
2944 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
2945 				  struct i915_address_space *vm)
2946 {
2947 	struct i915_vma *vma;
2948 
2949 	vma = i915_gem_obj_to_vma(obj, vm);
2950 	if (!vma)
2951 		vma = __i915_gem_vma_create(obj, vm);
2952 
2953 	return vma;
2954 }
2955