/*
 * Copyright © 2010 Daniel Vetter
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * $FreeBSD: src/sys/dev/drm2/i915/i915_gem_gtt.c,v 1.1 2012/05/22 11:07:44 kib Exp $
 */

#include <sys/sfbuf.h>

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "intel_drv.h"

typedef uint32_t gtt_pte_t;

/* PPGTT stuff */
#define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))

#define GEN6_PDE_VALID			(1 << 0)
/* gen6+ has bits 11:4 for physical addr bits 39:32 */
#define GEN6_PDE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)

#define GEN6_PTE_VALID			(1 << 0)
#define GEN6_PTE_UNCACHED		(1 << 1)
#define HSW_PTE_UNCACHED		(0)
#define GEN6_PTE_CACHE_LLC		(2 << 1)
#define GEN6_PTE_CACHE_LLC_MLC		(3 << 1)
#define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)

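/*
 * Build a gen6+ PTE for the given physical address and cache level.
 * GEN6_GTT_ADDR_ENCODE() folds address bits 39:32 into PTE bits 11:4
 * ((addr >> 28) & 0xff0); pages are page-aligned, so the low PTE bits
 * remain free for the valid and cacheability flags set below.
 */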
static inline gtt_pte_t pte_encode(struct drm_device *dev,
				   dma_addr_t addr,
				   enum i915_cache_level level)
{
	gtt_pte_t pte = GEN6_PTE_VALID;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_LLC_MLC:
		/* Haswell doesn't set L3 this way */
		if (IS_HASWELL(dev))
			pte |= GEN6_PTE_CACHE_LLC;
		else
			pte |= GEN6_PTE_CACHE_LLC_MLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		if (IS_HASWELL(dev))
			pte |= HSW_PTE_UNCACHED;
		else
			pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		BUG();
	}

	return pte;
}

/* PPGTT support for Sandybridge/Gen6 and later */
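/*
 * Point num_entries PTEs starting at first_entry back at the scratch
 * page.  Page tables are mapped one at a time through sf_buf; a range
 * may start mid-table, so the first and last passes can be partial.
 */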
static void i915_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
				   unsigned first_entry,
				   unsigned num_entries)
{
	gtt_pte_t *pt_vaddr;
	gtt_pte_t scratch_pte;
	struct sf_buf *sf;
	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	unsigned last_pte, i;

	scratch_pte = GEN6_GTT_ADDR_ENCODE(ppgtt->scratch_page_dma_addr);
	scratch_pte |= GEN6_PTE_VALID | GEN6_PTE_CACHE_LLC;

	while (num_entries) {
		last_pte = first_pte + num_entries;
		if (last_pte > I915_PPGTT_PT_ENTRIES)
			last_pte = I915_PPGTT_PT_ENTRIES;

		sf = sf_buf_alloc(ppgtt->pt_pages[act_pd]);
		pt_vaddr = (uint32_t *)(uintptr_t)sf_buf_kva(sf);

		for (i = first_pte; i < last_pte; i++)
			pt_vaddr[i] = scratch_pte;

		sf_buf_free(sf);

		num_entries -= last_pte - first_pte;
		first_pte = 0;
		act_pd++;
	}
}

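/*
 * Allocate the aliasing ppgtt's page directory and page tables.  The
 * PDEs are stolen from the top of the global GTT, so only the page
 * tables themselves are allocated here; every PTE starts out pointing
 * at the scratch page.
 */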
int
i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv;
	struct i915_hw_ppgtt *ppgtt;
	u_int first_pd_entry_in_global_pt, i;

	dev_priv = dev->dev_private;

	/*
	 * ppgtt PDEs reside in the global gtt pagetable, which has 512*1024
	 * entries. For aliasing ppgtt support we just steal them at the end for
	 * now.
	 */
	first_pd_entry_in_global_pt = 512 * 1024 - I915_PPGTT_PD_ENTRIES;

	ppgtt = kmalloc(sizeof(*ppgtt), DRM_I915_GEM, M_WAITOK | M_ZERO);

	ppgtt->num_pd_entries = I915_PPGTT_PD_ENTRIES;
	ppgtt->pt_pages = kmalloc(sizeof(vm_page_t) * ppgtt->num_pd_entries,
	    DRM_I915_GEM, M_WAITOK | M_ZERO);

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		ppgtt->pt_pages[i] = vm_page_alloc(NULL, 0,
		    VM_ALLOC_NORMAL | VM_ALLOC_ZERO);
		if (ppgtt->pt_pages[i] == NULL) {
			/*
			 * Publish the partially constructed ppgtt so
			 * the cleanup path can find and unwind it.
			 */
			dev_priv->mm.aliasing_ppgtt = ppgtt;
			i915_gem_cleanup_aliasing_ppgtt(dev);
			return (-ENOMEM);
		}
	}

	ppgtt->scratch_page_dma_addr = dev_priv->mm.gtt->scratch_page_dma;

	i915_ppgtt_clear_range(ppgtt, 0, ppgtt->num_pd_entries *
	    I915_PPGTT_PT_ENTRIES);
	ppgtt->pd_offset = (first_pd_entry_in_global_pt) * sizeof(uint32_t);
	dev_priv->mm.aliasing_ppgtt = ppgtt;
	return (0);
}

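/*
 * Write PTEs for num_entries pages starting at first_entry, walking
 * the page tables through sf_buf mappings as in i915_ppgtt_clear_range()
 * but sourcing each address from the pages array.
 */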
static void
i915_ppgtt_insert_pages(struct i915_hw_ppgtt *ppgtt, unsigned first_entry,
    unsigned num_entries, vm_page_t *pages, uint32_t pte_flags)
{
	uint32_t *pt_vaddr, pte;
	struct sf_buf *sf;
	unsigned act_pd, first_pte;
	unsigned last_pte, i;
	vm_paddr_t page_addr;

	act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
	first_pte = first_entry % I915_PPGTT_PT_ENTRIES;

	while (num_entries) {
		last_pte = first_pte + num_entries;
		if (last_pte > I915_PPGTT_PT_ENTRIES)
			last_pte = I915_PPGTT_PT_ENTRIES;

		sf = sf_buf_alloc(ppgtt->pt_pages[act_pd]);
		pt_vaddr = (uint32_t *)(uintptr_t)sf_buf_kva(sf);

		for (i = first_pte; i < last_pte; i++) {
			page_addr = VM_PAGE_TO_PHYS(*pages);
			pte = GEN6_PTE_ADDR_ENCODE(page_addr);
			pt_vaddr[i] = pte | pte_flags;

			pages++;
		}

		sf_buf_free(sf);

		num_entries -= last_pte - first_pte;
		first_pte = 0;
		act_pd++;
	}
}

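/*
 * Map an object's backing pages into the aliasing ppgtt at its GTT
 * offset, with PTE cacheability flags derived from cache_level.
 */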
void
i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
    struct drm_i915_gem_object *obj, enum i915_cache_level cache_level)
{
	uint32_t pte_flags;

	pte_flags = GEN6_PTE_VALID;

	switch (cache_level) {
	case I915_CACHE_LLC_MLC:
		pte_flags |= GEN6_PTE_CACHE_LLC_MLC;
		break;
	case I915_CACHE_LLC:
		pte_flags |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte_flags |= GEN6_PTE_UNCACHED;
		break;
	default:
		panic("cache mode");
	}

	i915_ppgtt_insert_pages(ppgtt, obj->gtt_space->start >> PAGE_SHIFT,
	    obj->base.size >> PAGE_SHIFT, obj->pages, pte_flags);
}

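/* Return an object's ppgtt range to the scratch page. */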
void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
			      struct drm_i915_gem_object *obj)
{
	i915_ppgtt_clear_range(ppgtt, obj->gtt_space->start >> PAGE_SHIFT,
	    obj->base.size >> PAGE_SHIFT);
}

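/*
 * Tear down the aliasing ppgtt and free its page-table pages.  This
 * also serves as the unwind path for a partially constructed ppgtt,
 * so a missing ppgtt or missing pages are tolerated.
 */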
void
i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv;
	struct i915_hw_ppgtt *ppgtt;
	vm_page_t m;
	int i;

	dev_priv = dev->dev_private;
	ppgtt = dev_priv->mm.aliasing_ppgtt;
	if (ppgtt == NULL)
		return;
	dev_priv->mm.aliasing_ppgtt = NULL;

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		m = ppgtt->pt_pages[i];
		if (m != NULL) {
			vm_page_busy_wait(m, FALSE, "i915gem");
			vm_page_unwire(m, 0);
			vm_page_free(m);
		}
	}
	drm_free(ppgtt->pt_pages, DRM_I915_GEM);
	drm_free(ppgtt, DRM_I915_GEM);
}

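/*
 * Translate an i915 cache level into the AGP memory type used for
 * global GTT insertions.
 */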
static unsigned int
cache_level_to_agp_type(struct drm_device *dev, enum i915_cache_level
    cache_level)
{

	switch (cache_level) {
	case I915_CACHE_LLC_MLC:
		if (INTEL_INFO(dev)->gen >= 6)
			return (AGP_USER_CACHED_MEMORY_LLC_MLC);
		/*
		 * Older chipsets do not have this extra level of CPU
		 * caching, so fall through and request the PTE simply
		 * as cached.
		 */
	case I915_CACHE_LLC:
		return (AGP_USER_CACHED_MEMORY);

	default:
	case I915_CACHE_NONE:
		return (AGP_USER_MEMORY);
	}
}

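/*
 * Chipsets with the do_idle_maps quirk must idle the GPU before GTT
 * mappings are torn down.  do_idling() force-idles the GPU with
 * interruptible waits disabled and returns the previous interruptible
 * state for undo_idling() to restore.
 */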
static bool
do_idling(struct drm_i915_private *dev_priv)
{
	bool ret = dev_priv->mm.interruptible;

	if (unlikely(dev_priv->mm.gtt->do_idle_maps)) {
		dev_priv->mm.interruptible = false;
		if (i915_gpu_idle(dev_priv->dev, false)) {
			DRM_ERROR("Couldn't idle GPU\n");
			/* Wait a bit, in hopes it avoids the hang */
			DELAY(10);
		}
	}

	return ret;
}

static void
undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
{

	if (unlikely(dev_priv->mm.gtt->do_idle_maps))
		dev_priv->mm.interruptible = interruptible;
}

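/*
 * Rebuild every GTT mapping after resume: scrub our GTT range with
 * scratch pages, then reinsert each bound object and flush.
 */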
void
i915_gem_restore_gtt_mappings(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv;
	struct drm_i915_gem_object *obj;

	dev_priv = dev->dev_private;

	/* First fill our portion of the GTT with scratch pages */
	intel_gtt_clear_range(dev_priv->mm.gtt_start / PAGE_SIZE,
	    (dev_priv->mm.gtt_end - dev_priv->mm.gtt_start) / PAGE_SIZE);

	list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) {
		i915_gem_clflush_object(obj);
		i915_gem_gtt_rebind_object(obj, obj->cache_level);
	}

	intel_gtt_chipset_flush();
}

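/* Insert an object's pages into the global GTT at its allocated offset. */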
int
i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj)
{
	unsigned int agp_type;

	agp_type = cache_level_to_agp_type(obj->base.dev, obj->cache_level);
	intel_gtt_insert_pages(obj->gtt_space->start >> PAGE_SHIFT,
	    obj->base.size >> PAGE_SHIFT, obj->pages, agp_type);
	return (0);
}

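/* Rewrite an object's GTT entries with a (possibly new) cache level. */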
void
i915_gem_gtt_rebind_object(struct drm_i915_gem_object *obj,
    enum i915_cache_level cache_level)
{
	struct drm_device *dev;
	unsigned int agp_type;

	dev = obj->base.dev;
	agp_type = cache_level_to_agp_type(dev, cache_level);

	intel_gtt_insert_pages(obj->gtt_space->start >> PAGE_SHIFT,
	    obj->base.size >> PAGE_SHIFT, obj->pages, agp_type);
}

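/*
 * Drop an object's global GTT mapping, idling the GPU around the
 * teardown on chipsets that require it.
 */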
void
i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible;

	interruptible = do_idling(dev_priv);

	intel_gtt_clear_range(obj->gtt_space->start >> PAGE_SHIFT,
	    obj->base.size >> PAGE_SHIFT);

	undo_idling(dev_priv, interruptible);
}

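/*
 * GFX_MODE and RING_MODE_GEN7 are masked registers: the upper 16 bits
 * of a write select which of the lower 16 bits actually get updated.
 */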
#define GFX_MODE_ENABLE(bit) (((bit) << 16) | (bit))

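/*
 * Enable the aliasing ppgtt at init/resume time: write the PDEs into
 * their reserved slots at the top of the global GTT, point every
 * ring's PP_DIR_BASE at the page directory, and flip the PPGTT enable
 * bits.
 */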
void i915_gem_init_ppgtt(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t pd_offset;
	struct intel_ring_buffer *ring;
	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
	uint32_t pd_entry;
	vm_paddr_t pt_addr;
	u_int first_pd_entry_in_global_pt, i;

	if (ppgtt == NULL)
		return;

	first_pd_entry_in_global_pt = 512 * 1024 - I915_PPGTT_PD_ENTRIES;
	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		pt_addr = VM_PAGE_TO_PHYS(ppgtt->pt_pages[i]);
		pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
		pd_entry |= GEN6_PDE_VALID;
		intel_gtt_write(first_pd_entry_in_global_pt + i, pd_entry);
	}
	/* Read back an entry to ensure the PDE writes above have landed. */
	intel_gtt_read_pte(first_pd_entry_in_global_pt);

	pd_offset = ppgtt->pd_offset;
	pd_offset /= 64; /* in cachelines */
	pd_offset <<= 16;

	if (INTEL_INFO(dev)->gen == 6) {
		uint32_t ecochk = I915_READ(GAM_ECOCHK);
		I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
				       ECOCHK_PPGTT_CACHE64B);
		I915_WRITE(GFX_MODE, GFX_MODE_ENABLE(GFX_PPGTT_ENABLE));
	} else if (INTEL_INFO(dev)->gen >= 7) {
		I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B);
		/* GFX_MODE is per-ring on gen7+ */
	}

	for (i = 0; i < I915_NUM_RINGS; i++) {
		ring = &dev_priv->ring[i];

		if (INTEL_INFO(dev)->gen >= 7)
			I915_WRITE(RING_MODE_GEN7(ring),
				   GFX_MODE_ENABLE(GFX_PPGTT_ENABLE));

		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
		I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
	}
}
419