xref: /dragonfly/sys/dev/drm/i915/intel_ringbuffer.c (revision 066b6da2)
1 /*
2  * Copyright © 2008-2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Zou Nan hai <nanhai.zou@intel.com>
26  *    Xiang Hai hao<haihao.xiang@intel.com>
27  *
28  */
29 
30 #include <drm/drmP.h>
31 #include <drm/i915_drm.h>
32 #include "i915_drv.h"
33 #include "intel_drv.h"
34 #include "intel_ringbuffer.h"
35 #include <sys/sched.h>
36 
37 /*
38  * 965+ support PIPE_CONTROL commands, which provide finer grained control
39  * over cache flushing.
40  */
41 struct pipe_control {
42 	struct drm_i915_gem_object *obj;
43 	volatile u32 *cpu_page;
44 	u32 gtt_offset;
45 };
46 
47 static inline int ring_space(struct intel_ring_buffer *ring)
48 {
49 	int space = (ring->head & HEAD_ADDR) - (ring->tail + I915_RING_FREE_SPACE);
50 	if (space < 0)
51 		space += ring->size;
52 	return space;
53 }
54 
55 static int
56 gen2_render_ring_flush(struct intel_ring_buffer *ring,
57 		       u32	invalidate_domains,
58 		       u32	flush_domains)
59 {
60 	u32 cmd;
61 	int ret;
62 
63 	cmd = MI_FLUSH;
64 	if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
65 		cmd |= MI_NO_WRITE_FLUSH;
66 
67 	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
68 		cmd |= MI_READ_FLUSH;
69 
70 	ret = intel_ring_begin(ring, 2);
71 	if (ret)
72 		return ret;
73 
74 	intel_ring_emit(ring, cmd);
75 	intel_ring_emit(ring, MI_NOOP);
76 	intel_ring_advance(ring);
77 
78 	return 0;
79 }
80 
81 static int
82 gen4_render_ring_flush(struct intel_ring_buffer *ring,
83 		       u32	invalidate_domains,
84 		       u32	flush_domains)
85 {
86 	struct drm_device *dev = ring->dev;
87 	u32 cmd;
88 	int ret;
89 
90 	/*
91 	 * read/write caches:
92 	 *
93 	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
94 	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
95 	 * also flushed at 2d versus 3d pipeline switches.
96 	 *
97 	 * read-only caches:
98 	 *
99 	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
100 	 * MI_READ_FLUSH is set, and is always flushed on 965.
101 	 *
102 	 * I915_GEM_DOMAIN_COMMAND may not exist?
103 	 *
104 	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
105 	 * invalidated when MI_EXE_FLUSH is set.
106 	 *
107 	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
108 	 * invalidated with every MI_FLUSH.
109 	 *
110 	 * TLBs:
111 	 *
112 	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
113 	 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
114 	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
115 	 * are flushed at any MI_FLUSH.
116 	 */
117 
118 	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
119 	if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
120 		cmd &= ~MI_NO_WRITE_FLUSH;
121 	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
122 		cmd |= MI_EXE_FLUSH;
123 
124 	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
125 	    (IS_G4X(dev) || IS_GEN5(dev)))
126 		cmd |= MI_INVALIDATE_ISP;
127 
128 	ret = intel_ring_begin(ring, 2);
129 	if (ret)
130 		return ret;
131 
132 	intel_ring_emit(ring, cmd);
133 	intel_ring_emit(ring, MI_NOOP);
134 	intel_ring_advance(ring);
135 
136 	return 0;
137 }
138 
139 /**
140  * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
141  * implementing two workarounds on gen6.  From section 1.4.7.1
142  * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
143  *
144  * [DevSNB-C+{W/A}] Before any depth stall flush (including those
145  * produced by non-pipelined state commands), software needs to first
146  * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
147  * 0.
148  *
149  * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
150  * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
151  *
152  * And the workaround for these two requires this workaround first:
153  *
154  * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
155  * BEFORE the pipe-control with a post-sync op and no write-cache
156  * flushes.
157  *
158  * And this last workaround is tricky because of the requirements on
159  * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
160  * volume 2 part 1:
161  *
162  *     "1 of the following must also be set:
163  *      - Render Target Cache Flush Enable ([12] of DW1)
164  *      - Depth Cache Flush Enable ([0] of DW1)
165  *      - Stall at Pixel Scoreboard ([1] of DW1)
166  *      - Depth Stall ([13] of DW1)
167  *      - Post-Sync Operation ([13] of DW1)
168  *      - Notify Enable ([8] of DW1)"
169  *
170  * The cache flushes require the workaround flush that triggered this
171  * one, so we can't use it.  Depth stall would trigger the same.
172  * Post-sync nonzero is what triggered this second workaround, so we
173  * can't use that one either.  Notify enable is IRQs, which aren't
174  * really our business.  That leaves only stall at scoreboard.
175  */
176 static int
177 intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
178 {
179 	struct pipe_control *pc = ring->private;
180 	u32 scratch_addr = pc->gtt_offset + 128;
181 	int ret;
182 
183 
184 	ret = intel_ring_begin(ring, 6);
185 	if (ret)
186 		return ret;
187 
188 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
189 	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
190 			PIPE_CONTROL_STALL_AT_SCOREBOARD);
191 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
192 	intel_ring_emit(ring, 0); /* low dword */
193 	intel_ring_emit(ring, 0); /* high dword */
194 	intel_ring_emit(ring, MI_NOOP);
195 	intel_ring_advance(ring);
196 
197 	ret = intel_ring_begin(ring, 6);
198 	if (ret)
199 		return ret;
200 
201 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
202 	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
203 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
204 	intel_ring_emit(ring, 0);
205 	intel_ring_emit(ring, 0);
206 	intel_ring_emit(ring, MI_NOOP);
207 	intel_ring_advance(ring);
208 
209 	return 0;
210 }
211 
212 static int
213 gen6_render_ring_flush(struct intel_ring_buffer *ring,
214                          u32 invalidate_domains, u32 flush_domains)
215 {
216 	u32 flags = 0;
217 	struct pipe_control *pc = ring->private;
218 	u32 scratch_addr = pc->gtt_offset + 128;
219 	int ret;
220 
221 	/* Force SNB workarounds for PIPE_CONTROL flushes */
222 	ret = intel_emit_post_sync_nonzero_flush(ring);
223 	if (ret)
224 		return ret;
225 
226 	/* Just flush everything.  Experiments have shown that reducing the
227 	 * number of bits based on the write domains has little performance
228 	 * impact.
229 	 */
230 	if (flush_domains) {
231 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
232 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
233 		/*
234 		 * Ensure that any following seqno writes only happen
235 		 * when the render cache is indeed flushed.
236 		 */
237 		flags |= PIPE_CONTROL_CS_STALL;
238 	}
239 	if (invalidate_domains) {
240 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
241 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
242 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
243 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
244 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
245 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
246 		/*
247 		 * TLB invalidate requires a post-sync write.
248 		 */
249 		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
250 	}
251 
252 	ret = intel_ring_begin(ring, 4);
253 	if (ret)
254 		return ret;
255 
256 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
257 	intel_ring_emit(ring, flags);
258 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
259 	intel_ring_emit(ring, 0);
260 	intel_ring_advance(ring);
261 
262 	return 0;
263 }
264 
265 static int
266 gen7_render_ring_cs_stall_wa(struct intel_ring_buffer *ring)
267 {
268 	int ret;
269 
270 	ret = intel_ring_begin(ring, 4);
271 	if (ret)
272 		return ret;
273 
274 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
275 	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
276 			      PIPE_CONTROL_STALL_AT_SCOREBOARD);
277 	intel_ring_emit(ring, 0);
278 	intel_ring_emit(ring, 0);
279 	intel_ring_advance(ring);
280 
281 	return 0;
282 }
283 
284 static int
285 gen7_render_ring_flush(struct intel_ring_buffer *ring,
286 		       u32 invalidate_domains, u32 flush_domains)
287 {
288 	u32 flags = 0;
289 	struct pipe_control *pc = ring->private;
290 	u32 scratch_addr = pc->gtt_offset + 128;
291 	int ret;
292 
293 	/*
294 	 * Ensure that any following seqno writes only happen when the render
295 	 * cache is indeed flushed.
296 	 *
297 	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
298 	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
299 	 * don't try to be clever and just set it unconditionally.
300 	 */
301 	flags |= PIPE_CONTROL_CS_STALL;
302 
303 	/* Just flush everything.  Experiments have shown that reducing the
304 	 * number of bits based on the write domains has little performance
305 	 * impact.
306 	 */
307 	if (flush_domains) {
308 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
309 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
310 	}
311 	if (invalidate_domains) {
312 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
313 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
314 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
315 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
316 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
317 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
318 		/*
319 		 * TLB invalidate requires a post-sync write.
320 		 */
321 		flags |= PIPE_CONTROL_QW_WRITE;
322 
323 		/* Workaround: we must issue a pipe_control with CS-stall bit
324 		 * set before a pipe_control command that has the state cache
325 		 * invalidate bit set. */
326 		gen7_render_ring_cs_stall_wa(ring);
327 	}
328 
329 	ret = intel_ring_begin(ring, 4);
330 	if (ret)
331 		return ret;
332 
333 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
334 	intel_ring_emit(ring, flags);
335 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
336 	intel_ring_emit(ring, 0);
337 	intel_ring_advance(ring);
338 
339 	return 0;
340 }
341 
342 static void ring_write_tail(struct intel_ring_buffer *ring,
343 			    u32 value)
344 {
345 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
346 	I915_WRITE_TAIL(ring, value);
347 }
348 
349 u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
350 {
351 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
352 	u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
353 			RING_ACTHD(ring->mmio_base) : ACTHD;
354 
355 	return I915_READ(acthd_reg);
356 }
357 
358 static int init_ring_common(struct intel_ring_buffer *ring)
359 {
360 	struct drm_device *dev = ring->dev;
361 	drm_i915_private_t *dev_priv = dev->dev_private;
362 	struct drm_i915_gem_object *obj = ring->obj;
363 	int ret = 0;
364 	u32 head;
365 
366 	if (HAS_FORCE_WAKE(dev))
367 		gen6_gt_force_wake_get(dev_priv);
368 
369 	/* Stop the ring if it's running. */
370 	I915_WRITE_CTL(ring, 0);
371 	I915_WRITE_HEAD(ring, 0);
372 	ring->write_tail(ring, 0);
373 
374 	head = I915_READ_HEAD(ring) & HEAD_ADDR;
375 
376 	/* G45 ring initialization fails to reset head to zero */
377 	if (head != 0) {
378 		DRM_DEBUG_KMS("%s head not reset to zero "
379 			      "ctl %08x head %08x tail %08x start %08x\n",
380 			      ring->name,
381 			      I915_READ_CTL(ring),
382 			      I915_READ_HEAD(ring),
383 			      I915_READ_TAIL(ring),
384 			      I915_READ_START(ring));
385 
386 		I915_WRITE_HEAD(ring, 0);
387 
388 		if (I915_READ_HEAD(ring) & HEAD_ADDR) {
389 			DRM_ERROR("failed to set %s head to zero "
390 				  "ctl %08x head %08x tail %08x start %08x\n",
391 				  ring->name,
392 				  I915_READ_CTL(ring),
393 				  I915_READ_HEAD(ring),
394 				  I915_READ_TAIL(ring),
395 				  I915_READ_START(ring));
396 		}
397 	}
398 
399 	/* Initialize the ring. This must happen _after_ we've cleared the ring
400 	 * registers with the above sequence (the readback of the HEAD registers
401 	 * also enforces ordering), otherwise the hw might lose the new ring
402 	 * register values. */
403 	I915_WRITE_START(ring, obj->gtt_offset);
404 	I915_WRITE_CTL(ring,
405 			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
406 			| RING_VALID);
407 
408 	/* If the head is still not zero, the ring is dead */
409 	if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 &&
410 		     I915_READ_START(ring) == obj->gtt_offset &&
411 		     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
412 		DRM_ERROR("%s initialization failed "
413 				"ctl %08x head %08x tail %08x start %08x\n",
414 				ring->name,
415 				I915_READ_CTL(ring),
416 				I915_READ_HEAD(ring),
417 				I915_READ_TAIL(ring),
418 				I915_READ_START(ring));
419 		ret = -EIO;
420 		goto out;
421 	}
422 
423 	if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
424 		i915_kernel_lost_context(ring->dev);
425 	else {
426 		ring->head = I915_READ_HEAD(ring);
427 		ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
428 		ring->space = ring_space(ring);
429 		ring->last_retired_head = -1;
430 	}
431 
432 out:
433 	if (HAS_FORCE_WAKE(dev))
434 		gen6_gt_force_wake_put(dev_priv);
435 
436 	return ret;
437 }
438 
439 static int
440 init_pipe_control(struct intel_ring_buffer *ring)
441 {
442 	struct pipe_control *pc;
443 	struct drm_i915_gem_object *obj;
444 	int ret;
445 
446 	if (ring->private)
447 		return 0;
448 
449 	pc = kmalloc(sizeof(*pc), DRM_I915_GEM, M_WAITOK);
450 	if (!pc)
451 		return -ENOMEM;
452 
453 	obj = i915_gem_alloc_object(ring->dev, 4096);
454 	if (obj == NULL) {
455 		DRM_ERROR("Failed to allocate seqno page\n");
456 		ret = -ENOMEM;
457 		goto err;
458 	}
459 
460 	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
461 
462 	ret = i915_gem_object_pin(obj, 4096, true);
463 	if (ret)
464 		goto err_unref;
465 
466 	pc->gtt_offset = obj->gtt_offset;
467 	pc->cpu_page = (uint32_t *)kmem_alloc_nofault(&kernel_map, PAGE_SIZE, PAGE_SIZE);
468 	if (pc->cpu_page == NULL)
469 		goto err_unpin;
470 	pmap_qenter((uintptr_t)pc->cpu_page, &obj->pages[0], 1);
471 	pmap_invalidate_cache_range((vm_offset_t)pc->cpu_page,
472 	    (vm_offset_t)pc->cpu_page + PAGE_SIZE);
473 
474 	pc->obj = obj;
475 	ring->private = pc;
476 	return 0;
477 
478 err_unpin:
479 	i915_gem_object_unpin(obj);
480 err_unref:
481 	drm_gem_object_unreference(&obj->base);
482 err:
483 	drm_free(pc, DRM_I915_GEM);
484 	return ret;
485 }
486 
487 static void
488 cleanup_pipe_control(struct intel_ring_buffer *ring)
489 {
490 	struct pipe_control *pc = ring->private;
491 	struct drm_i915_gem_object *obj;
492 
493 	if (!ring->private)
494 		return;
495 
496 	obj = pc->obj;
497 	pmap_qremove((vm_offset_t)pc->cpu_page, 1);
498 	kmem_free(&kernel_map, (uintptr_t)pc->cpu_page, PAGE_SIZE);
499 	i915_gem_object_unpin(obj);
500 	drm_gem_object_unreference(&obj->base);
501 
502 	drm_free(pc, DRM_I915_GEM);
503 	ring->private = NULL;
504 }
505 
506 static int init_render_ring(struct intel_ring_buffer *ring)
507 {
508 	struct drm_device *dev = ring->dev;
509 	struct drm_i915_private *dev_priv = dev->dev_private;
510 	int ret = init_ring_common(ring);
511 
512 	if (INTEL_INFO(dev)->gen > 3)
513 		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
514 
515 	/* We need to disable the AsyncFlip performance optimisations in order
516 	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
517 	 * programmed to '1' on all products.
518 	 */
519 	if (INTEL_INFO(dev)->gen >= 6)
520 		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
521 
522 	/* Required for the hardware to program scanline values for waiting */
523 	if (INTEL_INFO(dev)->gen == 6)
524 		I915_WRITE(GFX_MODE,
525 			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_ALWAYS));
526 
527 	if (IS_GEN7(dev))
528 		I915_WRITE(GFX_MODE_GEN7,
529 			   _MASKED_BIT_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
530 			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
531 
532 	if (INTEL_INFO(dev)->gen >= 5) {
533 		ret = init_pipe_control(ring);
534 		if (ret)
535 			return ret;
536 	}
537 
538 	if (IS_GEN6(dev)) {
539 		/* From the Sandybridge PRM, volume 1 part 3, page 24:
540 		 * "If this bit is set, STCunit will have LRA as replacement
541 		 *  policy. [...] This bit must be reset.  LRA replacement
542 		 *  policy is not supported."
543 		 */
544 		I915_WRITE(CACHE_MODE_0,
545 			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
546 
547 		/* This is not explicitly set for GEN6, so read the register.
548 		 * see intel_ring_mi_set_context() for why we care.
549 		 * TODO: consider explicitly setting the bit for GEN5
550 		 */
551 		ring->itlb_before_ctx_switch =
552 			!!(I915_READ(GFX_MODE) & GFX_TLB_INVALIDATE_ALWAYS);
553 	}
554 
555 	if (INTEL_INFO(dev)->gen >= 6)
556 		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
557 
558 	if (HAS_L3_GPU_CACHE(dev))
559 		I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
560 
561 	return ret;
562 }
563 
564 static void render_ring_cleanup(struct intel_ring_buffer *ring)
565 {
566 	struct drm_device *dev = ring->dev;
567 
568 	if (!ring->private)
569 		return;
570 
571 	if (HAS_BROKEN_CS_TLB(dev))
572 		drm_gem_object_unreference(to_gem_object(ring->private));
573 
574 	cleanup_pipe_control(ring);
575 }
576 
577 static void
578 update_mboxes(struct intel_ring_buffer *ring,
579 	      u32 mmio_offset)
580 {
581 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
582 	intel_ring_emit(ring, mmio_offset);
583 	intel_ring_emit(ring, ring->outstanding_lazy_request);
584 }
585 
586 /**
587  * gen6_add_request - Update the semaphore mailbox registers
588  *
589  * @ring - ring that is adding a request
590  * @seqno - return seqno stuck into the ring
591  *
592  * Update the mailbox registers in the *other* rings with the current seqno.
593  * This acts like a signal in the canonical semaphore.
594  */
595 static int
596 gen6_add_request(struct intel_ring_buffer *ring)
597 {
598 	u32 mbox1_reg;
599 	u32 mbox2_reg;
600 	int ret;
601 
602 	ret = intel_ring_begin(ring, 10);
603 	if (ret)
604 		return ret;
605 
606 	mbox1_reg = ring->signal_mbox[0];
607 	mbox2_reg = ring->signal_mbox[1];
608 
609 	update_mboxes(ring, mbox1_reg);
610 	update_mboxes(ring, mbox2_reg);
611 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
612 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
613 	intel_ring_emit(ring, ring->outstanding_lazy_request);
614 	intel_ring_emit(ring, MI_USER_INTERRUPT);
615 	intel_ring_advance(ring);
616 
617 	return 0;
618 }
619 
620 /**
621  * intel_ring_sync - sync the waiter to the signaller on seqno
622  *
623  * @waiter - ring that is waiting
624  * @signaller - ring which has, or will signal
625  * @seqno - seqno which the waiter will block on
626  */
627 static int
628 gen6_ring_sync(struct intel_ring_buffer *waiter,
629 	       struct intel_ring_buffer *signaller,
630 	       u32 seqno)
631 {
632 	int ret;
633 	u32 dw1 = MI_SEMAPHORE_MBOX |
634 		  MI_SEMAPHORE_COMPARE |
635 		  MI_SEMAPHORE_REGISTER;
636 
637 	/* Throughout all of the GEM code, seqno passed implies our current
638 	 * seqno is >= the last seqno executed. However for hardware the
639 	 * comparison is strictly greater than.
640 	 */
641 	seqno -= 1;
642 
643 	WARN_ON(signaller->semaphore_register[waiter->id] ==
644 		MI_SEMAPHORE_SYNC_INVALID);
645 
646 	ret = intel_ring_begin(waiter, 4);
647 	if (ret)
648 		return ret;
649 
650 	intel_ring_emit(waiter,
651 			dw1 | signaller->semaphore_register[waiter->id]);
652 	intel_ring_emit(waiter, seqno);
653 	intel_ring_emit(waiter, 0);
654 	intel_ring_emit(waiter, MI_NOOP);
655 	intel_ring_advance(waiter);
656 
657 	return 0;
658 }
659 
/*
 * Emit one four-dword PIPE_CONTROL (qword write + depth stall) aimed at
 * addr__ in the global GTT, followed by two zero dwords.
 */
#define PIPE_CONTROL_FLUSH(ring__, addr__)				\
do {									\
	intel_ring_emit(ring__,						\
			GFX_OP_PIPE_CONTROL(4) |			\
			PIPE_CONTROL_QW_WRITE |				\
			PIPE_CONTROL_DEPTH_STALL);			\
	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);	\
	intel_ring_emit(ring__, 0);					\
	intel_ring_emit(ring__, 0);					\
} while (0)
668 
669 static int
670 pc_render_add_request(struct intel_ring_buffer *ring)
671 {
672 	struct pipe_control *pc = ring->private;
673 	u32 scratch_addr = pc->gtt_offset + 128;
674 	int ret;
675 
676 	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
677 	 * incoherent with writes to memory, i.e. completely fubar,
678 	 * so we need to use PIPE_NOTIFY instead.
679 	 *
680 	 * However, we also need to workaround the qword write
681 	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
682 	 * memory before requesting an interrupt.
683 	 */
684 	ret = intel_ring_begin(ring, 32);
685 	if (ret)
686 		return ret;
687 
688 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
689 			PIPE_CONTROL_WRITE_FLUSH |
690 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
691 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
692 	intel_ring_emit(ring, ring->outstanding_lazy_request);
693 	intel_ring_emit(ring, 0);
694 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
695 	scratch_addr += 128; /* write to separate cachelines */
696 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
697 	scratch_addr += 128;
698 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
699 	scratch_addr += 128;
700 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
701 	scratch_addr += 128;
702 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
703 	scratch_addr += 128;
704 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
705 
706 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
707 			PIPE_CONTROL_WRITE_FLUSH |
708 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
709 			PIPE_CONTROL_NOTIFY);
710 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
711 	intel_ring_emit(ring, ring->outstanding_lazy_request);
712 	intel_ring_emit(ring, 0);
713 	intel_ring_advance(ring);
714 
715 	return 0;
716 }
717 
718 static u32
719 gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
720 {
721 	/* Workaround to force correct ordering between irq and seqno writes on
722 	 * ivb (and maybe also on snb) by reading from a CS register (like
723 	 * ACTHD) before reading the status page. */
724 	if (!lazy_coherency)
725 		intel_ring_get_active_head(ring);
726 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
727 }
728 
729 static u32
730 ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
731 {
732 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
733 }
734 
735 static u32
736 pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
737 {
738 	struct pipe_control *pc = ring->private;
739 	return pc->cpu_page[0];
740 }
741 
742 static bool
743 gen5_ring_get_irq(struct intel_ring_buffer *ring)
744 {
745 	struct drm_device *dev = ring->dev;
746 	drm_i915_private_t *dev_priv = dev->dev_private;
747 
748 	if (!dev->irq_enabled)
749 		return false;
750 
751 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
752 	if (ring->irq_refcount++ == 0) {
753 		dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
754 		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
755 		POSTING_READ(GTIMR);
756 	}
757 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
758 
759 	return true;
760 }
761 
762 static void
763 gen5_ring_put_irq(struct intel_ring_buffer *ring)
764 {
765 	struct drm_device *dev = ring->dev;
766 	drm_i915_private_t *dev_priv = dev->dev_private;
767 
768 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
769 	if (--ring->irq_refcount == 0) {
770 		dev_priv->gt_irq_mask |= ring->irq_enable_mask;
771 		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
772 		POSTING_READ(GTIMR);
773 	}
774 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
775 }
776 
777 static bool
778 i9xx_ring_get_irq(struct intel_ring_buffer *ring)
779 {
780 	struct drm_device *dev = ring->dev;
781 	drm_i915_private_t *dev_priv = dev->dev_private;
782 
783 	if (!dev->irq_enabled)
784 		return false;
785 
786 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
787 	if (ring->irq_refcount++ == 0) {
788 		dev_priv->irq_mask &= ~ring->irq_enable_mask;
789 		I915_WRITE(IMR, dev_priv->irq_mask);
790 		POSTING_READ(IMR);
791 	}
792 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
793 
794 	return true;
795 }
796 
797 static void
798 i9xx_ring_put_irq(struct intel_ring_buffer *ring)
799 {
800 	struct drm_device *dev = ring->dev;
801 	drm_i915_private_t *dev_priv = dev->dev_private;
802 
803 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
804 	if (--ring->irq_refcount == 0) {
805 		dev_priv->irq_mask |= ring->irq_enable_mask;
806 		I915_WRITE(IMR, dev_priv->irq_mask);
807 		POSTING_READ(IMR);
808 	}
809 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
810 }
811 
812 static bool
813 i8xx_ring_get_irq(struct intel_ring_buffer *ring)
814 {
815 	struct drm_device *dev = ring->dev;
816 	drm_i915_private_t *dev_priv = dev->dev_private;
817 
818 	if (!dev->irq_enabled)
819 		return false;
820 
821 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
822 	if (ring->irq_refcount++ == 0) {
823 		dev_priv->irq_mask &= ~ring->irq_enable_mask;
824 		I915_WRITE16(IMR, dev_priv->irq_mask);
825 		POSTING_READ16(IMR);
826 	}
827 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
828 
829 	return true;
830 }
831 
832 static void
833 i8xx_ring_put_irq(struct intel_ring_buffer *ring)
834 {
835 	struct drm_device *dev = ring->dev;
836 	drm_i915_private_t *dev_priv = dev->dev_private;
837 
838 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
839 	if (--ring->irq_refcount == 0) {
840 		dev_priv->irq_mask |= ring->irq_enable_mask;
841 		I915_WRITE16(IMR, dev_priv->irq_mask);
842 		POSTING_READ16(IMR);
843 	}
844 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
845 }
846 
847 void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
848 {
849 	struct drm_device *dev = ring->dev;
850 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
851 	u32 mmio = 0;
852 
853 	/* The ring status page addresses are no longer next to the rest of
854 	 * the ring registers as of gen7.
855 	 */
856 	if (IS_GEN7(dev)) {
857 		switch (ring->id) {
858 		case RCS:
859 			mmio = RENDER_HWS_PGA_GEN7;
860 			break;
861 		case BCS:
862 			mmio = BLT_HWS_PGA_GEN7;
863 			break;
864 		case VCS:
865 			mmio = BSD_HWS_PGA_GEN7;
866 			break;
867 		}
868 	} else if (IS_GEN6(ring->dev)) {
869 		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
870 	} else {
871 		mmio = RING_HWS_PGA(ring->mmio_base);
872 	}
873 
874 	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
875 	POSTING_READ(mmio);
876 }
877 
878 static int
879 bsd_ring_flush(struct intel_ring_buffer *ring,
880 	       u32    invalidate_domains,
881 	       u32    flush_domains)
882 {
883 	int ret;
884 
885 	ret = intel_ring_begin(ring, 2);
886 	if (ret)
887 		return ret;
888 
889 	intel_ring_emit(ring, MI_FLUSH);
890 	intel_ring_emit(ring, MI_NOOP);
891 	intel_ring_advance(ring);
892 	return 0;
893 }
894 
895 static int
896 i9xx_add_request(struct intel_ring_buffer *ring)
897 {
898 	int ret;
899 
900 	ret = intel_ring_begin(ring, 4);
901 	if (ret)
902 		return ret;
903 
904 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
905 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
906 	intel_ring_emit(ring, ring->outstanding_lazy_request);
907 	intel_ring_emit(ring, MI_USER_INTERRUPT);
908 	intel_ring_advance(ring);
909 
910 	return 0;
911 }
912 
913 static bool
914 gen6_ring_get_irq(struct intel_ring_buffer *ring)
915 {
916 	struct drm_device *dev = ring->dev;
917 	drm_i915_private_t *dev_priv = dev->dev_private;
918 
919 	if (!dev->irq_enabled)
920 	       return false;
921 
922 	/* It looks like we need to prevent the gt from suspending while waiting
923 	 * for an notifiy irq, otherwise irqs seem to get lost on at least the
924 	 * blt/bsd rings on ivb. */
925 	gen6_gt_force_wake_get(dev_priv);
926 
927 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
928 	if (ring->irq_refcount++ == 0) {
929 		if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
930 			I915_WRITE_IMR(ring, ~(ring->irq_enable_mask |
931 						GEN6_RENDER_L3_PARITY_ERROR));
932 		else
933 			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
934 		dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
935 		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
936 		POSTING_READ(GTIMR);
937 	}
938 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
939 
940 	return true;
941 }
942 
943 static void
944 gen6_ring_put_irq(struct intel_ring_buffer *ring)
945 {
946 	struct drm_device *dev = ring->dev;
947 	drm_i915_private_t *dev_priv = dev->dev_private;
948 
949 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
950 	if (--ring->irq_refcount == 0) {
951 		if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
952 			I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
953 		else
954 			I915_WRITE_IMR(ring, ~0);
955 		dev_priv->gt_irq_mask |= ring->irq_enable_mask;
956 		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
957 		POSTING_READ(GTIMR);
958 	}
959 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
960 
961 	gen6_gt_force_wake_put(dev_priv);
962 }
963 
964 static int
965 i965_dispatch_execbuffer(struct intel_ring_buffer *ring,
966 			 u32 offset, u32 length,
967 			 unsigned flags)
968 {
969 	int ret;
970 
971 	ret = intel_ring_begin(ring, 2);
972 	if (ret)
973 		return ret;
974 
975 	intel_ring_emit(ring,
976 			MI_BATCH_BUFFER_START |
977 			MI_BATCH_GTT |
978 			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
979 	intel_ring_emit(ring, offset);
980 	intel_ring_advance(ring);
981 
982 	return 0;
983 }
984 
985 /* Just userspace ABI convention to limit the wa batch bo to a resonable size */
986 #define I830_BATCH_LIMIT (256*1024)
987 static int
988 i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
989 				u32 offset, u32 len,
990 				unsigned flags)
991 {
992 	int ret;
993 
994 	if (flags & I915_DISPATCH_PINNED) {
995 		ret = intel_ring_begin(ring, 4);
996 		if (ret)
997 			return ret;
998 
999 		intel_ring_emit(ring, MI_BATCH_BUFFER);
1000 		intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
1001 		intel_ring_emit(ring, offset + len - 8);
1002 		intel_ring_emit(ring, MI_NOOP);
1003 		intel_ring_advance(ring);
1004 	} else {
1005 		struct drm_i915_gem_object *obj = ring->private;
1006 		u32 cs_offset = obj->gtt_offset;
1007 
1008 		if (len > I830_BATCH_LIMIT)
1009 			return -ENOSPC;
1010 
1011 		ret = intel_ring_begin(ring, 9+3);
1012 		if (ret)
1013 			return ret;
1014 		/* Blit the batch (which has now all relocs applied) to the stable batch
1015 		 * scratch bo area (so that the CS never stumbles over its tlb
1016 		 * invalidation bug) ... */
1017 		intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD |
1018 				XY_SRC_COPY_BLT_WRITE_ALPHA |
1019 				XY_SRC_COPY_BLT_WRITE_RGB);
1020 		intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096);
1021 		intel_ring_emit(ring, 0);
1022 		intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024);
1023 		intel_ring_emit(ring, cs_offset);
1024 		intel_ring_emit(ring, 0);
1025 		intel_ring_emit(ring, 4096);
1026 		intel_ring_emit(ring, offset);
1027 		intel_ring_emit(ring, MI_FLUSH);
1028 
1029 		/* ... and execute it. */
1030 		intel_ring_emit(ring, MI_BATCH_BUFFER);
1031 		intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
1032 		intel_ring_emit(ring, cs_offset + len - 8);
1033 		intel_ring_advance(ring);
1034 	}
1035 
1036 	return 0;
1037 }
1038 
1039 static int
1040 i915_dispatch_execbuffer(struct intel_ring_buffer *ring,
1041 			 u32 offset, u32 len,
1042 			 unsigned flags)
1043 {
1044 	int ret;
1045 
1046 	ret = intel_ring_begin(ring, 2);
1047 	if (ret)
1048 		return ret;
1049 
1050 	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
1051 	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
1052 	intel_ring_advance(ring);
1053 
1054 	return 0;
1055 }
1056 
1057 static void cleanup_status_page(struct intel_ring_buffer *ring)
1058 {
1059 	struct drm_i915_gem_object *obj;
1060 
1061 	obj = ring->status_page.obj;
1062 	if (obj == NULL)
1063 		return;
1064 
1065 	pmap_qremove((vm_offset_t)ring->status_page.page_addr, 1);
1066 	kmem_free(&kernel_map, (vm_offset_t)ring->status_page.page_addr,
1067 	    PAGE_SIZE);
1068 	i915_gem_object_unpin(obj);
1069 	drm_gem_object_unreference(&obj->base);
1070 	ring->status_page.obj = NULL;
1071 }
1072 
1073 static int init_status_page(struct intel_ring_buffer *ring)
1074 {
1075 	struct drm_device *dev = ring->dev;
1076 	struct drm_i915_gem_object *obj;
1077 	int ret;
1078 
1079 	obj = i915_gem_alloc_object(dev, 4096);
1080 	if (obj == NULL) {
1081 		DRM_ERROR("Failed to allocate status page\n");
1082 		ret = -ENOMEM;
1083 		goto err;
1084 	}
1085 
1086 	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
1087 
1088 	ret = i915_gem_object_pin(obj, 4096, true);
1089 	if (ret != 0) {
1090 		goto err_unref;
1091 	}
1092 
1093 	ring->status_page.gfx_addr = obj->gtt_offset;
1094 	ring->status_page.page_addr = (void *)kmem_alloc_nofault(&kernel_map,
1095 	    PAGE_SIZE, PAGE_SIZE);
1096 	if (ring->status_page.page_addr == NULL) {
1097 		ret = -ENOMEM;
1098 		goto err_unpin;
1099 	}
1100 	pmap_qenter((vm_offset_t)ring->status_page.page_addr, &obj->pages[0],
1101 	    1);
1102 	pmap_invalidate_cache_range((vm_offset_t)ring->status_page.page_addr,
1103 	    (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE);
1104 	ring->status_page.obj = obj;
1105 	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1106 
1107 	intel_ring_setup_status_page(ring);
1108 	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
1109 			ring->name, ring->status_page.gfx_addr);
1110 
1111 	return 0;
1112 
1113 err_unpin:
1114 	i915_gem_object_unpin(obj);
1115 err_unref:
1116 	drm_gem_object_unreference(&obj->base);
1117 err:
1118 	return ret;
1119 }
1120 
1121 static int init_phys_hws_pga(struct intel_ring_buffer *ring)
1122 {
1123 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
1124 	u32 addr;
1125 
1126 	if (!dev_priv->status_page_dmah) {
1127 		dev_priv->status_page_dmah =
1128 			drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE, ~0);
1129 		if (!dev_priv->status_page_dmah)
1130 			return -ENOMEM;
1131 	}
1132 
1133 	addr = dev_priv->status_page_dmah->busaddr;
1134 	if (INTEL_INFO(ring->dev)->gen >= 4)
1135 		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
1136 	I915_WRITE(HWS_PGA, addr);
1137 
1138 	ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1139 	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1140 
1141 	return 0;
1142 }
1143 
1144 static inline void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
1145 {
1146 	return pmap_mapdev_attr(phys_addr, size, VM_MEMATTR_WRITE_COMBINING);
1147 }
1148 
1149 static int intel_init_ring_buffer(struct drm_device *dev,
1150 				  struct intel_ring_buffer *ring)
1151 {
1152 	struct drm_i915_gem_object *obj;
1153 	int ret;
1154 
1155 	ring->dev = dev;
1156 	INIT_LIST_HEAD(&ring->active_list);
1157 	INIT_LIST_HEAD(&ring->request_list);
1158 	ring->size = 32 * PAGE_SIZE;
1159 	memset(ring->sync_seqno, 0, sizeof(ring->sync_seqno));
1160 
1161 	init_waitqueue_head(&ring->irq_queue);
1162 
1163 	if (I915_NEED_GFX_HWS(dev)) {
1164 		ret = init_status_page(ring);
1165 		if (ret)
1166 			return ret;
1167 	} else {
1168 		BUG_ON(ring->id != RCS);
1169 		ret = init_phys_hws_pga(ring);
1170 		if (ret)
1171 			return ret;
1172 	}
1173 
1174 	obj = i915_gem_alloc_object(dev, ring->size);
1175 	if (obj == NULL) {
1176 		DRM_ERROR("Failed to allocate ringbuffer\n");
1177 		ret = -ENOMEM;
1178 		goto err_hws;
1179 	}
1180 
1181 	ring->obj = obj;
1182 
1183 	ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
1184 	if (ret)
1185 		goto err_unref;
1186 
1187 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
1188 	if (ret)
1189 		goto err_unpin;
1190 
1191 	ring->virtual_start = ioremap_wc(dev->agp->base + obj->gtt_offset,
1192 					 ring->size);
1193 	if (ring->virtual_start == NULL) {
1194 		DRM_ERROR("Failed to map ringbuffer.\n");
1195 		ret = -EINVAL;
1196 		goto err_unpin;
1197 	}
1198 
1199 	ret = ring->init(ring);
1200 	if (ret)
1201 		goto err_unmap;
1202 
1203 	/* Workaround an erratum on the i830 which causes a hang if
1204 	 * the TAIL pointer points to within the last 2 cachelines
1205 	 * of the buffer.
1206 	 */
1207 	ring->effective_size = ring->size;
1208 	if (IS_I830(ring->dev) || IS_845G(ring->dev))
1209 		ring->effective_size -= 128;
1210 
1211 	return 0;
1212 
1213 err_unmap:
1214 	pmap_unmapdev((vm_offset_t)ring->virtual_start, ring->size);
1215 err_unpin:
1216 	i915_gem_object_unpin(obj);
1217 err_unref:
1218 	drm_gem_object_unreference(&obj->base);
1219 	ring->obj = NULL;
1220 err_hws:
1221 	cleanup_status_page(ring);
1222 	return ret;
1223 }
1224 
1225 void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
1226 {
1227 	struct drm_i915_private *dev_priv;
1228 	int ret;
1229 
1230 	if (ring->obj == NULL)
1231 		return;
1232 
1233 	/* Disable the ring buffer. The ring must be idle at this point */
1234 	dev_priv = ring->dev->dev_private;
1235 	ret = intel_ring_idle(ring);
1236 	if (ret)
1237 		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
1238 			  ring->name, ret);
1239 
1240 	I915_WRITE_CTL(ring, 0);
1241 
1242 	pmap_unmapdev((vm_offset_t)ring->virtual_start, ring->size);
1243 
1244 	i915_gem_object_unpin(ring->obj);
1245 	drm_gem_object_unreference(&ring->obj->base);
1246 	ring->obj = NULL;
1247 
1248 	if (ring->cleanup)
1249 		ring->cleanup(ring);
1250 
1251 	cleanup_status_page(ring);
1252 }
1253 
1254 static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
1255 {
1256 	int ret;
1257 
1258 	ret = i915_wait_seqno(ring, seqno);
1259 	if (!ret)
1260 		i915_gem_retire_requests_ring(ring);
1261 
1262 	return ret;
1263 }
1264 
1265 static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
1266 {
1267 	struct drm_i915_gem_request *request;
1268 	u32 seqno = 0;
1269 	int ret;
1270 
1271 	i915_gem_retire_requests_ring(ring);
1272 
1273 	if (ring->last_retired_head != -1) {
1274 		ring->head = ring->last_retired_head;
1275 		ring->last_retired_head = -1;
1276 		ring->space = ring_space(ring);
1277 		if (ring->space >= n)
1278 			return 0;
1279 	}
1280 
1281 	list_for_each_entry(request, &ring->request_list, list) {
1282 		int space;
1283 
1284 		if (request->tail == -1)
1285 			continue;
1286 
1287 		space = request->tail - (ring->tail + I915_RING_FREE_SPACE);
1288 		if (space < 0)
1289 			space += ring->size;
1290 		if (space >= n) {
1291 			seqno = request->seqno;
1292 			break;
1293 		}
1294 
1295 		/* Consume this request in case we need more space than
1296 		 * is available and so need to prevent a race between
1297 		 * updating last_retired_head and direct reads of
1298 		 * I915_RING_HEAD. It also provides a nice sanity check.
1299 		 */
1300 		request->tail = -1;
1301 	}
1302 
1303 	if (seqno == 0)
1304 		return -ENOSPC;
1305 
1306 	ret = intel_ring_wait_seqno(ring, seqno);
1307 	if (ret)
1308 		return ret;
1309 
1310 	if (WARN_ON(ring->last_retired_head == -1))
1311 		return -ENOSPC;
1312 
1313 	ring->head = ring->last_retired_head;
1314 	ring->last_retired_head = -1;
1315 	ring->space = ring_space(ring);
1316 	if (WARN_ON(ring->space < n))
1317 		return -ENOSPC;
1318 
1319 	return 0;
1320 }
1321 
1322 static int ring_wait_for_space(struct intel_ring_buffer *ring, int n)
1323 {
1324 	struct drm_device *dev = ring->dev;
1325 	struct drm_i915_private *dev_priv = dev->dev_private;
1326 	unsigned long end;
1327 	int ret;
1328 
1329 	ret = intel_ring_wait_request(ring, n);
1330 	if (ret != -ENOSPC)
1331 		return ret;
1332 
1333 	/* With GEM the hangcheck timer should kick us out of the loop,
1334 	 * leaving it early runs the risk of corrupting GEM state (due
1335 	 * to running on almost untested codepaths). But on resume
1336 	 * timers don't work yet, so prevent a complete hang in that
1337 	 * case by choosing an insanely large timeout. */
1338 	end = ticks + 60 * hz;
1339 
1340 	do {
1341 		ring->head = I915_READ_HEAD(ring);
1342 		ring->space = ring_space(ring);
1343 		if (ring->space >= n) {
1344 			return 0;
1345 		}
1346 
1347 #if 0
1348 		if (dev->primary->master) {
1349 			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
1350 			if (master_priv->sarea_priv)
1351 				master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
1352 		}
1353 #else
1354 		if (dev_priv->sarea_priv)
1355 			dev_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
1356 #endif
1357 
1358 		DELAY(1000);
1359 
1360 		ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
1361 		if (ret)
1362 			return ret;
1363 	} while (!time_after(ticks, end));
1364 	return -EBUSY;
1365 }
1366 
1367 static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
1368 {
1369 	uint32_t __iomem *virt;
1370 	int rem = ring->size - ring->tail;
1371 
1372 	if (ring->space < rem) {
1373 		int ret = ring_wait_for_space(ring, rem);
1374 		if (ret)
1375 			return ret;
1376 	}
1377 
1378 	virt = (unsigned int *)((char *)ring->virtual_start + ring->tail);
1379 	rem /= 4;
1380 	while (rem--)
1381 		iowrite32(MI_NOOP, virt++);
1382 
1383 	ring->tail = 0;
1384 	ring->space = ring_space(ring);
1385 
1386 	return 0;
1387 }
1388 
1389 int intel_ring_idle(struct intel_ring_buffer *ring)
1390 {
1391 	u32 seqno;
1392 	int ret;
1393 
1394 	/* We need to add any requests required to flush the objects and ring */
1395 	if (ring->outstanding_lazy_request) {
1396 		ret = i915_add_request(ring, NULL, NULL);
1397 		if (ret)
1398 			return ret;
1399 	}
1400 
1401 	/* Wait upon the last request to be completed */
1402 	if (list_empty(&ring->request_list))
1403 		return 0;
1404 
1405 	seqno = list_entry(ring->request_list.prev,
1406 			   struct drm_i915_gem_request,
1407 			   list)->seqno;
1408 
1409 	return i915_wait_seqno(ring, seqno);
1410 }
1411 
1412 static int
1413 intel_ring_alloc_seqno(struct intel_ring_buffer *ring)
1414 {
1415 	if (ring->outstanding_lazy_request)
1416 		return 0;
1417 
1418 	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_request);
1419 }
1420 
1421 int intel_ring_begin(struct intel_ring_buffer *ring,
1422 		     int num_dwords)
1423 {
1424 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
1425 	int n = 4*num_dwords;
1426 	int ret;
1427 
1428 	ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
1429 	if (ret)
1430 		return ret;
1431 
1432 	/* Preallocate the olr before touching the ring */
1433 	ret = intel_ring_alloc_seqno(ring);
1434 	if (ret)
1435 		return ret;
1436 
1437 	if (unlikely(ring->tail + n > ring->effective_size)) {
1438 		ret = intel_wrap_ring_buffer(ring);
1439 		if (unlikely(ret))
1440 			return ret;
1441 	}
1442 
1443 	if (unlikely(ring->space < n)) {
1444 		ret = ring_wait_for_space(ring, n);
1445 		if (unlikely(ret))
1446 			return ret;
1447 	}
1448 
1449 	ring->space -= n;
1450 	return 0;
1451 }
1452 
1453 void intel_ring_advance(struct intel_ring_buffer *ring)
1454 {
1455 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
1456 
1457 	ring->tail &= ring->size - 1;
1458 	if (dev_priv->stop_rings & intel_ring_flag(ring))
1459 		return;
1460 	ring->write_tail(ring, ring->tail);
1461 }
1462 
1463 
1464 static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
1465 				     u32 value)
1466 {
1467 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
1468 
1469        /* Every tail move must follow the sequence below */
1470 
1471 	/* Disable notification that the ring is IDLE. The GT
1472 	 * will then assume that it is busy and bring it out of rc6.
1473 	 */
1474 	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
1475 		   _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
1476 
1477 	/* Clear the context id. Here be magic! */
1478 	I915_WRITE64(GEN6_BSD_RNCID, 0x0);
1479 
1480 	/* Wait for the ring not to be idle, i.e. for it to wake up. */
1481 	if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
1482 		      GEN6_BSD_SLEEP_INDICATOR) == 0,
1483 		     50))
1484 		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
1485 
1486 	/* Now that the ring is fully powered up, update the tail */
1487 	I915_WRITE_TAIL(ring, value);
1488 	POSTING_READ(RING_TAIL(ring->mmio_base));
1489 
1490 	/* Let the ring send IDLE messages to the GT again,
1491 	 * and so let it sleep to conserve power when idle.
1492 	 */
1493 	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
1494 		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
1495 }
1496 
1497 static int gen6_ring_flush(struct intel_ring_buffer *ring,
1498 			   u32 invalidate, u32 flush)
1499 {
1500 	uint32_t cmd;
1501 	int ret;
1502 
1503 	ret = intel_ring_begin(ring, 4);
1504 	if (ret)
1505 		return ret;
1506 
1507 	cmd = MI_FLUSH_DW;
1508 	/*
1509 	 * Bspec vol 1c.5 - video engine command streamer:
1510 	 * "If ENABLED, all TLBs will be invalidated once the flush
1511 	 * operation is complete. This bit is only valid when the
1512 	 * Post-Sync Operation field is a value of 1h or 3h."
1513 	 */
1514 	if (invalidate & I915_GEM_GPU_DOMAINS)
1515 		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
1516 			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
1517 	intel_ring_emit(ring, cmd);
1518 	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
1519 	intel_ring_emit(ring, 0);
1520 	intel_ring_emit(ring, MI_NOOP);
1521 	intel_ring_advance(ring);
1522 	return 0;
1523 }
1524 
1525 static int
1526 hsw_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
1527 			      u32 offset, u32 len,
1528 			      unsigned flags)
1529 {
1530 	int ret;
1531 
1532 	ret = intel_ring_begin(ring, 2);
1533 	if (ret)
1534 		return ret;
1535 
1536 	intel_ring_emit(ring,
1537 			MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW |
1538 			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_HSW));
1539 	/* bit0-7 is the length on GEN6+ */
1540 	intel_ring_emit(ring, offset);
1541 	intel_ring_advance(ring);
1542 
1543 	return 0;
1544 }
1545 
1546 static int
1547 gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
1548 			      u32 offset, u32 len,
1549 			      unsigned flags)
1550 {
1551 	int ret;
1552 
1553 	ret = intel_ring_begin(ring, 2);
1554 	if (ret)
1555 		return ret;
1556 
1557 	intel_ring_emit(ring,
1558 			MI_BATCH_BUFFER_START |
1559 			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
1560 	/* bit0-7 is the length on GEN6+ */
1561 	intel_ring_emit(ring, offset);
1562 	intel_ring_advance(ring);
1563 
1564 	return 0;
1565 }
1566 
1567 /* Blitter support (SandyBridge+) */
1568 
1569 static int blt_ring_flush(struct intel_ring_buffer *ring,
1570 			  u32 invalidate, u32 flush)
1571 {
1572 	uint32_t cmd;
1573 	int ret;
1574 
1575 	ret = intel_ring_begin(ring, 4);
1576 	if (ret)
1577 		return ret;
1578 
1579 	cmd = MI_FLUSH_DW;
1580 	/*
1581 	 * Bspec vol 1c.3 - blitter engine command streamer:
1582 	 * "If ENABLED, all TLBs will be invalidated once the flush
1583 	 * operation is complete. This bit is only valid when the
1584 	 * Post-Sync Operation field is a value of 1h or 3h."
1585 	 */
1586 	if (invalidate & I915_GEM_DOMAIN_RENDER)
1587 		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
1588 			MI_FLUSH_DW_OP_STOREDW;
1589 	intel_ring_emit(ring, cmd);
1590 	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
1591 	intel_ring_emit(ring, 0);
1592 	intel_ring_emit(ring, MI_NOOP);
1593 	intel_ring_advance(ring);
1594 	return 0;
1595 }
1596 
1597 int intel_init_render_ring_buffer(struct drm_device *dev)
1598 {
1599 	drm_i915_private_t *dev_priv = dev->dev_private;
1600 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
1601 
1602 	ring->name = "render ring";
1603 	ring->id = RCS;
1604 	ring->mmio_base = RENDER_RING_BASE;
1605 
1606 	if (INTEL_INFO(dev)->gen >= 6) {
1607 		ring->add_request = gen6_add_request;
1608 		ring->flush = gen7_render_ring_flush;
1609 		if (INTEL_INFO(dev)->gen == 6)
1610 			ring->flush = gen6_render_ring_flush;
1611 		ring->irq_get = gen6_ring_get_irq;
1612 		ring->irq_put = gen6_ring_put_irq;
1613 		ring->irq_enable_mask = GT_USER_INTERRUPT;
1614 		ring->get_seqno = gen6_ring_get_seqno;
1615 		ring->sync_to = gen6_ring_sync;
1616 		ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_INVALID;
1617 		ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_RV;
1618 		ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_RB;
1619 		ring->signal_mbox[0] = GEN6_VRSYNC;
1620 		ring->signal_mbox[1] = GEN6_BRSYNC;
1621 	} else if (IS_GEN5(dev)) {
1622 		ring->add_request = pc_render_add_request;
1623 		ring->flush = gen4_render_ring_flush;
1624 		ring->get_seqno = pc_render_get_seqno;
1625 		ring->irq_get = gen5_ring_get_irq;
1626 		ring->irq_put = gen5_ring_put_irq;
1627 		ring->irq_enable_mask = GT_USER_INTERRUPT | GT_PIPE_NOTIFY;
1628 	} else {
1629 		ring->add_request = i9xx_add_request;
1630 		if (INTEL_INFO(dev)->gen < 4)
1631 			ring->flush = gen2_render_ring_flush;
1632 		else
1633 			ring->flush = gen4_render_ring_flush;
1634 		ring->get_seqno = ring_get_seqno;
1635 		if (IS_GEN2(dev)) {
1636 			ring->irq_get = i8xx_ring_get_irq;
1637 			ring->irq_put = i8xx_ring_put_irq;
1638 		} else {
1639 			ring->irq_get = i9xx_ring_get_irq;
1640 			ring->irq_put = i9xx_ring_put_irq;
1641 		}
1642 		ring->irq_enable_mask = I915_USER_INTERRUPT;
1643 	}
1644 	ring->write_tail = ring_write_tail;
1645 	if (IS_HASWELL(dev))
1646 		ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
1647 	else if (INTEL_INFO(dev)->gen >= 6)
1648 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
1649 	else if (INTEL_INFO(dev)->gen >= 4)
1650 		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
1651 	else if (IS_I830(dev) || IS_845G(dev))
1652 		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
1653 	else
1654 		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
1655 	ring->init = init_render_ring;
1656 	ring->cleanup = render_ring_cleanup;
1657 
1658 	/* Workaround batchbuffer to combat CS tlb bug. */
1659 	if (HAS_BROKEN_CS_TLB(dev)) {
1660 		struct drm_i915_gem_object *obj;
1661 		int ret;
1662 
1663 		obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT);
1664 		if (obj == NULL) {
1665 			DRM_ERROR("Failed to allocate batch bo\n");
1666 			return -ENOMEM;
1667 		}
1668 
1669 		ret = i915_gem_object_pin(obj, 0, true);
1670 		if (ret != 0) {
1671 			drm_gem_object_unreference(&obj->base);
1672 			DRM_ERROR("Failed to ping batch bo\n");
1673 			return ret;
1674 		}
1675 
1676 		ring->private = obj;
1677 	}
1678 
1679 	return intel_init_ring_buffer(dev, ring);
1680 }
1681 
1682 int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
1683 {
1684 	drm_i915_private_t *dev_priv = dev->dev_private;
1685 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
1686 	int ret;
1687 
1688 	ring->name = "render ring";
1689 	ring->id = RCS;
1690 	ring->mmio_base = RENDER_RING_BASE;
1691 
1692 	if (INTEL_INFO(dev)->gen >= 6) {
1693 		/* non-kms not supported on gen6+ */
1694 		return -ENODEV;
1695 	}
1696 
1697 	/* Note: gem is not supported on gen5/ilk without kms (the corresponding
1698 	 * gem_init ioctl returns with -ENODEV). Hence we do not need to set up
1699 	 * the special gen5 functions. */
1700 	ring->add_request = i9xx_add_request;
1701 	if (INTEL_INFO(dev)->gen < 4)
1702 		ring->flush = gen2_render_ring_flush;
1703 	else
1704 		ring->flush = gen4_render_ring_flush;
1705 	ring->get_seqno = ring_get_seqno;
1706 	if (IS_GEN2(dev)) {
1707 		ring->irq_get = i8xx_ring_get_irq;
1708 		ring->irq_put = i8xx_ring_put_irq;
1709 	} else {
1710 		ring->irq_get = i9xx_ring_get_irq;
1711 		ring->irq_put = i9xx_ring_put_irq;
1712 	}
1713 	ring->irq_enable_mask = I915_USER_INTERRUPT;
1714 	ring->write_tail = ring_write_tail;
1715 	if (INTEL_INFO(dev)->gen >= 4)
1716 		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
1717 	else if (IS_I830(dev) || IS_845G(dev))
1718 		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
1719 	else
1720 		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
1721 	ring->init = init_render_ring;
1722 	ring->cleanup = render_ring_cleanup;
1723 
1724 	ring->dev = dev;
1725 	INIT_LIST_HEAD(&ring->active_list);
1726 	INIT_LIST_HEAD(&ring->request_list);
1727 	INIT_LIST_HEAD(&ring->gpu_write_list);
1728 
1729 	ring->size = size;
1730 	ring->effective_size = ring->size;
1731 	if (IS_I830(ring->dev) || IS_845G(ring->dev))
1732 		ring->effective_size -= 128;
1733 
1734 	ring->virtual_start = ioremap_wc(start, size);
1735 	if (ring->virtual_start == NULL) {
1736 		DRM_ERROR("can not ioremap virtual address for"
1737 			  " ring buffer\n");
1738 		return -ENOMEM;
1739 	}
1740 
1741 	if (!I915_NEED_GFX_HWS(dev)) {
1742 		ret = init_phys_hws_pga(ring);
1743 		if (ret)
1744 			return ret;
1745 	}
1746 
1747 	return 0;
1748 }
1749 
1750 int intel_init_bsd_ring_buffer(struct drm_device *dev)
1751 {
1752 	drm_i915_private_t *dev_priv = dev->dev_private;
1753 	struct intel_ring_buffer *ring = &dev_priv->ring[VCS];
1754 
1755 	ring->name = "bsd ring";
1756 	ring->id = VCS;
1757 
1758 	ring->write_tail = ring_write_tail;
1759 	if (IS_GEN6(dev) || IS_GEN7(dev)) {
1760 		ring->mmio_base = GEN6_BSD_RING_BASE;
1761 		/* gen6 bsd needs a special wa for tail updates */
1762 		if (IS_GEN6(dev))
1763 			ring->write_tail = gen6_bsd_ring_write_tail;
1764 		ring->flush = gen6_ring_flush;
1765 		ring->add_request = gen6_add_request;
1766 		ring->get_seqno = gen6_ring_get_seqno;
1767 		ring->irq_enable_mask = GEN6_BSD_USER_INTERRUPT;
1768 		ring->irq_get = gen6_ring_get_irq;
1769 		ring->irq_put = gen6_ring_put_irq;
1770 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
1771 		ring->sync_to = gen6_ring_sync;
1772 		ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_VR;
1773 		ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_INVALID;
1774 		ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_VB;
1775 		ring->signal_mbox[0] = GEN6_RVSYNC;
1776 		ring->signal_mbox[1] = GEN6_BVSYNC;
1777 	} else {
1778 		ring->mmio_base = BSD_RING_BASE;
1779 		ring->flush = bsd_ring_flush;
1780 		ring->add_request = i9xx_add_request;
1781 		ring->get_seqno = ring_get_seqno;
1782 		if (IS_GEN5(dev)) {
1783 			ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
1784 			ring->irq_get = gen5_ring_get_irq;
1785 			ring->irq_put = gen5_ring_put_irq;
1786 		} else {
1787 			ring->irq_enable_mask = I915_BSD_USER_INTERRUPT;
1788 			ring->irq_get = i9xx_ring_get_irq;
1789 			ring->irq_put = i9xx_ring_put_irq;
1790 		}
1791 		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
1792 	}
1793 	ring->init = init_ring_common;
1794 
1795 	return intel_init_ring_buffer(dev, ring);
1796 }
1797 
1798 int intel_init_blt_ring_buffer(struct drm_device *dev)
1799 {
1800 	drm_i915_private_t *dev_priv = dev->dev_private;
1801 	struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
1802 
1803 	ring->name = "blitter ring";
1804 	ring->id = BCS;
1805 
1806 	ring->mmio_base = BLT_RING_BASE;
1807 	ring->write_tail = ring_write_tail;
1808 	ring->flush = blt_ring_flush;
1809 	ring->add_request = gen6_add_request;
1810 	ring->get_seqno = gen6_ring_get_seqno;
1811 	ring->irq_enable_mask = GEN6_BLITTER_USER_INTERRUPT;
1812 	ring->irq_get = gen6_ring_get_irq;
1813 	ring->irq_put = gen6_ring_put_irq;
1814 	ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
1815 	ring->sync_to = gen6_ring_sync;
1816 	ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_BR;
1817 	ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_BV;
1818 	ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_INVALID;
1819 	ring->signal_mbox[0] = GEN6_RBSYNC;
1820 	ring->signal_mbox[1] = GEN6_VBSYNC;
1821 	ring->init = init_ring_common;
1822 
1823 	return intel_init_ring_buffer(dev, ring);
1824 }
1825 
1826 int
1827 intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
1828 {
1829 	int ret;
1830 
1831 	if (!ring->gpu_caches_dirty)
1832 		return 0;
1833 
1834 	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
1835 	if (ret)
1836 		return ret;
1837 
1838 	ring->gpu_caches_dirty = false;
1839 	return 0;
1840 }
1841 
1842 int
1843 intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring)
1844 {
1845 	uint32_t flush_domains;
1846 	int ret;
1847 
1848 	flush_domains = 0;
1849 	if (ring->gpu_caches_dirty)
1850 		flush_domains = I915_GEM_GPU_DOMAINS;
1851 
1852 	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
1853 	if (ret)
1854 		return ret;
1855 
1856 	ring->gpu_caches_dirty = false;
1857 	return 0;
1858 }
1859