xref: /dragonfly/sys/dev/drm/i915/intel_ringbuffer.c (revision f7df6c8e)
1 /*
2  * Copyright © 2008-2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Zou Nan hai <nanhai.zou@intel.com>
26  *    Xiang Hai hao<haihao.xiang@intel.com>
27  *
28  */
29 
30 #include <drm/drmP.h>
31 #include <drm/i915_drm.h>
32 #include "i915_drv.h"
33 #include "intel_drv.h"
34 #include "intel_ringbuffer.h"
35 
36 /*
37  * 965+ support PIPE_CONTROL commands, which provide finer grained control
38  * over cache flushing.
39  */
40 struct pipe_control {
41 	struct drm_i915_gem_object *obj;
42 	volatile u32 *cpu_page;
43 	u32 gtt_offset;
44 };
45 
46 static inline int ring_space(struct intel_ring_buffer *ring)
47 {
48 	int space = (ring->head & HEAD_ADDR) - (ring->tail + I915_RING_FREE_SPACE);
49 	if (space < 0)
50 		space += ring->size;
51 	return space;
52 }
53 
54 static int
55 gen2_render_ring_flush(struct intel_ring_buffer *ring,
56 		       u32	invalidate_domains,
57 		       u32	flush_domains)
58 {
59 	u32 cmd;
60 	int ret;
61 
62 	cmd = MI_FLUSH;
63 	if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
64 		cmd |= MI_NO_WRITE_FLUSH;
65 
66 	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
67 		cmd |= MI_READ_FLUSH;
68 
69 	ret = intel_ring_begin(ring, 2);
70 	if (ret)
71 		return ret;
72 
73 	intel_ring_emit(ring, cmd);
74 	intel_ring_emit(ring, MI_NOOP);
75 	intel_ring_advance(ring);
76 
77 	return 0;
78 }
79 
80 static int
81 gen4_render_ring_flush(struct intel_ring_buffer *ring,
82 		       u32	invalidate_domains,
83 		       u32	flush_domains)
84 {
85 	struct drm_device *dev = ring->dev;
86 	u32 cmd;
87 	int ret;
88 
89 	/*
90 	 * read/write caches:
91 	 *
92 	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
93 	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
94 	 * also flushed at 2d versus 3d pipeline switches.
95 	 *
96 	 * read-only caches:
97 	 *
98 	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
99 	 * MI_READ_FLUSH is set, and is always flushed on 965.
100 	 *
101 	 * I915_GEM_DOMAIN_COMMAND may not exist?
102 	 *
103 	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
104 	 * invalidated when MI_EXE_FLUSH is set.
105 	 *
106 	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
107 	 * invalidated with every MI_FLUSH.
108 	 *
109 	 * TLBs:
110 	 *
111 	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
112 	 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
113 	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
114 	 * are flushed at any MI_FLUSH.
115 	 */
116 
117 	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
118 	if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
119 		cmd &= ~MI_NO_WRITE_FLUSH;
120 	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
121 		cmd |= MI_EXE_FLUSH;
122 
123 	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
124 	    (IS_G4X(dev) || IS_GEN5(dev)))
125 		cmd |= MI_INVALIDATE_ISP;
126 
127 	ret = intel_ring_begin(ring, 2);
128 	if (ret)
129 		return ret;
130 
131 	intel_ring_emit(ring, cmd);
132 	intel_ring_emit(ring, MI_NOOP);
133 	intel_ring_advance(ring);
134 
135 	return 0;
136 }
137 
138 /**
139  * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
140  * implementing two workarounds on gen6.  From section 1.4.7.1
141  * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
142  *
143  * [DevSNB-C+{W/A}] Before any depth stall flush (including those
144  * produced by non-pipelined state commands), software needs to first
145  * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
146  * 0.
147  *
148  * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
149  * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
150  *
151  * And the workaround for these two requires this workaround first:
152  *
153  * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
154  * BEFORE the pipe-control with a post-sync op and no write-cache
155  * flushes.
156  *
157  * And this last workaround is tricky because of the requirements on
158  * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
159  * volume 2 part 1:
160  *
161  *     "1 of the following must also be set:
162  *      - Render Target Cache Flush Enable ([12] of DW1)
163  *      - Depth Cache Flush Enable ([0] of DW1)
164  *      - Stall at Pixel Scoreboard ([1] of DW1)
165  *      - Depth Stall ([13] of DW1)
166  *      - Post-Sync Operation ([13] of DW1)
167  *      - Notify Enable ([8] of DW1)"
168  *
169  * The cache flushes require the workaround flush that triggered this
170  * one, so we can't use it.  Depth stall would trigger the same.
171  * Post-sync nonzero is what triggered this second workaround, so we
172  * can't use that one either.  Notify enable is IRQs, which aren't
173  * really our business.  That leaves only stall at scoreboard.
174  */
175 static int
176 intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
177 {
178 	struct pipe_control *pc = ring->private;
179 	u32 scratch_addr = pc->gtt_offset + 128;
180 	int ret;
181 
182 
183 	ret = intel_ring_begin(ring, 6);
184 	if (ret)
185 		return ret;
186 
187 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
188 	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
189 			PIPE_CONTROL_STALL_AT_SCOREBOARD);
190 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
191 	intel_ring_emit(ring, 0); /* low dword */
192 	intel_ring_emit(ring, 0); /* high dword */
193 	intel_ring_emit(ring, MI_NOOP);
194 	intel_ring_advance(ring);
195 
196 	ret = intel_ring_begin(ring, 6);
197 	if (ret)
198 		return ret;
199 
200 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
201 	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
202 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
203 	intel_ring_emit(ring, 0);
204 	intel_ring_emit(ring, 0);
205 	intel_ring_emit(ring, MI_NOOP);
206 	intel_ring_advance(ring);
207 
208 	return 0;
209 }
210 
211 static int
212 gen6_render_ring_flush(struct intel_ring_buffer *ring,
213                          u32 invalidate_domains, u32 flush_domains)
214 {
215 	u32 flags = 0;
216 	struct pipe_control *pc = ring->private;
217 	u32 scratch_addr = pc->gtt_offset + 128;
218 	int ret;
219 
220 	/* Force SNB workarounds for PIPE_CONTROL flushes */
221 	ret = intel_emit_post_sync_nonzero_flush(ring);
222 	if (ret)
223 		return ret;
224 
225 	/* Just flush everything.  Experiments have shown that reducing the
226 	 * number of bits based on the write domains has little performance
227 	 * impact.
228 	 */
229 	if (flush_domains) {
230 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
231 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
232 		/*
233 		 * Ensure that any following seqno writes only happen
234 		 * when the render cache is indeed flushed.
235 		 */
236 		flags |= PIPE_CONTROL_CS_STALL;
237 	}
238 	if (invalidate_domains) {
239 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
240 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
241 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
242 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
243 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
244 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
245 		/*
246 		 * TLB invalidate requires a post-sync write.
247 		 */
248 		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
249 	}
250 
251 	ret = intel_ring_begin(ring, 4);
252 	if (ret)
253 		return ret;
254 
255 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
256 	intel_ring_emit(ring, flags);
257 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
258 	intel_ring_emit(ring, 0);
259 	intel_ring_advance(ring);
260 
261 	return 0;
262 }
263 
264 static int
265 gen7_render_ring_cs_stall_wa(struct intel_ring_buffer *ring)
266 {
267 	int ret;
268 
269 	ret = intel_ring_begin(ring, 4);
270 	if (ret)
271 		return ret;
272 
273 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
274 	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
275 			      PIPE_CONTROL_STALL_AT_SCOREBOARD);
276 	intel_ring_emit(ring, 0);
277 	intel_ring_emit(ring, 0);
278 	intel_ring_advance(ring);
279 
280 	return 0;
281 }
282 
283 static int
284 gen7_render_ring_flush(struct intel_ring_buffer *ring,
285 		       u32 invalidate_domains, u32 flush_domains)
286 {
287 	u32 flags = 0;
288 	struct pipe_control *pc = ring->private;
289 	u32 scratch_addr = pc->gtt_offset + 128;
290 	int ret;
291 
292 	/*
293 	 * Ensure that any following seqno writes only happen when the render
294 	 * cache is indeed flushed.
295 	 *
296 	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
297 	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
298 	 * don't try to be clever and just set it unconditionally.
299 	 */
300 	flags |= PIPE_CONTROL_CS_STALL;
301 
302 	/* Just flush everything.  Experiments have shown that reducing the
303 	 * number of bits based on the write domains has little performance
304 	 * impact.
305 	 */
306 	if (flush_domains) {
307 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
308 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
309 	}
310 	if (invalidate_domains) {
311 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
312 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
313 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
314 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
315 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
316 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
317 		/*
318 		 * TLB invalidate requires a post-sync write.
319 		 */
320 		flags |= PIPE_CONTROL_QW_WRITE;
321 
322 		/* Workaround: we must issue a pipe_control with CS-stall bit
323 		 * set before a pipe_control command that has the state cache
324 		 * invalidate bit set. */
325 		gen7_render_ring_cs_stall_wa(ring);
326 	}
327 
328 	ret = intel_ring_begin(ring, 4);
329 	if (ret)
330 		return ret;
331 
332 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
333 	intel_ring_emit(ring, flags);
334 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
335 	intel_ring_emit(ring, 0);
336 	intel_ring_advance(ring);
337 
338 	return 0;
339 }
340 
341 static void ring_write_tail(struct intel_ring_buffer *ring,
342 			    u32 value)
343 {
344 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
345 	I915_WRITE_TAIL(ring, value);
346 }
347 
348 u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
349 {
350 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
351 	u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
352 			RING_ACTHD(ring->mmio_base) : ACTHD;
353 
354 	return I915_READ(acthd_reg);
355 }
356 
357 static int init_ring_common(struct intel_ring_buffer *ring)
358 {
359 	struct drm_device *dev = ring->dev;
360 	drm_i915_private_t *dev_priv = dev->dev_private;
361 	struct drm_i915_gem_object *obj = ring->obj;
362 	int ret = 0;
363 	u32 head;
364 
365 	if (HAS_FORCE_WAKE(dev))
366 		gen6_gt_force_wake_get(dev_priv);
367 
368 	/* Stop the ring if it's running. */
369 	I915_WRITE_CTL(ring, 0);
370 	I915_WRITE_HEAD(ring, 0);
371 	ring->write_tail(ring, 0);
372 
373 	head = I915_READ_HEAD(ring) & HEAD_ADDR;
374 
375 	/* G45 ring initialization fails to reset head to zero */
376 	if (head != 0) {
377 		DRM_DEBUG_KMS("%s head not reset to zero "
378 			      "ctl %08x head %08x tail %08x start %08x\n",
379 			      ring->name,
380 			      I915_READ_CTL(ring),
381 			      I915_READ_HEAD(ring),
382 			      I915_READ_TAIL(ring),
383 			      I915_READ_START(ring));
384 
385 		I915_WRITE_HEAD(ring, 0);
386 
387 		if (I915_READ_HEAD(ring) & HEAD_ADDR) {
388 			DRM_ERROR("failed to set %s head to zero "
389 				  "ctl %08x head %08x tail %08x start %08x\n",
390 				  ring->name,
391 				  I915_READ_CTL(ring),
392 				  I915_READ_HEAD(ring),
393 				  I915_READ_TAIL(ring),
394 				  I915_READ_START(ring));
395 		}
396 	}
397 
398 	/* Initialize the ring. This must happen _after_ we've cleared the ring
399 	 * registers with the above sequence (the readback of the HEAD registers
400 	 * also enforces ordering), otherwise the hw might lose the new ring
401 	 * register values. */
402 	I915_WRITE_START(ring, obj->gtt_offset);
403 	I915_WRITE_CTL(ring,
404 			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
405 			| RING_VALID);
406 
407 	/* If the head is still not zero, the ring is dead */
408 	if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 &&
409 		     I915_READ_START(ring) == obj->gtt_offset &&
410 		     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
411 		DRM_ERROR("%s initialization failed "
412 				"ctl %08x head %08x tail %08x start %08x\n",
413 				ring->name,
414 				I915_READ_CTL(ring),
415 				I915_READ_HEAD(ring),
416 				I915_READ_TAIL(ring),
417 				I915_READ_START(ring));
418 		ret = -EIO;
419 		goto out;
420 	}
421 
422 	if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
423 		i915_kernel_lost_context(ring->dev);
424 	else {
425 		ring->head = I915_READ_HEAD(ring);
426 		ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
427 		ring->space = ring_space(ring);
428 		ring->last_retired_head = -1;
429 	}
430 
431 out:
432 	if (HAS_FORCE_WAKE(dev))
433 		gen6_gt_force_wake_put(dev_priv);
434 
435 	return ret;
436 }
437 
438 static int
439 init_pipe_control(struct intel_ring_buffer *ring)
440 {
441 	struct pipe_control *pc;
442 	struct drm_i915_gem_object *obj;
443 	int ret;
444 
445 	if (ring->private)
446 		return 0;
447 
448 	pc = kmalloc(sizeof(*pc), M_DRM, M_WAITOK);
449 	if (!pc)
450 		return -ENOMEM;
451 
452 	obj = i915_gem_alloc_object(ring->dev, 4096);
453 	if (obj == NULL) {
454 		DRM_ERROR("Failed to allocate seqno page\n");
455 		ret = -ENOMEM;
456 		goto err;
457 	}
458 
459 	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
460 
461 	ret = i915_gem_object_pin(obj, 4096, true, false);
462 	if (ret)
463 		goto err_unref;
464 
465 	pc->gtt_offset = obj->gtt_offset;
466 	pc->cpu_page = (uint32_t *)kmem_alloc_nofault(&kernel_map, PAGE_SIZE, PAGE_SIZE);
467 	if (pc->cpu_page == NULL)
468 		goto err_unpin;
469 	pmap_qenter((uintptr_t)pc->cpu_page, &obj->pages[0], 1);
470 	pmap_invalidate_cache_range((vm_offset_t)pc->cpu_page,
471 	    (vm_offset_t)pc->cpu_page + PAGE_SIZE);
472 
473 	pc->obj = obj;
474 	ring->private = pc;
475 	return 0;
476 
477 err_unpin:
478 	i915_gem_object_unpin(obj);
479 err_unref:
480 	drm_gem_object_unreference(&obj->base);
481 err:
482 	kfree(pc, M_DRM);
483 	return ret;
484 }
485 
486 static void
487 cleanup_pipe_control(struct intel_ring_buffer *ring)
488 {
489 	struct pipe_control *pc = ring->private;
490 	struct drm_i915_gem_object *obj;
491 
492 	if (!ring->private)
493 		return;
494 
495 	obj = pc->obj;
496 	pmap_qremove((vm_offset_t)pc->cpu_page, 1);
497 	kmem_free(&kernel_map, (uintptr_t)pc->cpu_page, PAGE_SIZE);
498 	i915_gem_object_unpin(obj);
499 	drm_gem_object_unreference(&obj->base);
500 
501 	kfree(pc, M_DRM);
502 	ring->private = NULL;
503 }
504 
505 static int init_render_ring(struct intel_ring_buffer *ring)
506 {
507 	struct drm_device *dev = ring->dev;
508 	struct drm_i915_private *dev_priv = dev->dev_private;
509 	int ret = init_ring_common(ring);
510 
511 	if (INTEL_INFO(dev)->gen > 3)
512 		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
513 
514 	/* We need to disable the AsyncFlip performance optimisations in order
515 	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
516 	 * programmed to '1' on all products.
517 	 */
518 	if (INTEL_INFO(dev)->gen >= 6)
519 		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
520 
521 	/* Required for the hardware to program scanline values for waiting */
522 	if (INTEL_INFO(dev)->gen == 6)
523 		I915_WRITE(GFX_MODE,
524 			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_ALWAYS));
525 
526 	if (IS_GEN7(dev))
527 		I915_WRITE(GFX_MODE_GEN7,
528 			   _MASKED_BIT_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
529 			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
530 
531 	if (INTEL_INFO(dev)->gen >= 5) {
532 		ret = init_pipe_control(ring);
533 		if (ret)
534 			return ret;
535 	}
536 
537 	if (IS_GEN6(dev)) {
538 		/* From the Sandybridge PRM, volume 1 part 3, page 24:
539 		 * "If this bit is set, STCunit will have LRA as replacement
540 		 *  policy. [...] This bit must be reset.  LRA replacement
541 		 *  policy is not supported."
542 		 */
543 		I915_WRITE(CACHE_MODE_0,
544 			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
545 
546 		/* This is not explicitly set for GEN6, so read the register.
547 		 * see intel_ring_mi_set_context() for why we care.
548 		 * TODO: consider explicitly setting the bit for GEN5
549 		 */
550 		ring->itlb_before_ctx_switch =
551 			!!(I915_READ(GFX_MODE) & GFX_TLB_INVALIDATE_ALWAYS);
552 	}
553 
554 	if (INTEL_INFO(dev)->gen >= 6)
555 		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
556 
557 	if (HAS_L3_GPU_CACHE(dev))
558 		I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
559 
560 	return ret;
561 }
562 
563 static void render_ring_cleanup(struct intel_ring_buffer *ring)
564 {
565 	struct drm_device *dev = ring->dev;
566 
567 	if (!ring->private)
568 		return;
569 
570 	if (HAS_BROKEN_CS_TLB(dev))
571 		drm_gem_object_unreference(to_gem_object(ring->private));
572 
573 	cleanup_pipe_control(ring);
574 }
575 
576 static void
577 update_mboxes(struct intel_ring_buffer *ring,
578 	      u32 mmio_offset)
579 {
580 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
581 	intel_ring_emit(ring, mmio_offset);
582 	intel_ring_emit(ring, ring->outstanding_lazy_request);
583 }
584 
585 /**
586  * gen6_add_request - Update the semaphore mailbox registers
587  *
588  * @ring - ring that is adding a request
589  * @seqno - return seqno stuck into the ring
590  *
591  * Update the mailbox registers in the *other* rings with the current seqno.
592  * This acts like a signal in the canonical semaphore.
593  */
594 static int
595 gen6_add_request(struct intel_ring_buffer *ring)
596 {
597 	u32 mbox1_reg;
598 	u32 mbox2_reg;
599 	int ret;
600 
601 	ret = intel_ring_begin(ring, 10);
602 	if (ret)
603 		return ret;
604 
605 	mbox1_reg = ring->signal_mbox[0];
606 	mbox2_reg = ring->signal_mbox[1];
607 
608 	update_mboxes(ring, mbox1_reg);
609 	update_mboxes(ring, mbox2_reg);
610 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
611 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
612 	intel_ring_emit(ring, ring->outstanding_lazy_request);
613 	intel_ring_emit(ring, MI_USER_INTERRUPT);
614 	intel_ring_advance(ring);
615 
616 	return 0;
617 }
618 
619 /**
620  * intel_ring_sync - sync the waiter to the signaller on seqno
621  *
622  * @waiter - ring that is waiting
623  * @signaller - ring which has, or will signal
624  * @seqno - seqno which the waiter will block on
625  */
626 static int
627 gen6_ring_sync(struct intel_ring_buffer *waiter,
628 	       struct intel_ring_buffer *signaller,
629 	       u32 seqno)
630 {
631 	int ret;
632 	u32 dw1 = MI_SEMAPHORE_MBOX |
633 		  MI_SEMAPHORE_COMPARE |
634 		  MI_SEMAPHORE_REGISTER;
635 
636 	/* Throughout all of the GEM code, seqno passed implies our current
637 	 * seqno is >= the last seqno executed. However for hardware the
638 	 * comparison is strictly greater than.
639 	 */
640 	seqno -= 1;
641 
642 	WARN_ON(signaller->semaphore_register[waiter->id] ==
643 		MI_SEMAPHORE_SYNC_INVALID);
644 
645 	ret = intel_ring_begin(waiter, 4);
646 	if (ret)
647 		return ret;
648 
649 	intel_ring_emit(waiter,
650 			dw1 | signaller->semaphore_register[waiter->id]);
651 	intel_ring_emit(waiter, seqno);
652 	intel_ring_emit(waiter, 0);
653 	intel_ring_emit(waiter, MI_NOOP);
654 	intel_ring_advance(waiter);
655 
656 	return 0;
657 }
658 
/*
 * Emit one four-dword PIPE_CONTROL: a qword write with depth stall to
 * addr__ in the global GTT, with both trailing dwords zero.  The caller
 * is expected to have reserved the ring space beforehand.
 */
#define PIPE_CONTROL_FLUSH(ring__, addr__)				\
do {									\
	intel_ring_emit(ring__,						\
			GFX_OP_PIPE_CONTROL(4) |			\
			PIPE_CONTROL_QW_WRITE |				\
			PIPE_CONTROL_DEPTH_STALL);			\
	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);	\
	intel_ring_emit(ring__, 0);					\
	intel_ring_emit(ring__, 0);					\
} while (0)
667 
668 static int
669 pc_render_add_request(struct intel_ring_buffer *ring)
670 {
671 	struct pipe_control *pc = ring->private;
672 	u32 scratch_addr = pc->gtt_offset + 128;
673 	int ret;
674 
675 	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
676 	 * incoherent with writes to memory, i.e. completely fubar,
677 	 * so we need to use PIPE_NOTIFY instead.
678 	 *
679 	 * However, we also need to workaround the qword write
680 	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
681 	 * memory before requesting an interrupt.
682 	 */
683 	ret = intel_ring_begin(ring, 32);
684 	if (ret)
685 		return ret;
686 
687 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
688 			PIPE_CONTROL_WRITE_FLUSH |
689 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
690 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
691 	intel_ring_emit(ring, ring->outstanding_lazy_request);
692 	intel_ring_emit(ring, 0);
693 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
694 	scratch_addr += 128; /* write to separate cachelines */
695 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
696 	scratch_addr += 128;
697 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
698 	scratch_addr += 128;
699 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
700 	scratch_addr += 128;
701 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
702 	scratch_addr += 128;
703 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
704 
705 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
706 			PIPE_CONTROL_WRITE_FLUSH |
707 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
708 			PIPE_CONTROL_NOTIFY);
709 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
710 	intel_ring_emit(ring, ring->outstanding_lazy_request);
711 	intel_ring_emit(ring, 0);
712 	intel_ring_advance(ring);
713 
714 	return 0;
715 }
716 
717 static u32
718 gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
719 {
720 	/* Workaround to force correct ordering between irq and seqno writes on
721 	 * ivb (and maybe also on snb) by reading from a CS register (like
722 	 * ACTHD) before reading the status page. */
723 	if (!lazy_coherency)
724 		intel_ring_get_active_head(ring);
725 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
726 }
727 
728 static u32
729 ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
730 {
731 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
732 }
733 
734 static u32
735 pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
736 {
737 	struct pipe_control *pc = ring->private;
738 	return pc->cpu_page[0];
739 }
740 
741 static bool
742 gen5_ring_get_irq(struct intel_ring_buffer *ring)
743 {
744 	struct drm_device *dev = ring->dev;
745 	drm_i915_private_t *dev_priv = dev->dev_private;
746 
747 	if (!dev->irq_enabled)
748 		return false;
749 
750 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
751 	if (ring->irq_refcount++ == 0) {
752 		dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
753 		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
754 		POSTING_READ(GTIMR);
755 	}
756 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
757 
758 	return true;
759 }
760 
761 static void
762 gen5_ring_put_irq(struct intel_ring_buffer *ring)
763 {
764 	struct drm_device *dev = ring->dev;
765 	drm_i915_private_t *dev_priv = dev->dev_private;
766 
767 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
768 	if (--ring->irq_refcount == 0) {
769 		dev_priv->gt_irq_mask |= ring->irq_enable_mask;
770 		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
771 		POSTING_READ(GTIMR);
772 	}
773 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
774 }
775 
776 static bool
777 i9xx_ring_get_irq(struct intel_ring_buffer *ring)
778 {
779 	struct drm_device *dev = ring->dev;
780 	drm_i915_private_t *dev_priv = dev->dev_private;
781 
782 	if (!dev->irq_enabled)
783 		return false;
784 
785 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
786 	if (ring->irq_refcount++ == 0) {
787 		dev_priv->irq_mask &= ~ring->irq_enable_mask;
788 		I915_WRITE(IMR, dev_priv->irq_mask);
789 		POSTING_READ(IMR);
790 	}
791 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
792 
793 	return true;
794 }
795 
796 static void
797 i9xx_ring_put_irq(struct intel_ring_buffer *ring)
798 {
799 	struct drm_device *dev = ring->dev;
800 	drm_i915_private_t *dev_priv = dev->dev_private;
801 
802 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
803 	if (--ring->irq_refcount == 0) {
804 		dev_priv->irq_mask |= ring->irq_enable_mask;
805 		I915_WRITE(IMR, dev_priv->irq_mask);
806 		POSTING_READ(IMR);
807 	}
808 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
809 }
810 
811 static bool
812 i8xx_ring_get_irq(struct intel_ring_buffer *ring)
813 {
814 	struct drm_device *dev = ring->dev;
815 	drm_i915_private_t *dev_priv = dev->dev_private;
816 
817 	if (!dev->irq_enabled)
818 		return false;
819 
820 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
821 	if (ring->irq_refcount++ == 0) {
822 		dev_priv->irq_mask &= ~ring->irq_enable_mask;
823 		I915_WRITE16(IMR, dev_priv->irq_mask);
824 		POSTING_READ16(IMR);
825 	}
826 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
827 
828 	return true;
829 }
830 
831 static void
832 i8xx_ring_put_irq(struct intel_ring_buffer *ring)
833 {
834 	struct drm_device *dev = ring->dev;
835 	drm_i915_private_t *dev_priv = dev->dev_private;
836 
837 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
838 	if (--ring->irq_refcount == 0) {
839 		dev_priv->irq_mask |= ring->irq_enable_mask;
840 		I915_WRITE16(IMR, dev_priv->irq_mask);
841 		POSTING_READ16(IMR);
842 	}
843 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
844 }
845 
846 void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
847 {
848 	struct drm_device *dev = ring->dev;
849 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
850 	u32 mmio = 0;
851 
852 	/* The ring status page addresses are no longer next to the rest of
853 	 * the ring registers as of gen7.
854 	 */
855 	if (IS_GEN7(dev)) {
856 		switch (ring->id) {
857 		case RCS:
858 			mmio = RENDER_HWS_PGA_GEN7;
859 			break;
860 		case BCS:
861 			mmio = BLT_HWS_PGA_GEN7;
862 			break;
863 		case VCS:
864 			mmio = BSD_HWS_PGA_GEN7;
865 			break;
866 		}
867 	} else if (IS_GEN6(ring->dev)) {
868 		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
869 	} else {
870 		mmio = RING_HWS_PGA(ring->mmio_base);
871 	}
872 
873 	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
874 	POSTING_READ(mmio);
875 }
876 
877 static int
878 bsd_ring_flush(struct intel_ring_buffer *ring,
879 	       u32     invalidate_domains,
880 	       u32     flush_domains)
881 {
882 	int ret;
883 
884 	ret = intel_ring_begin(ring, 2);
885 	if (ret)
886 		return ret;
887 
888 	intel_ring_emit(ring, MI_FLUSH);
889 	intel_ring_emit(ring, MI_NOOP);
890 	intel_ring_advance(ring);
891 	return 0;
892 }
893 
894 static int
895 i9xx_add_request(struct intel_ring_buffer *ring)
896 {
897 	int ret;
898 
899 	ret = intel_ring_begin(ring, 4);
900 	if (ret)
901 		return ret;
902 
903 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
904 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
905 	intel_ring_emit(ring, ring->outstanding_lazy_request);
906 	intel_ring_emit(ring, MI_USER_INTERRUPT);
907 	intel_ring_advance(ring);
908 
909 	return 0;
910 }
911 
912 static bool
913 gen6_ring_get_irq(struct intel_ring_buffer *ring)
914 {
915 	struct drm_device *dev = ring->dev;
916 	drm_i915_private_t *dev_priv = dev->dev_private;
917 
918 	if (!dev->irq_enabled)
919 	       return false;
920 
921 	/* It looks like we need to prevent the gt from suspending while waiting
922 	 * for an notifiy irq, otherwise irqs seem to get lost on at least the
923 	 * blt/bsd rings on ivb. */
924 	gen6_gt_force_wake_get(dev_priv);
925 
926 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
927 	if (ring->irq_refcount++ == 0) {
928 		if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
929 			I915_WRITE_IMR(ring, ~(ring->irq_enable_mask |
930 						GEN6_RENDER_L3_PARITY_ERROR));
931 		else
932 			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
933 		dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
934 		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
935 		POSTING_READ(GTIMR);
936 	}
937 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
938 
939 	return true;
940 }
941 
942 static void
943 gen6_ring_put_irq(struct intel_ring_buffer *ring)
944 {
945 	struct drm_device *dev = ring->dev;
946 	drm_i915_private_t *dev_priv = dev->dev_private;
947 
948 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
949 	if (--ring->irq_refcount == 0) {
950 		if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
951 			I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
952 		else
953 			I915_WRITE_IMR(ring, ~0);
954 		dev_priv->gt_irq_mask |= ring->irq_enable_mask;
955 		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
956 		POSTING_READ(GTIMR);
957 	}
958 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
959 
960 	gen6_gt_force_wake_put(dev_priv);
961 }
962 
963 static int
964 i965_dispatch_execbuffer(struct intel_ring_buffer *ring,
965 			 u32 offset, u32 length,
966 			 unsigned flags)
967 {
968 	int ret;
969 
970 	ret = intel_ring_begin(ring, 2);
971 	if (ret)
972 		return ret;
973 
974 	intel_ring_emit(ring,
975 			MI_BATCH_BUFFER_START |
976 			MI_BATCH_GTT |
977 			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
978 	intel_ring_emit(ring, offset);
979 	intel_ring_advance(ring);
980 
981 	return 0;
982 }
983 
984 /* Just userspace ABI convention to limit the wa batch bo to a resonable size */
985 #define I830_BATCH_LIMIT (256*1024)
986 static int
987 i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
988 				u32 offset, u32 len,
989 				unsigned flags)
990 {
991 	int ret;
992 
993 	if (flags & I915_DISPATCH_PINNED) {
994 		ret = intel_ring_begin(ring, 4);
995 		if (ret)
996 			return ret;
997 
998 		intel_ring_emit(ring, MI_BATCH_BUFFER);
999 		intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
1000 		intel_ring_emit(ring, offset + len - 8);
1001 		intel_ring_emit(ring, MI_NOOP);
1002 		intel_ring_advance(ring);
1003 	} else {
1004 		struct drm_i915_gem_object *obj = ring->private;
1005 		u32 cs_offset = obj->gtt_offset;
1006 
1007 		if (len > I830_BATCH_LIMIT)
1008 			return -ENOSPC;
1009 
1010 		ret = intel_ring_begin(ring, 9+3);
1011 		if (ret)
1012 			return ret;
1013 		/* Blit the batch (which has now all relocs applied) to the stable batch
1014 		 * scratch bo area (so that the CS never stumbles over its tlb
1015 		 * invalidation bug) ... */
1016 		intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD |
1017 				XY_SRC_COPY_BLT_WRITE_ALPHA |
1018 				XY_SRC_COPY_BLT_WRITE_RGB);
1019 		intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096);
1020 		intel_ring_emit(ring, 0);
1021 		intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024);
1022 		intel_ring_emit(ring, cs_offset);
1023 		intel_ring_emit(ring, 0);
1024 		intel_ring_emit(ring, 4096);
1025 		intel_ring_emit(ring, offset);
1026 		intel_ring_emit(ring, MI_FLUSH);
1027 
1028 		/* ... and execute it. */
1029 		intel_ring_emit(ring, MI_BATCH_BUFFER);
1030 		intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
1031 		intel_ring_emit(ring, cs_offset + len - 8);
1032 		intel_ring_advance(ring);
1033 	}
1034 
1035 	return 0;
1036 }
1037 
1038 static int
1039 i915_dispatch_execbuffer(struct intel_ring_buffer *ring,
1040 			 u32 offset, u32 len,
1041 			 unsigned flags)
1042 {
1043 	int ret;
1044 
1045 	ret = intel_ring_begin(ring, 2);
1046 	if (ret)
1047 		return ret;
1048 
1049 	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
1050 	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
1051 	intel_ring_advance(ring);
1052 
1053 	return 0;
1054 }
1055 
1056 static void cleanup_status_page(struct intel_ring_buffer *ring)
1057 {
1058 	struct drm_i915_gem_object *obj;
1059 
1060 	obj = ring->status_page.obj;
1061 	if (obj == NULL)
1062 		return;
1063 
1064 	pmap_qremove((vm_offset_t)ring->status_page.page_addr, 1);
1065 	kmem_free(&kernel_map, (vm_offset_t)ring->status_page.page_addr,
1066 	    PAGE_SIZE);
1067 	i915_gem_object_unpin(obj);
1068 	drm_gem_object_unreference(&obj->base);
1069 	ring->status_page.obj = NULL;
1070 }
1071 
1072 static int init_status_page(struct intel_ring_buffer *ring)
1073 {
1074 	struct drm_device *dev = ring->dev;
1075 	struct drm_i915_gem_object *obj;
1076 	int ret;
1077 
1078 	obj = i915_gem_alloc_object(dev, 4096);
1079 	if (obj == NULL) {
1080 		DRM_ERROR("Failed to allocate status page\n");
1081 		ret = -ENOMEM;
1082 		goto err;
1083 	}
1084 
1085 	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
1086 
1087 	ret = i915_gem_object_pin(obj, 4096, true, false);
1088 	if (ret != 0) {
1089 		goto err_unref;
1090 	}
1091 
1092 	ring->status_page.gfx_addr = obj->gtt_offset;
1093 	ring->status_page.page_addr = (void *)kmem_alloc_nofault(&kernel_map,
1094 	    PAGE_SIZE, PAGE_SIZE);
1095 	if (ring->status_page.page_addr == NULL) {
1096 		ret = -ENOMEM;
1097 		goto err_unpin;
1098 	}
1099 	pmap_qenter((vm_offset_t)ring->status_page.page_addr, &obj->pages[0],
1100 	    1);
1101 	pmap_invalidate_cache_range((vm_offset_t)ring->status_page.page_addr,
1102 	    (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE);
1103 	ring->status_page.obj = obj;
1104 	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1105 
1106 	intel_ring_setup_status_page(ring);
1107 	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
1108 			ring->name, ring->status_page.gfx_addr);
1109 
1110 	return 0;
1111 
1112 err_unpin:
1113 	i915_gem_object_unpin(obj);
1114 err_unref:
1115 	drm_gem_object_unreference(&obj->base);
1116 err:
1117 	return ret;
1118 }
1119 
1120 static int init_phys_hws_pga(struct intel_ring_buffer *ring)
1121 {
1122 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
1123 	u32 addr;
1124 
1125 	if (!dev_priv->status_page_dmah) {
1126 		dev_priv->status_page_dmah =
1127 			drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE, ~0);
1128 		if (!dev_priv->status_page_dmah)
1129 			return -ENOMEM;
1130 	}
1131 
1132 	addr = dev_priv->status_page_dmah->busaddr;
1133 	if (INTEL_INFO(ring->dev)->gen >= 4)
1134 		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
1135 	I915_WRITE(HWS_PGA, addr);
1136 
1137 	ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1138 	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1139 
1140 	return 0;
1141 }
1142 
1143 static int intel_init_ring_buffer(struct drm_device *dev,
1144 				  struct intel_ring_buffer *ring)
1145 {
1146 	struct drm_i915_gem_object *obj;
1147 	int ret;
1148 
1149 	ring->dev = dev;
1150 	INIT_LIST_HEAD(&ring->active_list);
1151 	INIT_LIST_HEAD(&ring->request_list);
1152 	ring->size = 32 * PAGE_SIZE;
1153 	memset(ring->sync_seqno, 0, sizeof(ring->sync_seqno));
1154 
1155 	init_waitqueue_head(&ring->irq_queue);
1156 
1157 	if (I915_NEED_GFX_HWS(dev)) {
1158 		ret = init_status_page(ring);
1159 		if (ret)
1160 			return ret;
1161 	} else {
1162 		BUG_ON(ring->id != RCS);
1163 		ret = init_phys_hws_pga(ring);
1164 		if (ret)
1165 			return ret;
1166 	}
1167 
1168 	obj = i915_gem_alloc_object(dev, ring->size);
1169 	if (obj == NULL) {
1170 		DRM_ERROR("Failed to allocate ringbuffer\n");
1171 		ret = -ENOMEM;
1172 		goto err_hws;
1173 	}
1174 
1175 	ring->obj = obj;
1176 
1177 	ret = i915_gem_object_pin(obj, PAGE_SIZE, true, false);
1178 	if (ret)
1179 		goto err_unref;
1180 
1181 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
1182 	if (ret)
1183 		goto err_unpin;
1184 
1185 	ring->virtual_start = ioremap_wc(dev->agp->base + obj->gtt_offset,
1186 					 ring->size);
1187 	if (ring->virtual_start == NULL) {
1188 		DRM_ERROR("Failed to map ringbuffer.\n");
1189 		ret = -EINVAL;
1190 		goto err_unpin;
1191 	}
1192 
1193 	ret = ring->init(ring);
1194 	if (ret)
1195 		goto err_unmap;
1196 
1197 	/* Workaround an erratum on the i830 which causes a hang if
1198 	 * the TAIL pointer points to within the last 2 cachelines
1199 	 * of the buffer.
1200 	 */
1201 	ring->effective_size = ring->size;
1202 	if (IS_I830(ring->dev) || IS_845G(ring->dev))
1203 		ring->effective_size -= 128;
1204 
1205 	return 0;
1206 
1207 err_unmap:
1208 	pmap_unmapdev((vm_offset_t)ring->virtual_start, ring->size);
1209 err_unpin:
1210 	i915_gem_object_unpin(obj);
1211 err_unref:
1212 	drm_gem_object_unreference(&obj->base);
1213 	ring->obj = NULL;
1214 err_hws:
1215 	cleanup_status_page(ring);
1216 	return ret;
1217 }
1218 
1219 void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
1220 {
1221 	struct drm_i915_private *dev_priv;
1222 	int ret;
1223 
1224 	if (ring->obj == NULL)
1225 		return;
1226 
1227 	/* Disable the ring buffer. The ring must be idle at this point */
1228 	dev_priv = ring->dev->dev_private;
1229 	ret = intel_ring_idle(ring);
1230 	if (ret)
1231 		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
1232 			  ring->name, ret);
1233 
1234 	I915_WRITE_CTL(ring, 0);
1235 
1236 	pmap_unmapdev((vm_offset_t)ring->virtual_start, ring->size);
1237 
1238 	i915_gem_object_unpin(ring->obj);
1239 	drm_gem_object_unreference(&ring->obj->base);
1240 	ring->obj = NULL;
1241 
1242 	if (ring->cleanup)
1243 		ring->cleanup(ring);
1244 
1245 	cleanup_status_page(ring);
1246 }
1247 
1248 static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
1249 {
1250 	int ret;
1251 
1252 	ret = i915_wait_seqno(ring, seqno);
1253 	if (!ret)
1254 		i915_gem_retire_requests_ring(ring);
1255 
1256 	return ret;
1257 }
1258 
1259 static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
1260 {
1261 	struct drm_i915_gem_request *request;
1262 	u32 seqno = 0;
1263 	int ret;
1264 
1265 	i915_gem_retire_requests_ring(ring);
1266 
1267 	if (ring->last_retired_head != -1) {
1268 		ring->head = ring->last_retired_head;
1269 		ring->last_retired_head = -1;
1270 		ring->space = ring_space(ring);
1271 		if (ring->space >= n)
1272 			return 0;
1273 	}
1274 
1275 	list_for_each_entry(request, &ring->request_list, list) {
1276 		int space;
1277 
1278 		if (request->tail == -1)
1279 			continue;
1280 
1281 		space = request->tail - (ring->tail + I915_RING_FREE_SPACE);
1282 		if (space < 0)
1283 			space += ring->size;
1284 		if (space >= n) {
1285 			seqno = request->seqno;
1286 			break;
1287 		}
1288 
1289 		/* Consume this request in case we need more space than
1290 		 * is available and so need to prevent a race between
1291 		 * updating last_retired_head and direct reads of
1292 		 * I915_RING_HEAD. It also provides a nice sanity check.
1293 		 */
1294 		request->tail = -1;
1295 	}
1296 
1297 	if (seqno == 0)
1298 		return -ENOSPC;
1299 
1300 	ret = intel_ring_wait_seqno(ring, seqno);
1301 	if (ret)
1302 		return ret;
1303 
1304 	if (WARN_ON(ring->last_retired_head == -1))
1305 		return -ENOSPC;
1306 
1307 	ring->head = ring->last_retired_head;
1308 	ring->last_retired_head = -1;
1309 	ring->space = ring_space(ring);
1310 	if (WARN_ON(ring->space < n))
1311 		return -ENOSPC;
1312 
1313 	return 0;
1314 }
1315 
1316 static int ring_wait_for_space(struct intel_ring_buffer *ring, int n)
1317 {
1318 	struct drm_device *dev = ring->dev;
1319 	struct drm_i915_private *dev_priv = dev->dev_private;
1320 	unsigned long end;
1321 	int ret;
1322 
1323 	ret = intel_ring_wait_request(ring, n);
1324 	if (ret != -ENOSPC)
1325 		return ret;
1326 
1327 	/* With GEM the hangcheck timer should kick us out of the loop,
1328 	 * leaving it early runs the risk of corrupting GEM state (due
1329 	 * to running on almost untested codepaths). But on resume
1330 	 * timers don't work yet, so prevent a complete hang in that
1331 	 * case by choosing an insanely large timeout. */
1332 	end = jiffies + 60 * HZ;
1333 
1334 	do {
1335 		ring->head = I915_READ_HEAD(ring);
1336 		ring->space = ring_space(ring);
1337 		if (ring->space >= n) {
1338 			return 0;
1339 		}
1340 
1341 #if 0
1342 		if (dev->primary->master) {
1343 			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
1344 			if (master_priv->sarea_priv)
1345 				master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
1346 		}
1347 #else
1348 		if (dev_priv->sarea_priv)
1349 			dev_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
1350 #endif
1351 
1352 		msleep(1);
1353 
1354 		ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
1355 		if (ret)
1356 			return ret;
1357 	} while (!time_after(jiffies, end));
1358 	return -EBUSY;
1359 }
1360 
1361 static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
1362 {
1363 	uint32_t __iomem *virt;
1364 	int rem = ring->size - ring->tail;
1365 
1366 	if (ring->space < rem) {
1367 		int ret = ring_wait_for_space(ring, rem);
1368 		if (ret)
1369 			return ret;
1370 	}
1371 
1372 	virt = (unsigned int *)((char *)ring->virtual_start + ring->tail);
1373 	rem /= 4;
1374 	while (rem--)
1375 		iowrite32(MI_NOOP, virt++);
1376 
1377 	ring->tail = 0;
1378 	ring->space = ring_space(ring);
1379 
1380 	return 0;
1381 }
1382 
1383 int intel_ring_idle(struct intel_ring_buffer *ring)
1384 {
1385 	u32 seqno;
1386 	int ret;
1387 
1388 	/* We need to add any requests required to flush the objects and ring */
1389 	if (ring->outstanding_lazy_request) {
1390 		ret = i915_add_request(ring, NULL, NULL);
1391 		if (ret)
1392 			return ret;
1393 	}
1394 
1395 	/* Wait upon the last request to be completed */
1396 	if (list_empty(&ring->request_list))
1397 		return 0;
1398 
1399 	seqno = list_entry(ring->request_list.prev,
1400 			   struct drm_i915_gem_request,
1401 			   list)->seqno;
1402 
1403 	return i915_wait_seqno(ring, seqno);
1404 }
1405 
1406 static int
1407 intel_ring_alloc_seqno(struct intel_ring_buffer *ring)
1408 {
1409 	if (ring->outstanding_lazy_request)
1410 		return 0;
1411 
1412 	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_request);
1413 }
1414 
1415 int intel_ring_begin(struct intel_ring_buffer *ring,
1416 		     int num_dwords)
1417 {
1418 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
1419 	int n = 4*num_dwords;
1420 	int ret;
1421 
1422 	ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
1423 	if (ret)
1424 		return ret;
1425 
1426 	/* Preallocate the olr before touching the ring */
1427 	ret = intel_ring_alloc_seqno(ring);
1428 	if (ret)
1429 		return ret;
1430 
1431 	if (unlikely(ring->tail + n > ring->effective_size)) {
1432 		ret = intel_wrap_ring_buffer(ring);
1433 		if (unlikely(ret))
1434 			return ret;
1435 	}
1436 
1437 	if (unlikely(ring->space < n)) {
1438 		ret = ring_wait_for_space(ring, n);
1439 		if (unlikely(ret))
1440 			return ret;
1441 	}
1442 
1443 	ring->space -= n;
1444 	return 0;
1445 }
1446 
1447 void intel_ring_advance(struct intel_ring_buffer *ring)
1448 {
1449 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
1450 
1451 	ring->tail &= ring->size - 1;
1452 	if (dev_priv->stop_rings & intel_ring_flag(ring))
1453 		return;
1454 	ring->write_tail(ring, ring->tail);
1455 }
1456 
1457 
1458 static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
1459 				     u32 value)
1460 {
1461 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
1462 
1463        /* Every tail move must follow the sequence below */
1464 
1465 	/* Disable notification that the ring is IDLE. The GT
1466 	 * will then assume that it is busy and bring it out of rc6.
1467 	 */
1468 	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
1469 		   _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
1470 
1471 	/* Clear the context id. Here be magic! */
1472 	I915_WRITE64(GEN6_BSD_RNCID, 0x0);
1473 
1474 	/* Wait for the ring not to be idle, i.e. for it to wake up. */
1475 	if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
1476 		      GEN6_BSD_SLEEP_INDICATOR) == 0,
1477 		     50))
1478 		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
1479 
1480 	/* Now that the ring is fully powered up, update the tail */
1481 	I915_WRITE_TAIL(ring, value);
1482 	POSTING_READ(RING_TAIL(ring->mmio_base));
1483 
1484 	/* Let the ring send IDLE messages to the GT again,
1485 	 * and so let it sleep to conserve power when idle.
1486 	 */
1487 	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
1488 		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
1489 }
1490 
1491 static int gen6_ring_flush(struct intel_ring_buffer *ring,
1492 			   u32 invalidate, u32 flush)
1493 {
1494 	uint32_t cmd;
1495 	int ret;
1496 
1497 	ret = intel_ring_begin(ring, 4);
1498 	if (ret)
1499 		return ret;
1500 
1501 	cmd = MI_FLUSH_DW;
1502 	/*
1503 	 * Bspec vol 1c.5 - video engine command streamer:
1504 	 * "If ENABLED, all TLBs will be invalidated once the flush
1505 	 * operation is complete. This bit is only valid when the
1506 	 * Post-Sync Operation field is a value of 1h or 3h."
1507 	 */
1508 	if (invalidate & I915_GEM_GPU_DOMAINS)
1509 		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
1510 			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
1511 	intel_ring_emit(ring, cmd);
1512 	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
1513 	intel_ring_emit(ring, 0);
1514 	intel_ring_emit(ring, MI_NOOP);
1515 	intel_ring_advance(ring);
1516 	return 0;
1517 }
1518 
1519 static int
1520 hsw_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
1521 			      u32 offset, u32 len,
1522 			      unsigned flags)
1523 {
1524 	int ret;
1525 
1526 	ret = intel_ring_begin(ring, 2);
1527 	if (ret)
1528 		return ret;
1529 
1530 	intel_ring_emit(ring,
1531 			MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW |
1532 			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_HSW));
1533 	/* bit0-7 is the length on GEN6+ */
1534 	intel_ring_emit(ring, offset);
1535 	intel_ring_advance(ring);
1536 
1537 	return 0;
1538 }
1539 
1540 static int
1541 gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
1542 			      u32 offset, u32 len,
1543 			      unsigned flags)
1544 {
1545 	int ret;
1546 
1547 	ret = intel_ring_begin(ring, 2);
1548 	if (ret)
1549 		return ret;
1550 
1551 	intel_ring_emit(ring,
1552 			MI_BATCH_BUFFER_START |
1553 			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
1554 	/* bit0-7 is the length on GEN6+ */
1555 	intel_ring_emit(ring, offset);
1556 	intel_ring_advance(ring);
1557 
1558 	return 0;
1559 }
1560 
1561 /* Blitter support (SandyBridge+) */
1562 
1563 static int blt_ring_flush(struct intel_ring_buffer *ring,
1564 			  u32 invalidate, u32 flush)
1565 {
1566 	uint32_t cmd;
1567 	int ret;
1568 
1569 	ret = intel_ring_begin(ring, 4);
1570 	if (ret)
1571 		return ret;
1572 
1573 	cmd = MI_FLUSH_DW;
1574 	/*
1575 	 * Bspec vol 1c.3 - blitter engine command streamer:
1576 	 * "If ENABLED, all TLBs will be invalidated once the flush
1577 	 * operation is complete. This bit is only valid when the
1578 	 * Post-Sync Operation field is a value of 1h or 3h."
1579 	 */
1580 	if (invalidate & I915_GEM_DOMAIN_RENDER)
1581 		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
1582 			MI_FLUSH_DW_OP_STOREDW;
1583 	intel_ring_emit(ring, cmd);
1584 	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
1585 	intel_ring_emit(ring, 0);
1586 	intel_ring_emit(ring, MI_NOOP);
1587 	intel_ring_advance(ring);
1588 	return 0;
1589 }
1590 
1591 int intel_init_render_ring_buffer(struct drm_device *dev)
1592 {
1593 	drm_i915_private_t *dev_priv = dev->dev_private;
1594 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
1595 
1596 	ring->name = "render ring";
1597 	ring->id = RCS;
1598 	ring->mmio_base = RENDER_RING_BASE;
1599 
1600 	if (INTEL_INFO(dev)->gen >= 6) {
1601 		ring->add_request = gen6_add_request;
1602 		ring->flush = gen7_render_ring_flush;
1603 		if (INTEL_INFO(dev)->gen == 6)
1604 			ring->flush = gen6_render_ring_flush;
1605 		ring->irq_get = gen6_ring_get_irq;
1606 		ring->irq_put = gen6_ring_put_irq;
1607 		ring->irq_enable_mask = GT_USER_INTERRUPT;
1608 		ring->get_seqno = gen6_ring_get_seqno;
1609 		ring->sync_to = gen6_ring_sync;
1610 		ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_INVALID;
1611 		ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_RV;
1612 		ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_RB;
1613 		ring->signal_mbox[0] = GEN6_VRSYNC;
1614 		ring->signal_mbox[1] = GEN6_BRSYNC;
1615 	} else if (IS_GEN5(dev)) {
1616 		ring->add_request = pc_render_add_request;
1617 		ring->flush = gen4_render_ring_flush;
1618 		ring->get_seqno = pc_render_get_seqno;
1619 		ring->irq_get = gen5_ring_get_irq;
1620 		ring->irq_put = gen5_ring_put_irq;
1621 		ring->irq_enable_mask = GT_USER_INTERRUPT | GT_PIPE_NOTIFY;
1622 	} else {
1623 		ring->add_request = i9xx_add_request;
1624 		if (INTEL_INFO(dev)->gen < 4)
1625 			ring->flush = gen2_render_ring_flush;
1626 		else
1627 			ring->flush = gen4_render_ring_flush;
1628 		ring->get_seqno = ring_get_seqno;
1629 		if (IS_GEN2(dev)) {
1630 			ring->irq_get = i8xx_ring_get_irq;
1631 			ring->irq_put = i8xx_ring_put_irq;
1632 		} else {
1633 			ring->irq_get = i9xx_ring_get_irq;
1634 			ring->irq_put = i9xx_ring_put_irq;
1635 		}
1636 		ring->irq_enable_mask = I915_USER_INTERRUPT;
1637 	}
1638 	ring->write_tail = ring_write_tail;
1639 	if (IS_HASWELL(dev))
1640 		ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
1641 	else if (INTEL_INFO(dev)->gen >= 6)
1642 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
1643 	else if (INTEL_INFO(dev)->gen >= 4)
1644 		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
1645 	else if (IS_I830(dev) || IS_845G(dev))
1646 		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
1647 	else
1648 		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
1649 	ring->init = init_render_ring;
1650 	ring->cleanup = render_ring_cleanup;
1651 
1652 	/* Workaround batchbuffer to combat CS tlb bug. */
1653 	if (HAS_BROKEN_CS_TLB(dev)) {
1654 		struct drm_i915_gem_object *obj;
1655 		int ret;
1656 
1657 		obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT);
1658 		if (obj == NULL) {
1659 			DRM_ERROR("Failed to allocate batch bo\n");
1660 			return -ENOMEM;
1661 		}
1662 
1663 		ret = i915_gem_object_pin(obj, 0, true, false);
1664 		if (ret != 0) {
1665 			drm_gem_object_unreference(&obj->base);
1666 			DRM_ERROR("Failed to ping batch bo\n");
1667 			return ret;
1668 		}
1669 
1670 		ring->private = obj;
1671 	}
1672 
1673 	return intel_init_ring_buffer(dev, ring);
1674 }
1675 
1676 int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
1677 {
1678 	drm_i915_private_t *dev_priv = dev->dev_private;
1679 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
1680 	int ret;
1681 
1682 	ring->name = "render ring";
1683 	ring->id = RCS;
1684 	ring->mmio_base = RENDER_RING_BASE;
1685 
1686 	if (INTEL_INFO(dev)->gen >= 6) {
1687 		/* non-kms not supported on gen6+ */
1688 		return -ENODEV;
1689 	}
1690 
1691 	/* Note: gem is not supported on gen5/ilk without kms (the corresponding
1692 	 * gem_init ioctl returns with -ENODEV). Hence we do not need to set up
1693 	 * the special gen5 functions. */
1694 	ring->add_request = i9xx_add_request;
1695 	if (INTEL_INFO(dev)->gen < 4)
1696 		ring->flush = gen2_render_ring_flush;
1697 	else
1698 		ring->flush = gen4_render_ring_flush;
1699 	ring->get_seqno = ring_get_seqno;
1700 	if (IS_GEN2(dev)) {
1701 		ring->irq_get = i8xx_ring_get_irq;
1702 		ring->irq_put = i8xx_ring_put_irq;
1703 	} else {
1704 		ring->irq_get = i9xx_ring_get_irq;
1705 		ring->irq_put = i9xx_ring_put_irq;
1706 	}
1707 	ring->irq_enable_mask = I915_USER_INTERRUPT;
1708 	ring->write_tail = ring_write_tail;
1709 	if (INTEL_INFO(dev)->gen >= 4)
1710 		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
1711 	else if (IS_I830(dev) || IS_845G(dev))
1712 		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
1713 	else
1714 		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
1715 	ring->init = init_render_ring;
1716 	ring->cleanup = render_ring_cleanup;
1717 
1718 	ring->dev = dev;
1719 	INIT_LIST_HEAD(&ring->active_list);
1720 	INIT_LIST_HEAD(&ring->request_list);
1721 
1722 	ring->size = size;
1723 	ring->effective_size = ring->size;
1724 	if (IS_I830(ring->dev) || IS_845G(ring->dev))
1725 		ring->effective_size -= 128;
1726 
1727 	ring->virtual_start = ioremap_wc(start, size);
1728 	if (ring->virtual_start == NULL) {
1729 		DRM_ERROR("can not ioremap virtual address for"
1730 			  " ring buffer\n");
1731 		return -ENOMEM;
1732 	}
1733 
1734 	if (!I915_NEED_GFX_HWS(dev)) {
1735 		ret = init_phys_hws_pga(ring);
1736 		if (ret)
1737 			return ret;
1738 	}
1739 
1740 	return 0;
1741 }
1742 
1743 int intel_init_bsd_ring_buffer(struct drm_device *dev)
1744 {
1745 	drm_i915_private_t *dev_priv = dev->dev_private;
1746 	struct intel_ring_buffer *ring = &dev_priv->ring[VCS];
1747 
1748 	ring->name = "bsd ring";
1749 	ring->id = VCS;
1750 
1751 	ring->write_tail = ring_write_tail;
1752 	if (IS_GEN6(dev) || IS_GEN7(dev)) {
1753 		ring->mmio_base = GEN6_BSD_RING_BASE;
1754 		/* gen6 bsd needs a special wa for tail updates */
1755 		if (IS_GEN6(dev))
1756 			ring->write_tail = gen6_bsd_ring_write_tail;
1757 		ring->flush = gen6_ring_flush;
1758 		ring->add_request = gen6_add_request;
1759 		ring->get_seqno = gen6_ring_get_seqno;
1760 		ring->irq_enable_mask = GEN6_BSD_USER_INTERRUPT;
1761 		ring->irq_get = gen6_ring_get_irq;
1762 		ring->irq_put = gen6_ring_put_irq;
1763 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
1764 		ring->sync_to = gen6_ring_sync;
1765 		ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_VR;
1766 		ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_INVALID;
1767 		ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_VB;
1768 		ring->signal_mbox[0] = GEN6_RVSYNC;
1769 		ring->signal_mbox[1] = GEN6_BVSYNC;
1770 	} else {
1771 		ring->mmio_base = BSD_RING_BASE;
1772 		ring->flush = bsd_ring_flush;
1773 		ring->add_request = i9xx_add_request;
1774 		ring->get_seqno = ring_get_seqno;
1775 		if (IS_GEN5(dev)) {
1776 			ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
1777 			ring->irq_get = gen5_ring_get_irq;
1778 			ring->irq_put = gen5_ring_put_irq;
1779 		} else {
1780 			ring->irq_enable_mask = I915_BSD_USER_INTERRUPT;
1781 			ring->irq_get = i9xx_ring_get_irq;
1782 			ring->irq_put = i9xx_ring_put_irq;
1783 		}
1784 		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
1785 	}
1786 	ring->init = init_ring_common;
1787 
1788 	return intel_init_ring_buffer(dev, ring);
1789 }
1790 
1791 int intel_init_blt_ring_buffer(struct drm_device *dev)
1792 {
1793 	drm_i915_private_t *dev_priv = dev->dev_private;
1794 	struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
1795 
1796 	ring->name = "blitter ring";
1797 	ring->id = BCS;
1798 
1799 	ring->mmio_base = BLT_RING_BASE;
1800 	ring->write_tail = ring_write_tail;
1801 	ring->flush = blt_ring_flush;
1802 	ring->add_request = gen6_add_request;
1803 	ring->get_seqno = gen6_ring_get_seqno;
1804 	ring->irq_enable_mask = GEN6_BLITTER_USER_INTERRUPT;
1805 	ring->irq_get = gen6_ring_get_irq;
1806 	ring->irq_put = gen6_ring_put_irq;
1807 	ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
1808 	ring->sync_to = gen6_ring_sync;
1809 	ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_BR;
1810 	ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_BV;
1811 	ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_INVALID;
1812 	ring->signal_mbox[0] = GEN6_RBSYNC;
1813 	ring->signal_mbox[1] = GEN6_VBSYNC;
1814 	ring->init = init_ring_common;
1815 
1816 	return intel_init_ring_buffer(dev, ring);
1817 }
1818 
1819 int
1820 intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
1821 {
1822 	int ret;
1823 
1824 	if (!ring->gpu_caches_dirty)
1825 		return 0;
1826 
1827 	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
1828 	if (ret)
1829 		return ret;
1830 
1831 	ring->gpu_caches_dirty = false;
1832 	return 0;
1833 }
1834 
1835 int
1836 intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring)
1837 {
1838 	uint32_t flush_domains;
1839 	int ret;
1840 
1841 	flush_domains = 0;
1842 	if (ring->gpu_caches_dirty)
1843 		flush_domains = I915_GEM_GPU_DOMAINS;
1844 
1845 	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
1846 	if (ret)
1847 		return ret;
1848 
1849 	ring->gpu_caches_dirty = false;
1850 	return 0;
1851 }
1852