xref: /dragonfly/sys/dev/drm/i915/intel_engine_cs.c (revision 5ca0a96d)
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #include <drm/drm_print.h>
26 
27 #include "i915_drv.h"
28 #include "i915_vgpu.h"
29 #include "intel_ringbuffer.h"
30 #include "intel_lrc.h"
31 
32 /* Haswell does have the CXT_SIZE register, however it does not appear to be
33  * valid. The docs explain, in dwords, what is in the context object. The full
34  * size is 70720 bytes; however, the power context and execlist context will
35  * never be saved (the power context is stored elsewhere, and execlists don't
36  * work on HSW) - so the final size, including the extra state required for
37  * the Resource Streamer, is 66944 bytes, which rounds up to 17 pages.
38  */
39 #define HSW_CXT_TOTAL_SIZE		(17 * PAGE_SIZE)
40 /* Same as Haswell, but 72064 bytes now. */
41 #define GEN8_CXT_TOTAL_SIZE		(18 * PAGE_SIZE)
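/*
 * A worked example of the rounding above, assuming the usual 4096-byte
 * PAGE_SIZE: 66944 / 4096 is roughly 16.3, which rounds up to the 17 pages of
 * HSW_CXT_TOTAL_SIZE, and 72064 / 4096 is roughly 17.6, which rounds up to
 * the 18 pages of GEN8_CXT_TOTAL_SIZE.
 */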
42 
43 #define GEN8_LR_CONTEXT_RENDER_SIZE	(20 * PAGE_SIZE)
44 #define GEN9_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
45 #define GEN10_LR_CONTEXT_RENDER_SIZE	(18 * PAGE_SIZE)
46 
47 #define GEN8_LR_CONTEXT_OTHER_SIZE	( 2 * PAGE_SIZE)
48 
49 struct engine_class_info {
50 	const char *name;
51 	int (*init_legacy)(struct intel_engine_cs *engine);
52 	int (*init_execlists)(struct intel_engine_cs *engine);
53 };
54 
55 static const struct engine_class_info intel_engine_classes[] = {
56 	[RENDER_CLASS] = {
57 		.name = "rcs",
58 		.init_execlists = logical_render_ring_init,
59 		.init_legacy = intel_init_render_ring_buffer,
60 	},
61 	[COPY_ENGINE_CLASS] = {
62 		.name = "bcs",
63 		.init_execlists = logical_xcs_ring_init,
64 		.init_legacy = intel_init_blt_ring_buffer,
65 	},
66 	[VIDEO_DECODE_CLASS] = {
67 		.name = "vcs",
68 		.init_execlists = logical_xcs_ring_init,
69 		.init_legacy = intel_init_bsd_ring_buffer,
70 	},
71 	[VIDEO_ENHANCEMENT_CLASS] = {
72 		.name = "vecs",
73 		.init_execlists = logical_xcs_ring_init,
74 		.init_legacy = intel_init_vebox_ring_buffer,
75 	},
76 };
77 
78 struct engine_info {
79 	unsigned int hw_id;
80 	unsigned int uabi_id;
81 	u8 class;
82 	u8 instance;
83 	u32 mmio_base;
84 	unsigned irq_shift;
85 };
86 
87 static const struct engine_info intel_engines[] = {
88 	[RCS] = {
89 		.hw_id = RCS_HW,
90 		.uabi_id = I915_EXEC_RENDER,
91 		.class = RENDER_CLASS,
92 		.instance = 0,
93 		.mmio_base = RENDER_RING_BASE,
94 		.irq_shift = GEN8_RCS_IRQ_SHIFT,
95 	},
96 	[BCS] = {
97 		.hw_id = BCS_HW,
98 		.uabi_id = I915_EXEC_BLT,
99 		.class = COPY_ENGINE_CLASS,
100 		.instance = 0,
101 		.mmio_base = BLT_RING_BASE,
102 		.irq_shift = GEN8_BCS_IRQ_SHIFT,
103 	},
104 	[VCS] = {
105 		.hw_id = VCS_HW,
106 		.uabi_id = I915_EXEC_BSD,
107 		.class = VIDEO_DECODE_CLASS,
108 		.instance = 0,
109 		.mmio_base = GEN6_BSD_RING_BASE,
110 		.irq_shift = GEN8_VCS1_IRQ_SHIFT,
111 	},
112 	[VCS2] = {
113 		.hw_id = VCS2_HW,
114 		.uabi_id = I915_EXEC_BSD,
115 		.class = VIDEO_DECODE_CLASS,
116 		.instance = 1,
117 		.mmio_base = GEN8_BSD2_RING_BASE,
118 		.irq_shift = GEN8_VCS2_IRQ_SHIFT,
119 	},
120 	[VECS] = {
121 		.hw_id = VECS_HW,
122 		.uabi_id = I915_EXEC_VEBOX,
123 		.class = VIDEO_ENHANCEMENT_CLASS,
124 		.instance = 0,
125 		.mmio_base = VEBOX_RING_BASE,
126 		.irq_shift = GEN8_VECS_IRQ_SHIFT,
127 	},
128 };
129 
130 /**
131  * __intel_engine_context_size() - return the size of the context for an engine
132  * @dev_priv: i915 device private
133  * @class: engine class
134  *
135  * Each engine class may require a different amount of space for a context
136  * image.
137  *
138  * Return: size (in bytes) of an engine class specific context image
139  *
140  * Note: this size includes the HWSP, which is part of the context image
141  * in LRC mode, but does not include the "shared data page" used with
142  * GuC submission. The caller should account for this if using the GuC.
143  */
144 static u32
145 __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
146 {
147 	u32 cxt_size;
148 
149 	BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);
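	/* The context sizes below are in CPU pages, so GTT pages must match. */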
150 
151 	switch (class) {
152 	case RENDER_CLASS:
153 		switch (INTEL_GEN(dev_priv)) {
154 		default:
155 			MISSING_CASE(INTEL_GEN(dev_priv));
156 		case 10:
157 			return GEN10_LR_CONTEXT_RENDER_SIZE;
158 		case 9:
159 			return GEN9_LR_CONTEXT_RENDER_SIZE;
160 		case 8:
161 			return i915_modparams.enable_execlists ?
162 			       GEN8_LR_CONTEXT_RENDER_SIZE :
163 			       GEN8_CXT_TOTAL_SIZE;
164 		case 7:
165 			if (IS_HASWELL(dev_priv))
166 				return HSW_CXT_TOTAL_SIZE;
167 
168 			cxt_size = I915_READ(GEN7_CXT_SIZE);
169 			return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
170 					PAGE_SIZE);
171 		case 6:
172 			cxt_size = I915_READ(CXT_SIZE);
173 			return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
174 					PAGE_SIZE);
175 		case 5:
176 		case 4:
177 		case 3:
178 		case 2:
179 		/* For the special day when i810 gets merged. */
180 		case 1:
181 			return 0;
182 		}
183 		break;
184 	default:
185 		MISSING_CASE(class);
186 	case VIDEO_DECODE_CLASS:
187 	case VIDEO_ENHANCEMENT_CLASS:
188 	case COPY_ENGINE_CLASS:
189 		if (INTEL_GEN(dev_priv) < 8)
190 			return 0;
191 		return GEN8_LR_CONTEXT_OTHER_SIZE;
192 	}
193 }
194 
195 static int
196 intel_engine_setup(struct drm_i915_private *dev_priv,
197 		   enum intel_engine_id id)
198 {
199 	const struct engine_info *info = &intel_engines[id];
200 	const struct engine_class_info *class_info;
201 	struct intel_engine_cs *engine;
202 
203 	GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes));
204 	class_info = &intel_engine_classes[info->class];
205 
206 	GEM_BUG_ON(dev_priv->engine[id]);
207 	engine = kzalloc(sizeof(*engine), GFP_KERNEL);
208 	if (!engine)
209 		return -ENOMEM;
210 
211 	engine->id = id;
212 	engine->i915 = dev_priv;
213 	WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s%u",
214 			 class_info->name, info->instance) >=
215 		sizeof(engine->name));
216 	engine->uabi_id = info->uabi_id;
217 	engine->hw_id = engine->guc_id = info->hw_id;
218 	engine->mmio_base = info->mmio_base;
219 	engine->irq_shift = info->irq_shift;
220 	engine->class = info->class;
221 	engine->instance = info->instance;
222 
223 	engine->context_size = __intel_engine_context_size(dev_priv,
224 							   engine->class);
225 	if (WARN_ON(engine->context_size > BIT(20)))
226 		engine->context_size = 0;
227 
228 	/* Nothing to do here, execute in order of dependencies */
229 	engine->schedule = NULL;
230 
231 	ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
232 
233 	dev_priv->engine[id] = engine;
234 	return 0;
235 }
236 
237 /**
238  * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
239  * @dev_priv: i915 device private
240  *
241  * Return: non-zero if the initialization failed.
242  */
243 int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
244 {
245 	struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
246 	const unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask;
247 	struct intel_engine_cs *engine;
248 	enum intel_engine_id id;
249 	unsigned int mask = 0;
250 	unsigned int i;
251 	int err;
252 
253 	WARN_ON(ring_mask == 0);
254 	WARN_ON(ring_mask &
255 		GENMASK(sizeof(mask) * BITS_PER_BYTE - 1, I915_NUM_ENGINES));
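	/*
	 * The two checks above catch an empty mask and any stray bits set at
	 * or above I915_NUM_ENGINES. As an illustrative example, a part
	 * exposing just RCS, BCS and VCS has three bits set in ring_mask, so
	 * the hweight32() further down reports num_rings == 3.
	 */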
256 
257 	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
258 		if (!HAS_ENGINE(dev_priv, i))
259 			continue;
260 
261 		err = intel_engine_setup(dev_priv, i);
262 		if (err)
263 			goto cleanup;
264 
265 		mask |= ENGINE_MASK(i);
266 	}
267 
268 	/*
269 	 * Catch failures to update the intel_engines table when new engines
270 	 * are added to the driver, by warning about and disabling the
271 	 * forgotten engines.
272 	 */
273 	if (WARN_ON(mask != ring_mask))
274 		device_info->ring_mask = mask;
275 
276 	/* We always presume we have at least RCS available for later probing */
277 	if (WARN_ON(!HAS_ENGINE(dev_priv, RCS))) {
278 		err = -ENODEV;
279 		goto cleanup;
280 	}
281 
282 	device_info->num_rings = hweight32(mask);
283 
284 	return 0;
285 
286 cleanup:
287 	for_each_engine(engine, dev_priv, id)
288 		kfree(engine);
289 	return err;
290 }
291 
292 /**
293  * intel_engines_init() - init the Engine Command Streamers
294  * @dev_priv: i915 device private
295  *
296  * Return: non-zero if the initialization failed.
297  */
298 int intel_engines_init(struct drm_i915_private *dev_priv)
299 {
300 	struct intel_engine_cs *engine;
301 	enum intel_engine_id id, err_id;
302 	int err;
303 
304 	for_each_engine(engine, dev_priv, id) {
305 		const struct engine_class_info *class_info =
306 			&intel_engine_classes[engine->class];
307 		int (*init)(struct intel_engine_cs *engine);
308 
309 		if (i915_modparams.enable_execlists)
310 			init = class_info->init_execlists;
311 		else
312 			init = class_info->init_legacy;
313 
314 		err = -EINVAL;
315 		err_id = id;
316 
317 		if (GEM_WARN_ON(!init))
318 			goto cleanup;
319 
320 		err = init(engine);
321 		if (err)
322 			goto cleanup;
323 
324 		GEM_BUG_ON(!engine->submit_request);
325 	}
326 
327 	return 0;
328 
329 cleanup:
330 	for_each_engine(engine, dev_priv, id) {
331 		if (id >= err_id) {
332 			kfree(engine);
333 			dev_priv->engine[id] = NULL;
334 		} else {
335 			dev_priv->gt.cleanup_engine(engine);
336 		}
337 	}
338 	return err;
339 }
340 
341 void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
342 {
343 	struct drm_i915_private *dev_priv = engine->i915;
344 
345 	/* Our semaphore implementation is strictly monotonic (i.e. we proceed
346 	 * so long as the semaphore value in the register/page is greater
347 	 * than the sync value), so whenever we reset the seqno we must also
348 	 * reset the tracking semaphore value to 0 so that it is always
349 	 * before the next request's seqno. If we don't reset
350 	 * the semaphore value, then when the seqno moves backwards all
351 	 * future waits will complete instantly (causing rendering corruption).
352 	 */
353 	if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) {
354 		I915_WRITE(RING_SYNC_0(engine->mmio_base), 0);
355 		I915_WRITE(RING_SYNC_1(engine->mmio_base), 0);
356 		if (HAS_VEBOX(dev_priv))
357 			I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
358 	}
359 	if (dev_priv->semaphore) {
360 		struct page *page = i915_vma_first_page(dev_priv->semaphore);
361 		void *semaphores;
362 
363 		/* Semaphores are in noncoherent memory, flush to be safe */
364 		semaphores = kmap_atomic(page);
365 		memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
366 		       0, I915_NUM_ENGINES * gen8_semaphore_seqno_size);
367 		drm_clflush_virt_range(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
368 				       I915_NUM_ENGINES * gen8_semaphore_seqno_size);
369 		kunmap_atomic(semaphores);
370 	}
371 
372 	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
373 	clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
374 
375 	/* After manually advancing the seqno, fake the interrupt in case
376 	 * there are any waiters for that seqno.
377 	 */
378 	intel_engine_wakeup(engine);
379 
380 	GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno);
381 }
382 
383 static void intel_engine_init_timeline(struct intel_engine_cs *engine)
384 {
385 	engine->timeline = &engine->i915->gt.global_timeline.engine[engine->id];
386 }
387 
388 static bool csb_force_mmio(struct drm_i915_private *i915)
389 {
390 	/*
391 	 * IOMMU adds unpredictable latency causing the CSB write (from the
392 	 * GPU into the HWSP) to only be visible some time after the interrupt
393 	 * (missed breadcrumb syndrome).
394 	 */
395 	if (intel_vtd_active())
396 		return true;
397 
398 	/* Older GVT emulation depends upon intercepting CSB mmio */
399 	if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915))
400 		return true;
401 
402 	return false;
403 }
404 
405 static void intel_engine_init_execlist(struct intel_engine_cs *engine)
406 {
407 	struct intel_engine_execlists * const execlists = &engine->execlists;
408 
409 	execlists->csb_use_mmio = csb_force_mmio(engine->i915);
410 
411 	execlists->port_mask = 1;
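	/*
	 * port_mask encodes the number of ELSP ports minus one (doubling as
	 * the index mask), so the default of 1 describes the usual pair of
	 * submission ports; the GEM_BUG_ONs below then check that the count
	 * is a power of two and within EXECLIST_MAX_PORTS.
	 */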
412 	GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));	/* From Linux 5.0 */
413 	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
414 
415 	execlists->queue = LINUX_RB_ROOT;
416 	execlists->first = NULL;
417 }
418 
419 /**
420  * intel_engine_setup_common - set up engine state not requiring hw access
421  * @engine: Engine to setup.
422  *
423  * Initializes @engine structure members shared between legacy and execlists
424  * submission modes which do not require hardware access.
425  *
426  * Typically done early in the submission mode specific engine setup stage.
427  */
428 void intel_engine_setup_common(struct intel_engine_cs *engine)
429 {
430 	intel_engine_init_execlist(engine);
431 
432 	intel_engine_init_timeline(engine);
433 	intel_engine_init_hangcheck(engine);
434 	i915_gem_batch_pool_init(engine, &engine->batch_pool);
435 
436 	intel_engine_init_cmd_parser(engine);
437 }
438 
439 int intel_engine_create_scratch(struct intel_engine_cs *engine, int size)
440 {
441 	struct drm_i915_gem_object *obj;
442 	struct i915_vma *vma;
443 	int ret;
444 
445 	WARN_ON(engine->scratch);
446 
447 	obj = i915_gem_object_create_stolen(engine->i915, size);
448 	if (!obj)
449 		obj = i915_gem_object_create_internal(engine->i915, size);
450 	if (IS_ERR(obj)) {
451 		DRM_ERROR("Failed to allocate scratch page\n");
452 		return PTR_ERR(obj);
453 	}
454 
455 	vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
456 	if (IS_ERR(vma)) {
457 		ret = PTR_ERR(vma);
458 		goto err_unref;
459 	}
460 
461 	ret = i915_vma_pin(vma, 0, 4096, PIN_GLOBAL | PIN_HIGH);
462 	if (ret)
463 		goto err_unref;
464 
465 	engine->scratch = vma;
466 	DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
467 			 engine->name, i915_ggtt_offset(vma));
468 	return 0;
469 
470 err_unref:
471 	i915_gem_object_put(obj);
472 	return ret;
473 }
474 
475 static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
476 {
477 	i915_vma_unpin_and_release(&engine->scratch);
478 }
479 
480 static void cleanup_phys_status_page(struct intel_engine_cs *engine)
481 {
482 	struct drm_i915_private *dev_priv = engine->i915;
483 
484 	if (!dev_priv->status_page_dmah)
485 		return;
486 
487 	drm_pci_free(&dev_priv->drm, dev_priv->status_page_dmah);
488 	engine->status_page.page_addr = NULL;
489 }
490 
491 static void cleanup_status_page(struct intel_engine_cs *engine)
492 {
493 	struct i915_vma *vma;
494 	struct drm_i915_gem_object *obj;
495 
496 	vma = fetch_and_zero(&engine->status_page.vma);
497 	if (!vma)
498 		return;
499 
500 	obj = vma->obj;
501 
502 	i915_vma_unpin(vma);
503 	i915_vma_close(vma);
504 
505 	i915_gem_object_unpin_map(obj);
506 	__i915_gem_object_release_unless_active(obj);
507 }
508 
509 static int init_status_page(struct intel_engine_cs *engine)
510 {
511 	struct drm_i915_gem_object *obj;
512 	struct i915_vma *vma;
513 	unsigned int flags;
514 	void *vaddr;
515 	int ret;
516 
517 	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
518 	if (IS_ERR(obj)) {
519 		DRM_ERROR("Failed to allocate status page\n");
520 		return PTR_ERR(obj);
521 	}
522 
523 	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
524 	if (ret)
525 		goto err;
526 
527 	vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
528 	if (IS_ERR(vma)) {
529 		ret = PTR_ERR(vma);
530 		goto err;
531 	}
532 
533 	flags = PIN_GLOBAL;
534 	if (!HAS_LLC(engine->i915))
535 		/* On g33, we cannot place HWS above 256MiB, so
536 		 * restrict its pinning to the low mappable arena.
537 		 * Though this restriction is not documented for
538 		 * gen4, gen5, or byt, they also behave similarly
539 		 * and hang if the HWS is placed at the top of the
540 		 * GTT. To generalise, it appears that all !llc
541 		 * platforms have issues with us placing the HWS
542 		 * above the mappable region (even though we never
543 		 * actually map it).
544 		 */
545 		flags |= PIN_MAPPABLE;
546 	else
547 		flags |= PIN_HIGH;
548 	ret = i915_vma_pin(vma, 0, 4096, flags);
549 	if (ret)
550 		goto err;
551 
552 	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
553 	if (IS_ERR(vaddr)) {
554 		ret = PTR_ERR(vaddr);
555 		goto err_unpin;
556 	}
557 
558 	engine->status_page.vma = vma;
559 	engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
560 	engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE);
561 
562 	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
563 			 engine->name, i915_ggtt_offset(vma));
564 	return 0;
565 
566 err_unpin:
567 	i915_vma_unpin(vma);
568 err:
569 	i915_gem_object_put(obj);
570 	return ret;
571 }
572 
573 static int init_phys_status_page(struct intel_engine_cs *engine)
574 {
575 	struct drm_i915_private *dev_priv = engine->i915;
576 
577 	GEM_BUG_ON(engine->id != RCS);
578 
579 	dev_priv->status_page_dmah =
580 		drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
581 	if (!dev_priv->status_page_dmah)
582 		return -ENOMEM;
583 
584 	engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
585 	memset(engine->status_page.page_addr, 0, PAGE_SIZE);
586 
587 	return 0;
588 }
589 
590 /**
591  * intel_engine_init_common - initialize engine state which might require hw access
592  * @engine: Engine to initialize.
593  *
594  * Initializes @engine structure members shared between legacy and execlists
595  * submission modes which do require hardware access.
596  *
597  * Typically done at later stages of submission mode specific engine setup.
598  *
599  * Returns zero on success or an error code on failure.
600  */
601 int intel_engine_init_common(struct intel_engine_cs *engine)
602 {
603 	struct intel_ring *ring;
604 	int ret;
605 
606 	engine->set_default_submission(engine);
607 
608 	/* We may need to do things with the shrinker which
609 	 * require us to immediately switch back to the default
610 	 * context. This can cause a problem as pinning the
611 	 * default context also requires GTT space which may not
612 	 * be available. To avoid this we always pin the default
613 	 * context.
614 	 */
615 	ring = engine->context_pin(engine, engine->i915->kernel_context);
616 	if (IS_ERR(ring))
617 		return PTR_ERR(ring);
618 
619 	/*
620 	 * Similarly the preempt context must always be available so that
621 	 * we can interrupt the engine at any time.
622 	 */
623 	if (INTEL_INFO(engine->i915)->has_logical_ring_preemption) {
624 		ring = engine->context_pin(engine,
625 					   engine->i915->preempt_context);
626 		if (IS_ERR(ring)) {
627 			ret = PTR_ERR(ring);
628 			goto err_unpin_kernel;
629 		}
630 	}
631 
632 	ret = intel_engine_init_breadcrumbs(engine);
633 	if (ret)
634 		goto err_unpin_preempt;
635 
636 	ret = i915_gem_render_state_init(engine);
637 	if (ret)
638 		goto err_breadcrumbs;
639 
640 	if (HWS_NEEDS_PHYSICAL(engine->i915))
641 		ret = init_phys_status_page(engine);
642 	else
643 		ret = init_status_page(engine);
644 	if (ret)
645 		goto err_rs_fini;
646 
647 	return 0;
648 
649 err_rs_fini:
650 	i915_gem_render_state_fini(engine);
651 err_breadcrumbs:
652 	intel_engine_fini_breadcrumbs(engine);
653 err_unpin_preempt:
654 	if (INTEL_INFO(engine->i915)->has_logical_ring_preemption)
655 		engine->context_unpin(engine, engine->i915->preempt_context);
656 err_unpin_kernel:
657 	engine->context_unpin(engine, engine->i915->kernel_context);
658 	return ret;
659 }
660 
661 /**
662  * intel_engine_cleanup_common - cleans up the engine state created by
663  *                               the common initializers.
664  * @engine: Engine to cleanup.
665  *
666  * This cleans up everything created by the common helpers.
667  */
668 void intel_engine_cleanup_common(struct intel_engine_cs *engine)
669 {
670 	intel_engine_cleanup_scratch(engine);
671 
672 	if (HWS_NEEDS_PHYSICAL(engine->i915))
673 		cleanup_phys_status_page(engine);
674 	else
675 		cleanup_status_page(engine);
676 
677 	i915_gem_render_state_fini(engine);
678 	intel_engine_fini_breadcrumbs(engine);
679 	intel_engine_cleanup_cmd_parser(engine);
680 	i915_gem_batch_pool_fini(&engine->batch_pool);
681 
682 	if (INTEL_INFO(engine->i915)->has_logical_ring_preemption)
683 		engine->context_unpin(engine, engine->i915->preempt_context);
684 	engine->context_unpin(engine, engine->i915->kernel_context);
685 }
686 
687 u64 intel_engine_get_active_head(struct intel_engine_cs *engine)
688 {
689 	struct drm_i915_private *dev_priv = engine->i915;
690 	u64 acthd;
691 
692 	if (INTEL_GEN(dev_priv) >= 8)
693 		acthd = I915_READ64_2x32(RING_ACTHD(engine->mmio_base),
694 					 RING_ACTHD_UDW(engine->mmio_base));
695 	else if (INTEL_GEN(dev_priv) >= 4)
696 		acthd = I915_READ(RING_ACTHD(engine->mmio_base));
697 	else
698 		acthd = I915_READ(ACTHD);
699 
700 	return acthd;
701 }
702 
703 u64 intel_engine_get_last_batch_head(struct intel_engine_cs *engine)
704 {
705 	struct drm_i915_private *dev_priv = engine->i915;
706 	u64 bbaddr;
707 
708 	if (INTEL_GEN(dev_priv) >= 8)
709 		bbaddr = I915_READ64_2x32(RING_BBADDR(engine->mmio_base),
710 					  RING_BBADDR_UDW(engine->mmio_base));
711 	else
712 		bbaddr = I915_READ(RING_BBADDR(engine->mmio_base));
713 
714 	return bbaddr;
715 }
716 
717 const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
718 {
719 	switch (type) {
720 	case I915_CACHE_NONE: return " uncached";
721 	case I915_CACHE_LLC: return HAS_LLC(i915) ? " LLC" : " snooped";
722 	case I915_CACHE_L3_LLC: return " L3+LLC";
723 	case I915_CACHE_WT: return " WT";
724 	default: return "";
725 	}
726 }
727 
728 static inline uint32_t
729 read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
730 		  int subslice, i915_reg_t reg)
731 {
732 	uint32_t mcr;
733 	uint32_t ret;
734 	enum forcewake_domains fw_domains;
735 
736 	fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg,
737 						    FW_REG_READ);
738 	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
739 						     GEN8_MCR_SELECTOR,
740 						     FW_REG_READ | FW_REG_WRITE);
741 
742 	spin_lock_irq(&dev_priv->uncore.lock);
743 	intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
744 
745 	mcr = I915_READ_FW(GEN8_MCR_SELECTOR);
746 	/*
747 	 * The HW expects the slice and subslice selectors to be reset to 0
748 	 * after reading out the registers.
749 	 */
750 	WARN_ON_ONCE(mcr & (GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK));
751 	mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
752 	mcr |= GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
753 	I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
754 
755 	ret = I915_READ_FW(reg);
756 
757 	mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
758 	I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
759 
760 	intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
761 	spin_unlock_irq(&dev_priv->uncore.lock);
762 
763 	return ret;
764 }
765 
766 /* NB: please notice the memset */
767 void intel_engine_get_instdone(struct intel_engine_cs *engine,
768 			       struct intel_instdone *instdone)
769 {
770 	struct drm_i915_private *dev_priv = engine->i915;
771 	u32 mmio_base = engine->mmio_base;
772 	int slice;
773 	int subslice;
774 
775 	memset(instdone, 0, sizeof(*instdone));
776 
777 	switch (INTEL_GEN(dev_priv)) {
778 	default:
779 		instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));
780 
781 		if (engine->id != RCS)
782 			break;
783 
784 		instdone->slice_common = I915_READ(GEN7_SC_INSTDONE);
785 		for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
786 			instdone->sampler[slice][subslice] =
787 				read_subslice_reg(dev_priv, slice, subslice,
788 						  GEN7_SAMPLER_INSTDONE);
789 			instdone->row[slice][subslice] =
790 				read_subslice_reg(dev_priv, slice, subslice,
791 						  GEN7_ROW_INSTDONE);
792 		}
793 		break;
794 	case 7:
795 		instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));
796 
797 		if (engine->id != RCS)
798 			break;
799 
800 		instdone->slice_common = I915_READ(GEN7_SC_INSTDONE);
801 		instdone->sampler[0][0] = I915_READ(GEN7_SAMPLER_INSTDONE);
802 		instdone->row[0][0] = I915_READ(GEN7_ROW_INSTDONE);
803 
804 		break;
805 	case 6:
806 	case 5:
807 	case 4:
808 		instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));
809 
810 		if (engine->id == RCS)
811 			/* HACK: Using the wrong struct member */
812 			instdone->slice_common = I915_READ(GEN4_INSTDONE1);
813 		break;
814 	case 3:
815 	case 2:
816 		instdone->instdone = I915_READ(GEN2_INSTDONE);
817 		break;
818 	}
819 }
820 
821 static int wa_add(struct drm_i915_private *dev_priv,
822 		  i915_reg_t addr,
823 		  const u32 mask, const u32 val)
824 {
825 	const u32 idx = dev_priv->workarounds.count;
826 
827 	if (WARN_ON(idx >= I915_MAX_WA_REGS))
828 		return -ENOSPC;
829 
830 	dev_priv->workarounds.reg[idx].addr = addr;
831 	dev_priv->workarounds.reg[idx].value = val;
832 	dev_priv->workarounds.reg[idx].mask = mask;
833 
834 	dev_priv->workarounds.count++;
835 
836 	return 0;
837 }
838 
839 #define WA_REG(addr, mask, val) do { \
840 		const int r = wa_add(dev_priv, (addr), (mask), (val)); \
841 		if (r) \
842 			return r; \
843 	} while (0)
844 
845 #define WA_SET_BIT_MASKED(addr, mask) \
846 	WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))
847 
848 #define WA_CLR_BIT_MASKED(addr, mask) \
849 	WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))
850 
851 #define WA_SET_FIELD_MASKED(addr, mask, value) \
852 	WA_REG(addr, mask, _MASKED_FIELD(mask, value))
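/*
 * These helpers target "masked" registers, where the upper 16 bits of the
 * written value select which of the lower 16 bits take effect. As an
 * illustration, WA_SET_BIT_MASKED(addr, BIT(3)) records
 * _MASKED_BIT_ENABLE(BIT(3)) == (BIT(3) << 16) | BIT(3), i.e. "enable bit 3",
 * while WA_CLR_BIT_MASKED() uses _MASKED_BIT_DISABLE(), which sets only the
 * mask half, i.e. "bit 3 may change, and its new value is 0".
 */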
853 
854 static int wa_ring_whitelist_reg(struct intel_engine_cs *engine,
855 				 i915_reg_t reg)
856 {
857 	struct drm_i915_private *dev_priv = engine->i915;
858 	struct i915_workarounds *wa = &dev_priv->workarounds;
859 	const uint32_t index = wa->hw_whitelist_count[engine->id];
860 
861 	if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS))
862 		return -EINVAL;
863 
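	/*
	 * Each FORCE_TO_NONPRIV slot simply holds the mmio offset of a
	 * register that unprivileged command buffers are then permitted to
	 * access; the engine only has RING_MAX_NONPRIV_SLOTS of them, hence
	 * the bound check above.
	 */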
864 	I915_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index),
865 		   i915_mmio_reg_offset(reg));
866 	wa->hw_whitelist_count[engine->id]++;
867 
868 	return 0;
869 }
870 
871 static int gen8_init_workarounds(struct intel_engine_cs *engine)
872 {
873 	struct drm_i915_private *dev_priv = engine->i915;
874 
875 	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
876 
877 	/* WaDisableAsyncFlipPerfMode:bdw,chv */
878 	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
879 
880 	/* WaDisablePartialInstShootdown:bdw,chv */
881 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
882 			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
883 
884 	/* Use Force Non-Coherent whenever executing a 3D context. This is a
885 	 * workaround for a possible hang in the unlikely event a TLB
886 	 * invalidation occurs during a PSD flush.
887 	 */
888 	/* WaForceEnableNonCoherent:bdw,chv */
889 	/* WaHdcDisableFetchWhenMasked:bdw,chv */
890 	WA_SET_BIT_MASKED(HDC_CHICKEN0,
891 			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
892 			  HDC_FORCE_NON_COHERENT);
893 
894 	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
895 	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
896 	 *  polygons in the same 8x4 pixel/sample area to be processed without
897 	 *  stalling waiting for the earlier ones to write to Hierarchical Z
898 	 *  buffer."
899 	 *
900 	 * This optimization is off by default for BDW and CHV; turn it on.
901 	 */
902 	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
903 
904 	/* Wa4x4STCOptimizationDisable:bdw,chv */
905 	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
906 
907 	/*
908 	 * BSpec recommends 8x4 when MSAA is used,
909 	 * however in practice 16x4 seems fastest.
910 	 *
911 	 * Note that PS/WM thread counts depend on the WIZ hashing
912 	 * disable bit, which we don't touch here, but it's good
913 	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
914 	 */
915 	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
916 			    GEN6_WIZ_HASHING_MASK,
917 			    GEN6_WIZ_HASHING_16x4);
918 
919 	return 0;
920 }
921 
922 static int bdw_init_workarounds(struct intel_engine_cs *engine)
923 {
924 	struct drm_i915_private *dev_priv = engine->i915;
925 	int ret;
926 
927 	ret = gen8_init_workarounds(engine);
928 	if (ret)
929 		return ret;
930 
931 	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
932 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
933 
934 	/* WaDisableDopClockGating:bdw
935 	 *
936 	 * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
937 	 * to disable EUTC clock gating.
938 	 */
939 	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
940 			  DOP_CLOCK_GATING_DISABLE);
941 
942 	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
943 			  GEN8_SAMPLER_POWER_BYPASS_DIS);
944 
945 	WA_SET_BIT_MASKED(HDC_CHICKEN0,
946 			  /* WaForceContextSaveRestoreNonCoherent:bdw */
947 			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
948 			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
949 			  (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
950 
951 	return 0;
952 }
953 
954 static int chv_init_workarounds(struct intel_engine_cs *engine)
955 {
956 	struct drm_i915_private *dev_priv = engine->i915;
957 	int ret;
958 
959 	ret = gen8_init_workarounds(engine);
960 	if (ret)
961 		return ret;
962 
963 	/* WaDisableThreadStallDopClockGating:chv */
964 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
965 
966 	/* Improve HiZ throughput on CHV. */
967 	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
968 
969 	return 0;
970 }
971 
972 static int gen9_init_workarounds(struct intel_engine_cs *engine)
973 {
974 	struct drm_i915_private *dev_priv = engine->i915;
975 	int ret;
976 
977 	/* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
978 	I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));
979 
980 	/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
981 	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
982 		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
983 
984 	/* WaDisableKillLogic:bxt,skl,kbl */
985 	if (!IS_COFFEELAKE(dev_priv))
986 		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
987 			   ECOCHK_DIS_TLB);
988 
989 	if (HAS_LLC(dev_priv)) {
990 		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
991 		 *
992 		 * Must match Display Engine. See
993 		 * WaCompressedResourceDisplayNewHashMode.
994 		 */
995 		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
996 				  GEN9_PBE_COMPRESSED_HASH_SELECTION);
997 		WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
998 				  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
999 
1000 		I915_WRITE(MMCD_MISC_CTRL,
1001 			   I915_READ(MMCD_MISC_CTRL) |
1002 			   MMCD_PCLA |
1003 			   MMCD_HOTSPOT_EN);
1004 	}
1005 
1006 	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
1007 	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
1008 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
1009 			  FLOW_CONTROL_ENABLE |
1010 			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
1011 
1012 	/* Syncing dependencies between camera and graphics:skl,bxt,kbl */
1013 	if (!IS_COFFEELAKE(dev_priv))
1014 		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
1015 				  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
1016 
1017 	/* WaDisableDgMirrorFixInHalfSliceChicken5:bxt */
1018 	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
1019 		WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
1020 				  GEN9_DG_MIRROR_FIX_ENABLE);
1021 
1022 	/* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */
1023 	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
1024 		WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
1025 				  GEN9_RHWO_OPTIMIZATION_DISABLE);
1026 		/*
1027 		 * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set
1028 		 * but we do that in the per-ctx batchbuffer as there is an issue
1029 		 * with this register not getting restored on ctx restore
1030 		 */
1031 	}
1032 
1033 	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
1034 	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
1035 	WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
1036 			  GEN9_ENABLE_YV12_BUGFIX |
1037 			  GEN9_ENABLE_GPGPU_PREEMPTION);
1038 
1039 	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
1040 	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
1041 	WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
1042 					 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));
1043 
1044 	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
1045 	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
1046 			  GEN9_CCS_TLB_PREFETCH_ENABLE);
1047 
1048 	/* WaDisableMaskBasedCammingInRCC:bxt */
1049 	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
1050 		WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
1051 				  PIXEL_MASK_CAMMING_DISABLE);
1052 
1053 	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
1054 	WA_SET_BIT_MASKED(HDC_CHICKEN0,
1055 			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
1056 			  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
1057 
1058 	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
1059 	 * both tied to WaForceContextSaveRestoreNonCoherent
1060 	 * in some hsds for skl. We keep the tie for all gen9. The
1061 	 * documentation is a bit hazy and so we want to get common behaviour,
1062 	 * even though there is no clear evidence we would need both on kbl/bxt.
1063 	 * This area has been a source of system hangs so we play it safe
1064 	 * and mimic the skl regardless of what bspec says.
1065 	 *
1066 	 * Use Force Non-Coherent whenever executing a 3D context. This
1067 	 * is a workaround for a possible hang in the unlikely event
1068 	 * a TLB invalidation occurs during a PSD flush.
1069 	 */
1070 
1071 	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
1072 	WA_SET_BIT_MASKED(HDC_CHICKEN0,
1073 			  HDC_FORCE_NON_COHERENT);
1074 
1075 	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
1076 	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
1077 		   BDW_DISABLE_HDC_INVALIDATION);
1078 
1079 	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
1080 	if (IS_SKYLAKE(dev_priv) ||
1081 	    IS_KABYLAKE(dev_priv) ||
1082 	    IS_COFFEELAKE(dev_priv) ||
1083 	    IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0))
1084 		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
1085 				  GEN8_SAMPLER_POWER_BYPASS_DIS);
1086 
1087 	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
1088 	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
1089 
1090 	/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
1091 	I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
1092 				    GEN8_LQSC_FLUSH_COHERENT_LINES));
1093 
1094 	/*
1095 	 * Supporting preemption with fine-granularity requires changes in the
1096 	 * batch buffer programming. Since we can't break old userspace, we
1097 	 * need to set our default preemption level to a safe value. Userspace is
1098 	 * still able to use more fine-grained preemption levels, since in
1099 	 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
1100 	 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
1101 	 * not real HW workarounds, but merely a way to start using preemption
1102 	 * while maintaining old contract with userspace.
1103 	 */
1104 
1105 	/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
1106 	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
1107 
1108 	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
1109 	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
1110 			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
1111 
1112 	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
1113 	ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
1114 	if (ret)
1115 		return ret;
1116 
1117 	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
1118 	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
1119 		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
1120 	ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
1121 	if (ret)
1122 		return ret;
1123 
1124 	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
1125 	ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1);
1126 	if (ret)
1127 		return ret;
1128 
1129 	return 0;
1130 }
1131 
1132 static int skl_tune_iz_hashing(struct intel_engine_cs *engine)
1133 {
1134 	struct drm_i915_private *dev_priv = engine->i915;
1135 	u8 vals[3] = { 0, 0, 0 };
1136 	unsigned int i;
1137 
1138 	for (i = 0; i < 3; i++) {
1139 		u8 ss;
1140 
1141 		/*
1142 		 * Only consider slices where one, and only one, subslice has 7
1143 		 * EUs
1144 		 */
1145 		if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]))
1146 			continue;
1147 
1148 		/*
1149 		 * subslice_7eu[i] != 0 (because of the check above) and
1150 		 * ss_max == 4 (maximum number of subslices possible per slice)
1151 		 *
1152 		 * ->    0 <= ss <= 3;
1153 		 */
1154 		ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1;
1155 		vals[i] = 3 - ss;
1156 	}
1157 
1158 	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
1159 		return 0;
1160 
1161 	/* Tune IZ hashing. See intel_device_info_runtime_init() */
1162 	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
1163 			    GEN9_IZ_HASHING_MASK(2) |
1164 			    GEN9_IZ_HASHING_MASK(1) |
1165 			    GEN9_IZ_HASHING_MASK(0),
1166 			    GEN9_IZ_HASHING(2, vals[2]) |
1167 			    GEN9_IZ_HASHING(1, vals[1]) |
1168 			    GEN9_IZ_HASHING(0, vals[0]));
1169 
1170 	return 0;
1171 }
1172 
1173 static int skl_init_workarounds(struct intel_engine_cs *engine)
1174 {
1175 	struct drm_i915_private *dev_priv = engine->i915;
1176 	int ret;
1177 
1178 	ret = gen9_init_workarounds(engine);
1179 	if (ret)
1180 		return ret;
1181 
1182 	/* WaEnableGapsTsvCreditFix:skl */
1183 	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
1184 				   GEN9_GAPS_TSV_CREDIT_DISABLE));
1185 
1186 	/* WaDisableGafsUnitClkGating:skl */
1187 	I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
1188 				  GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));
1189 
1190 	/* WaInPlaceDecompressionHang:skl */
1191 	if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
1192 		I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
1193 			   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
1194 			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
1195 
1196 	/* WaDisableLSQCROPERFforOCL:skl */
1197 	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
1198 	if (ret)
1199 		return ret;
1200 
1201 	return skl_tune_iz_hashing(engine);
1202 }
1203 
1204 static int bxt_init_workarounds(struct intel_engine_cs *engine)
1205 {
1206 	struct drm_i915_private *dev_priv = engine->i915;
1207 	int ret;
1208 
1209 	ret = gen9_init_workarounds(engine);
1210 	if (ret)
1211 		return ret;
1212 
1213 	/* WaStoreMultiplePTEenable:bxt */
1214 	/* This is a requirement according to the Hardware specification */
1215 	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
1216 		I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
1217 
1218 	/* WaSetClckGatingDisableMedia:bxt */
1219 	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
1220 		I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
1221 					    ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE));
1222 	}
1223 
1224 	/* WaDisableThreadStallDopClockGating:bxt */
1225 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
1226 			  STALL_DOP_GATING_DISABLE);
1227 
1228 	/* WaDisablePooledEuLoadBalancingFix:bxt */
1229 	if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) {
1230 		I915_WRITE(FF_SLICE_CS_CHICKEN2,
1231 			   _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE));
1232 	}
1233 
1234 	/* WaDisableSbeCacheDispatchPortSharing:bxt */
1235 	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) {
1236 		WA_SET_BIT_MASKED(
1237 			GEN7_HALF_SLICE_CHICKEN1,
1238 			GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
1239 	}
1240 
1241 	/* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */
1242 	/* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */
1243 	/* WaDisableObjectLevelPreemtionForInstanceId:bxt */
1244 	/* WaDisableLSQCROPERFforOCL:bxt */
1245 	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
1246 		ret = wa_ring_whitelist_reg(engine, GEN9_CS_DEBUG_MODE1);
1247 		if (ret)
1248 			return ret;
1249 
1250 		ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
1251 		if (ret)
1252 			return ret;
1253 	}
1254 
1255 	/* WaProgramL3SqcReg1DefaultForPerf:bxt */
1256 	if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) {
1257 		u32 val = I915_READ(GEN8_L3SQCREG1);
1258 		val &= ~L3_PRIO_CREDITS_MASK;
1259 		val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2);
1260 		I915_WRITE(GEN8_L3SQCREG1, val);
1261 	}
1262 
1263 	/* WaToEnableHwFixForPushConstHWBug:bxt */
1264 	if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
1265 		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
1266 				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
1267 
1268 	/* WaInPlaceDecompressionHang:bxt */
1269 	if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
1270 		I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
1271 			   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
1272 			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
1273 
1274 	return 0;
1275 }
1276 
1277 static int cnl_init_workarounds(struct intel_engine_cs *engine)
1278 {
1279 	struct drm_i915_private *dev_priv = engine->i915;
1280 	int ret;
1281 
1282 	/* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
1283 	if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
1284 		I915_WRITE(GAMT_CHKN_BIT_REG,
1285 			   (I915_READ(GAMT_CHKN_BIT_REG) |
1286 			    GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT));
1287 
1288 	/* WaForceContextSaveRestoreNonCoherent:cnl */
1289 	WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
1290 			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
1291 
1292 	/* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
1293 	if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
1294 		WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
1295 
1296 	/* WaDisableReplayBufferBankArbitrationOptimization:cnl */
1297 	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
1298 			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
1299 
1300 	/* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
1301 	if (IS_CNL_REVID(dev_priv, 0, CNL_REVID_B0))
1302 		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
1303 				  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
1304 
1305 	/* WaInPlaceDecompressionHang:cnl */
1306 	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
1307 		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
1308 		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
1309 
1310 	/* WaPushConstantDereferenceHoldDisable:cnl */
1311 	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
1312 
1313 	/* FtrEnableFastAnisoL1BankingFix:cnl */
1314 	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
1315 
1316 	/* WaDisable3DMidCmdPreemption:cnl */
1317 	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
1318 
1319 	/* WaDisableGPGPUMidCmdPreemption:cnl */
1320 	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
1321 			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
1322 
1323 	/* WaEnablePreemptionGranularityControlByUMD:cnl */
1324 	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
1325 		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
1326 	ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
1327 	if (ret)
1328 		return ret;
1329 
1330 	return 0;
1331 }
1332 
1333 static int kbl_init_workarounds(struct intel_engine_cs *engine)
1334 {
1335 	struct drm_i915_private *dev_priv = engine->i915;
1336 	int ret;
1337 
1338 	ret = gen9_init_workarounds(engine);
1339 	if (ret)
1340 		return ret;
1341 
1342 	/* WaEnableGapsTsvCreditFix:kbl */
1343 	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
1344 				   GEN9_GAPS_TSV_CREDIT_DISABLE));
1345 
1346 	/* WaDisableDynamicCreditSharing:kbl */
1347 	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
1348 		I915_WRITE(GAMT_CHKN_BIT_REG,
1349 			   (I915_READ(GAMT_CHKN_BIT_REG) |
1350 			    GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING));
1351 
1352 	/* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
1353 	if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
1354 		WA_SET_BIT_MASKED(HDC_CHICKEN0,
1355 				  HDC_FENCE_DEST_SLM_DISABLE);
1356 
1357 	/* WaToEnableHwFixForPushConstHWBug:kbl */
1358 	if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
1359 		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
1360 				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
1361 
1362 	/* WaDisableGafsUnitClkGating:kbl */
1363 	I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
1364 				  GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));
1365 
1366 	/* WaDisableSbeCacheDispatchPortSharing:kbl */
1367 	WA_SET_BIT_MASKED(
1368 		GEN7_HALF_SLICE_CHICKEN1,
1369 		GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
1370 
1371 	/* WaInPlaceDecompressionHang:kbl */
1372 	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
1373 		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
1374 		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
1375 
1376 	/* WaDisableLSQCROPERFforOCL:kbl */
1377 	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
1378 	if (ret)
1379 		return ret;
1380 
1381 	return 0;
1382 }
1383 
1384 static int glk_init_workarounds(struct intel_engine_cs *engine)
1385 {
1386 	struct drm_i915_private *dev_priv = engine->i915;
1387 	int ret;
1388 
1389 	ret = gen9_init_workarounds(engine);
1390 	if (ret)
1391 		return ret;
1392 
1393 	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
1394 	ret = wa_ring_whitelist_reg(engine, GEN9_SLICE_COMMON_ECO_CHICKEN1);
1395 	if (ret)
1396 		return ret;
1397 
1398 	/* WaToEnableHwFixForPushConstHWBug:glk */
1399 	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
1400 			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
1401 
1402 	return 0;
1403 }
1404 
1405 static int cfl_init_workarounds(struct intel_engine_cs *engine)
1406 {
1407 	struct drm_i915_private *dev_priv = engine->i915;
1408 	int ret;
1409 
1410 	ret = gen9_init_workarounds(engine);
1411 	if (ret)
1412 		return ret;
1413 
1414 	/* WaEnableGapsTsvCreditFix:cfl */
1415 	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
1416 				   GEN9_GAPS_TSV_CREDIT_DISABLE));
1417 
1418 	/* WaToEnableHwFixForPushConstHWBug:cfl */
1419 	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
1420 			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
1421 
1422 	/* WaDisableGafsUnitClkGating:cfl */
1423 	I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
1424 				  GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));
1425 
1426 	/* WaDisableSbeCacheDispatchPortSharing:cfl */
1427 	WA_SET_BIT_MASKED(
1428 		GEN7_HALF_SLICE_CHICKEN1,
1429 		GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
1430 
1431 	/* WaInPlaceDecompressionHang:cfl */
1432 	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
1433 		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
1434 		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
1435 
1436 	return 0;
1437 }
1438 
1439 int init_workarounds_ring(struct intel_engine_cs *engine)
1440 {
1441 	struct drm_i915_private *dev_priv = engine->i915;
1442 	int err;
1443 
1444 	WARN_ON(engine->id != RCS);
1445 
1446 	dev_priv->workarounds.count = 0;
1447 	dev_priv->workarounds.hw_whitelist_count[engine->id] = 0;
1448 
1449 	if (IS_BROADWELL(dev_priv))
1450 		err = bdw_init_workarounds(engine);
1451 	else if (IS_CHERRYVIEW(dev_priv))
1452 		err = chv_init_workarounds(engine);
1453 	else if (IS_SKYLAKE(dev_priv))
1454 		err = skl_init_workarounds(engine);
1455 	else if (IS_BROXTON(dev_priv))
1456 		err = bxt_init_workarounds(engine);
1457 	else if (IS_KABYLAKE(dev_priv))
1458 		err = kbl_init_workarounds(engine);
1459 	else if (IS_GEMINILAKE(dev_priv))
1460 		err = glk_init_workarounds(engine);
1461 	else if (IS_COFFEELAKE(dev_priv))
1462 		err = cfl_init_workarounds(engine);
1463 	else if (IS_CANNONLAKE(dev_priv))
1464 		err = cnl_init_workarounds(engine);
1465 	else
1466 		err = 0;
1467 	if (err)
1468 		return err;
1469 
1470 	DRM_DEBUG_DRIVER("%s: Number of context specific w/a: %d\n",
1471 			 engine->name, dev_priv->workarounds.count);
1472 	return 0;
1473 }
1474 
1475 int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
1476 {
1477 	struct i915_workarounds *w = &req->i915->workarounds;
1478 	u32 *cs;
1479 	int ret, i;
1480 
1481 	if (w->count == 0)
1482 		return 0;
1483 
1484 	ret = req->engine->emit_flush(req, EMIT_BARRIER);
1485 	if (ret)
1486 		return ret;
1487 
1488 	cs = intel_ring_begin(req, (w->count * 2 + 2));
1489 	if (IS_ERR(cs))
1490 		return PTR_ERR(cs);
1491 
1492 	*cs++ = MI_LOAD_REGISTER_IMM(w->count);
1493 	for (i = 0; i < w->count; i++) {
1494 		*cs++ = i915_mmio_reg_offset(w->reg[i].addr);
1495 		*cs++ = w->reg[i].value;
1496 	}
1497 	*cs++ = MI_NOOP;
1498 
1499 	intel_ring_advance(req, cs);
1500 
1501 	ret = req->engine->emit_flush(req, EMIT_BARRIER);
1502 	if (ret)
1503 		return ret;
1504 
1505 	return 0;
1506 }
1507 
1508 static bool ring_is_idle(struct intel_engine_cs *engine)
1509 {
1510 	struct drm_i915_private *dev_priv = engine->i915;
1511 	bool idle = true;
1512 
1513 	intel_runtime_pm_get(dev_priv);
1514 
1515 	/* First check that no commands are left in the ring */
1516 	if ((I915_READ_HEAD(engine) & HEAD_ADDR) !=
1517 	    (I915_READ_TAIL(engine) & TAIL_ADDR))
1518 		idle = false;
1519 
1520 	/* No bit for gen2, so assume the CS parser is idle */
1521 	if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE))
1522 		idle = false;
1523 
1524 	intel_runtime_pm_put(dev_priv);
1525 
1526 	return idle;
1527 }
1528 
1529 /**
1530  * intel_engine_is_idle() - Report if the engine has finished process all work
1531  * @engine: the intel_engine_cs
1532  *
1533  * Return true if there are no requests pending, nothing left to be submitted
1534  * to hardware, and the engine is idle.
1535  */
1536 bool intel_engine_is_idle(struct intel_engine_cs *engine)
1537 {
1538 	struct drm_i915_private *dev_priv = engine->i915;
1539 
1540 	/* More white lies, if wedged, hw state is inconsistent */
1541 	if (i915_terminally_wedged(&dev_priv->gpu_error))
1542 		return true;
1543 
1544 	/* Any inflight/incomplete requests? */
1545 	if (!i915_seqno_passed(intel_engine_get_seqno(engine),
1546 			       intel_engine_last_submit(engine)))
1547 		return false;
1548 
1549 	if (I915_SELFTEST_ONLY(engine->breadcrumbs.mock))
1550 		return true;
1551 
1552 	/* Interrupt/tasklet pending? */
1553 	if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
1554 		return false;
1555 
1556 	/* Waiting to drain ELSP? */
1557 	if (READ_ONCE(engine->execlists.active))
1558 		return false;
1559 
1560 	/* ELSP is empty, but there are ready requests? */
1561 	if (READ_ONCE(engine->execlists.first))
1562 		return false;
1563 
1564 	/* Ring stopped? */
1565 	if (!ring_is_idle(engine))
1566 		return false;
1567 
1568 	return true;
1569 }
1570 
1571 bool intel_engines_are_idle(struct drm_i915_private *dev_priv)
1572 {
1573 	struct intel_engine_cs *engine;
1574 	enum intel_engine_id id;
1575 
1576 	if (READ_ONCE(dev_priv->gt.active_requests))
1577 		return false;
1578 
1579 	/* If the driver is wedged, HW state may be very inconsistent and
1580 	 * report that it is still busy, even though we have stopped using it.
1581 	 */
1582 	if (i915_terminally_wedged(&dev_priv->gpu_error))
1583 		return true;
1584 
1585 	for_each_engine(engine, dev_priv, id) {
1586 		if (!intel_engine_is_idle(engine))
1587 			return false;
1588 	}
1589 
1590 	return true;
1591 }
1592 
1593 void intel_engines_reset_default_submission(struct drm_i915_private *i915)
1594 {
1595 	struct intel_engine_cs *engine;
1596 	enum intel_engine_id id;
1597 
1598 	for_each_engine(engine, i915, id)
1599 		engine->set_default_submission(engine);
1600 }
1601 
1602 void intel_engines_mark_idle(struct drm_i915_private *i915)
1603 {
1604 	struct intel_engine_cs *engine;
1605 	enum intel_engine_id id;
1606 
1607 	for_each_engine(engine, i915, id) {
1608 		intel_engine_disarm_breadcrumbs(engine);
1609 		i915_gem_batch_pool_fini(&engine->batch_pool);
1610 		tasklet_kill(&engine->execlists.irq_tasklet);
1611 		engine->execlists.no_priolist = false;
1612 	}
1613 }
1614 
1615 bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
1616 {
1617 	switch (INTEL_GEN(engine->i915)) {
1618 	case 2:
1619 		return false; /* uses physical not virtual addresses */
1620 	case 3:
1621 		/* maybe only uses physical not virtual addresses */
1622 		return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915));
1623 	case 6:
1624 		return engine->class != VIDEO_DECODE_CLASS; /* b0rked */
1625 	default:
1626 		return true;
1627 	}
1628 }
1629 
1630 static void print_request(struct drm_printer *m,
1631 			  struct drm_i915_gem_request *rq,
1632 			  const char *prefix)
1633 {
1634 	drm_printf(m, "%s%x%s [%x:%x] prio=%d @ %ldms: %s\n", prefix,
1635 		   rq->global_seqno,
1636 		   i915_gem_request_completed(rq) ? "!" : "",
1637 		   rq->ctx->hw_id, rq->fence.seqno,
1638 		   rq->priotree.priority,
1639 		   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
1640 		   rq->timeline->common->name);
1641 }
1642 
1643 void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m)
1644 {
1645 	struct intel_breadcrumbs * const b = &engine->breadcrumbs;
1646 	const struct intel_engine_execlists * const execlists = &engine->execlists;
1647 	struct i915_gpu_error * const error = &engine->i915->gpu_error;
1648 	struct drm_i915_private *dev_priv = engine->i915;
1649 	struct drm_i915_gem_request *rq;
1650 	struct rb_node *rb;
1651 	u64 addr;
1652 
1653 	drm_printf(m, "%s\n", engine->name);
1654 	drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%ld ms], inflight %d\n",
1655 		   intel_engine_get_seqno(engine),
1656 		   intel_engine_last_submit(engine),
1657 		   engine->hangcheck.seqno,
1658 		   jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp),
1659 		   engine->timeline->inflight_seqnos);
1660 	drm_printf(m, "\tReset count: %d\n",
1661 		   i915_reset_engine_count(error, engine));
1662 
1663 	rcu_read_lock();
1664 
1665 	drm_printf(m, "\tRequests:\n");
1666 
1667 	rq = list_first_entry(&engine->timeline->requests,
1668 			      struct drm_i915_gem_request, link);
1669 	if (&rq->link != &engine->timeline->requests)
1670 		print_request(m, rq, "\t\tfirst  ");
1671 
1672 	rq = list_last_entry(&engine->timeline->requests,
1673 			     struct drm_i915_gem_request, link);
1674 	if (&rq->link != &engine->timeline->requests)
1675 		print_request(m, rq, "\t\tlast   ");
1676 
1677 	rq = i915_gem_find_active_request(engine);
1678 	if (rq) {
1679 		print_request(m, rq, "\t\tactive ");
1680 		drm_printf(m,
1681 			   "\t\t[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]\n",
1682 			   rq->head, rq->postfix, rq->tail,
1683 			   rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
1684 			   rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);
1685 	}
1686 
1687 	drm_printf(m, "\tRING_START: 0x%08x [0x%08x]\n",
1688 		   I915_READ(RING_START(engine->mmio_base)),
1689 		   rq ? i915_ggtt_offset(rq->ring->vma) : 0);
1690 	drm_printf(m, "\tRING_HEAD:  0x%08x [0x%08x]\n",
1691 		   I915_READ(RING_HEAD(engine->mmio_base)) & HEAD_ADDR,
1692 		   rq ? rq->ring->head : 0);
1693 	drm_printf(m, "\tRING_TAIL:  0x%08x [0x%08x]\n",
1694 		   I915_READ(RING_TAIL(engine->mmio_base)) & TAIL_ADDR,
1695 		   rq ? rq->ring->tail : 0);
1696 	drm_printf(m, "\tRING_CTL:   0x%08x [%s]\n",
1697 		   I915_READ(RING_CTL(engine->mmio_base)),
1698 		   I915_READ(RING_CTL(engine->mmio_base)) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? "waiting" : "");
1699 
1700 	rcu_read_unlock();
1701 
1702 	addr = intel_engine_get_active_head(engine);
1703 	drm_printf(m, "\tACTHD:  0x%08x_%08x\n",
1704 		   upper_32_bits(addr), lower_32_bits(addr));
1705 	addr = intel_engine_get_last_batch_head(engine);
1706 	drm_printf(m, "\tBBADDR: 0x%08x_%08x\n",
1707 		   upper_32_bits(addr), lower_32_bits(addr));
1708 
1709 	if (i915_modparams.enable_execlists) {
1710 		const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
1711 		u32 ptr, read, write;
1712 		unsigned int idx;
1713 
1714 		drm_printf(m, "\tExeclist status: 0x%08x %08x\n",
1715 			   I915_READ(RING_EXECLIST_STATUS_LO(engine)),
1716 			   I915_READ(RING_EXECLIST_STATUS_HI(engine)));
1717 
1718 		ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
1719 		read = GEN8_CSB_READ_PTR(ptr);
1720 		write = GEN8_CSB_WRITE_PTR(ptr);
1721 		drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s\n",
1722 			   read, execlists->csb_head,
1723 			   write,
1724 			   intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)),
1725 			   yesno(test_bit(ENGINE_IRQ_EXECLIST,
1726 					  &engine->irq_posted)));
1727 		if (read >= GEN8_CSB_ENTRIES)
1728 			read = 0;
1729 		if (write >= GEN8_CSB_ENTRIES)
1730 			write = 0;
1731 		if (read > write)
1732 			write += GEN8_CSB_ENTRIES;
1733 		while (read < write) {
1734 			idx = ++read % GEN8_CSB_ENTRIES;
1735 			drm_printf(m, "\tExeclist CSB[%d]: 0x%08x [0x%08x in hwsp], context: %d [%d in hwsp]\n",
1736 				   idx,
1737 				   I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)),
1738 				   hws[idx * 2],
1739 				   I915_READ(RING_CONTEXT_STATUS_BUF_HI(engine, idx)),
1740 				   hws[idx * 2 + 1]);
1741 		}
1742 
1743 		rcu_read_lock();
1744 		for (idx = 0; idx < execlists_num_ports(execlists); idx++) {
1745 			unsigned int count;
1746 
1747 			rq = port_unpack(&execlists->port[idx], &count);
1748 			if (rq) {
1749 				drm_printf(m, "\t\tELSP[%d] count=%d, ",
1750 					   idx, count);
1751 				print_request(m, rq, "rq: ");
1752 			} else {
1753 				drm_printf(m, "\t\tELSP[%d] idle\n",
1754 					   idx);
1755 			}
1756 		}
1757 		drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active);
1758 		rcu_read_unlock();
1759 	} else if (INTEL_GEN(dev_priv) > 6) {
1760 		drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
1761 			   I915_READ(RING_PP_DIR_BASE(engine)));
1762 		drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n",
1763 			   I915_READ(RING_PP_DIR_BASE_READ(engine)));
1764 		drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n",
1765 			   I915_READ(RING_PP_DIR_DCLV(engine)));
1766 	}
1767 
1768 	spin_lock_irq(&engine->timeline->lock);
1769 	list_for_each_entry(rq, &engine->timeline->requests, link)
1770 		print_request(m, rq, "\t\tE ");
1771 	for (rb = execlists->first; rb; rb = rb_next(rb)) {
1772 		struct i915_priolist *p =
1773 			rb_entry(rb, typeof(*p), node);
1774 
1775 		list_for_each_entry(rq, &p->requests, priotree.link)
1776 			print_request(m, rq, "\t\tQ ");
1777 	}
1778 	spin_unlock_irq(&engine->timeline->lock);
1779 
1780 	spin_lock_irq(&b->rb_lock);
1781 	for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
1782 		struct intel_wait *w = rb_entry(rb, typeof(*w), node);
1783 
1784 		drm_printf(m, "\t%s [%d] waiting for %x\n",
1785 			   w->tsk->comm, w->tsk->pid, w->seqno);
1786 	}
1787 	spin_unlock_irq(&b->rb_lock);
1788 
1789 	drm_printf(m, "\n");
1790 }
1791 
1792 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1793 #include "selftests/mock_engine.c"
1794 #endif
1795