xref: /dragonfly/sys/dev/drm/i915/intel_pm.c (revision 745703c7)
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eugeni Dodonov <eugeni.dodonov@intel.com>
25  *
26  */
27 
28 #include "i915_drv.h"
29 #include "intel_drv.h"
30 #include <linux/module.h>
31 #include <machine/clock.h>
32 #include <drm/i915_powerwell.h>
33 
34 /**
35  * RC6 is a special power stage which allows the GPU to enter a very
36  * low-voltage mode when idle, using as little as 0V while in this stage.  This
37  * stage is entered automatically when the GPU is idle, provided RC6 support is
38  * enabled, and as soon as a new workload arises the GPU wakes up automatically as well.
39  *
40  * There are different RC6 modes available on Intel GPUs, which differ
41  * from each other in the latency required to enter and leave RC6, and in the
42  * voltage consumed by the GPU while in the different states.
43  *
44  * The combination of the following flags defines which states the GPU is
45  * allowed to enter, where RC6 is the normal RC6 state, RC6p is the deep RC6,
46  * and RC6pp is the deepest RC6. Their support by hardware varies according to the
47  * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
48  * which brings the most power savings; deeper states save more power, but
49  * require higher latency to switch to and wake up.
50  */
51 #define INTEL_RC6_ENABLE			(1<<0)
52 #define INTEL_RC6p_ENABLE			(1<<1)
53 #define INTEL_RC6pp_ENABLE			(1<<2)
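/* Illustrative example (not part of the original source): a platform whose
 * BIOS/hardware supports only base RC6 would use a mask of just
 * INTEL_RC6_ENABLE (1<<0) == 0x1, while one that also allows deep RC6 would
 * use INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE == 0x3; RC6pp is ORed in the same
 * way when the deepest state is permitted.
 */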
54 
55 /* FBC, or Frame Buffer Compression, is a technique employed to compress the
56  * framebuffer contents in-memory, aiming at reducing the required bandwidth
57  * during in-memory transfers and, therefore, reducing power consumption.
58  *
59  * The benefits of FBC are mostly visible with solid backgrounds and
60  * variation-less patterns.
61  *
62  * FBC-related functionality can be enabled by means of the
63  * i915.i915_enable_fbc parameter.
64  */
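/* For illustration (a reading of the checks in intel_update_fbc() below, not
 * an authoritative statement): i915_enable_fbc > 0 allows FBC where the other
 * constraints are met, i915_enable_fbc == 0 disables it outright, and a
 * negative value falls back to the per-chip default.
 */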
65 
66 static void i8xx_disable_fbc(struct drm_device *dev)
67 {
68 	struct drm_i915_private *dev_priv = dev->dev_private;
69 	u32 fbc_ctl;
70 
71 	/* Disable compression */
72 	fbc_ctl = I915_READ(FBC_CONTROL);
73 	if ((fbc_ctl & FBC_CTL_EN) == 0)
74 		return;
75 
76 	fbc_ctl &= ~FBC_CTL_EN;
77 	I915_WRITE(FBC_CONTROL, fbc_ctl);
78 
79 	/* Wait for compressing bit to clear */
80 	if (wait_for((I915_READ(FBC_STATUS) & FBC_STAT_COMPRESSING) == 0, 10)) {
81 		DRM_DEBUG_KMS("FBC idle timed out\n");
82 		return;
83 	}
84 
85 	DRM_DEBUG_KMS("disabled FBC\n");
86 }
87 
88 static void i8xx_enable_fbc(struct drm_crtc *crtc)
89 {
90 	struct drm_device *dev = crtc->dev;
91 	struct drm_i915_private *dev_priv = dev->dev_private;
92 	struct drm_framebuffer *fb = crtc->fb;
93 	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
94 	struct drm_i915_gem_object *obj = intel_fb->obj;
95 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
96 	int cfb_pitch;
97 	int plane, i;
98 	u32 fbc_ctl;
99 
100 	cfb_pitch = dev_priv->fbc.size / FBC_LL_SIZE;
101 	if (fb->pitches[0] < cfb_pitch)
102 		cfb_pitch = fb->pitches[0];
103 
104 	/* FBC_CTL wants 32B or 64B units */
105 	if (IS_GEN2(dev))
106 		cfb_pitch = (cfb_pitch / 32) - 1;
107 	else
108 		cfb_pitch = (cfb_pitch / 64) - 1;
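	/* Worked example with illustrative numbers: if the computed cfb_pitch
	 * were 4096 bytes on a non-gen2 part, the field programmed below would
	 * be 4096 / 64 - 1 = 63, i.e. the pitch in 64-byte units, minus one.
	 */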
109 	plane = intel_crtc->plane == 0 ? FBC_CTL_PLANEA : FBC_CTL_PLANEB;
110 
111 	/* Clear old tags */
112 	for (i = 0; i < (FBC_LL_SIZE / 32) + 1; i++)
113 		I915_WRITE(FBC_TAG + (i * 4), 0);
114 
115 	if (IS_GEN4(dev)) {
116 		u32 fbc_ctl2;
117 
118 		/* Set it up... */
119 		fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM | FBC_CTL_CPU_FENCE;
120 		fbc_ctl2 |= plane;
121 		I915_WRITE(FBC_CONTROL2, fbc_ctl2);
122 		I915_WRITE(FBC_FENCE_OFF, crtc->y);
123 	}
124 
125 	/* enable it... */
126 	fbc_ctl = I915_READ(FBC_CONTROL);
127 	fbc_ctl &= 0x3fff << FBC_CTL_INTERVAL_SHIFT;
128 	fbc_ctl |= FBC_CTL_EN | FBC_CTL_PERIODIC;
129 	if (IS_I945GM(dev))
130 		fbc_ctl |= FBC_CTL_C3_IDLE; /* 945 needs special SR handling */
131 	fbc_ctl |= (cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT;
132 	fbc_ctl |= obj->fence_reg;
133 	I915_WRITE(FBC_CONTROL, fbc_ctl);
134 
135 	DRM_DEBUG_KMS("enabled FBC, pitch %d, yoff %d, plane %c\n",
136 		      cfb_pitch, crtc->y, plane_name(intel_crtc->plane));
137 }
138 
139 static bool i8xx_fbc_enabled(struct drm_device *dev)
140 {
141 	struct drm_i915_private *dev_priv = dev->dev_private;
142 
143 	return I915_READ(FBC_CONTROL) & FBC_CTL_EN;
144 }
145 
146 static void g4x_enable_fbc(struct drm_crtc *crtc)
147 {
148 	struct drm_device *dev = crtc->dev;
149 	struct drm_i915_private *dev_priv = dev->dev_private;
150 	struct drm_framebuffer *fb = crtc->fb;
151 	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
152 	struct drm_i915_gem_object *obj = intel_fb->obj;
153 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
154 	int plane = intel_crtc->plane == 0 ? DPFC_CTL_PLANEA : DPFC_CTL_PLANEB;
155 	u32 dpfc_ctl;
156 
157 	dpfc_ctl = plane | DPFC_SR_EN | DPFC_CTL_LIMIT_1X;
158 	dpfc_ctl |= DPFC_CTL_FENCE_EN | obj->fence_reg;
159 	I915_WRITE(DPFC_CHICKEN, DPFC_HT_MODIFY);
160 
161 	I915_WRITE(DPFC_FENCE_YOFF, crtc->y);
162 
163 	/* enable it... */
164 	I915_WRITE(DPFC_CONTROL, I915_READ(DPFC_CONTROL) | DPFC_CTL_EN);
165 
166 	DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
167 }
168 
169 static void g4x_disable_fbc(struct drm_device *dev)
170 {
171 	struct drm_i915_private *dev_priv = dev->dev_private;
172 	u32 dpfc_ctl;
173 
174 	/* Disable compression */
175 	dpfc_ctl = I915_READ(DPFC_CONTROL);
176 	if (dpfc_ctl & DPFC_CTL_EN) {
177 		dpfc_ctl &= ~DPFC_CTL_EN;
178 		I915_WRITE(DPFC_CONTROL, dpfc_ctl);
179 
180 		DRM_DEBUG_KMS("disabled FBC\n");
181 	}
182 }
183 
184 static bool g4x_fbc_enabled(struct drm_device *dev)
185 {
186 	struct drm_i915_private *dev_priv = dev->dev_private;
187 
188 	return I915_READ(DPFC_CONTROL) & DPFC_CTL_EN;
189 }
190 
191 static void sandybridge_blit_fbc_update(struct drm_device *dev)
192 {
193 	struct drm_i915_private *dev_priv = dev->dev_private;
194 	u32 blt_ecoskpd;
195 
196 	/* Make sure blitter notifies FBC of writes */
197 
198 	/* Blitter is part of Media powerwell on VLV. No impact of
199 	 * this param on other platforms for now */
200 	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_MEDIA);
201 
202 	blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD);
203 	blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY <<
204 		GEN6_BLITTER_LOCK_SHIFT;
205 	I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
206 	blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY;
207 	I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
208 	blt_ecoskpd &= ~(GEN6_BLITTER_FBC_NOTIFY <<
209 			 GEN6_BLITTER_LOCK_SHIFT);
210 	I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
211 	POSTING_READ(GEN6_BLITTER_ECOSKPD);
212 
213 	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_MEDIA);
214 }
215 
216 static void ironlake_enable_fbc(struct drm_crtc *crtc)
217 {
218 	struct drm_device *dev = crtc->dev;
219 	struct drm_i915_private *dev_priv = dev->dev_private;
220 	struct drm_framebuffer *fb = crtc->fb;
221 	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
222 	struct drm_i915_gem_object *obj = intel_fb->obj;
223 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
224 	int plane = intel_crtc->plane == 0 ? DPFC_CTL_PLANEA : DPFC_CTL_PLANEB;
225 	u32 dpfc_ctl;
226 
227 	dpfc_ctl = I915_READ(ILK_DPFC_CONTROL);
228 	dpfc_ctl &= DPFC_RESERVED;
229 	dpfc_ctl |= (plane | DPFC_CTL_LIMIT_1X);
230 	/* Set persistent mode for front-buffer rendering, ala X. */
231 	dpfc_ctl |= DPFC_CTL_PERSISTENT_MODE;
232 	dpfc_ctl |= DPFC_CTL_FENCE_EN;
233 	if (IS_GEN5(dev))
234 		dpfc_ctl |= obj->fence_reg;
235 	I915_WRITE(ILK_DPFC_CHICKEN, DPFC_HT_MODIFY);
236 
237 	I915_WRITE(ILK_DPFC_FENCE_YOFF, crtc->y);
238 	I915_WRITE(ILK_FBC_RT_BASE, i915_gem_obj_ggtt_offset(obj) | ILK_FBC_RT_VALID);
239 	/* enable it... */
240 	I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
241 
242 	if (IS_GEN6(dev)) {
243 		I915_WRITE(SNB_DPFC_CTL_SA,
244 			   SNB_CPU_FENCE_ENABLE | obj->fence_reg);
245 		I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y);
246 		sandybridge_blit_fbc_update(dev);
247 	}
248 
249 	DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
250 }
251 
252 static void ironlake_disable_fbc(struct drm_device *dev)
253 {
254 	struct drm_i915_private *dev_priv = dev->dev_private;
255 	u32 dpfc_ctl;
256 
257 	/* Disable compression */
258 	dpfc_ctl = I915_READ(ILK_DPFC_CONTROL);
259 	if (dpfc_ctl & DPFC_CTL_EN) {
260 		dpfc_ctl &= ~DPFC_CTL_EN;
261 		I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl);
262 
263 		DRM_DEBUG_KMS("disabled FBC\n");
264 	}
265 }
266 
267 static bool ironlake_fbc_enabled(struct drm_device *dev)
268 {
269 	struct drm_i915_private *dev_priv = dev->dev_private;
270 
271 	return I915_READ(ILK_DPFC_CONTROL) & DPFC_CTL_EN;
272 }
273 
274 static void gen7_enable_fbc(struct drm_crtc *crtc)
275 {
276 	struct drm_device *dev = crtc->dev;
277 	struct drm_i915_private *dev_priv = dev->dev_private;
278 	struct drm_framebuffer *fb = crtc->fb;
279 	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
280 	struct drm_i915_gem_object *obj = intel_fb->obj;
281 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
282 
283 	I915_WRITE(IVB_FBC_RT_BASE, i915_gem_obj_ggtt_offset(obj));
284 
285 	I915_WRITE(ILK_DPFC_CONTROL, DPFC_CTL_EN | DPFC_CTL_LIMIT_1X |
286 		   IVB_DPFC_CTL_FENCE_EN |
287 		   intel_crtc->plane << IVB_DPFC_CTL_PLANE_SHIFT);
288 
289 	if (IS_IVYBRIDGE(dev)) {
290 		/* WaFbcAsynchFlipDisableFbcQueue:ivb */
291 		I915_WRITE(ILK_DISPLAY_CHICKEN1, ILK_FBCQ_DIS);
292 	} else {
293 		/* WaFbcAsynchFlipDisableFbcQueue:hsw */
294 		I915_WRITE(HSW_PIPE_SLICE_CHICKEN_1(intel_crtc->pipe),
295 			   HSW_BYPASS_FBC_QUEUE);
296 	}
297 
298 	I915_WRITE(SNB_DPFC_CTL_SA,
299 		   SNB_CPU_FENCE_ENABLE | obj->fence_reg);
300 	I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y);
301 
302 	sandybridge_blit_fbc_update(dev);
303 
304 	DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
305 }
306 
307 bool intel_fbc_enabled(struct drm_device *dev)
308 {
309 	struct drm_i915_private *dev_priv = dev->dev_private;
310 
311 	if (!dev_priv->display.fbc_enabled)
312 		return false;
313 
314 	return dev_priv->display.fbc_enabled(dev);
315 }
316 
317 static void intel_fbc_work_fn(struct work_struct *__work)
318 {
319 	struct intel_fbc_work *work =
320 		container_of(to_delayed_work(__work),
321 			     struct intel_fbc_work, work);
322 	struct drm_device *dev = work->crtc->dev;
323 	struct drm_i915_private *dev_priv = dev->dev_private;
324 
325 	mutex_lock(&dev->struct_mutex);
326 	if (work == dev_priv->fbc.fbc_work) {
327 		/* Double check that we haven't switched fb without cancelling
328 		 * the prior work.
329 		 */
330 		if (work->crtc->fb == work->fb) {
331 			dev_priv->display.enable_fbc(work->crtc);
332 
333 			dev_priv->fbc.plane = to_intel_crtc(work->crtc)->plane;
334 			dev_priv->fbc.fb_id = work->crtc->fb->base.id;
335 			dev_priv->fbc.y = work->crtc->y;
336 		}
337 
338 		dev_priv->fbc.fbc_work = NULL;
339 	}
340 	mutex_unlock(&dev->struct_mutex);
341 
342 	kfree(work);
343 }
344 
345 static void intel_cancel_fbc_work(struct drm_i915_private *dev_priv)
346 {
347 	if (dev_priv->fbc.fbc_work == NULL)
348 		return;
349 
350 	DRM_DEBUG_KMS("cancelling pending FBC enable\n");
351 
352 	/* Synchronisation is provided by struct_mutex and checking of
353 	 * dev_priv->fbc.fbc_work, so we can perform the cancellation
354 	 * entirely asynchronously.
355 	 */
356 	if (cancel_delayed_work(&dev_priv->fbc.fbc_work->work))
357 		/* work was cancelled before it ran, clean up */
358 		kfree(dev_priv->fbc.fbc_work);
359 
360 	/* Mark the work as no longer wanted so that if it does
361 	 * wake up (because the work was already running and waiting
362 	 * for our mutex), it will discover that it is no longer
363 	 * necessary to run.
364 	 */
365 	dev_priv->fbc.fbc_work = NULL;
366 }
367 
368 static void intel_enable_fbc(struct drm_crtc *crtc)
369 {
370 	struct intel_fbc_work *work;
371 	struct drm_device *dev = crtc->dev;
372 	struct drm_i915_private *dev_priv = dev->dev_private;
373 
374 	if (!dev_priv->display.enable_fbc)
375 		return;
376 
377 	intel_cancel_fbc_work(dev_priv);
378 
379 	work = kzalloc(sizeof(*work), GFP_KERNEL);
380 	if (work == NULL) {
381 		DRM_ERROR("Failed to allocate FBC work structure\n");
382 		dev_priv->display.enable_fbc(crtc);
383 		return;
384 	}
385 
386 	work->crtc = crtc;
387 	work->fb = crtc->fb;
388 	INIT_DELAYED_WORK(&work->work, intel_fbc_work_fn);
389 
390 	dev_priv->fbc.fbc_work = work;
391 
392 	/* Delay the actual enabling to let pageflipping cease and the
393 	 * display settle before starting the compression. Note that
394 	 * this delay also serves a second purpose: it allows for a
395 	 * vblank to pass after disabling the FBC before we attempt
396 	 * to modify the control registers.
397 	 *
398 	 * A more complicated solution would involve tracking vblanks
399 	 * following the termination of the page-flipping sequence
400 	 * and indeed performing the enable as a co-routine and not
401 	 * waiting synchronously upon the vblank.
402 	 *
403 	 * WaFbcWaitForVBlankBeforeEnable:ilk,snb
404 	 */
405 	schedule_delayed_work(&work->work, msecs_to_jiffies(50));
406 }
407 
408 void intel_disable_fbc(struct drm_device *dev)
409 {
410 	struct drm_i915_private *dev_priv = dev->dev_private;
411 
412 	intel_cancel_fbc_work(dev_priv);
413 
414 	if (!dev_priv->display.disable_fbc)
415 		return;
416 
417 	dev_priv->display.disable_fbc(dev);
418 	dev_priv->fbc.plane = -1;
419 }
420 
421 static bool set_no_fbc_reason(struct drm_i915_private *dev_priv,
422 			      enum no_fbc_reason reason)
423 {
424 	if (dev_priv->fbc.no_fbc_reason == reason)
425 		return false;
426 
427 	dev_priv->fbc.no_fbc_reason = reason;
428 	return true;
429 }
430 
431 /**
432  * intel_update_fbc - enable/disable FBC as needed
433  * @dev: the drm_device
434  *
435  * Set up the framebuffer compression hardware at mode set time.  We
436  * enable it if possible:
437  *   - plane A only (on pre-965)
438  *   - no pixel multiply/line duplication
439  *   - no alpha buffer discard
440  *   - no dual wide
441  *   - framebuffer <= max_hdisplay in width, max_vdisplay in height
442  *
443  * We can't assume that any compression will take place (worst case),
444  * so the compressed buffer has to be the same size as the uncompressed
445  * one.  It also must reside (along with the line length buffer) in
446  * stolen memory.
447  *
448  * We need to enable/disable FBC on a global basis.
449  */
450 void intel_update_fbc(struct drm_device *dev)
451 {
452 	struct drm_i915_private *dev_priv = dev->dev_private;
453 	struct drm_crtc *crtc = NULL, *tmp_crtc;
454 	struct intel_crtc *intel_crtc;
455 	struct drm_framebuffer *fb;
456 	struct intel_framebuffer *intel_fb;
457 	struct drm_i915_gem_object *obj;
458 	const struct drm_display_mode *adjusted_mode;
459 	unsigned int max_width, max_height;
460 
461 	if (!HAS_FBC(dev)) {
462 		set_no_fbc_reason(dev_priv, FBC_UNSUPPORTED);
463 		return;
464 	}
465 
466 	if (!i915_powersave) {
467 		if (set_no_fbc_reason(dev_priv, FBC_MODULE_PARAM))
468 			DRM_DEBUG_KMS("fbc disabled per module param\n");
469 		return;
470 	}
471 
472 	/*
473 	 * If FBC is already on, we just have to verify that we can
474 	 * keep it that way...
475 	 * Need to disable if:
476 	 *   - more than one pipe is active
477 	 *   - changing FBC params (stride, fence, mode)
478 	 *   - new fb is too large to fit in compressed buffer
479 	 *   - going to an unsupported config (interlace, pixel multiply, etc.)
480 	 */
481 	list_for_each_entry(tmp_crtc, &dev->mode_config.crtc_list, head) {
482 		if (intel_crtc_active(tmp_crtc) &&
483 		    to_intel_crtc(tmp_crtc)->primary_enabled) {
484 			if (crtc) {
485 				if (set_no_fbc_reason(dev_priv, FBC_MULTIPLE_PIPES))
486 					DRM_DEBUG_KMS("more than one pipe active, disabling compression\n");
487 				goto out_disable;
488 			}
489 			crtc = tmp_crtc;
490 		}
491 	}
492 
493 	if (!crtc || crtc->fb == NULL) {
494 		if (set_no_fbc_reason(dev_priv, FBC_NO_OUTPUT))
495 			DRM_DEBUG_KMS("no output, disabling\n");
496 		goto out_disable;
497 	}
498 
499 	intel_crtc = to_intel_crtc(crtc);
500 	fb = crtc->fb;
501 	intel_fb = to_intel_framebuffer(fb);
502 	obj = intel_fb->obj;
503 	adjusted_mode = &intel_crtc->config.adjusted_mode;
504 
505 	if (i915_enable_fbc < 0 &&
506 	    INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev)) {
507 		if (set_no_fbc_reason(dev_priv, FBC_CHIP_DEFAULT))
508 			DRM_DEBUG_KMS("disabled per chip default\n");
509 		goto out_disable;
510 	}
511 	if (!i915_enable_fbc) {
512 		if (set_no_fbc_reason(dev_priv, FBC_MODULE_PARAM))
513 			DRM_DEBUG_KMS("fbc disabled per module param\n");
514 		goto out_disable;
515 	}
516 	if ((adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) ||
517 	    (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)) {
518 		if (set_no_fbc_reason(dev_priv, FBC_UNSUPPORTED_MODE))
519 			DRM_DEBUG_KMS("mode incompatible with compression, "
520 				      "disabling\n");
521 		goto out_disable;
522 	}
523 
524 	if (IS_G4X(dev) || INTEL_INFO(dev)->gen >= 5) {
525 		max_width = 4096;
526 		max_height = 2048;
527 	} else {
528 		max_width = 2048;
529 		max_height = 1536;
530 	}
531 	if (intel_crtc->config.pipe_src_w > max_width ||
532 	    intel_crtc->config.pipe_src_h > max_height) {
533 		if (set_no_fbc_reason(dev_priv, FBC_MODE_TOO_LARGE))
534 			DRM_DEBUG_KMS("mode too large for compression, disabling\n");
535 		goto out_disable;
536 	}
537 	if ((INTEL_INFO(dev)->gen < 4 || IS_HASWELL(dev)) &&
538 	    intel_crtc->plane != PLANE_A) {
539 		if (set_no_fbc_reason(dev_priv, FBC_BAD_PLANE))
540 			DRM_DEBUG_KMS("plane not A, disabling compression\n");
541 		goto out_disable;
542 	}
543 
544 	/* The use of a CPU fence is mandatory in order to detect writes
545 	 * by the CPU to the scanout and trigger updates to the FBC.
546 	 */
547 	if (obj->tiling_mode != I915_TILING_X ||
548 	    obj->fence_reg == I915_FENCE_REG_NONE) {
549 		if (set_no_fbc_reason(dev_priv, FBC_NOT_TILED))
550 			DRM_DEBUG_KMS("framebuffer not tiled or fenced, disabling compression\n");
551 		goto out_disable;
552 	}
553 
554 	/* If the kernel debugger is active, always disable compression */
555 #ifdef DDB
556 	if (in_dbg_master())
557 		goto out_disable;
558 #endif
559 
560 	if (i915_gem_stolen_setup_compression(dev, intel_fb->obj->base.size)) {
561 		if (set_no_fbc_reason(dev_priv, FBC_STOLEN_TOO_SMALL))
562 			DRM_DEBUG_KMS("framebuffer too large, disabling compression\n");
563 		goto out_disable;
564 	}
565 
566 	/* If the scanout has not changed, don't modify the FBC settings.
567 	 * Note that we make the fundamental assumption that the fb->obj
568 	 * cannot be unpinned (and have its GTT offset and fence revoked)
569 	 * without first being decoupled from the scanout and FBC disabled.
570 	 */
571 	if (dev_priv->fbc.plane == intel_crtc->plane &&
572 	    dev_priv->fbc.fb_id == fb->base.id &&
573 	    dev_priv->fbc.y == crtc->y)
574 		return;
575 
576 	if (intel_fbc_enabled(dev)) {
577 		/* We update FBC along two paths, after changing fb/crtc
578 		 * configuration (modeswitching) and after page-flipping
579 		 * finishes. For the latter, we know that not only did
580 		 * we disable the FBC at the start of the page-flip
581 		 * sequence, but also more than one vblank has passed.
582 		 *
583 		 * For the former case of modeswitching, it is possible
584 		 * to switch between two FBC valid configurations
585 		 * instantaneously so we do need to disable the FBC
586 		 * before we can modify its control registers. We also
587 		 * have to wait for the next vblank for that to take
588 		 * effect. However, since we delay enabling FBC we can
589 		 * assume that a vblank has passed since disabling and
590 		 * that we can safely alter the registers in the deferred
591 		 * callback.
592 		 *
593 		 * In the scenario that we go from a valid to invalid
594 		 * and then back to valid FBC configuration we have
595 		 * no strict enforcement that a vblank occurred since
596 		 * disabling the FBC. However, along all current pipe
597 		 * disabling paths we do need to wait for a vblank at
598 		 * some point. And we wait before enabling FBC anyway.
599 		 */
600 		DRM_DEBUG_KMS("disabling active FBC for update\n");
601 		intel_disable_fbc(dev);
602 	}
603 
604 	intel_enable_fbc(crtc);
605 	dev_priv->fbc.no_fbc_reason = FBC_OK;
606 	return;
607 
608 out_disable:
609 	/* Multiple disables should be harmless */
610 	if (intel_fbc_enabled(dev)) {
611 		DRM_DEBUG_KMS("unsupported config, disabling FBC\n");
612 		intel_disable_fbc(dev);
613 	}
614 	i915_gem_stolen_cleanup_compression(dev);
615 }
616 
617 static void i915_pineview_get_mem_freq(struct drm_device *dev)
618 {
619 	drm_i915_private_t *dev_priv = dev->dev_private;
620 	u32 tmp;
621 
622 	tmp = I915_READ(CLKCFG);
623 
624 	switch (tmp & CLKCFG_FSB_MASK) {
625 	case CLKCFG_FSB_533:
626 		dev_priv->fsb_freq = 533; /* 133*4 */
627 		break;
628 	case CLKCFG_FSB_800:
629 		dev_priv->fsb_freq = 800; /* 200*4 */
630 		break;
631 	case CLKCFG_FSB_667:
632 		dev_priv->fsb_freq =  667; /* 167*4 */
633 		break;
634 	case CLKCFG_FSB_400:
635 		dev_priv->fsb_freq = 400; /* 100*4 */
636 		break;
637 	}
638 
639 	switch (tmp & CLKCFG_MEM_MASK) {
640 	case CLKCFG_MEM_533:
641 		dev_priv->mem_freq = 533;
642 		break;
643 	case CLKCFG_MEM_667:
644 		dev_priv->mem_freq = 667;
645 		break;
646 	case CLKCFG_MEM_800:
647 		dev_priv->mem_freq = 800;
648 		break;
649 	}
650 
651 	/* detect pineview DDR3 setting */
652 	tmp = I915_READ(CSHRDDR3CTL);
653 	dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
654 }
655 
656 static void i915_ironlake_get_mem_freq(struct drm_device *dev)
657 {
658 	drm_i915_private_t *dev_priv = dev->dev_private;
659 	u16 ddrpll, csipll;
660 
661 	ddrpll = I915_READ16(DDRMPLL1);
662 	csipll = I915_READ16(CSIPLL0);
663 
664 	switch (ddrpll & 0xff) {
665 	case 0xc:
666 		dev_priv->mem_freq = 800;
667 		break;
668 	case 0x10:
669 		dev_priv->mem_freq = 1066;
670 		break;
671 	case 0x14:
672 		dev_priv->mem_freq = 1333;
673 		break;
674 	case 0x18:
675 		dev_priv->mem_freq = 1600;
676 		break;
677 	default:
678 		DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
679 				 ddrpll & 0xff);
680 		dev_priv->mem_freq = 0;
681 		break;
682 	}
683 
684 	dev_priv->ips.r_t = dev_priv->mem_freq;
685 
686 	switch (csipll & 0x3ff) {
687 	case 0x00c:
688 		dev_priv->fsb_freq = 3200;
689 		break;
690 	case 0x00e:
691 		dev_priv->fsb_freq = 3733;
692 		break;
693 	case 0x010:
694 		dev_priv->fsb_freq = 4266;
695 		break;
696 	case 0x012:
697 		dev_priv->fsb_freq = 4800;
698 		break;
699 	case 0x014:
700 		dev_priv->fsb_freq = 5333;
701 		break;
702 	case 0x016:
703 		dev_priv->fsb_freq = 5866;
704 		break;
705 	case 0x018:
706 		dev_priv->fsb_freq = 6400;
707 		break;
708 	default:
709 		DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
710 				 csipll & 0x3ff);
711 		dev_priv->fsb_freq = 0;
712 		break;
713 	}
714 
715 	if (dev_priv->fsb_freq == 3200) {
716 		dev_priv->ips.c_m = 0;
717 	} else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
718 		dev_priv->ips.c_m = 1;
719 	} else {
720 		dev_priv->ips.c_m = 2;
721 	}
722 }
723 
724 static const struct cxsr_latency cxsr_latency_table[] = {
725 	{1, 0, 800, 400, 3382, 33382, 3983, 33983},    /* DDR2-400 SC */
726 	{1, 0, 800, 667, 3354, 33354, 3807, 33807},    /* DDR2-667 SC */
727 	{1, 0, 800, 800, 3347, 33347, 3763, 33763},    /* DDR2-800 SC */
728 	{1, 1, 800, 667, 6420, 36420, 6873, 36873},    /* DDR3-667 SC */
729 	{1, 1, 800, 800, 5902, 35902, 6318, 36318},    /* DDR3-800 SC */
730 
731 	{1, 0, 667, 400, 3400, 33400, 4021, 34021},    /* DDR2-400 SC */
732 	{1, 0, 667, 667, 3372, 33372, 3845, 33845},    /* DDR2-667 SC */
733 	{1, 0, 667, 800, 3386, 33386, 3822, 33822},    /* DDR2-800 SC */
734 	{1, 1, 667, 667, 6438, 36438, 6911, 36911},    /* DDR3-667 SC */
735 	{1, 1, 667, 800, 5941, 35941, 6377, 36377},    /* DDR3-800 SC */
736 
737 	{1, 0, 400, 400, 3472, 33472, 4173, 34173},    /* DDR2-400 SC */
738 	{1, 0, 400, 667, 3443, 33443, 3996, 33996},    /* DDR2-667 SC */
739 	{1, 0, 400, 800, 3430, 33430, 3946, 33946},    /* DDR2-800 SC */
740 	{1, 1, 400, 667, 6509, 36509, 7062, 37062},    /* DDR3-667 SC */
741 	{1, 1, 400, 800, 5985, 35985, 6501, 36501},    /* DDR3-800 SC */
742 
743 	{0, 0, 800, 400, 3438, 33438, 4065, 34065},    /* DDR2-400 SC */
744 	{0, 0, 800, 667, 3410, 33410, 3889, 33889},    /* DDR2-667 SC */
745 	{0, 0, 800, 800, 3403, 33403, 3845, 33845},    /* DDR2-800 SC */
746 	{0, 1, 800, 667, 6476, 36476, 6955, 36955},    /* DDR3-667 SC */
747 	{0, 1, 800, 800, 5958, 35958, 6400, 36400},    /* DDR3-800 SC */
748 
749 	{0, 0, 667, 400, 3456, 33456, 4103, 34106},    /* DDR2-400 SC */
750 	{0, 0, 667, 667, 3428, 33428, 3927, 33927},    /* DDR2-667 SC */
751 	{0, 0, 667, 800, 3443, 33443, 3905, 33905},    /* DDR2-800 SC */
752 	{0, 1, 667, 667, 6494, 36494, 6993, 36993},    /* DDR3-667 SC */
753 	{0, 1, 667, 800, 5998, 35998, 6460, 36460},    /* DDR3-800 SC */
754 
755 	{0, 0, 400, 400, 3528, 33528, 4255, 34255},    /* DDR2-400 SC */
756 	{0, 0, 400, 667, 3500, 33500, 4079, 34079},    /* DDR2-667 SC */
757 	{0, 0, 400, 800, 3487, 33487, 4029, 34029},    /* DDR2-800 SC */
758 	{0, 1, 400, 667, 6566, 36566, 7145, 37145},    /* DDR3-667 SC */
759 	{0, 1, 400, 800, 6042, 36042, 6584, 36584},    /* DDR3-800 SC */
760 };
761 
762 static const struct cxsr_latency *intel_get_cxsr_latency(int is_desktop,
763 							 int is_ddr3,
764 							 int fsb,
765 							 int mem)
766 {
767 	const struct cxsr_latency *latency;
768 	int i;
769 
770 	if (fsb == 0 || mem == 0)
771 		return NULL;
772 
773 	for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
774 		latency = &cxsr_latency_table[i];
775 		if (is_desktop == latency->is_desktop &&
776 		    is_ddr3 == latency->is_ddr3 &&
777 		    fsb == latency->fsb_freq && mem == latency->mem_freq)
778 			return latency;
779 	}
780 
781 	DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
782 
783 	return NULL;
784 }
785 
786 static void pineview_disable_cxsr(struct drm_device *dev)
787 {
788 	struct drm_i915_private *dev_priv = dev->dev_private;
789 
790 	/* deactivate cxsr */
791 	I915_WRITE(DSPFW3, I915_READ(DSPFW3) & ~PINEVIEW_SELF_REFRESH_EN);
792 }
793 
794 /*
795  * Latency for FIFO fetches is dependent on several factors:
796  *   - memory configuration (speed, channels)
797  *   - chipset
798  *   - current MCH state
799  * It can be fairly high in some situations, so here we assume a fairly
800  * pessimal value.  It's a tradeoff between extra memory fetches (if we
801  * set this value too high, the FIFO will fetch frequently to stay full)
802  * and power consumption (set it too low to save power and we might see
803  * FIFO underruns and display "flicker").
804  *
805  * A value of 5us seems to be a good balance; safe for very low end
806  * platforms but not overly aggressive on lower latency configs.
807  */
808 static const int latency_ns = 5000;
809 
810 static int i9xx_get_fifo_size(struct drm_device *dev, int plane)
811 {
812 	struct drm_i915_private *dev_priv = dev->dev_private;
813 	uint32_t dsparb = I915_READ(DSPARB);
814 	int size;
815 
816 	size = dsparb & 0x7f;
817 	if (plane)
818 		size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;
819 
820 	DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
821 		      plane ? "B" : "A", size);
822 
823 	return size;
824 }
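/* Worked example with assumed register contents: if the low 7 bits of DSPARB
 * read 48 and the C-start field reads 96, plane A owns the first 48 FIFO
 * entries and plane B gets 96 - 48 = 48 entries.  The numbers here are purely
 * illustrative; the real split is whatever the BIOS/driver programmed.
 */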
825 
826 static int i830_get_fifo_size(struct drm_device *dev, int plane)
827 {
828 	struct drm_i915_private *dev_priv = dev->dev_private;
829 	uint32_t dsparb = I915_READ(DSPARB);
830 	int size;
831 
832 	size = dsparb & 0x1ff;
833 	if (plane)
834 		size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
835 	size >>= 1; /* Convert to cachelines */
836 
837 	DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
838 		      plane ? "B" : "A", size);
839 
840 	return size;
841 }
842 
843 static int i845_get_fifo_size(struct drm_device *dev, int plane)
844 {
845 	struct drm_i915_private *dev_priv = dev->dev_private;
846 	uint32_t dsparb = I915_READ(DSPARB);
847 	int size;
848 
849 	size = dsparb & 0x7f;
850 	size >>= 2; /* Convert to cachelines */
851 
852 	DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
853 		      plane ? "B" : "A",
854 		      size);
855 
856 	return size;
857 }
858 
859 /* Pineview has different values for various configs */
860 static const struct intel_watermark_params pineview_display_wm = {
861 	PINEVIEW_DISPLAY_FIFO,
862 	PINEVIEW_MAX_WM,
863 	PINEVIEW_DFT_WM,
864 	PINEVIEW_GUARD_WM,
865 	PINEVIEW_FIFO_LINE_SIZE
866 };
867 static const struct intel_watermark_params pineview_display_hplloff_wm = {
868 	PINEVIEW_DISPLAY_FIFO,
869 	PINEVIEW_MAX_WM,
870 	PINEVIEW_DFT_HPLLOFF_WM,
871 	PINEVIEW_GUARD_WM,
872 	PINEVIEW_FIFO_LINE_SIZE
873 };
874 static const struct intel_watermark_params pineview_cursor_wm = {
875 	PINEVIEW_CURSOR_FIFO,
876 	PINEVIEW_CURSOR_MAX_WM,
877 	PINEVIEW_CURSOR_DFT_WM,
878 	PINEVIEW_CURSOR_GUARD_WM,
879 	PINEVIEW_FIFO_LINE_SIZE,
880 };
881 static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
882 	PINEVIEW_CURSOR_FIFO,
883 	PINEVIEW_CURSOR_MAX_WM,
884 	PINEVIEW_CURSOR_DFT_WM,
885 	PINEVIEW_CURSOR_GUARD_WM,
886 	PINEVIEW_FIFO_LINE_SIZE
887 };
888 static const struct intel_watermark_params g4x_wm_info = {
889 	G4X_FIFO_SIZE,
890 	G4X_MAX_WM,
891 	G4X_MAX_WM,
892 	2,
893 	G4X_FIFO_LINE_SIZE,
894 };
895 static const struct intel_watermark_params g4x_cursor_wm_info = {
896 	I965_CURSOR_FIFO,
897 	I965_CURSOR_MAX_WM,
898 	I965_CURSOR_DFT_WM,
899 	2,
900 	G4X_FIFO_LINE_SIZE,
901 };
902 static const struct intel_watermark_params valleyview_wm_info = {
903 	VALLEYVIEW_FIFO_SIZE,
904 	VALLEYVIEW_MAX_WM,
905 	VALLEYVIEW_MAX_WM,
906 	2,
907 	G4X_FIFO_LINE_SIZE,
908 };
909 static const struct intel_watermark_params valleyview_cursor_wm_info = {
910 	I965_CURSOR_FIFO,
911 	VALLEYVIEW_CURSOR_MAX_WM,
912 	I965_CURSOR_DFT_WM,
913 	2,
914 	G4X_FIFO_LINE_SIZE,
915 };
916 static const struct intel_watermark_params i965_cursor_wm_info = {
917 	I965_CURSOR_FIFO,
918 	I965_CURSOR_MAX_WM,
919 	I965_CURSOR_DFT_WM,
920 	2,
921 	I915_FIFO_LINE_SIZE,
922 };
923 static const struct intel_watermark_params i945_wm_info = {
924 	I945_FIFO_SIZE,
925 	I915_MAX_WM,
926 	1,
927 	2,
928 	I915_FIFO_LINE_SIZE
929 };
930 static const struct intel_watermark_params i915_wm_info = {
931 	I915_FIFO_SIZE,
932 	I915_MAX_WM,
933 	1,
934 	2,
935 	I915_FIFO_LINE_SIZE
936 };
937 static const struct intel_watermark_params i830_wm_info = {
938 	I855GM_FIFO_SIZE,
939 	I915_MAX_WM,
940 	1,
941 	2,
942 	I830_FIFO_LINE_SIZE
943 };
944 static const struct intel_watermark_params i845_wm_info = {
945 	I830_FIFO_SIZE,
946 	I915_MAX_WM,
947 	1,
948 	2,
949 	I830_FIFO_LINE_SIZE
950 };
951 
952 /**
953  * intel_calculate_wm - calculate watermark level
954  * @clock_in_khz: pixel clock
955  * @wm: chip FIFO params
956  * @pixel_size: display pixel size
957  * @latency_ns: memory latency for the platform
958  *
959  * Calculate the watermark level (the level at which the display plane will
960  * start fetching from memory again).  Each chip has a different display
961  * FIFO size and allocation, so the caller needs to figure that out and pass
962  * in the correct intel_watermark_params structure.
963  *
964  * As the pixel clock runs, the FIFO will be drained at a rate that depends
965  * on the pixel size.  When it reaches the watermark level, it'll start
966  * fetching FIFO-line-sized chunks from memory until the FIFO fills
967  * past the watermark point.  If the FIFO drains completely, a FIFO underrun
968  * will occur, and a display engine hang could result.
969  */
970 static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
971 					const struct intel_watermark_params *wm,
972 					int fifo_size,
973 					int pixel_size,
974 					unsigned long latency_ns)
975 {
976 	long entries_required, wm_size;
977 
978 	/*
979 	 * Note: we need to make sure we don't overflow for various clock &
980 	 * latency values.
981 	 * clocks go from a few thousand to several hundred thousand.
982 	 * latency is usually a few thousand
983 	 */
984 	entries_required = ((clock_in_khz / 1000) * pixel_size * latency_ns) /
985 		1000;
986 	entries_required = DIV_ROUND_UP(entries_required, wm->cacheline_size);
987 
988 	DRM_DEBUG_KMS("FIFO entries required for mode: %ld\n", entries_required);
989 
990 	wm_size = fifo_size - (entries_required + wm->guard_size);
991 
992 	DRM_DEBUG_KMS("FIFO watermark level: %ld\n", wm_size);
993 
994 	/* Don't promote wm_size to unsigned... */
995 	if (wm_size > (long)wm->max_wm)
996 		wm_size = wm->max_wm;
997 	if (wm_size <= 0)
998 		wm_size = wm->default_wm;
999 	return wm_size;
1000 }
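/* Worked example with illustrative numbers (the real FIFO and guard sizes come
 * from the intel_watermark_params tables above): for a 148500 kHz pixel clock,
 * 4 bytes per pixel, 5000 ns latency and a 64-byte cacheline,
 * entries_required = (148500/1000) * 4 * 5000 / 1000 = 2960 bytes, which
 * rounds up to 47 cachelines.  Assuming, say, a 96-entry FIFO and a guard of
 * 2, wm_size = 96 - (47 + 2) = 47, subject to the max_wm/default_wm clamping
 * above.
 */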
1001 
1002 static struct drm_crtc *single_enabled_crtc(struct drm_device *dev)
1003 {
1004 	struct drm_crtc *crtc, *enabled = NULL;
1005 
1006 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
1007 		if (intel_crtc_active(crtc)) {
1008 			if (enabled)
1009 				return NULL;
1010 			enabled = crtc;
1011 		}
1012 	}
1013 
1014 	return enabled;
1015 }
1016 
1017 static void pineview_update_wm(struct drm_crtc *unused_crtc)
1018 {
1019 	struct drm_device *dev = unused_crtc->dev;
1020 	struct drm_i915_private *dev_priv = dev->dev_private;
1021 	struct drm_crtc *crtc;
1022 	const struct cxsr_latency *latency;
1023 	u32 reg;
1024 	unsigned long wm;
1025 
1026 	latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev), dev_priv->is_ddr3,
1027 					 dev_priv->fsb_freq, dev_priv->mem_freq);
1028 	if (!latency) {
1029 		DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
1030 		pineview_disable_cxsr(dev);
1031 		return;
1032 	}
1033 
1034 	crtc = single_enabled_crtc(dev);
1035 	if (crtc) {
1036 		const struct drm_display_mode *adjusted_mode;
1037 		int pixel_size = crtc->fb->bits_per_pixel / 8;
1038 		int clock;
1039 
1040 		adjusted_mode = &to_intel_crtc(crtc)->config.adjusted_mode;
1041 		clock = adjusted_mode->crtc_clock;
1042 
1043 		/* Display SR */
1044 		wm = intel_calculate_wm(clock, &pineview_display_wm,
1045 					pineview_display_wm.fifo_size,
1046 					pixel_size, latency->display_sr);
1047 		reg = I915_READ(DSPFW1);
1048 		reg &= ~DSPFW_SR_MASK;
1049 		reg |= wm << DSPFW_SR_SHIFT;
1050 		I915_WRITE(DSPFW1, reg);
1051 		DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);
1052 
1053 		/* cursor SR */
1054 		wm = intel_calculate_wm(clock, &pineview_cursor_wm,
1055 					pineview_display_wm.fifo_size,
1056 					pixel_size, latency->cursor_sr);
1057 		reg = I915_READ(DSPFW3);
1058 		reg &= ~DSPFW_CURSOR_SR_MASK;
1059 		reg |= (wm & 0x3f) << DSPFW_CURSOR_SR_SHIFT;
1060 		I915_WRITE(DSPFW3, reg);
1061 
1062 		/* Display HPLL off SR */
1063 		wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
1064 					pineview_display_hplloff_wm.fifo_size,
1065 					pixel_size, latency->display_hpll_disable);
1066 		reg = I915_READ(DSPFW3);
1067 		reg &= ~DSPFW_HPLL_SR_MASK;
1068 		reg |= wm & DSPFW_HPLL_SR_MASK;
1069 		I915_WRITE(DSPFW3, reg);
1070 
1071 		/* cursor HPLL off SR */
1072 		wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
1073 					pineview_display_hplloff_wm.fifo_size,
1074 					pixel_size, latency->cursor_hpll_disable);
1075 		reg = I915_READ(DSPFW3);
1076 		reg &= ~DSPFW_HPLL_CURSOR_MASK;
1077 		reg |= (wm & 0x3f) << DSPFW_HPLL_CURSOR_SHIFT;
1078 		I915_WRITE(DSPFW3, reg);
1079 		DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);
1080 
1081 		/* activate cxsr */
1082 		I915_WRITE(DSPFW3,
1083 			   I915_READ(DSPFW3) | PINEVIEW_SELF_REFRESH_EN);
1084 		DRM_DEBUG_KMS("Self-refresh is enabled\n");
1085 	} else {
1086 		pineview_disable_cxsr(dev);
1087 		DRM_DEBUG_KMS("Self-refresh is disabled\n");
1088 	}
1089 }
1090 
1091 static bool g4x_compute_wm0(struct drm_device *dev,
1092 			    int plane,
1093 			    const struct intel_watermark_params *display,
1094 			    int display_latency_ns,
1095 			    const struct intel_watermark_params *cursor,
1096 			    int cursor_latency_ns,
1097 			    int *plane_wm,
1098 			    int *cursor_wm)
1099 {
1100 	struct drm_crtc *crtc;
1101 	const struct drm_display_mode *adjusted_mode;
1102 	int htotal, hdisplay, clock, pixel_size;
1103 	int line_time_us, line_count;
1104 	int entries, tlb_miss;
1105 
1106 	crtc = intel_get_crtc_for_plane(dev, plane);
1107 	if (!intel_crtc_active(crtc)) {
1108 		*cursor_wm = cursor->guard_size;
1109 		*plane_wm = display->guard_size;
1110 		return false;
1111 	}
1112 
1113 	adjusted_mode = &to_intel_crtc(crtc)->config.adjusted_mode;
1114 	clock = adjusted_mode->crtc_clock;
1115 	htotal = adjusted_mode->crtc_htotal;
1116 	hdisplay = to_intel_crtc(crtc)->config.pipe_src_w;
1117 	pixel_size = crtc->fb->bits_per_pixel / 8;
1118 
1119 	/* Use the small buffer method to calculate plane watermark */
1120 	entries = ((clock * pixel_size / 1000) * display_latency_ns) / 1000;
1121 	tlb_miss = display->fifo_size*display->cacheline_size - hdisplay * 8;
1122 	if (tlb_miss > 0)
1123 		entries += tlb_miss;
1124 	entries = DIV_ROUND_UP(entries, display->cacheline_size);
1125 	*plane_wm = entries + display->guard_size;
1126 	if (*plane_wm > (int)display->max_wm)
1127 		*plane_wm = display->max_wm;
1128 
1129 	/* Use the large buffer method to calculate cursor watermark */
1130 	line_time_us = ((htotal * 1000) / clock);
1131 	line_count = (cursor_latency_ns / line_time_us + 1000) / 1000;
1132 	entries = line_count * 64 * pixel_size;
1133 	tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8;
1134 	if (tlb_miss > 0)
1135 		entries += tlb_miss;
1136 	entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
1137 	*cursor_wm = entries + cursor->guard_size;
1138 	if (*cursor_wm > (int)cursor->max_wm)
1139 		*cursor_wm = (int)cursor->max_wm;
1140 
1141 	return true;
1142 }
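/* Worked example, purely illustrative: with clock = 148500 kHz, 4 bytes per
 * pixel and display_latency_ns = 5000, the small-buffer term is
 * ((148500 * 4 / 1000) * 5000) / 1000 = 2970 bytes; ignoring any tlb_miss
 * correction and assuming a 64-byte cacheline that rounds up to 47 entries,
 * so plane_wm = 47 + guard_size (clamped to the table's max_wm).
 */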
1143 
1144 /*
1145  * Check the wm result.
1146  *
1147  * If any calculated watermark value is larger than the maximum value that
1148  * can be programmed into the associated watermark register, that watermark
1149  * must be disabled.
1150  */
1151 static bool g4x_check_srwm(struct drm_device *dev,
1152 			   int display_wm, int cursor_wm,
1153 			   const struct intel_watermark_params *display,
1154 			   const struct intel_watermark_params *cursor)
1155 {
1156 	DRM_DEBUG_KMS("SR watermark: display plane %d, cursor %d\n",
1157 		      display_wm, cursor_wm);
1158 
1159 	if (display_wm > display->max_wm) {
1160 		DRM_DEBUG_KMS("display watermark is too large(%d/%ld), disabling\n",
1161 			      display_wm, display->max_wm);
1162 		return false;
1163 	}
1164 
1165 	if (cursor_wm > cursor->max_wm) {
1166 		DRM_DEBUG_KMS("cursor watermark is too large(%d/%ld), disabling\n",
1167 			      cursor_wm, cursor->max_wm);
1168 		return false;
1169 	}
1170 
1171 	if (!(display_wm || cursor_wm)) {
1172 		DRM_DEBUG_KMS("SR latency is 0, disabling\n");
1173 		return false;
1174 	}
1175 
1176 	return true;
1177 }
1178 
1179 static bool g4x_compute_srwm(struct drm_device *dev,
1180 			     int plane,
1181 			     int latency_ns,
1182 			     const struct intel_watermark_params *display,
1183 			     const struct intel_watermark_params *cursor,
1184 			     int *display_wm, int *cursor_wm)
1185 {
1186 	struct drm_crtc *crtc;
1187 	const struct drm_display_mode *adjusted_mode;
1188 	int hdisplay, htotal, pixel_size, clock;
1189 	unsigned long line_time_us;
1190 	int line_count, line_size;
1191 	int small, large;
1192 	int entries;
1193 
1194 	if (!latency_ns) {
1195 		*display_wm = *cursor_wm = 0;
1196 		return false;
1197 	}
1198 
1199 	crtc = intel_get_crtc_for_plane(dev, plane);
1200 	adjusted_mode = &to_intel_crtc(crtc)->config.adjusted_mode;
1201 	clock = adjusted_mode->crtc_clock;
1202 	htotal = adjusted_mode->crtc_htotal;
1203 	hdisplay = to_intel_crtc(crtc)->config.pipe_src_w;
1204 	pixel_size = crtc->fb->bits_per_pixel / 8;
1205 
1206 	line_time_us = (htotal * 1000) / clock;
1207 	line_count = (latency_ns / line_time_us + 1000) / 1000;
1208 	line_size = hdisplay * pixel_size;
1209 
1210 	/* Use the minimum of the small and large buffer method for primary */
1211 	small = ((clock * pixel_size / 1000) * latency_ns) / 1000;
1212 	large = line_count * line_size;
1213 
1214 	entries = DIV_ROUND_UP(min(small, large), display->cacheline_size);
1215 	*display_wm = entries + display->guard_size;
1216 
1217 	/* calculate the self-refresh watermark for display cursor */
1218 	entries = line_count * pixel_size * 64;
1219 	entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
1220 	*cursor_wm = entries + cursor->guard_size;
1221 
1222 	return g4x_check_srwm(dev,
1223 			      *display_wm, *cursor_wm,
1224 			      display, cursor);
1225 }
1226 
1227 static bool vlv_compute_drain_latency(struct drm_device *dev,
1228 				     int plane,
1229 				     int *plane_prec_mult,
1230 				     int *plane_dl,
1231 				     int *cursor_prec_mult,
1232 				     int *cursor_dl)
1233 {
1234 	struct drm_crtc *crtc;
1235 	int clock, pixel_size;
1236 	int entries;
1237 
1238 	crtc = intel_get_crtc_for_plane(dev, plane);
1239 	if (!intel_crtc_active(crtc))
1240 		return false;
1241 
1242 	clock = to_intel_crtc(crtc)->config.adjusted_mode.crtc_clock;
1243 	pixel_size = crtc->fb->bits_per_pixel / 8;	/* BPP */
1244 
1245 	entries = (clock / 1000) * pixel_size;
1246 	*plane_prec_mult = (entries > 256) ?
1247 		DRAIN_LATENCY_PRECISION_32 : DRAIN_LATENCY_PRECISION_16;
1248 	*plane_dl = (64 * (*plane_prec_mult) * 4) / ((clock / 1000) *
1249 						     pixel_size);
1250 
1251 	entries = (clock / 1000) * 4;	/* BPP is always 4 for cursor */
1252 	*cursor_prec_mult = (entries > 256) ?
1253 		DRAIN_LATENCY_PRECISION_32 : DRAIN_LATENCY_PRECISION_16;
1254 	*cursor_dl = (64 * (*cursor_prec_mult) * 4) / ((clock / 1000) * 4);
1255 
1256 	return true;
1257 }
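/* Worked example with assumed numbers: at a 200000 kHz pixel clock and 4
 * bytes per pixel, entries = 200 * 4 = 800 > 256, so the 32x precision
 * multiplier is chosen and, assuming DRAIN_LATENCY_PRECISION_32 expands to 32
 * as the multiplier comment below suggests, plane_dl = (64 * 32 * 4) /
 * (200 * 4) = 10.  The cursor uses the same formula with a fixed 4 bytes per
 * pixel.
 */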
1258 
1259 /*
1260  * Update drain latency registers of memory arbiter
1261  *
1262  * Valleyview SoC has a new memory arbiter and needs drain latency registers
1263  * to be programmed. Each plane has a drain latency multiplier and a drain
1264  * latency value.
1265  */
1266 
1267 static void vlv_update_drain_latency(struct drm_device *dev)
1268 {
1269 	struct drm_i915_private *dev_priv = dev->dev_private;
1270 	int planea_prec, planea_dl, planeb_prec, planeb_dl;
1271 	int cursora_prec, cursora_dl, cursorb_prec, cursorb_dl;
1272 	int plane_prec_mult, cursor_prec_mult; /* Precision multiplier is
1273 							either 16 or 32 */
1274 
1275 	/* For plane A, Cursor A */
1276 	if (vlv_compute_drain_latency(dev, 0, &plane_prec_mult, &planea_dl,
1277 				      &cursor_prec_mult, &cursora_dl)) {
1278 		cursora_prec = (cursor_prec_mult == DRAIN_LATENCY_PRECISION_32) ?
1279 			DDL_CURSORA_PRECISION_32 : DDL_CURSORA_PRECISION_16;
1280 		planea_prec = (plane_prec_mult == DRAIN_LATENCY_PRECISION_32) ?
1281 			DDL_PLANEA_PRECISION_32 : DDL_PLANEA_PRECISION_16;
1282 
1283 		I915_WRITE(VLV_DDL1, cursora_prec |
1284 				(cursora_dl << DDL_CURSORA_SHIFT) |
1285 				planea_prec | planea_dl);
1286 	}
1287 
1288 	/* For plane B, Cursor B */
1289 	if (vlv_compute_drain_latency(dev, 1, &plane_prec_mult, &planeb_dl,
1290 				      &cursor_prec_mult, &cursorb_dl)) {
1291 		cursorb_prec = (cursor_prec_mult == DRAIN_LATENCY_PRECISION_32) ?
1292 			DDL_CURSORB_PRECISION_32 : DDL_CURSORB_PRECISION_16;
1293 		planeb_prec = (plane_prec_mult == DRAIN_LATENCY_PRECISION_32) ?
1294 			DDL_PLANEB_PRECISION_32 : DDL_PLANEB_PRECISION_16;
1295 
1296 		I915_WRITE(VLV_DDL2, cursorb_prec |
1297 				(cursorb_dl << DDL_CURSORB_SHIFT) |
1298 				planeb_prec | planeb_dl);
1299 	}
1300 }
1301 
1302 #define single_plane_enabled(mask) is_power_of_2(mask)
1303 
1304 static void valleyview_update_wm(struct drm_crtc *crtc)
1305 {
1306 	struct drm_device *dev = crtc->dev;
1307 	static const int sr_latency_ns = 12000;
1308 	struct drm_i915_private *dev_priv = dev->dev_private;
1309 	int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
1310 	int plane_sr, cursor_sr;
1311 	int ignore_plane_sr, ignore_cursor_sr;
1312 	unsigned int enabled = 0;
1313 
1314 	vlv_update_drain_latency(dev);
1315 
1316 	if (g4x_compute_wm0(dev, PIPE_A,
1317 			    &valleyview_wm_info, latency_ns,
1318 			    &valleyview_cursor_wm_info, latency_ns,
1319 			    &planea_wm, &cursora_wm))
1320 		enabled |= 1 << PIPE_A;
1321 
1322 	if (g4x_compute_wm0(dev, PIPE_B,
1323 			    &valleyview_wm_info, latency_ns,
1324 			    &valleyview_cursor_wm_info, latency_ns,
1325 			    &planeb_wm, &cursorb_wm))
1326 		enabled |= 1 << PIPE_B;
1327 
1328 	if (single_plane_enabled(enabled) &&
1329 	    g4x_compute_srwm(dev, ffs(enabled) - 1,
1330 			     sr_latency_ns,
1331 			     &valleyview_wm_info,
1332 			     &valleyview_cursor_wm_info,
1333 			     &plane_sr, &ignore_cursor_sr) &&
1334 	    g4x_compute_srwm(dev, ffs(enabled) - 1,
1335 			     2*sr_latency_ns,
1336 			     &valleyview_wm_info,
1337 			     &valleyview_cursor_wm_info,
1338 			     &ignore_plane_sr, &cursor_sr)) {
1339 		I915_WRITE(FW_BLC_SELF_VLV, FW_CSPWRDWNEN);
1340 	} else {
1341 		I915_WRITE(FW_BLC_SELF_VLV,
1342 			   I915_READ(FW_BLC_SELF_VLV) & ~FW_CSPWRDWNEN);
1343 		plane_sr = cursor_sr = 0;
1344 	}
1345 
1346 	DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
1347 		      planea_wm, cursora_wm,
1348 		      planeb_wm, cursorb_wm,
1349 		      plane_sr, cursor_sr);
1350 
1351 	I915_WRITE(DSPFW1,
1352 		   (plane_sr << DSPFW_SR_SHIFT) |
1353 		   (cursorb_wm << DSPFW_CURSORB_SHIFT) |
1354 		   (planeb_wm << DSPFW_PLANEB_SHIFT) |
1355 		   planea_wm);
1356 	I915_WRITE(DSPFW2,
1357 		   (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
1358 		   (cursora_wm << DSPFW_CURSORA_SHIFT));
1359 	I915_WRITE(DSPFW3,
1360 		   (I915_READ(DSPFW3) & ~DSPFW_CURSOR_SR_MASK) |
1361 		   (cursor_sr << DSPFW_CURSOR_SR_SHIFT));
1362 }
1363 
1364 static void g4x_update_wm(struct drm_crtc *crtc)
1365 {
1366 	struct drm_device *dev = crtc->dev;
1367 	static const int sr_latency_ns = 12000;
1368 	struct drm_i915_private *dev_priv = dev->dev_private;
1369 	int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
1370 	int plane_sr, cursor_sr;
1371 	unsigned int enabled = 0;
1372 
1373 	if (g4x_compute_wm0(dev, PIPE_A,
1374 			    &g4x_wm_info, latency_ns,
1375 			    &g4x_cursor_wm_info, latency_ns,
1376 			    &planea_wm, &cursora_wm))
1377 		enabled |= 1 << PIPE_A;
1378 
1379 	if (g4x_compute_wm0(dev, PIPE_B,
1380 			    &g4x_wm_info, latency_ns,
1381 			    &g4x_cursor_wm_info, latency_ns,
1382 			    &planeb_wm, &cursorb_wm))
1383 		enabled |= 1 << PIPE_B;
1384 
1385 	if (single_plane_enabled(enabled) &&
1386 	    g4x_compute_srwm(dev, ffs(enabled) - 1,
1387 			     sr_latency_ns,
1388 			     &g4x_wm_info,
1389 			     &g4x_cursor_wm_info,
1390 			     &plane_sr, &cursor_sr)) {
1391 		I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN);
1392 	} else {
1393 		I915_WRITE(FW_BLC_SELF,
1394 			   I915_READ(FW_BLC_SELF) & ~FW_BLC_SELF_EN);
1395 		plane_sr = cursor_sr = 0;
1396 	}
1397 
1398 	DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
1399 		      planea_wm, cursora_wm,
1400 		      planeb_wm, cursorb_wm,
1401 		      plane_sr, cursor_sr);
1402 
1403 	I915_WRITE(DSPFW1,
1404 		   (plane_sr << DSPFW_SR_SHIFT) |
1405 		   (cursorb_wm << DSPFW_CURSORB_SHIFT) |
1406 		   (planeb_wm << DSPFW_PLANEB_SHIFT) |
1407 		   planea_wm);
1408 	I915_WRITE(DSPFW2,
1409 		   (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
1410 		   (cursora_wm << DSPFW_CURSORA_SHIFT));
1411 	/* HPLL off in SR has some issues on G4x... disable it */
1412 	I915_WRITE(DSPFW3,
1413 		   (I915_READ(DSPFW3) & ~(DSPFW_HPLL_SR_EN | DSPFW_CURSOR_SR_MASK)) |
1414 		   (cursor_sr << DSPFW_CURSOR_SR_SHIFT));
1415 }
1416 
1417 static void i965_update_wm(struct drm_crtc *unused_crtc)
1418 {
1419 	struct drm_device *dev = unused_crtc->dev;
1420 	struct drm_i915_private *dev_priv = dev->dev_private;
1421 	struct drm_crtc *crtc;
1422 	int srwm = 1;
1423 	int cursor_sr = 16;
1424 
1425 	/* Calc sr entries for one plane configs */
1426 	crtc = single_enabled_crtc(dev);
1427 	if (crtc) {
1428 		/* self-refresh has much higher latency */
1429 		static const int sr_latency_ns = 12000;
1430 		const struct drm_display_mode *adjusted_mode =
1431 			&to_intel_crtc(crtc)->config.adjusted_mode;
1432 		int clock = adjusted_mode->crtc_clock;
1433 		int htotal = adjusted_mode->crtc_htotal;
1434 		int hdisplay = to_intel_crtc(crtc)->config.pipe_src_w;
1435 		int pixel_size = crtc->fb->bits_per_pixel / 8;
1436 		unsigned long line_time_us;
1437 		int entries;
1438 
1439 		line_time_us = ((htotal * 1000) / clock);
1440 
1441 		/* Use ns/us then divide to preserve precision */
1442 		entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1443 			pixel_size * hdisplay;
1444 		entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
1445 		srwm = I965_FIFO_SIZE - entries;
1446 		if (srwm < 0)
1447 			srwm = 1;
1448 		srwm &= 0x1ff;
1449 		DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
1450 			      entries, srwm);
1451 
1452 		entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1453 			pixel_size * 64;
1454 		entries = DIV_ROUND_UP(entries,
1455 					  i965_cursor_wm_info.cacheline_size);
1456 		cursor_sr = i965_cursor_wm_info.fifo_size -
1457 			(entries + i965_cursor_wm_info.guard_size);
1458 
1459 		if (cursor_sr > i965_cursor_wm_info.max_wm)
1460 			cursor_sr = i965_cursor_wm_info.max_wm;
1461 
1462 		DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
1463 			      "cursor %d\n", srwm, cursor_sr);
1464 
1465 		if (IS_CRESTLINE(dev))
1466 			I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN);
1467 	} else {
1468 		/* Turn off self refresh if both pipes are enabled */
1469 		if (IS_CRESTLINE(dev))
1470 			I915_WRITE(FW_BLC_SELF, I915_READ(FW_BLC_SELF)
1471 				   & ~FW_BLC_SELF_EN);
1472 	}
1473 
1474 	DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
1475 		      srwm);
1476 
1477 	/* 965 has limitations... */
1478 	I915_WRITE(DSPFW1, (srwm << DSPFW_SR_SHIFT) |
1479 		   (8 << 16) | (8 << 8) | (8 << 0));
1480 	I915_WRITE(DSPFW2, (8 << 8) | (8 << 0));
1481 	/* update cursor SR watermark */
1482 	I915_WRITE(DSPFW3, (cursor_sr << DSPFW_CURSOR_SR_SHIFT));
1483 }
1484 
1485 static void i9xx_update_wm(struct drm_crtc *unused_crtc)
1486 {
1487 	struct drm_device *dev = unused_crtc->dev;
1488 	struct drm_i915_private *dev_priv = dev->dev_private;
1489 	const struct intel_watermark_params *wm_info;
1490 	uint32_t fwater_lo;
1491 	uint32_t fwater_hi;
1492 	int cwm, srwm = 1;
1493 	int fifo_size;
1494 	int planea_wm, planeb_wm;
1495 	struct drm_crtc *crtc, *enabled = NULL;
1496 
1497 	if (IS_I945GM(dev))
1498 		wm_info = &i945_wm_info;
1499 	else if (!IS_GEN2(dev))
1500 		wm_info = &i915_wm_info;
1501 	else
1502 		wm_info = &i830_wm_info;
1503 
1504 	fifo_size = dev_priv->display.get_fifo_size(dev, 0);
1505 	crtc = intel_get_crtc_for_plane(dev, 0);
1506 	if (intel_crtc_active(crtc)) {
1507 		const struct drm_display_mode *adjusted_mode;
1508 		int cpp = crtc->fb->bits_per_pixel / 8;
1509 		if (IS_GEN2(dev))
1510 			cpp = 4;
1511 
1512 		adjusted_mode = &to_intel_crtc(crtc)->config.adjusted_mode;
1513 		planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1514 					       wm_info, fifo_size, cpp,
1515 					       latency_ns);
1516 		enabled = crtc;
1517 	} else
1518 		planea_wm = fifo_size - wm_info->guard_size;
1519 
1520 	fifo_size = dev_priv->display.get_fifo_size(dev, 1);
1521 	crtc = intel_get_crtc_for_plane(dev, 1);
1522 	if (intel_crtc_active(crtc)) {
1523 		const struct drm_display_mode *adjusted_mode;
1524 		int cpp = crtc->fb->bits_per_pixel / 8;
1525 		if (IS_GEN2(dev))
1526 			cpp = 4;
1527 
1528 		adjusted_mode = &to_intel_crtc(crtc)->config.adjusted_mode;
1529 		planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1530 					       wm_info, fifo_size, cpp,
1531 					       latency_ns);
1532 		if (enabled == NULL)
1533 			enabled = crtc;
1534 		else
1535 			enabled = NULL;
1536 	} else
1537 		planeb_wm = fifo_size - wm_info->guard_size;
1538 
1539 	DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);
1540 
1541 	/*
1542 	 * Overlay gets an aggressive default since video jitter is bad.
1543 	 */
1544 	cwm = 2;
1545 
1546 	/* Play safe and disable self-refresh before adjusting watermarks. */
1547 	if (IS_I945G(dev) || IS_I945GM(dev))
1548 		I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN_MASK | 0);
1549 	else if (IS_I915GM(dev))
1550 		I915_WRITE(INSTPM, _MASKED_BIT_DISABLE(INSTPM_SELF_EN));
1551 
1552 	/* Calc sr entries for one plane configs */
1553 	if (HAS_FW_BLC(dev) && enabled) {
1554 		/* self-refresh has much higher latency */
1555 		static const int sr_latency_ns = 6000;
1556 		const struct drm_display_mode *adjusted_mode =
1557 			&to_intel_crtc(enabled)->config.adjusted_mode;
1558 		int clock = adjusted_mode->crtc_clock;
1559 		int htotal = adjusted_mode->crtc_htotal;
1560 		int hdisplay = to_intel_crtc(enabled)->config.pipe_src_w;
1561 		int pixel_size = enabled->fb->bits_per_pixel / 8;
1562 		unsigned long line_time_us;
1563 		int entries;
1564 
1565 		line_time_us = (htotal * 1000) / clock;
1566 
1567 		/* Use ns/us then divide to preserve precision */
1568 		entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
1569 			pixel_size * hdisplay;
1570 		entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
1571 		DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
1572 		srwm = wm_info->fifo_size - entries;
1573 		if (srwm < 0)
1574 			srwm = 1;
1575 
1576 		if (IS_I945G(dev) || IS_I945GM(dev))
1577 			I915_WRITE(FW_BLC_SELF,
1578 				   FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
1579 		else if (IS_I915GM(dev))
1580 			I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
1581 	}
1582 
1583 	DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
1584 		      planea_wm, planeb_wm, cwm, srwm);
1585 
1586 	fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
1587 	fwater_hi = (cwm & 0x1f);
1588 
1589 	/* Set request length to 8 cachelines per fetch */
1590 	fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
1591 	fwater_hi = fwater_hi | (1 << 8);
1592 
1593 	I915_WRITE(FW_BLC, fwater_lo);
1594 	I915_WRITE(FW_BLC2, fwater_hi);
1595 
1596 	if (HAS_FW_BLC(dev)) {
1597 		if (enabled) {
1598 			if (IS_I945G(dev) || IS_I945GM(dev))
1599 				I915_WRITE(FW_BLC_SELF,
1600 					   FW_BLC_SELF_EN_MASK | FW_BLC_SELF_EN);
1601 			else if (IS_I915GM(dev))
1602 				I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_SELF_EN));
1603 			DRM_DEBUG_KMS("memory self refresh enabled\n");
1604 		} else
1605 			DRM_DEBUG_KMS("memory self refresh disabled\n");
1606 	}
1607 }
1608 
1609 static void i845_update_wm(struct drm_crtc *unused_crtc)
1610 {
1611 	struct drm_device *dev = unused_crtc->dev;
1612 	struct drm_i915_private *dev_priv = dev->dev_private;
1613 	struct drm_crtc *crtc;
1614 	const struct drm_display_mode *adjusted_mode;
1615 	uint32_t fwater_lo;
1616 	int planea_wm;
1617 
1618 	crtc = single_enabled_crtc(dev);
1619 	if (crtc == NULL)
1620 		return;
1621 
1622 	adjusted_mode = &to_intel_crtc(crtc)->config.adjusted_mode;
1623 	planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
1624 				       &i845_wm_info,
1625 				       dev_priv->display.get_fifo_size(dev, 0),
1626 				       4, latency_ns);
1627 	fwater_lo = I915_READ(FW_BLC) & ~0xfff;
1628 	fwater_lo |= (3<<8) | planea_wm;
1629 
1630 	DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);
1631 
1632 	I915_WRITE(FW_BLC, fwater_lo);
1633 }
1634 
1635 static uint32_t ilk_pipe_pixel_rate(struct drm_device *dev,
1636 				    struct drm_crtc *crtc)
1637 {
1638 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
1639 	uint32_t pixel_rate;
1640 
1641 	pixel_rate = intel_crtc->config.adjusted_mode.crtc_clock;
1642 
1643 	/* We only use IF-ID interlacing. If we ever use PF-ID we'll need to
1644 	 * adjust the pixel_rate here. */
1645 
1646 	if (intel_crtc->config.pch_pfit.enabled) {
1647 		uint64_t pipe_w, pipe_h, pfit_w, pfit_h;
1648 		uint32_t pfit_size = intel_crtc->config.pch_pfit.size;
1649 
1650 		pipe_w = intel_crtc->config.pipe_src_w;
1651 		pipe_h = intel_crtc->config.pipe_src_h;
1652 		pfit_w = (pfit_size >> 16) & 0xFFFF;
1653 		pfit_h = pfit_size & 0xFFFF;
1654 		if (pipe_w < pfit_w)
1655 			pipe_w = pfit_w;
1656 		if (pipe_h < pfit_h)
1657 			pipe_h = pfit_h;
1658 
1659 		pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h,
1660 				     pfit_w * pfit_h);
1661 	}
1662 
1663 	return pixel_rate;
1664 }
1665 
1666 /* latency must be in 0.1us units. */
1667 static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,
1668 			       uint32_t latency)
1669 {
1670 	uint64_t ret;
1671 
1672 	if (WARN(latency == 0, "Latency value missing\n"))
1673 		return UINT_MAX;
1674 
1675 	ret = (uint64_t) pixel_rate * bytes_per_pixel * latency;
1676 	ret = DIV_ROUND_UP_ULL(ret, 64 * 10000) + 2;
1677 
1678 	return ret;
1679 }
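/*
 * Worked example for ilk_wm_method1() with illustrative numbers only:
 * pixel_rate = 148500 (kHz), bytes_per_pixel = 4 and latency = 120
 * (12 us in 0.1us units) give 148500 * 4 * 120 = 71280000, and
 * DIV_ROUND_UP_ULL(71280000, 640000) + 2 = 112 + 2 = 114.
 */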
1680 
1681 /* latency must be in 0.1us units. */
1682 static uint32_t ilk_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
1683 			       uint32_t horiz_pixels, uint8_t bytes_per_pixel,
1684 			       uint32_t latency)
1685 {
1686 	uint32_t ret;
1687 
1688 	if (WARN(latency == 0, "Latency value missing\n"))
1689 		return UINT_MAX;
1690 
1691 	ret = (latency * pixel_rate) / (pipe_htotal * 10000);
1692 	ret = (ret + 1) * horiz_pixels * bytes_per_pixel;
1693 	ret = DIV_ROUND_UP(ret, 64) + 2;
1694 	return ret;
1695 }
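/*
 * Worked example for ilk_wm_method2() with illustrative numbers only:
 * latency = 120, pixel_rate = 148500 and pipe_htotal = 2200 give
 * (120 * 148500) / (2200 * 10000) = 0 lines; with horiz_pixels = 1920
 * and bytes_per_pixel = 4, (0 + 1) * 1920 * 4 = 7680, and
 * DIV_ROUND_UP(7680, 64) + 2 = 120 + 2 = 122.
 */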
1696 
1697 static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
1698 			   uint8_t bytes_per_pixel)
1699 {
1700 	return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
1701 }
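/*
 * pri_val is the primary watermark produced by the methods above and is
 * thus in 64 byte units; pri_val * 64 converts it back to bytes, and the
 * division by the bytes per display line expresses it in whole lines,
 * plus the usual 2 extra entries.  E.g. pri_val = 114, horiz_pixels =
 * 1920 and bytes_per_pixel = 4 give DIV_ROUND_UP(114 * 64, 7680) + 2 =
 * 1 + 2 = 3.
 */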
1702 
1703 struct ilk_pipe_wm_parameters {
1704 	bool active;
1705 	uint32_t pipe_htotal;
1706 	uint32_t pixel_rate;
1707 	struct intel_plane_wm_parameters pri;
1708 	struct intel_plane_wm_parameters spr;
1709 	struct intel_plane_wm_parameters cur;
1710 };
1711 
1712 struct ilk_wm_maximums {
1713 	uint16_t pri;
1714 	uint16_t spr;
1715 	uint16_t cur;
1716 	uint16_t fbc;
1717 };
1718 
1719 /* used in computing the new watermarks state */
1720 struct intel_wm_config {
1721 	unsigned int num_pipes_active;
1722 	bool sprites_enabled;
1723 	bool sprites_scaled;
1724 };
1725 
1726 /*
1727  * For both WM_PIPE and WM_LP.
1728  * mem_value must be in 0.1us units.
1729  */
1730 static uint32_t ilk_compute_pri_wm(const struct ilk_pipe_wm_parameters *params,
1731 				   uint32_t mem_value,
1732 				   bool is_lp)
1733 {
1734 	uint32_t method1, method2;
1735 
1736 	if (!params->active || !params->pri.enabled)
1737 		return 0;
1738 
1739 	method1 = ilk_wm_method1(params->pixel_rate,
1740 				 params->pri.bytes_per_pixel,
1741 				 mem_value);
1742 
1743 	if (!is_lp)
1744 		return method1;
1745 
1746 	method2 = ilk_wm_method2(params->pixel_rate,
1747 				 params->pipe_htotal,
1748 				 params->pri.horiz_pixels,
1749 				 params->pri.bytes_per_pixel,
1750 				 mem_value);
1751 
1752 	return min(method1, method2);
1753 }
1754 
1755 /*
1756  * For both WM_PIPE and WM_LP.
1757  * mem_value must be in 0.1us units.
1758  */
1759 static uint32_t ilk_compute_spr_wm(const struct ilk_pipe_wm_parameters *params,
1760 				   uint32_t mem_value)
1761 {
1762 	uint32_t method1, method2;
1763 
1764 	if (!params->active || !params->spr.enabled)
1765 		return 0;
1766 
1767 	method1 = ilk_wm_method1(params->pixel_rate,
1768 				 params->spr.bytes_per_pixel,
1769 				 mem_value);
1770 	method2 = ilk_wm_method2(params->pixel_rate,
1771 				 params->pipe_htotal,
1772 				 params->spr.horiz_pixels,
1773 				 params->spr.bytes_per_pixel,
1774 				 mem_value);
1775 	return min(method1, method2);
1776 }
1777 
1778 /*
1779  * For both WM_PIPE and WM_LP.
1780  * mem_value must be in 0.1us units.
1781  */
1782 static uint32_t ilk_compute_cur_wm(const struct ilk_pipe_wm_parameters *params,
1783 				   uint32_t mem_value)
1784 {
1785 	if (!params->active || !params->cur.enabled)
1786 		return 0;
1787 
1788 	return ilk_wm_method2(params->pixel_rate,
1789 			      params->pipe_htotal,
1790 			      params->cur.horiz_pixels,
1791 			      params->cur.bytes_per_pixel,
1792 			      mem_value);
1793 }
1794 
1795 /* Only for WM_LP. */
1796 static uint32_t ilk_compute_fbc_wm(const struct ilk_pipe_wm_parameters *params,
1797 				   uint32_t pri_val)
1798 {
1799 	if (!params->active || !params->pri.enabled)
1800 		return 0;
1801 
1802 	return ilk_wm_fbc(pri_val,
1803 			  params->pri.horiz_pixels,
1804 			  params->pri.bytes_per_pixel);
1805 }
1806 
1807 static unsigned int ilk_display_fifo_size(const struct drm_device *dev)
1808 {
1809 	if (INTEL_INFO(dev)->gen >= 8)
1810 		return 3072;
1811 	else if (INTEL_INFO(dev)->gen >= 7)
1812 		return 768;
1813 	else
1814 		return 512;
1815 }
1816 
1817 /* Calculate the maximum primary/sprite plane watermark */
1818 static unsigned int ilk_plane_wm_max(const struct drm_device *dev,
1819 				     int level,
1820 				     const struct intel_wm_config *config,
1821 				     enum intel_ddb_partitioning ddb_partitioning,
1822 				     bool is_sprite)
1823 {
1824 	unsigned int fifo_size = ilk_display_fifo_size(dev);
1825 	unsigned int max;
1826 
1827 	/* if sprites aren't enabled, sprites get nothing */
1828 	if (is_sprite && !config->sprites_enabled)
1829 		return 0;
1830 
1831 	/* HSW allows LP1+ watermarks even with multiple pipes */
1832 	if (level == 0 || config->num_pipes_active > 1) {
1833 		fifo_size /= INTEL_INFO(dev)->num_pipes;
1834 
1835 		/*
1836 		 * For some reason the non self refresh
1837 		 * FIFO size is only half of the self
1838 		 * refresh FIFO size on ILK/SNB.
1839 		 */
1840 		if (INTEL_INFO(dev)->gen <= 6)
1841 			fifo_size /= 2;
1842 	}
1843 
1844 	if (config->sprites_enabled) {
1845 		/* level 0 is always calculated with 1:1 split */
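		/*
		 * With the 5/6 partitioning below the sprite plane ends up
		 * with 5/6 of the FIFO and the primary plane with the
		 * remaining 1/6; in every other case each plane gets half.
		 */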
1846 		if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) {
1847 			if (is_sprite)
1848 				fifo_size *= 5;
1849 			fifo_size /= 6;
1850 		} else {
1851 			fifo_size /= 2;
1852 		}
1853 	}
1854 
1855 	/* clamp to max that the registers can hold */
1856 	if (INTEL_INFO(dev)->gen >= 8)
1857 		max = level == 0 ? 255 : 2047;
1858 	else if (INTEL_INFO(dev)->gen >= 7)
1859 		/* IVB/HSW primary/sprite plane watermarks */
1860 		max = level == 0 ? 127 : 1023;
1861 	else if (!is_sprite)
1862 		/* ILK/SNB primary plane watermarks */
1863 		max = level == 0 ? 127 : 511;
1864 	else
1865 		/* ILK/SNB sprite plane watermarks */
1866 		max = level == 0 ? 63 : 255;
1867 
1868 	return min(fifo_size, max);
1869 }
1870 
1871 /* Calculate the maximum cursor plane watermark */
1872 static unsigned int ilk_cursor_wm_max(const struct drm_device *dev,
1873 				      int level,
1874 				      const struct intel_wm_config *config)
1875 {
1876 	/* HSW LP1+ watermarks w/ multiple pipes */
1877 	if (level > 0 && config->num_pipes_active > 1)
1878 		return 64;
1879 
1880 	/* otherwise just report max that registers can hold */
1881 	if (INTEL_INFO(dev)->gen >= 7)
1882 		return level == 0 ? 63 : 255;
1883 	else
1884 		return level == 0 ? 31 : 63;
1885 }
1886 
1887 /* Calculate the maximum FBC watermark */
1888 static unsigned int ilk_fbc_wm_max(struct drm_device *dev)
1889 {
1890 	/* max that registers can hold */
1891 	if (INTEL_INFO(dev)->gen >= 8)
1892 		return 31;
1893 	else
1894 		return 15;
1895 }
1896 
1897 static void ilk_compute_wm_maximums(struct drm_device *dev,
1898 				    int level,
1899 				    const struct intel_wm_config *config,
1900 				    enum intel_ddb_partitioning ddb_partitioning,
1901 				    struct ilk_wm_maximums *max)
1902 {
1903 	max->pri = ilk_plane_wm_max(dev, level, config, ddb_partitioning, false);
1904 	max->spr = ilk_plane_wm_max(dev, level, config, ddb_partitioning, true);
1905 	max->cur = ilk_cursor_wm_max(dev, level, config);
1906 	max->fbc = ilk_fbc_wm_max(dev);
1907 }
1908 
1909 static bool ilk_validate_wm_level(int level,
1910 				  const struct ilk_wm_maximums *max,
1911 				  struct intel_wm_level *result)
1912 {
1913 	bool ret;
1914 
1915 	/* already determined to be invalid? */
1916 	if (!result->enable)
1917 		return false;
1918 
1919 	result->enable = result->pri_val <= max->pri &&
1920 			 result->spr_val <= max->spr &&
1921 			 result->cur_val <= max->cur;
1922 
1923 	ret = result->enable;
1924 
1925 	/*
1926 	 * HACK until we can pre-compute everything,
1927 	 * and thus fail gracefully if LP0 watermarks
1928 	 * are exceeded...
1929 	 */
1930 	if (level == 0 && !result->enable) {
1931 		if (result->pri_val > max->pri)
1932 			DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
1933 				      level, result->pri_val, max->pri);
1934 		if (result->spr_val > max->spr)
1935 			DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
1936 				      level, result->spr_val, max->spr);
1937 		if (result->cur_val > max->cur)
1938 			DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
1939 				      level, result->cur_val, max->cur);
1940 
1941 		result->pri_val = min_t(uint32_t, result->pri_val, max->pri);
1942 		result->spr_val = min_t(uint32_t, result->spr_val, max->spr);
1943 		result->cur_val = min_t(uint32_t, result->cur_val, max->cur);
1944 		result->enable = true;
1945 	}
1946 
1947 	return ret;
1948 }
1949 
1950 static void ilk_compute_wm_level(struct drm_i915_private *dev_priv,
1951 				 int level,
1952 				 const struct ilk_pipe_wm_parameters *p,
1953 				 struct intel_wm_level *result)
1954 {
1955 	uint16_t pri_latency = dev_priv->wm.pri_latency[level];
1956 	uint16_t spr_latency = dev_priv->wm.spr_latency[level];
1957 	uint16_t cur_latency = dev_priv->wm.cur_latency[level];
1958 
1959 	/* WM1+ latency values stored in 0.5us units */
1960 	if (level > 0) {
1961 		pri_latency *= 5;
1962 		spr_latency *= 5;
1963 		cur_latency *= 5;
1964 	}
1965 
1966 	result->pri_val = ilk_compute_pri_wm(p, pri_latency, level);
1967 	result->spr_val = ilk_compute_spr_wm(p, spr_latency);
1968 	result->cur_val = ilk_compute_cur_wm(p, cur_latency);
1969 	result->fbc_val = ilk_compute_fbc_wm(p, result->pri_val);
1970 	result->enable = true;
1971 }
1972 
1973 static uint32_t
1974 hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
1975 {
1976 	struct drm_i915_private *dev_priv = dev->dev_private;
1977 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
1978 	struct drm_display_mode *mode = &intel_crtc->config.adjusted_mode;
1979 	u32 linetime, ips_linetime;
1980 
1981 	if (!intel_crtc_active(crtc))
1982 		return 0;
1983 
1984 	/* The WMs are computed based on how long it takes to fill a single
1985 	 * row at the given clock rate, multiplied by 8.
1986 	 */
1987 	linetime = DIV_ROUND_CLOSEST(mode->crtc_htotal * 1000 * 8,
1988 				     mode->crtc_clock);
1989 	ips_linetime = DIV_ROUND_CLOSEST(mode->crtc_htotal * 1000 * 8,
1990 					 intel_ddi_get_cdclk_freq(dev_priv));
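	/*
	 * Worked example (illustrative numbers): crtc_htotal = 2200 and
	 * crtc_clock = 148500 (kHz) give
	 * DIV_ROUND_CLOSEST(2200 * 1000 * 8, 148500) = 119, i.e. a line
	 * time of roughly 14.9 us expressed in 1/8 us units.
	 */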
1991 
1992 	return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
1993 	       PIPE_WM_LINETIME_TIME(linetime);
1994 }
1995 
1996 static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[5])
1997 {
1998 	struct drm_i915_private *dev_priv = dev->dev_private;
1999 
2000 	if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
2001 		uint64_t sskpd = I915_READ64(MCH_SSKPD);
2002 
2003 		wm[0] = (sskpd >> 56) & 0xFF;
2004 		if (wm[0] == 0)
2005 			wm[0] = sskpd & 0xF;
2006 		wm[1] = (sskpd >> 4) & 0xFF;
2007 		wm[2] = (sskpd >> 12) & 0xFF;
2008 		wm[3] = (sskpd >> 20) & 0x1FF;
2009 		wm[4] = (sskpd >> 32) & 0x1FF;
2010 	} else if (INTEL_INFO(dev)->gen >= 6) {
2011 		uint32_t sskpd = I915_READ(MCH_SSKPD);
2012 
2013 		wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
2014 		wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK;
2015 		wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK;
2016 		wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK;
2017 	} else if (INTEL_INFO(dev)->gen >= 5) {
2018 		uint32_t mltr = I915_READ(MLTR_ILK);
2019 
2020 		/* ILK primary LP0 latency is 700 ns */
2021 		wm[0] = 7;
2022 		wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK;
2023 		wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK;
2024 	}
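	/*
	 * Note on units, derived from the users of these tables: wm[0] is
	 * consumed directly in 0.1 us steps, while wm[1..4] are stored in
	 * 0.5 us steps and multiplied by 5 in ilk_compute_wm_level() and
	 * intel_print_wm_latency() before use.
	 */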
2025 }
2026 
2027 static void intel_fixup_spr_wm_latency(struct drm_device *dev, uint16_t wm[5])
2028 {
2029 	/* ILK sprite LP0 latency is 1300 ns */
2030 	if (INTEL_INFO(dev)->gen == 5)
2031 		wm[0] = 13;
2032 }
2033 
2034 static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5])
2035 {
2036 	/* ILK cursor LP0 latency is 1300 ns */
2037 	if (INTEL_INFO(dev)->gen == 5)
2038 		wm[0] = 13;
2039 
2040 	/* WaDoubleCursorLP3Latency:ivb */
2041 	if (IS_IVYBRIDGE(dev))
2042 		wm[3] *= 2;
2043 }
2044 
2045 static int ilk_wm_max_level(const struct drm_device *dev)
2046 {
2047 	/* how many WM levels are we expecting */
2048 	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
2049 		return 4;
2050 	else if (INTEL_INFO(dev)->gen >= 6)
2051 		return 3;
2052 	else
2053 		return 2;
2054 }
2055 
2056 static void intel_print_wm_latency(struct drm_device *dev,
2057 				   const char *name,
2058 				   const uint16_t wm[5])
2059 {
2060 	int level, max_level = ilk_wm_max_level(dev);
2061 
2062 	for (level = 0; level <= max_level; level++) {
2063 		unsigned int latency = wm[level];
2064 
2065 		if (latency == 0) {
2066 			DRM_ERROR("%s WM%d latency not provided\n",
2067 				  name, level);
2068 			continue;
2069 		}
2070 
2071 		/* WM1+ latency values in 0.5us units */
2072 		if (level > 0)
2073 			latency *= 5;
2074 
2075 		DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
2076 			      name, level, wm[level],
2077 			      latency / 10, latency % 10);
2078 	}
2079 }
2080 
2081 static void intel_setup_wm_latency(struct drm_device *dev)
2082 {
2083 	struct drm_i915_private *dev_priv = dev->dev_private;
2084 
2085 	intel_read_wm_latency(dev, dev_priv->wm.pri_latency);
2086 
2087 	memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency,
2088 	       sizeof(dev_priv->wm.pri_latency));
2089 	memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency,
2090 	       sizeof(dev_priv->wm.pri_latency));
2091 
2092 	intel_fixup_spr_wm_latency(dev, dev_priv->wm.spr_latency);
2093 	intel_fixup_cur_wm_latency(dev, dev_priv->wm.cur_latency);
2094 
2095 	intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency);
2096 	intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency);
2097 	intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency);
2098 }
2099 
2100 static void ilk_compute_wm_parameters(struct drm_crtc *crtc,
2101 				      struct ilk_pipe_wm_parameters *p,
2102 				      struct intel_wm_config *config)
2103 {
2104 	struct drm_device *dev = crtc->dev;
2105 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2106 	enum i915_pipe pipe = intel_crtc->pipe;
2107 	struct drm_plane *plane;
2108 
2109 	p->active = intel_crtc_active(crtc);
2110 	if (p->active) {
2111 		p->pipe_htotal = intel_crtc->config.adjusted_mode.crtc_htotal;
2112 		p->pixel_rate = ilk_pipe_pixel_rate(dev, crtc);
2113 		p->pri.bytes_per_pixel = crtc->fb->bits_per_pixel / 8;
2114 		p->cur.bytes_per_pixel = 4;
2115 		p->pri.horiz_pixels = intel_crtc->config.pipe_src_w;
2116 		p->cur.horiz_pixels = 64;
2117 		/* TODO: for now, assume primary and cursor planes are always enabled. */
2118 		p->pri.enabled = true;
2119 		p->cur.enabled = true;
2120 	}
2121 
2122 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
2123 		config->num_pipes_active += intel_crtc_active(crtc);
2124 
2125 	list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
2126 		struct intel_plane *intel_plane = to_intel_plane(plane);
2127 
2128 		if (intel_plane->pipe == pipe)
2129 			p->spr = intel_plane->wm;
2130 
2131 		config->sprites_enabled |= intel_plane->wm.enabled;
2132 		config->sprites_scaled |= intel_plane->wm.scaled;
2133 	}
2134 }
2135 
2136 /* Compute new watermarks for the pipe */
2137 static bool intel_compute_pipe_wm(struct drm_crtc *crtc,
2138 				  const struct ilk_pipe_wm_parameters *params,
2139 				  struct intel_pipe_wm *pipe_wm)
2140 {
2141 	struct drm_device *dev = crtc->dev;
2142 	struct drm_i915_private *dev_priv = dev->dev_private;
2143 	int level, max_level = ilk_wm_max_level(dev);
2144 	/* LP0 watermark maximums depend on this pipe alone */
2145 	struct intel_wm_config config = {
2146 		.num_pipes_active = 1,
2147 		.sprites_enabled = params->spr.enabled,
2148 		.sprites_scaled = params->spr.scaled,
2149 	};
2150 	struct ilk_wm_maximums max;
2151 
2152 	/* LP0 watermarks always use 1/2 DDB partitioning */
2153 	ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max);
2154 
2155 	/* ILK/SNB: LP2+ watermarks only w/o sprites */
2156 	if (INTEL_INFO(dev)->gen <= 6 && params->spr.enabled)
2157 		max_level = 1;
2158 
2159 	/* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
2160 	if (params->spr.scaled)
2161 		max_level = 0;
2162 
2163 	for (level = 0; level <= max_level; level++)
2164 		ilk_compute_wm_level(dev_priv, level, params,
2165 				     &pipe_wm->wm[level]);
2166 
2167 	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
2168 		pipe_wm->linetime = hsw_compute_linetime_wm(dev, crtc);
2169 
2170 	/* At least LP0 must be valid */
2171 	return ilk_validate_wm_level(0, &max, &pipe_wm->wm[0]);
2172 }
2173 
2174 /*
2175  * Merge the watermarks from all active pipes for a specific level.
2176  */
2177 static void ilk_merge_wm_level(struct drm_device *dev,
2178 			       int level,
2179 			       struct intel_wm_level *ret_wm)
2180 {
2181 	struct intel_crtc *intel_crtc;
2182 
2183 	list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list, base.head) {
2184 		const struct intel_wm_level *wm =
2185 			&intel_crtc->wm.active.wm[level];
2186 
2187 		if (!wm->enable)
2188 			return;
2189 
2190 		ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val);
2191 		ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val);
2192 		ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val);
2193 		ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val);
2194 	}
2195 
2196 	ret_wm->enable = true;
2197 }
2198 
2199 /*
2200  * Merge all low power watermarks for all active pipes.
2201  */
2202 static void ilk_wm_merge(struct drm_device *dev,
2203 			 const struct intel_wm_config *config,
2204 			 const struct ilk_wm_maximums *max,
2205 			 struct intel_pipe_wm *merged)
2206 {
2207 	int level, max_level = ilk_wm_max_level(dev);
2208 
2209 	/* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
2210 	if ((INTEL_INFO(dev)->gen <= 6 || IS_IVYBRIDGE(dev)) &&
2211 	    config->num_pipes_active > 1)
2212 		return;
2213 
2214 	/* ILK: FBC WM must be disabled always */
2215 	merged->fbc_wm_enabled = INTEL_INFO(dev)->gen >= 6;
2216 
2217 	/* merge each WM1+ level */
2218 	for (level = 1; level <= max_level; level++) {
2219 		struct intel_wm_level *wm = &merged->wm[level];
2220 
2221 		ilk_merge_wm_level(dev, level, wm);
2222 
2223 		if (!ilk_validate_wm_level(level, max, wm))
2224 			break;
2225 
2226 		/*
2227 		 * The spec says it is preferred to disable
2228 		 * FBC WMs instead of disabling a WM level.
2229 		 */
2230 		if (wm->fbc_val > max->fbc) {
2231 			merged->fbc_wm_enabled = false;
2232 			wm->fbc_val = 0;
2233 		}
2234 	}
2235 
2236 	/* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */
2237 	/*
2238 	 * FIXME this is racy. FBC might get enabled later.
2239 	 * What we should check here is whether FBC can be
2240 	 * enabled sometime later.
2241 	 */
2242 	if (IS_GEN5(dev) && !merged->fbc_wm_enabled && intel_fbc_enabled(dev)) {
2243 		for (level = 2; level <= max_level; level++) {
2244 			struct intel_wm_level *wm = &merged->wm[level];
2245 
2246 			wm->enable = false;
2247 		}
2248 	}
2249 }
2250 
2251 static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm)
2252 {
2253 	/* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */
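	/*
	 * I.e. when wm[4] is enabled (a platform with five levels) the
	 * mapping is LP1->1, LP2->3, LP3->4; otherwise it is LP1->1,
	 * LP2->2, LP3->3.
	 */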
2254 	return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable);
2255 }
2256 
2257 /* The value we need to program into the WM_LPx latency field */
2258 static unsigned int ilk_wm_lp_latency(struct drm_device *dev, int level)
2259 {
2260 	struct drm_i915_private *dev_priv = dev->dev_private;
2261 
2262 	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
2263 		return 2 * level;
2264 	else
2265 		return dev_priv->wm.pri_latency[level];
2266 }
2267 
2268 static void ilk_compute_wm_results(struct drm_device *dev,
2269 				   const struct intel_pipe_wm *merged,
2270 				   enum intel_ddb_partitioning partitioning,
2271 				   struct ilk_wm_values *results)
2272 {
2273 	struct intel_crtc *intel_crtc;
2274 	int level, wm_lp;
2275 
2276 	results->enable_fbc_wm = merged->fbc_wm_enabled;
2277 	results->partitioning = partitioning;
2278 
2279 	/* LP1+ register values */
2280 	for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
2281 		const struct intel_wm_level *r;
2282 
2283 		level = ilk_wm_lp_to_level(wm_lp, merged);
2284 
2285 		r = &merged->wm[level];
2286 		if (!r->enable)
2287 			break;
2288 
2289 		results->wm_lp[wm_lp - 1] = WM3_LP_EN |
2290 			(ilk_wm_lp_latency(dev, level) << WM1_LP_LATENCY_SHIFT) |
2291 			(r->pri_val << WM1_LP_SR_SHIFT) |
2292 			r->cur_val;
2293 
2294 		if (INTEL_INFO(dev)->gen >= 8)
2295 			results->wm_lp[wm_lp - 1] |=
2296 				r->fbc_val << WM1_LP_FBC_SHIFT_BDW;
2297 		else
2298 			results->wm_lp[wm_lp - 1] |=
2299 				r->fbc_val << WM1_LP_FBC_SHIFT;
2300 
2301 		if (INTEL_INFO(dev)->gen <= 6 && r->spr_val) {
2302 			WARN_ON(wm_lp != 1);
2303 			results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val;
2304 		} else
2305 			results->wm_lp_spr[wm_lp - 1] = r->spr_val;
2306 	}
2307 
2308 	/* LP0 register values */
2309 	list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list, base.head) {
2310 		enum i915_pipe pipe = intel_crtc->pipe;
2311 		const struct intel_wm_level *r =
2312 			&intel_crtc->wm.active.wm[0];
2313 
2314 		if (WARN_ON(!r->enable))
2315 			continue;
2316 
2317 		results->wm_linetime[pipe] = intel_crtc->wm.active.linetime;
2318 
2319 		results->wm_pipe[pipe] =
2320 			(r->pri_val << WM0_PIPE_PLANE_SHIFT) |
2321 			(r->spr_val << WM0_PIPE_SPRITE_SHIFT) |
2322 			r->cur_val;
2323 	}
2324 }
2325 
2326 /* Find the result with the highest level enabled. If both peak at the same
2327  * level, prefer the one with FBC watermarks enabled, defaulting to r1. */
2328 static struct intel_pipe_wm *ilk_find_best_result(struct drm_device *dev,
2329 						  struct intel_pipe_wm *r1,
2330 						  struct intel_pipe_wm *r2)
2331 {
2332 	int level, max_level = ilk_wm_max_level(dev);
2333 	int level1 = 0, level2 = 0;
2334 
2335 	for (level = 1; level <= max_level; level++) {
2336 		if (r1->wm[level].enable)
2337 			level1 = level;
2338 		if (r2->wm[level].enable)
2339 			level2 = level;
2340 	}
2341 
2342 	if (level1 == level2) {
2343 		if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled)
2344 			return r2;
2345 		else
2346 			return r1;
2347 	} else if (level1 > level2) {
2348 		return r1;
2349 	} else {
2350 		return r2;
2351 	}
2352 }
2353 
2354 /* dirty bits used to track which watermarks need changes */
2355 #define WM_DIRTY_PIPE(pipe) (1 << (pipe))
2356 #define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe)))
2357 #define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp)))
2358 #define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3))
2359 #define WM_DIRTY_FBC (1 << 24)
2360 #define WM_DIRTY_DDB (1 << 25)
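/*
 * Resulting bit layout: bits 0-7 are per-pipe WM0 dirty flags, bits 8-15
 * per-pipe linetime, bits 16-18 the LP1-LP3 watermarks, bit 24 FBC and
 * bit 25 the DDB partitioning.
 */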
2361 
2362 static unsigned int ilk_compute_wm_dirty(struct drm_device *dev,
2363 					 const struct ilk_wm_values *old,
2364 					 const struct ilk_wm_values *new)
2365 {
2366 	unsigned int dirty = 0;
2367 	enum i915_pipe pipe;
2368 	int wm_lp;
2369 
2370 	for_each_pipe(pipe) {
2371 		if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) {
2372 			dirty |= WM_DIRTY_LINETIME(pipe);
2373 			/* Must disable LP1+ watermarks too */
2374 			dirty |= WM_DIRTY_LP_ALL;
2375 		}
2376 
2377 		if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) {
2378 			dirty |= WM_DIRTY_PIPE(pipe);
2379 			/* Must disable LP1+ watermarks too */
2380 			dirty |= WM_DIRTY_LP_ALL;
2381 		}
2382 	}
2383 
2384 	if (old->enable_fbc_wm != new->enable_fbc_wm) {
2385 		dirty |= WM_DIRTY_FBC;
2386 		/* Must disable LP1+ watermarks too */
2387 		dirty |= WM_DIRTY_LP_ALL;
2388 	}
2389 
2390 	if (old->partitioning != new->partitioning) {
2391 		dirty |= WM_DIRTY_DDB;
2392 		/* Must disable LP1+ watermarks too */
2393 		dirty |= WM_DIRTY_LP_ALL;
2394 	}
2395 
2396 	/* LP1+ watermarks already deemed dirty, no need to continue */
2397 	if (dirty & WM_DIRTY_LP_ALL)
2398 		return dirty;
2399 
2400 	/* Find the lowest numbered LP1+ watermark in need of an update... */
2401 	for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
2402 		if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] ||
2403 		    old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1])
2404 			break;
2405 	}
2406 
2407 	/* ...and mark it and all higher numbered LP1+ watermarks as dirty */
2408 	for (; wm_lp <= 3; wm_lp++)
2409 		dirty |= WM_DIRTY_LP(wm_lp);
2410 
2411 	return dirty;
2412 }
2413 
2414 static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv,
2415 			       unsigned int dirty)
2416 {
2417 	struct ilk_wm_values *previous = &dev_priv->wm.hw;
2418 	bool changed = false;
2419 
2420 	if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) {
2421 		previous->wm_lp[2] &= ~WM1_LP_SR_EN;
2422 		I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]);
2423 		changed = true;
2424 	}
2425 	if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) {
2426 		previous->wm_lp[1] &= ~WM1_LP_SR_EN;
2427 		I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]);
2428 		changed = true;
2429 	}
2430 	if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) {
2431 		previous->wm_lp[0] &= ~WM1_LP_SR_EN;
2432 		I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]);
2433 		changed = true;
2434 	}
2435 
2436 	/*
2437 	 * Don't touch WM1S_LP_EN here.
2438 	 * Doing so could cause underruns.
2439 	 */
2440 
2441 	return changed;
2442 }
2443 
2444 /*
2445  * The spec says we shouldn't write when we don't need to, because every write
2446  * causes WMs to be re-evaluated, expending some power.
2447  */
2448 static void ilk_write_wm_values(struct drm_i915_private *dev_priv,
2449 				struct ilk_wm_values *results)
2450 {
2451 	struct drm_device *dev = dev_priv->dev;
2452 	struct ilk_wm_values *previous = &dev_priv->wm.hw;
2453 	unsigned int dirty;
2454 	uint32_t val;
2455 
2456 	dirty = ilk_compute_wm_dirty(dev, previous, results);
2457 	if (!dirty)
2458 		return;
2459 
2460 	_ilk_disable_lp_wm(dev_priv, dirty);
2461 
2462 	if (dirty & WM_DIRTY_PIPE(PIPE_A))
2463 		I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
2464 	if (dirty & WM_DIRTY_PIPE(PIPE_B))
2465 		I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
2466 	if (dirty & WM_DIRTY_PIPE(PIPE_C))
2467 		I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
2468 
2469 	if (dirty & WM_DIRTY_LINETIME(PIPE_A))
2470 		I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
2471 	if (dirty & WM_DIRTY_LINETIME(PIPE_B))
2472 		I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
2473 	if (dirty & WM_DIRTY_LINETIME(PIPE_C))
2474 		I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
2475 
2476 	if (dirty & WM_DIRTY_DDB) {
2477 		if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
2478 			val = I915_READ(WM_MISC);
2479 			if (results->partitioning == INTEL_DDB_PART_1_2)
2480 				val &= ~WM_MISC_DATA_PARTITION_5_6;
2481 			else
2482 				val |= WM_MISC_DATA_PARTITION_5_6;
2483 			I915_WRITE(WM_MISC, val);
2484 		} else {
2485 			val = I915_READ(DISP_ARB_CTL2);
2486 			if (results->partitioning == INTEL_DDB_PART_1_2)
2487 				val &= ~DISP_DATA_PARTITION_5_6;
2488 			else
2489 				val |= DISP_DATA_PARTITION_5_6;
2490 			I915_WRITE(DISP_ARB_CTL2, val);
2491 		}
2492 	}
2493 
2494 	if (dirty & WM_DIRTY_FBC) {
2495 		val = I915_READ(DISP_ARB_CTL);
2496 		if (results->enable_fbc_wm)
2497 			val &= ~DISP_FBC_WM_DIS;
2498 		else
2499 			val |= DISP_FBC_WM_DIS;
2500 		I915_WRITE(DISP_ARB_CTL, val);
2501 	}
2502 
2503 	if (dirty & WM_DIRTY_LP(1) &&
2504 	    previous->wm_lp_spr[0] != results->wm_lp_spr[0])
2505 		I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
2506 
2507 	if (INTEL_INFO(dev)->gen >= 7) {
2508 		if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1])
2509 			I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
2510 		if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2])
2511 			I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
2512 	}
2513 
2514 	if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0])
2515 		I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
2516 	if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1])
2517 		I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
2518 	if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2])
2519 		I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
2520 
2521 	dev_priv->wm.hw = *results;
2522 }
2523 
2524 static bool ilk_disable_lp_wm(struct drm_device *dev)
2525 {
2526 	struct drm_i915_private *dev_priv = dev->dev_private;
2527 
2528 	return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
2529 }
2530 
2531 static void ilk_update_wm(struct drm_crtc *crtc)
2532 {
2533 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2534 	struct drm_device *dev = crtc->dev;
2535 	struct drm_i915_private *dev_priv = dev->dev_private;
2536 	struct ilk_wm_maximums max;
2537 	struct ilk_pipe_wm_parameters params = {};
2538 	struct ilk_wm_values results = {};
2539 	enum intel_ddb_partitioning partitioning;
2540 	struct intel_pipe_wm pipe_wm = {};
2541 	struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
2542 	struct intel_wm_config config = {};
2543 
2544 	ilk_compute_wm_parameters(crtc, &params, &config);
2545 
2546 	intel_compute_pipe_wm(crtc, &params, &pipe_wm);
2547 
2548 	if (!memcmp(&intel_crtc->wm.active, &pipe_wm, sizeof(pipe_wm)))
2549 		return;
2550 
2551 	intel_crtc->wm.active = pipe_wm;
2552 
2553 	ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max);
2554 	ilk_wm_merge(dev, &config, &max, &lp_wm_1_2);
2555 
2556 	/* 5/6 split only in single pipe config on IVB+ */
2557 	if (INTEL_INFO(dev)->gen >= 7 &&
2558 	    config.num_pipes_active == 1 && config.sprites_enabled) {
2559 		ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max);
2560 		ilk_wm_merge(dev, &config, &max, &lp_wm_5_6);
2561 
2562 		best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6);
2563 	} else {
2564 		best_lp_wm = &lp_wm_1_2;
2565 	}
2566 
2567 	partitioning = (best_lp_wm == &lp_wm_1_2) ?
2568 		       INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6;
2569 
2570 	ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results);
2571 
2572 	ilk_write_wm_values(dev_priv, &results);
2573 }
2574 
2575 static void ilk_update_sprite_wm(struct drm_plane *plane,
2576 				     struct drm_crtc *crtc,
2577 				     uint32_t sprite_width, int pixel_size,
2578 				     bool enabled, bool scaled)
2579 {
2580 	struct drm_device *dev = plane->dev;
2581 	struct intel_plane *intel_plane = to_intel_plane(plane);
2582 
2583 	intel_plane->wm.enabled = enabled;
2584 	intel_plane->wm.scaled = scaled;
2585 	intel_plane->wm.horiz_pixels = sprite_width;
2586 	intel_plane->wm.bytes_per_pixel = pixel_size;
2587 
2588 	/*
2589 	 * IVB workaround: must disable low power watermarks for at least
2590 	 * one frame before enabling scaling.  LP watermarks can be re-enabled
2591 	 * when scaling is disabled.
2592 	 *
2593 	 * WaCxSRDisabledForSpriteScaling:ivb
2594 	 */
2595 	if (IS_IVYBRIDGE(dev) && scaled && ilk_disable_lp_wm(dev))
2596 		intel_wait_for_vblank(dev, intel_plane->pipe);
2597 
2598 	ilk_update_wm(crtc);
2599 }
2600 
2601 static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)
2602 {
2603 	struct drm_device *dev = crtc->dev;
2604 	struct drm_i915_private *dev_priv = dev->dev_private;
2605 	struct ilk_wm_values *hw = &dev_priv->wm.hw;
2606 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2607 	struct intel_pipe_wm *active = &intel_crtc->wm.active;
2608 	enum i915_pipe pipe = intel_crtc->pipe;
2609 	static const unsigned int wm0_pipe_reg[] = {
2610 		[PIPE_A] = WM0_PIPEA_ILK,
2611 		[PIPE_B] = WM0_PIPEB_ILK,
2612 		[PIPE_C] = WM0_PIPEC_IVB,
2613 	};
2614 
2615 	hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]);
2616 	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
2617 		hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
2618 
2619 	if (intel_crtc_active(crtc)) {
2620 		u32 tmp = hw->wm_pipe[pipe];
2621 
2622 		/*
2623 		 * For active pipes LP0 watermark is marked as
2624 		 * enabled, and LP1+ watermarks as disabled since
2625 		 * we can't really reverse compute them in case
2626 		 * multiple pipes are active.
2627 		 */
2628 		active->wm[0].enable = true;
2629 		active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT;
2630 		active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT;
2631 		active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK;
2632 		active->linetime = hw->wm_linetime[pipe];
2633 	} else {
2634 		int level, max_level = ilk_wm_max_level(dev);
2635 
2636 		/*
2637 		 * For inactive pipes, all watermark levels
2638 		 * should be marked as enabled but zeroed,
2639 		 * which is what we'd compute them to be.
2640 		 */
2641 		for (level = 0; level <= max_level; level++)
2642 			active->wm[level].enable = true;
2643 	}
2644 }
2645 
2646 void ilk_wm_get_hw_state(struct drm_device *dev)
2647 {
2648 	struct drm_i915_private *dev_priv = dev->dev_private;
2649 	struct ilk_wm_values *hw = &dev_priv->wm.hw;
2650 	struct drm_crtc *crtc;
2651 
2652 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
2653 		ilk_pipe_wm_get_hw_state(crtc);
2654 
2655 	hw->wm_lp[0] = I915_READ(WM1_LP_ILK);
2656 	hw->wm_lp[1] = I915_READ(WM2_LP_ILK);
2657 	hw->wm_lp[2] = I915_READ(WM3_LP_ILK);
2658 
2659 	hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
2660 	hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
2661 	hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
2662 
2663 	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
2664 		hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
2665 			INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
2666 	else if (IS_IVYBRIDGE(dev))
2667 		hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ?
2668 			INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
2669 
2670 	hw->enable_fbc_wm =
2671 		!(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
2672 }
2673 
2674 /**
2675  * intel_update_watermarks - update FIFO watermark values based on current modes
2676  *
2677  * Calculate watermark values for the various WM regs based on current mode
2678  * and plane configuration.
2679  *
2680  * There are several cases to deal with here:
2681  *   - normal (i.e. non-self-refresh)
2682  *   - self-refresh (SR) mode
2683  *   - lines are large relative to FIFO size (buffer can hold up to 2)
2684  *   - lines are small relative to FIFO size (buffer can hold more than 2
2685  *     lines), so need to account for TLB latency
2686  *
2687  *   The normal calculation is:
2688  *     watermark = dotclock * bytes per pixel * latency
2689  *   where latency is platform & configuration dependent (we assume pessimal
2690  *   values here).
2691  *
2692  *   The SR calculation is:
2693  *     watermark = (trunc(latency/line time)+1) * surface width *
2694  *       bytes per pixel
2695  *   where
2696  *     line time = htotal / dotclock
2697  *     surface width = hdisplay for normal plane and 64 for cursor
2698  *   and latency is assumed to be high, as above.
2699  *
2700  * The final value programmed to the register should always be rounded up,
2701  * and include an extra 2 entries to account for clock crossings.
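 *
 * As an illustration with made-up numbers: a 100 MHz dot clock at 4 bytes
 * per pixel and 2 us of latency needs 100000000 * 4 * 0.000002 = 800 bytes
 * of FIFO before rounding up and adding the extra 2 entries.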
2702  *
2703  * We don't use the sprite, so we can ignore that.  And on Crestline we have
2704  * to set the non-SR watermarks to 8.
2705  */
2706 void intel_update_watermarks(struct drm_crtc *crtc)
2707 {
2708 	struct drm_i915_private *dev_priv = crtc->dev->dev_private;
2709 
2710 	if (dev_priv->display.update_wm)
2711 		dev_priv->display.update_wm(crtc);
2712 }
2713 
2714 void intel_update_sprite_watermarks(struct drm_plane *plane,
2715 				    struct drm_crtc *crtc,
2716 				    uint32_t sprite_width, int pixel_size,
2717 				    bool enabled, bool scaled)
2718 {
2719 	struct drm_i915_private *dev_priv = plane->dev->dev_private;
2720 
2721 	if (dev_priv->display.update_sprite_wm)
2722 		dev_priv->display.update_sprite_wm(plane, crtc, sprite_width,
2723 						   pixel_size, enabled, scaled);
2724 }
2725 
2726 static struct drm_i915_gem_object *
2727 intel_alloc_context_page(struct drm_device *dev)
2728 {
2729 	struct drm_i915_gem_object *ctx;
2730 	int ret;
2731 
2732 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
2733 
2734 	ctx = i915_gem_alloc_object(dev, 4096);
2735 	if (!ctx) {
2736 		DRM_DEBUG("failed to alloc power context, RC6 disabled\n");
2737 		return NULL;
2738 	}
2739 
2740 	ret = i915_gem_obj_ggtt_pin(ctx, 4096, true, false);
2741 	if (ret) {
2742 		DRM_ERROR("failed to pin power context: %d\n", ret);
2743 		goto err_unref;
2744 	}
2745 
2746 	ret = i915_gem_object_set_to_gtt_domain(ctx, 1);
2747 	if (ret) {
2748 		DRM_ERROR("failed to set-domain on power context: %d\n", ret);
2749 		goto err_unpin;
2750 	}
2751 
2752 	return ctx;
2753 
2754 err_unpin:
2755 	i915_gem_object_unpin(ctx);
2756 err_unref:
2757 	drm_gem_object_unreference(&ctx->base);
2758 	return NULL;
2759 }
2760 
2761 /**
2762  * Lock protecting IPS related data structures
2763  */
2764 struct lock mchdev_lock;
2765 LOCK_SYSINIT(mchdev, &mchdev_lock, "mchdev", LK_CANRECURSE);
2766 
2767 /* Global for IPS driver to get at the current i915 device. Protected by
2768  * mchdev_lock. */
2769 static struct drm_i915_private *i915_mch_dev;
2770 
2771 bool ironlake_set_drps(struct drm_device *dev, u8 val)
2772 {
2773 	struct drm_i915_private *dev_priv = dev->dev_private;
2774 	u16 rgvswctl;
2775 
2776 	assert_spin_locked(&mchdev_lock);
2777 
2778 	rgvswctl = I915_READ16(MEMSWCTL);
2779 	if (rgvswctl & MEMCTL_CMD_STS) {
2780 		DRM_DEBUG("gpu busy, RCS change rejected\n");
2781 		return false; /* still busy with another command */
2782 	}
2783 
2784 	rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
2785 		(val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
2786 	I915_WRITE16(MEMSWCTL, rgvswctl);
2787 	POSTING_READ16(MEMSWCTL);
2788 
2789 	rgvswctl |= MEMCTL_CMD_STS;
2790 	I915_WRITE16(MEMSWCTL, rgvswctl);
2791 
2792 	return true;
2793 }
2794 
2795 static void ironlake_enable_drps(struct drm_device *dev)
2796 {
2797 	struct drm_i915_private *dev_priv = dev->dev_private;
2798 	u32 rgvmodectl = I915_READ(MEMMODECTL);
2799 	u8 fmax, fmin, fstart, vstart;
2800 
2801 	lockmgr(&mchdev_lock, LK_EXCLUSIVE);
2802 
2803 	/* Enable temp reporting */
2804 	I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
2805 	I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
2806 
2807 	/* 100ms RC evaluation intervals */
2808 	I915_WRITE(RCUPEI, 100000);
2809 	I915_WRITE(RCDNEI, 100000);
2810 
2811 	/* Set max/min thresholds to 90ms and 80ms respectively */
2812 	I915_WRITE(RCBMAXAVG, 90000);
2813 	I915_WRITE(RCBMINAVG, 80000);
2814 
2815 	I915_WRITE(MEMIHYST, 1);
2816 
2817 	/* Set up min, max, and cur for interrupt handling */
2818 	fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
2819 	fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
2820 	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
2821 		MEMMODE_FSTART_SHIFT;
2822 
2823 	vstart = (I915_READ(PXVFREQ_BASE + (fstart * 4)) & PXVFREQ_PX_MASK) >>
2824 		PXVFREQ_PX_SHIFT;
2825 
2826 	dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
2827 	dev_priv->ips.fstart = fstart;
2828 
2829 	dev_priv->ips.max_delay = fstart;
2830 	dev_priv->ips.min_delay = fmin;
2831 	dev_priv->ips.cur_delay = fstart;
2832 
2833 	DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
2834 			 fmax, fmin, fstart);
2835 
2836 	I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
2837 
2838 	/*
2839 	 * Interrupts will be enabled in ironlake_irq_postinstall
2840 	 */
2841 
2842 	I915_WRITE(VIDSTART, vstart);
2843 	POSTING_READ(VIDSTART);
2844 
2845 	rgvmodectl |= MEMMODE_SWMODE_EN;
2846 	I915_WRITE(MEMMODECTL, rgvmodectl);
2847 
2848 	if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
2849 		DRM_ERROR("stuck trying to change perf mode\n");
2850 	mdelay(1);
2851 
2852 	ironlake_set_drps(dev, fstart);
2853 
2854 	dev_priv->ips.last_count1 = I915_READ(0x112e4) + I915_READ(0x112e8) +
2855 		I915_READ(0x112e0);
2856 	dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
2857 	dev_priv->ips.last_count2 = I915_READ(0x112f4);
2858 	getrawmonotonic(&dev_priv->ips.last_time2);
2859 
2860 	lockmgr(&mchdev_lock, LK_RELEASE);
2861 }
2862 
2863 static void ironlake_disable_drps(struct drm_device *dev)
2864 {
2865 	struct drm_i915_private *dev_priv = dev->dev_private;
2866 	u16 rgvswctl;
2867 
2868 	lockmgr(&mchdev_lock, LK_EXCLUSIVE);
2869 
2870 	rgvswctl = I915_READ16(MEMSWCTL);
2871 
2872 	/* Ack interrupts, disable EFC interrupt */
2873 	I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
2874 	I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
2875 	I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
2876 	I915_WRITE(DEIIR, DE_PCU_EVENT);
2877 	I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
2878 
2879 	/* Go back to the starting frequency */
2880 	ironlake_set_drps(dev, dev_priv->ips.fstart);
2881 	mdelay(1);
2882 	rgvswctl |= MEMCTL_CMD_STS;
2883 	I915_WRITE(MEMSWCTL, rgvswctl);
2884 	mdelay(1);
2885 
2886 	lockmgr(&mchdev_lock, LK_RELEASE);
2887 }
2888 
2889 /* There's a funny hw issue where the hw returns all 0 when reading from
2890  * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
2891  * ourselves, instead of doing an rmw cycle (which might result in us clearing
2892  * all limits and the gpu getting stuck at whatever frequency it currently runs at).
2893  */
2894 static u32 gen6_rps_limits(struct drm_i915_private *dev_priv, u8 val)
2895 {
2896 	u32 limits;
2897 
2898 	/* Only set the down limit when we've reached the lowest level to avoid
2899 	 * getting more interrupts, otherwise leave this clear. This prevents a
2900 	 * race in the hw when coming out of rc6: there's a tiny window where
2901 	 * the hw runs at the minimal clock before selecting the desired
2902 	 * frequency; if the down threshold expires in that window we will not
2903 	 * receive a down interrupt. */
2904 	limits = dev_priv->rps.max_delay << 24;
2905 	if (val <= dev_priv->rps.min_delay)
2906 		limits |= dev_priv->rps.min_delay << 16;
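	/*
	 * The resulting value packs max_delay into bits 31:24 and, once we
	 * are at the floor, min_delay into bits 23:16; gen6_set_rps() then
	 * writes it to GEN6_RP_INTERRUPT_LIMITS.
	 */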
2907 
2908 	return limits;
2909 }
2910 
2911 static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
2912 {
2913 	int new_power;
2914 
2915 	new_power = dev_priv->rps.power;
2916 	switch (dev_priv->rps.power) {
2917 	case LOW_POWER:
2918 		if (val > dev_priv->rps.rpe_delay + 1 && val > dev_priv->rps.cur_delay)
2919 			new_power = BETWEEN;
2920 		break;
2921 
2922 	case BETWEEN:
2923 		if (val <= dev_priv->rps.rpe_delay && val < dev_priv->rps.cur_delay)
2924 			new_power = LOW_POWER;
2925 		else if (val >= dev_priv->rps.rp0_delay && val > dev_priv->rps.cur_delay)
2926 			new_power = HIGH_POWER;
2927 		break;
2928 
2929 	case HIGH_POWER:
2930 		if (val < (dev_priv->rps.rp1_delay + dev_priv->rps.rp0_delay) >> 1 && val < dev_priv->rps.cur_delay)
2931 			new_power = BETWEEN;
2932 		break;
2933 	}
2934 	/* Max/min bins are special */
2935 	if (val == dev_priv->rps.min_delay)
2936 		new_power = LOW_POWER;
2937 	if (val == dev_priv->rps.max_delay)
2938 		new_power = HIGH_POWER;
2939 	if (new_power == dev_priv->rps.power)
2940 		return;
2941 
2942 	/* Note the units here are not exactly 1us, but 1280ns. */
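	/*
	 * E.g. for the LOW_POWER case below: GEN6_RP_UP_EI = 12500 means
	 * 12500 * 1280 ns = 16 ms, and an up threshold of 11800 is
	 * 11800 / 12500 ~= 94.4% of that interval, i.e. the "95% busy"
	 * figure in the comment.
	 */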
2943 	switch (new_power) {
2944 	case LOW_POWER:
2945 		/* Upclock if more than 95% busy over 16ms */
2946 		I915_WRITE(GEN6_RP_UP_EI, 12500);
2947 		I915_WRITE(GEN6_RP_UP_THRESHOLD, 11800);
2948 
2949 		/* Downclock if less than 85% busy over 32ms */
2950 		I915_WRITE(GEN6_RP_DOWN_EI, 25000);
2951 		I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 21250);
2952 
2953 		I915_WRITE(GEN6_RP_CONTROL,
2954 			   GEN6_RP_MEDIA_TURBO |
2955 			   GEN6_RP_MEDIA_HW_NORMAL_MODE |
2956 			   GEN6_RP_MEDIA_IS_GFX |
2957 			   GEN6_RP_ENABLE |
2958 			   GEN6_RP_UP_BUSY_AVG |
2959 			   GEN6_RP_DOWN_IDLE_AVG);
2960 		break;
2961 
2962 	case BETWEEN:
2963 		/* Upclock if more than 90% busy over 13ms */
2964 		I915_WRITE(GEN6_RP_UP_EI, 10250);
2965 		I915_WRITE(GEN6_RP_UP_THRESHOLD, 9225);
2966 
2967 		/* Downclock if less than 75% busy over 32ms */
2968 		I915_WRITE(GEN6_RP_DOWN_EI, 25000);
2969 		I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 18750);
2970 
2971 		I915_WRITE(GEN6_RP_CONTROL,
2972 			   GEN6_RP_MEDIA_TURBO |
2973 			   GEN6_RP_MEDIA_HW_NORMAL_MODE |
2974 			   GEN6_RP_MEDIA_IS_GFX |
2975 			   GEN6_RP_ENABLE |
2976 			   GEN6_RP_UP_BUSY_AVG |
2977 			   GEN6_RP_DOWN_IDLE_AVG);
2978 		break;
2979 
2980 	case HIGH_POWER:
2981 		/* Upclock if more than 85% busy over 10ms */
2982 		I915_WRITE(GEN6_RP_UP_EI, 8000);
2983 		I915_WRITE(GEN6_RP_UP_THRESHOLD, 6800);
2984 
2985 		/* Downclock if less than 60% busy over 32ms */
2986 		I915_WRITE(GEN6_RP_DOWN_EI, 25000);
2987 		I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 15000);
2988 
2989 		I915_WRITE(GEN6_RP_CONTROL,
2990 			   GEN6_RP_MEDIA_TURBO |
2991 			   GEN6_RP_MEDIA_HW_NORMAL_MODE |
2992 			   GEN6_RP_MEDIA_IS_GFX |
2993 			   GEN6_RP_ENABLE |
2994 			   GEN6_RP_UP_BUSY_AVG |
2995 			   GEN6_RP_DOWN_IDLE_AVG);
2996 		break;
2997 	}
2998 
2999 	dev_priv->rps.power = new_power;
3000 	dev_priv->rps.last_adj = 0;
3001 }
3002 
3003 void gen6_set_rps(struct drm_device *dev, u8 val)
3004 {
3005 	struct drm_i915_private *dev_priv = dev->dev_private;
3006 
3007 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
3008 	WARN_ON(val > dev_priv->rps.max_delay);
3009 	WARN_ON(val < dev_priv->rps.min_delay);
3010 
3011 	if (val == dev_priv->rps.cur_delay)
3012 		return;
3013 
3014 	gen6_set_rps_thresholds(dev_priv, val);
3015 
3016 	if (IS_HASWELL(dev))
3017 		I915_WRITE(GEN6_RPNSWREQ,
3018 			   HSW_FREQUENCY(val));
3019 	else
3020 		I915_WRITE(GEN6_RPNSWREQ,
3021 			   GEN6_FREQUENCY(val) |
3022 			   GEN6_OFFSET(0) |
3023 			   GEN6_AGGRESSIVE_TURBO);
3024 
3025 	/* Make sure we continue to get interrupts
3026 	 * until we hit the minimum or maximum frequencies.
3027 	 */
3028 	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
3029 		   gen6_rps_limits(dev_priv, val));
3030 
3031 	POSTING_READ(GEN6_RPNSWREQ);
3032 
3033 	dev_priv->rps.cur_delay = val;
3034 
3035 	trace_intel_gpu_freq_change(val * 50);
3036 }
3037 
3038 void gen6_rps_idle(struct drm_i915_private *dev_priv)
3039 {
3040 	struct drm_device *dev = dev_priv->dev;
3041 
3042 	mutex_lock(&dev_priv->rps.hw_lock);
3043 	if (dev_priv->rps.enabled) {
3044 		if (IS_VALLEYVIEW(dev))
3045 			valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_delay);
3046 		else
3047 			gen6_set_rps(dev_priv->dev, dev_priv->rps.min_delay);
3048 		dev_priv->rps.last_adj = 0;
3049 	}
3050 	mutex_unlock(&dev_priv->rps.hw_lock);
3051 }
3052 
3053 void gen6_rps_boost(struct drm_i915_private *dev_priv)
3054 {
3055 	struct drm_device *dev = dev_priv->dev;
3056 
3057 	mutex_lock(&dev_priv->rps.hw_lock);
3058 	if (dev_priv->rps.enabled) {
3059 		if (IS_VALLEYVIEW(dev))
3060 			valleyview_set_rps(dev_priv->dev, dev_priv->rps.max_delay);
3061 		else
3062 			gen6_set_rps(dev_priv->dev, dev_priv->rps.max_delay);
3063 		dev_priv->rps.last_adj = 0;
3064 	}
3065 	mutex_unlock(&dev_priv->rps.hw_lock);
3066 }
3067 
3068 void valleyview_set_rps(struct drm_device *dev, u8 val)
3069 {
3070 	struct drm_i915_private *dev_priv = dev->dev_private;
3071 
3072 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
3073 	WARN_ON(val > dev_priv->rps.max_delay);
3074 	WARN_ON(val < dev_priv->rps.min_delay);
3075 
3076 	DRM_DEBUG_DRIVER("GPU freq request from %d MHz (%u) to %d MHz (%u)\n",
3077 			 vlv_gpu_freq(dev_priv, dev_priv->rps.cur_delay),
3078 			 dev_priv->rps.cur_delay,
3079 			 vlv_gpu_freq(dev_priv, val), val);
3080 
3081 	if (val == dev_priv->rps.cur_delay)
3082 		return;
3083 
3084 	vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
3085 
3086 	dev_priv->rps.cur_delay = val;
3087 
3088 	trace_intel_gpu_freq_change(vlv_gpu_freq(dev_priv, val));
3089 }
3090 
3091 static void gen6_disable_rps_interrupts(struct drm_device *dev)
3092 {
3093 	struct drm_i915_private *dev_priv = dev->dev_private;
3094 
3095 	I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
3096 	I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) & ~GEN6_PM_RPS_EVENTS);
3097 	/* Complete PM interrupt masking here doesn't race with the rps work
3098 	 * item re-unmasking PM interrupts because that path uses a different
3099 	 * register (PMIMR) to mask PM interrupts. The only risk is in leaving
3100 	 * stale bits in PMIIR and PMIMR which gen6_enable_rps will clean up. */
3101 
3102 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
3103 	dev_priv->rps.pm_iir = 0;
3104 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
3105 
3106 	I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
3107 }
3108 
3109 static void gen6_disable_rps(struct drm_device *dev)
3110 {
3111 	struct drm_i915_private *dev_priv = dev->dev_private;
3112 
3113 	I915_WRITE(GEN6_RC_CONTROL, 0);
3114 	I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
3115 
3116 	gen6_disable_rps_interrupts(dev);
3117 }
3118 
3119 static void valleyview_disable_rps(struct drm_device *dev)
3120 {
3121 	struct drm_i915_private *dev_priv = dev->dev_private;
3122 
3123 	I915_WRITE(GEN6_RC_CONTROL, 0);
3124 
3125 	gen6_disable_rps_interrupts(dev);
3126 
3127 	if (dev_priv->vlv_pctx) {
3128 		drm_gem_object_unreference(&dev_priv->vlv_pctx->base);
3129 		dev_priv->vlv_pctx = NULL;
3130 	}
3131 }
3132 
3133 static void intel_print_rc6_info(struct drm_device *dev, u32 mode)
3134 {
3135 	if (IS_GEN6(dev))
3136 		DRM_DEBUG_DRIVER("Sandybridge: deep RC6 disabled\n");
3137 
3138 	if (IS_HASWELL(dev))
3139 		DRM_DEBUG_DRIVER("Haswell: only RC6 available\n");
3140 
3141 	DRM_INFO("Enabling RC6 states: RC6 %s, RC6p %s, RC6pp %s\n",
3142 			(mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off",
3143 			(mode & GEN6_RC_CTL_RC6p_ENABLE) ? "on" : "off",
3144 			(mode & GEN6_RC_CTL_RC6pp_ENABLE) ? "on" : "off");
3145 }
3146 
3147 int intel_enable_rc6(const struct drm_device *dev)
3148 {
3149 	/* No RC6 before Ironlake */
3150 	if (INTEL_INFO(dev)->gen < 5)
3151 		return 0;
3152 
3153 	/* Respect the kernel parameter if it is set */
3154 	if (i915_enable_rc6 >= 0)
3155 		return i915_enable_rc6;
3156 
3157 	/* Disable RC6 on Ironlake */
3158 	if (INTEL_INFO(dev)->gen == 5)
3159 		return 0;
3160 
3161 	if (IS_HASWELL(dev))
3162 		return INTEL_RC6_ENABLE;
3163 
3164 	/* snb/ivb have more than one rc6 state. */
3165 	if (INTEL_INFO(dev)->gen == 6)
3166 		return INTEL_RC6_ENABLE;
3167 
3168 	return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE);
3169 }
3170 
3171 static void gen6_enable_rps_interrupts(struct drm_device *dev)
3172 {
3173 	struct drm_i915_private *dev_priv = dev->dev_private;
3174 	u32 enabled_intrs;
3175 
3176 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
3177 	WARN_ON(dev_priv->rps.pm_iir);
3178 	snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
3179 	I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
3180 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
3181 
3182 	/* only unmask PM interrupts we need. Mask all others. */
3183 	enabled_intrs = GEN6_PM_RPS_EVENTS;
3184 
3185 	/* IVB and SNB hard-hang on a looping batchbuffer
3186 	 * if GEN6_PM_UP_EI_EXPIRED is masked.
3187 	 */
3188 	if (INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev))
3189 		enabled_intrs |= GEN6_PM_RP_UP_EI_EXPIRED;
3190 
3191 	I915_WRITE(GEN6_PMINTRMSK, ~enabled_intrs);
3192 }
3193 
3194 static void gen8_enable_rps(struct drm_device *dev)
3195 {
3196 	struct drm_i915_private *dev_priv = dev->dev_private;
3197 	struct intel_ring_buffer *ring;
3198 	uint32_t rc6_mask = 0, rp_state_cap;
3199 	int unused;
3200 
3201 	/* 1a: Software RC state - RC0 */
3202 	I915_WRITE(GEN6_RC_STATE, 0);
3203 
3204 	/* 1c & 1d: Get forcewake during program sequence. Although the driver
3205 	 * hasn't enabled a state yet where we need forcewake, BIOS may have. */
3206 	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
3207 
3208 	/* 2a: Disable RC states. */
3209 	I915_WRITE(GEN6_RC_CONTROL, 0);
3210 
3211 	rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
3212 
3213 	/* 2b: Program RC6 thresholds.*/
3214 	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
3215 	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
3216 	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
3217 	for_each_ring(ring, dev_priv, unused)
3218 		I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
3219 	I915_WRITE(GEN6_RC_SLEEP, 0);
3220 	I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
3221 
3222 	/* 3: Enable RC6 */
3223 	if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
3224 		rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
3225 	DRM_INFO("RC6 %s\n", (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off");
3226 	I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
3227 			GEN6_RC_CTL_EI_MODE(1) |
3228 			rc6_mask);
3229 
3230 	/* 4 Program defaults and thresholds for RPS*/
3231 	I915_WRITE(GEN6_RPNSWREQ, HSW_FREQUENCY(10)); /* Request 500 MHz */
3232 	I915_WRITE(GEN6_RC_VIDEO_FREQ, HSW_FREQUENCY(12)); /* Request 600 MHz */
3233 	/* NB: Docs say 1s, and 1000000 - which aren't equivalent */
3234 	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
3235 
3236 	/* Docs recommend 900 MHz and 300 MHz respectively */
3237 	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
3238 		   dev_priv->rps.max_delay << 24 |
3239 		   dev_priv->rps.min_delay << 16);
3240 
3241 	I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
3242 	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70% */
3243 	I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
3244 	I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
3245 
3246 	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
3247 
3248 	/* 5: Enable RPS */
3249 	I915_WRITE(GEN6_RP_CONTROL,
3250 		   GEN6_RP_MEDIA_TURBO |
3251 		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
3252 		   GEN6_RP_MEDIA_IS_GFX |
3253 		   GEN6_RP_ENABLE |
3254 		   GEN6_RP_UP_BUSY_AVG |
3255 		   GEN6_RP_DOWN_IDLE_AVG);
3256 
3257 	/* 6: Ring frequency + overclocking (our driver does this later) */
3258 
3259 	gen6_set_rps(dev, (I915_READ(GEN6_GT_PERF_STATUS) & 0xff00) >> 8);
3260 
3261 	gen6_enable_rps_interrupts(dev);
3262 
3263 	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
3264 }
3265 
3266 static void gen6_enable_rps(struct drm_device *dev)
3267 {
3268 	struct drm_i915_private *dev_priv = dev->dev_private;
3269 	struct intel_ring_buffer *ring;
3270 	u32 rp_state_cap;
3271 	u32 gt_perf_status;
3272 	u32 rc6vids, pcu_mbox, rc6_mask = 0;
3273 	u32 gtfifodbg;
3274 	int rc6_mode;
3275 	int i, ret;
3276 
3277 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
3278 
3279 	/* Here begins a magic sequence of register writes to enable
3280 	 * auto-downclocking.
3281 	 *
3282 	 * Perhaps there might be some value in exposing these to
3283 	 * userspace...
3284 	 */
3285 	I915_WRITE(GEN6_RC_STATE, 0);
3286 
3287 	/* Clear the DBG now so stale errors aren't confused with new ones */
3288 	if ((gtfifodbg = I915_READ(GTFIFODBG))) {
3289 		DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
3290 		I915_WRITE(GTFIFODBG, gtfifodbg);
3291 	}
3292 
3293 	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
3294 
3295 	rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
3296 	gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS);
3297 
3298 	/* In units of 50MHz */
3299 	dev_priv->rps.hw_max = dev_priv->rps.max_delay = rp_state_cap & 0xff;
3300 	dev_priv->rps.min_delay = (rp_state_cap >> 16) & 0xff;
3301 	dev_priv->rps.rp1_delay = (rp_state_cap >>  8) & 0xff;
3302 	dev_priv->rps.rp0_delay = (rp_state_cap >>  0) & 0xff;
3303 	dev_priv->rps.rpe_delay = dev_priv->rps.rp1_delay;
3304 	dev_priv->rps.cur_delay = 0;
3305 
3306 	/* disable the counters and set deterministic thresholds */
3307 	I915_WRITE(GEN6_RC_CONTROL, 0);
3308 
3309 	I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
3310 	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
3311 	I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
3312 	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
3313 	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
3314 
3315 	for_each_ring(ring, dev_priv, i)
3316 		I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
3317 
3318 	I915_WRITE(GEN6_RC_SLEEP, 0);
3319 	I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
3320 	if (IS_IVYBRIDGE(dev))
3321 		I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
3322 	else
3323 		I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
3324 	I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
3325 	I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
3326 
3327 	/* Check if we are enabling RC6 */
3328 	rc6_mode = intel_enable_rc6(dev_priv->dev);
3329 	if (rc6_mode & INTEL_RC6_ENABLE)
3330 		rc6_mask |= GEN6_RC_CTL_RC6_ENABLE;
3331 
3332 	/* We don't use those on Haswell */
3333 	if (!IS_HASWELL(dev)) {
3334 		if (rc6_mode & INTEL_RC6p_ENABLE)
3335 			rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
3336 
3337 		if (rc6_mode & INTEL_RC6pp_ENABLE)
3338 			rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
3339 	}
3340 
3341 	intel_print_rc6_info(dev, rc6_mask);
3342 
3343 	I915_WRITE(GEN6_RC_CONTROL,
3344 		   rc6_mask |
3345 		   GEN6_RC_CTL_EI_MODE(1) |
3346 		   GEN6_RC_CTL_HW_ENABLE);
3347 
3348 	/* Power down if completely idle for over 50ms */
3349 	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
3350 	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
3351 
3352 	ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0);
3353 	if (!ret) {
3354 		pcu_mbox = 0;
3355 		ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox);
3356 		if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */
3357 			DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock max: %dMHz\n",
3358 					 (dev_priv->rps.max_delay & 0xff) * 50,
3359 					 (pcu_mbox & 0xff) * 50);
3360 			dev_priv->rps.hw_max = pcu_mbox & 0xff;
3361 		}
3362 	} else {
3363 		DRM_DEBUG_DRIVER("Failed to set the min frequency\n");
3364 	}
3365 
3366 	dev_priv->rps.power = HIGH_POWER; /* force a reset */
3367 	gen6_set_rps(dev_priv->dev, dev_priv->rps.min_delay);
3368 
3369 	gen6_enable_rps_interrupts(dev);
3370 
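	/* Some Gen6 BIOSes program an RC6 VID below the 450mV minimum; detect
	 * that here and correct it.
	 */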
3371 	rc6vids = 0;
3372 	ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
3373 	if (IS_GEN6(dev) && ret) {
3374 		DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
3375 	} else if (IS_GEN6(dev) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
3376 		DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
3377 			  GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
3378 		rc6vids &= 0xffff00;
3379 		rc6vids |= GEN6_ENCODE_RC6_VID(450);
3380 		ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
3381 		if (ret)
3382 			DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
3383 	}
3384 
3385 	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
3386 }
3387 
3388 void gen6_update_ring_freq(struct drm_device *dev)
3389 {
3390 	struct drm_i915_private *dev_priv = dev->dev_private;
3391 	int min_freq = 15;
3392 	unsigned int gpu_freq;
3393 	unsigned int max_ia_freq, min_ring_freq;
3394 	int scaling_factor = 180;
3395 
3396 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
3397 
3398 #if 0
3399 	policy = cpufreq_cpu_get(0);
3400 	if (policy) {
3401 		max_ia_freq = policy->cpuinfo.max_freq;
3402 		cpufreq_cpu_put(policy);
3403 	} else {
3404 		/*
3405 		 * Default to measured freq if none found, PCU will ensure we
3406 		 * don't go over
3407 		 */
3408 		max_ia_freq = tsc_khz;
3409 	}
3410 #else
3411 	max_ia_freq = tsc_frequency / 1000;
3412 #endif
3413 
3414 	/* Convert from kHz to MHz */
3415 	max_ia_freq /= 1000;
3416 
3417 	min_ring_freq = I915_READ(DCLK) & 0xf;
3418 	/* convert DDR frequency from units of 266.6MHz to bandwidth */
3419 	min_ring_freq = mult_frac(min_ring_freq, 8, 3);
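	/* For example, a (hypothetical) DCLK field of 3 (~800 MHz DDR) becomes
	 * a ring frequency floor of 3 * 8/3 = 8.
	 */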
3420 
3421 	/*
3422 	 * For each potential GPU frequency, load a ring frequency we'd like
3423 	 * to use for memory access.  We do this by specifying the IA frequency
3424 	 * the PCU should use as a reference to determine the ring frequency.
3425 	 */
3426 	for (gpu_freq = dev_priv->rps.max_delay; gpu_freq >= dev_priv->rps.min_delay;
3427 	     gpu_freq--) {
3428 		int diff = dev_priv->rps.max_delay - gpu_freq;
3429 		unsigned int ia_freq = 0, ring_freq = 0;
3430 
3431 		if (INTEL_INFO(dev)->gen >= 8) {
3432 			/* max(2 * GT, DDR). NB: GT is 50MHz units */
3433 			ring_freq = max(min_ring_freq, gpu_freq);
3434 		} else if (IS_HASWELL(dev)) {
3435 			ring_freq = mult_frac(gpu_freq, 5, 4);
3436 			ring_freq = max(min_ring_freq, ring_freq);
3437 			/* leave ia_freq as the default, chosen by cpufreq */
3438 		} else {
3439 			/* On older processors, there is no separate ring
3440 			 * clock domain, so in order to boost the bandwidth
3441 			 * of the ring, we need to upclock the CPU (ia_freq).
3442 			 *
3443 			 * For GPU frequencies less than 750MHz,
3444 			 * just use the lowest ring freq.
3445 			 */
3446 			if (gpu_freq < min_freq)
3447 				ia_freq = 800;
3448 			else
3449 				ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
3450 			ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
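			/* Worked example with hypothetical numbers: with
			 * max_ia_freq = 3000 MHz and scaling_factor = 180,
			 * each 50 MHz GPU step below the max lowers the
			 * requested CPU frequency by 90 MHz, and the result
			 * is then expressed in 100 MHz units for the table.
			 */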
3451 		}
3452 
3453 		sandybridge_pcode_write(dev_priv,
3454 					GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
3455 					ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
3456 					ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
3457 					gpu_freq);
3458 	}
3459 }
3460 
3461 int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
3462 {
3463 	u32 val, rp0;
3464 
3465 	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
3466 
3467 	rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
3468 	/* Clamp to max */
3469 	rp0 = min_t(u32, rp0, 0xea);
3470 
3471 	return rp0;
3472 }
3473 
3474 static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
3475 {
3476 	u32 val, rpe;
3477 
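	/* RPe is assembled from two fuse registers: the low bits come from
	 * FMAX_FUSE_LO and the remaining bits from FMAX_FUSE_HI, shifted up
	 * by 5.
	 */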
3478 	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
3479 	rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
3480 	val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
3481 	rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
3482 
3483 	return rpe;
3484 }
3485 
3486 int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
3487 {
3488 	return vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
3489 }
3490 
3491 static void valleyview_setup_pctx(struct drm_device *dev)
3492 {
3493 	struct drm_i915_private *dev_priv = dev->dev_private;
3494 	struct drm_i915_gem_object *pctx;
3495 	unsigned long pctx_paddr;
3496 	u32 pcbr;
3497 	int pctx_size = 24*1024;
3498 
3499 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
3500 
3501 	pcbr = I915_READ(VLV_PCBR);
3502 	if (pcbr) {
3503 		/* BIOS set it up already, grab the pre-alloc'd space */
3504 		int pcbr_offset;
3505 
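		/* PCBR holds a 4K-aligned address within stolen memory;
		 * convert it to an offset from the stolen base so the
		 * preallocated object can be wrapped below.
		 */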
3506 		pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base;
3507 		pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv->dev,
3508 								      pcbr_offset,
3509 								      I915_GTT_OFFSET_NONE,
3510 								      pctx_size);
3511 		goto out;
3512 	}
3513 
3514 	/*
3515 	 * From the Gunit register HAS:
3516 	 * The Gfx driver is expected to program this register and ensure
3517 	 * proper allocation within Gfx stolen memory.  For example, this
3518 	 * register should be programmed such that the PCBR range does not
3519 	 * overlap with other ranges, such as the frame buffer, protected
3520 	 * memory, or any other relevant ranges.
3521 	 */
3522 	pctx = i915_gem_object_create_stolen(dev, pctx_size);
3523 	if (!pctx) {
3524 		DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
3525 		return;
3526 	}
3527 
3528 	pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start;
3529 	I915_WRITE(VLV_PCBR, pctx_paddr);
3530 
3531 out:
3532 	dev_priv->vlv_pctx = pctx;
3533 }
3534 
3535 static void valleyview_enable_rps(struct drm_device *dev)
3536 {
3537 	struct drm_i915_private *dev_priv = dev->dev_private;
3538 	struct intel_ring_buffer *ring;
3539 	u32 gtfifodbg, val, rc6_mode = 0;
3540 	int i;
3541 
3542 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
3543 
3544 	if ((gtfifodbg = I915_READ(GTFIFODBG))) {
3545 		DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
3546 				 gtfifodbg);
3547 		I915_WRITE(GTFIFODBG, gtfifodbg);
3548 	}
3549 
3550 	/* If VLV, Forcewake all wells, else re-direct to regular path */
3551 	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
3552 
3553 	I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
3554 	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
3555 	I915_WRITE(GEN6_RP_UP_EI, 66000);
3556 	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
3557 
3558 	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
3559 
3560 	I915_WRITE(GEN6_RP_CONTROL,
3561 		   GEN6_RP_MEDIA_TURBO |
3562 		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
3563 		   GEN6_RP_MEDIA_IS_GFX |
3564 		   GEN6_RP_ENABLE |
3565 		   GEN6_RP_UP_BUSY_AVG |
3566 		   GEN6_RP_DOWN_IDLE_CONT);
3567 
3568 	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
3569 	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
3570 	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
3571 
3572 	for_each_ring(ring, dev_priv, i)
3573 		I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
3574 
3575 	I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
3576 
3577 	/* allows RC6 residency counter to work */
3578 	I915_WRITE(VLV_COUNTER_CONTROL,
3579 		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
3580 				      VLV_MEDIA_RC6_COUNT_EN |
3581 				      VLV_RENDER_RC6_COUNT_EN));
3582 	if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
3583 		rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
3584 
3585 	intel_print_rc6_info(dev, rc6_mode);
3586 
3587 	I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
3588 
3589 	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
3590 
3591 	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & 0x10 ? "yes" : "no");
3592 	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
3593 
3594 	dev_priv->rps.cur_delay = (val >> 8) & 0xff;
3595 	DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n",
3596 			 vlv_gpu_freq(dev_priv, dev_priv->rps.cur_delay),
3597 			 dev_priv->rps.cur_delay);
3598 
3599 	dev_priv->rps.max_delay = valleyview_rps_max_freq(dev_priv);
3600 	dev_priv->rps.hw_max = dev_priv->rps.max_delay;
3601 	DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
3602 			 vlv_gpu_freq(dev_priv, dev_priv->rps.max_delay),
3603 			 dev_priv->rps.max_delay);
3604 
3605 	dev_priv->rps.rpe_delay = valleyview_rps_rpe_freq(dev_priv);
3606 	DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
3607 			 vlv_gpu_freq(dev_priv, dev_priv->rps.rpe_delay),
3608 			 dev_priv->rps.rpe_delay);
3609 
3610 	dev_priv->rps.min_delay = valleyview_rps_min_freq(dev_priv);
3611 	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
3612 			 vlv_gpu_freq(dev_priv, dev_priv->rps.min_delay),
3613 			 dev_priv->rps.min_delay);
3614 
3615 	DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
3616 			 vlv_gpu_freq(dev_priv, dev_priv->rps.rpe_delay),
3617 			 dev_priv->rps.rpe_delay);
3618 
3619 	valleyview_set_rps(dev_priv->dev, dev_priv->rps.rpe_delay);
3620 
3621 	gen6_enable_rps_interrupts(dev);
3622 
3623 	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
3624 }
3625 
3626 void ironlake_teardown_rc6(struct drm_device *dev)
3627 {
3628 	struct drm_i915_private *dev_priv = dev->dev_private;
3629 
3630 	if (dev_priv->ips.renderctx) {
3631 		i915_gem_object_unpin(dev_priv->ips.renderctx);
3632 		drm_gem_object_unreference(&dev_priv->ips.renderctx->base);
3633 		dev_priv->ips.renderctx = NULL;
3634 	}
3635 
3636 	if (dev_priv->ips.pwrctx) {
3637 		i915_gem_object_unpin(dev_priv->ips.pwrctx);
3638 		drm_gem_object_unreference(&dev_priv->ips.pwrctx->base);
3639 		dev_priv->ips.pwrctx = NULL;
3640 	}
3641 }
3642 
3643 static void ironlake_disable_rc6(struct drm_device *dev)
3644 {
3645 	struct drm_i915_private *dev_priv = dev->dev_private;
3646 
3647 	if (I915_READ(PWRCTXA)) {
3648 		/* Wake the GPU, prevent RC6, then restore RSTDBYCTL */
3649 		I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) | RCX_SW_EXIT);
3650 		wait_for(((I915_READ(RSTDBYCTL) & RSX_STATUS_MASK) == RSX_STATUS_ON),
3651 			 50);
3652 
3653 		I915_WRITE(PWRCTXA, 0);
3654 		POSTING_READ(PWRCTXA);
3655 
3656 		I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
3657 		POSTING_READ(RSTDBYCTL);
3658 	}
3659 }
3660 
3661 static int ironlake_setup_rc6(struct drm_device *dev)
3662 {
3663 	struct drm_i915_private *dev_priv = dev->dev_private;
3664 
3665 	if (dev_priv->ips.renderctx == NULL)
3666 		dev_priv->ips.renderctx = intel_alloc_context_page(dev);
3667 	if (!dev_priv->ips.renderctx)
3668 		return -ENOMEM;
3669 
3670 	if (dev_priv->ips.pwrctx == NULL)
3671 		dev_priv->ips.pwrctx = intel_alloc_context_page(dev);
3672 	if (!dev_priv->ips.pwrctx) {
3673 		ironlake_teardown_rc6(dev);
3674 		return -ENOMEM;
3675 	}
3676 
3677 	return 0;
3678 }
3679 
3680 static void ironlake_enable_rc6(struct drm_device *dev)
3681 {
3682 	struct drm_i915_private *dev_priv = dev->dev_private;
3683 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
3684 	bool was_interruptible;
3685 	int ret;
3686 
3687 	/* rc6 disabled by default due to repeated reports of hanging during
3688 	 * boot and resume.
3689 	 */
3690 	if (!intel_enable_rc6(dev))
3691 		return;
3692 
3693 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
3694 
3695 	ret = ironlake_setup_rc6(dev);
3696 	if (ret)
3697 		return;
3698 
3699 	was_interruptible = dev_priv->mm.interruptible;
3700 	dev_priv->mm.interruptible = false;
3701 
3702 	/*
3703 	 * GPU can automatically power down the render unit if given a page
3704 	 * to save state.
3705 	 */
3706 	ret = intel_ring_begin(ring, 6);
3707 	if (ret) {
3708 		ironlake_teardown_rc6(dev);
3709 		dev_priv->mm.interruptible = was_interruptible;
3710 		return;
3711 	}
3712 
3713 	intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN);
3714 	intel_ring_emit(ring, MI_SET_CONTEXT);
3715 	intel_ring_emit(ring, i915_gem_obj_ggtt_offset(dev_priv->ips.renderctx) |
3716 			MI_MM_SPACE_GTT |
3717 			MI_SAVE_EXT_STATE_EN |
3718 			MI_RESTORE_EXT_STATE_EN |
3719 			MI_RESTORE_INHIBIT);
3720 	intel_ring_emit(ring, MI_SUSPEND_FLUSH);
3721 	intel_ring_emit(ring, MI_NOOP);
3722 	intel_ring_emit(ring, MI_FLUSH);
3723 	intel_ring_advance(ring);
3724 
3725 	/*
3726 	 * Wait for the command parser to advance past MI_SET_CONTEXT. The HW
3727 	 * does an implicit flush; combined with the MI_FLUSH above, it should be
3728 	 * safe to assume that renderctx is valid.
3729 	 */
3730 	ret = intel_ring_idle(ring);
3731 	dev_priv->mm.interruptible = was_interruptible;
3732 	if (ret) {
3733 		DRM_ERROR("failed to enable ironlake power savings\n");
3734 		ironlake_teardown_rc6(dev);
3735 		return;
3736 	}
3737 
3738 	I915_WRITE(PWRCTXA, i915_gem_obj_ggtt_offset(dev_priv->ips.pwrctx) | PWRCTX_EN);
3739 	I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
3740 
3741 	intel_print_rc6_info(dev, INTEL_RC6_ENABLE);
3742 }
3743 
3744 static unsigned long intel_pxfreq(u32 vidfreq)
3745 {
3746 	unsigned long freq;
3747 	int div = (vidfreq & 0x3f0000) >> 16;
3748 	int post = (vidfreq & 0x3000) >> 12;
3749 	int pre = (vidfreq & 0x7);
3750 
3751 	if (!pre)
3752 		return 0;
3753 
3754 	freq = ((div * 133333) / ((1<<post) * pre));
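	/* freq = div * 133333 / (2^post * pre); 133333 is presumably the
	 * 133.33 MHz reference clock expressed in kHz.
	 */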
3755 
3756 	return freq;
3757 }
3758 
3759 static const struct cparams {
3760 	u16 i;
3761 	u16 t;
3762 	u16 m;
3763 	u16 c;
3764 } cparams[] = {
3765 	{ 1, 1333, 301, 28664 },
3766 	{ 1, 1066, 294, 24460 },
3767 	{ 1, 800, 294, 25192 },
3768 	{ 0, 1333, 276, 27605 },
3769 	{ 0, 1066, 276, 27605 },
3770 	{ 0, 800, 231, 23784 },
3771 };
3772 
3773 static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
3774 {
3775 	u64 total_count, diff, ret;
3776 	u32 count1, count2, count3, m = 0, c = 0;
3777 	unsigned long now = jiffies_to_msecs(jiffies), diff1;
3778 	int i;
3779 
3780 	assert_spin_locked(&mchdev_lock);
3781 
3782 	diff1 = now - dev_priv->ips.last_time1;
3783 
3784 	/* Prevent division-by-zero if we are asking too fast.
3785 	 * Also, we don't get interesting results if we are polling
3786 	 * faster than once in 10ms, so just return the saved value
3787 	 * in such cases.
3788 	 */
3789 	if (diff1 <= 10)
3790 		return dev_priv->ips.chipset_power;
3791 
3792 	count1 = I915_READ(DMIEC);
3793 	count2 = I915_READ(DDREC);
3794 	count3 = I915_READ(CSIEC);
3795 
3796 	total_count = count1 + count2 + count3;
3797 
3798 	/* FIXME: handle per-counter overflow */
3799 	if (total_count < dev_priv->ips.last_count1) {
3800 		diff = ~0UL - dev_priv->ips.last_count1;
3801 		diff += total_count;
3802 	} else {
3803 		diff = total_count - dev_priv->ips.last_count1;
3804 	}
3805 
3806 	for (i = 0; i < ARRAY_SIZE(cparams); i++) {
3807 		if (cparams[i].i == dev_priv->ips.c_m &&
3808 		    cparams[i].t == dev_priv->ips.r_t) {
3809 			m = cparams[i].m;
3810 			c = cparams[i].c;
3811 			break;
3812 		}
3813 	}
3814 
3815 	diff = div_u64(diff, diff1);
3816 	ret = ((m * diff) + c);
3817 	ret = div_u64(ret, 10);
3818 
3819 	dev_priv->ips.last_count1 = total_count;
3820 	dev_priv->ips.last_time1 = now;
3821 
3822 	dev_priv->ips.chipset_power = ret;
3823 
3824 	return ret;
3825 }
3826 
3827 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
3828 {
3829 	unsigned long val;
3830 
3831 	if (dev_priv->info->gen != 5)
3832 		return 0;
3833 
3834 	lockmgr(&mchdev_lock, LK_EXCLUSIVE);
3835 
3836 	val = __i915_chipset_val(dev_priv);
3837 
3838 	lockmgr(&mchdev_lock, LK_RELEASE);
3839 
3840 	return val;
3841 }
3842 
3843 unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
3844 {
3845 	unsigned long m, x, b;
3846 	u32 tsfs;
3847 
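	/* Appears to be a linear estimate from the thermal sensor registers:
	 * slope (m) and intercept (b) come from TSFS, the raw reading (x)
	 * from TR1.
	 */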
3848 	tsfs = I915_READ(TSFS);
3849 
3850 	m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
3851 	x = I915_READ8(TR1);
3852 
3853 	b = tsfs & TSFS_INTR_MASK;
3854 
3855 	return ((m * x) / 127) - b;
3856 }
3857 
3858 static u16 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
3859 {
3860 	static const struct v_table {
3861 		u16 vd; /* in .1 mil */
3862 		u16 vm; /* in .1 mil */
3863 	} v_table[] = {
3864 		{ 0, 0, },
3865 		{ 375, 0, },
3866 		{ 500, 0, },
3867 		{ 625, 0, },
3868 		{ 750, 0, },
3869 		{ 875, 0, },
3870 		{ 1000, 0, },
3871 		{ 1125, 0, },
3872 		{ 4125, 3000, },
3873 		{ 4125, 3000, },
3874 		{ 4125, 3000, },
3875 		{ 4125, 3000, },
3876 		{ 4125, 3000, },
3877 		{ 4125, 3000, },
3878 		{ 4125, 3000, },
3879 		{ 4125, 3000, },
3880 		{ 4125, 3000, },
3881 		{ 4125, 3000, },
3882 		{ 4125, 3000, },
3883 		{ 4125, 3000, },
3884 		{ 4125, 3000, },
3885 		{ 4125, 3000, },
3886 		{ 4125, 3000, },
3887 		{ 4125, 3000, },
3888 		{ 4125, 3000, },
3889 		{ 4125, 3000, },
3890 		{ 4125, 3000, },
3891 		{ 4125, 3000, },
3892 		{ 4125, 3000, },
3893 		{ 4125, 3000, },
3894 		{ 4125, 3000, },
3895 		{ 4125, 3000, },
3896 		{ 4250, 3125, },
3897 		{ 4375, 3250, },
3898 		{ 4500, 3375, },
3899 		{ 4625, 3500, },
3900 		{ 4750, 3625, },
3901 		{ 4875, 3750, },
3902 		{ 5000, 3875, },
3903 		{ 5125, 4000, },
3904 		{ 5250, 4125, },
3905 		{ 5375, 4250, },
3906 		{ 5500, 4375, },
3907 		{ 5625, 4500, },
3908 		{ 5750, 4625, },
3909 		{ 5875, 4750, },
3910 		{ 6000, 4875, },
3911 		{ 6125, 5000, },
3912 		{ 6250, 5125, },
3913 		{ 6375, 5250, },
3914 		{ 6500, 5375, },
3915 		{ 6625, 5500, },
3916 		{ 6750, 5625, },
3917 		{ 6875, 5750, },
3918 		{ 7000, 5875, },
3919 		{ 7125, 6000, },
3920 		{ 7250, 6125, },
3921 		{ 7375, 6250, },
3922 		{ 7500, 6375, },
3923 		{ 7625, 6500, },
3924 		{ 7750, 6625, },
3925 		{ 7875, 6750, },
3926 		{ 8000, 6875, },
3927 		{ 8125, 7000, },
3928 		{ 8250, 7125, },
3929 		{ 8375, 7250, },
3930 		{ 8500, 7375, },
3931 		{ 8625, 7500, },
3932 		{ 8750, 7625, },
3933 		{ 8875, 7750, },
3934 		{ 9000, 7875, },
3935 		{ 9125, 8000, },
3936 		{ 9250, 8125, },
3937 		{ 9375, 8250, },
3938 		{ 9500, 8375, },
3939 		{ 9625, 8500, },
3940 		{ 9750, 8625, },
3941 		{ 9875, 8750, },
3942 		{ 10000, 8875, },
3943 		{ 10125, 9000, },
3944 		{ 10250, 9125, },
3945 		{ 10375, 9250, },
3946 		{ 10500, 9375, },
3947 		{ 10625, 9500, },
3948 		{ 10750, 9625, },
3949 		{ 10875, 9750, },
3950 		{ 11000, 9875, },
3951 		{ 11125, 10000, },
3952 		{ 11250, 10125, },
3953 		{ 11375, 10250, },
3954 		{ 11500, 10375, },
3955 		{ 11625, 10500, },
3956 		{ 11750, 10625, },
3957 		{ 11875, 10750, },
3958 		{ 12000, 10875, },
3959 		{ 12125, 11000, },
3960 		{ 12250, 11125, },
3961 		{ 12375, 11250, },
3962 		{ 12500, 11375, },
3963 		{ 12625, 11500, },
3964 		{ 12750, 11625, },
3965 		{ 12875, 11750, },
3966 		{ 13000, 11875, },
3967 		{ 13125, 12000, },
3968 		{ 13250, 12125, },
3969 		{ 13375, 12250, },
3970 		{ 13500, 12375, },
3971 		{ 13625, 12500, },
3972 		{ 13750, 12625, },
3973 		{ 13875, 12750, },
3974 		{ 14000, 12875, },
3975 		{ 14125, 13000, },
3976 		{ 14250, 13125, },
3977 		{ 14375, 13250, },
3978 		{ 14500, 13375, },
3979 		{ 14625, 13500, },
3980 		{ 14750, 13625, },
3981 		{ 14875, 13750, },
3982 		{ 15000, 13875, },
3983 		{ 15125, 14000, },
3984 		{ 15250, 14125, },
3985 		{ 15375, 14250, },
3986 		{ 15500, 14375, },
3987 		{ 15625, 14500, },
3988 		{ 15750, 14625, },
3989 		{ 15875, 14750, },
3990 		{ 16000, 14875, },
3991 		{ 16125, 15000, },
3992 	};
3993 	if (dev_priv->info->is_mobile)
3994 		return v_table[pxvid].vm;
3995 	else
3996 		return v_table[pxvid].vd;
3997 }
3998 
3999 static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
4000 {
4001 	struct timespec now, diff1;
4002 	u64 diff;
4003 	unsigned long diffms;
4004 	u32 count;
4005 
4006 	assert_spin_locked(&mchdev_lock);
4007 
4008 	getrawmonotonic(&now);
4009 	diff1 = timespec_sub(now, dev_priv->ips.last_time2);
4010 
4011 	/* Don't divide by 0 */
4012 	diffms = diff1.tv_sec * 1000 + diff1.tv_nsec / 1000000;
4013 	if (!diffms)
4014 		return;
4015 
4016 	count = I915_READ(GFXEC);
4017 
4018 	if (count < dev_priv->ips.last_count2) {
4019 		diff = ~0UL - dev_priv->ips.last_count2;
4020 		diff += count;
4021 	} else {
4022 		diff = count - dev_priv->ips.last_count2;
4023 	}
4024 
4025 	dev_priv->ips.last_count2 = count;
4026 	dev_priv->ips.last_time2 = now;
4027 
4028 	/* More magic constants... */
4029 	diff = diff * 1181;
4030 	diff = div_u64(diff, diffms * 10);
4031 	dev_priv->ips.gfx_power = diff;
4032 }
4033 
4034 void i915_update_gfx_val(struct drm_i915_private *dev_priv)
4035 {
4036 	if (dev_priv->info->gen != 5)
4037 		return;
4038 
4039 	lockmgr(&mchdev_lock, LK_EXCLUSIVE);
4040 
4041 	__i915_update_gfx_val(dev_priv);
4042 
4043 	lockmgr(&mchdev_lock, LK_RELEASE);
4044 }
4045 
4046 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
4047 {
4048 	unsigned long t, corr, state1, corr2, state2;
4049 	u32 pxvid, ext_v;
4050 
4051 	assert_spin_locked(&mchdev_lock);
4052 
4053 	pxvid = I915_READ(PXVFREQ_BASE + (dev_priv->rps.cur_delay * 4));
4054 	pxvid = (pxvid >> 24) & 0x7f;
4055 	ext_v = pvid_to_extvid(dev_priv, pxvid);
4056 
4057 	state1 = ext_v;
4058 
4059 	t = i915_mch_val(dev_priv);
4060 
4061 	/* Revel in the empirically derived constants */
4062 
4063 	/* Correction factor in 1/100000 units */
4064 	if (t > 80)
4065 		corr = ((t * 2349) + 135940);
4066 	else if (t >= 50)
4067 		corr = ((t * 964) + 29317);
4068 	else /* < 50 */
4069 		corr = ((t * 301) + 1004);
4070 
4071 	corr = corr * ((150142 * state1) / 10000 - 78642);
4072 	corr /= 100000;
4073 	corr2 = (corr * dev_priv->ips.corr);
4074 
4075 	state2 = (corr2 * state1) / 10000;
4076 	state2 /= 100; /* convert to mW */
4077 
4078 	__i915_update_gfx_val(dev_priv);
4079 
4080 	return dev_priv->ips.gfx_power + state2;
4081 }
4082 
4083 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
4084 {
4085 	unsigned long val;
4086 
4087 	if (dev_priv->info->gen != 5)
4088 		return 0;
4089 
4090 	lockmgr(&mchdev_lock, LK_EXCLUSIVE);
4091 
4092 	val = __i915_gfx_val(dev_priv);
4093 
4094 	lockmgr(&mchdev_lock, LK_RELEASE);
4095 
4096 	return val;
4097 }
4098 
4099 /**
4100  * i915_read_mch_val - return value for IPS use
4101  *
4102  * Calculate and return a value for the IPS driver to use when deciding whether
4103  * we have thermal and power headroom to increase CPU or GPU power budget.
4104  */
4105 unsigned long i915_read_mch_val(void)
4106 {
4107 	struct drm_i915_private *dev_priv;
4108 	unsigned long chipset_val, graphics_val, ret = 0;
4109 
4110 	lockmgr(&mchdev_lock, LK_EXCLUSIVE);
4111 	if (!i915_mch_dev)
4112 		goto out_unlock;
4113 	dev_priv = i915_mch_dev;
4114 
4115 	chipset_val = __i915_chipset_val(dev_priv);
4116 	graphics_val = __i915_gfx_val(dev_priv);
4117 
4118 	ret = chipset_val + graphics_val;
4119 
4120 out_unlock:
4121 	lockmgr(&mchdev_lock, LK_RELEASE);
4122 
4123 	return ret;
4124 }
4125 
4126 /**
4127  * i915_gpu_raise - raise GPU frequency limit
4128  *
4129  * Raise the limit; IPS indicates we have thermal headroom.
4130  */
4131 bool i915_gpu_raise(void)
4132 {
4133 	struct drm_i915_private *dev_priv;
4134 	bool ret = true;
4135 
4136 	lockmgr(&mchdev_lock, LK_EXCLUSIVE);
4137 	if (!i915_mch_dev) {
4138 		ret = false;
4139 		goto out_unlock;
4140 	}
4141 	dev_priv = i915_mch_dev;
4142 
4143 	if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
4144 		dev_priv->ips.max_delay--;
4145 
4146 out_unlock:
4147 	lockmgr(&mchdev_lock, LK_RELEASE);
4148 
4149 	return ret;
4150 }
4151 
4152 /**
4153  * i915_gpu_lower - lower GPU frequency limit
4154  *
4155  * IPS indicates we're close to a thermal limit, so throttle back the GPU
4156  * frequency maximum.
4157  */
4158 bool i915_gpu_lower(void)
4159 {
4160 	struct drm_i915_private *dev_priv;
4161 	bool ret = true;
4162 
4163 	lockmgr(&mchdev_lock, LK_EXCLUSIVE);
4164 	if (!i915_mch_dev) {
4165 		ret = false;
4166 		goto out_unlock;
4167 	}
4168 	dev_priv = i915_mch_dev;
4169 
4170 	if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
4171 		dev_priv->ips.max_delay++;
4172 
4173 out_unlock:
4174 	lockmgr(&mchdev_lock, LK_RELEASE);
4175 
4176 	return ret;
4177 }
4178 
4179 /**
4180  * i915_gpu_busy - indicate GPU business to IPS
4181  *
4182  * Tell the IPS driver whether or not the GPU is busy.
4183  */
4184 bool i915_gpu_busy(void)
4185 {
4186 	struct drm_i915_private *dev_priv;
4187 	struct intel_ring_buffer *ring;
4188 	bool ret = false;
4189 	int i;
4190 
4191 	lockmgr(&mchdev_lock, LK_EXCLUSIVE);
4192 	if (!i915_mch_dev)
4193 		goto out_unlock;
4194 	dev_priv = i915_mch_dev;
4195 
4196 	for_each_ring(ring, dev_priv, i)
4197 		ret |= !list_empty(&ring->request_list);
4198 
4199 out_unlock:
4200 	lockmgr(&mchdev_lock, LK_RELEASE);
4201 
4202 	return ret;
4203 }
4204 
4205 /**
4206  * i915_gpu_turbo_disable - disable graphics turbo
4207  *
4208  * Disable graphics turbo by resetting the max frequency and setting the
4209  * current frequency to the default.
4210  */
4211 bool i915_gpu_turbo_disable(void)
4212 {
4213 	struct drm_i915_private *dev_priv;
4214 	bool ret = true;
4215 
4216 	lockmgr(&mchdev_lock, LK_EXCLUSIVE);
4217 	if (!i915_mch_dev) {
4218 		ret = false;
4219 		goto out_unlock;
4220 	}
4221 	dev_priv = i915_mch_dev;
4222 
4223 	dev_priv->ips.max_delay = dev_priv->ips.fstart;
4224 
4225 	if (!ironlake_set_drps(dev_priv->dev, dev_priv->ips.fstart))
4226 		ret = false;
4227 
4228 out_unlock:
4229 	lockmgr(&mchdev_lock, LK_RELEASE);
4230 
4231 	return ret;
4232 }
4233 
4234 #if 0
4235 /**
4236  * Tells the intel_ips driver that the i915 driver is now loaded, if
4237  * IPS got loaded first.
4238  *
4239  * This awkward dance is so that neither module has to depend on the
4240  * other in order for IPS to do the appropriate communication of
4241  * GPU turbo limits to i915.
4242  */
4243 static void
4244 ips_ping_for_i915_load(void)
4245 {
4246 	void (*link)(void);
4247 
4248 	link = symbol_get(ips_link_to_i915_driver);
4249 	if (link) {
4250 		link();
4251 		symbol_put(ips_link_to_i915_driver);
4252 	}
4253 }
4254 #endif
4255 
4256 void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
4257 {
4258 	/* We only register the i915 ips part with intel-ips once everything is
4259 	 * set up, to avoid intel-ips sneaking in and reading bogus values. */
4260 	lockmgr(&mchdev_lock, LK_EXCLUSIVE);
4261 	i915_mch_dev = dev_priv;
4262 	lockmgr(&mchdev_lock, LK_RELEASE);
4263 }
4264 
4265 void intel_gpu_ips_teardown(void)
4266 {
4267 	lockmgr(&mchdev_lock, LK_EXCLUSIVE);
4268 	i915_mch_dev = NULL;
4269 	lockmgr(&mchdev_lock, LK_RELEASE);
4270 }

4271 static void intel_init_emon(struct drm_device *dev)
4272 {
4273 	struct drm_i915_private *dev_priv = dev->dev_private;
4274 	u32 lcfuse;
4275 	u8 pxw[16];
4276 	int i;
4277 
4278 	/* Disable to program */
4279 	I915_WRITE(ECR, 0);
4280 	POSTING_READ(ECR);
4281 
4282 	/* Program energy weights for various events */
4283 	I915_WRITE(SDEW, 0x15040d00);
4284 	I915_WRITE(CSIEW0, 0x007f0000);
4285 	I915_WRITE(CSIEW1, 0x1e220004);
4286 	I915_WRITE(CSIEW2, 0x04000004);
4287 
4288 	for (i = 0; i < 5; i++)
4289 		I915_WRITE(PEW + (i * 4), 0);
4290 	for (i = 0; i < 3; i++)
4291 		I915_WRITE(DEW + (i * 4), 0);
4292 
4293 	/* Program P-state weights to account for frequency power adjustment */
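	/* Each weight is proportional to VID^2 * freq, scaled so that the
	 * maximum VID (127) at 900 MHz maps to 255 (hence the 127*127*900
	 * divisor below).
	 */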
4294 	for (i = 0; i < 16; i++) {
4295 		u32 pxvidfreq = I915_READ(PXVFREQ_BASE + (i * 4));
4296 		unsigned long freq = intel_pxfreq(pxvidfreq);
4297 		unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
4298 			PXVFREQ_PX_SHIFT;
4299 		unsigned long val;
4300 
4301 		val = vid * vid;
4302 		val *= (freq / 1000);
4303 		val *= 255;
4304 		val /= (127*127*900);
4305 		if (val > 0xff)
4306 			DRM_ERROR("bad pxval: %ld\n", val);
4307 		pxw[i] = val;
4308 	}
4309 	/* Render standby states get 0 weight */
4310 	pxw[14] = 0;
4311 	pxw[15] = 0;
4312 
4313 	for (i = 0; i < 4; i++) {
4314 		u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
4315 			(pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
4316 		I915_WRITE(PXW + (i * 4), val);
4317 	}
4318 
4319 	/* Adjust magic regs to magic values (more experimental results) */
4320 	I915_WRITE(OGW0, 0);
4321 	I915_WRITE(OGW1, 0);
4322 	I915_WRITE(EG0, 0x00007f00);
4323 	I915_WRITE(EG1, 0x0000000e);
4324 	I915_WRITE(EG2, 0x000e0000);
4325 	I915_WRITE(EG3, 0x68000300);
4326 	I915_WRITE(EG4, 0x42000000);
4327 	I915_WRITE(EG5, 0x00140031);
4328 	I915_WRITE(EG6, 0);
4329 	I915_WRITE(EG7, 0);
4330 
4331 	for (i = 0; i < 8; i++)
4332 		I915_WRITE(PXWL + (i * 4), 0);
4333 
4334 	/* Enable PMON + select events */
4335 	I915_WRITE(ECR, 0x80000019);
4336 
4337 	lcfuse = I915_READ(LCFUSE02);
4338 
4339 	dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
4340 }
4341 
4342 void intel_disable_gt_powersave(struct drm_device *dev)
4343 {
4344 	struct drm_i915_private *dev_priv = dev->dev_private;
4345 
4346 	/* Interrupts should be disabled already to avoid re-arming. */
4347 	WARN_ON(dev->irq_enabled);
4348 
4349 	if (IS_IRONLAKE_M(dev)) {
4350 		ironlake_disable_drps(dev);
4351 		ironlake_disable_rc6(dev);
4352 	} else if (INTEL_INFO(dev)->gen >= 6) {
4353 		cancel_delayed_work_sync(&dev_priv->rps.delayed_resume_work);
4354 		cancel_work_sync(&dev_priv->rps.work);
4355 		mutex_lock(&dev_priv->rps.hw_lock);
4356 		if (IS_VALLEYVIEW(dev))
4357 			valleyview_disable_rps(dev);
4358 		else
4359 			gen6_disable_rps(dev);
4360 		dev_priv->rps.enabled = false;
4361 		mutex_unlock(&dev_priv->rps.hw_lock);
4362 	}
4363 }
4364 
4365 static void intel_gen6_powersave_work(struct work_struct *work)
4366 {
4367 	struct drm_i915_private *dev_priv =
4368 		container_of(work, struct drm_i915_private,
4369 			     rps.delayed_resume_work.work);
4370 	struct drm_device *dev = dev_priv->dev;
4371 
4372 	mutex_lock(&dev_priv->rps.hw_lock);
4373 
4374 	if (IS_VALLEYVIEW(dev)) {
4375 		valleyview_enable_rps(dev);
4376 	} else if (IS_BROADWELL(dev)) {
4377 		gen8_enable_rps(dev);
4378 		gen6_update_ring_freq(dev);
4379 	} else {
4380 		gen6_enable_rps(dev);
4381 		gen6_update_ring_freq(dev);
4382 	}
4383 	dev_priv->rps.enabled = true;
4384 	mutex_unlock(&dev_priv->rps.hw_lock);
4385 }
4386 
4387 void intel_enable_gt_powersave(struct drm_device *dev)
4388 {
4389 	struct drm_i915_private *dev_priv = dev->dev_private;
4390 
4391 	if (IS_IRONLAKE_M(dev)) {
4392 		ironlake_enable_drps(dev);
4393 		ironlake_enable_rc6(dev);
4394 		intel_init_emon(dev);
4395 	} else if (IS_GEN6(dev) || IS_GEN7(dev)) {
4396 		if (IS_VALLEYVIEW(dev))
4397 			valleyview_setup_pctx(dev);
4398 		/*
4399 		 * PCU communication is slow and this doesn't need to be
4400 		 * done at any specific time, so do this out of our fast path
4401 		 * to make resume and init faster.
4402 		 */
4403 		schedule_delayed_work(&dev_priv->rps.delayed_resume_work,
4404 				      round_jiffies_up_relative(HZ));
4405 	}
4406 }
4407 
4408 static void ibx_init_clock_gating(struct drm_device *dev)
4409 {
4410 	struct drm_i915_private *dev_priv = dev->dev_private;
4411 
4412 	/*
4413 	 * On Ibex Peak and Cougar Point, we need to disable clock
4414 	 * gating for the panel power sequencer or it will fail to
4415 	 * start up when no ports are active.
4416 	 */
4417 	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
4418 }
4419 
4420 static void g4x_disable_trickle_feed(struct drm_device *dev)
4421 {
4422 	struct drm_i915_private *dev_priv = dev->dev_private;
4423 	int pipe;
4424 
4425 	for_each_pipe(pipe) {
4426 		I915_WRITE(DSPCNTR(pipe),
4427 			   I915_READ(DSPCNTR(pipe)) |
4428 			   DISPPLANE_TRICKLE_FEED_DISABLE);
4429 		intel_flush_primary_plane(dev_priv, pipe);
4430 	}
4431 }
4432 
4433 static void ilk_init_lp_watermarks(struct drm_device *dev)
4434 {
4435 	struct drm_i915_private *dev_priv = dev->dev_private;
4436 
4437 	I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
4438 	I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
4439 	I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
4440 
4441 	/*
4442 	 * Don't touch WM1S_LP_EN here.
4443 	 * Doing so could cause underruns.
4444 	 */
4445 }
4446 
4447 static void ironlake_init_clock_gating(struct drm_device *dev)
4448 {
4449 	struct drm_i915_private *dev_priv = dev->dev_private;
4450 	uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
4451 
4452 	/*
4453 	 * Required for FBC
4454 	 * WaFbcDisableDpfcClockGating:ilk
4455 	 */
4456 	dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
4457 		   ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
4458 		   ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
4459 
4460 	I915_WRITE(PCH_3DCGDIS0,
4461 		   MARIUNIT_CLOCK_GATE_DISABLE |
4462 		   SVSMUNIT_CLOCK_GATE_DISABLE);
4463 	I915_WRITE(PCH_3DCGDIS1,
4464 		   VFMUNIT_CLOCK_GATE_DISABLE);
4465 
4466 	/*
4467 	 * According to the spec the following bits should be set in
4468 	 * order to enable memory self-refresh
4469 	 * The bit 22/21 of 0x42004
4470 	 * The bit 5 of 0x42020
4471 	 * The bit 15 of 0x45000
4472 	 */
4473 	I915_WRITE(ILK_DISPLAY_CHICKEN2,
4474 		   (I915_READ(ILK_DISPLAY_CHICKEN2) |
4475 		    ILK_DPARB_GATE | ILK_VSDPFD_FULL));
4476 	dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
4477 	I915_WRITE(DISP_ARB_CTL,
4478 		   (I915_READ(DISP_ARB_CTL) |
4479 		    DISP_FBC_WM_DIS));
4480 
4481 	ilk_init_lp_watermarks(dev);
4482 
4483 	/*
4484 	 * Based on the document from hardware guys the following bits
4485 	 * should be set unconditionally in order to enable FBC.
4486 	 * The bit 22 of 0x42000
4487 	 * The bit 22 of 0x42004
4488 	 * The bit 7,8,9 of 0x42020.
4489 	 */
4490 	if (IS_IRONLAKE_M(dev)) {
4491 		/* WaFbcAsynchFlipDisableFbcQueue:ilk */
4492 		I915_WRITE(ILK_DISPLAY_CHICKEN1,
4493 			   I915_READ(ILK_DISPLAY_CHICKEN1) |
4494 			   ILK_FBCQ_DIS);
4495 		I915_WRITE(ILK_DISPLAY_CHICKEN2,
4496 			   I915_READ(ILK_DISPLAY_CHICKEN2) |
4497 			   ILK_DPARB_GATE);
4498 	}
4499 
4500 	I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
4501 
4502 	I915_WRITE(ILK_DISPLAY_CHICKEN2,
4503 		   I915_READ(ILK_DISPLAY_CHICKEN2) |
4504 		   ILK_ELPIN_409_SELECT);
4505 	I915_WRITE(_3D_CHICKEN2,
4506 		   _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
4507 		   _3D_CHICKEN2_WM_READ_PIPELINED);
4508 
4509 	/* WaDisableRenderCachePipelinedFlush:ilk */
4510 	I915_WRITE(CACHE_MODE_0,
4511 		   _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
4512 
4513 	g4x_disable_trickle_feed(dev);
4514 
4515 	ibx_init_clock_gating(dev);
4516 }
4517 
4518 static void cpt_init_clock_gating(struct drm_device *dev)
4519 {
4520 	struct drm_i915_private *dev_priv = dev->dev_private;
4521 	int pipe;
4522 	uint32_t val;
4523 
4524 	/*
4525 	 * On Ibex Peak and Cougar Point, we need to disable clock
4526 	 * gating for the panel power sequencer or it will fail to
4527 	 * start up when no ports are active.
4528 	 */
4529 	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
4530 		   PCH_DPLUNIT_CLOCK_GATE_DISABLE |
4531 		   PCH_CPUNIT_CLOCK_GATE_DISABLE);
4532 	I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
4533 		   DPLS_EDP_PPS_FIX_DIS);
4534 	/* The below fixes the weird display corruption, a few pixels shifted
4535 	/* The following fixes weird display corruption (a few pixels shifted
4536 	 * downward) seen only on the LVDS panels of some HP laptops with Ivy Bridge.
4537 	for_each_pipe(pipe) {
4538 		val = I915_READ(TRANS_CHICKEN2(pipe));
4539 		val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
4540 		val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
4541 		if (dev_priv->vbt.fdi_rx_polarity_inverted)
4542 			val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
4543 		val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
4544 		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
4545 		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
4546 		I915_WRITE(TRANS_CHICKEN2(pipe), val);
4547 	}
4548 	/* WADP0ClockGatingDisable */
4549 	for_each_pipe(pipe) {
4550 		I915_WRITE(TRANS_CHICKEN1(pipe),
4551 			   TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
4552 	}
4553 }
4554 
4555 static void gen6_check_mch_setup(struct drm_device *dev)
4556 {
4557 	struct drm_i915_private *dev_priv = dev->dev_private;
4558 	uint32_t tmp;
4559 
4560 	tmp = I915_READ(MCH_SSKPD);
4561 	if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL) {
4562 		DRM_INFO("Wrong MCH_SSKPD value: 0x%08x\n", tmp);
4563 		DRM_INFO("This can cause pipe underruns and display issues.\n");
4564 		DRM_INFO("Please upgrade your BIOS to fix this.\n");
4565 	}
4566 }
4567 
4568 static void gen6_init_clock_gating(struct drm_device *dev)
4569 {
4570 	struct drm_i915_private *dev_priv = dev->dev_private;
4571 	uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
4572 
4573 	I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
4574 
4575 	I915_WRITE(ILK_DISPLAY_CHICKEN2,
4576 		   I915_READ(ILK_DISPLAY_CHICKEN2) |
4577 		   ILK_ELPIN_409_SELECT);
4578 
4579 	/* WaDisableHiZPlanesWhenMSAAEnabled:snb */
4580 	I915_WRITE(_3D_CHICKEN,
4581 		   _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
4582 
4583 	/* WaSetupGtModeTdRowDispatch:snb */
4584 	if (IS_SNB_GT1(dev))
4585 		I915_WRITE(GEN6_GT_MODE,
4586 			   _MASKED_BIT_ENABLE(GEN6_TD_FOUR_ROW_DISPATCH_DISABLE));
4587 
4588 	ilk_init_lp_watermarks(dev);
4589 
4590 	I915_WRITE(CACHE_MODE_0,
4591 		   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
4592 
4593 	I915_WRITE(GEN6_UCGCTL1,
4594 		   I915_READ(GEN6_UCGCTL1) |
4595 		   GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
4596 		   GEN6_CSUNIT_CLOCK_GATE_DISABLE);
4597 
4598 	/* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
4599 	 * gating disable must be set.  Failure to set it results in
4600 	 * flickering pixels due to Z write ordering failures after
4601 	 * some amount of runtime in the Mesa "fire" demo, and Unigine
4602 	 * Sanctuary and Tropics, and apparently anything else with
4603 	 * alpha test or pixel discard.
4604 	 *
4605 	 * According to the spec, bit 11 (RCCUNIT) must also be set,
4606 	 * but we didn't debug actual testcases to find it out.
4607 	 *
4608 	 * Also apply WaDisableVDSUnitClockGating:snb and
4609 	 * WaDisableRCPBUnitClockGating:snb.
4610 	 */
4611 	I915_WRITE(GEN6_UCGCTL2,
4612 		   GEN7_VDSUNIT_CLOCK_GATE_DISABLE |
4613 		   GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
4614 		   GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
4615 
4616 	/* Bspec says we need to always set all mask bits. */
4617 	I915_WRITE(_3D_CHICKEN3, (0xFFFF << 16) |
4618 		   _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL);
4619 
4620 	/*
4621 	 * According to the spec the following bits should be
4622 	 * set in order to enable memory self-refresh and fbc:
4623 	 * The bit21 and bit22 of 0x42000
4624 	 * The bit21 and bit22 of 0x42004
4625 	 * The bit5 and bit7 of 0x42020
4626 	 * The bit14 of 0x70180
4627 	 * The bit14 of 0x71180
4628 	 *
4629 	 * WaFbcAsynchFlipDisableFbcQueue:snb
4630 	 */
4631 	I915_WRITE(ILK_DISPLAY_CHICKEN1,
4632 		   I915_READ(ILK_DISPLAY_CHICKEN1) |
4633 		   ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
4634 	I915_WRITE(ILK_DISPLAY_CHICKEN2,
4635 		   I915_READ(ILK_DISPLAY_CHICKEN2) |
4636 		   ILK_DPARB_GATE | ILK_VSDPFD_FULL);
4637 	I915_WRITE(ILK_DSPCLK_GATE_D,
4638 		   I915_READ(ILK_DSPCLK_GATE_D) |
4639 		   ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
4640 		   ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
4641 
4642 	g4x_disable_trickle_feed(dev);
4643 
4644 	/* The default value should be 0x200 according to docs, but the two
4645 	 * platforms I checked have a 0 for this. (Maybe BIOS overrides?) */
4646 	I915_WRITE(GEN6_GT_MODE, _MASKED_BIT_DISABLE(0xffff));
4647 	I915_WRITE(GEN6_GT_MODE, _MASKED_BIT_ENABLE(GEN6_GT_MODE_HI));
4648 
4649 	cpt_init_clock_gating(dev);
4650 
4651 	gen6_check_mch_setup(dev);
4652 }
4653 
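/* Select hardware scheduling for the TS, VS and DS fixed-function threads;
 * on Haswell also clear GEN7_FF_VS_REF_CNT_FFME (the callers tag this as
 * WaVSRefCountFullforceMissDisable).
 */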
4654 static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
4655 {
4656 	uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
4657 
4658 	reg &= ~GEN7_FF_SCHED_MASK;
4659 	reg |= GEN7_FF_TS_SCHED_HW;
4660 	reg |= GEN7_FF_VS_SCHED_HW;
4661 	reg |= GEN7_FF_DS_SCHED_HW;
4662 
4663 	if (IS_HASWELL(dev_priv->dev))
4664 		reg &= ~GEN7_FF_VS_REF_CNT_FFME;
4665 
4666 	I915_WRITE(GEN7_FF_THREAD_MODE, reg);
4667 }
4668 
4669 static void lpt_init_clock_gating(struct drm_device *dev)
4670 {
4671 	struct drm_i915_private *dev_priv = dev->dev_private;
4672 
4673 	/*
4674 	 * TODO: this bit should only be enabled when really needed, then
4675 	 * disabled when not needed anymore in order to save power.
4676 	 */
4677 	if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE)
4678 		I915_WRITE(SOUTH_DSPCLK_GATE_D,
4679 			   I915_READ(SOUTH_DSPCLK_GATE_D) |
4680 			   PCH_LP_PARTITION_LEVEL_DISABLE);
4681 
4682 	/* WADPOClockGatingDisable:hsw */
4683 	I915_WRITE(_TRANSA_CHICKEN1,
4684 		   I915_READ(_TRANSA_CHICKEN1) |
4685 		   TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
4686 }
4687 
4688 static void lpt_suspend_hw(struct drm_device *dev)
4689 {
4690 	struct drm_i915_private *dev_priv = dev->dev_private;
4691 
4692 	if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) {
4693 		uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D);
4694 
4695 		val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
4696 		I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
4697 	}
4698 }
4699 
4700 static void gen8_init_clock_gating(struct drm_device *dev)
4701 {
4702 	struct drm_i915_private *dev_priv = dev->dev_private;
4703 	enum i915_pipe i;
4704 
4705 	I915_WRITE(WM3_LP_ILK, 0);
4706 	I915_WRITE(WM2_LP_ILK, 0);
4707 	I915_WRITE(WM1_LP_ILK, 0);
4708 
4709 	/* FIXME(BDW): Check all the w/a, some might only apply to
4710 	 * pre-production hw. */
4711 
4712 	WARN(!i915_preliminary_hw_support,
4713 	     "GEN8_CENTROID_PIXEL_OPT_DIS should not be needed for production\n");
4714 	I915_WRITE(HALF_SLICE_CHICKEN3,
4715 		   _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS));
4716 	I915_WRITE(HALF_SLICE_CHICKEN3,
4717 		   _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
4718 	I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE));
4719 
4720 	I915_WRITE(_3D_CHICKEN3,
4721 		   _3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2));
4722 
4723 	I915_WRITE(COMMON_SLICE_CHICKEN2,
4724 		   _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
4725 
4726 	I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
4727 		   _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
4728 
4729 	/* WaSwitchSolVfFArbitrationPriority:bdw */
4730 	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
4731 
4732 	/* WaPsrDPAMaskVBlankInSRD:bdw */
4733 	I915_WRITE(CHICKEN_PAR1_1,
4734 		   I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
4735 
4736 	/* WaPsrDPRSUnmaskVBlankInSRD:bdw */
4737 	for_each_pipe(i) {
4738 		I915_WRITE(CHICKEN_PIPESL_1(i),
4739 			   I915_READ(CHICKEN_PIPESL_1(i)) |
4740 			   DPRS_MASK_VBLANK_SRD);
4741 	}
4742 
4743 	/* Use Force Non-Coherent whenever executing a 3D context. This is a
4744 	 * workaround for a possible hang in the unlikely event a TLB
4745 	 * invalidation occurs during a PSD flush.
4746 	 */
4747 	I915_WRITE(HDC_CHICKEN0,
4748 		   I915_READ(HDC_CHICKEN0) |
4749 		   _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
4750 
4751 	/* WaVSRefCountFullforceMissDisable:bdw */
4752 	/* WaDSRefCountFullforceMissDisable:bdw */
4753 	I915_WRITE(GEN7_FF_THREAD_MODE,
4754 		   I915_READ(GEN7_FF_THREAD_MODE) &
4755 		   ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
4756 }
4757 
4758 static void haswell_init_clock_gating(struct drm_device *dev)
4759 {
4760 	struct drm_i915_private *dev_priv = dev->dev_private;
4761 
4762 	ilk_init_lp_watermarks(dev);
4763 
4764 	/* According to the spec, bit 13 (RCZUNIT) must be set on IVB.
4765 	 * This implements the WaDisableRCZUnitClockGating:hsw workaround.
4766 	 */
4767 	I915_WRITE(GEN6_UCGCTL2, GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
4768 
4769 	/* Apply the WaDisableRHWOOptimizationForRenderHang:hsw workaround. */
4770 	I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
4771 		   GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
4772 
4773 	/* WaApplyL3ControlAndL3ChickenMode:hsw */
4774 	I915_WRITE(GEN7_L3CNTLREG1,
4775 			GEN7_WA_FOR_GEN7_L3_CONTROL);
4776 	I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
4777 			GEN7_WA_L3_CHICKEN_MODE);
4778 
4779 	/* L3 caching of data atomics doesn't work -- disable it. */
4780 	I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
4781 	I915_WRITE(HSW_ROW_CHICKEN3,
4782 		   _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
4783 
4784 	/* This is required by WaCatErrorRejectionIssue:hsw */
4785 	I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
4786 			I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
4787 			GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
4788 
4789 	/* WaVSRefCountFullforceMissDisable:hsw */
4790 	gen7_setup_fixed_func_scheduler(dev_priv);
4791 
4792 	/* WaDisable4x2SubspanOptimization:hsw */
4793 	I915_WRITE(CACHE_MODE_1,
4794 		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
4795 
4796 	/* WaSwitchSolVfFArbitrationPriority:hsw */
4797 	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
4798 
4799 	/* WaRsPkgCStateDisplayPMReq:hsw */
4800 	I915_WRITE(CHICKEN_PAR1_1,
4801 		   I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES);
4802 
4803 	lpt_init_clock_gating(dev);
4804 }
4805 
4806 static void ivybridge_init_clock_gating(struct drm_device *dev)
4807 {
4808 	struct drm_i915_private *dev_priv = dev->dev_private;
4809 	uint32_t snpcr;
4810 
4811 	ilk_init_lp_watermarks(dev);
4812 
4813 	I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
4814 
4815 	/* WaDisableEarlyCull:ivb */
4816 	I915_WRITE(_3D_CHICKEN3,
4817 		   _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
4818 
4819 	/* WaDisableBackToBackFlipFix:ivb */
4820 	I915_WRITE(IVB_CHICKEN3,
4821 		   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
4822 		   CHICKEN3_DGMG_DONE_FIX_DISABLE);
4823 
4824 	/* WaDisablePSDDualDispatchEnable:ivb */
4825 	if (IS_IVB_GT1(dev))
4826 		I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
4827 			   _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
4828 	else
4829 		I915_WRITE(GEN7_HALF_SLICE_CHICKEN1_GT2,
4830 			   _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
4831 
4832 	/* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
4833 	I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
4834 		   GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
4835 
4836 	/* WaApplyL3ControlAndL3ChickenMode:ivb */
4837 	I915_WRITE(GEN7_L3CNTLREG1,
4838 			GEN7_WA_FOR_GEN7_L3_CONTROL);
4839 	I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
4840 		   GEN7_WA_L3_CHICKEN_MODE);
4841 	if (IS_IVB_GT1(dev))
4842 		I915_WRITE(GEN7_ROW_CHICKEN2,
4843 			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
4844 	else
4845 		I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
4846 			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
4847 
4848 
4849 	/* WaForceL3Serialization:ivb */
4850 	I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
4851 		   ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
4852 
4853 	/* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
4854 	 * gating disable must be set.  Failure to set it results in
4855 	 * flickering pixels due to Z write ordering failures after
4856 	 * some amount of runtime in the Mesa "fire" demo, and Unigine
4857 	 * Sanctuary and Tropics, and apparently anything else with
4858 	 * alpha test or pixel discard.
4859 	 *
4860 	 * According to the spec, bit 11 (RCCUNIT) must also be set,
4861 	 * but we didn't debug actual testcases to find it out.
4862 	 *
4863 	 * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
4864 	 * This implements the WaDisableRCZUnitClockGating:ivb workaround.
4865 	 */
4866 	I915_WRITE(GEN6_UCGCTL2,
4867 		   GEN6_RCZUNIT_CLOCK_GATE_DISABLE |
4868 		   GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
4869 
4870 	/* This is required by WaCatErrorRejectionIssue:ivb */
4871 	I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
4872 			I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
4873 			GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
4874 
4875 	g4x_disable_trickle_feed(dev);
4876 
4877 	/* WaVSRefCountFullforceMissDisable:ivb */
4878 	gen7_setup_fixed_func_scheduler(dev_priv);
4879 
4880 	/* WaDisable4x2SubspanOptimization:ivb */
4881 	I915_WRITE(CACHE_MODE_1,
4882 		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
4883 
4884 	snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
4885 	snpcr &= ~GEN6_MBC_SNPCR_MASK;
4886 	snpcr |= GEN6_MBC_SNPCR_MED;
4887 	I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
4888 
4889 	if (!HAS_PCH_NOP(dev))
4890 		cpt_init_clock_gating(dev);
4891 
4892 	gen6_check_mch_setup(dev);
4893 }
4894 
4895 static void valleyview_init_clock_gating(struct drm_device *dev)
4896 {
4897 	struct drm_i915_private *dev_priv = dev->dev_private;
4898 	u32 val;
4899 
4900 	mutex_lock(&dev_priv->rps.hw_lock);
4901 	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
4902 	mutex_unlock(&dev_priv->rps.hw_lock);
4903 	switch ((val >> 6) & 3) {
4904 	case 0:
4905 		dev_priv->mem_freq = 800;
4906 		break;
4907 	case 1:
4908 		dev_priv->mem_freq = 1066;
4909 		break;
4910 	case 2:
4911 		dev_priv->mem_freq = 1333;
4912 		break;
4913 	case 3:
4914 		dev_priv->mem_freq = 1333;
4915 		break;
4916 	}
4917 	DRM_DEBUG_DRIVER("DDR speed: %d MHz", dev_priv->mem_freq);
4918 
4919 	I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE);
4920 
4921 	/* WaDisableEarlyCull:vlv */
4922 	I915_WRITE(_3D_CHICKEN3,
4923 		   _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
4924 
4925 	/* WaDisableBackToBackFlipFix:vlv */
4926 	I915_WRITE(IVB_CHICKEN3,
4927 		   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
4928 		   CHICKEN3_DGMG_DONE_FIX_DISABLE);
4929 
4930 	/* WaDisablePSDDualDispatchEnable:vlv */
4931 	I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
4932 		   _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
4933 				      GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
4934 
4935 	/* Apply the WaDisableRHWOOptimizationForRenderHang:vlv workaround. */
4936 	I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
4937 		   GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
4938 
4939 	/* WaApplyL3ControlAndL3ChickenMode:vlv */
4940 	I915_WRITE(GEN7_L3CNTLREG1, I915_READ(GEN7_L3CNTLREG1) | GEN7_L3AGDIS);
4941 	I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);
4942 
4943 	/* WaForceL3Serialization:vlv */
4944 	I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
4945 		   ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
4946 
4947 	/* WaDisableDopClockGating:vlv */
4948 	I915_WRITE(GEN7_ROW_CHICKEN2,
4949 		   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
4950 
4951 	/* This is required by WaCatErrorRejectionIssue:vlv */
4952 	I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
4953 		   I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
4954 		   GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
4955 
4956 	/* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
4957 	 * gating disable must be set.  Failure to set it results in
4958 	 * flickering pixels due to Z write ordering failures after
4959 	 * some amount of runtime in the Mesa "fire" demo, and Unigine
4960 	 * Sanctuary and Tropics, and apparently anything else with
4961 	 * alpha test or pixel discard.
4962 	 *
4963 	 * According to the spec, bit 11 (RCCUNIT) must also be set,
4964 	 * but we didn't debug actual testcases to find it out.
4965 	 *
4966 	 * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
4967 	 * This implements the WaDisableRCZUnitClockGating:vlv workaround.
4968 	 *
4969 	 * Also apply WaDisableVDSUnitClockGating:vlv and
4970 	 * WaDisableRCPBUnitClockGating:vlv.
4971 	 */
4972 	I915_WRITE(GEN6_UCGCTL2,
4973 		   GEN7_VDSUNIT_CLOCK_GATE_DISABLE |
4974 		   GEN7_TDLUNIT_CLOCK_GATE_DISABLE |
4975 		   GEN6_RCZUNIT_CLOCK_GATE_DISABLE |
4976 		   GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
4977 		   GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
4978 
4979 	I915_WRITE(GEN7_UCGCTL4, GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
4980 
4981 	I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE);
4982 
4983 	I915_WRITE(CACHE_MODE_1,
4984 		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
4985 
4986 	/*
4987 	 * WaDisableVLVClockGating_VBIIssue:vlv
4988 	 * Disable clock gating on th GCFG unit to prevent a delay
4989 	 * Disable clock gating on the GCFG unit to prevent a delay
4990 	 */
4991 	I915_WRITE(VLV_GUNIT_CLOCK_GATE, 0xffffffff);
4992 
4993 	/* Conservative clock gating settings for now */
4994 	I915_WRITE(0x9400, 0xffffffff);
4995 	I915_WRITE(0x9404, 0xffffffff);
4996 	I915_WRITE(0x9408, 0xffffffff);
4997 	I915_WRITE(0x940c, 0xffffffff);
4998 	I915_WRITE(0x9410, 0xffffffff);
4999 	I915_WRITE(0x9414, 0xffffffff);
5000 	I915_WRITE(0x9418, 0xffffffff);
5001 }
5002 
5003 static void g4x_init_clock_gating(struct drm_device *dev)
5004 {
5005 	struct drm_i915_private *dev_priv = dev->dev_private;
5006 	uint32_t dspclk_gate;
5007 
5008 	I915_WRITE(RENCLK_GATE_D1, 0);
5009 	I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
5010 		   GS_UNIT_CLOCK_GATE_DISABLE |
5011 		   CL_UNIT_CLOCK_GATE_DISABLE);
5012 	I915_WRITE(RAMCLK_GATE_D, 0);
5013 	dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
5014 		OVRUNIT_CLOCK_GATE_DISABLE |
5015 		OVCUNIT_CLOCK_GATE_DISABLE;
5016 	if (IS_GM45(dev))
5017 		dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
5018 	I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
5019 
5020 	/* WaDisableRenderCachePipelinedFlush */
5021 	I915_WRITE(CACHE_MODE_0,
5022 		   _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
5023 
5024 	g4x_disable_trickle_feed(dev);
5025 }
5026 
5027 static void crestline_init_clock_gating(struct drm_device *dev)
5028 {
5029 	struct drm_i915_private *dev_priv = dev->dev_private;
5030 
5031 	I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
5032 	I915_WRITE(RENCLK_GATE_D2, 0);
5033 	I915_WRITE(DSPCLK_GATE_D, 0);
5034 	I915_WRITE(RAMCLK_GATE_D, 0);
5035 	I915_WRITE16(DEUC, 0);
5036 	I915_WRITE(MI_ARB_STATE,
5037 		   _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
5038 }
5039 
5040 static void broadwater_init_clock_gating(struct drm_device *dev)
5041 {
5042 	struct drm_i915_private *dev_priv = dev->dev_private;
5043 
5044 	I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
5045 		   I965_RCC_CLOCK_GATE_DISABLE |
5046 		   I965_RCPB_CLOCK_GATE_DISABLE |
5047 		   I965_ISC_CLOCK_GATE_DISABLE |
5048 		   I965_FBC_CLOCK_GATE_DISABLE);
5049 	I915_WRITE(RENCLK_GATE_D2, 0);
5050 	I915_WRITE(MI_ARB_STATE,
5051 		   _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
5052 }
5053 
5054 static void gen3_init_clock_gating(struct drm_device *dev)
5055 {
5056 	struct drm_i915_private *dev_priv = dev->dev_private;
5057 	u32 dstate = I915_READ(D_STATE);
5058 
5059 	dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
5060 		DSTATE_DOT_CLOCK_GATING;
5061 	I915_WRITE(D_STATE, dstate);
5062 
5063 	if (IS_PINEVIEW(dev))
5064 		I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));
5065 
5066 	/* IIR "flip pending" means done if this bit is set */
5067 	I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
5068 }
5069 
5070 static void i85x_init_clock_gating(struct drm_device *dev)
5071 {
5072 	struct drm_i915_private *dev_priv = dev->dev_private;
5073 
5074 	I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
5075 }
5076 
5077 static void i830_init_clock_gating(struct drm_device *dev)
5078 {
5079 	struct drm_i915_private *dev_priv = dev->dev_private;
5080 
5081 	I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE);
5082 }
5083 
5084 void intel_init_clock_gating(struct drm_device *dev)
5085 {
5086 	struct drm_i915_private *dev_priv = dev->dev_private;
5087 
5088 	dev_priv->display.init_clock_gating(dev);
5089 }
5090 
5091 void intel_suspend_hw(struct drm_device *dev)
5092 {
5093 	if (HAS_PCH_LPT(dev))
5094 		lpt_suspend_hw(dev);
5095 }
5096 
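/* Iterate the power wells whose ->domains mask intersects domain_mask, in
 * declaration order (or in reverse order for the _rev variant).
 */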
5097 #define for_each_power_well(i, power_well, domain_mask, power_domains)	\
5098 	for (i = 0;							\
5099 	     i < (power_domains)->power_well_count &&			\
5100 		 ((power_well) = &(power_domains)->power_wells[i]);	\
5101 	     i++)							\
5102 		if ((power_well)->domains & (domain_mask))
5103 
5104 #define for_each_power_well_rev(i, power_well, domain_mask, power_domains) \
5105 	for (i = (power_domains)->power_well_count - 1;			 \
5106 	     i >= 0 && ((power_well) = &(power_domains)->power_wells[i]);\
5107 	     i--)							 \
5108 		if ((power_well)->domains & (domain_mask))
5109 
5110 /**
5111  * We should only use the power well if we explicitly asked the hardware to
5112  * enable it, so check if it's enabled and also check if we've requested it to
5113  * be enabled.
5114  */
5115 static bool hsw_power_well_enabled(struct drm_device *dev,
5116 				   struct i915_power_well *power_well)
5117 {
5118 	struct drm_i915_private *dev_priv = dev->dev_private;
5119 
5120 	return I915_READ(HSW_PWR_WELL_DRIVER) ==
5121 		     (HSW_PWR_WELL_ENABLE_REQUEST | HSW_PWR_WELL_STATE_ENABLED);
5122 }
5123 
5124 bool intel_display_power_enabled_sw(struct drm_device *dev,
5125 				    enum intel_display_power_domain domain)
5126 {
5127 	struct drm_i915_private *dev_priv = dev->dev_private;
5128 	struct i915_power_domains *power_domains;
5129 
5130 	power_domains = &dev_priv->power_domains;
5131 
5132 	return power_domains->domain_use_count[domain];
5133 }
5134 
5135 bool intel_display_power_enabled(struct drm_device *dev,
5136 				 enum intel_display_power_domain domain)
5137 {
5138 	struct drm_i915_private *dev_priv = dev->dev_private;
5139 	struct i915_power_domains *power_domains;
5140 	struct i915_power_well *power_well;
5141 	bool is_enabled;
5142 	int i;
5143 
5144 	power_domains = &dev_priv->power_domains;
5145 
5146 	is_enabled = true;
5147 
5148 	mutex_lock(&power_domains->lock);
5149 	for_each_power_well_rev(i, power_well, BIT(domain), power_domains) {
5150 		if (power_well->always_on)
5151 			continue;
5152 
5153 		if (!power_well->is_enabled(dev, power_well)) {
5154 			is_enabled = false;
5155 			break;
5156 		}
5157 	}
5158 	mutex_unlock(&power_domains->lock);
5159 
5160 	return is_enabled;
5161 }
5162 
5163 static void hsw_power_well_post_enable(struct drm_i915_private *dev_priv)
5164 {
5165 	struct drm_device *dev = dev_priv->dev;
5166 
5167 	/*
5168 	 * After we re-enable the power well, if we touch VGA register 0x3d5
5169 	 * we'll get unclaimed register interrupts. This stops after we write
5170 	 * anything to the VGA MSR register. The vgacon module uses this
5171 	 * register all the time, so if we unbind our driver and, as a
5172 	 * consequence, bind vgacon, we'll get stuck in an infinite loop at
5173 	 * console_unlock(). So here we touch the VGA MSR register, making
5174 	 * sure vgacon can keep working normally without triggering interrupts
5175 	 * and error messages.
5176 	 */
5177 #if 0
5178 	vga_get_uninterruptible(dev->pdev, VGA_RSRC_LEGACY_IO);
5179 	outb(inb(VGA_MSR_READ), VGA_MSR_WRITE);
5180 	vga_put(dev->pdev, VGA_RSRC_LEGACY_IO);
5181 #else
5182 	outb(VGA_MSR_WRITE, inb(VGA_MSR_READ));
5183 #endif
5184 
5185 	if (IS_BROADWELL(dev)) {
5186 		lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
5187 		I915_WRITE(GEN8_DE_PIPE_IMR(PIPE_B),
5188 			   dev_priv->de_irq_mask[PIPE_B]);
5189 		I915_WRITE(GEN8_DE_PIPE_IER(PIPE_B),
5190 			   ~dev_priv->de_irq_mask[PIPE_B] |
5191 			   GEN8_PIPE_VBLANK);
5192 		I915_WRITE(GEN8_DE_PIPE_IMR(PIPE_C),
5193 			   dev_priv->de_irq_mask[PIPE_C]);
5194 		I915_WRITE(GEN8_DE_PIPE_IER(PIPE_C),
5195 			   ~dev_priv->de_irq_mask[PIPE_C] |
5196 			   GEN8_PIPE_VBLANK);
5197 		POSTING_READ(GEN8_DE_PIPE_IER(PIPE_C));
5198 		lockmgr(&dev_priv->irq_lock, LK_RELEASE);
5199 	}
5200 }
5201 
5202 static void hsw_power_well_post_disable(struct drm_i915_private *dev_priv)
5203 {
5204 	struct drm_device *dev = dev_priv->dev;
5205 	enum i915_pipe p;
5206 
5207 	/*
5208 	 * After this, the registers on the pipes that are part of the power
5209 	 * well will become zero, so we have to adjust our counters according to
5210 	 * that.
5211 	 *
5212 	 * FIXME: Should we do this in general in drm_vblank_post_modeset?
5213 	 */
5214 	lockmgr(&dev->vbl_lock, LK_EXCLUSIVE);
5215 	for_each_pipe(p)
5216 		if (p != PIPE_A)
5217 			dev->vblank[p].last = 0;
5218 	lockmgr(&dev->vbl_lock, LK_RELEASE);
5219 }
5220 
5221 static void hsw_set_power_well(struct drm_device *dev,
5222 			       struct i915_power_well *power_well, bool enable)
5223 {
5224 	struct drm_i915_private *dev_priv = dev->dev_private;
5225 	bool is_enabled, enable_requested;
5226 	uint32_t tmp;
5227 
5228 	WARN_ON(dev_priv->pc8.enabled);
5229 
5230 	tmp = I915_READ(HSW_PWR_WELL_DRIVER);
5231 	is_enabled = tmp & HSW_PWR_WELL_STATE_ENABLED;
5232 	enable_requested = tmp & HSW_PWR_WELL_ENABLE_REQUEST;
5233 
5234 	if (enable) {
5235 		if (!enable_requested)
5236 			I915_WRITE(HSW_PWR_WELL_DRIVER,
5237 				   HSW_PWR_WELL_ENABLE_REQUEST);
5238 
5239 		if (!is_enabled) {
5240 			DRM_DEBUG_KMS("Enabling power well\n");
5241 			if (wait_for((I915_READ(HSW_PWR_WELL_DRIVER) &
5242 				      HSW_PWR_WELL_STATE_ENABLED), 20))
5243 				DRM_ERROR("Timeout enabling power well\n");
5244 		}
5245 
5246 		hsw_power_well_post_enable(dev_priv);
5247 	} else {
5248 		if (enable_requested) {
5249 			I915_WRITE(HSW_PWR_WELL_DRIVER, 0);
5250 			POSTING_READ(HSW_PWR_WELL_DRIVER);
5251 			DRM_DEBUG_KMS("Requesting to disable the power well\n");
5252 
5253 			hsw_power_well_post_disable(dev_priv);
5254 		}
5255 	}
5256 }
5257 
5258 static void __intel_power_well_get(struct drm_device *dev,
5259 				   struct i915_power_well *power_well)
5260 {
5261 	struct drm_i915_private *dev_priv = dev->dev_private;
5262 
5263 	if (!power_well->count++ && power_well->set) {
5264 		hsw_disable_package_c8(dev_priv);
5265 		power_well->set(dev, power_well, true);
5266 	}
5267 }
5268 
5269 static void __intel_power_well_put(struct drm_device *dev,
5270 				   struct i915_power_well *power_well)
5271 {
5272 	struct drm_i915_private *dev_priv = dev->dev_private;
5273 
5274 	WARN_ON(!power_well->count);
5275 
5276 	if (!--power_well->count && power_well->set &&
5277 	    i915_disable_power_well) {
5278 		power_well->set(dev, power_well, false);
5279 		hsw_enable_package_c8(dev_priv);
5280 	}
5281 }
5282 
5283 void intel_display_power_get(struct drm_device *dev,
5284 			     enum intel_display_power_domain domain)
5285 {
5286 	struct drm_i915_private *dev_priv = dev->dev_private;
5287 	struct i915_power_domains *power_domains;
5288 	struct i915_power_well *power_well;
5289 	int i;
5290 
5291 	power_domains = &dev_priv->power_domains;
5292 
5293 	mutex_lock(&power_domains->lock);
5294 
5295 	for_each_power_well(i, power_well, BIT(domain), power_domains)
5296 		__intel_power_well_get(dev, power_well);
5297 
5298 	power_domains->domain_use_count[domain]++;
5299 
5300 	mutex_unlock(&power_domains->lock);
5301 }
5302 
5303 void intel_display_power_put(struct drm_device *dev,
5304 			     enum intel_display_power_domain domain)
5305 {
5306 	struct drm_i915_private *dev_priv = dev->dev_private;
5307 	struct i915_power_domains *power_domains;
5308 	struct i915_power_well *power_well;
5309 	int i;
5310 
5311 	power_domains = &dev_priv->power_domains;
5312 
5313 	mutex_lock(&power_domains->lock);
5314 
5315 	WARN_ON(!power_domains->domain_use_count[domain]);
5316 	power_domains->domain_use_count[domain]--;
5317 
5318 	for_each_power_well_rev(i, power_well, BIT(domain), power_domains)
5319 		__intel_power_well_put(dev, power_well);
5320 
5321 	mutex_unlock(&power_domains->lock);
5322 }
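
/*
 * Illustrative usage sketch (hypothetical caller): domain references are
 * refcounted, so code that needs the display hardware powered brackets its
 * register accesses with a get/put pair, e.g.
 *
 *	intel_display_power_get(dev, POWER_DOMAIN_AUDIO);
 *	... program registers that live in the display power well ...
 *	intel_display_power_put(dev, POWER_DOMAIN_AUDIO);
 *
 * Only the first get on a well calls ->set(..., true), and only the last
 * put (and only when i915_disable_power_well allows it) calls
 * ->set(..., false), per the ->count handling above.
 */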
5323 
5324 static struct i915_power_domains *hsw_pwr;
5325 
5326 /* Display audio driver power well request */
5327 void i915_request_power_well(void)
5328 {
5329 	struct drm_i915_private *dev_priv;
5330 
5331 	if (WARN_ON(!hsw_pwr))
5332 		return;
5333 
5334 	dev_priv = container_of(hsw_pwr, struct drm_i915_private,
5335 				power_domains);
5336 	intel_display_power_get(dev_priv->dev, POWER_DOMAIN_AUDIO);
5337 }
5338 
5339 /* Display audio driver power well release */
5340 void i915_release_power_well(void)
5341 {
5342 	struct drm_i915_private *dev_priv;
5343 
5344 	if (WARN_ON(!hsw_pwr))
5345 		return;
5346 
5347 	dev_priv = container_of(hsw_pwr, struct drm_i915_private,
5348 				power_domains);
5349 	intel_display_power_put(dev_priv->dev, POWER_DOMAIN_AUDIO);
5350 }
5351 
5352 static struct i915_power_well i9xx_always_on_power_well[] = {
5353 	{
5354 		.name = "always-on",
5355 		.always_on = 1,
5356 		.domains = POWER_DOMAIN_MASK,
5357 	},
5358 };
5359 
5360 static struct i915_power_well hsw_power_wells[] = {
5361 	{
5362 		.name = "always-on",
5363 		.always_on = 1,
5364 		.domains = HSW_ALWAYS_ON_POWER_DOMAINS,
5365 	},
5366 	{
5367 		.name = "display",
5368 		.domains = POWER_DOMAIN_MASK & ~HSW_ALWAYS_ON_POWER_DOMAINS,
5369 		.is_enabled = hsw_power_well_enabled,
5370 		.set = hsw_set_power_well,
5371 	},
5372 };
5373 
5374 static struct i915_power_well bdw_power_wells[] = {
5375 	{
5376 		.name = "always-on",
5377 		.always_on = 1,
5378 		.domains = BDW_ALWAYS_ON_POWER_DOMAINS,
5379 	},
5380 	{
5381 		.name = "display",
5382 		.domains = POWER_DOMAIN_MASK & ~BDW_ALWAYS_ON_POWER_DOMAINS,
5383 		.is_enabled = hsw_power_well_enabled,
5384 		.set = hsw_set_power_well,
5385 	},
5386 };
5387 
5388 #define set_power_wells(power_domains, __power_wells) ({		\
5389 	(power_domains)->power_wells = (__power_wells);			\
5390 	(power_domains)->power_well_count = ARRAY_SIZE(__power_wells);	\
5391 })
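
/*
 * Illustrative expansion: set_power_wells(power_domains, hsw_power_wells)
 * just records the table pointer and its ARRAY_SIZE() as the well count,
 * so intel_power_domains_init() below only has to pick the right table for
 * the platform.
 */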
5392 
5393 int intel_power_domains_init(struct drm_device *dev)
5394 {
5395 	struct drm_i915_private *dev_priv = dev->dev_private;
5396 	struct i915_power_domains *power_domains = &dev_priv->power_domains;
5397 
5398 	lockinit(&power_domains->lock, "i915pl", 0, LK_CANRECURSE);
5399 
5400 	/*
5401 	 * The enabling order will be from lower to higher indexed wells,
5402 	 * the disabling order is reversed.
5403 	 */
5404 	if (IS_HASWELL(dev)) {
5405 		set_power_wells(power_domains, hsw_power_wells);
5406 		hsw_pwr = power_domains;
5407 	} else if (IS_BROADWELL(dev)) {
5408 		set_power_wells(power_domains, bdw_power_wells);
5409 		hsw_pwr = power_domains;
5410 	} else {
5411 		set_power_wells(power_domains, i9xx_always_on_power_well);
5412 	}
5413 
5414 	return 0;
5415 }
5416 
5417 void intel_power_domains_remove(struct drm_device *dev)
5418 {
5419 	hsw_pwr = NULL;
5420 }
5421 
5422 static void intel_power_domains_resume(struct drm_device *dev)
5423 {
5424 	struct drm_i915_private *dev_priv = dev->dev_private;
5425 	struct i915_power_domains *power_domains = &dev_priv->power_domains;
5426 	struct i915_power_well *power_well;
5427 	int i;
5428 
5429 	mutex_lock(&power_domains->lock);
5430 	for_each_power_well(i, power_well, POWER_DOMAIN_MASK, power_domains) {
5431 		if (power_well->set)
5432 			power_well->set(dev, power_well, power_well->count > 0);
5433 	}
5434 	mutex_unlock(&power_domains->lock);
5435 }
5436 
5437 /*
5438  * Starting with Haswell, we have a "Power Down Well" that can be turned off
5439  * when not needed anymore. We have 4 registers that can request the power well
5440  * to be enabled, and it will only be disabled if none of the registers is
5441  * requesting it to be enabled.
5442  */
5443 void intel_power_domains_init_hw(struct drm_device *dev)
5444 {
5445 	struct drm_i915_private *dev_priv = dev->dev_private;
5446 
5447 	/* For now, we need the power well to be always enabled. */
5448 	intel_display_set_init_power(dev, true);
5449 	intel_power_domains_resume(dev);
5450 
5451 	if (!(IS_HASWELL(dev) || IS_BROADWELL(dev)))
5452 		return;
5453 
5454 	/* We're taking over the BIOS, so clear any requests made by it since
5455 	 * the driver is in charge now. */
5456 	if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE_REQUEST)
5457 		I915_WRITE(HSW_PWR_WELL_BIOS, 0);
5458 }
5459 
5460 /* Disables PC8 so we can use the GMBUS and DP AUX interrupts. */
5461 void intel_aux_display_runtime_get(struct drm_i915_private *dev_priv)
5462 {
5463 	hsw_disable_package_c8(dev_priv);
5464 }
5465 
5466 void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv)
5467 {
5468 	hsw_enable_package_c8(dev_priv);
5469 }
5470 
5471 void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
5472 {
5473 #if 0
5474 	struct drm_device *dev = dev_priv->dev;
5475 	struct device *device = &dev->pdev->dev;
5476 
5477 	if (!HAS_RUNTIME_PM(dev))
5478 		return;
5479 
5480 	pm_runtime_get_sync(device);
5481 	WARN(dev_priv->pm.suspended, "Device still suspended.\n");
5482 #endif
5483 }
5484 
5485 void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
5486 {
5487 #if 0
5488 	struct drm_device *dev = dev_priv->dev;
5489 	struct device *device = &dev->pdev->dev;
5490 
5491 	if (!HAS_RUNTIME_PM(dev))
5492 		return;
5493 
5494 	pm_runtime_mark_last_busy(device);
5495 	pm_runtime_put_autosuspend(device);
5496 #endif
5497 }
5498 
5499 void intel_init_runtime_pm(struct drm_i915_private *dev_priv)
5500 {
5501 	struct drm_device *dev = dev_priv->dev;
5502 #if 0
5503 	struct device *device = &dev->pdev->dev;
5504 #endif
5505 
5506 	dev_priv->pm.suspended = false;
5507 
5508 	if (!HAS_RUNTIME_PM(dev))
5509 		return;
5510 
5511 #if 0
5512 	pm_runtime_set_active(device);
5513 
5514 	pm_runtime_set_autosuspend_delay(device, 10000); /* 10s */
5515 	pm_runtime_mark_last_busy(device);
5516 	pm_runtime_use_autosuspend(device);
5517 #endif
5518 }
5519 
5520 void intel_fini_runtime_pm(struct drm_i915_private *dev_priv)
5521 {
5522 #if 0
5523 	struct drm_device *dev = dev_priv->dev;
5524 	struct device *device = &dev->pdev->dev;
5525 
5526 	if (!HAS_RUNTIME_PM(dev))
5527 		return;
5528 
5529 	/* Make sure we're not suspended first. */
5530 	pm_runtime_get_sync(device);
5531 	pm_runtime_disable(device);
5532 #endif
5533 }
5534 
5535 /* Set up chip specific power management-related functions */
5536 void intel_init_pm(struct drm_device *dev)
5537 {
5538 	struct drm_i915_private *dev_priv = dev->dev_private;
5539 
5540 	if (HAS_FBC(dev)) {
5541 		if (INTEL_INFO(dev)->gen >= 7) {
5542 			dev_priv->display.fbc_enabled = ironlake_fbc_enabled;
5543 			dev_priv->display.enable_fbc = gen7_enable_fbc;
5544 			dev_priv->display.disable_fbc = ironlake_disable_fbc;
5545 		} else if (INTEL_INFO(dev)->gen >= 5) {
5546 			dev_priv->display.fbc_enabled = ironlake_fbc_enabled;
5547 			dev_priv->display.enable_fbc = ironlake_enable_fbc;
5548 			dev_priv->display.disable_fbc = ironlake_disable_fbc;
5549 		} else if (IS_GM45(dev)) {
5550 			dev_priv->display.fbc_enabled = g4x_fbc_enabled;
5551 			dev_priv->display.enable_fbc = g4x_enable_fbc;
5552 			dev_priv->display.disable_fbc = g4x_disable_fbc;
5553 		} else {
5554 			dev_priv->display.fbc_enabled = i8xx_fbc_enabled;
5555 			dev_priv->display.enable_fbc = i8xx_enable_fbc;
5556 			dev_priv->display.disable_fbc = i8xx_disable_fbc;
5557 
5558 			/* This value was pulled out of someone's hat */
5559 			I915_WRITE(FBC_CONTROL, 500 << FBC_CTL_INTERVAL_SHIFT);
5560 		}
5561 	}
5562 
5563 	/* For cxsr */
5564 	if (IS_PINEVIEW(dev))
5565 		i915_pineview_get_mem_freq(dev);
5566 	else if (IS_GEN5(dev))
5567 		i915_ironlake_get_mem_freq(dev);
5568 
5569 	/* For FIFO watermark updates */
5570 	if (HAS_PCH_SPLIT(dev)) {
5571 		intel_setup_wm_latency(dev);
5572 
5573 		if ((IS_GEN5(dev) && dev_priv->wm.pri_latency[1] &&
5574 		     dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) ||
5575 		    (!IS_GEN5(dev) && dev_priv->wm.pri_latency[0] &&
5576 		     dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) {
5577 			dev_priv->display.update_wm = ilk_update_wm;
5578 			dev_priv->display.update_sprite_wm = ilk_update_sprite_wm;
5579 		} else {
5580 			DRM_DEBUG_KMS("Failed to read display plane latency. "
5581 				      "Disable CxSR\n");
5582 		}
5583 
5584 		if (IS_GEN5(dev))
5585 			dev_priv->display.init_clock_gating = ironlake_init_clock_gating;
5586 		else if (IS_GEN6(dev))
5587 			dev_priv->display.init_clock_gating = gen6_init_clock_gating;
5588 		else if (IS_IVYBRIDGE(dev))
5589 			dev_priv->display.init_clock_gating = ivybridge_init_clock_gating;
5590 		else if (IS_HASWELL(dev))
5591 			dev_priv->display.init_clock_gating = haswell_init_clock_gating;
5592 		else if (INTEL_INFO(dev)->gen == 8)
5593 			dev_priv->display.init_clock_gating = gen8_init_clock_gating;
5594 	} else if (IS_VALLEYVIEW(dev)) {
5595 		dev_priv->display.update_wm = valleyview_update_wm;
5596 		dev_priv->display.init_clock_gating =
5597 			valleyview_init_clock_gating;
5598 	} else if (IS_PINEVIEW(dev)) {
5599 		if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev),
5600 					    dev_priv->is_ddr3,
5601 					    dev_priv->fsb_freq,
5602 					    dev_priv->mem_freq)) {
5603 			DRM_INFO("failed to find known CxSR latency "
5604 				 "(found ddr%s fsb freq %d, mem freq %d), "
5605 				 "disabling CxSR\n",
5606 				 (dev_priv->is_ddr3 == 1) ? "3" : "2",
5607 				 dev_priv->fsb_freq, dev_priv->mem_freq);
5608 			/* Disable CxSR and never update its watermark again */
5609 			pineview_disable_cxsr(dev);
5610 			dev_priv->display.update_wm = NULL;
5611 		} else
5612 			dev_priv->display.update_wm = pineview_update_wm;
5613 		dev_priv->display.init_clock_gating = gen3_init_clock_gating;
5614 	} else if (IS_G4X(dev)) {
5615 		dev_priv->display.update_wm = g4x_update_wm;
5616 		dev_priv->display.init_clock_gating = g4x_init_clock_gating;
5617 	} else if (IS_GEN4(dev)) {
5618 		dev_priv->display.update_wm = i965_update_wm;
5619 		if (IS_CRESTLINE(dev))
5620 			dev_priv->display.init_clock_gating = crestline_init_clock_gating;
5621 		else if (IS_BROADWATER(dev))
5622 			dev_priv->display.init_clock_gating = broadwater_init_clock_gating;
5623 	} else if (IS_GEN3(dev)) {
5624 		dev_priv->display.update_wm = i9xx_update_wm;
5625 		dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
5626 		dev_priv->display.init_clock_gating = gen3_init_clock_gating;
5627 	} else if (IS_GEN2(dev)) {
5628 		if (INTEL_INFO(dev)->num_pipes == 1) {
5629 			dev_priv->display.update_wm = i845_update_wm;
5630 			dev_priv->display.get_fifo_size = i845_get_fifo_size;
5631 		} else {
5632 			dev_priv->display.update_wm = i9xx_update_wm;
5633 			dev_priv->display.get_fifo_size = i830_get_fifo_size;
5634 		}
5635 
5636 		if (IS_I85X(dev) || IS_I865G(dev))
5637 			dev_priv->display.init_clock_gating = i85x_init_clock_gating;
5638 		else
5639 			dev_priv->display.init_clock_gating = i830_init_clock_gating;
5640 	} else {
5641 		DRM_ERROR("unexpected fall-through in intel_init_pm\n");
5642 	}
5643 }
5644 
5645 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val)
5646 {
5647 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
5648 
5649 	if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
5650 		DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n");
5651 		return -EAGAIN;
5652 	}
5653 
5654 	I915_WRITE(GEN6_PCODE_DATA, *val);
5655 	I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
5656 
5657 	if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
5658 		     500)) {
5659 		DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox);
5660 		return -ETIMEDOUT;
5661 	}
5662 
5663 	*val = I915_READ(GEN6_PCODE_DATA);
5664 	I915_WRITE(GEN6_PCODE_DATA, 0);
5665 
5666 	return 0;
5667 }
5668 
5669 int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u8 mbox, u32 val)
5670 {
5671 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
5672 
5673 	if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
5674 		DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed\n");
5675 		return -EAGAIN;
5676 	}
5677 
5678 	I915_WRITE(GEN6_PCODE_DATA, val);
5679 	I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
5680 
5681 	if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
5682 		     500)) {
5683 		DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox);
5684 		return -ETIMEDOUT;
5685 	}
5686 
5687 	I915_WRITE(GEN6_PCODE_DATA, 0);
5688 
5689 	return 0;
5690 }
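
/*
 * Illustrative usage sketch (mbox_cmd stands in for whichever GEN6_PCODE_*
 * command a caller needs): both helpers implement the same mailbox protocol,
 * "write GEN6_PCODE_DATA, write GEN6_PCODE_READY | command, wait for READY
 * to clear", so a typical read is
 *
 *	u32 val = 0;
 *	int ret;
 *
 *	mutex_lock(&dev_priv->rps.hw_lock);
 *	ret = sandybridge_pcode_read(dev_priv, mbox_cmd, &val);
 *	mutex_unlock(&dev_priv->rps.hw_lock);
 *
 * with rps.hw_lock held, as both helpers assert via WARN_ON().
 */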
5691 
5692 int vlv_gpu_freq(struct drm_i915_private *dev_priv, int val)
5693 {
5694 	int div;
5695 
5696 	/* 4 x czclk */
5697 	switch (dev_priv->mem_freq) {
5698 	case 800:
5699 		div = 10;
5700 		break;
5701 	case 1066:
5702 		div = 12;
5703 		break;
5704 	case 1333:
5705 		div = 16;
5706 		break;
5707 	default:
5708 		return -1;
5709 	}
5710 
5711 	return DIV_ROUND_CLOSEST(dev_priv->mem_freq * (val + 6 - 0xbd), 4 * div);
5712 }
5713 
5714 int vlv_freq_opcode(struct drm_i915_private *dev_priv, int val)
5715 {
5716 	int mul;
5717 
5718 	/* 4 x czclk */
5719 	switch (dev_priv->mem_freq) {
5720 	case 800:
5721 		mul = 10;
5722 		break;
5723 	case 1066:
5724 		mul = 12;
5725 		break;
5726 	case 1333:
5727 		mul = 16;
5728 		break;
5729 	default:
5730 		return -1;
5731 	}
5732 
5733 	return DIV_ROUND_CLOSEST(4 * mul * val, dev_priv->mem_freq) + 0xbd - 6;
5734 }
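
/*
 * Worked example (arithmetic illustration only, not a claim about real
 * operating points): with mem_freq == 800 the divider is 10, so an opcode
 * of 0xc5 maps to DIV_ROUND_CLOSEST(800 * (0xc5 + 6 - 0xbd), 4 * 10) =
 * DIV_ROUND_CLOSEST(800 * 14, 40) = 280 MHz, and vlv_freq_opcode() inverts
 * it: DIV_ROUND_CLOSEST(4 * 10 * 280, 800) + 0xbd - 6 = 14 + 183 = 0xc5.
 */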
5735 
5736 void intel_pm_setup(struct drm_device *dev)
5737 {
5738 	struct drm_i915_private *dev_priv = dev->dev_private;
5739 
5740 	lockinit(&dev_priv->rps.hw_lock, "i915 rps.hw_lock", 0, LK_CANRECURSE);
5741 
5742 	lockinit(&dev_priv->pc8.lock, "i915pc8", 0, LK_CANRECURSE);
5743 	dev_priv->pc8.requirements_met = false;
5744 	dev_priv->pc8.gpu_idle = false;
5745 	dev_priv->pc8.irqs_disabled = false;
5746 	dev_priv->pc8.enabled = false;
5747 	dev_priv->pc8.disable_count = 2; /* requirements_met + gpu_idle */
5748 	INIT_DELAYED_WORK(&dev_priv->pc8.enable_work, hsw_enable_pc8_work);
5749 	INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
5750 			  intel_gen6_powersave_work);
5751 }
5752