xref: /dragonfly/sys/dev/drm/i915/intel_overlay.c (revision 896f2e3a)
1 /*
2  * Copyright © 2009
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Daniel Vetter <daniel@ffwll.ch>
25  *
26  * Derived from Xorg ddx, xf86-video-intel, src/i830_video.c
27  */
28 #include <drm/drmP.h>
29 #include <drm/i915_drm.h>
30 #include "i915_drv.h"
31 #include "i915_reg.h"
32 #include "intel_drv.h"
33 
/*
 * Limits for the overlay source size. According to the Intel documentation
 * the real limits are Y width: 4095, UV width (planar): 2047, Y height: 2047
 * and UV height (planar): 1023. The Xorg driver however assumes 2048 for both
 * height and width, so the minimum of both is used here.
 */
#define IMAGE_MAX_WIDTH			2048
#define IMAGE_MAX_HEIGHT		2046	/* 2 * 1023 */

/* On 830 and 845 the large limits above result in the card hanging. */
#define IMAGE_MAX_WIDTH_LEGACY		1024
#define IMAGE_MAX_HEIGHT_LEGACY		1088
43 
/* Overlay register definitions. */

/* OCMD register: bit fields of the overlay command word. */
#define OCMD_TILED_SURFACE	(0x1 << 19)

/* mirroring */
#define OCMD_MIRROR_MASK	(0x3 << 17)
#define OCMD_MIRROR_MODE	(0x3 << 17)
#define OCMD_MIRROR_HORIZONTAL	(0x1 << 17)
#define OCMD_MIRROR_VERTICAL	(0x2 << 17)
#define OCMD_MIRROR_BOTH	(0x3 << 17)

/* byte order of packed formats */
#define OCMD_BYTEORDER_MASK	(0x3 << 14)	/* zero for YUYV or FOURCC YUY2 */
#define OCMD_UV_SWAP		(0x1 << 14)	/* YVYU */
#define OCMD_Y_SWAP		(0x2 << 14)	/* UYVY or FOURCC UYVY */
#define OCMD_Y_AND_UV_SWAP	(0x3 << 14)	/* VYUY */

/* source pixel format */
#define OCMD_SOURCE_FORMAT_MASK	(0xf << 10)
#define OCMD_RGB_888		(0x1 << 10)	/* not in i965 Intel docs */
#define OCMD_RGB_555		(0x2 << 10)	/* not in i965 Intel docs */
#define OCMD_RGB_565		(0x3 << 10)	/* not in i965 Intel docs */
#define OCMD_YUV_422_PACKED	(0x8 << 10)
#define OCMD_YUV_411_PACKED	(0x9 << 10)	/* not in i965 Intel docs */
#define OCMD_YUV_420_PLANAR	(0xc << 10)
#define OCMD_YUV_422_PLANAR	(0xd << 10)
#define OCMD_YUV_410_PLANAR	(0xe << 10)	/* also 411 */

#define OCMD_TVSYNCFLIP_PARITY	(0x1 << 9)
#define OCMD_TVSYNCFLIP_ENABLE	(0x1 << 7)

/* buffer type, buffer and field selection */
#define OCMD_BUF_TYPE_MASK	(0x1 << 5)
#define OCMD_BUF_TYPE_FRAME	(0x0 << 5)
#define OCMD_BUF_TYPE_FIELD	(0x1 << 5)
#define OCMD_TEST_MODE		(0x1 << 4)
#define OCMD_BUFFER_SELECT	(0x3 << 2)
#define OCMD_BUFFER0		(0x0 << 2)
#define OCMD_BUFFER1		(0x1 << 2)
/*
 * NOTE(review): OCMD_FIELD_SELECT sits at bit 2 while OCMD_FIELD0/1 use
 * bit 1 - confirm against the hardware documentation before relying on it.
 */
#define OCMD_FIELD_SELECT	(0x1 << 2)
#define OCMD_FIELD0		(0x0 << 1)
#define OCMD_FIELD1		(0x1 << 1)
#define OCMD_ENABLE		(0x1 << 0)
78 
/* OCONFIG register: static overlay configuration bits. */
#define OCONF_PIPE_MASK		(0x1 << 18)
#define OCONF_PIPE_A		(0x0 << 18)
#define OCONF_PIPE_B		(0x1 << 18)
#define OCONF_GAMMA2_ENABLE	(0x1 << 16)
/* colour space conversion */
#define OCONF_CSC_MODE_BT601	(0x0 << 5)
#define OCONF_CSC_MODE_BT709	(0x1 << 5)
#define OCONF_CSC_BYPASS	(0x1 << 4)
#define OCONF_CC_OUT_8BIT	(0x1 << 3)
#define OCONF_TEST_MODE		(0x1 << 2)
/* line buffer configuration */
#define OCONF_THREE_LINE_BUFFER	(0x1 << 0)
#define OCONF_TWO_LINE_BUFFER	(0x0 << 0)
91 
/* DCLRKM (dst-key) register */
/* Unsigned constant: shifting a signed 1 into bit 31 is undefined behavior. */
#define DST_KEY_ENABLE		(0x1U << 31)
/* per-format masks OR-ed into DCLRKM together with DST_KEY_ENABLE */
#define CLK_RGB24_MASK		0x0
#define CLK_RGB16_MASK		0x070307
#define CLK_RGB15_MASK		0x070707
#define CLK_RGB8I_MASK		0xffffff

/*
 * Expand a 16 bpp (5:6:5) or 15 bpp (5:5:5) colour into the 8:8:8 layout
 * written to the DCLRKV colorkey value register. The argument is fully
 * parenthesized so that expression arguments (e.g. "a | b") are masked as a
 * whole instead of being torn apart by operator precedence.
 */
#define RGB16_TO_COLORKEY(c) \
	((((c) & 0xF800) << 8) | (((c) & 0x07E0) << 5) | (((c) & 0x001F) << 3))
#define RGB15_TO_COLORKEY(c) \
	((((c) & 0x7c00) << 9) | (((c) & 0x03E0) << 6) | (((c) & 0x001F) << 3))
103 
/* Flag OR-ed into the overlay flip address emitted with MI_OVERLAY_FLIP. */
#define OFC_UPDATE		0x1

/* Polyphase filter coefficients: taps per filter and number of phases. */
#define N_HORIZ_Y_TAPS		5
#define N_VERT_Y_TAPS		3
#define N_HORIZ_UV_TAPS		3
#define N_VERT_UV_TAPS		3
#define N_PHASES		17
#define MAX_TAPS		5
114 
/*
 * Memory buffered overlay registers.
 *
 * The register values are written into this block (see
 * intel_overlay_map_regs()) and its address is handed to the hardware as the
 * flip address of an MI_OVERLAY_FLIP command. The field order and the
 * reserved padding define the byte offsets, so the layout must not change.
 */
struct overlay_registers {
	/* 0x00: buffer addresses */
	u32 OBUF_0Y;
	u32 OBUF_1Y;
	u32 OBUF_0U;
	u32 OBUF_0V;
	u32 OBUF_1U;
	u32 OBUF_1V;
	/* 0x18: strides, phases, window position/size and source dimensions */
	u32 OSTRIDE;
	u32 YRGB_VPH;
	u32 UV_VPH;
	u32 HORZ_PH;
	u32 INIT_PHS;
	u32 DWINPOS;
	u32 DWINSZ;
	u32 SWIDTH;
	u32 SWIDTHSW;
	u32 SHEIGHT;
	/* 0x40: scaling factors */
	u32 YRGBSCALE;
	u32 UVSCALE;
	/* 0x48: colour correction and colour keys */
	u32 OCLRC0;
	u32 OCLRC1;
	u32 DCLRKV;
	u32 DCLRKM;
	u32 SCLRKVH;
	u32 SCLRKVL;
	u32 SCLRKEN;
	/* 0x64: configuration and command */
	u32 OCONFIG;
	u32 OCMD;
	u32 RESERVED1; /* 0x6C */
	/* 0x70 */
	u32 OSTART_0Y;
	u32 OSTART_1Y;
	u32 OSTART_0U;
	u32 OSTART_0V;
	u32 OSTART_1U;
	u32 OSTART_1V;
	/* 0x88 */
	u32 OTILEOFF_0Y;
	u32 OTILEOFF_1Y;
	u32 OTILEOFF_0U;
	u32 OTILEOFF_0V;
	u32 OTILEOFF_1U;
	u32 OTILEOFF_1V;
	u32 FASTHSCALE; /* 0xA0 */
	u32 UVSCALEV; /* 0xA4 */
	u32 RESERVEDC[(0x200 - 0xA8) / 4]; /* 0xA8 - 0x1FC */
	/* polyphase filter coefficient tables, each padded to its slot size */
	u16 Y_VCOEFS[N_VERT_Y_TAPS * N_PHASES]; /* 0x200 */
	u16 RESERVEDD[0x100 / 2 - N_VERT_Y_TAPS * N_PHASES];
	u16 Y_HCOEFS[N_HORIZ_Y_TAPS * N_PHASES]; /* 0x300 */
	u16 RESERVEDE[0x200 / 2 - N_HORIZ_Y_TAPS * N_PHASES];
	u16 UV_VCOEFS[N_VERT_UV_TAPS * N_PHASES]; /* 0x500 */
	u16 RESERVEDF[0x100 / 2 - N_VERT_UV_TAPS * N_PHASES];
	u16 UV_HCOEFS[N_HORIZ_UV_TAPS * N_PHASES]; /* 0x600 */
	u16 RESERVEDG[0x100 / 2 - N_HORIZ_UV_TAPS * N_PHASES];
};
169 
170 struct intel_overlay {
171 	struct drm_device *dev;
172 	struct intel_crtc *crtc;
173 	struct drm_i915_gem_object *vid_bo;
174 	struct drm_i915_gem_object *old_vid_bo;
175 	int active;
176 	int pfit_active;
177 	u32 pfit_vscale_ratio; /* shifted-point number, (1<<12) == 1.0 */
178 	u32 color_key;
179 	u32 brightness, contrast, saturation;
180 	u32 old_xscale, old_yscale;
181 	/* register access */
182 	u32 flip_addr;
183 	struct drm_i915_gem_object *reg_bo;
184 	/* flip handling */
185 	uint32_t last_flip_req;
186 	void (*flip_tail)(struct intel_overlay *);
187 };
188 
189 static struct overlay_registers __iomem *
190 intel_overlay_map_regs(struct intel_overlay *overlay)
191 {
192 	struct overlay_registers __iomem *regs;
193 
194 	if (OVERLAY_NEEDS_PHYSICAL(overlay->dev))
195 		regs = (struct overlay_registers __iomem *)overlay->reg_bo->phys_obj->handle->vaddr;
196 	else
197 		regs = pmap_mapdev_attr(overlay->dev->agp->base +
198 		    i915_gem_obj_ggtt_offset(overlay->reg_bo), PAGE_SIZE,
199 		    PAT_WRITE_COMBINING);
200 
201 	return regs;
202 }
203 
204 static void intel_overlay_unmap_regs(struct intel_overlay *overlay,
205 				     struct overlay_registers __iomem *regs)
206 {
207 	if (!OVERLAY_NEEDS_PHYSICAL(overlay->dev))
208 		pmap_unmapdev((vm_offset_t)regs, PAGE_SIZE);
209 }
210 
211 static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
212 					 void (*tail)(struct intel_overlay *))
213 {
214 	struct drm_device *dev = overlay->dev;
215 	drm_i915_private_t *dev_priv = dev->dev_private;
216 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
217 	int ret;
218 
219 	BUG_ON(overlay->last_flip_req);
220 	ret = i915_add_request(ring, &overlay->last_flip_req);
221 	if (ret)
222 		return ret;
223 
224 	overlay->flip_tail = tail;
225 	ret = i915_wait_seqno(ring, overlay->last_flip_req);
226 	if (ret)
227 		return ret;
228 	i915_gem_retire_requests(dev);
229 
230 	overlay->last_flip_req = 0;
231 	return 0;
232 }
233 
234 /* overlay needs to be disable in OCMD reg */
235 static int intel_overlay_on(struct intel_overlay *overlay)
236 {
237 	struct drm_device *dev = overlay->dev;
238 	struct drm_i915_private *dev_priv = dev->dev_private;
239 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
240 	int ret;
241 
242 	BUG_ON(overlay->active);
243 	overlay->active = 1;
244 
245 	WARN_ON(IS_I830(dev) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE));
246 
247 	ret = intel_ring_begin(ring, 4);
248 	if (ret)
249 		return ret;
250 
251 	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON);
252 	intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE);
253 	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
254 	intel_ring_emit(ring, MI_NOOP);
255 	intel_ring_advance(ring);
256 
257 	return intel_overlay_do_wait_request(overlay, NULL);
258 }
259 
260 /* overlay needs to be enabled in OCMD reg */
261 static int intel_overlay_continue(struct intel_overlay *overlay,
262 				  bool load_polyphase_filter)
263 {
264 	struct drm_device *dev = overlay->dev;
265 	drm_i915_private_t *dev_priv = dev->dev_private;
266 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
267 	u32 flip_addr = overlay->flip_addr;
268 	u32 tmp;
269 	int ret;
270 
271 	BUG_ON(!overlay->active);
272 
273 	if (load_polyphase_filter)
274 		flip_addr |= OFC_UPDATE;
275 
276 	/* check for underruns */
277 	tmp = I915_READ(DOVSTA);
278 	if (tmp & (1 << 17))
279 		DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp);
280 
281 	ret = intel_ring_begin(ring, 2);
282 	if (ret)
283 		return ret;
284 
285 	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
286 	intel_ring_emit(ring, flip_addr);
287 	intel_ring_advance(ring);
288 
289 	return i915_add_request(ring, &overlay->last_flip_req);
290 }
291 
292 static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
293 {
294 	struct drm_i915_gem_object *obj = overlay->old_vid_bo;
295 
296 	i915_gem_object_unpin(obj);
297 	drm_gem_object_unreference(&obj->base);
298 
299 	overlay->old_vid_bo = NULL;
300 }
301 
302 static void intel_overlay_off_tail(struct intel_overlay *overlay)
303 {
304 	struct drm_i915_gem_object *obj = overlay->vid_bo;
305 
306 	/* never have the overlay hw on without showing a frame */
307 	BUG_ON(!overlay->vid_bo);
308 
309 	i915_gem_object_unpin(obj);
310 	drm_gem_object_unreference(&obj->base);
311 	overlay->vid_bo = NULL;
312 
313 	overlay->crtc->overlay = NULL;
314 	overlay->crtc = NULL;
315 	overlay->active = 0;
316 }
317 
318 /* overlay needs to be disabled in OCMD reg */
319 static int intel_overlay_off(struct intel_overlay *overlay)
320 {
321 	struct drm_device *dev = overlay->dev;
322 	struct drm_i915_private *dev_priv = dev->dev_private;
323 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
324 	u32 flip_addr = overlay->flip_addr;
325 	int ret;
326 
327 	BUG_ON(!overlay->active);
328 
329 	/* According to intel docs the overlay hw may hang (when switching
330 	 * off) without loading the filter coeffs. It is however unclear whether
331 	 * this applies to the disabling of the overlay or to the switching off
332 	 * of the hw. Do it in both cases */
333 	flip_addr |= OFC_UPDATE;
334 
335 	ret = intel_ring_begin(ring, 6);
336 	if (ret)
337 		return ret;
338 
339 	/* wait for overlay to go idle */
340 	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
341 	intel_ring_emit(ring, flip_addr);
342 	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
343 	/* turn overlay off */
344 	if (IS_I830(dev)) {
345 		/* Workaround: Don't disable the overlay fully, since otherwise
346 		 * it dies on the next OVERLAY_ON cmd. */
347 		intel_ring_emit(ring, MI_NOOP);
348 		intel_ring_emit(ring, MI_NOOP);
349 		intel_ring_emit(ring, MI_NOOP);
350 	} else {
351 		intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_OFF);
352 		intel_ring_emit(ring, flip_addr);
353 		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
354 	}
355 	intel_ring_advance(ring);
356 
357 	return intel_overlay_do_wait_request(overlay, intel_overlay_off_tail);
358 }
359 
360 /* recover from an interruption due to a signal
361  * We have to be careful not to repeat work forever an make forward progess. */
362 static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
363 {
364 	struct drm_device *dev = overlay->dev;
365 	drm_i915_private_t *dev_priv = dev->dev_private;
366 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
367 	int ret;
368 
369 	if (overlay->last_flip_req == 0)
370 		return 0;
371 
372 	ret = i915_wait_seqno(ring, overlay->last_flip_req);
373 	if (ret)
374 		return ret;
375 	i915_gem_retire_requests(dev);
376 
377 	if (overlay->flip_tail)
378 		overlay->flip_tail(overlay);
379 
380 	overlay->last_flip_req = 0;
381 	return 0;
382 }
383 
384 /* Wait for pending overlay flip and release old frame.
385  * Needs to be called before the overlay register are changed
386  * via intel_overlay_(un)map_regs
387  */
388 static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
389 {
390 	struct drm_device *dev = overlay->dev;
391 	drm_i915_private_t *dev_priv = dev->dev_private;
392 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
393 	int ret;
394 
395 	/* Only wait if there is actually an old frame to release to
396 	 * guarantee forward progress.
397 	 */
398 	if (!overlay->old_vid_bo)
399 		return 0;
400 
401 	if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
402 		/* synchronous slowpath */
403 		ret = intel_ring_begin(ring, 2);
404 		if (ret)
405 			return ret;
406 
407 		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
408 		intel_ring_emit(ring, MI_NOOP);
409 		intel_ring_advance(ring);
410 
411 		ret = intel_overlay_do_wait_request(overlay,
412 						    intel_overlay_release_old_vid_tail);
413 		if (ret)
414 			return ret;
415 	}
416 
417 	intel_overlay_release_old_vid_tail(overlay);
418 	return 0;
419 }
420 
/* Parameters of one put-image operation, as consumed by the overlay code. */
struct put_image_params {
	/* I915_OVERLAY_* format flags */
	int format;
	/* destination window position and size */
	short dst_x, dst_y;
	short dst_w, dst_h;
	/* source dimensions */
	short src_w;
	short src_scan_h, src_scan_w;
	short src_h;
	/* strides of the Y and UV planes */
	short stride_Y, stride_UV;
	/* plane offsets, added to the buffer's GGTT offset */
	int offset_Y;
	int offset_U;
	int offset_V;
};
437 
438 static int packed_depth_bytes(u32 format)
439 {
440 	switch (format & I915_OVERLAY_DEPTH_MASK) {
441 	case I915_OVERLAY_YUV422:
442 		return 4;
443 	case I915_OVERLAY_YUV411:
444 		/* return 6; not implemented */
445 	default:
446 		return -EINVAL;
447 	}
448 }
449 
450 static int packed_width_bytes(u32 format, short width)
451 {
452 	switch (format & I915_OVERLAY_DEPTH_MASK) {
453 	case I915_OVERLAY_YUV422:
454 		return width << 1;
455 	default:
456 		return -EINVAL;
457 	}
458 }
459 
460 static int uv_hsubsampling(u32 format)
461 {
462 	switch (format & I915_OVERLAY_DEPTH_MASK) {
463 	case I915_OVERLAY_YUV422:
464 	case I915_OVERLAY_YUV420:
465 		return 2;
466 	case I915_OVERLAY_YUV411:
467 	case I915_OVERLAY_YUV410:
468 		return 4;
469 	default:
470 		return -EINVAL;
471 	}
472 }
473 
474 static int uv_vsubsampling(u32 format)
475 {
476 	switch (format & I915_OVERLAY_DEPTH_MASK) {
477 	case I915_OVERLAY_YUV420:
478 	case I915_OVERLAY_YUV410:
479 		return 2;
480 	case I915_OVERLAY_YUV422:
481 	case I915_OVERLAY_YUV411:
482 		return 1;
483 	default:
484 		return -EINVAL;
485 	}
486 }
487 
488 static u32 calc_swidthsw(struct drm_device *dev, u32 offset, u32 width)
489 {
490 	u32 mask, shift, ret;
491 	if (IS_GEN2(dev)) {
492 		mask = 0x1f;
493 		shift = 5;
494 	} else {
495 		mask = 0x3f;
496 		shift = 6;
497 	}
498 	ret = ((offset + width + mask) >> shift) - (offset >> shift);
499 	if (!IS_GEN2(dev))
500 		ret <<= 1;
501 	ret -= 1;
502 	return ret << 2;
503 }
504 
/*
 * Static horizontal filter coefficients for the Y plane: N_PHASES rows of
 * N_HORIZ_Y_TAPS values each. update_polyphase_filter() copies the table
 * verbatim into the Y_HCOEFS area of the register block.
 */
static const u16 y_static_hcoeffs[N_HORIZ_Y_TAPS * N_PHASES] = {
	0x3000, 0xb4a0, 0x1930, 0x1920, 0xb4a0,
	0x3000, 0xb500, 0x19d0, 0x1880, 0xb440,
	0x3000, 0xb540, 0x1a88, 0x2f80, 0xb3e0,
	0x3000, 0xb580, 0x1b30, 0x2e20, 0xb380,
	0x3000, 0xb5c0, 0x1bd8, 0x2cc0, 0xb320,
	0x3020, 0xb5e0, 0x1c60, 0x2b80, 0xb2c0,
	0x3020, 0xb5e0, 0x1cf8, 0x2a20, 0xb260,
	0x3020, 0xb5e0, 0x1d80, 0x28e0, 0xb200,
	0x3020, 0xb5c0, 0x1e08, 0x3f40, 0xb1c0,
	0x3020, 0xb580, 0x1e78, 0x3ce0, 0xb160,
	0x3040, 0xb520, 0x1ed8, 0x3aa0, 0xb120,
	0x3040, 0xb4a0, 0x1f30, 0x3880, 0xb0e0,
	0x3040, 0xb400, 0x1f78, 0x3680, 0xb0a0,
	0x3020, 0xb340, 0x1fb8, 0x34a0, 0xb060,
	0x3020, 0xb240, 0x1fe0, 0x32e0, 0xb040,
	0x3020, 0xb140, 0x1ff8, 0x3160, 0xb020,
	0xb000, 0x3000, 0x0800, 0x3000, 0xb000
};
524 
525 static const u16 uv_static_hcoeffs[N_HORIZ_UV_TAPS * N_PHASES] = {
526 	0x3000, 0x1800, 0x1800, 0xb000, 0x18d0, 0x2e60,
527 	0xb000, 0x1990, 0x2ce0, 0xb020, 0x1a68, 0x2b40,
528 	0xb040, 0x1b20, 0x29e0, 0xb060, 0x1bd8, 0x2880,
529 	0xb080, 0x1c88, 0x3e60, 0xb0a0, 0x1d28, 0x3c00,
530 	0xb0c0, 0x1db8, 0x39e0, 0xb0e0, 0x1e40, 0x37e0,
531 	0xb100, 0x1eb8, 0x3620, 0xb100, 0x1f18, 0x34a0,
532 	0xb100, 0x1f68, 0x3360, 0xb0e0, 0x1fa8, 0x3240,
533 	0xb0c0, 0x1fe0, 0x3140, 0xb060, 0x1ff0, 0x30a0,
534 	0x3000, 0x0800, 0x3000
535 };
536 
537 static void update_polyphase_filter(struct overlay_registers __iomem *regs)
538 {
539 	memcpy_toio(regs->Y_HCOEFS, y_static_hcoeffs, sizeof(y_static_hcoeffs));
540 	memcpy_toio(regs->UV_HCOEFS, uv_static_hcoeffs,
541 		    sizeof(uv_static_hcoeffs));
542 }
543 
544 static bool update_scaling_factors(struct intel_overlay *overlay,
545 				   struct overlay_registers __iomem *regs,
546 				   struct put_image_params *params)
547 {
548 	/* fixed point with a 12 bit shift */
549 	u32 xscale, yscale, xscale_UV, yscale_UV;
550 #define FP_SHIFT 12
551 #define FRACT_MASK 0xfff
552 	bool scale_changed = false;
553 	int uv_hscale = uv_hsubsampling(params->format);
554 	int uv_vscale = uv_vsubsampling(params->format);
555 
556 	if (params->dst_w > 1)
557 		xscale = ((params->src_scan_w - 1) << FP_SHIFT)
558 			/(params->dst_w);
559 	else
560 		xscale = 1 << FP_SHIFT;
561 
562 	if (params->dst_h > 1)
563 		yscale = ((params->src_scan_h - 1) << FP_SHIFT)
564 			/(params->dst_h);
565 	else
566 		yscale = 1 << FP_SHIFT;
567 
568 	/*if (params->format & I915_OVERLAY_YUV_PLANAR) {*/
569 	xscale_UV = xscale/uv_hscale;
570 	yscale_UV = yscale/uv_vscale;
571 	/* make the Y scale to UV scale ratio an exact multiply */
572 	xscale = xscale_UV * uv_hscale;
573 	yscale = yscale_UV * uv_vscale;
574 	/*} else {
575 	  xscale_UV = 0;
576 	  yscale_UV = 0;
577 	  }*/
578 
579 	if (xscale != overlay->old_xscale || yscale != overlay->old_yscale)
580 		scale_changed = true;
581 	overlay->old_xscale = xscale;
582 	overlay->old_yscale = yscale;
583 
584 	iowrite32(((yscale & FRACT_MASK) << 20) |
585 		  ((xscale >> FP_SHIFT)  << 16) |
586 		  ((xscale & FRACT_MASK) << 3),
587 		 &regs->YRGBSCALE);
588 
589 	iowrite32(((yscale_UV & FRACT_MASK) << 20) |
590 		  ((xscale_UV >> FP_SHIFT)  << 16) |
591 		  ((xscale_UV & FRACT_MASK) << 3),
592 		 &regs->UVSCALE);
593 
594 	iowrite32((((yscale    >> FP_SHIFT) << 16) |
595 		   ((yscale_UV >> FP_SHIFT) << 0)),
596 		 &regs->UVSCALEV);
597 
598 	if (scale_changed)
599 		update_polyphase_filter(regs);
600 
601 	return scale_changed;
602 }
603 
604 static void update_colorkey(struct intel_overlay *overlay,
605 			    struct overlay_registers __iomem *regs)
606 {
607 	u32 key = overlay->color_key;
608 
609 	switch (overlay->crtc->base.fb->bits_per_pixel) {
610 	case 8:
611 		iowrite32(0, &regs->DCLRKV);
612 		iowrite32(CLK_RGB8I_MASK | DST_KEY_ENABLE, &regs->DCLRKM);
613 		break;
614 
615 	case 16:
616 		if (overlay->crtc->base.fb->depth == 15) {
617 			iowrite32(RGB15_TO_COLORKEY(key), &regs->DCLRKV);
618 			iowrite32(CLK_RGB15_MASK | DST_KEY_ENABLE,
619 				  &regs->DCLRKM);
620 		} else {
621 			iowrite32(RGB16_TO_COLORKEY(key), &regs->DCLRKV);
622 			iowrite32(CLK_RGB16_MASK | DST_KEY_ENABLE,
623 				  &regs->DCLRKM);
624 		}
625 		break;
626 
627 	case 24:
628 	case 32:
629 		iowrite32(key, &regs->DCLRKV);
630 		iowrite32(CLK_RGB24_MASK | DST_KEY_ENABLE, &regs->DCLRKM);
631 		break;
632 	}
633 }
634 
635 static u32 overlay_cmd_reg(struct put_image_params *params)
636 {
637 	u32 cmd = OCMD_ENABLE | OCMD_BUF_TYPE_FRAME | OCMD_BUFFER0;
638 
639 	if (params->format & I915_OVERLAY_YUV_PLANAR) {
640 		switch (params->format & I915_OVERLAY_DEPTH_MASK) {
641 		case I915_OVERLAY_YUV422:
642 			cmd |= OCMD_YUV_422_PLANAR;
643 			break;
644 		case I915_OVERLAY_YUV420:
645 			cmd |= OCMD_YUV_420_PLANAR;
646 			break;
647 		case I915_OVERLAY_YUV411:
648 		case I915_OVERLAY_YUV410:
649 			cmd |= OCMD_YUV_410_PLANAR;
650 			break;
651 		}
652 	} else { /* YUV packed */
653 		switch (params->format & I915_OVERLAY_DEPTH_MASK) {
654 		case I915_OVERLAY_YUV422:
655 			cmd |= OCMD_YUV_422_PACKED;
656 			break;
657 		case I915_OVERLAY_YUV411:
658 			cmd |= OCMD_YUV_411_PACKED;
659 			break;
660 		}
661 
662 		switch (params->format & I915_OVERLAY_SWAP_MASK) {
663 		case I915_OVERLAY_NO_SWAP:
664 			break;
665 		case I915_OVERLAY_UV_SWAP:
666 			cmd |= OCMD_UV_SWAP;
667 			break;
668 		case I915_OVERLAY_Y_SWAP:
669 			cmd |= OCMD_Y_SWAP;
670 			break;
671 		case I915_OVERLAY_Y_AND_UV_SWAP:
672 			cmd |= OCMD_Y_AND_UV_SWAP;
673 			break;
674 		}
675 	}
676 
677 	return cmd;
678 }
679 
680 static int intel_overlay_do_put_image(struct intel_overlay *overlay,
681 				      struct drm_i915_gem_object *new_bo,
682 				      struct put_image_params *params)
683 {
684 	int ret, tmp_width;
685 	struct overlay_registers __iomem *regs;
686 	bool scale_changed = false;
687 	struct drm_device *dev = overlay->dev;
688 	u32 swidth, swidthsw, sheight, ostride;
689 
690 	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
691 	BUG_ON(!mutex_is_locked(&dev->mode_config.mutex));
692 	BUG_ON(!overlay);
693 
694 	ret = intel_overlay_release_old_vid(overlay);
695 	if (ret != 0)
696 		return ret;
697 
698 	ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL);
699 	if (ret != 0)
700 		return ret;
701 
702 	ret = i915_gem_object_put_fence(new_bo);
703 	if (ret)
704 		goto out_unpin;
705 
706 	if (!overlay->active) {
707 		u32 oconfig;
708 		regs = intel_overlay_map_regs(overlay);
709 		if (!regs) {
710 			ret = -ENOMEM;
711 			goto out_unpin;
712 		}
713 		oconfig = OCONF_CC_OUT_8BIT;
714 		if (IS_GEN4(overlay->dev))
715 			oconfig |= OCONF_CSC_MODE_BT709;
716 		oconfig |= overlay->crtc->pipe == 0 ?
717 			OCONF_PIPE_A : OCONF_PIPE_B;
718 		iowrite32(oconfig, &regs->OCONFIG);
719 		intel_overlay_unmap_regs(overlay, regs);
720 
721 		ret = intel_overlay_on(overlay);
722 		if (ret != 0)
723 			goto out_unpin;
724 	}
725 
726 	regs = intel_overlay_map_regs(overlay);
727 	if (!regs) {
728 		ret = -ENOMEM;
729 		goto out_unpin;
730 	}
731 
732 	iowrite32((params->dst_y << 16) | params->dst_x, &regs->DWINPOS);
733 	iowrite32((params->dst_h << 16) | params->dst_w, &regs->DWINSZ);
734 
735 	if (params->format & I915_OVERLAY_YUV_PACKED)
736 		tmp_width = packed_width_bytes(params->format, params->src_w);
737 	else
738 		tmp_width = params->src_w;
739 
740 	swidth = params->src_w;
741 	swidthsw = calc_swidthsw(overlay->dev, params->offset_Y, tmp_width);
742 	sheight = params->src_h;
743 	iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_Y, &regs->OBUF_0Y);
744 	ostride = params->stride_Y;
745 
746 	if (params->format & I915_OVERLAY_YUV_PLANAR) {
747 		int uv_hscale = uv_hsubsampling(params->format);
748 		int uv_vscale = uv_vsubsampling(params->format);
749 		u32 tmp_U, tmp_V;
750 		swidth |= (params->src_w/uv_hscale) << 16;
751 		tmp_U = calc_swidthsw(overlay->dev, params->offset_U,
752 				      params->src_w/uv_hscale);
753 		tmp_V = calc_swidthsw(overlay->dev, params->offset_V,
754 				      params->src_w/uv_hscale);
755 		swidthsw |= max_t(u32, tmp_U, tmp_V) << 16;
756 		sheight |= (params->src_h/uv_vscale) << 16;
757 		iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_U, &regs->OBUF_0U);
758 		iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_V, &regs->OBUF_0V);
759 		ostride |= params->stride_UV << 16;
760 	}
761 
762 	iowrite32(swidth, &regs->SWIDTH);
763 	iowrite32(swidthsw, &regs->SWIDTHSW);
764 	iowrite32(sheight, &regs->SHEIGHT);
765 	iowrite32(ostride, &regs->OSTRIDE);
766 
767 	scale_changed = update_scaling_factors(overlay, regs, params);
768 
769 	update_colorkey(overlay, regs);
770 
771 	iowrite32(overlay_cmd_reg(params), &regs->OCMD);
772 
773 	intel_overlay_unmap_regs(overlay, regs);
774 
775 	ret = intel_overlay_continue(overlay, scale_changed);
776 	if (ret)
777 		goto out_unpin;
778 
779 	overlay->old_vid_bo = overlay->vid_bo;
780 	overlay->vid_bo = new_bo;
781 
782 	return 0;
783 
784 out_unpin:
785 	i915_gem_object_unpin(new_bo);
786 	return ret;
787 }
788 
789 int intel_overlay_switch_off(struct intel_overlay *overlay)
790 {
791 	struct overlay_registers __iomem *regs;
792 	struct drm_device *dev = overlay->dev;
793 	int ret;
794 
795 	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
796 	BUG_ON(!mutex_is_locked(&dev->mode_config.mutex));
797 
798 	ret = intel_overlay_recover_from_interrupt(overlay);
799 	if (ret != 0)
800 		return ret;
801 
802 	if (!overlay->active)
803 		return 0;
804 
805 	ret = intel_overlay_release_old_vid(overlay);
806 	if (ret != 0)
807 		return ret;
808 
809 	regs = intel_overlay_map_regs(overlay);
810 	iowrite32(0, &regs->OCMD);
811 	intel_overlay_unmap_regs(overlay, regs);
812 
813 	ret = intel_overlay_off(overlay);
814 	if (ret != 0)
815 		return ret;
816 
817 	intel_overlay_off_tail(overlay);
818 	return 0;
819 }
820 
821 static int check_overlay_possible_on_crtc(struct intel_overlay *overlay,
822 					  struct intel_crtc *crtc)
823 {
824 	if (!crtc->active)
825 		return -EINVAL;
826 
827 	/* can't use the overlay with double wide pipe */
828 	if (crtc->config.double_wide)
829 		return -EINVAL;
830 
831 	return 0;
832 }
833 
834 static void update_pfit_vscale_ratio(struct intel_overlay *overlay)
835 {
836 	struct drm_device *dev = overlay->dev;
837 	drm_i915_private_t *dev_priv = dev->dev_private;
838 	u32 pfit_control = I915_READ(PFIT_CONTROL);
839 	u32 ratio;
840 
841 	/* XXX: This is not the same logic as in the xorg driver, but more in
842 	 * line with the intel documentation for the i965
843 	 */
844 	if (INTEL_INFO(dev)->gen >= 4) {
845 		/* on i965 use the PGM reg to read out the autoscaler values */
846 		ratio = I915_READ(PFIT_PGM_RATIOS) >> PFIT_VERT_SCALE_SHIFT_965;
847 	} else {
848 		if (pfit_control & VERT_AUTO_SCALE)
849 			ratio = I915_READ(PFIT_AUTO_RATIOS);
850 		else
851 			ratio = I915_READ(PFIT_PGM_RATIOS);
852 		ratio >>= PFIT_VERT_SCALE_SHIFT;
853 	}
854 
855 	overlay->pfit_vscale_ratio = ratio;
856 }
857 
858 static int check_overlay_dst(struct intel_overlay *overlay,
859 			     struct drm_intel_overlay_put_image *rec)
860 {
861 	struct drm_display_mode *mode = &overlay->crtc->base.mode;
862 
863 	if (rec->dst_x < mode->hdisplay &&
864 	    rec->dst_x + rec->dst_width <= mode->hdisplay &&
865 	    rec->dst_y < mode->vdisplay &&
866 	    rec->dst_y + rec->dst_height <= mode->vdisplay)
867 		return 0;
868 	else
869 		return -EINVAL;
870 }
871 
872 static int check_overlay_scaling(struct put_image_params *rec)
873 {
874 	u32 tmp;
875 
876 	/* downscaling limit is 8.0 */
877 	tmp = ((rec->src_scan_h << 16) / rec->dst_h) >> 16;
878 	if (tmp > 7)
879 		return -EINVAL;
880 	tmp = ((rec->src_scan_w << 16) / rec->dst_w) >> 16;
881 	if (tmp > 7)
882 		return -EINVAL;
883 
884 	return 0;
885 }
886 
887 static int check_overlay_src(struct drm_device *dev,
888 			     struct drm_intel_overlay_put_image *rec,
889 			     struct drm_i915_gem_object *new_bo)
890 {
891 	int uv_hscale = uv_hsubsampling(rec->flags);
892 	int uv_vscale = uv_vsubsampling(rec->flags);
893 	u32 stride_mask;
894 	int depth;
895 	u32 tmp;
896 
897 	/* check src dimensions */
898 	if (IS_845G(dev) || IS_I830(dev)) {
899 		if (rec->src_height > IMAGE_MAX_HEIGHT_LEGACY ||
900 		    rec->src_width  > IMAGE_MAX_WIDTH_LEGACY)
901 			return -EINVAL;
902 	} else {
903 		if (rec->src_height > IMAGE_MAX_HEIGHT ||
904 		    rec->src_width  > IMAGE_MAX_WIDTH)
905 			return -EINVAL;
906 	}
907 
908 	/* better safe than sorry, use 4 as the maximal subsampling ratio */
909 	if (rec->src_height < N_VERT_Y_TAPS*4 ||
910 	    rec->src_width  < N_HORIZ_Y_TAPS*4)
911 		return -EINVAL;
912 
913 	/* check alignment constraints */
914 	switch (rec->flags & I915_OVERLAY_TYPE_MASK) {
915 	case I915_OVERLAY_RGB:
916 		/* not implemented */
917 		return -EINVAL;
918 
919 	case I915_OVERLAY_YUV_PACKED:
920 		if (uv_vscale != 1)
921 			return -EINVAL;
922 
923 		depth = packed_depth_bytes(rec->flags);
924 		if (depth < 0)
925 			return depth;
926 
927 		/* ignore UV planes */
928 		rec->stride_UV = 0;
929 		rec->offset_U = 0;
930 		rec->offset_V = 0;
931 		/* check pixel alignment */
932 		if (rec->offset_Y % depth)
933 			return -EINVAL;
934 		break;
935 
936 	case I915_OVERLAY_YUV_PLANAR:
937 		if (uv_vscale < 0 || uv_hscale < 0)
938 			return -EINVAL;
939 		/* no offset restrictions for planar formats */
940 		break;
941 
942 	default:
943 		return -EINVAL;
944 	}
945 
946 	if (rec->src_width % uv_hscale)
947 		return -EINVAL;
948 
949 	/* stride checking */
950 	if (IS_I830(dev) || IS_845G(dev))
951 		stride_mask = 255;
952 	else
953 		stride_mask = 63;
954 
955 	if (rec->stride_Y & stride_mask || rec->stride_UV & stride_mask)
956 		return -EINVAL;
957 	if (IS_GEN4(dev) && rec->stride_Y < 512)
958 		return -EINVAL;
959 
960 	tmp = (rec->flags & I915_OVERLAY_TYPE_MASK) == I915_OVERLAY_YUV_PLANAR ?
961 		4096 : 8192;
962 	if (rec->stride_Y > tmp || rec->stride_UV > 2*1024)
963 		return -EINVAL;
964 
965 	/* check buffer dimensions */
966 	switch (rec->flags & I915_OVERLAY_TYPE_MASK) {
967 	case I915_OVERLAY_RGB:
968 	case I915_OVERLAY_YUV_PACKED:
969 		/* always 4 Y values per depth pixels */
970 		if (packed_width_bytes(rec->flags, rec->src_width) > rec->stride_Y)
971 			return -EINVAL;
972 
973 		tmp = rec->stride_Y*rec->src_height;
974 		if (rec->offset_Y + tmp > new_bo->base.size)
975 			return -EINVAL;
976 		break;
977 
978 	case I915_OVERLAY_YUV_PLANAR:
979 		if (rec->src_width > rec->stride_Y)
980 			return -EINVAL;
981 		if (rec->src_width/uv_hscale > rec->stride_UV)
982 			return -EINVAL;
983 
984 		tmp = rec->stride_Y * rec->src_height;
985 		if (rec->offset_Y + tmp > new_bo->base.size)
986 			return -EINVAL;
987 
988 		tmp = rec->stride_UV * (rec->src_height / uv_vscale);
989 		if (rec->offset_U + tmp > new_bo->base.size ||
990 		    rec->offset_V + tmp > new_bo->base.size)
991 			return -EINVAL;
992 		break;
993 	}
994 
995 	return 0;
996 }
997 
998 /**
999  * Return the pipe currently connected to the panel fitter,
1000  * or -1 if the panel fitter is not present or not in use
1001  */
1002 static int intel_panel_fitter_pipe(struct drm_device *dev)
1003 {
1004 	struct drm_i915_private *dev_priv = dev->dev_private;
1005 	u32  pfit_control;
1006 
1007 	/* i830 doesn't have a panel fitter */
1008 	if (INTEL_INFO(dev)->gen <= 3 && (IS_I830(dev) || !IS_MOBILE(dev)))
1009 		return -1;
1010 
1011 	pfit_control = I915_READ(PFIT_CONTROL);
1012 
1013 	/* See if the panel fitter is in use */
1014 	if ((pfit_control & PFIT_ENABLE) == 0)
1015 		return -1;
1016 
1017 	/* 965 can place panel fitter on either pipe */
1018 	if (IS_GEN4(dev))
1019 		return (pfit_control >> 29) & 0x3;
1020 
1021 	/* older chips can only use pipe 1 */
1022 	return 1;
1023 }
1024 
1025 int intel_overlay_put_image(struct drm_device *dev, void *data,
1026 			    struct drm_file *file_priv)
1027 {
1028 	struct drm_intel_overlay_put_image *put_image_rec = data;
1029 	drm_i915_private_t *dev_priv = dev->dev_private;
1030 	struct intel_overlay *overlay;
1031 	struct drm_mode_object *drmmode_obj;
1032 	struct intel_crtc *crtc;
1033 	struct drm_i915_gem_object *new_bo;
1034 	struct put_image_params *params;
1035 	int ret;
1036 
1037 	/* No need to check for DRIVER_MODESET - we don't set it up then. */
1038 	overlay = dev_priv->overlay;
1039 	if (!overlay) {
1040 		DRM_DEBUG("userspace bug: no overlay\n");
1041 		return -ENODEV;
1042 	}
1043 
1044 	if (!(put_image_rec->flags & I915_OVERLAY_ENABLE)) {
1045 		drm_modeset_lock_all(dev);
1046 		mutex_lock(&dev->struct_mutex);
1047 
1048 		ret = intel_overlay_switch_off(overlay);
1049 
1050 		mutex_unlock(&dev->struct_mutex);
1051 		drm_modeset_unlock_all(dev);
1052 
1053 		return ret;
1054 	}
1055 
1056 	params = kmalloc(sizeof(*params), M_DRM, M_WAITOK);
1057 	if (!params)
1058 		return -ENOMEM;
1059 
1060 	drmmode_obj = drm_mode_object_find(dev, put_image_rec->crtc_id,
1061 					   DRM_MODE_OBJECT_CRTC);
1062 	if (!drmmode_obj) {
1063 		ret = -ENOENT;
1064 		goto out_free;
1065 	}
1066 	crtc = to_intel_crtc(obj_to_crtc(drmmode_obj));
1067 
1068 	new_bo = to_intel_bo(drm_gem_object_lookup(dev, file_priv,
1069 						   put_image_rec->bo_handle));
1070 	if (&new_bo->base == NULL) {
1071 		ret = -ENOENT;
1072 		goto out_free;
1073 	}
1074 
1075 	drm_modeset_lock_all(dev);
1076 	mutex_lock(&dev->struct_mutex);
1077 
1078 	if (new_bo->tiling_mode) {
1079 		DRM_ERROR("buffer used for overlay image can not be tiled\n");
1080 		ret = -EINVAL;
1081 		goto out_unlock;
1082 	}
1083 
1084 	ret = intel_overlay_recover_from_interrupt(overlay);
1085 	if (ret != 0)
1086 		goto out_unlock;
1087 
1088 	if (overlay->crtc != crtc) {
1089 		struct drm_display_mode *mode = &crtc->base.mode;
1090 		ret = intel_overlay_switch_off(overlay);
1091 		if (ret != 0)
1092 			goto out_unlock;
1093 
1094 		ret = check_overlay_possible_on_crtc(overlay, crtc);
1095 		if (ret != 0)
1096 			goto out_unlock;
1097 
1098 		overlay->crtc = crtc;
1099 		crtc->overlay = overlay;
1100 
1101 		/* line too wide, i.e. one-line-mode */
1102 		if (mode->hdisplay > 1024 &&
1103 		    intel_panel_fitter_pipe(dev) == crtc->pipe) {
1104 			overlay->pfit_active = 1;
1105 			update_pfit_vscale_ratio(overlay);
1106 		} else
1107 			overlay->pfit_active = 0;
1108 	}
1109 
1110 	ret = check_overlay_dst(overlay, put_image_rec);
1111 	if (ret != 0)
1112 		goto out_unlock;
1113 
1114 	if (overlay->pfit_active) {
1115 		params->dst_y = ((((u32)put_image_rec->dst_y) << 12) /
1116 				 overlay->pfit_vscale_ratio);
1117 		/* shifting right rounds downwards, so add 1 */
1118 		params->dst_h = ((((u32)put_image_rec->dst_height) << 12) /
1119 				 overlay->pfit_vscale_ratio) + 1;
1120 	} else {
1121 		params->dst_y = put_image_rec->dst_y;
1122 		params->dst_h = put_image_rec->dst_height;
1123 	}
1124 	params->dst_x = put_image_rec->dst_x;
1125 	params->dst_w = put_image_rec->dst_width;
1126 
1127 	params->src_w = put_image_rec->src_width;
1128 	params->src_h = put_image_rec->src_height;
1129 	params->src_scan_w = put_image_rec->src_scan_width;
1130 	params->src_scan_h = put_image_rec->src_scan_height;
1131 	if (params->src_scan_h > params->src_h ||
1132 	    params->src_scan_w > params->src_w) {
1133 		ret = -EINVAL;
1134 		goto out_unlock;
1135 	}
1136 
1137 	ret = check_overlay_src(dev, put_image_rec, new_bo);
1138 	if (ret != 0)
1139 		goto out_unlock;
1140 	params->format = put_image_rec->flags & ~I915_OVERLAY_FLAGS_MASK;
1141 	params->stride_Y = put_image_rec->stride_Y;
1142 	params->stride_UV = put_image_rec->stride_UV;
1143 	params->offset_Y = put_image_rec->offset_Y;
1144 	params->offset_U = put_image_rec->offset_U;
1145 	params->offset_V = put_image_rec->offset_V;
1146 
1147 	/* Check scaling after src size to prevent a divide-by-zero. */
1148 	ret = check_overlay_scaling(params);
1149 	if (ret != 0)
1150 		goto out_unlock;
1151 
1152 	ret = intel_overlay_do_put_image(overlay, new_bo, params);
1153 	if (ret != 0)
1154 		goto out_unlock;
1155 
1156 	mutex_unlock(&dev->struct_mutex);
1157 	drm_modeset_unlock_all(dev);
1158 
1159 	kfree(params);
1160 
1161 	return 0;
1162 
1163 out_unlock:
1164 	mutex_unlock(&dev->struct_mutex);
1165 	drm_modeset_unlock_all(dev);
1166 	drm_gem_object_unreference_unlocked(&new_bo->base);
1167 out_free:
1168 	kfree(params);
1169 
1170 	return ret;
1171 }
1172 
1173 static void update_reg_attrs(struct intel_overlay *overlay,
1174 			     struct overlay_registers __iomem *regs)
1175 {
1176 	iowrite32((overlay->contrast << 18) | (overlay->brightness & 0xff),
1177 		  &regs->OCLRC0);
1178 	iowrite32(overlay->saturation, &regs->OCLRC1);
1179 }
1180 
/*
 * Validate a pair of adjacent gamma words.
 *
 * Both words must have a clear top byte, and each of the three low bytes
 * of gamma1 must be strictly smaller than the matching byte of gamma2.
 * Returns true when the pair is acceptable.
 */
static bool check_gamma_bounds(u32 gamma1, u32 gamma2)
{
	unsigned int shift;

	/* Bits 31:24 must be zero in both words */
	if ((gamma1 | gamma2) & 0xff000000)
		return false;

	/* Compare the three 8-bit channels, lowest byte first */
	for (shift = 0; shift < 24; shift += 8) {
		u32 lower = (gamma1 >> shift) & 0xff;
		u32 upper = (gamma2 >> shift) & 0xff;

		if (lower >= upper)
			return false;
	}

	return true;
}
1195 
/*
 * Reject gamma5 words in which any of the three low bytes equals 0x80.
 * The top byte is not inspected.  Returns true when the value is usable.
 */
static bool check_gamma5_errata(u32 gamma5)
{
	unsigned int shift;

	for (shift = 0; shift < 24; shift += 8) {
		u32 channel = (gamma5 >> shift) & 0xff;

		if (channel == 0x80)
			return false;
	}

	return true;
}
1207 
1208 static int check_gamma(struct drm_intel_overlay_attrs *attrs)
1209 {
1210 	if (!check_gamma_bounds(0, attrs->gamma0) ||
1211 	    !check_gamma_bounds(attrs->gamma0, attrs->gamma1) ||
1212 	    !check_gamma_bounds(attrs->gamma1, attrs->gamma2) ||
1213 	    !check_gamma_bounds(attrs->gamma2, attrs->gamma3) ||
1214 	    !check_gamma_bounds(attrs->gamma3, attrs->gamma4) ||
1215 	    !check_gamma_bounds(attrs->gamma4, attrs->gamma5) ||
1216 	    !check_gamma_bounds(attrs->gamma5, 0x00ffffff))
1217 		return -EINVAL;
1218 
1219 	if (!check_gamma5_errata(attrs->gamma5))
1220 		return -EINVAL;
1221 
1222 	return 0;
1223 }
1224 
1225 int intel_overlay_attrs(struct drm_device *dev, void *data,
1226 			struct drm_file *file_priv)
1227 {
1228 	struct drm_intel_overlay_attrs *attrs = data;
1229 	drm_i915_private_t *dev_priv = dev->dev_private;
1230 	struct intel_overlay *overlay;
1231 	struct overlay_registers __iomem *regs;
1232 	int ret;
1233 
1234 	/* No need to check for DRIVER_MODESET - we don't set it up then. */
1235 	overlay = dev_priv->overlay;
1236 	if (!overlay) {
1237 		DRM_DEBUG("userspace bug: no overlay\n");
1238 		return -ENODEV;
1239 	}
1240 
1241 	drm_modeset_lock_all(dev);
1242 	mutex_lock(&dev->struct_mutex);
1243 
1244 	ret = -EINVAL;
1245 	if (!(attrs->flags & I915_OVERLAY_UPDATE_ATTRS)) {
1246 		attrs->color_key  = overlay->color_key;
1247 		attrs->brightness = overlay->brightness;
1248 		attrs->contrast   = overlay->contrast;
1249 		attrs->saturation = overlay->saturation;
1250 
1251 		if (!IS_GEN2(dev)) {
1252 			attrs->gamma0 = I915_READ(OGAMC0);
1253 			attrs->gamma1 = I915_READ(OGAMC1);
1254 			attrs->gamma2 = I915_READ(OGAMC2);
1255 			attrs->gamma3 = I915_READ(OGAMC3);
1256 			attrs->gamma4 = I915_READ(OGAMC4);
1257 			attrs->gamma5 = I915_READ(OGAMC5);
1258 		}
1259 	} else {
1260 		if (attrs->brightness < -128 || attrs->brightness > 127)
1261 			goto out_unlock;
1262 		if (attrs->contrast > 255)
1263 			goto out_unlock;
1264 		if (attrs->saturation > 1023)
1265 			goto out_unlock;
1266 
1267 		overlay->color_key  = attrs->color_key;
1268 		overlay->brightness = attrs->brightness;
1269 		overlay->contrast   = attrs->contrast;
1270 		overlay->saturation = attrs->saturation;
1271 
1272 		regs = intel_overlay_map_regs(overlay);
1273 		if (!regs) {
1274 			ret = -ENOMEM;
1275 			goto out_unlock;
1276 		}
1277 
1278 		update_reg_attrs(overlay, regs);
1279 
1280 		intel_overlay_unmap_regs(overlay, regs);
1281 
1282 		if (attrs->flags & I915_OVERLAY_UPDATE_GAMMA) {
1283 			if (IS_GEN2(dev))
1284 				goto out_unlock;
1285 
1286 			if (overlay->active) {
1287 				ret = -EBUSY;
1288 				goto out_unlock;
1289 			}
1290 
1291 			ret = check_gamma(attrs);
1292 			if (ret)
1293 				goto out_unlock;
1294 
1295 			I915_WRITE(OGAMC0, attrs->gamma0);
1296 			I915_WRITE(OGAMC1, attrs->gamma1);
1297 			I915_WRITE(OGAMC2, attrs->gamma2);
1298 			I915_WRITE(OGAMC3, attrs->gamma3);
1299 			I915_WRITE(OGAMC4, attrs->gamma4);
1300 			I915_WRITE(OGAMC5, attrs->gamma5);
1301 		}
1302 	}
1303 
1304 	ret = 0;
1305 out_unlock:
1306 	mutex_unlock(&dev->struct_mutex);
1307 	drm_modeset_unlock_all(dev);
1308 
1309 	return ret;
1310 }
1311 
1312 void intel_setup_overlay(struct drm_device *dev)
1313 {
1314 	drm_i915_private_t *dev_priv = dev->dev_private;
1315 	struct intel_overlay *overlay;
1316 	struct drm_i915_gem_object *reg_bo;
1317 	struct overlay_registers __iomem *regs;
1318 	int ret;
1319 
1320 	if (!HAS_OVERLAY(dev))
1321 		return;
1322 
1323 	overlay = kzalloc(sizeof(*overlay), GFP_KERNEL);
1324 	if (!overlay)
1325 		return;
1326 
1327 	mutex_lock(&dev->struct_mutex);
1328 	if (WARN_ON(dev_priv->overlay))
1329 		goto out_free;
1330 
1331 	overlay->dev = dev;
1332 
1333 	reg_bo = NULL;
1334 	if (!OVERLAY_NEEDS_PHYSICAL(dev))
1335 		reg_bo = i915_gem_object_create_stolen(dev, PAGE_SIZE);
1336 	if (reg_bo == NULL)
1337 		reg_bo = i915_gem_alloc_object(dev, PAGE_SIZE);
1338 	if (reg_bo == NULL)
1339 		goto out_free;
1340 	overlay->reg_bo = reg_bo;
1341 
1342 	if (OVERLAY_NEEDS_PHYSICAL(dev)) {
1343 		ret = i915_gem_attach_phys_object(dev, reg_bo,
1344 						  I915_GEM_PHYS_OVERLAY_REGS,
1345 						  PAGE_SIZE);
1346 		if (ret) {
1347 			DRM_ERROR("failed to attach phys overlay regs\n");
1348 			goto out_free_bo;
1349 		}
1350 		overlay->flip_addr = reg_bo->phys_obj->handle->busaddr;
1351 	} else {
1352 		ret = i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, true, false);
1353 		if (ret) {
1354 			DRM_ERROR("failed to pin overlay register bo\n");
1355 			goto out_free_bo;
1356 		}
1357 		overlay->flip_addr = i915_gem_obj_ggtt_offset(reg_bo);
1358 
1359 		ret = i915_gem_object_set_to_gtt_domain(reg_bo, true);
1360 		if (ret) {
1361 			DRM_ERROR("failed to move overlay register bo into the GTT\n");
1362 			goto out_unpin_bo;
1363 		}
1364 	}
1365 
1366 	/* init all values */
1367 	overlay->color_key = 0x0101fe;
1368 	overlay->brightness = -19;
1369 	overlay->contrast = 75;
1370 	overlay->saturation = 146;
1371 
1372 	regs = intel_overlay_map_regs(overlay);
1373 	if (!regs)
1374 		goto out_unpin_bo;
1375 
1376 	memset_io(regs, 0, sizeof(struct overlay_registers));
1377 	update_polyphase_filter(regs);
1378 	update_reg_attrs(overlay, regs);
1379 
1380 	intel_overlay_unmap_regs(overlay, regs);
1381 
1382 	dev_priv->overlay = overlay;
1383 	mutex_unlock(&dev->struct_mutex);
1384 	DRM_INFO("initialized overlay support\n");
1385 	return;
1386 
1387 out_unpin_bo:
1388 	if (!OVERLAY_NEEDS_PHYSICAL(dev))
1389 		i915_gem_object_unpin(reg_bo);
1390 out_free_bo:
1391 	drm_gem_object_unreference(&reg_bo->base);
1392 out_free:
1393 	mutex_unlock(&dev->struct_mutex);
1394 	kfree(overlay);
1395 	return;
1396 }
1397 
1398 void intel_cleanup_overlay(struct drm_device *dev)
1399 {
1400 	drm_i915_private_t *dev_priv = dev->dev_private;
1401 
1402 	if (!dev_priv->overlay)
1403 		return;
1404 
1405 	/* The bo's should be free'd by the generic code already.
1406 	 * Furthermore modesetting teardown happens beforehand so the
1407 	 * hardware should be off already */
1408 	BUG_ON(dev_priv->overlay->active);
1409 
1410 	drm_gem_object_unreference_unlocked(&dev_priv->overlay->reg_bo->base);
1411 	kfree(dev_priv->overlay);
1412 }
1413 
/*
 * Snapshot of the overlay state taken for error reporting.  It is filled in
 * by intel_overlay_capture_error_state() and formatted by
 * intel_overlay_print_error_state(); both are compiled out (#if 0) in this
 * file at present.
 */
struct intel_overlay_error_state {
	/* copy of the overlay register block */
	struct overlay_registers regs;
	/* where the register block lives: phys-object vaddr or GGTT offset */
	unsigned long base;
	/* value read from the DOVSTA register */
	u32 dovsta;
	/* value read from the ISR register */
	u32 isr;
};
1420 
1421 #if 0
1422 static struct overlay_registers __iomem *
1423 intel_overlay_map_regs_atomic(struct intel_overlay *overlay)
1424 {
1425 	drm_i915_private_t *dev_priv = overlay->dev->dev_private;
1426 	struct overlay_registers __iomem *regs;
1427 
1428 	if (OVERLAY_NEEDS_PHYSICAL(overlay->dev))
1429 		/* Cast to make sparse happy, but it's wc memory anyway, so
1430 		 * equivalent to the wc io mapping on X86. */
1431 		regs = (struct overlay_registers __iomem *)
1432 			overlay->reg_bo->phys_obj->handle->vaddr;
1433 	else
1434 		regs = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
1435 						i915_gem_obj_ggtt_offset(overlay->reg_bo));
1436 
1437 	return regs;
1438 }
1439 
1440 static void intel_overlay_unmap_regs_atomic(struct intel_overlay *overlay,
1441 					struct overlay_registers __iomem *regs)
1442 {
1443 	if (!OVERLAY_NEEDS_PHYSICAL(overlay->dev))
1444 		io_mapping_unmap_atomic(regs);
1445 }
1446 
1447 struct intel_overlay_error_state *
1448 intel_overlay_capture_error_state(struct drm_device *dev)
1449 {
1450 	drm_i915_private_t *dev_priv = dev->dev_private;
1451 	struct intel_overlay *overlay = dev_priv->overlay;
1452 	struct intel_overlay_error_state *error;
1453 	struct overlay_registers __iomem *regs;
1454 
1455 	if (!overlay || !overlay->active)
1456 		return NULL;
1457 
1458 	error = kmalloc(sizeof(*error), M_DRM, M_WAITOK | M_NULLOK);
1459 	if (error == NULL)
1460 		return NULL;
1461 
1462 	error->dovsta = I915_READ(DOVSTA);
1463 	error->isr = I915_READ(ISR);
1464 	if (OVERLAY_NEEDS_PHYSICAL(overlay->dev))
1465 		error->base = (__force long)overlay->reg_bo->phys_obj->handle->vaddr;
1466 	else
1467 		error->base = i915_gem_obj_ggtt_offset(overlay->reg_bo);
1468 
1469 	regs = intel_overlay_map_regs_atomic(overlay);
1470 	if (!regs)
1471 		goto err;
1472 
1473 	memcpy_fromio(&error->regs, regs, sizeof(struct overlay_registers));
1474 	intel_overlay_unmap_regs_atomic(overlay, regs);
1475 
1476 	return error;
1477 
1478 err:
1479 	kfree(error);
1480 	return NULL;
1481 }
1482 
1483 void
1484 intel_overlay_print_error_state(struct drm_i915_error_state_buf *m,
1485 				struct intel_overlay_error_state *error)
1486 {
1487 	i915_error_printf(m, "Overlay, status: 0x%08x, interrupt: 0x%08x\n",
1488 			  error->dovsta, error->isr);
1489 	i915_error_printf(m, "  Register file at 0x%08lx:\n",
1490 			  error->base);
1491 
1492 #define P(x) i915_error_printf(m, "    " #x ":	0x%08x\n", error->regs.x)
1493 	P(OBUF_0Y);
1494 	P(OBUF_1Y);
1495 	P(OBUF_0U);
1496 	P(OBUF_0V);
1497 	P(OBUF_1U);
1498 	P(OBUF_1V);
1499 	P(OSTRIDE);
1500 	P(YRGB_VPH);
1501 	P(UV_VPH);
1502 	P(HORZ_PH);
1503 	P(INIT_PHS);
1504 	P(DWINPOS);
1505 	P(DWINSZ);
1506 	P(SWIDTH);
1507 	P(SWIDTHSW);
1508 	P(SHEIGHT);
1509 	P(YRGBSCALE);
1510 	P(UVSCALE);
1511 	P(OCLRC0);
1512 	P(OCLRC1);
1513 	P(DCLRKV);
1514 	P(DCLRKM);
1515 	P(SCLRKVH);
1516 	P(SCLRKVL);
1517 	P(SCLRKEN);
1518 	P(OCONFIG);
1519 	P(OCMD);
1520 	P(OSTART_0Y);
1521 	P(OSTART_1Y);
1522 	P(OSTART_0U);
1523 	P(OSTART_0V);
1524 	P(OSTART_1U);
1525 	P(OSTART_1V);
1526 	P(OTILEOFF_0Y);
1527 	P(OTILEOFF_1Y);
1528 	P(OTILEOFF_0U);
1529 	P(OTILEOFF_0V);
1530 	P(OTILEOFF_1U);
1531 	P(OTILEOFF_1V);
1532 	P(FASTHSCALE);
1533 	P(UVSCALEV);
1534 #undef P
1535 }
1536 #endif
1537