xref: /dragonfly/sys/dev/drm/i915/intel_overlay.c (revision 0db87cb7)
1 /*
2  * Copyright © 2009
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Daniel Vetter <daniel@ffwll.ch>
25  *
26  * Derived from Xorg ddx, xf86-video-intel, src/i830_video.c
27  */
28 #include <drm/drmP.h>
29 #include <drm/i915_drm.h>
30 #include "i915_drv.h"
31 #include "i915_reg.h"
32 #include "intel_drv.h"
33 
/*
 * Limits for the overlay source size. According to the Intel docs, the real
 * limits are:
 *   Y width: 4095, UV width (planar): 2047,
 *   Y height: 2047, UV height (planar): 1023.
 * But xorg assumes 2048 for both height and width. Use the minimum of both.
 */
#define IMAGE_MAX_WIDTH		2048
#define IMAGE_MAX_HEIGHT	(2 * 1023)
/* on 830 and 845 these large limits result in the card hanging */
#define IMAGE_MAX_WIDTH_LEGACY	1024
#define IMAGE_MAX_HEIGHT_LEGACY	1088
43 
/* overlay register definitions */

/* OCMD register: mirroring, byte order, source format and buffer selection */
#define OCMD_TILED_SURFACE	(0x1 << 19)
#define OCMD_MIRROR_MASK	(0x3 << 17)
#define OCMD_MIRROR_MODE	(0x3 << 17)
#define OCMD_MIRROR_HORIZONTAL	(0x1 << 17)
#define OCMD_MIRROR_VERTICAL	(0x2 << 17)
#define OCMD_MIRROR_BOTH	(0x3 << 17)
#define OCMD_BYTEORDER_MASK	(0x3 << 14) /* zero for YUYV or FOURCC YUY2 */
#define OCMD_UV_SWAP		(0x1 << 14) /* YVYU */
#define OCMD_Y_SWAP		(0x2 << 14) /* UYVY or FOURCC UYVY */
#define OCMD_Y_AND_UV_SWAP	(0x3 << 14) /* VYUY */
#define OCMD_SOURCE_FORMAT_MASK	(0xf << 10)
#define OCMD_RGB_888		(0x1 << 10) /* not in i965 Intel docs */
#define OCMD_RGB_555		(0x2 << 10) /* not in i965 Intel docs */
#define OCMD_RGB_565		(0x3 << 10) /* not in i965 Intel docs */
#define OCMD_YUV_422_PACKED	(0x8 << 10)
#define OCMD_YUV_411_PACKED	(0x9 << 10) /* not in i965 Intel docs */
#define OCMD_YUV_420_PLANAR	(0xc << 10)
#define OCMD_YUV_422_PLANAR	(0xd << 10)
#define OCMD_YUV_410_PLANAR	(0xe << 10) /* also 411 */
#define OCMD_TVSYNCFLIP_PARITY	(0x1 << 9)
#define OCMD_TVSYNCFLIP_ENABLE	(0x1 << 7)
#define OCMD_BUF_TYPE_MASK	(0x1 << 5)
#define OCMD_BUF_TYPE_FRAME	(0x0 << 5)
#define OCMD_BUF_TYPE_FIELD	(0x1 << 5)
#define OCMD_TEST_MODE		(0x1 << 4)
#define OCMD_BUFFER_SELECT	(0x3 << 2)
#define OCMD_BUFFER0		(0x0 << 2)
#define OCMD_BUFFER1		(0x1 << 2)
/*
 * NOTE(review): OCMD_FIELD_SELECT sits on bit 2 while OCMD_FIELD0/FIELD1
 * use bit 1 -- confirm against the hardware documentation.
 */
#define OCMD_FIELD_SELECT	(0x1 << 2)
#define OCMD_FIELD0		(0x0 << 1)
#define OCMD_FIELD1		(0x1 << 1)
#define OCMD_ENABLE		(0x1 << 0)
78 
/* OCONFIG register: pipe selection, colour-space conversion, line buffers */
#define OCONF_PIPE_MASK		(0x1 << 18)
#define OCONF_PIPE_A		(0x0 << 18)
#define OCONF_PIPE_B		(0x1 << 18)
#define OCONF_GAMMA2_ENABLE	(0x1 << 16)
#define OCONF_CSC_MODE_BT601	(0x0 << 5)
#define OCONF_CSC_MODE_BT709	(0x1 << 5)
#define OCONF_CSC_BYPASS	(0x1 << 4)
#define OCONF_CC_OUT_8BIT	(0x1 << 3)
#define OCONF_TEST_MODE		(0x1 << 2)
#define OCONF_THREE_LINE_BUFFER	(0x1 << 0)
#define OCONF_TWO_LINE_BUFFER	(0x0 << 0)
91 
/* DCLRKM (dst-key) register */
/* Unsigned constant: shifting a signed 1 into bit 31 is undefined behaviour. */
#define DST_KEY_ENABLE		(0x1u << 31)
#define CLK_RGB24_MASK		0x0
#define CLK_RGB16_MASK		0x070307
#define CLK_RGB15_MASK		0x070707
#define CLK_RGB8I_MASK		0xffffff

/*
 * Spread the colour fields of a 16 bpp (5:6:5) or 15 bpp (5:5:5) pixel
 * value into the top bits of the three bytes of a 24 bit colour key.
 * The argument is parenthesized on every use so that compound
 * expressions (e.g. "a | b") are evaluated as a whole.
 */
#define RGB16_TO_COLORKEY(c) \
	((((c) & 0xF800) << 8) | (((c) & 0x07E0) << 5) | (((c) & 0x001F) << 3))
#define RGB15_TO_COLORKEY(c) \
	((((c) & 0x7c00) << 9) | (((c) & 0x03E0) << 6) | (((c) & 0x001F) << 3))
103 
/* overlay flip addr flag, OR-ed into the flip address emitted to the ring */
#define OFC_UPDATE		(0x1 << 0)

/* polyphase filter coefficients: taps per filter and number of phases */
#define N_HORIZ_Y_TAPS		5
#define N_VERT_Y_TAPS		3
#define N_HORIZ_UV_TAPS		3
#define N_VERT_UV_TAPS		3
#define N_PHASES		17
#define MAX_TAPS		5
114 
115 /* memory bufferd overlay registers */
116 struct overlay_registers {
117 	u32 OBUF_0Y;
118 	u32 OBUF_1Y;
119 	u32 OBUF_0U;
120 	u32 OBUF_0V;
121 	u32 OBUF_1U;
122 	u32 OBUF_1V;
123 	u32 OSTRIDE;
124 	u32 YRGB_VPH;
125 	u32 UV_VPH;
126 	u32 HORZ_PH;
127 	u32 INIT_PHS;
128 	u32 DWINPOS;
129 	u32 DWINSZ;
130 	u32 SWIDTH;
131 	u32 SWIDTHSW;
132 	u32 SHEIGHT;
133 	u32 YRGBSCALE;
134 	u32 UVSCALE;
135 	u32 OCLRC0;
136 	u32 OCLRC1;
137 	u32 DCLRKV;
138 	u32 DCLRKM;
139 	u32 SCLRKVH;
140 	u32 SCLRKVL;
141 	u32 SCLRKEN;
142 	u32 OCONFIG;
143 	u32 OCMD;
144 	u32 RESERVED1; /* 0x6C */
145 	u32 OSTART_0Y;
146 	u32 OSTART_1Y;
147 	u32 OSTART_0U;
148 	u32 OSTART_0V;
149 	u32 OSTART_1U;
150 	u32 OSTART_1V;
151 	u32 OTILEOFF_0Y;
152 	u32 OTILEOFF_1Y;
153 	u32 OTILEOFF_0U;
154 	u32 OTILEOFF_0V;
155 	u32 OTILEOFF_1U;
156 	u32 OTILEOFF_1V;
157 	u32 FASTHSCALE; /* 0xA0 */
158 	u32 UVSCALEV; /* 0xA4 */
159 	u32 RESERVEDC[(0x200 - 0xA8) / 4]; /* 0xA8 - 0x1FC */
160 	u16 Y_VCOEFS[N_VERT_Y_TAPS * N_PHASES]; /* 0x200 */
161 	u16 RESERVEDD[0x100 / 2 - N_VERT_Y_TAPS * N_PHASES];
162 	u16 Y_HCOEFS[N_HORIZ_Y_TAPS * N_PHASES]; /* 0x300 */
163 	u16 RESERVEDE[0x200 / 2 - N_HORIZ_Y_TAPS * N_PHASES];
164 	u16 UV_VCOEFS[N_VERT_UV_TAPS * N_PHASES]; /* 0x500 */
165 	u16 RESERVEDF[0x100 / 2 - N_VERT_UV_TAPS * N_PHASES];
166 	u16 UV_HCOEFS[N_HORIZ_UV_TAPS * N_PHASES]; /* 0x600 */
167 	u16 RESERVEDG[0x100 / 2 - N_HORIZ_UV_TAPS * N_PHASES];
168 };
169 
170 struct intel_overlay {
171 	struct drm_device *dev;
172 	struct intel_crtc *crtc;
173 	struct drm_i915_gem_object *vid_bo;
174 	struct drm_i915_gem_object *old_vid_bo;
175 	int active;
176 	int pfit_active;
177 	u32 pfit_vscale_ratio; /* shifted-point number, (1<<12) == 1.0 */
178 	u32 color_key;
179 	u32 brightness, contrast, saturation;
180 	u32 old_xscale, old_yscale;
181 	/* register access */
182 	u32 flip_addr;
183 	struct drm_i915_gem_object *reg_bo;
184 	/* flip handling */
185 	uint32_t last_flip_req;
186 	void (*flip_tail)(struct intel_overlay *);
187 };
188 
189 static struct overlay_registers __iomem *
190 intel_overlay_map_regs(struct intel_overlay *overlay)
191 {
192 	struct drm_i915_private *dev_priv = overlay->dev->dev_private;
193 	struct overlay_registers __iomem *regs;
194 
195 	if (OVERLAY_NEEDS_PHYSICAL(overlay->dev))
196 		regs = (struct overlay_registers __iomem *)overlay->reg_bo->phys_handle->vaddr;
197 	else
198 		regs = io_mapping_map_wc(dev_priv->gtt.mappable,
199 					 i915_gem_obj_ggtt_offset(overlay->reg_bo));
200 
201 	return regs;
202 }
203 
204 static void intel_overlay_unmap_regs(struct intel_overlay *overlay,
205 				     struct overlay_registers __iomem *regs)
206 {
207 	if (!OVERLAY_NEEDS_PHYSICAL(overlay->dev))
208 		pmap_unmapdev((vm_offset_t)regs, PAGE_SIZE);
209 }
210 
211 static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
212 					 void (*tail)(struct intel_overlay *))
213 {
214 	struct drm_device *dev = overlay->dev;
215 	struct drm_i915_private *dev_priv = dev->dev_private;
216 	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
217 	int ret;
218 
219 	BUG_ON(overlay->last_flip_req);
220 	ret = i915_add_request(ring, &overlay->last_flip_req);
221 	if (ret)
222 		return ret;
223 
224 	overlay->flip_tail = tail;
225 	ret = i915_wait_seqno(ring, overlay->last_flip_req);
226 	if (ret)
227 		return ret;
228 	i915_gem_retire_requests(dev);
229 
230 	overlay->last_flip_req = 0;
231 	return 0;
232 }
233 
234 /* overlay needs to be disable in OCMD reg */
235 static int intel_overlay_on(struct intel_overlay *overlay)
236 {
237 	struct drm_device *dev = overlay->dev;
238 	struct drm_i915_private *dev_priv = dev->dev_private;
239 	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
240 	int ret;
241 
242 	BUG_ON(overlay->active);
243 	overlay->active = 1;
244 
245 	WARN_ON(IS_I830(dev) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE));
246 
247 	ret = intel_ring_begin(ring, 4);
248 	if (ret)
249 		return ret;
250 
251 	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON);
252 	intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE);
253 	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
254 	intel_ring_emit(ring, MI_NOOP);
255 	intel_ring_advance(ring);
256 
257 	return intel_overlay_do_wait_request(overlay, NULL);
258 }
259 
260 /* overlay needs to be enabled in OCMD reg */
261 static int intel_overlay_continue(struct intel_overlay *overlay,
262 				  bool load_polyphase_filter)
263 {
264 	struct drm_device *dev = overlay->dev;
265 	struct drm_i915_private *dev_priv = dev->dev_private;
266 	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
267 	u32 flip_addr = overlay->flip_addr;
268 	u32 tmp;
269 	int ret;
270 
271 	BUG_ON(!overlay->active);
272 
273 	if (load_polyphase_filter)
274 		flip_addr |= OFC_UPDATE;
275 
276 	/* check for underruns */
277 	tmp = I915_READ(DOVSTA);
278 	if (tmp & (1 << 17))
279 		DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp);
280 
281 	ret = intel_ring_begin(ring, 2);
282 	if (ret)
283 		return ret;
284 
285 	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
286 	intel_ring_emit(ring, flip_addr);
287 	intel_ring_advance(ring);
288 
289 	return i915_add_request(ring, &overlay->last_flip_req);
290 }
291 
292 static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
293 {
294 	struct drm_i915_gem_object *obj = overlay->old_vid_bo;
295 
296 	i915_gem_object_ggtt_unpin(obj);
297 	drm_gem_object_unreference(&obj->base);
298 
299 	overlay->old_vid_bo = NULL;
300 }
301 
302 static void intel_overlay_off_tail(struct intel_overlay *overlay)
303 {
304 	struct drm_i915_gem_object *obj = overlay->vid_bo;
305 
306 	/* never have the overlay hw on without showing a frame */
307 	BUG_ON(!overlay->vid_bo);
308 
309 	i915_gem_object_ggtt_unpin(obj);
310 	drm_gem_object_unreference(&obj->base);
311 	overlay->vid_bo = NULL;
312 
313 	overlay->crtc->overlay = NULL;
314 	overlay->crtc = NULL;
315 	overlay->active = 0;
316 }
317 
318 /* overlay needs to be disabled in OCMD reg */
319 static int intel_overlay_off(struct intel_overlay *overlay)
320 {
321 	struct drm_device *dev = overlay->dev;
322 	struct drm_i915_private *dev_priv = dev->dev_private;
323 	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
324 	u32 flip_addr = overlay->flip_addr;
325 	int ret;
326 
327 	BUG_ON(!overlay->active);
328 
329 	/* According to intel docs the overlay hw may hang (when switching
330 	 * off) without loading the filter coeffs. It is however unclear whether
331 	 * this applies to the disabling of the overlay or to the switching off
332 	 * of the hw. Do it in both cases */
333 	flip_addr |= OFC_UPDATE;
334 
335 	ret = intel_ring_begin(ring, 6);
336 	if (ret)
337 		return ret;
338 
339 	/* wait for overlay to go idle */
340 	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
341 	intel_ring_emit(ring, flip_addr);
342 	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
343 	/* turn overlay off */
344 	if (IS_I830(dev)) {
345 		/* Workaround: Don't disable the overlay fully, since otherwise
346 		 * it dies on the next OVERLAY_ON cmd. */
347 		intel_ring_emit(ring, MI_NOOP);
348 		intel_ring_emit(ring, MI_NOOP);
349 		intel_ring_emit(ring, MI_NOOP);
350 	} else {
351 		intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_OFF);
352 		intel_ring_emit(ring, flip_addr);
353 		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
354 	}
355 	intel_ring_advance(ring);
356 
357 	return intel_overlay_do_wait_request(overlay, intel_overlay_off_tail);
358 }
359 
360 /* recover from an interruption due to a signal
361  * We have to be careful not to repeat work forever an make forward progess. */
362 static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
363 {
364 	struct drm_device *dev = overlay->dev;
365 	struct drm_i915_private *dev_priv = dev->dev_private;
366 	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
367 	int ret;
368 
369 	if (overlay->last_flip_req == 0)
370 		return 0;
371 
372 	ret = i915_wait_seqno(ring, overlay->last_flip_req);
373 	if (ret)
374 		return ret;
375 	i915_gem_retire_requests(dev);
376 
377 	if (overlay->flip_tail)
378 		overlay->flip_tail(overlay);
379 
380 	overlay->last_flip_req = 0;
381 	return 0;
382 }
383 
384 /* Wait for pending overlay flip and release old frame.
385  * Needs to be called before the overlay register are changed
386  * via intel_overlay_(un)map_regs
387  */
388 static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
389 {
390 	struct drm_device *dev = overlay->dev;
391 	struct drm_i915_private *dev_priv = dev->dev_private;
392 	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
393 	int ret;
394 
395 	/* Only wait if there is actually an old frame to release to
396 	 * guarantee forward progress.
397 	 */
398 	if (!overlay->old_vid_bo)
399 		return 0;
400 
401 	if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
402 		/* synchronous slowpath */
403 		ret = intel_ring_begin(ring, 2);
404 		if (ret)
405 			return ret;
406 
407 		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
408 		intel_ring_emit(ring, MI_NOOP);
409 		intel_ring_advance(ring);
410 
411 		ret = intel_overlay_do_wait_request(overlay,
412 						    intel_overlay_release_old_vid_tail);
413 		if (ret)
414 			return ret;
415 	}
416 
417 	intel_overlay_release_old_vid_tail(overlay);
418 
419 
420 	i915_gem_track_fb(overlay->old_vid_bo, NULL,
421 			  INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
422 	return 0;
423 }
424 
/*
 * Parameters of one overlay image update, as consumed by
 * intel_overlay_do_put_image() and its helpers.
 */
struct put_image_params {
	int format;			/* I915_OVERLAY_* format flags */
	short dst_x, dst_y;		/* destination window position */
	short dst_w, dst_h;		/* destination window size */
	short src_w;
	short src_scan_h, src_scan_w;	/* source size used for the scale factors */
	short src_h;
	short stride_Y, stride_UV;
	int offset_Y, offset_U, offset_V; /* plane offsets inside the buffer object */
};
441 
442 static int packed_depth_bytes(u32 format)
443 {
444 	switch (format & I915_OVERLAY_DEPTH_MASK) {
445 	case I915_OVERLAY_YUV422:
446 		return 4;
447 	case I915_OVERLAY_YUV411:
448 		/* return 6; not implemented */
449 	default:
450 		return -EINVAL;
451 	}
452 }
453 
454 static int packed_width_bytes(u32 format, short width)
455 {
456 	switch (format & I915_OVERLAY_DEPTH_MASK) {
457 	case I915_OVERLAY_YUV422:
458 		return width << 1;
459 	default:
460 		return -EINVAL;
461 	}
462 }
463 
464 static int uv_hsubsampling(u32 format)
465 {
466 	switch (format & I915_OVERLAY_DEPTH_MASK) {
467 	case I915_OVERLAY_YUV422:
468 	case I915_OVERLAY_YUV420:
469 		return 2;
470 	case I915_OVERLAY_YUV411:
471 	case I915_OVERLAY_YUV410:
472 		return 4;
473 	default:
474 		return -EINVAL;
475 	}
476 }
477 
478 static int uv_vsubsampling(u32 format)
479 {
480 	switch (format & I915_OVERLAY_DEPTH_MASK) {
481 	case I915_OVERLAY_YUV420:
482 	case I915_OVERLAY_YUV410:
483 		return 2;
484 	case I915_OVERLAY_YUV422:
485 	case I915_OVERLAY_YUV411:
486 		return 1;
487 	default:
488 		return -EINVAL;
489 	}
490 }
491 
492 static u32 calc_swidthsw(struct drm_device *dev, u32 offset, u32 width)
493 {
494 	u32 mask, shift, ret;
495 	if (IS_GEN2(dev)) {
496 		mask = 0x1f;
497 		shift = 5;
498 	} else {
499 		mask = 0x3f;
500 		shift = 6;
501 	}
502 	ret = ((offset + width + mask) >> shift) - (offset >> shift);
503 	if (!IS_GEN2(dev))
504 		ret <<= 1;
505 	ret -= 1;
506 	return ret << 2;
507 }
508 
509 static const u16 y_static_hcoeffs[N_HORIZ_Y_TAPS * N_PHASES] = {
510 	0x3000, 0xb4a0, 0x1930, 0x1920, 0xb4a0,
511 	0x3000, 0xb500, 0x19d0, 0x1880, 0xb440,
512 	0x3000, 0xb540, 0x1a88, 0x2f80, 0xb3e0,
513 	0x3000, 0xb580, 0x1b30, 0x2e20, 0xb380,
514 	0x3000, 0xb5c0, 0x1bd8, 0x2cc0, 0xb320,
515 	0x3020, 0xb5e0, 0x1c60, 0x2b80, 0xb2c0,
516 	0x3020, 0xb5e0, 0x1cf8, 0x2a20, 0xb260,
517 	0x3020, 0xb5e0, 0x1d80, 0x28e0, 0xb200,
518 	0x3020, 0xb5c0, 0x1e08, 0x3f40, 0xb1c0,
519 	0x3020, 0xb580, 0x1e78, 0x3ce0, 0xb160,
520 	0x3040, 0xb520, 0x1ed8, 0x3aa0, 0xb120,
521 	0x3040, 0xb4a0, 0x1f30, 0x3880, 0xb0e0,
522 	0x3040, 0xb400, 0x1f78, 0x3680, 0xb0a0,
523 	0x3020, 0xb340, 0x1fb8, 0x34a0, 0xb060,
524 	0x3020, 0xb240, 0x1fe0, 0x32e0, 0xb040,
525 	0x3020, 0xb140, 0x1ff8, 0x3160, 0xb020,
526 	0xb000, 0x3000, 0x0800, 0x3000, 0xb000
527 };
528 
529 static const u16 uv_static_hcoeffs[N_HORIZ_UV_TAPS * N_PHASES] = {
530 	0x3000, 0x1800, 0x1800, 0xb000, 0x18d0, 0x2e60,
531 	0xb000, 0x1990, 0x2ce0, 0xb020, 0x1a68, 0x2b40,
532 	0xb040, 0x1b20, 0x29e0, 0xb060, 0x1bd8, 0x2880,
533 	0xb080, 0x1c88, 0x3e60, 0xb0a0, 0x1d28, 0x3c00,
534 	0xb0c0, 0x1db8, 0x39e0, 0xb0e0, 0x1e40, 0x37e0,
535 	0xb100, 0x1eb8, 0x3620, 0xb100, 0x1f18, 0x34a0,
536 	0xb100, 0x1f68, 0x3360, 0xb0e0, 0x1fa8, 0x3240,
537 	0xb0c0, 0x1fe0, 0x3140, 0xb060, 0x1ff0, 0x30a0,
538 	0x3000, 0x0800, 0x3000
539 };
540 
541 static void update_polyphase_filter(struct overlay_registers __iomem *regs)
542 {
543 	memcpy_toio(regs->Y_HCOEFS, y_static_hcoeffs, sizeof(y_static_hcoeffs));
544 	memcpy_toio(regs->UV_HCOEFS, uv_static_hcoeffs,
545 		    sizeof(uv_static_hcoeffs));
546 }
547 
548 static bool update_scaling_factors(struct intel_overlay *overlay,
549 				   struct overlay_registers __iomem *regs,
550 				   struct put_image_params *params)
551 {
552 	/* fixed point with a 12 bit shift */
553 	u32 xscale, yscale, xscale_UV, yscale_UV;
554 #define FP_SHIFT 12
555 #define FRACT_MASK 0xfff
556 	bool scale_changed = false;
557 	int uv_hscale = uv_hsubsampling(params->format);
558 	int uv_vscale = uv_vsubsampling(params->format);
559 
560 	if (params->dst_w > 1)
561 		xscale = ((params->src_scan_w - 1) << FP_SHIFT)
562 			/(params->dst_w);
563 	else
564 		xscale = 1 << FP_SHIFT;
565 
566 	if (params->dst_h > 1)
567 		yscale = ((params->src_scan_h - 1) << FP_SHIFT)
568 			/(params->dst_h);
569 	else
570 		yscale = 1 << FP_SHIFT;
571 
572 	/*if (params->format & I915_OVERLAY_YUV_PLANAR) {*/
573 	xscale_UV = xscale/uv_hscale;
574 	yscale_UV = yscale/uv_vscale;
575 	/* make the Y scale to UV scale ratio an exact multiply */
576 	xscale = xscale_UV * uv_hscale;
577 	yscale = yscale_UV * uv_vscale;
578 	/*} else {
579 	  xscale_UV = 0;
580 	  yscale_UV = 0;
581 	  }*/
582 
583 	if (xscale != overlay->old_xscale || yscale != overlay->old_yscale)
584 		scale_changed = true;
585 	overlay->old_xscale = xscale;
586 	overlay->old_yscale = yscale;
587 
588 	iowrite32(((yscale & FRACT_MASK) << 20) |
589 		  ((xscale >> FP_SHIFT)  << 16) |
590 		  ((xscale & FRACT_MASK) << 3),
591 		 &regs->YRGBSCALE);
592 
593 	iowrite32(((yscale_UV & FRACT_MASK) << 20) |
594 		  ((xscale_UV >> FP_SHIFT)  << 16) |
595 		  ((xscale_UV & FRACT_MASK) << 3),
596 		 &regs->UVSCALE);
597 
598 	iowrite32((((yscale    >> FP_SHIFT) << 16) |
599 		   ((yscale_UV >> FP_SHIFT) << 0)),
600 		 &regs->UVSCALEV);
601 
602 	if (scale_changed)
603 		update_polyphase_filter(regs);
604 
605 	return scale_changed;
606 }
607 
608 static void update_colorkey(struct intel_overlay *overlay,
609 			    struct overlay_registers __iomem *regs)
610 {
611 	u32 key = overlay->color_key;
612 
613 	switch (overlay->crtc->base.primary->fb->bits_per_pixel) {
614 	case 8:
615 		iowrite32(0, &regs->DCLRKV);
616 		iowrite32(CLK_RGB8I_MASK | DST_KEY_ENABLE, &regs->DCLRKM);
617 		break;
618 
619 	case 16:
620 		if (overlay->crtc->base.primary->fb->depth == 15) {
621 			iowrite32(RGB15_TO_COLORKEY(key), &regs->DCLRKV);
622 			iowrite32(CLK_RGB15_MASK | DST_KEY_ENABLE,
623 				  &regs->DCLRKM);
624 		} else {
625 			iowrite32(RGB16_TO_COLORKEY(key), &regs->DCLRKV);
626 			iowrite32(CLK_RGB16_MASK | DST_KEY_ENABLE,
627 				  &regs->DCLRKM);
628 		}
629 		break;
630 
631 	case 24:
632 	case 32:
633 		iowrite32(key, &regs->DCLRKV);
634 		iowrite32(CLK_RGB24_MASK | DST_KEY_ENABLE, &regs->DCLRKM);
635 		break;
636 	}
637 }
638 
639 static u32 overlay_cmd_reg(struct put_image_params *params)
640 {
641 	u32 cmd = OCMD_ENABLE | OCMD_BUF_TYPE_FRAME | OCMD_BUFFER0;
642 
643 	if (params->format & I915_OVERLAY_YUV_PLANAR) {
644 		switch (params->format & I915_OVERLAY_DEPTH_MASK) {
645 		case I915_OVERLAY_YUV422:
646 			cmd |= OCMD_YUV_422_PLANAR;
647 			break;
648 		case I915_OVERLAY_YUV420:
649 			cmd |= OCMD_YUV_420_PLANAR;
650 			break;
651 		case I915_OVERLAY_YUV411:
652 		case I915_OVERLAY_YUV410:
653 			cmd |= OCMD_YUV_410_PLANAR;
654 			break;
655 		}
656 	} else { /* YUV packed */
657 		switch (params->format & I915_OVERLAY_DEPTH_MASK) {
658 		case I915_OVERLAY_YUV422:
659 			cmd |= OCMD_YUV_422_PACKED;
660 			break;
661 		case I915_OVERLAY_YUV411:
662 			cmd |= OCMD_YUV_411_PACKED;
663 			break;
664 		}
665 
666 		switch (params->format & I915_OVERLAY_SWAP_MASK) {
667 		case I915_OVERLAY_NO_SWAP:
668 			break;
669 		case I915_OVERLAY_UV_SWAP:
670 			cmd |= OCMD_UV_SWAP;
671 			break;
672 		case I915_OVERLAY_Y_SWAP:
673 			cmd |= OCMD_Y_SWAP;
674 			break;
675 		case I915_OVERLAY_Y_AND_UV_SWAP:
676 			cmd |= OCMD_Y_AND_UV_SWAP;
677 			break;
678 		}
679 	}
680 
681 	return cmd;
682 }
683 
684 static int intel_overlay_do_put_image(struct intel_overlay *overlay,
685 				      struct drm_i915_gem_object *new_bo,
686 				      struct put_image_params *params)
687 {
688 	int ret, tmp_width;
689 	struct overlay_registers __iomem *regs;
690 	bool scale_changed = false;
691 	struct drm_device *dev = overlay->dev;
692 	u32 swidth, swidthsw, sheight, ostride;
693 	enum i915_pipe pipe = overlay->crtc->pipe;
694 
695 	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
696 	BUG_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
697 	BUG_ON(!overlay);
698 
699 	ret = intel_overlay_release_old_vid(overlay);
700 	if (ret != 0)
701 		return ret;
702 
703 	ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL);
704 	if (ret != 0)
705 		return ret;
706 
707 	ret = i915_gem_object_put_fence(new_bo);
708 	if (ret)
709 		goto out_unpin;
710 
711 	if (!overlay->active) {
712 		u32 oconfig;
713 		regs = intel_overlay_map_regs(overlay);
714 		if (!regs) {
715 			ret = -ENOMEM;
716 			goto out_unpin;
717 		}
718 		oconfig = OCONF_CC_OUT_8BIT;
719 		if (IS_GEN4(overlay->dev))
720 			oconfig |= OCONF_CSC_MODE_BT709;
721 		oconfig |= pipe == 0 ?
722 			OCONF_PIPE_A : OCONF_PIPE_B;
723 		iowrite32(oconfig, &regs->OCONFIG);
724 		intel_overlay_unmap_regs(overlay, regs);
725 
726 		ret = intel_overlay_on(overlay);
727 		if (ret != 0)
728 			goto out_unpin;
729 	}
730 
731 	regs = intel_overlay_map_regs(overlay);
732 	if (!regs) {
733 		ret = -ENOMEM;
734 		goto out_unpin;
735 	}
736 
737 	iowrite32((params->dst_y << 16) | params->dst_x, &regs->DWINPOS);
738 	iowrite32((params->dst_h << 16) | params->dst_w, &regs->DWINSZ);
739 
740 	if (params->format & I915_OVERLAY_YUV_PACKED)
741 		tmp_width = packed_width_bytes(params->format, params->src_w);
742 	else
743 		tmp_width = params->src_w;
744 
745 	swidth = params->src_w;
746 	swidthsw = calc_swidthsw(overlay->dev, params->offset_Y, tmp_width);
747 	sheight = params->src_h;
748 	iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_Y, &regs->OBUF_0Y);
749 	ostride = params->stride_Y;
750 
751 	if (params->format & I915_OVERLAY_YUV_PLANAR) {
752 		int uv_hscale = uv_hsubsampling(params->format);
753 		int uv_vscale = uv_vsubsampling(params->format);
754 		u32 tmp_U, tmp_V;
755 		swidth |= (params->src_w/uv_hscale) << 16;
756 		tmp_U = calc_swidthsw(overlay->dev, params->offset_U,
757 				      params->src_w/uv_hscale);
758 		tmp_V = calc_swidthsw(overlay->dev, params->offset_V,
759 				      params->src_w/uv_hscale);
760 		swidthsw |= max_t(u32, tmp_U, tmp_V) << 16;
761 		sheight |= (params->src_h/uv_vscale) << 16;
762 		iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_U, &regs->OBUF_0U);
763 		iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_V, &regs->OBUF_0V);
764 		ostride |= params->stride_UV << 16;
765 	}
766 
767 	iowrite32(swidth, &regs->SWIDTH);
768 	iowrite32(swidthsw, &regs->SWIDTHSW);
769 	iowrite32(sheight, &regs->SHEIGHT);
770 	iowrite32(ostride, &regs->OSTRIDE);
771 
772 	scale_changed = update_scaling_factors(overlay, regs, params);
773 
774 	update_colorkey(overlay, regs);
775 
776 	iowrite32(overlay_cmd_reg(params), &regs->OCMD);
777 
778 	intel_overlay_unmap_regs(overlay, regs);
779 
780 	ret = intel_overlay_continue(overlay, scale_changed);
781 	if (ret)
782 		goto out_unpin;
783 
784 	i915_gem_track_fb(overlay->vid_bo, new_bo,
785 			  INTEL_FRONTBUFFER_OVERLAY(pipe));
786 
787 	overlay->old_vid_bo = overlay->vid_bo;
788 	overlay->vid_bo = new_bo;
789 
790 	intel_frontbuffer_flip(dev,
791 			       INTEL_FRONTBUFFER_OVERLAY(pipe));
792 
793 	return 0;
794 
795 out_unpin:
796 	i915_gem_object_ggtt_unpin(new_bo);
797 	return ret;
798 }
799 
800 int intel_overlay_switch_off(struct intel_overlay *overlay)
801 {
802 	struct overlay_registers __iomem *regs;
803 	struct drm_device *dev = overlay->dev;
804 	int ret;
805 
806 	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
807 	BUG_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
808 
809 	ret = intel_overlay_recover_from_interrupt(overlay);
810 	if (ret != 0)
811 		return ret;
812 
813 	if (!overlay->active)
814 		return 0;
815 
816 	ret = intel_overlay_release_old_vid(overlay);
817 	if (ret != 0)
818 		return ret;
819 
820 	regs = intel_overlay_map_regs(overlay);
821 	iowrite32(0, &regs->OCMD);
822 	intel_overlay_unmap_regs(overlay, regs);
823 
824 	ret = intel_overlay_off(overlay);
825 	if (ret != 0)
826 		return ret;
827 
828 	intel_overlay_off_tail(overlay);
829 	return 0;
830 }
831 
832 static int check_overlay_possible_on_crtc(struct intel_overlay *overlay,
833 					  struct intel_crtc *crtc)
834 {
835 	if (!crtc->active)
836 		return -EINVAL;
837 
838 	/* can't use the overlay with double wide pipe */
839 	if (crtc->config.double_wide)
840 		return -EINVAL;
841 
842 	return 0;
843 }
844 
845 static void update_pfit_vscale_ratio(struct intel_overlay *overlay)
846 {
847 	struct drm_device *dev = overlay->dev;
848 	struct drm_i915_private *dev_priv = dev->dev_private;
849 	u32 pfit_control = I915_READ(PFIT_CONTROL);
850 	u32 ratio;
851 
852 	/* XXX: This is not the same logic as in the xorg driver, but more in
853 	 * line with the intel documentation for the i965
854 	 */
855 	if (INTEL_INFO(dev)->gen >= 4) {
856 		/* on i965 use the PGM reg to read out the autoscaler values */
857 		ratio = I915_READ(PFIT_PGM_RATIOS) >> PFIT_VERT_SCALE_SHIFT_965;
858 	} else {
859 		if (pfit_control & VERT_AUTO_SCALE)
860 			ratio = I915_READ(PFIT_AUTO_RATIOS);
861 		else
862 			ratio = I915_READ(PFIT_PGM_RATIOS);
863 		ratio >>= PFIT_VERT_SCALE_SHIFT;
864 	}
865 
866 	overlay->pfit_vscale_ratio = ratio;
867 }
868 
869 static int check_overlay_dst(struct intel_overlay *overlay,
870 			     struct drm_intel_overlay_put_image *rec)
871 {
872 	struct drm_display_mode *mode = &overlay->crtc->base.mode;
873 
874 	if (rec->dst_x < mode->hdisplay &&
875 	    rec->dst_x + rec->dst_width <= mode->hdisplay &&
876 	    rec->dst_y < mode->vdisplay &&
877 	    rec->dst_y + rec->dst_height <= mode->vdisplay)
878 		return 0;
879 	else
880 		return -EINVAL;
881 }
882 
883 static int check_overlay_scaling(struct put_image_params *rec)
884 {
885 	u32 tmp;
886 
887 	/* downscaling limit is 8.0 */
888 	tmp = ((rec->src_scan_h << 16) / rec->dst_h) >> 16;
889 	if (tmp > 7)
890 		return -EINVAL;
891 	tmp = ((rec->src_scan_w << 16) / rec->dst_w) >> 16;
892 	if (tmp > 7)
893 		return -EINVAL;
894 
895 	return 0;
896 }
897 
898 static int check_overlay_src(struct drm_device *dev,
899 			     struct drm_intel_overlay_put_image *rec,
900 			     struct drm_i915_gem_object *new_bo)
901 {
902 	int uv_hscale = uv_hsubsampling(rec->flags);
903 	int uv_vscale = uv_vsubsampling(rec->flags);
904 	u32 stride_mask;
905 	int depth;
906 	u32 tmp;
907 
908 	/* check src dimensions */
909 	if (IS_845G(dev) || IS_I830(dev)) {
910 		if (rec->src_height > IMAGE_MAX_HEIGHT_LEGACY ||
911 		    rec->src_width  > IMAGE_MAX_WIDTH_LEGACY)
912 			return -EINVAL;
913 	} else {
914 		if (rec->src_height > IMAGE_MAX_HEIGHT ||
915 		    rec->src_width  > IMAGE_MAX_WIDTH)
916 			return -EINVAL;
917 	}
918 
919 	/* better safe than sorry, use 4 as the maximal subsampling ratio */
920 	if (rec->src_height < N_VERT_Y_TAPS*4 ||
921 	    rec->src_width  < N_HORIZ_Y_TAPS*4)
922 		return -EINVAL;
923 
924 	/* check alignment constraints */
925 	switch (rec->flags & I915_OVERLAY_TYPE_MASK) {
926 	case I915_OVERLAY_RGB:
927 		/* not implemented */
928 		return -EINVAL;
929 
930 	case I915_OVERLAY_YUV_PACKED:
931 		if (uv_vscale != 1)
932 			return -EINVAL;
933 
934 		depth = packed_depth_bytes(rec->flags);
935 		if (depth < 0)
936 			return depth;
937 
938 		/* ignore UV planes */
939 		rec->stride_UV = 0;
940 		rec->offset_U = 0;
941 		rec->offset_V = 0;
942 		/* check pixel alignment */
943 		if (rec->offset_Y % depth)
944 			return -EINVAL;
945 		break;
946 
947 	case I915_OVERLAY_YUV_PLANAR:
948 		if (uv_vscale < 0 || uv_hscale < 0)
949 			return -EINVAL;
950 		/* no offset restrictions for planar formats */
951 		break;
952 
953 	default:
954 		return -EINVAL;
955 	}
956 
957 	if (rec->src_width % uv_hscale)
958 		return -EINVAL;
959 
960 	/* stride checking */
961 	if (IS_I830(dev) || IS_845G(dev))
962 		stride_mask = 255;
963 	else
964 		stride_mask = 63;
965 
966 	if (rec->stride_Y & stride_mask || rec->stride_UV & stride_mask)
967 		return -EINVAL;
968 	if (IS_GEN4(dev) && rec->stride_Y < 512)
969 		return -EINVAL;
970 
971 	tmp = (rec->flags & I915_OVERLAY_TYPE_MASK) == I915_OVERLAY_YUV_PLANAR ?
972 		4096 : 8192;
973 	if (rec->stride_Y > tmp || rec->stride_UV > 2*1024)
974 		return -EINVAL;
975 
976 	/* check buffer dimensions */
977 	switch (rec->flags & I915_OVERLAY_TYPE_MASK) {
978 	case I915_OVERLAY_RGB:
979 	case I915_OVERLAY_YUV_PACKED:
980 		/* always 4 Y values per depth pixels */
981 		if (packed_width_bytes(rec->flags, rec->src_width) > rec->stride_Y)
982 			return -EINVAL;
983 
984 		tmp = rec->stride_Y*rec->src_height;
985 		if (rec->offset_Y + tmp > new_bo->base.size)
986 			return -EINVAL;
987 		break;
988 
989 	case I915_OVERLAY_YUV_PLANAR:
990 		if (rec->src_width > rec->stride_Y)
991 			return -EINVAL;
992 		if (rec->src_width/uv_hscale > rec->stride_UV)
993 			return -EINVAL;
994 
995 		tmp = rec->stride_Y * rec->src_height;
996 		if (rec->offset_Y + tmp > new_bo->base.size)
997 			return -EINVAL;
998 
999 		tmp = rec->stride_UV * (rec->src_height / uv_vscale);
1000 		if (rec->offset_U + tmp > new_bo->base.size ||
1001 		    rec->offset_V + tmp > new_bo->base.size)
1002 			return -EINVAL;
1003 		break;
1004 	}
1005 
1006 	return 0;
1007 }
1008 
1009 /**
1010  * Return the pipe currently connected to the panel fitter,
1011  * or -1 if the panel fitter is not present or not in use
1012  */
1013 static int intel_panel_fitter_pipe(struct drm_device *dev)
1014 {
1015 	struct drm_i915_private *dev_priv = dev->dev_private;
1016 	u32  pfit_control;
1017 
1018 	/* i830 doesn't have a panel fitter */
1019 	if (INTEL_INFO(dev)->gen <= 3 && (IS_I830(dev) || !IS_MOBILE(dev)))
1020 		return -1;
1021 
1022 	pfit_control = I915_READ(PFIT_CONTROL);
1023 
1024 	/* See if the panel fitter is in use */
1025 	if ((pfit_control & PFIT_ENABLE) == 0)
1026 		return -1;
1027 
1028 	/* 965 can place panel fitter on either pipe */
1029 	if (IS_GEN4(dev))
1030 		return (pfit_control >> 29) & 0x3;
1031 
1032 	/* older chips can only use pipe 1 */
1033 	return 1;
1034 }
1035 
1036 int intel_overlay_put_image(struct drm_device *dev, void *data,
1037 			    struct drm_file *file_priv)
1038 {
1039 	struct drm_intel_overlay_put_image *put_image_rec = data;
1040 	struct drm_i915_private *dev_priv = dev->dev_private;
1041 	struct intel_overlay *overlay;
1042 	struct drm_crtc *drmmode_crtc;
1043 	struct intel_crtc *crtc;
1044 	struct drm_i915_gem_object *new_bo;
1045 	struct put_image_params *params;
1046 	int ret;
1047 
1048 	/* No need to check for DRIVER_MODESET - we don't set it up then. */
1049 	overlay = dev_priv->overlay;
1050 	if (!overlay) {
1051 		DRM_DEBUG("userspace bug: no overlay\n");
1052 		return -ENODEV;
1053 	}
1054 
1055 	if (!(put_image_rec->flags & I915_OVERLAY_ENABLE)) {
1056 		drm_modeset_lock_all(dev);
1057 		mutex_lock(&dev->struct_mutex);
1058 
1059 		ret = intel_overlay_switch_off(overlay);
1060 
1061 		mutex_unlock(&dev->struct_mutex);
1062 		drm_modeset_unlock_all(dev);
1063 
1064 		return ret;
1065 	}
1066 
1067 	params = kmalloc(sizeof(*params), M_DRM, M_WAITOK);
1068 	if (!params)
1069 		return -ENOMEM;
1070 
1071 	drmmode_crtc = drm_crtc_find(dev, put_image_rec->crtc_id);
1072 	if (!drmmode_crtc) {
1073 		ret = -ENOENT;
1074 		goto out_free;
1075 	}
1076 	crtc = to_intel_crtc(drmmode_crtc);
1077 
1078 	new_bo = to_intel_bo(drm_gem_object_lookup(dev, file_priv,
1079 						   put_image_rec->bo_handle));
1080 	if (&new_bo->base == NULL) {
1081 		ret = -ENOENT;
1082 		goto out_free;
1083 	}
1084 
1085 	drm_modeset_lock_all(dev);
1086 	mutex_lock(&dev->struct_mutex);
1087 
1088 	if (new_bo->tiling_mode) {
1089 		DRM_DEBUG_KMS("buffer used for overlay image can not be tiled\n");
1090 		ret = -EINVAL;
1091 		goto out_unlock;
1092 	}
1093 
1094 	ret = intel_overlay_recover_from_interrupt(overlay);
1095 	if (ret != 0)
1096 		goto out_unlock;
1097 
1098 	if (overlay->crtc != crtc) {
1099 		struct drm_display_mode *mode = &crtc->base.mode;
1100 		ret = intel_overlay_switch_off(overlay);
1101 		if (ret != 0)
1102 			goto out_unlock;
1103 
1104 		ret = check_overlay_possible_on_crtc(overlay, crtc);
1105 		if (ret != 0)
1106 			goto out_unlock;
1107 
1108 		overlay->crtc = crtc;
1109 		crtc->overlay = overlay;
1110 
1111 		/* line too wide, i.e. one-line-mode */
1112 		if (mode->hdisplay > 1024 &&
1113 		    intel_panel_fitter_pipe(dev) == crtc->pipe) {
1114 			overlay->pfit_active = 1;
1115 			update_pfit_vscale_ratio(overlay);
1116 		} else
1117 			overlay->pfit_active = 0;
1118 	}
1119 
1120 	ret = check_overlay_dst(overlay, put_image_rec);
1121 	if (ret != 0)
1122 		goto out_unlock;
1123 
1124 	if (overlay->pfit_active) {
1125 		params->dst_y = ((((u32)put_image_rec->dst_y) << 12) /
1126 				 overlay->pfit_vscale_ratio);
1127 		/* shifting right rounds downwards, so add 1 */
1128 		params->dst_h = ((((u32)put_image_rec->dst_height) << 12) /
1129 				 overlay->pfit_vscale_ratio) + 1;
1130 	} else {
1131 		params->dst_y = put_image_rec->dst_y;
1132 		params->dst_h = put_image_rec->dst_height;
1133 	}
1134 	params->dst_x = put_image_rec->dst_x;
1135 	params->dst_w = put_image_rec->dst_width;
1136 
1137 	params->src_w = put_image_rec->src_width;
1138 	params->src_h = put_image_rec->src_height;
1139 	params->src_scan_w = put_image_rec->src_scan_width;
1140 	params->src_scan_h = put_image_rec->src_scan_height;
1141 	if (params->src_scan_h > params->src_h ||
1142 	    params->src_scan_w > params->src_w) {
1143 		ret = -EINVAL;
1144 		goto out_unlock;
1145 	}
1146 
1147 	ret = check_overlay_src(dev, put_image_rec, new_bo);
1148 	if (ret != 0)
1149 		goto out_unlock;
1150 	params->format = put_image_rec->flags & ~I915_OVERLAY_FLAGS_MASK;
1151 	params->stride_Y = put_image_rec->stride_Y;
1152 	params->stride_UV = put_image_rec->stride_UV;
1153 	params->offset_Y = put_image_rec->offset_Y;
1154 	params->offset_U = put_image_rec->offset_U;
1155 	params->offset_V = put_image_rec->offset_V;
1156 
1157 	/* Check scaling after src size to prevent a divide-by-zero. */
1158 	ret = check_overlay_scaling(params);
1159 	if (ret != 0)
1160 		goto out_unlock;
1161 
1162 	ret = intel_overlay_do_put_image(overlay, new_bo, params);
1163 	if (ret != 0)
1164 		goto out_unlock;
1165 
1166 	mutex_unlock(&dev->struct_mutex);
1167 	drm_modeset_unlock_all(dev);
1168 
1169 	kfree(params);
1170 
1171 	return 0;
1172 
1173 out_unlock:
1174 	mutex_unlock(&dev->struct_mutex);
1175 	drm_modeset_unlock_all(dev);
1176 	drm_gem_object_unreference_unlocked(&new_bo->base);
1177 out_free:
1178 	kfree(params);
1179 
1180 	return ret;
1181 }
1182 
1183 static void update_reg_attrs(struct intel_overlay *overlay,
1184 			     struct overlay_registers __iomem *regs)
1185 {
1186 	iowrite32((overlay->contrast << 18) | (overlay->brightness & 0xff),
1187 		  &regs->OCLRC0);
1188 	iowrite32(overlay->saturation, &regs->OCLRC1);
1189 }
1190 
/*
 * Validate a pair of adjacent gamma ramp points.
 *
 * Each value packs three 8-bit channels in bits 23:0.  The pair is accepted
 * only if the top byte of both values is clear and every channel of @gamma1
 * is strictly smaller than the matching channel of @gamma2.
 *
 * Returns true when the pair is acceptable, false otherwise.
 */
static bool check_gamma_bounds(u32 gamma1, u32 gamma2)
{
	unsigned int shift;

	/* bits 31:24 do not belong to any channel and must be zero */
	if ((gamma1 | gamma2) & 0xff000000)
		return false;

	for (shift = 0; shift < 24; shift += 8) {
		u32 lower = (gamma1 >> shift) & 0xff;
		u32 upper = (gamma2 >> shift) & 0xff;

		if (lower >= upper)
			return false;
	}

	return true;
}
1205 
/*
 * Reject a gamma5 value in which any of the three 8-bit channels
 * (bits 23:0) equals 0x80.  The top byte is not examined.
 *
 * Returns true when the value is acceptable, false otherwise.
 */
static bool check_gamma5_errata(u32 gamma5)
{
	unsigned int shift;

	for (shift = 0; shift < 24; shift += 8) {
		u32 channel = (gamma5 >> shift) & 0xff;

		if (channel == 0x80)
			return false;
	}

	return true;
}
1217 
1218 static int check_gamma(struct drm_intel_overlay_attrs *attrs)
1219 {
1220 	if (!check_gamma_bounds(0, attrs->gamma0) ||
1221 	    !check_gamma_bounds(attrs->gamma0, attrs->gamma1) ||
1222 	    !check_gamma_bounds(attrs->gamma1, attrs->gamma2) ||
1223 	    !check_gamma_bounds(attrs->gamma2, attrs->gamma3) ||
1224 	    !check_gamma_bounds(attrs->gamma3, attrs->gamma4) ||
1225 	    !check_gamma_bounds(attrs->gamma4, attrs->gamma5) ||
1226 	    !check_gamma_bounds(attrs->gamma5, 0x00ffffff))
1227 		return -EINVAL;
1228 
1229 	if (!check_gamma5_errata(attrs->gamma5))
1230 		return -EINVAL;
1231 
1232 	return 0;
1233 }
1234 
1235 int intel_overlay_attrs(struct drm_device *dev, void *data,
1236 			struct drm_file *file_priv)
1237 {
1238 	struct drm_intel_overlay_attrs *attrs = data;
1239 	struct drm_i915_private *dev_priv = dev->dev_private;
1240 	struct intel_overlay *overlay;
1241 	struct overlay_registers __iomem *regs;
1242 	int ret;
1243 
1244 	/* No need to check for DRIVER_MODESET - we don't set it up then. */
1245 	overlay = dev_priv->overlay;
1246 	if (!overlay) {
1247 		DRM_DEBUG("userspace bug: no overlay\n");
1248 		return -ENODEV;
1249 	}
1250 
1251 	drm_modeset_lock_all(dev);
1252 	mutex_lock(&dev->struct_mutex);
1253 
1254 	ret = -EINVAL;
1255 	if (!(attrs->flags & I915_OVERLAY_UPDATE_ATTRS)) {
1256 		attrs->color_key  = overlay->color_key;
1257 		attrs->brightness = overlay->brightness;
1258 		attrs->contrast   = overlay->contrast;
1259 		attrs->saturation = overlay->saturation;
1260 
1261 		if (!IS_GEN2(dev)) {
1262 			attrs->gamma0 = I915_READ(OGAMC0);
1263 			attrs->gamma1 = I915_READ(OGAMC1);
1264 			attrs->gamma2 = I915_READ(OGAMC2);
1265 			attrs->gamma3 = I915_READ(OGAMC3);
1266 			attrs->gamma4 = I915_READ(OGAMC4);
1267 			attrs->gamma5 = I915_READ(OGAMC5);
1268 		}
1269 	} else {
1270 		if (attrs->brightness < -128 || attrs->brightness > 127)
1271 			goto out_unlock;
1272 		if (attrs->contrast > 255)
1273 			goto out_unlock;
1274 		if (attrs->saturation > 1023)
1275 			goto out_unlock;
1276 
1277 		overlay->color_key  = attrs->color_key;
1278 		overlay->brightness = attrs->brightness;
1279 		overlay->contrast   = attrs->contrast;
1280 		overlay->saturation = attrs->saturation;
1281 
1282 		regs = intel_overlay_map_regs(overlay);
1283 		if (!regs) {
1284 			ret = -ENOMEM;
1285 			goto out_unlock;
1286 		}
1287 
1288 		update_reg_attrs(overlay, regs);
1289 
1290 		intel_overlay_unmap_regs(overlay, regs);
1291 
1292 		if (attrs->flags & I915_OVERLAY_UPDATE_GAMMA) {
1293 			if (IS_GEN2(dev))
1294 				goto out_unlock;
1295 
1296 			if (overlay->active) {
1297 				ret = -EBUSY;
1298 				goto out_unlock;
1299 			}
1300 
1301 			ret = check_gamma(attrs);
1302 			if (ret)
1303 				goto out_unlock;
1304 
1305 			I915_WRITE(OGAMC0, attrs->gamma0);
1306 			I915_WRITE(OGAMC1, attrs->gamma1);
1307 			I915_WRITE(OGAMC2, attrs->gamma2);
1308 			I915_WRITE(OGAMC3, attrs->gamma3);
1309 			I915_WRITE(OGAMC4, attrs->gamma4);
1310 			I915_WRITE(OGAMC5, attrs->gamma5);
1311 		}
1312 	}
1313 
1314 	ret = 0;
1315 out_unlock:
1316 	mutex_unlock(&dev->struct_mutex);
1317 	drm_modeset_unlock_all(dev);
1318 
1319 	return ret;
1320 }
1321 
1322 void intel_setup_overlay(struct drm_device *dev)
1323 {
1324 	struct drm_i915_private *dev_priv = dev->dev_private;
1325 	struct intel_overlay *overlay;
1326 	struct drm_i915_gem_object *reg_bo;
1327 	struct overlay_registers __iomem *regs;
1328 	int ret;
1329 
1330 	if (!HAS_OVERLAY(dev))
1331 		return;
1332 
1333 	overlay = kzalloc(sizeof(*overlay), GFP_KERNEL);
1334 	if (!overlay)
1335 		return;
1336 
1337 	mutex_lock(&dev->struct_mutex);
1338 	if (WARN_ON(dev_priv->overlay))
1339 		goto out_free;
1340 
1341 	overlay->dev = dev;
1342 
1343 	reg_bo = NULL;
1344 	if (!OVERLAY_NEEDS_PHYSICAL(dev))
1345 		reg_bo = i915_gem_object_create_stolen(dev, PAGE_SIZE);
1346 	if (reg_bo == NULL)
1347 		reg_bo = i915_gem_alloc_object(dev, PAGE_SIZE);
1348 	if (reg_bo == NULL)
1349 		goto out_free;
1350 	overlay->reg_bo = reg_bo;
1351 
1352 	if (OVERLAY_NEEDS_PHYSICAL(dev)) {
1353 		ret = i915_gem_object_attach_phys(reg_bo, PAGE_SIZE);
1354 		if (ret) {
1355 			DRM_ERROR("failed to attach phys overlay regs\n");
1356 			goto out_free_bo;
1357 		}
1358 		overlay->flip_addr = reg_bo->phys_handle->busaddr;
1359 	} else {
1360 		ret = i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, PIN_MAPPABLE);
1361 		if (ret) {
1362 			DRM_ERROR("failed to pin overlay register bo\n");
1363 			goto out_free_bo;
1364 		}
1365 		overlay->flip_addr = i915_gem_obj_ggtt_offset(reg_bo);
1366 
1367 		ret = i915_gem_object_set_to_gtt_domain(reg_bo, true);
1368 		if (ret) {
1369 			DRM_ERROR("failed to move overlay register bo into the GTT\n");
1370 			goto out_unpin_bo;
1371 		}
1372 	}
1373 
1374 	/* init all values */
1375 	overlay->color_key = 0x0101fe;
1376 	overlay->brightness = -19;
1377 	overlay->contrast = 75;
1378 	overlay->saturation = 146;
1379 
1380 	regs = intel_overlay_map_regs(overlay);
1381 	if (!regs)
1382 		goto out_unpin_bo;
1383 
1384 	memset_io(regs, 0, sizeof(struct overlay_registers));
1385 	update_polyphase_filter(regs);
1386 	update_reg_attrs(overlay, regs);
1387 
1388 	intel_overlay_unmap_regs(overlay, regs);
1389 
1390 	dev_priv->overlay = overlay;
1391 	mutex_unlock(&dev->struct_mutex);
1392 	DRM_INFO("initialized overlay support\n");
1393 	return;
1394 
1395 out_unpin_bo:
1396 	if (!OVERLAY_NEEDS_PHYSICAL(dev))
1397 		i915_gem_object_ggtt_unpin(reg_bo);
1398 out_free_bo:
1399 	drm_gem_object_unreference(&reg_bo->base);
1400 out_free:
1401 	mutex_unlock(&dev->struct_mutex);
1402 	kfree(overlay);
1403 	return;
1404 }
1405 
1406 void intel_cleanup_overlay(struct drm_device *dev)
1407 {
1408 	struct drm_i915_private *dev_priv = dev->dev_private;
1409 
1410 	if (!dev_priv->overlay)
1411 		return;
1412 
1413 	/* The bo's should be free'd by the generic code already.
1414 	 * Furthermore modesetting teardown happens beforehand so the
1415 	 * hardware should be off already */
1416 	BUG_ON(dev_priv->overlay->active);
1417 
1418 	drm_gem_object_unreference_unlocked(&dev_priv->overlay->reg_bo->base);
1419 	kfree(dev_priv->overlay);
1420 }
1421 
1422 struct intel_overlay_error_state {
1423 	struct overlay_registers regs;
1424 	unsigned long base;
1425 	u32 dovsta;
1426 	u32 isr;
1427 };
1428 
1429 #if 0
1430 static struct overlay_registers __iomem *
1431 intel_overlay_map_regs_atomic(struct intel_overlay *overlay)
1432 {
1433 	struct drm_i915_private *dev_priv = overlay->dev->dev_private;
1434 	struct overlay_registers __iomem *regs;
1435 
1436 	if (OVERLAY_NEEDS_PHYSICAL(overlay->dev))
1437 		/* Cast to make sparse happy, but it's wc memory anyway, so
1438 		 * equivalent to the wc io mapping on X86. */
1439 		regs = (struct overlay_registers __iomem *)
1440 			overlay->reg_bo->phys_obj->handle->vaddr;
1441 	else
1442 		regs = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
1443 						i915_gem_obj_ggtt_offset(overlay->reg_bo));
1444 
1445 	return regs;
1446 }
1447 
1448 static void intel_overlay_unmap_regs_atomic(struct intel_overlay *overlay,
1449 					struct overlay_registers __iomem *regs)
1450 {
1451 	if (!OVERLAY_NEEDS_PHYSICAL(overlay->dev))
1452 		io_mapping_unmap_atomic(regs);
1453 }
1454 
1455 
1456 struct intel_overlay_error_state *
1457 intel_overlay_capture_error_state(struct drm_device *dev)
1458 {
1459 	struct drm_i915_private *dev_priv = dev->dev_private;
1460 	struct intel_overlay *overlay = dev_priv->overlay;
1461 	struct intel_overlay_error_state *error;
1462 	struct overlay_registers __iomem *regs;
1463 
1464 	if (!overlay || !overlay->active)
1465 		return NULL;
1466 
1467 	error = kmalloc(sizeof(*error), GFP_ATOMIC);
1468 	if (error == NULL)
1469 		return NULL;
1470 
1471 	error->dovsta = I915_READ(DOVSTA);
1472 	error->isr = I915_READ(ISR);
1473 	if (OVERLAY_NEEDS_PHYSICAL(overlay->dev))
1474 		error->base = (__force long)overlay->reg_bo->phys_obj->handle->vaddr;
1475 	else
1476 		error->base = i915_gem_obj_ggtt_offset(overlay->reg_bo);
1477 
1478 	regs = intel_overlay_map_regs_atomic(overlay);
1479 	if (!regs)
1480 		goto err;
1481 
1482 	memcpy_fromio(&error->regs, regs, sizeof(struct overlay_registers));
1483 	intel_overlay_unmap_regs_atomic(overlay, regs);
1484 
1485 	return error;
1486 
1487 err:
1488 	kfree(error);
1489 	return NULL;
1490 }
1491 
1492 void
1493 intel_overlay_print_error_state(struct drm_i915_error_state_buf *m,
1494 				struct intel_overlay_error_state *error)
1495 {
1496 	i915_error_printf(m, "Overlay, status: 0x%08x, interrupt: 0x%08x\n",
1497 			  error->dovsta, error->isr);
1498 	i915_error_printf(m, "  Register file at 0x%08lx:\n",
1499 			  error->base);
1500 
1501 #define P(x) i915_error_printf(m, "    " #x ":	0x%08x\n", error->regs.x)
1502 	P(OBUF_0Y);
1503 	P(OBUF_1Y);
1504 	P(OBUF_0U);
1505 	P(OBUF_0V);
1506 	P(OBUF_1U);
1507 	P(OBUF_1V);
1508 	P(OSTRIDE);
1509 	P(YRGB_VPH);
1510 	P(UV_VPH);
1511 	P(HORZ_PH);
1512 	P(INIT_PHS);
1513 	P(DWINPOS);
1514 	P(DWINSZ);
1515 	P(SWIDTH);
1516 	P(SWIDTHSW);
1517 	P(SHEIGHT);
1518 	P(YRGBSCALE);
1519 	P(UVSCALE);
1520 	P(OCLRC0);
1521 	P(OCLRC1);
1522 	P(DCLRKV);
1523 	P(DCLRKM);
1524 	P(SCLRKVH);
1525 	P(SCLRKVL);
1526 	P(SCLRKEN);
1527 	P(OCONFIG);
1528 	P(OCMD);
1529 	P(OSTART_0Y);
1530 	P(OSTART_1Y);
1531 	P(OSTART_0U);
1532 	P(OSTART_0V);
1533 	P(OSTART_1U);
1534 	P(OSTART_1V);
1535 	P(OTILEOFF_0Y);
1536 	P(OTILEOFF_1Y);
1537 	P(OTILEOFF_0U);
1538 	P(OTILEOFF_0V);
1539 	P(OTILEOFF_1U);
1540 	P(OTILEOFF_1V);
1541 	P(FASTHSCALE);
1542 	P(UVSCALEV);
1543 #undef P
1544 }
1545 #endif
1546