xref: /dragonfly/sys/dev/drm/i915/intel_overlay.c (revision 954fc4d3)
1 /*
2  * Copyright © 2009
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Daniel Vetter <daniel@ffwll.ch>
25  *
26  * Derived from Xorg ddx, xf86-video-intel, src/i830_video.c
27  */
28 #include <drm/drmP.h>
29 #include <drm/i915_drm.h>
30 #include "i915_drv.h"
31 #include "i915_reg.h"
32 #include "intel_drv.h"
33 
/*
 * Limits for overlay size. According to the Intel docs, the real limits are:
 * Y width: 4095, UV width (planar): 2047, Y height: 2047,
 * UV height (planar): 1023 (presumably; the original note said "UV width"
 * twice). But xorg assumes 2048 for both height and width. Use the minimum
 * of both.
 */
#define IMAGE_MAX_WIDTH		2048
#define IMAGE_MAX_HEIGHT	2046 /* 2 * 1023 */
/* on 830 and 845 these large limits result in the card hanging */
#define IMAGE_MAX_WIDTH_LEGACY	1024
#define IMAGE_MAX_HEIGHT_LEGACY	1088
43 
/* overlay register definitions */
/* OCMD register: bit fields of the overlay command word */
#define OCMD_TILED_SURFACE	(0x1<<19)
#define OCMD_MIRROR_MASK	(0x3<<17)
#define OCMD_MIRROR_MODE	(0x3<<17)
#define OCMD_MIRROR_HORIZONTAL	(0x1<<17)
#define OCMD_MIRROR_VERTICAL	(0x2<<17)
#define OCMD_MIRROR_BOTH	(0x3<<17)
#define OCMD_BYTEORDER_MASK	(0x3<<14) /* zero for YUYV or FOURCC YUY2 */
#define OCMD_UV_SWAP		(0x1<<14) /* YVYU */
#define OCMD_Y_SWAP		(0x2<<14) /* UYVY or FOURCC UYVY */
#define OCMD_Y_AND_UV_SWAP	(0x3<<14) /* VYUY */
#define OCMD_SOURCE_FORMAT_MASK (0xf<<10)
#define OCMD_RGB_888		(0x1<<10) /* not in i965 Intel docs */
#define OCMD_RGB_555		(0x2<<10) /* not in i965 Intel docs */
#define OCMD_RGB_565		(0x3<<10) /* not in i965 Intel docs */
#define OCMD_YUV_422_PACKED	(0x8<<10)
#define OCMD_YUV_411_PACKED	(0x9<<10) /* not in i965 Intel docs */
#define OCMD_YUV_420_PLANAR	(0xc<<10)
#define OCMD_YUV_422_PLANAR	(0xd<<10)
#define OCMD_YUV_410_PLANAR	(0xe<<10) /* also 411 */
#define OCMD_TVSYNCFLIP_PARITY	(0x1<<9)
#define OCMD_TVSYNCFLIP_ENABLE	(0x1<<7)
#define OCMD_BUF_TYPE_MASK	(0x1<<5)
#define OCMD_BUF_TYPE_FRAME	(0x0<<5)
#define OCMD_BUF_TYPE_FIELD	(0x1<<5)
#define OCMD_TEST_MODE		(0x1<<4)
#define OCMD_BUFFER_SELECT	(0x3<<2)
#define OCMD_BUFFER0		(0x0<<2)
#define OCMD_BUFFER1		(0x1<<2)
/*
 * NOTE(review): OCMD_FIELD_SELECT is defined at bit 2 while FIELD0/FIELD1
 * below use bit 1 - confirm against the hardware documentation.
 */
#define OCMD_FIELD_SELECT	(0x1<<2)
#define OCMD_FIELD0		(0x0<<1)
#define OCMD_FIELD1		(0x1<<1)
#define OCMD_ENABLE		(0x1<<0)

/* OCONFIG register: bit fields of the overlay configuration word */
#define OCONF_PIPE_MASK		(0x1<<18)
#define OCONF_PIPE_A		(0x0<<18)
#define OCONF_PIPE_B		(0x1<<18)
#define OCONF_GAMMA2_ENABLE	(0x1<<16)
#define OCONF_CSC_MODE_BT601	(0x0<<5)
#define OCONF_CSC_MODE_BT709	(0x1<<5)
#define OCONF_CSC_BYPASS	(0x1<<4)
#define OCONF_CC_OUT_8BIT	(0x1<<3)
#define OCONF_TEST_MODE		(0x1<<2)
#define OCONF_THREE_LINE_BUFFER	(0x1<<0)
#define OCONF_TWO_LINE_BUFFER	(0x0<<0)
91 
/* DCLRKM (dst-key) register */
/* Unsigned constant: shifting a signed 1 into bit 31 is undefined in C. */
#define DST_KEY_ENABLE		(0x1u<<31)
#define CLK_RGB24_MASK		0x0
#define CLK_RGB16_MASK		0x070307
#define CLK_RGB15_MASK		0x070707
#define CLK_RGB8I_MASK		0xffffff

/*
 * Expand a 16 bpp (5:6:5) or 15 bpp (5:5:5) pixel value into the 24 bit
 * colorkey layout by moving each channel to the top of its 8 bit lane.
 * The argument is parenthesized so that expressions such as `a | b` are
 * masked as a whole rather than being split by operator precedence.
 */
#define RGB16_TO_COLORKEY(c) \
	((((c) & 0xF800) << 8) | (((c) & 0x07E0) << 5) | (((c) & 0x001F) << 3))
#define RGB15_TO_COLORKEY(c) \
	((((c) & 0x7c00) << 9) | (((c) & 0x03E0) << 6) | (((c) & 0x001F) << 3))
103 
/* overlay flip addr flag, OR-ed into the flip address emitted on the ring */
#define OFC_UPDATE		0x1

/* polyphase filter coefficients: tap counts and phase count size the
 * coefficient arrays in struct overlay_registers */
#define N_HORIZ_Y_TAPS          5
#define N_VERT_Y_TAPS           3
#define N_HORIZ_UV_TAPS         3
#define N_VERT_UV_TAPS          3
#define N_PHASES                17
#define MAX_TAPS                5
114 
/*
 * Memory buffered overlay registers.
 *
 * The field order and the RESERVED* padding arrays fix each member at the
 * byte offset noted in the trailing comments, so members must not be
 * reordered or resized.
 */
struct overlay_registers {
	u32 OBUF_0Y;
	u32 OBUF_1Y;
	u32 OBUF_0U;
	u32 OBUF_0V;
	u32 OBUF_1U;
	u32 OBUF_1V;
	u32 OSTRIDE;
	u32 YRGB_VPH;
	u32 UV_VPH;
	u32 HORZ_PH;
	u32 INIT_PHS;
	u32 DWINPOS;
	u32 DWINSZ;
	u32 SWIDTH;
	u32 SWIDTHSW;
	u32 SHEIGHT;
	u32 YRGBSCALE;
	u32 UVSCALE;
	u32 OCLRC0;
	u32 OCLRC1;
	u32 DCLRKV;
	u32 DCLRKM;
	u32 SCLRKVH;
	u32 SCLRKVL;
	u32 SCLRKEN;
	u32 OCONFIG;
	u32 OCMD;
	u32 RESERVED1; /* 0x6C */
	u32 OSTART_0Y;
	u32 OSTART_1Y;
	u32 OSTART_0U;
	u32 OSTART_0V;
	u32 OSTART_1U;
	u32 OSTART_1V;
	u32 OTILEOFF_0Y;
	u32 OTILEOFF_1Y;
	u32 OTILEOFF_0U;
	u32 OTILEOFF_0V;
	u32 OTILEOFF_1U;
	u32 OTILEOFF_1V;
	u32 FASTHSCALE; /* 0xA0 */
	u32 UVSCALEV; /* 0xA4 */
	u32 RESERVEDC[(0x200 - 0xA8) / 4]; /* 0xA8 - 0x1FC */
	/* filter coefficient tables, each padded out to its next offset */
	u16 Y_VCOEFS[N_VERT_Y_TAPS * N_PHASES]; /* 0x200 */
	u16 RESERVEDD[0x100 / 2 - N_VERT_Y_TAPS * N_PHASES];
	u16 Y_HCOEFS[N_HORIZ_Y_TAPS * N_PHASES]; /* 0x300 */
	u16 RESERVEDE[0x200 / 2 - N_HORIZ_Y_TAPS * N_PHASES];
	u16 UV_VCOEFS[N_VERT_UV_TAPS * N_PHASES]; /* 0x500 */
	u16 RESERVEDF[0x100 / 2 - N_VERT_UV_TAPS * N_PHASES];
	u16 UV_HCOEFS[N_HORIZ_UV_TAPS * N_PHASES]; /* 0x600 */
	u16 RESERVEDG[0x100 / 2 - N_HORIZ_UV_TAPS * N_PHASES];
};
169 
/* Driver-side state of the overlay plane. */
struct intel_overlay {
	struct drm_i915_private *i915;
	/* crtc the overlay is attached to; cleared when it is switched off */
	struct intel_crtc *crtc;
	/* buffer object of the most recently queued frame */
	struct drm_i915_gem_object *vid_bo;
	/* previous vid_bo, kept pinned until intel_overlay_release_old_vid() */
	struct drm_i915_gem_object *old_vid_bo;
	/* set once the overlay-on command was emitted, cleared on off/reset */
	bool active;
	bool pfit_active;
	u32 pfit_vscale_ratio; /* shifted-point number, (1<<12) == 1.0 */
	u32 color_key:24;
	u32 color_key_enabled:1;
	u32 brightness, contrast, saturation;
	/* last scale factors written, used to detect scale changes */
	u32 old_xscale, old_yscale;
	/* register access */
	u32 flip_addr;
	struct drm_i915_gem_object *reg_bo;
	/* flip handling */
	/* outstanding request, if any, that must complete before reuse */
	struct drm_i915_gem_request *last_flip_req;
	/* cleanup run by intel_overlay_recover_from_interrupt() once
	 * last_flip_req has completed; may be NULL */
	void (*flip_tail)(struct intel_overlay *);
};
189 
190 static struct overlay_registers __iomem *
191 intel_overlay_map_regs(struct intel_overlay *overlay)
192 {
193 	struct drm_i915_private *dev_priv = overlay->i915;
194 	struct overlay_registers __iomem *regs;
195 
196 	if (OVERLAY_NEEDS_PHYSICAL(dev_priv))
197 		regs = (struct overlay_registers __iomem *)overlay->reg_bo->phys_handle->vaddr;
198 	else
199 		regs = io_mapping_map_wc(dev_priv->ggtt.mappable,
200 					 overlay->flip_addr,
201 					 PAGE_SIZE);
202 
203 	return regs;
204 }
205 
206 static void intel_overlay_unmap_regs(struct intel_overlay *overlay,
207 				     struct overlay_registers __iomem *regs)
208 {
209 	if (!OVERLAY_NEEDS_PHYSICAL(overlay->i915))
210 		io_mapping_unmap(regs);
211 }
212 
213 static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
214 					 struct drm_i915_gem_request *req,
215 					 void (*tail)(struct intel_overlay *))
216 {
217 	int ret;
218 
219 	WARN_ON(overlay->last_flip_req);
220 	i915_gem_request_assign(&overlay->last_flip_req, req);
221 	i915_add_request(req);
222 
223 	overlay->flip_tail = tail;
224 	ret = i915_wait_request(overlay->last_flip_req);
225 	if (ret)
226 		return ret;
227 
228 	i915_gem_request_assign(&overlay->last_flip_req, NULL);
229 	return 0;
230 }
231 
/*
 * Turn the overlay hardware on.
 * overlay needs to be disabled in OCMD reg
 *
 * Allocates a request on the render engine, emits an MI_OVERLAY_ON flip
 * followed by a wait for the overlay flip event, and then waits for the
 * request via intel_overlay_do_wait_request().
 *
 * Returns 0 on success or a negative error code from request allocation,
 * ring space reservation or the wait.
 */
static int intel_overlay_on(struct intel_overlay *overlay)
{
	struct drm_i915_private *dev_priv = overlay->i915;
	struct intel_engine_cs *engine = &dev_priv->engine[RCS];
	struct drm_i915_gem_request *req;
	int ret;

	WARN_ON(overlay->active);
	WARN_ON(IS_I830(dev_priv) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE));

	req = i915_gem_request_alloc(engine, NULL);
	if (IS_ERR(req))
		return PTR_ERR(req);

	ret = intel_ring_begin(req, 4);
	if (ret) {
		/* the allocated request is still submitted on this path */
		i915_add_request_no_flush(req);
		return ret;
	}

	/* marked active before the commands are emitted */
	overlay->active = true;

	/* four dwords, matching the intel_ring_begin() count above */
	intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_ON);
	intel_ring_emit(engine, overlay->flip_addr | OFC_UPDATE);
	intel_ring_emit(engine, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
	intel_ring_emit(engine, MI_NOOP);
	intel_ring_advance(engine);

	return intel_overlay_do_wait_request(overlay, req, NULL);
}
263 
/*
 * Queue a flip to the register contents at overlay->flip_addr.
 * overlay needs to be enabled in OCMD reg
 *
 * @load_polyphase_filter: when true, OFC_UPDATE is set in the flip address.
 *
 * The request is submitted but not waited for; it is recorded in
 * overlay->last_flip_req.  Returns 0 on success or a negative error code
 * from request allocation or ring space reservation.
 */
static int intel_overlay_continue(struct intel_overlay *overlay,
				  bool load_polyphase_filter)
{
	struct drm_i915_private *dev_priv = overlay->i915;
	struct intel_engine_cs *engine = &dev_priv->engine[RCS];
	struct drm_i915_gem_request *req;
	u32 flip_addr = overlay->flip_addr;
	u32 tmp;
	int ret;

	WARN_ON(!overlay->active);

	if (load_polyphase_filter)
		flip_addr |= OFC_UPDATE;

	/* check for underruns (only logged, not treated as an error) */
	tmp = I915_READ(DOVSTA);
	if (tmp & (1 << 17))
		DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp);

	req = i915_gem_request_alloc(engine, NULL);
	if (IS_ERR(req))
		return PTR_ERR(req);

	ret = intel_ring_begin(req, 2);
	if (ret) {
		/* the allocated request is still submitted on this path */
		i915_add_request_no_flush(req);
		return ret;
	}

	intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
	intel_ring_emit(engine, flip_addr);
	intel_ring_advance(engine);

	/* remember the request so later callers can wait for this flip */
	WARN_ON(overlay->last_flip_req);
	i915_gem_request_assign(&overlay->last_flip_req, req);
	i915_add_request(req);

	return 0;
}
305 
306 static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
307 {
308 	struct drm_i915_gem_object *obj = overlay->old_vid_bo;
309 
310 	i915_gem_object_ggtt_unpin(obj);
311 	drm_gem_object_unreference(&obj->base);
312 
313 	overlay->old_vid_bo = NULL;
314 }
315 
316 static void intel_overlay_off_tail(struct intel_overlay *overlay)
317 {
318 	struct drm_i915_gem_object *obj = overlay->vid_bo;
319 
320 	/* never have the overlay hw on without showing a frame */
321 	if (WARN_ON(!obj))
322 		return;
323 
324 	i915_gem_object_ggtt_unpin(obj);
325 	drm_gem_object_unreference(&obj->base);
326 	overlay->vid_bo = NULL;
327 
328 	overlay->crtc->overlay = NULL;
329 	overlay->crtc = NULL;
330 	overlay->active = false;
331 }
332 
/*
 * Turn the overlay hardware off.
 * overlay needs to be disabled in OCMD reg
 *
 * Emits a final continue-flip with filter update, waits for it, and then
 * (except on i830) an MI_OVERLAY_OFF flip.  The request is waited for via
 * intel_overlay_do_wait_request() with intel_overlay_off_tail as the tail.
 *
 * Returns 0 on success or a negative error code.
 */
static int intel_overlay_off(struct intel_overlay *overlay)
{
	struct drm_i915_private *dev_priv = overlay->i915;
	struct intel_engine_cs *engine = &dev_priv->engine[RCS];
	struct drm_i915_gem_request *req;
	u32 flip_addr = overlay->flip_addr;
	int ret;

	WARN_ON(!overlay->active);

	/* According to intel docs the overlay hw may hang (when switching
	 * off) without loading the filter coeffs. It is however unclear whether
	 * this applies to the disabling of the overlay or to the switching off
	 * of the hw. Do it in both cases */
	flip_addr |= OFC_UPDATE;

	req = i915_gem_request_alloc(engine, NULL);
	if (IS_ERR(req))
		return PTR_ERR(req);

	/* six dwords are emitted on both branches below */
	ret = intel_ring_begin(req, 6);
	if (ret) {
		/* the allocated request is still submitted on this path */
		i915_add_request_no_flush(req);
		return ret;
	}

	/* wait for overlay to go idle */
	intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
	intel_ring_emit(engine, flip_addr);
	intel_ring_emit(engine, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
	/* turn overlay off */
	if (IS_I830(dev_priv)) {
		/* Workaround: Don't disable the overlay fully, since otherwise
		 * it dies on the next OVERLAY_ON cmd. */
		intel_ring_emit(engine, MI_NOOP);
		intel_ring_emit(engine, MI_NOOP);
		intel_ring_emit(engine, MI_NOOP);
	} else {
		intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_OFF);
		intel_ring_emit(engine, flip_addr);
		intel_ring_emit(engine,
				MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
	}
	intel_ring_advance(engine);

	return intel_overlay_do_wait_request(overlay, req, intel_overlay_off_tail);
}
381 
382 /* recover from an interruption due to a signal
383  * We have to be careful not to repeat work forever an make forward progess. */
384 static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
385 {
386 	int ret;
387 
388 	if (overlay->last_flip_req == NULL)
389 		return 0;
390 
391 	ret = i915_wait_request(overlay->last_flip_req);
392 	if (ret)
393 		return ret;
394 
395 	if (overlay->flip_tail)
396 		overlay->flip_tail(overlay);
397 
398 	i915_gem_request_assign(&overlay->last_flip_req, NULL);
399 	return 0;
400 }
401 
/* Wait for pending overlay flip and release old frame.
 * Needs to be called before the overlay registers are changed
 * via intel_overlay_(un)map_regs
 *
 * Must be called with struct_mutex held.  Returns 0 on success (including
 * when there is no old frame) or a negative error code from request
 * allocation, ring space reservation or the wait.
 */
static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
{
	struct drm_i915_private *dev_priv = overlay->i915;
	struct intel_engine_cs *engine = &dev_priv->engine[RCS];
	int ret;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	/* Only wait if there is actually an old frame to release to
	 * guarantee forward progress.
	 */
	if (!overlay->old_vid_bo)
		return 0;

	if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
		/* synchronous slowpath: a flip is still pending, so queue a
		 * wait-for-flip on the ring and block until it has executed */
		struct drm_i915_gem_request *req;

		req = i915_gem_request_alloc(engine, NULL);
		if (IS_ERR(req))
			return PTR_ERR(req);

		ret = intel_ring_begin(req, 2);
		if (ret) {
			i915_add_request_no_flush(req);
			return ret;
		}

		intel_ring_emit(engine,
				MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
		intel_ring_emit(engine, MI_NOOP);
		intel_ring_advance(engine);

		ret = intel_overlay_do_wait_request(overlay, req,
						    intel_overlay_release_old_vid_tail);
		if (ret)
			return ret;
	}

	intel_overlay_release_old_vid_tail(overlay);


	/* NOTE(review): the tail above has already set overlay->old_vid_bo
	 * to NULL, so NULL is what gets passed here as the old object -
	 * confirm whether the tracking update was meant to precede it. */
	i915_gem_track_fb(overlay->old_vid_bo, NULL,
			  INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
	return 0;
}
452 
453 void intel_overlay_reset(struct drm_i915_private *dev_priv)
454 {
455 	struct intel_overlay *overlay = dev_priv->overlay;
456 
457 	if (!overlay)
458 		return;
459 
460 	intel_overlay_release_old_vid(overlay);
461 
462 	overlay->last_flip_req = NULL;
463 	overlay->old_xscale = 0;
464 	overlay->old_yscale = 0;
465 	overlay->crtc = NULL;
466 	overlay->active = false;
467 }
468 
/*
 * Parameters of one put-image operation as consumed by the register
 * programming helpers in this file.
 */
struct put_image_params {
	int format;	/* I915_OVERLAY_* type/depth/swap flags */
	/* destination window, written to DWINPOS/DWINSZ */
	short dst_x;
	short dst_y;
	short dst_w;
	short dst_h;
	/* source size; src_scan_* are the values used for scale factors */
	short src_w;
	short src_scan_h;
	short src_scan_w;
	short src_h;
	/* strides, written to OSTRIDE */
	short stride_Y;
	short stride_UV;
	/* plane offsets, added to the buffer object's GGTT offset */
	int offset_Y;
	int offset_U;
	int offset_V;
};
485 
486 static int packed_depth_bytes(u32 format)
487 {
488 	switch (format & I915_OVERLAY_DEPTH_MASK) {
489 	case I915_OVERLAY_YUV422:
490 		return 4;
491 	case I915_OVERLAY_YUV411:
492 		/* return 6; not implemented */
493 	default:
494 		return -EINVAL;
495 	}
496 }
497 
498 static int packed_width_bytes(u32 format, short width)
499 {
500 	switch (format & I915_OVERLAY_DEPTH_MASK) {
501 	case I915_OVERLAY_YUV422:
502 		return width << 1;
503 	default:
504 		return -EINVAL;
505 	}
506 }
507 
508 static int uv_hsubsampling(u32 format)
509 {
510 	switch (format & I915_OVERLAY_DEPTH_MASK) {
511 	case I915_OVERLAY_YUV422:
512 	case I915_OVERLAY_YUV420:
513 		return 2;
514 	case I915_OVERLAY_YUV411:
515 	case I915_OVERLAY_YUV410:
516 		return 4;
517 	default:
518 		return -EINVAL;
519 	}
520 }
521 
522 static int uv_vsubsampling(u32 format)
523 {
524 	switch (format & I915_OVERLAY_DEPTH_MASK) {
525 	case I915_OVERLAY_YUV420:
526 	case I915_OVERLAY_YUV410:
527 		return 2;
528 	case I915_OVERLAY_YUV422:
529 	case I915_OVERLAY_YUV411:
530 		return 1;
531 	default:
532 		return -EINVAL;
533 	}
534 }
535 
536 static u32 calc_swidthsw(struct drm_i915_private *dev_priv, u32 offset, u32 width)
537 {
538 	u32 mask, shift, ret;
539 	if (IS_GEN2(dev_priv)) {
540 		mask = 0x1f;
541 		shift = 5;
542 	} else {
543 		mask = 0x3f;
544 		shift = 6;
545 	}
546 	ret = ((offset + width + mask) >> shift) - (offset >> shift);
547 	if (!IS_GEN2(dev_priv))
548 		ret <<= 1;
549 	ret -= 1;
550 	return ret << 2;
551 }
552 
/*
 * Static horizontal filter coefficients for the Y plane, copied into
 * overlay_registers.Y_HCOEFS by update_polyphase_filter().
 * One row per phase (N_PHASES rows of N_HORIZ_Y_TAPS entries).
 */
static const u16 y_static_hcoeffs[N_HORIZ_Y_TAPS * N_PHASES] = {
	0x3000, 0xb4a0, 0x1930, 0x1920, 0xb4a0,
	0x3000, 0xb500, 0x19d0, 0x1880, 0xb440,
	0x3000, 0xb540, 0x1a88, 0x2f80, 0xb3e0,
	0x3000, 0xb580, 0x1b30, 0x2e20, 0xb380,
	0x3000, 0xb5c0, 0x1bd8, 0x2cc0, 0xb320,
	0x3020, 0xb5e0, 0x1c60, 0x2b80, 0xb2c0,
	0x3020, 0xb5e0, 0x1cf8, 0x2a20, 0xb260,
	0x3020, 0xb5e0, 0x1d80, 0x28e0, 0xb200,
	0x3020, 0xb5c0, 0x1e08, 0x3f40, 0xb1c0,
	0x3020, 0xb580, 0x1e78, 0x3ce0, 0xb160,
	0x3040, 0xb520, 0x1ed8, 0x3aa0, 0xb120,
	0x3040, 0xb4a0, 0x1f30, 0x3880, 0xb0e0,
	0x3040, 0xb400, 0x1f78, 0x3680, 0xb0a0,
	0x3020, 0xb340, 0x1fb8, 0x34a0, 0xb060,
	0x3020, 0xb240, 0x1fe0, 0x32e0, 0xb040,
	0x3020, 0xb140, 0x1ff8, 0x3160, 0xb020,
	0xb000, 0x3000, 0x0800, 0x3000, 0xb000
};
572 
/*
 * Static horizontal filter coefficients for the UV planes, copied into
 * overlay_registers.UV_HCOEFS by update_polyphase_filter().
 * Holds N_PHASES groups of N_HORIZ_UV_TAPS entries; the rows below are
 * laid out two groups per line.
 */
static const u16 uv_static_hcoeffs[N_HORIZ_UV_TAPS * N_PHASES] = {
	0x3000, 0x1800, 0x1800, 0xb000, 0x18d0, 0x2e60,
	0xb000, 0x1990, 0x2ce0, 0xb020, 0x1a68, 0x2b40,
	0xb040, 0x1b20, 0x29e0, 0xb060, 0x1bd8, 0x2880,
	0xb080, 0x1c88, 0x3e60, 0xb0a0, 0x1d28, 0x3c00,
	0xb0c0, 0x1db8, 0x39e0, 0xb0e0, 0x1e40, 0x37e0,
	0xb100, 0x1eb8, 0x3620, 0xb100, 0x1f18, 0x34a0,
	0xb100, 0x1f68, 0x3360, 0xb0e0, 0x1fa8, 0x3240,
	0xb0c0, 0x1fe0, 0x3140, 0xb060, 0x1ff0, 0x30a0,
	0x3000, 0x0800, 0x3000
};
584 
585 static void update_polyphase_filter(struct overlay_registers __iomem *regs)
586 {
587 	memcpy_toio(regs->Y_HCOEFS, y_static_hcoeffs, sizeof(y_static_hcoeffs));
588 	memcpy_toio(regs->UV_HCOEFS, uv_static_hcoeffs,
589 		    sizeof(uv_static_hcoeffs));
590 }
591 
/*
 * Compute the source-to-destination scale factors and program them into
 * YRGBSCALE, UVSCALE and UVSCALEV.
 *
 * The factors are cached in overlay->old_xscale/old_yscale; when either one
 * differs from the cached value the polyphase filter tables are reloaded.
 *
 * Returns true if the scale changed (and the filter was reloaded).
 */
static bool update_scaling_factors(struct intel_overlay *overlay,
				   struct overlay_registers __iomem *regs,
				   struct put_image_params *params)
{
	/* fixed point with a 12 bit shift */
	u32 xscale, yscale, xscale_UV, yscale_UV;
#define FP_SHIFT 12
#define FRACT_MASK 0xfff
	bool scale_changed = false;
	int uv_hscale = uv_hsubsampling(params->format);
	int uv_vscale = uv_vsubsampling(params->format);

	/* a destination of one pixel or less falls back to a 1.0 scale */
	if (params->dst_w > 1)
		xscale = ((params->src_scan_w - 1) << FP_SHIFT)
			/(params->dst_w);
	else
		xscale = 1 << FP_SHIFT;

	if (params->dst_h > 1)
		yscale = ((params->src_scan_h - 1) << FP_SHIFT)
			/(params->dst_h);
	else
		yscale = 1 << FP_SHIFT;

	/*if (params->format & I915_OVERLAY_YUV_PLANAR) {*/
	xscale_UV = xscale/uv_hscale;
	yscale_UV = yscale/uv_vscale;
	/* make the Y scale to UV scale ratio an exact multiply */
	xscale = xscale_UV * uv_hscale;
	yscale = yscale_UV * uv_vscale;
	/*} else {
	  xscale_UV = 0;
	  yscale_UV = 0;
	  }*/

	if (xscale != overlay->old_xscale || yscale != overlay->old_yscale)
		scale_changed = true;
	overlay->old_xscale = xscale;
	overlay->old_yscale = yscale;

	/* vertical fraction, horizontal integer part, horizontal fraction */
	iowrite32(((yscale & FRACT_MASK) << 20) |
		  ((xscale >> FP_SHIFT)  << 16) |
		  ((xscale & FRACT_MASK) << 3),
		 &regs->YRGBSCALE);

	iowrite32(((yscale_UV & FRACT_MASK) << 20) |
		  ((xscale_UV >> FP_SHIFT)  << 16) |
		  ((xscale_UV & FRACT_MASK) << 3),
		 &regs->UVSCALE);

	/* integer parts of the vertical Y and UV scale */
	iowrite32((((yscale    >> FP_SHIFT) << 16) |
		   ((yscale_UV >> FP_SHIFT) << 0)),
		 &regs->UVSCALEV);

	if (scale_changed)
		update_polyphase_filter(regs);

	return scale_changed;
}
651 
652 static void update_colorkey(struct intel_overlay *overlay,
653 			    struct overlay_registers __iomem *regs)
654 {
655 	u32 key = overlay->color_key;
656 	u32 flags;
657 
658 	flags = 0;
659 	if (overlay->color_key_enabled)
660 		flags |= DST_KEY_ENABLE;
661 
662 	switch (overlay->crtc->base.primary->fb->bits_per_pixel) {
663 	case 8:
664 		key = 0;
665 		flags |= CLK_RGB8I_MASK;
666 		break;
667 
668 	case 16:
669 		if (overlay->crtc->base.primary->fb->depth == 15) {
670 			key = RGB15_TO_COLORKEY(key);
671 			flags |= CLK_RGB15_MASK;
672 		} else {
673 			key = RGB16_TO_COLORKEY(key);
674 			flags |= CLK_RGB16_MASK;
675 		}
676 		break;
677 
678 	case 24:
679 	case 32:
680 		flags |= CLK_RGB24_MASK;
681 		break;
682 	}
683 
684 	iowrite32(key, &regs->DCLRKV);
685 	iowrite32(flags, &regs->DCLRKM);
686 }
687 
688 static u32 overlay_cmd_reg(struct put_image_params *params)
689 {
690 	u32 cmd = OCMD_ENABLE | OCMD_BUF_TYPE_FRAME | OCMD_BUFFER0;
691 
692 	if (params->format & I915_OVERLAY_YUV_PLANAR) {
693 		switch (params->format & I915_OVERLAY_DEPTH_MASK) {
694 		case I915_OVERLAY_YUV422:
695 			cmd |= OCMD_YUV_422_PLANAR;
696 			break;
697 		case I915_OVERLAY_YUV420:
698 			cmd |= OCMD_YUV_420_PLANAR;
699 			break;
700 		case I915_OVERLAY_YUV411:
701 		case I915_OVERLAY_YUV410:
702 			cmd |= OCMD_YUV_410_PLANAR;
703 			break;
704 		}
705 	} else { /* YUV packed */
706 		switch (params->format & I915_OVERLAY_DEPTH_MASK) {
707 		case I915_OVERLAY_YUV422:
708 			cmd |= OCMD_YUV_422_PACKED;
709 			break;
710 		case I915_OVERLAY_YUV411:
711 			cmd |= OCMD_YUV_411_PACKED;
712 			break;
713 		}
714 
715 		switch (params->format & I915_OVERLAY_SWAP_MASK) {
716 		case I915_OVERLAY_NO_SWAP:
717 			break;
718 		case I915_OVERLAY_UV_SWAP:
719 			cmd |= OCMD_UV_SWAP;
720 			break;
721 		case I915_OVERLAY_Y_SWAP:
722 			cmd |= OCMD_Y_SWAP;
723 			break;
724 		case I915_OVERLAY_Y_AND_UV_SWAP:
725 			cmd |= OCMD_Y_AND_UV_SWAP;
726 			break;
727 		}
728 	}
729 
730 	return cmd;
731 }
732 
/*
 * Show @new_bo on the overlay as described by @params.
 *
 * Releases the previous old frame, pins @new_bo for display, turns the
 * overlay on if it is not active yet, programs the register block and
 * queues a flip.  On success the former vid_bo becomes old_vid_bo and
 * @new_bo becomes vid_bo.
 *
 * Must be called with struct_mutex held; the connection mutex is expected
 * to be locked as well.  Returns 0 on success or a negative error code, in
 * which case @new_bo has been unpinned again (for failures after the pin).
 */
static int intel_overlay_do_put_image(struct intel_overlay *overlay,
				      struct drm_i915_gem_object *new_bo,
				      struct put_image_params *params)
{
	int ret, tmp_width;
	struct overlay_registers __iomem *regs;
	bool scale_changed = false;
	struct drm_i915_private *dev_priv = overlay->i915;
	u32 swidth, swidthsw, sheight, ostride;
	enum i915_pipe pipe = overlay->crtc->pipe;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);
	WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex));

	ret = intel_overlay_release_old_vid(overlay);
	if (ret != 0)
		return ret;

	ret = i915_gem_object_pin_to_display_plane(new_bo, 0,
						   &i915_ggtt_view_normal);
	if (ret != 0)
		return ret;

	ret = i915_gem_object_put_fence(new_bo);
	if (ret)
		goto out_unpin;

	if (!overlay->active) {
		/* first frame: write OCONFIG, then switch the overlay on */
		u32 oconfig;
		regs = intel_overlay_map_regs(overlay);
		if (!regs) {
			ret = -ENOMEM;
			goto out_unpin;
		}
		oconfig = OCONF_CC_OUT_8BIT;
		if (IS_GEN4(dev_priv))
			oconfig |= OCONF_CSC_MODE_BT709;
		oconfig |= pipe == 0 ?
			OCONF_PIPE_A : OCONF_PIPE_B;
		iowrite32(oconfig, &regs->OCONFIG);
		intel_overlay_unmap_regs(overlay, regs);

		ret = intel_overlay_on(overlay);
		if (ret != 0)
			goto out_unpin;
	}

	regs = intel_overlay_map_regs(overlay);
	if (!regs) {
		ret = -ENOMEM;
		goto out_unpin;
	}

	/* destination window: y/height in the upper, x/width in the lower half */
	iowrite32((params->dst_y << 16) | params->dst_x, &regs->DWINPOS);
	iowrite32((params->dst_h << 16) | params->dst_w, &regs->DWINSZ);

	/* packed formats need the Y line width in bytes rather than pixels */
	if (params->format & I915_OVERLAY_YUV_PACKED)
		tmp_width = packed_width_bytes(params->format, params->src_w);
	else
		tmp_width = params->src_w;

	swidth = params->src_w;
	swidthsw = calc_swidthsw(dev_priv, params->offset_Y, tmp_width);
	sheight = params->src_h;
	iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_Y, &regs->OBUF_0Y);
	ostride = params->stride_Y;

	if (params->format & I915_OVERLAY_YUV_PLANAR) {
		/* planar: the UV values go into the upper 16 bits */
		int uv_hscale = uv_hsubsampling(params->format);
		int uv_vscale = uv_vsubsampling(params->format);
		u32 tmp_U, tmp_V;
		swidth |= (params->src_w/uv_hscale) << 16;
		tmp_U = calc_swidthsw(dev_priv, params->offset_U,
				      params->src_w/uv_hscale);
		tmp_V = calc_swidthsw(dev_priv, params->offset_V,
				      params->src_w/uv_hscale);
		swidthsw |= max_t(u32, tmp_U, tmp_V) << 16;
		sheight |= (params->src_h/uv_vscale) << 16;
		iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_U, &regs->OBUF_0U);
		iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_V, &regs->OBUF_0V);
		ostride |= params->stride_UV << 16;
	}

	iowrite32(swidth, &regs->SWIDTH);
	iowrite32(swidthsw, &regs->SWIDTHSW);
	iowrite32(sheight, &regs->SHEIGHT);
	iowrite32(ostride, &regs->OSTRIDE);

	scale_changed = update_scaling_factors(overlay, regs, params);

	update_colorkey(overlay, regs);

	iowrite32(overlay_cmd_reg(params), &regs->OCMD);

	intel_overlay_unmap_regs(overlay, regs);

	/* queue the flip; the filter is reloaded only if the scale changed */
	ret = intel_overlay_continue(overlay, scale_changed);
	if (ret)
		goto out_unpin;

	i915_gem_track_fb(overlay->vid_bo, new_bo,
			  INTEL_FRONTBUFFER_OVERLAY(pipe));

	/* the previous frame stays pinned until it is released later */
	overlay->old_vid_bo = overlay->vid_bo;
	overlay->vid_bo = new_bo;

	intel_frontbuffer_flip(&dev_priv->drm,
			       INTEL_FRONTBUFFER_OVERLAY(pipe));

	return 0;

out_unpin:
	i915_gem_object_ggtt_unpin(new_bo);
	return ret;
}
848 
849 int intel_overlay_switch_off(struct intel_overlay *overlay)
850 {
851 	struct drm_i915_private *dev_priv = overlay->i915;
852 	struct overlay_registers __iomem *regs;
853 	int ret;
854 
855 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
856 	WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex));
857 
858 	ret = intel_overlay_recover_from_interrupt(overlay);
859 	if (ret != 0)
860 		return ret;
861 
862 	if (!overlay->active)
863 		return 0;
864 
865 	ret = intel_overlay_release_old_vid(overlay);
866 	if (ret != 0)
867 		return ret;
868 
869 	regs = intel_overlay_map_regs(overlay);
870 	iowrite32(0, &regs->OCMD);
871 	intel_overlay_unmap_regs(overlay, regs);
872 
873 	ret = intel_overlay_off(overlay);
874 	if (ret != 0)
875 		return ret;
876 
877 	intel_overlay_off_tail(overlay);
878 	return 0;
879 }
880 
881 static int check_overlay_possible_on_crtc(struct intel_overlay *overlay,
882 					  struct intel_crtc *crtc)
883 {
884 	if (!crtc->active)
885 		return -EINVAL;
886 
887 	/* can't use the overlay with double wide pipe */
888 	if (crtc->config->double_wide)
889 		return -EINVAL;
890 
891 	return 0;
892 }
893 
894 static void update_pfit_vscale_ratio(struct intel_overlay *overlay)
895 {
896 	struct drm_i915_private *dev_priv = overlay->i915;
897 	u32 pfit_control = I915_READ(PFIT_CONTROL);
898 	u32 ratio;
899 
900 	/* XXX: This is not the same logic as in the xorg driver, but more in
901 	 * line with the intel documentation for the i965
902 	 */
903 	if (INTEL_GEN(dev_priv) >= 4) {
904 		/* on i965 use the PGM reg to read out the autoscaler values */
905 		ratio = I915_READ(PFIT_PGM_RATIOS) >> PFIT_VERT_SCALE_SHIFT_965;
906 	} else {
907 		if (pfit_control & VERT_AUTO_SCALE)
908 			ratio = I915_READ(PFIT_AUTO_RATIOS);
909 		else
910 			ratio = I915_READ(PFIT_PGM_RATIOS);
911 		ratio >>= PFIT_VERT_SCALE_SHIFT;
912 	}
913 
914 	overlay->pfit_vscale_ratio = ratio;
915 }
916 
917 static int check_overlay_dst(struct intel_overlay *overlay,
918 			     struct drm_intel_overlay_put_image *rec)
919 {
920 	struct drm_display_mode *mode = &overlay->crtc->base.mode;
921 
922 	if (rec->dst_x < mode->hdisplay &&
923 	    rec->dst_x + rec->dst_width <= mode->hdisplay &&
924 	    rec->dst_y < mode->vdisplay &&
925 	    rec->dst_y + rec->dst_height <= mode->vdisplay)
926 		return 0;
927 	else
928 		return -EINVAL;
929 }
930 
931 static int check_overlay_scaling(struct put_image_params *rec)
932 {
933 	u32 tmp;
934 
935 	/* downscaling limit is 8.0 */
936 	tmp = ((rec->src_scan_h << 16) / rec->dst_h) >> 16;
937 	if (tmp > 7)
938 		return -EINVAL;
939 	tmp = ((rec->src_scan_w << 16) / rec->dst_w) >> 16;
940 	if (tmp > 7)
941 		return -EINVAL;
942 
943 	return 0;
944 }
945 
946 static int check_overlay_src(struct drm_i915_private *dev_priv,
947 			     struct drm_intel_overlay_put_image *rec,
948 			     struct drm_i915_gem_object *new_bo)
949 {
950 	int uv_hscale = uv_hsubsampling(rec->flags);
951 	int uv_vscale = uv_vsubsampling(rec->flags);
952 	u32 stride_mask;
953 	int depth;
954 	u32 tmp;
955 
956 	/* check src dimensions */
957 	if (IS_845G(dev_priv) || IS_I830(dev_priv)) {
958 		if (rec->src_height > IMAGE_MAX_HEIGHT_LEGACY ||
959 		    rec->src_width  > IMAGE_MAX_WIDTH_LEGACY)
960 			return -EINVAL;
961 	} else {
962 		if (rec->src_height > IMAGE_MAX_HEIGHT ||
963 		    rec->src_width  > IMAGE_MAX_WIDTH)
964 			return -EINVAL;
965 	}
966 
967 	/* better safe than sorry, use 4 as the maximal subsampling ratio */
968 	if (rec->src_height < N_VERT_Y_TAPS*4 ||
969 	    rec->src_width  < N_HORIZ_Y_TAPS*4)
970 		return -EINVAL;
971 
972 	/* check alignment constraints */
973 	switch (rec->flags & I915_OVERLAY_TYPE_MASK) {
974 	case I915_OVERLAY_RGB:
975 		/* not implemented */
976 		return -EINVAL;
977 
978 	case I915_OVERLAY_YUV_PACKED:
979 		if (uv_vscale != 1)
980 			return -EINVAL;
981 
982 		depth = packed_depth_bytes(rec->flags);
983 		if (depth < 0)
984 			return depth;
985 
986 		/* ignore UV planes */
987 		rec->stride_UV = 0;
988 		rec->offset_U = 0;
989 		rec->offset_V = 0;
990 		/* check pixel alignment */
991 		if (rec->offset_Y % depth)
992 			return -EINVAL;
993 		break;
994 
995 	case I915_OVERLAY_YUV_PLANAR:
996 		if (uv_vscale < 0 || uv_hscale < 0)
997 			return -EINVAL;
998 		/* no offset restrictions for planar formats */
999 		break;
1000 
1001 	default:
1002 		return -EINVAL;
1003 	}
1004 
1005 	if (rec->src_width % uv_hscale)
1006 		return -EINVAL;
1007 
1008 	/* stride checking */
1009 	if (IS_I830(dev_priv) || IS_845G(dev_priv))
1010 		stride_mask = 255;
1011 	else
1012 		stride_mask = 63;
1013 
1014 	if (rec->stride_Y & stride_mask || rec->stride_UV & stride_mask)
1015 		return -EINVAL;
1016 	if (IS_GEN4(dev_priv) && rec->stride_Y < 512)
1017 		return -EINVAL;
1018 
1019 	tmp = (rec->flags & I915_OVERLAY_TYPE_MASK) == I915_OVERLAY_YUV_PLANAR ?
1020 		4096 : 8192;
1021 	if (rec->stride_Y > tmp || rec->stride_UV > 2*1024)
1022 		return -EINVAL;
1023 
1024 	/* check buffer dimensions */
1025 	switch (rec->flags & I915_OVERLAY_TYPE_MASK) {
1026 	case I915_OVERLAY_RGB:
1027 	case I915_OVERLAY_YUV_PACKED:
1028 		/* always 4 Y values per depth pixels */
1029 		if (packed_width_bytes(rec->flags, rec->src_width) > rec->stride_Y)
1030 			return -EINVAL;
1031 
1032 		tmp = rec->stride_Y*rec->src_height;
1033 		if (rec->offset_Y + tmp > new_bo->base.size)
1034 			return -EINVAL;
1035 		break;
1036 
1037 	case I915_OVERLAY_YUV_PLANAR:
1038 		if (rec->src_width > rec->stride_Y)
1039 			return -EINVAL;
1040 		if (rec->src_width/uv_hscale > rec->stride_UV)
1041 			return -EINVAL;
1042 
1043 		tmp = rec->stride_Y * rec->src_height;
1044 		if (rec->offset_Y + tmp > new_bo->base.size)
1045 			return -EINVAL;
1046 
1047 		tmp = rec->stride_UV * (rec->src_height / uv_vscale);
1048 		if (rec->offset_U + tmp > new_bo->base.size ||
1049 		    rec->offset_V + tmp > new_bo->base.size)
1050 			return -EINVAL;
1051 		break;
1052 	}
1053 
1054 	return 0;
1055 }
1056 
1057 /**
1058  * Return the pipe currently connected to the panel fitter,
1059  * or -1 if the panel fitter is not present or not in use
1060  */
1061 static int intel_panel_fitter_pipe(struct drm_i915_private *dev_priv)
1062 {
1063 	u32  pfit_control;
1064 
1065 	/* i830 doesn't have a panel fitter */
1066 	if (INTEL_GEN(dev_priv) <= 3 &&
1067 	    (IS_I830(dev_priv) || !IS_MOBILE(dev_priv)))
1068 		return -1;
1069 
1070 	pfit_control = I915_READ(PFIT_CONTROL);
1071 
1072 	/* See if the panel fitter is in use */
1073 	if ((pfit_control & PFIT_ENABLE) == 0)
1074 		return -1;
1075 
1076 	/* 965 can place panel fitter on either pipe */
1077 	if (IS_GEN4(dev_priv))
1078 		return (pfit_control >> 29) & 0x3;
1079 
1080 	/* older chips can only use pipe 1 */
1081 	return 1;
1082 }
1083 
1084 int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data,
1085 				  struct drm_file *file_priv)
1086 {
1087 	struct drm_intel_overlay_put_image *put_image_rec = data;
1088 	struct drm_i915_private *dev_priv = to_i915(dev);
1089 	struct intel_overlay *overlay;
1090 	struct drm_crtc *drmmode_crtc;
1091 	struct intel_crtc *crtc;
1092 	struct drm_i915_gem_object *new_bo;
1093 	struct put_image_params *params;
1094 	int ret;
1095 
1096 	overlay = dev_priv->overlay;
1097 	if (!overlay) {
1098 		DRM_DEBUG("userspace bug: no overlay\n");
1099 		return -ENODEV;
1100 	}
1101 
1102 	if (!(put_image_rec->flags & I915_OVERLAY_ENABLE)) {
1103 		drm_modeset_lock_all(dev);
1104 		mutex_lock(&dev->struct_mutex);
1105 
1106 		ret = intel_overlay_switch_off(overlay);
1107 
1108 		mutex_unlock(&dev->struct_mutex);
1109 		drm_modeset_unlock_all(dev);
1110 
1111 		return ret;
1112 	}
1113 
1114 	params = kmalloc(sizeof(*params), M_DRM, GFP_KERNEL);
1115 	if (!params)
1116 		return -ENOMEM;
1117 
1118 	drmmode_crtc = drm_crtc_find(dev, put_image_rec->crtc_id);
1119 	if (!drmmode_crtc) {
1120 		ret = -ENOENT;
1121 		goto out_free;
1122 	}
1123 	crtc = to_intel_crtc(drmmode_crtc);
1124 
1125 	new_bo = to_intel_bo(drm_gem_object_lookup(file_priv,
1126 						   put_image_rec->bo_handle));
1127 	if (&new_bo->base == NULL) {
1128 		ret = -ENOENT;
1129 		goto out_free;
1130 	}
1131 
1132 	drm_modeset_lock_all(dev);
1133 	mutex_lock(&dev->struct_mutex);
1134 
1135 	if (new_bo->tiling_mode) {
1136 		DRM_DEBUG_KMS("buffer used for overlay image can not be tiled\n");
1137 		ret = -EINVAL;
1138 		goto out_unlock;
1139 	}
1140 
1141 	ret = intel_overlay_recover_from_interrupt(overlay);
1142 	if (ret != 0)
1143 		goto out_unlock;
1144 
1145 	if (overlay->crtc != crtc) {
1146 		struct drm_display_mode *mode = &crtc->base.mode;
1147 		ret = intel_overlay_switch_off(overlay);
1148 		if (ret != 0)
1149 			goto out_unlock;
1150 
1151 		ret = check_overlay_possible_on_crtc(overlay, crtc);
1152 		if (ret != 0)
1153 			goto out_unlock;
1154 
1155 		overlay->crtc = crtc;
1156 		crtc->overlay = overlay;
1157 
1158 		/* line too wide, i.e. one-line-mode */
1159 		if (mode->hdisplay > 1024 &&
1160 		    intel_panel_fitter_pipe(dev_priv) == crtc->pipe) {
1161 			overlay->pfit_active = true;
1162 			update_pfit_vscale_ratio(overlay);
1163 		} else
1164 			overlay->pfit_active = false;
1165 	}
1166 
1167 	ret = check_overlay_dst(overlay, put_image_rec);
1168 	if (ret != 0)
1169 		goto out_unlock;
1170 
1171 	if (overlay->pfit_active) {
1172 		params->dst_y = ((((u32)put_image_rec->dst_y) << 12) /
1173 				 overlay->pfit_vscale_ratio);
1174 		/* shifting right rounds downwards, so add 1 */
1175 		params->dst_h = ((((u32)put_image_rec->dst_height) << 12) /
1176 				 overlay->pfit_vscale_ratio) + 1;
1177 	} else {
1178 		params->dst_y = put_image_rec->dst_y;
1179 		params->dst_h = put_image_rec->dst_height;
1180 	}
1181 	params->dst_x = put_image_rec->dst_x;
1182 	params->dst_w = put_image_rec->dst_width;
1183 
1184 	params->src_w = put_image_rec->src_width;
1185 	params->src_h = put_image_rec->src_height;
1186 	params->src_scan_w = put_image_rec->src_scan_width;
1187 	params->src_scan_h = put_image_rec->src_scan_height;
1188 	if (params->src_scan_h > params->src_h ||
1189 	    params->src_scan_w > params->src_w) {
1190 		ret = -EINVAL;
1191 		goto out_unlock;
1192 	}
1193 
1194 	ret = check_overlay_src(dev_priv, put_image_rec, new_bo);
1195 	if (ret != 0)
1196 		goto out_unlock;
1197 	params->format = put_image_rec->flags & ~I915_OVERLAY_FLAGS_MASK;
1198 	params->stride_Y = put_image_rec->stride_Y;
1199 	params->stride_UV = put_image_rec->stride_UV;
1200 	params->offset_Y = put_image_rec->offset_Y;
1201 	params->offset_U = put_image_rec->offset_U;
1202 	params->offset_V = put_image_rec->offset_V;
1203 
1204 	/* Check scaling after src size to prevent a divide-by-zero. */
1205 	ret = check_overlay_scaling(params);
1206 	if (ret != 0)
1207 		goto out_unlock;
1208 
1209 	ret = intel_overlay_do_put_image(overlay, new_bo, params);
1210 	if (ret != 0)
1211 		goto out_unlock;
1212 
1213 	mutex_unlock(&dev->struct_mutex);
1214 	drm_modeset_unlock_all(dev);
1215 
1216 	kfree(params);
1217 
1218 	return 0;
1219 
1220 out_unlock:
1221 	mutex_unlock(&dev->struct_mutex);
1222 	drm_modeset_unlock_all(dev);
1223 	drm_gem_object_unreference_unlocked(&new_bo->base);
1224 out_free:
1225 	kfree(params);
1226 
1227 	return ret;
1228 }
1229 
1230 static void update_reg_attrs(struct intel_overlay *overlay,
1231 			     struct overlay_registers __iomem *regs)
1232 {
1233 	iowrite32((overlay->contrast << 18) | (overlay->brightness & 0xff),
1234 		  &regs->OCLRC0);
1235 	iowrite32(overlay->saturation, &regs->OCLRC1);
1236 }
1237 
/*
 * Validate a pair of neighbouring gamma control points.
 *
 * Both values must have their top byte clear, and each of the three low
 * bytes of gamma1 must be strictly smaller than the matching byte of
 * gamma2.  Returns true when the pair is acceptable.
 */
static bool check_gamma_bounds(u32 gamma1, u32 gamma2)
{
	unsigned int shift;

	/* Bits 31:24 have to be zero in both values */
	if ((gamma1 | gamma2) & 0xff000000)
		return false;

	/* Compare the bytes at bit offsets 0, 8 and 16 */
	for (shift = 0; shift < 24; shift += 8) {
		u32 lower = (gamma1 >> shift) & 0xff;
		u32 upper = (gamma2 >> shift) & 0xff;

		if (lower >= upper)
			return false;
	}

	return true;
}
1252 
/*
 * Errata check for the gamma5 value: returns false when any of its three
 * low bytes equals 0x80, true otherwise.  The top byte is not examined.
 */
static bool check_gamma5_errata(u32 gamma5)
{
	unsigned int shift;

	for (shift = 0; shift < 24; shift += 8) {
		u32 channel = (gamma5 >> shift) & 0xff;

		if (channel == 0x80)
			return false;
	}

	return true;
}
1264 
1265 static int check_gamma(struct drm_intel_overlay_attrs *attrs)
1266 {
1267 	if (!check_gamma_bounds(0, attrs->gamma0) ||
1268 	    !check_gamma_bounds(attrs->gamma0, attrs->gamma1) ||
1269 	    !check_gamma_bounds(attrs->gamma1, attrs->gamma2) ||
1270 	    !check_gamma_bounds(attrs->gamma2, attrs->gamma3) ||
1271 	    !check_gamma_bounds(attrs->gamma3, attrs->gamma4) ||
1272 	    !check_gamma_bounds(attrs->gamma4, attrs->gamma5) ||
1273 	    !check_gamma_bounds(attrs->gamma5, 0x00ffffff))
1274 		return -EINVAL;
1275 
1276 	if (!check_gamma5_errata(attrs->gamma5))
1277 		return -EINVAL;
1278 
1279 	return 0;
1280 }
1281 
1282 int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data,
1283 			      struct drm_file *file_priv)
1284 {
1285 	struct drm_intel_overlay_attrs *attrs = data;
1286 	struct drm_i915_private *dev_priv = to_i915(dev);
1287 	struct intel_overlay *overlay;
1288 	struct overlay_registers __iomem *regs;
1289 	int ret;
1290 
1291 	overlay = dev_priv->overlay;
1292 	if (!overlay) {
1293 		DRM_DEBUG("userspace bug: no overlay\n");
1294 		return -ENODEV;
1295 	}
1296 
1297 	drm_modeset_lock_all(dev);
1298 	mutex_lock(&dev->struct_mutex);
1299 
1300 	ret = -EINVAL;
1301 	if (!(attrs->flags & I915_OVERLAY_UPDATE_ATTRS)) {
1302 		attrs->color_key  = overlay->color_key;
1303 		attrs->brightness = overlay->brightness;
1304 		attrs->contrast   = overlay->contrast;
1305 		attrs->saturation = overlay->saturation;
1306 
1307 		if (!IS_GEN2(dev_priv)) {
1308 			attrs->gamma0 = I915_READ(OGAMC0);
1309 			attrs->gamma1 = I915_READ(OGAMC1);
1310 			attrs->gamma2 = I915_READ(OGAMC2);
1311 			attrs->gamma3 = I915_READ(OGAMC3);
1312 			attrs->gamma4 = I915_READ(OGAMC4);
1313 			attrs->gamma5 = I915_READ(OGAMC5);
1314 		}
1315 	} else {
1316 		if (attrs->brightness < -128 || attrs->brightness > 127)
1317 			goto out_unlock;
1318 		if (attrs->contrast > 255)
1319 			goto out_unlock;
1320 		if (attrs->saturation > 1023)
1321 			goto out_unlock;
1322 
1323 		overlay->color_key  = attrs->color_key;
1324 		overlay->brightness = attrs->brightness;
1325 		overlay->contrast   = attrs->contrast;
1326 		overlay->saturation = attrs->saturation;
1327 
1328 		regs = intel_overlay_map_regs(overlay);
1329 		if (!regs) {
1330 			ret = -ENOMEM;
1331 			goto out_unlock;
1332 		}
1333 
1334 		update_reg_attrs(overlay, regs);
1335 
1336 		intel_overlay_unmap_regs(overlay, regs);
1337 
1338 		if (attrs->flags & I915_OVERLAY_UPDATE_GAMMA) {
1339 			if (IS_GEN2(dev_priv))
1340 				goto out_unlock;
1341 
1342 			if (overlay->active) {
1343 				ret = -EBUSY;
1344 				goto out_unlock;
1345 			}
1346 
1347 			ret = check_gamma(attrs);
1348 			if (ret)
1349 				goto out_unlock;
1350 
1351 			I915_WRITE(OGAMC0, attrs->gamma0);
1352 			I915_WRITE(OGAMC1, attrs->gamma1);
1353 			I915_WRITE(OGAMC2, attrs->gamma2);
1354 			I915_WRITE(OGAMC3, attrs->gamma3);
1355 			I915_WRITE(OGAMC4, attrs->gamma4);
1356 			I915_WRITE(OGAMC5, attrs->gamma5);
1357 		}
1358 	}
1359 	overlay->color_key_enabled = (attrs->flags & I915_OVERLAY_DISABLE_DEST_COLORKEY) == 0;
1360 
1361 	ret = 0;
1362 out_unlock:
1363 	mutex_unlock(&dev->struct_mutex);
1364 	drm_modeset_unlock_all(dev);
1365 
1366 	return ret;
1367 }
1368 
1369 void intel_setup_overlay(struct drm_i915_private *dev_priv)
1370 {
1371 	struct intel_overlay *overlay;
1372 	struct drm_i915_gem_object *reg_bo;
1373 	struct overlay_registers __iomem *regs;
1374 	int ret;
1375 
1376 	if (!HAS_OVERLAY(dev_priv))
1377 		return;
1378 
1379 	overlay = kzalloc(sizeof(*overlay), GFP_KERNEL);
1380 	if (!overlay)
1381 		return;
1382 
1383 	mutex_lock(&dev_priv->drm.struct_mutex);
1384 	if (WARN_ON(dev_priv->overlay))
1385 		goto out_free;
1386 
1387 	overlay->i915 = dev_priv;
1388 
1389 	reg_bo = NULL;
1390 	if (!OVERLAY_NEEDS_PHYSICAL(dev_priv))
1391 		reg_bo = i915_gem_object_create_stolen(&dev_priv->drm,
1392 						       PAGE_SIZE);
1393 	if (reg_bo == NULL)
1394 		reg_bo = i915_gem_object_create(&dev_priv->drm, PAGE_SIZE);
1395 	if (IS_ERR(reg_bo))
1396 		goto out_free;
1397 	overlay->reg_bo = reg_bo;
1398 
1399 	if (OVERLAY_NEEDS_PHYSICAL(dev_priv)) {
1400 		ret = i915_gem_object_attach_phys(reg_bo, PAGE_SIZE);
1401 		if (ret) {
1402 			DRM_ERROR("failed to attach phys overlay regs\n");
1403 			goto out_free_bo;
1404 		}
1405 		overlay->flip_addr = reg_bo->phys_handle->busaddr;
1406 	} else {
1407 		ret = i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, PIN_MAPPABLE);
1408 		if (ret) {
1409 			DRM_ERROR("failed to pin overlay register bo\n");
1410 			goto out_free_bo;
1411 		}
1412 		overlay->flip_addr = i915_gem_obj_ggtt_offset(reg_bo);
1413 
1414 		ret = i915_gem_object_set_to_gtt_domain(reg_bo, true);
1415 		if (ret) {
1416 			DRM_ERROR("failed to move overlay register bo into the GTT\n");
1417 			goto out_unpin_bo;
1418 		}
1419 	}
1420 
1421 	/* init all values */
1422 	overlay->color_key = 0x0101fe;
1423 	overlay->color_key_enabled = true;
1424 	overlay->brightness = -19;
1425 	overlay->contrast = 75;
1426 	overlay->saturation = 146;
1427 
1428 	regs = intel_overlay_map_regs(overlay);
1429 	if (!regs)
1430 		goto out_unpin_bo;
1431 
1432 	memset_io(regs, 0, sizeof(struct overlay_registers));
1433 	update_polyphase_filter(regs);
1434 	update_reg_attrs(overlay, regs);
1435 
1436 	intel_overlay_unmap_regs(overlay, regs);
1437 
1438 	dev_priv->overlay = overlay;
1439 	mutex_unlock(&dev_priv->drm.struct_mutex);
1440 	DRM_INFO("initialized overlay support\n");
1441 	return;
1442 
1443 out_unpin_bo:
1444 	if (!OVERLAY_NEEDS_PHYSICAL(dev_priv))
1445 		i915_gem_object_ggtt_unpin(reg_bo);
1446 out_free_bo:
1447 	drm_gem_object_unreference(&reg_bo->base);
1448 out_free:
1449 	mutex_unlock(&dev_priv->drm.struct_mutex);
1450 	kfree(overlay);
1451 	return;
1452 }
1453 
1454 void intel_cleanup_overlay(struct drm_i915_private *dev_priv)
1455 {
1456 	if (!dev_priv->overlay)
1457 		return;
1458 
1459 	/* The bo's should be free'd by the generic code already.
1460 	 * Furthermore modesetting teardown happens beforehand so the
1461 	 * hardware should be off already */
1462 	WARN_ON(dev_priv->overlay->active);
1463 
1464 	drm_gem_object_unreference_unlocked(&dev_priv->overlay->reg_bo->base);
1465 	kfree(dev_priv->overlay);
1466 }
1467 
1468 struct intel_overlay_error_state {
1469 	struct overlay_registers regs;
1470 	unsigned long base;
1471 	u32 dovsta;
1472 	u32 isr;
1473 };
1474 
1475 static struct overlay_registers __iomem *
1476 intel_overlay_map_regs_atomic(struct intel_overlay *overlay)
1477 {
1478 	struct drm_i915_private *dev_priv = overlay->i915;
1479 	struct overlay_registers __iomem *regs;
1480 
1481 	if (OVERLAY_NEEDS_PHYSICAL(dev_priv))
1482 		/* Cast to make sparse happy, but it's wc memory anyway, so
1483 		 * equivalent to the wc io mapping on X86. */
1484 		regs = (struct overlay_registers __iomem *)
1485 			overlay->reg_bo->phys_handle->vaddr;
1486 	else
1487 		regs = io_mapping_map_atomic_wc(dev_priv->ggtt.mappable,
1488 						overlay->flip_addr);
1489 
1490 	return regs;
1491 }
1492 
1493 static void intel_overlay_unmap_regs_atomic(struct intel_overlay *overlay,
1494 					struct overlay_registers __iomem *regs)
1495 {
1496 	if (!OVERLAY_NEEDS_PHYSICAL(overlay->i915))
1497 		io_mapping_unmap_atomic(regs);
1498 }
1499 
1500 struct intel_overlay_error_state *
1501 intel_overlay_capture_error_state(struct drm_i915_private *dev_priv)
1502 {
1503 	struct intel_overlay *overlay = dev_priv->overlay;
1504 	struct intel_overlay_error_state *error;
1505 	struct overlay_registers __iomem *regs;
1506 
1507 	if (!overlay || !overlay->active)
1508 		return NULL;
1509 
1510 	error = kmalloc(sizeof(*error), M_DRM, GFP_ATOMIC);
1511 	if (error == NULL)
1512 		return NULL;
1513 
1514 	error->dovsta = I915_READ(DOVSTA);
1515 	error->isr = I915_READ(ISR);
1516 	error->base = overlay->flip_addr;
1517 
1518 	regs = intel_overlay_map_regs_atomic(overlay);
1519 	if (!regs)
1520 		goto err;
1521 
1522 	memcpy_fromio(&error->regs, regs, sizeof(struct overlay_registers));
1523 	intel_overlay_unmap_regs_atomic(overlay, regs);
1524 
1525 	return error;
1526 
1527 err:
1528 	kfree(error);
1529 	return NULL;
1530 }
1531 
1532 void
1533 intel_overlay_print_error_state(struct drm_i915_error_state_buf *m,
1534 				struct intel_overlay_error_state *error)
1535 {
1536 	i915_error_printf(m, "Overlay, status: 0x%08x, interrupt: 0x%08x\n",
1537 			  error->dovsta, error->isr);
1538 	i915_error_printf(m, "  Register file at 0x%08lx:\n",
1539 			  error->base);
1540 
1541 #define P(x) i915_error_printf(m, "    " #x ":	0x%08x\n", error->regs.x)
1542 	P(OBUF_0Y);
1543 	P(OBUF_1Y);
1544 	P(OBUF_0U);
1545 	P(OBUF_0V);
1546 	P(OBUF_1U);
1547 	P(OBUF_1V);
1548 	P(OSTRIDE);
1549 	P(YRGB_VPH);
1550 	P(UV_VPH);
1551 	P(HORZ_PH);
1552 	P(INIT_PHS);
1553 	P(DWINPOS);
1554 	P(DWINSZ);
1555 	P(SWIDTH);
1556 	P(SWIDTHSW);
1557 	P(SHEIGHT);
1558 	P(YRGBSCALE);
1559 	P(UVSCALE);
1560 	P(OCLRC0);
1561 	P(OCLRC1);
1562 	P(DCLRKV);
1563 	P(DCLRKM);
1564 	P(SCLRKVH);
1565 	P(SCLRKVL);
1566 	P(SCLRKEN);
1567 	P(OCONFIG);
1568 	P(OCMD);
1569 	P(OSTART_0Y);
1570 	P(OSTART_1Y);
1571 	P(OSTART_0U);
1572 	P(OSTART_0V);
1573 	P(OSTART_1U);
1574 	P(OSTART_1V);
1575 	P(OTILEOFF_0Y);
1576 	P(OTILEOFF_1Y);
1577 	P(OTILEOFF_0U);
1578 	P(OTILEOFF_0V);
1579 	P(OTILEOFF_1U);
1580 	P(OTILEOFF_1V);
1581 	P(FASTHSCALE);
1582 	P(UVSCALEV);
1583 #undef P
1584 }
1585