xref: /dragonfly/sys/dev/drm/i915/intel_overlay.c (revision 40657594)
1 /*
2  * Copyright © 2009
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Daniel Vetter <daniel@ffwll.ch>
25  *
26  * Derived from Xorg ddx, xf86-video-intel, src/i830_video.c
27  */
28 #include <drm/drmP.h>
29 #include <drm/i915_drm.h>
30 #include "i915_drv.h"
31 #include "i915_reg.h"
32 #include "intel_drv.h"
33 
/*
 * Limits for the overlay source size. According to the Intel docs, the real
 * limits are: Y width: 4095, UV width (planar): 2047, Y height: 2047,
 * UV height (planar): 1023. But the xorg driver assumes 2048 for both height
 * and width. Use the minimum of both.
 */
#define IMAGE_MAX_WIDTH		2048
#define IMAGE_MAX_HEIGHT	2046 /* 2 * 1023 */
/* on 830 and 845 these large limits result in the card hanging */
#define IMAGE_MAX_WIDTH_LEGACY	1024
#define IMAGE_MAX_HEIGHT_LEGACY	1088
43 
/* overlay register definitions */
/*
 * OCMD register: bit fields of the overlay command word that
 * overlay_cmd_reg() assembles and that is written to
 * struct overlay_registers::OCMD.
 *
 * NOTE(review): OCMD_FIELD_SELECT is defined on bit 2 (the same bit as
 * OCMD_BUFFER1) while OCMD_FIELD0/OCMD_FIELD1 use bit 1 -- confirm against
 * the hardware documentation before relying on OCMD_FIELD_SELECT.
 */
#define OCMD_TILED_SURFACE	(0x1<<19)
#define OCMD_MIRROR_MASK	(0x3<<17)
#define OCMD_MIRROR_MODE	(0x3<<17)
#define OCMD_MIRROR_HORIZONTAL	(0x1<<17)
#define OCMD_MIRROR_VERTICAL	(0x2<<17)
#define OCMD_MIRROR_BOTH	(0x3<<17)
#define OCMD_BYTEORDER_MASK	(0x3<<14) /* zero for YUYV or FOURCC YUY2 */
#define OCMD_UV_SWAP		(0x1<<14) /* YVYU */
#define OCMD_Y_SWAP		(0x2<<14) /* UYVY or FOURCC UYVY */
#define OCMD_Y_AND_UV_SWAP	(0x3<<14) /* VYUY */
#define OCMD_SOURCE_FORMAT_MASK (0xf<<10)
#define OCMD_RGB_888		(0x1<<10) /* not in i965 Intel docs */
#define OCMD_RGB_555		(0x2<<10) /* not in i965 Intel docs */
#define OCMD_RGB_565		(0x3<<10) /* not in i965 Intel docs */
#define OCMD_YUV_422_PACKED	(0x8<<10)
#define OCMD_YUV_411_PACKED	(0x9<<10) /* not in i965 Intel docs */
#define OCMD_YUV_420_PLANAR	(0xc<<10)
#define OCMD_YUV_422_PLANAR	(0xd<<10)
#define OCMD_YUV_410_PLANAR	(0xe<<10) /* also 411 */
#define OCMD_TVSYNCFLIP_PARITY	(0x1<<9)
#define OCMD_TVSYNCFLIP_ENABLE	(0x1<<7)
#define OCMD_BUF_TYPE_MASK	(0x1<<5)
#define OCMD_BUF_TYPE_FRAME	(0x0<<5)
#define OCMD_BUF_TYPE_FIELD	(0x1<<5)
#define OCMD_TEST_MODE		(0x1<<4)
#define OCMD_BUFFER_SELECT	(0x3<<2)
#define OCMD_BUFFER0		(0x0<<2)
#define OCMD_BUFFER1		(0x1<<2)
#define OCMD_FIELD_SELECT	(0x1<<2)
#define OCMD_FIELD0		(0x0<<1)
#define OCMD_FIELD1		(0x1<<1)
#define OCMD_ENABLE		(0x1<<0)
78 
/*
 * OCONFIG register: configuration bits written to
 * struct overlay_registers::OCONFIG before the overlay is switched on
 * (pipe selection, colour-space conversion mode, line-buffer mode).
 */
#define OCONF_PIPE_MASK		(0x1<<18)
#define OCONF_PIPE_A		(0x0<<18)
#define OCONF_PIPE_B		(0x1<<18)
#define OCONF_GAMMA2_ENABLE	(0x1<<16)
#define OCONF_CSC_MODE_BT601	(0x0<<5)
#define OCONF_CSC_MODE_BT709	(0x1<<5)
#define OCONF_CSC_BYPASS	(0x1<<4)
#define OCONF_CC_OUT_8BIT	(0x1<<3)
#define OCONF_TEST_MODE		(0x1<<2)
#define OCONF_THREE_LINE_BUFFER	(0x1<<0)
#define OCONF_TWO_LINE_BUFFER	(0x0<<0)
91 
/*
 * DCLRKM (dst-key) register: destination colour-key enable bit plus the
 * per-framebuffer-depth mask values that update_colorkey() ORs into it.
 *
 * DST_KEY_ENABLE uses an unsigned constant so that setting bit 31 does not
 * shift into the sign bit of a signed int.
 */
#define DST_KEY_ENABLE		(0x1U<<31)
#define CLK_RGB24_MASK		0x0
#define CLK_RGB16_MASK		0x070307
#define CLK_RGB15_MASK		0x070707
#define CLK_RGB8I_MASK		0xffffff

/*
 * Expand a 16 bpp (5:6:5) or 15 bpp (5:5:5) colour value into the 24 bit
 * layout written to DCLRKV. The argument is fully parenthesised so that
 * expression arguments (e.g. "a | b") are masked as a whole; note that it
 * is still evaluated three times, so it must not have side effects.
 */
#define RGB16_TO_COLORKEY(c) \
	((((c) & 0xF800) << 8) | (((c) & 0x07E0) << 5) | (((c) & 0x001F) << 3))
#define RGB15_TO_COLORKEY(c) \
	((((c) & 0x7c00) << 9) | (((c) & 0x03E0) << 6) | (((c) & 0x001F) << 3))
103 
/*
 * overlay flip addr flag: ORed into the flip address emitted after
 * MI_OVERLAY_FLIP when the filter coefficients should be (re)loaded.
 */
#define OFC_UPDATE		0x1

/*
 * polyphase filter coefficients: tap and phase counts that size the
 * coefficient arrays in struct overlay_registers and the static tables.
 */
#define N_HORIZ_Y_TAPS          5
#define N_VERT_Y_TAPS           3
#define N_HORIZ_UV_TAPS         3
#define N_VERT_UV_TAPS          3
#define N_PHASES                17
#define MAX_TAPS                5
114 
/*
 * Memory-buffered overlay registers.
 *
 * The driver fills this block through a mapping obtained from
 * intel_overlay_map_regs(). Field order and the RESERVED padding arrays
 * fix the layout; the hex values in the trailing comments are the byte
 * offsets the padding is sized for, so fields must not be reordered.
 */
struct overlay_registers {
	u32 OBUF_0Y;
	u32 OBUF_1Y;
	u32 OBUF_0U;
	u32 OBUF_0V;
	u32 OBUF_1U;
	u32 OBUF_1V;
	u32 OSTRIDE;
	u32 YRGB_VPH;
	u32 UV_VPH;
	u32 HORZ_PH;
	u32 INIT_PHS;
	u32 DWINPOS;
	u32 DWINSZ;
	u32 SWIDTH;
	u32 SWIDTHSW;
	u32 SHEIGHT;
	u32 YRGBSCALE;
	u32 UVSCALE;
	u32 OCLRC0;
	u32 OCLRC1;
	u32 DCLRKV;
	u32 DCLRKM;
	u32 SCLRKVH;
	u32 SCLRKVL;
	u32 SCLRKEN;
	u32 OCONFIG;
	u32 OCMD;
	u32 RESERVED1; /* 0x6C */
	u32 OSTART_0Y;
	u32 OSTART_1Y;
	u32 OSTART_0U;
	u32 OSTART_0V;
	u32 OSTART_1U;
	u32 OSTART_1V;
	u32 OTILEOFF_0Y;
	u32 OTILEOFF_1Y;
	u32 OTILEOFF_0U;
	u32 OTILEOFF_0V;
	u32 OTILEOFF_1U;
	u32 OTILEOFF_1V;
	u32 FASTHSCALE; /* 0xA0 */
	u32 UVSCALEV; /* 0xA4 */
	u32 RESERVEDC[(0x200 - 0xA8) / 4]; /* 0xA8 - 0x1FC */
	/* coefficient tables, each padded up to the start of the next one */
	u16 Y_VCOEFS[N_VERT_Y_TAPS * N_PHASES]; /* 0x200 */
	u16 RESERVEDD[0x100 / 2 - N_VERT_Y_TAPS * N_PHASES];
	u16 Y_HCOEFS[N_HORIZ_Y_TAPS * N_PHASES]; /* 0x300 */
	u16 RESERVEDE[0x200 / 2 - N_HORIZ_Y_TAPS * N_PHASES];
	u16 UV_VCOEFS[N_VERT_UV_TAPS * N_PHASES]; /* 0x500 */
	u16 RESERVEDF[0x100 / 2 - N_VERT_UV_TAPS * N_PHASES];
	u16 UV_HCOEFS[N_HORIZ_UV_TAPS * N_PHASES]; /* 0x600 */
	u16 RESERVEDG[0x100 / 2 - N_HORIZ_UV_TAPS * N_PHASES];
};
169 
/* Driver-side state of the (single) hardware video overlay. */
struct intel_overlay {
	struct drm_i915_private *i915;
	/* crtc the overlay is currently bound to; cleared when switched off */
	struct intel_crtc *crtc;
	/* buffer currently being scanned out by the overlay */
	struct drm_i915_gem_object *vid_bo;
	/* previous buffer, still pinned until the pending flip has completed */
	struct drm_i915_gem_object *old_vid_bo;
	/* true between a successful MI_OVERLAY_ON and the off tail */
	bool active;
	bool pfit_active;
	u32 pfit_vscale_ratio; /* shifted-point number, (1<<12) == 1.0 */
	u32 color_key:24;
	u32 color_key_enabled:1;
	u32 brightness, contrast, saturation;
	/* last scale factors programmed, used to detect scaling changes */
	u32 old_xscale, old_yscale;
	/* register access */
	/* address emitted with MI_OVERLAY_FLIP; also the offset that is mapped */
	u32 flip_addr;
	struct drm_i915_gem_object *reg_bo;
	/* flip handling */
	/* request of the last flip that has not been waited for yet */
	struct drm_i915_gem_request *last_flip_req;
	/* optional callback run once last_flip_req has completed */
	void (*flip_tail)(struct intel_overlay *);
};
189 
190 static struct overlay_registers __iomem *
191 intel_overlay_map_regs(struct intel_overlay *overlay)
192 {
193 	struct drm_i915_private *dev_priv = overlay->i915;
194 	struct overlay_registers __iomem *regs;
195 
196 	if (OVERLAY_NEEDS_PHYSICAL(dev_priv))
197 		regs = (struct overlay_registers __iomem *)overlay->reg_bo->phys_handle->vaddr;
198 	else
199 		regs = io_mapping_map_wc(dev_priv->ggtt.mappable,
200 					 overlay->flip_addr,
201 					 PAGE_SIZE);
202 
203 	return regs;
204 }
205 
206 static void intel_overlay_unmap_regs(struct intel_overlay *overlay,
207 				     struct overlay_registers __iomem *regs)
208 {
209 	if (!OVERLAY_NEEDS_PHYSICAL(overlay->i915))
210 		io_mapping_unmap(regs);
211 }
212 
213 static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
214 					 struct drm_i915_gem_request *req,
215 					 void (*tail)(struct intel_overlay *))
216 {
217 	int ret;
218 
219 	WARN_ON(overlay->last_flip_req);
220 	i915_gem_request_assign(&overlay->last_flip_req, req);
221 	i915_add_request(req);
222 
223 	overlay->flip_tail = tail;
224 	ret = i915_wait_request(overlay->last_flip_req);
225 	if (ret)
226 		return ret;
227 
228 	i915_gem_request_assign(&overlay->last_flip_req, NULL);
229 	return 0;
230 }
231 
232 /* overlay needs to be disable in OCMD reg */
233 static int intel_overlay_on(struct intel_overlay *overlay)
234 {
235 	struct drm_i915_private *dev_priv = overlay->i915;
236 	struct intel_engine_cs *engine = &dev_priv->engine[RCS];
237 	struct drm_i915_gem_request *req;
238 	int ret;
239 
240 	WARN_ON(overlay->active);
241 	WARN_ON(IS_I830(dev_priv) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE));
242 
243 	req = i915_gem_request_alloc(engine, NULL);
244 	if (IS_ERR(req))
245 		return PTR_ERR(req);
246 
247 	ret = intel_ring_begin(req, 4);
248 	if (ret) {
249 		i915_add_request_no_flush(req);
250 		return ret;
251 	}
252 
253 	overlay->active = true;
254 
255 	intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_ON);
256 	intel_ring_emit(engine, overlay->flip_addr | OFC_UPDATE);
257 	intel_ring_emit(engine, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
258 	intel_ring_emit(engine, MI_NOOP);
259 	intel_ring_advance(engine);
260 
261 	return intel_overlay_do_wait_request(overlay, req, NULL);
262 }
263 
264 /* overlay needs to be enabled in OCMD reg */
265 static int intel_overlay_continue(struct intel_overlay *overlay,
266 				  bool load_polyphase_filter)
267 {
268 	struct drm_i915_private *dev_priv = overlay->i915;
269 	struct intel_engine_cs *engine = &dev_priv->engine[RCS];
270 	struct drm_i915_gem_request *req;
271 	u32 flip_addr = overlay->flip_addr;
272 	u32 tmp;
273 	int ret;
274 
275 	WARN_ON(!overlay->active);
276 
277 	if (load_polyphase_filter)
278 		flip_addr |= OFC_UPDATE;
279 
280 	/* check for underruns */
281 	tmp = I915_READ(DOVSTA);
282 	if (tmp & (1 << 17))
283 		DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp);
284 
285 	req = i915_gem_request_alloc(engine, NULL);
286 	if (IS_ERR(req))
287 		return PTR_ERR(req);
288 
289 	ret = intel_ring_begin(req, 2);
290 	if (ret) {
291 		i915_add_request_no_flush(req);
292 		return ret;
293 	}
294 
295 	intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
296 	intel_ring_emit(engine, flip_addr);
297 	intel_ring_advance(engine);
298 
299 	WARN_ON(overlay->last_flip_req);
300 	i915_gem_request_assign(&overlay->last_flip_req, req);
301 	i915_add_request(req);
302 
303 	return 0;
304 }
305 
306 static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
307 {
308 	struct drm_i915_gem_object *obj = overlay->old_vid_bo;
309 
310 	i915_gem_object_ggtt_unpin(obj);
311 	i915_gem_object_put(obj);
312 
313 	overlay->old_vid_bo = NULL;
314 }
315 
316 static void intel_overlay_off_tail(struct intel_overlay *overlay)
317 {
318 	struct drm_i915_gem_object *obj = overlay->vid_bo;
319 
320 	/* never have the overlay hw on without showing a frame */
321 	if (WARN_ON(!obj))
322 		return;
323 
324 	i915_gem_object_ggtt_unpin(obj);
325 	i915_gem_object_put(obj);
326 	overlay->vid_bo = NULL;
327 
328 	overlay->crtc->overlay = NULL;
329 	overlay->crtc = NULL;
330 	overlay->active = false;
331 }
332 
/*
 * Emit the commands that switch the overlay hardware off and wait for them.
 * The overlay needs to be disabled in the OCMD register image beforehand.
 *
 * intel_overlay_off_tail is registered as the flip tail for the wait; it is
 * not invoked by this function itself. Returns 0 on success or a negative
 * error code from request allocation, ring space reservation or the wait.
 */
static int intel_overlay_off(struct intel_overlay *overlay)
{
	struct drm_i915_private *dev_priv = overlay->i915;
	struct intel_engine_cs *engine = &dev_priv->engine[RCS];
	struct drm_i915_gem_request *req;
	u32 flip_addr = overlay->flip_addr;
	int ret;

	WARN_ON(!overlay->active);

	/* According to intel docs the overlay hw may hang (when switching
	 * off) without loading the filter coeffs. It is however unclear whether
	 * this applies to the disabling of the overlay or to the switching off
	 * of the hw. Do it in both cases */
	flip_addr |= OFC_UPDATE;

	req = i915_gem_request_alloc(engine, NULL);
	if (IS_ERR(req))
		return PTR_ERR(req);

	/* both branches below emit exactly six dwords in total */
	ret = intel_ring_begin(req, 6);
	if (ret) {
		i915_add_request_no_flush(req);
		return ret;
	}

	/* wait for overlay to go idle */
	intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
	intel_ring_emit(engine, flip_addr);
	intel_ring_emit(engine, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
	/* turn overlay off */
	if (IS_I830(dev_priv)) {
		/* Workaround: Don't disable the overlay fully, since otherwise
		 * it dies on the next OVERLAY_ON cmd. */
		intel_ring_emit(engine, MI_NOOP);
		intel_ring_emit(engine, MI_NOOP);
		intel_ring_emit(engine, MI_NOOP);
	} else {
		intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_OFF);
		intel_ring_emit(engine, flip_addr);
		intel_ring_emit(engine,
				MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
	}
	intel_ring_advance(engine);

	return intel_overlay_do_wait_request(overlay, req, intel_overlay_off_tail);
}
381 
382 /* recover from an interruption due to a signal
383  * We have to be careful not to repeat work forever an make forward progess. */
384 static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
385 {
386 	int ret;
387 
388 	if (overlay->last_flip_req == NULL)
389 		return 0;
390 
391 	ret = i915_wait_request(overlay->last_flip_req);
392 	if (ret)
393 		return ret;
394 
395 	if (overlay->flip_tail)
396 		overlay->flip_tail(overlay);
397 
398 	i915_gem_request_assign(&overlay->last_flip_req, NULL);
399 	return 0;
400 }
401 
/*
 * Wait for a pending overlay flip and release the old frame.
 * Needs to be called before the overlay registers are changed
 * via intel_overlay_(un)map_regs.
 *
 * Must be called with struct_mutex held. Returns 0 on success (including
 * when there is no old frame) or a negative error code if a request could
 * not be set up or the wait failed.
 */
static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
{
	struct drm_i915_private *dev_priv = overlay->i915;
	struct intel_engine_cs *engine = &dev_priv->engine[RCS];
	int ret;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	/* Only wait if there is actually an old frame to release to
	 * guarantee forward progress.
	 */
	if (!overlay->old_vid_bo)
		return 0;

	/* a flip is still pending in hardware: queue a wait for it */
	if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
		/* synchronous slowpath */
		struct drm_i915_gem_request *req;

		req = i915_gem_request_alloc(engine, NULL);
		if (IS_ERR(req))
			return PTR_ERR(req);

		ret = intel_ring_begin(req, 2);
		if (ret) {
			i915_add_request_no_flush(req);
			return ret;
		}

		intel_ring_emit(engine,
				MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
		intel_ring_emit(engine, MI_NOOP);
		intel_ring_advance(engine);

		ret = intel_overlay_do_wait_request(overlay, req,
						    intel_overlay_release_old_vid_tail);
		if (ret)
			return ret;
	}

	intel_overlay_release_old_vid_tail(overlay);


	/*
	 * NOTE(review): the tail above has already set overlay->old_vid_bo to
	 * NULL, so this call is passed NULL as the old object -- confirm
	 * whether the tracking update was meant to happen before the release.
	 */
	i915_gem_track_fb(overlay->old_vid_bo, NULL,
			  INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
	return 0;
}
452 
453 void intel_overlay_reset(struct drm_i915_private *dev_priv)
454 {
455 	struct intel_overlay *overlay = dev_priv->overlay;
456 
457 	if (!overlay)
458 		return;
459 
460 	intel_overlay_release_old_vid(overlay);
461 
462 	overlay->last_flip_req = NULL;
463 	overlay->old_xscale = 0;
464 	overlay->old_yscale = 0;
465 	overlay->crtc = NULL;
466 	overlay->active = false;
467 }
468 
/*
 * Parameters of one put-image operation, in the form consumed by
 * intel_overlay_do_put_image() and its helpers.
 */
struct put_image_params {
	/* I915_OVERLAY_* type/depth/swap bits describing the source format */
	int format;
	/* destination window, written to DWINPOS / DWINSZ */
	short dst_x;
	short dst_y;
	short dst_w;
	short dst_h;
	/* source width/height written to SWIDTH / SHEIGHT */
	short src_w;
	/* source extent used when computing the scaling factors */
	short src_scan_h;
	short src_scan_w;
	short src_h;
	/* strides written to OSTRIDE (UV only for planar formats) */
	short stride_Y;
	short stride_UV;
	/* plane offsets, added to the buffer's GGTT offset */
	int offset_Y;
	int offset_U;
	int offset_V;
};
485 
486 static int packed_depth_bytes(u32 format)
487 {
488 	switch (format & I915_OVERLAY_DEPTH_MASK) {
489 	case I915_OVERLAY_YUV422:
490 		return 4;
491 	case I915_OVERLAY_YUV411:
492 		/* return 6; not implemented */
493 	default:
494 		return -EINVAL;
495 	}
496 }
497 
498 static int packed_width_bytes(u32 format, short width)
499 {
500 	switch (format & I915_OVERLAY_DEPTH_MASK) {
501 	case I915_OVERLAY_YUV422:
502 		return width << 1;
503 	default:
504 		return -EINVAL;
505 	}
506 }
507 
508 static int uv_hsubsampling(u32 format)
509 {
510 	switch (format & I915_OVERLAY_DEPTH_MASK) {
511 	case I915_OVERLAY_YUV422:
512 	case I915_OVERLAY_YUV420:
513 		return 2;
514 	case I915_OVERLAY_YUV411:
515 	case I915_OVERLAY_YUV410:
516 		return 4;
517 	default:
518 		return -EINVAL;
519 	}
520 }
521 
522 static int uv_vsubsampling(u32 format)
523 {
524 	switch (format & I915_OVERLAY_DEPTH_MASK) {
525 	case I915_OVERLAY_YUV420:
526 	case I915_OVERLAY_YUV410:
527 		return 2;
528 	case I915_OVERLAY_YUV422:
529 	case I915_OVERLAY_YUV411:
530 		return 1;
531 	default:
532 		return -EINVAL;
533 	}
534 }
535 
536 static u32 calc_swidthsw(struct drm_i915_private *dev_priv, u32 offset, u32 width)
537 {
538 	u32 mask, shift, ret;
539 	if (IS_GEN2(dev_priv)) {
540 		mask = 0x1f;
541 		shift = 5;
542 	} else {
543 		mask = 0x3f;
544 		shift = 6;
545 	}
546 	ret = ((offset + width + mask) >> shift) - (offset >> shift);
547 	if (!IS_GEN2(dev_priv))
548 		ret <<= 1;
549 	ret -= 1;
550 	return ret << 2;
551 }
552 
/*
 * Static horizontal filter coefficients for the Y plane, copied into
 * overlay_registers::Y_HCOEFS by update_polyphase_filter().
 * One row of N_HORIZ_Y_TAPS entries per phase, N_PHASES rows.
 */
static const u16 y_static_hcoeffs[N_HORIZ_Y_TAPS * N_PHASES] = {
	0x3000, 0xb4a0, 0x1930, 0x1920, 0xb4a0,
	0x3000, 0xb500, 0x19d0, 0x1880, 0xb440,
	0x3000, 0xb540, 0x1a88, 0x2f80, 0xb3e0,
	0x3000, 0xb580, 0x1b30, 0x2e20, 0xb380,
	0x3000, 0xb5c0, 0x1bd8, 0x2cc0, 0xb320,
	0x3020, 0xb5e0, 0x1c60, 0x2b80, 0xb2c0,
	0x3020, 0xb5e0, 0x1cf8, 0x2a20, 0xb260,
	0x3020, 0xb5e0, 0x1d80, 0x28e0, 0xb200,
	0x3020, 0xb5c0, 0x1e08, 0x3f40, 0xb1c0,
	0x3020, 0xb580, 0x1e78, 0x3ce0, 0xb160,
	0x3040, 0xb520, 0x1ed8, 0x3aa0, 0xb120,
	0x3040, 0xb4a0, 0x1f30, 0x3880, 0xb0e0,
	0x3040, 0xb400, 0x1f78, 0x3680, 0xb0a0,
	0x3020, 0xb340, 0x1fb8, 0x34a0, 0xb060,
	0x3020, 0xb240, 0x1fe0, 0x32e0, 0xb040,
	0x3020, 0xb140, 0x1ff8, 0x3160, 0xb020,
	0xb000, 0x3000, 0x0800, 0x3000, 0xb000
};
572 
/*
 * Static horizontal filter coefficients for the UV planes, copied into
 * overlay_registers::UV_HCOEFS by update_polyphase_filter().
 * N_HORIZ_UV_TAPS entries per phase, N_PHASES phases; the initializer is
 * laid out with two phases per source line.
 */
static const u16 uv_static_hcoeffs[N_HORIZ_UV_TAPS * N_PHASES] = {
	0x3000, 0x1800, 0x1800, 0xb000, 0x18d0, 0x2e60,
	0xb000, 0x1990, 0x2ce0, 0xb020, 0x1a68, 0x2b40,
	0xb040, 0x1b20, 0x29e0, 0xb060, 0x1bd8, 0x2880,
	0xb080, 0x1c88, 0x3e60, 0xb0a0, 0x1d28, 0x3c00,
	0xb0c0, 0x1db8, 0x39e0, 0xb0e0, 0x1e40, 0x37e0,
	0xb100, 0x1eb8, 0x3620, 0xb100, 0x1f18, 0x34a0,
	0xb100, 0x1f68, 0x3360, 0xb0e0, 0x1fa8, 0x3240,
	0xb0c0, 0x1fe0, 0x3140, 0xb060, 0x1ff0, 0x30a0,
	0x3000, 0x0800, 0x3000
};
584 
585 static void update_polyphase_filter(struct overlay_registers __iomem *regs)
586 {
587 	memcpy_toio(regs->Y_HCOEFS, y_static_hcoeffs, sizeof(y_static_hcoeffs));
588 	memcpy_toio(regs->UV_HCOEFS, uv_static_hcoeffs,
589 		    sizeof(uv_static_hcoeffs));
590 }
591 
/*
 * Compute the Y and UV scaling factors for @params and write them to the
 * YRGBSCALE, UVSCALE and UVSCALEV registers.
 *
 * The factors are also cached in overlay->old_xscale/old_yscale. Returns
 * true when they differ from the previously cached values, in which case
 * the polyphase filter tables have been reloaded as well.
 *
 * NOTE(review): uv_hsubsampling()/uv_vsubsampling() can return -EINVAL and
 * their results are used as divisors here without a check -- presumably the
 * format has been validated by the caller; confirm.
 */
static bool update_scaling_factors(struct intel_overlay *overlay,
				   struct overlay_registers __iomem *regs,
				   struct put_image_params *params)
{
	/* fixed point with a 12 bit shift */
	u32 xscale, yscale, xscale_UV, yscale_UV;
#define FP_SHIFT 12
#define FRACT_MASK 0xfff
	bool scale_changed = false;
	int uv_hscale = uv_hsubsampling(params->format);
	int uv_vscale = uv_vsubsampling(params->format);

	/* a destination of at most one pixel falls back to a 1.0 scale */
	if (params->dst_w > 1)
		xscale = ((params->src_scan_w - 1) << FP_SHIFT)
			/(params->dst_w);
	else
		xscale = 1 << FP_SHIFT;

	if (params->dst_h > 1)
		yscale = ((params->src_scan_h - 1) << FP_SHIFT)
			/(params->dst_h);
	else
		yscale = 1 << FP_SHIFT;

	/*if (params->format & I915_OVERLAY_YUV_PLANAR) {*/
	xscale_UV = xscale/uv_hscale;
	yscale_UV = yscale/uv_vscale;
	/* make the Y scale to UV scale ratio an exact multiply */
	xscale = xscale_UV * uv_hscale;
	yscale = yscale_UV * uv_vscale;
	/*} else {
	  xscale_UV = 0;
	  yscale_UV = 0;
	  }*/

	if (xscale != overlay->old_xscale || yscale != overlay->old_yscale)
		scale_changed = true;
	overlay->old_xscale = xscale;
	overlay->old_yscale = yscale;

	/* vertical fraction | horizontal integer part | horizontal fraction */
	iowrite32(((yscale & FRACT_MASK) << 20) |
		  ((xscale >> FP_SHIFT)  << 16) |
		  ((xscale & FRACT_MASK) << 3),
		 &regs->YRGBSCALE);

	iowrite32(((yscale_UV & FRACT_MASK) << 20) |
		  ((xscale_UV >> FP_SHIFT)  << 16) |
		  ((xscale_UV & FRACT_MASK) << 3),
		 &regs->UVSCALE);

	/* integer parts of the vertical Y and UV scale */
	iowrite32((((yscale    >> FP_SHIFT) << 16) |
		   ((yscale_UV >> FP_SHIFT) << 0)),
		 &regs->UVSCALEV);

	if (scale_changed)
		update_polyphase_filter(regs);

	return scale_changed;
}
651 
652 static void update_colorkey(struct intel_overlay *overlay,
653 			    struct overlay_registers __iomem *regs)
654 {
655 	u32 key = overlay->color_key;
656 	u32 flags;
657 
658 	flags = 0;
659 	if (overlay->color_key_enabled)
660 		flags |= DST_KEY_ENABLE;
661 
662 	switch (overlay->crtc->base.primary->fb->bits_per_pixel) {
663 	case 8:
664 		key = 0;
665 		flags |= CLK_RGB8I_MASK;
666 		break;
667 
668 	case 16:
669 		if (overlay->crtc->base.primary->fb->depth == 15) {
670 			key = RGB15_TO_COLORKEY(key);
671 			flags |= CLK_RGB15_MASK;
672 		} else {
673 			key = RGB16_TO_COLORKEY(key);
674 			flags |= CLK_RGB16_MASK;
675 		}
676 		break;
677 
678 	case 24:
679 	case 32:
680 		flags |= CLK_RGB24_MASK;
681 		break;
682 	}
683 
684 	iowrite32(key, &regs->DCLRKV);
685 	iowrite32(flags, &regs->DCLRKM);
686 }
687 
688 static u32 overlay_cmd_reg(struct put_image_params *params)
689 {
690 	u32 cmd = OCMD_ENABLE | OCMD_BUF_TYPE_FRAME | OCMD_BUFFER0;
691 
692 	if (params->format & I915_OVERLAY_YUV_PLANAR) {
693 		switch (params->format & I915_OVERLAY_DEPTH_MASK) {
694 		case I915_OVERLAY_YUV422:
695 			cmd |= OCMD_YUV_422_PLANAR;
696 			break;
697 		case I915_OVERLAY_YUV420:
698 			cmd |= OCMD_YUV_420_PLANAR;
699 			break;
700 		case I915_OVERLAY_YUV411:
701 		case I915_OVERLAY_YUV410:
702 			cmd |= OCMD_YUV_410_PLANAR;
703 			break;
704 		}
705 	} else { /* YUV packed */
706 		switch (params->format & I915_OVERLAY_DEPTH_MASK) {
707 		case I915_OVERLAY_YUV422:
708 			cmd |= OCMD_YUV_422_PACKED;
709 			break;
710 		case I915_OVERLAY_YUV411:
711 			cmd |= OCMD_YUV_411_PACKED;
712 			break;
713 		}
714 
715 		switch (params->format & I915_OVERLAY_SWAP_MASK) {
716 		case I915_OVERLAY_NO_SWAP:
717 			break;
718 		case I915_OVERLAY_UV_SWAP:
719 			cmd |= OCMD_UV_SWAP;
720 			break;
721 		case I915_OVERLAY_Y_SWAP:
722 			cmd |= OCMD_Y_SWAP;
723 			break;
724 		case I915_OVERLAY_Y_AND_UV_SWAP:
725 			cmd |= OCMD_Y_AND_UV_SWAP;
726 			break;
727 		}
728 	}
729 
730 	return cmd;
731 }
732 
/*
 * Show @new_bo on the overlay using the geometry and format in @params.
 *
 * Releases the previous old frame, pins @new_bo, switches the overlay on if
 * it is not active yet, programs the register image and queues a flip. On
 * success the former vid_bo becomes old_vid_bo and @new_bo becomes vid_bo.
 *
 * Must be called with struct_mutex held (and the connection mutex is
 * expected to be locked). Returns 0 on success or a negative error code;
 * failures after the pin go through out_unpin, which unpins @new_bo again.
 */
static int intel_overlay_do_put_image(struct intel_overlay *overlay,
				      struct drm_i915_gem_object *new_bo,
				      struct put_image_params *params)
{
	int ret, tmp_width;
	struct overlay_registers __iomem *regs;
	bool scale_changed = false;
	struct drm_i915_private *dev_priv = overlay->i915;
	u32 swidth, swidthsw, sheight, ostride;
	enum i915_pipe pipe = overlay->crtc->pipe;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);
	WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex));

	ret = intel_overlay_release_old_vid(overlay);
	if (ret != 0)
		return ret;

	ret = i915_gem_object_pin_to_display_plane(new_bo, 0,
						   &i915_ggtt_view_normal);
	if (ret != 0)
		return ret;

	ret = i915_gem_object_put_fence(new_bo);
	if (ret)
		goto out_unpin;

	/* first frame: write OCONFIG and turn the overlay hardware on */
	if (!overlay->active) {
		u32 oconfig;
		regs = intel_overlay_map_regs(overlay);
		if (!regs) {
			ret = -ENOMEM;
			goto out_unpin;
		}
		oconfig = OCONF_CC_OUT_8BIT;
		if (IS_GEN4(dev_priv))
			oconfig |= OCONF_CSC_MODE_BT709;
		oconfig |= pipe == 0 ?
			OCONF_PIPE_A : OCONF_PIPE_B;
		iowrite32(oconfig, &regs->OCONFIG);
		intel_overlay_unmap_regs(overlay, regs);

		ret = intel_overlay_on(overlay);
		if (ret != 0)
			goto out_unpin;
	}

	regs = intel_overlay_map_regs(overlay);
	if (!regs) {
		ret = -ENOMEM;
		goto out_unpin;
	}

	/* destination window: y/height in the high half, x/width in the low */
	iowrite32((params->dst_y << 16) | params->dst_x, &regs->DWINPOS);
	iowrite32((params->dst_h << 16) | params->dst_w, &regs->DWINSZ);

	/* packed formats need the line width in bytes for SWIDTHSW */
	if (params->format & I915_OVERLAY_YUV_PACKED)
		tmp_width = packed_width_bytes(params->format, params->src_w);
	else
		tmp_width = params->src_w;

	swidth = params->src_w;
	swidthsw = calc_swidthsw(dev_priv, params->offset_Y, tmp_width);
	sheight = params->src_h;
	iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_Y, &regs->OBUF_0Y);
	ostride = params->stride_Y;

	/* planar formats carry the UV values in the upper 16 bits */
	if (params->format & I915_OVERLAY_YUV_PLANAR) {
		int uv_hscale = uv_hsubsampling(params->format);
		int uv_vscale = uv_vsubsampling(params->format);
		u32 tmp_U, tmp_V;
		swidth |= (params->src_w/uv_hscale) << 16;
		tmp_U = calc_swidthsw(dev_priv, params->offset_U,
				      params->src_w/uv_hscale);
		tmp_V = calc_swidthsw(dev_priv, params->offset_V,
				      params->src_w/uv_hscale);
		swidthsw |= max_t(u32, tmp_U, tmp_V) << 16;
		sheight |= (params->src_h/uv_vscale) << 16;
		iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_U, &regs->OBUF_0U);
		iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_V, &regs->OBUF_0V);
		ostride |= params->stride_UV << 16;
	}

	iowrite32(swidth, &regs->SWIDTH);
	iowrite32(swidthsw, &regs->SWIDTHSW);
	iowrite32(sheight, &regs->SHEIGHT);
	iowrite32(ostride, &regs->OSTRIDE);

	scale_changed = update_scaling_factors(overlay, regs, params);

	update_colorkey(overlay, regs);

	iowrite32(overlay_cmd_reg(params), &regs->OCMD);

	intel_overlay_unmap_regs(overlay, regs);

	/* queue the flip; the filter is reloaded only if the scale changed */
	ret = intel_overlay_continue(overlay, scale_changed);
	if (ret)
		goto out_unpin;

	i915_gem_track_fb(overlay->vid_bo, new_bo,
			  INTEL_FRONTBUFFER_OVERLAY(pipe));

	/* the frame shown so far stays pinned until the flip has completed */
	overlay->old_vid_bo = overlay->vid_bo;
	overlay->vid_bo = new_bo;

	intel_frontbuffer_flip(&dev_priv->drm,
			       INTEL_FRONTBUFFER_OVERLAY(pipe));

	return 0;

out_unpin:
	i915_gem_object_ggtt_unpin(new_bo);
	return ret;
}
848 
849 int intel_overlay_switch_off(struct intel_overlay *overlay)
850 {
851 	struct drm_i915_private *dev_priv = overlay->i915;
852 	struct overlay_registers __iomem *regs;
853 	int ret;
854 
855 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
856 	WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex));
857 
858 	ret = intel_overlay_recover_from_interrupt(overlay);
859 	if (ret != 0)
860 		return ret;
861 
862 	if (!overlay->active)
863 		return 0;
864 
865 	ret = intel_overlay_release_old_vid(overlay);
866 	if (ret != 0)
867 		return ret;
868 
869 	regs = intel_overlay_map_regs(overlay);
870 	iowrite32(0, &regs->OCMD);
871 	intel_overlay_unmap_regs(overlay, regs);
872 
873 	ret = intel_overlay_off(overlay);
874 	if (ret != 0)
875 		return ret;
876 
877 	intel_overlay_off_tail(overlay);
878 	return 0;
879 }
880 
881 static int check_overlay_possible_on_crtc(struct intel_overlay *overlay,
882 					  struct intel_crtc *crtc)
883 {
884 	if (!crtc->active)
885 		return -EINVAL;
886 
887 	/* can't use the overlay with double wide pipe */
888 	if (crtc->config->double_wide)
889 		return -EINVAL;
890 
891 	return 0;
892 }
893 
894 static void update_pfit_vscale_ratio(struct intel_overlay *overlay)
895 {
896 	struct drm_i915_private *dev_priv = overlay->i915;
897 	u32 pfit_control = I915_READ(PFIT_CONTROL);
898 	u32 ratio;
899 
900 	/* XXX: This is not the same logic as in the xorg driver, but more in
901 	 * line with the intel documentation for the i965
902 	 */
903 	if (INTEL_GEN(dev_priv) >= 4) {
904 		/* on i965 use the PGM reg to read out the autoscaler values */
905 		ratio = I915_READ(PFIT_PGM_RATIOS) >> PFIT_VERT_SCALE_SHIFT_965;
906 	} else {
907 		if (pfit_control & VERT_AUTO_SCALE)
908 			ratio = I915_READ(PFIT_AUTO_RATIOS);
909 		else
910 			ratio = I915_READ(PFIT_PGM_RATIOS);
911 		ratio >>= PFIT_VERT_SCALE_SHIFT;
912 	}
913 
914 	overlay->pfit_vscale_ratio = ratio;
915 }
916 
917 static int check_overlay_dst(struct intel_overlay *overlay,
918 			     struct drm_intel_overlay_put_image *rec)
919 {
920 	struct drm_display_mode *mode = &overlay->crtc->base.mode;
921 
922 	if (rec->dst_x < mode->hdisplay &&
923 	    rec->dst_x + rec->dst_width <= mode->hdisplay &&
924 	    rec->dst_y < mode->vdisplay &&
925 	    rec->dst_y + rec->dst_height <= mode->vdisplay)
926 		return 0;
927 	else
928 		return -EINVAL;
929 }
930 
931 static int check_overlay_scaling(struct put_image_params *rec)
932 {
933 	u32 tmp;
934 
935 	/* downscaling limit is 8.0 */
936 	tmp = ((rec->src_scan_h << 16) / rec->dst_h) >> 16;
937 	if (tmp > 7)
938 		return -EINVAL;
939 	tmp = ((rec->src_scan_w << 16) / rec->dst_w) >> 16;
940 	if (tmp > 7)
941 		return -EINVAL;
942 
943 	return 0;
944 }
945 
946 static int check_overlay_src(struct drm_i915_private *dev_priv,
947 			     struct drm_intel_overlay_put_image *rec,
948 			     struct drm_i915_gem_object *new_bo)
949 {
950 	int uv_hscale = uv_hsubsampling(rec->flags);
951 	int uv_vscale = uv_vsubsampling(rec->flags);
952 	u32 stride_mask;
953 	int depth;
954 	u32 tmp;
955 
956 	/* check src dimensions */
957 	if (IS_845G(dev_priv) || IS_I830(dev_priv)) {
958 		if (rec->src_height > IMAGE_MAX_HEIGHT_LEGACY ||
959 		    rec->src_width  > IMAGE_MAX_WIDTH_LEGACY)
960 			return -EINVAL;
961 	} else {
962 		if (rec->src_height > IMAGE_MAX_HEIGHT ||
963 		    rec->src_width  > IMAGE_MAX_WIDTH)
964 			return -EINVAL;
965 	}
966 
967 	/* better safe than sorry, use 4 as the maximal subsampling ratio */
968 	if (rec->src_height < N_VERT_Y_TAPS*4 ||
969 	    rec->src_width  < N_HORIZ_Y_TAPS*4)
970 		return -EINVAL;
971 
972 	/* check alignment constraints */
973 	switch (rec->flags & I915_OVERLAY_TYPE_MASK) {
974 	case I915_OVERLAY_RGB:
975 		/* not implemented */
976 		return -EINVAL;
977 
978 	case I915_OVERLAY_YUV_PACKED:
979 		if (uv_vscale != 1)
980 			return -EINVAL;
981 
982 		depth = packed_depth_bytes(rec->flags);
983 		if (depth < 0)
984 			return depth;
985 
986 		/* ignore UV planes */
987 		rec->stride_UV = 0;
988 		rec->offset_U = 0;
989 		rec->offset_V = 0;
990 		/* check pixel alignment */
991 		if (rec->offset_Y % depth)
992 			return -EINVAL;
993 		break;
994 
995 	case I915_OVERLAY_YUV_PLANAR:
996 		if (uv_vscale < 0 || uv_hscale < 0)
997 			return -EINVAL;
998 		/* no offset restrictions for planar formats */
999 		break;
1000 
1001 	default:
1002 		return -EINVAL;
1003 	}
1004 
1005 	if (rec->src_width % uv_hscale)
1006 		return -EINVAL;
1007 
1008 	/* stride checking */
1009 	if (IS_I830(dev_priv) || IS_845G(dev_priv))
1010 		stride_mask = 255;
1011 	else
1012 		stride_mask = 63;
1013 
1014 	if (rec->stride_Y & stride_mask || rec->stride_UV & stride_mask)
1015 		return -EINVAL;
1016 	if (IS_GEN4(dev_priv) && rec->stride_Y < 512)
1017 		return -EINVAL;
1018 
1019 	tmp = (rec->flags & I915_OVERLAY_TYPE_MASK) == I915_OVERLAY_YUV_PLANAR ?
1020 		4096 : 8192;
1021 	if (rec->stride_Y > tmp || rec->stride_UV > 2*1024)
1022 		return -EINVAL;
1023 
1024 	/* check buffer dimensions */
1025 	switch (rec->flags & I915_OVERLAY_TYPE_MASK) {
1026 	case I915_OVERLAY_RGB:
1027 	case I915_OVERLAY_YUV_PACKED:
1028 		/* always 4 Y values per depth pixels */
1029 		if (packed_width_bytes(rec->flags, rec->src_width) > rec->stride_Y)
1030 			return -EINVAL;
1031 
1032 		tmp = rec->stride_Y*rec->src_height;
1033 		if (rec->offset_Y + tmp > new_bo->base.size)
1034 			return -EINVAL;
1035 		break;
1036 
1037 	case I915_OVERLAY_YUV_PLANAR:
1038 		if (rec->src_width > rec->stride_Y)
1039 			return -EINVAL;
1040 		if (rec->src_width/uv_hscale > rec->stride_UV)
1041 			return -EINVAL;
1042 
1043 		tmp = rec->stride_Y * rec->src_height;
1044 		if (rec->offset_Y + tmp > new_bo->base.size)
1045 			return -EINVAL;
1046 
1047 		tmp = rec->stride_UV * (rec->src_height / uv_vscale);
1048 		if (rec->offset_U + tmp > new_bo->base.size ||
1049 		    rec->offset_V + tmp > new_bo->base.size)
1050 			return -EINVAL;
1051 		break;
1052 	}
1053 
1054 	return 0;
1055 }
1056 
1057 /**
1058  * Return the pipe currently connected to the panel fitter,
1059  * or -1 if the panel fitter is not present or not in use
1060  */
1061 static int intel_panel_fitter_pipe(struct drm_i915_private *dev_priv)
1062 {
1063 	u32  pfit_control;
1064 
1065 	/* i830 doesn't have a panel fitter */
1066 	if (INTEL_GEN(dev_priv) <= 3 &&
1067 	    (IS_I830(dev_priv) || !IS_MOBILE(dev_priv)))
1068 		return -1;
1069 
1070 	pfit_control = I915_READ(PFIT_CONTROL);
1071 
1072 	/* See if the panel fitter is in use */
1073 	if ((pfit_control & PFIT_ENABLE) == 0)
1074 		return -1;
1075 
1076 	/* 965 can place panel fitter on either pipe */
1077 	if (IS_GEN4(dev_priv))
1078 		return (pfit_control >> 29) & 0x3;
1079 
1080 	/* older chips can only use pipe 1 */
1081 	return 1;
1082 }
1083 
1084 int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data,
1085 				  struct drm_file *file_priv)
1086 {
1087 	struct drm_intel_overlay_put_image *put_image_rec = data;
1088 	struct drm_i915_private *dev_priv = to_i915(dev);
1089 	struct intel_overlay *overlay;
1090 	struct drm_crtc *drmmode_crtc;
1091 	struct intel_crtc *crtc;
1092 	struct drm_i915_gem_object *new_bo;
1093 	struct put_image_params *params;
1094 	int ret;
1095 
1096 	overlay = dev_priv->overlay;
1097 	if (!overlay) {
1098 		DRM_DEBUG("userspace bug: no overlay\n");
1099 		return -ENODEV;
1100 	}
1101 
1102 	if (!(put_image_rec->flags & I915_OVERLAY_ENABLE)) {
1103 		drm_modeset_lock_all(dev);
1104 		mutex_lock(&dev->struct_mutex);
1105 
1106 		ret = intel_overlay_switch_off(overlay);
1107 
1108 		mutex_unlock(&dev->struct_mutex);
1109 		drm_modeset_unlock_all(dev);
1110 
1111 		return ret;
1112 	}
1113 
1114 	params = kmalloc(sizeof(*params), M_DRM, GFP_KERNEL);
1115 	if (!params)
1116 		return -ENOMEM;
1117 
1118 	drmmode_crtc = drm_crtc_find(dev, put_image_rec->crtc_id);
1119 	if (!drmmode_crtc) {
1120 		ret = -ENOENT;
1121 		goto out_free;
1122 	}
1123 	crtc = to_intel_crtc(drmmode_crtc);
1124 
1125 	new_bo = i915_gem_object_lookup(file_priv, put_image_rec->bo_handle);
1126 	if (!new_bo) {
1127 		ret = -ENOENT;
1128 		goto out_free;
1129 	}
1130 
1131 	drm_modeset_lock_all(dev);
1132 	mutex_lock(&dev->struct_mutex);
1133 
1134 	if (new_bo->tiling_mode) {
1135 		DRM_DEBUG_KMS("buffer used for overlay image can not be tiled\n");
1136 		ret = -EINVAL;
1137 		goto out_unlock;
1138 	}
1139 
1140 	ret = intel_overlay_recover_from_interrupt(overlay);
1141 	if (ret != 0)
1142 		goto out_unlock;
1143 
1144 	if (overlay->crtc != crtc) {
1145 		struct drm_display_mode *mode = &crtc->base.mode;
1146 		ret = intel_overlay_switch_off(overlay);
1147 		if (ret != 0)
1148 			goto out_unlock;
1149 
1150 		ret = check_overlay_possible_on_crtc(overlay, crtc);
1151 		if (ret != 0)
1152 			goto out_unlock;
1153 
1154 		overlay->crtc = crtc;
1155 		crtc->overlay = overlay;
1156 
1157 		/* line too wide, i.e. one-line-mode */
1158 		if (mode->hdisplay > 1024 &&
1159 		    intel_panel_fitter_pipe(dev_priv) == crtc->pipe) {
1160 			overlay->pfit_active = true;
1161 			update_pfit_vscale_ratio(overlay);
1162 		} else
1163 			overlay->pfit_active = false;
1164 	}
1165 
1166 	ret = check_overlay_dst(overlay, put_image_rec);
1167 	if (ret != 0)
1168 		goto out_unlock;
1169 
1170 	if (overlay->pfit_active) {
1171 		params->dst_y = ((((u32)put_image_rec->dst_y) << 12) /
1172 				 overlay->pfit_vscale_ratio);
1173 		/* shifting right rounds downwards, so add 1 */
1174 		params->dst_h = ((((u32)put_image_rec->dst_height) << 12) /
1175 				 overlay->pfit_vscale_ratio) + 1;
1176 	} else {
1177 		params->dst_y = put_image_rec->dst_y;
1178 		params->dst_h = put_image_rec->dst_height;
1179 	}
1180 	params->dst_x = put_image_rec->dst_x;
1181 	params->dst_w = put_image_rec->dst_width;
1182 
1183 	params->src_w = put_image_rec->src_width;
1184 	params->src_h = put_image_rec->src_height;
1185 	params->src_scan_w = put_image_rec->src_scan_width;
1186 	params->src_scan_h = put_image_rec->src_scan_height;
1187 	if (params->src_scan_h > params->src_h ||
1188 	    params->src_scan_w > params->src_w) {
1189 		ret = -EINVAL;
1190 		goto out_unlock;
1191 	}
1192 
1193 	ret = check_overlay_src(dev_priv, put_image_rec, new_bo);
1194 	if (ret != 0)
1195 		goto out_unlock;
1196 	params->format = put_image_rec->flags & ~I915_OVERLAY_FLAGS_MASK;
1197 	params->stride_Y = put_image_rec->stride_Y;
1198 	params->stride_UV = put_image_rec->stride_UV;
1199 	params->offset_Y = put_image_rec->offset_Y;
1200 	params->offset_U = put_image_rec->offset_U;
1201 	params->offset_V = put_image_rec->offset_V;
1202 
1203 	/* Check scaling after src size to prevent a divide-by-zero. */
1204 	ret = check_overlay_scaling(params);
1205 	if (ret != 0)
1206 		goto out_unlock;
1207 
1208 	ret = intel_overlay_do_put_image(overlay, new_bo, params);
1209 	if (ret != 0)
1210 		goto out_unlock;
1211 
1212 	mutex_unlock(&dev->struct_mutex);
1213 	drm_modeset_unlock_all(dev);
1214 
1215 	kfree(params);
1216 
1217 	return 0;
1218 
1219 out_unlock:
1220 	mutex_unlock(&dev->struct_mutex);
1221 	drm_modeset_unlock_all(dev);
1222 	i915_gem_object_put_unlocked(new_bo);
1223 out_free:
1224 	kfree(params);
1225 
1226 	return ret;
1227 }
1228 
1229 static void update_reg_attrs(struct intel_overlay *overlay,
1230 			     struct overlay_registers __iomem *regs)
1231 {
1232 	iowrite32((overlay->contrast << 18) | (overlay->brightness & 0xff),
1233 		  &regs->OCLRC0);
1234 	iowrite32(overlay->saturation, &regs->OCLRC1);
1235 }
1236 
1237 static bool check_gamma_bounds(u32 gamma1, u32 gamma2)
1238 {
1239 	int i;
1240 
1241 	if (gamma1 & 0xff000000 || gamma2 & 0xff000000)
1242 		return false;
1243 
1244 	for (i = 0; i < 3; i++) {
1245 		if (((gamma1 >> i*8) & 0xff) >= ((gamma2 >> i*8) & 0xff))
1246 			return false;
1247 	}
1248 
1249 	return true;
1250 }
1251 
1252 static bool check_gamma5_errata(u32 gamma5)
1253 {
1254 	int i;
1255 
1256 	for (i = 0; i < 3; i++) {
1257 		if (((gamma5 >> i*8) & 0xff) == 0x80)
1258 			return false;
1259 	}
1260 
1261 	return true;
1262 }
1263 
1264 static int check_gamma(struct drm_intel_overlay_attrs *attrs)
1265 {
1266 	if (!check_gamma_bounds(0, attrs->gamma0) ||
1267 	    !check_gamma_bounds(attrs->gamma0, attrs->gamma1) ||
1268 	    !check_gamma_bounds(attrs->gamma1, attrs->gamma2) ||
1269 	    !check_gamma_bounds(attrs->gamma2, attrs->gamma3) ||
1270 	    !check_gamma_bounds(attrs->gamma3, attrs->gamma4) ||
1271 	    !check_gamma_bounds(attrs->gamma4, attrs->gamma5) ||
1272 	    !check_gamma_bounds(attrs->gamma5, 0x00ffffff))
1273 		return -EINVAL;
1274 
1275 	if (!check_gamma5_errata(attrs->gamma5))
1276 		return -EINVAL;
1277 
1278 	return 0;
1279 }
1280 
1281 int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data,
1282 			      struct drm_file *file_priv)
1283 {
1284 	struct drm_intel_overlay_attrs *attrs = data;
1285 	struct drm_i915_private *dev_priv = to_i915(dev);
1286 	struct intel_overlay *overlay;
1287 	struct overlay_registers __iomem *regs;
1288 	int ret;
1289 
1290 	overlay = dev_priv->overlay;
1291 	if (!overlay) {
1292 		DRM_DEBUG("userspace bug: no overlay\n");
1293 		return -ENODEV;
1294 	}
1295 
1296 	drm_modeset_lock_all(dev);
1297 	mutex_lock(&dev->struct_mutex);
1298 
1299 	ret = -EINVAL;
1300 	if (!(attrs->flags & I915_OVERLAY_UPDATE_ATTRS)) {
1301 		attrs->color_key  = overlay->color_key;
1302 		attrs->brightness = overlay->brightness;
1303 		attrs->contrast   = overlay->contrast;
1304 		attrs->saturation = overlay->saturation;
1305 
1306 		if (!IS_GEN2(dev_priv)) {
1307 			attrs->gamma0 = I915_READ(OGAMC0);
1308 			attrs->gamma1 = I915_READ(OGAMC1);
1309 			attrs->gamma2 = I915_READ(OGAMC2);
1310 			attrs->gamma3 = I915_READ(OGAMC3);
1311 			attrs->gamma4 = I915_READ(OGAMC4);
1312 			attrs->gamma5 = I915_READ(OGAMC5);
1313 		}
1314 	} else {
1315 		if (attrs->brightness < -128 || attrs->brightness > 127)
1316 			goto out_unlock;
1317 		if (attrs->contrast > 255)
1318 			goto out_unlock;
1319 		if (attrs->saturation > 1023)
1320 			goto out_unlock;
1321 
1322 		overlay->color_key  = attrs->color_key;
1323 		overlay->brightness = attrs->brightness;
1324 		overlay->contrast   = attrs->contrast;
1325 		overlay->saturation = attrs->saturation;
1326 
1327 		regs = intel_overlay_map_regs(overlay);
1328 		if (!regs) {
1329 			ret = -ENOMEM;
1330 			goto out_unlock;
1331 		}
1332 
1333 		update_reg_attrs(overlay, regs);
1334 
1335 		intel_overlay_unmap_regs(overlay, regs);
1336 
1337 		if (attrs->flags & I915_OVERLAY_UPDATE_GAMMA) {
1338 			if (IS_GEN2(dev_priv))
1339 				goto out_unlock;
1340 
1341 			if (overlay->active) {
1342 				ret = -EBUSY;
1343 				goto out_unlock;
1344 			}
1345 
1346 			ret = check_gamma(attrs);
1347 			if (ret)
1348 				goto out_unlock;
1349 
1350 			I915_WRITE(OGAMC0, attrs->gamma0);
1351 			I915_WRITE(OGAMC1, attrs->gamma1);
1352 			I915_WRITE(OGAMC2, attrs->gamma2);
1353 			I915_WRITE(OGAMC3, attrs->gamma3);
1354 			I915_WRITE(OGAMC4, attrs->gamma4);
1355 			I915_WRITE(OGAMC5, attrs->gamma5);
1356 		}
1357 	}
1358 	overlay->color_key_enabled = (attrs->flags & I915_OVERLAY_DISABLE_DEST_COLORKEY) == 0;
1359 
1360 	ret = 0;
1361 out_unlock:
1362 	mutex_unlock(&dev->struct_mutex);
1363 	drm_modeset_unlock_all(dev);
1364 
1365 	return ret;
1366 }
1367 
1368 void intel_setup_overlay(struct drm_i915_private *dev_priv)
1369 {
1370 	struct intel_overlay *overlay;
1371 	struct drm_i915_gem_object *reg_bo;
1372 	struct overlay_registers __iomem *regs;
1373 	int ret;
1374 
1375 	if (!HAS_OVERLAY(dev_priv))
1376 		return;
1377 
1378 	overlay = kzalloc(sizeof(*overlay), GFP_KERNEL);
1379 	if (!overlay)
1380 		return;
1381 
1382 	mutex_lock(&dev_priv->drm.struct_mutex);
1383 	if (WARN_ON(dev_priv->overlay))
1384 		goto out_free;
1385 
1386 	overlay->i915 = dev_priv;
1387 
1388 	reg_bo = NULL;
1389 	if (!OVERLAY_NEEDS_PHYSICAL(dev_priv))
1390 		reg_bo = i915_gem_object_create_stolen(&dev_priv->drm,
1391 						       PAGE_SIZE);
1392 	if (reg_bo == NULL)
1393 		reg_bo = i915_gem_object_create(&dev_priv->drm, PAGE_SIZE);
1394 	if (IS_ERR(reg_bo))
1395 		goto out_free;
1396 	overlay->reg_bo = reg_bo;
1397 
1398 	if (OVERLAY_NEEDS_PHYSICAL(dev_priv)) {
1399 		ret = i915_gem_object_attach_phys(reg_bo, PAGE_SIZE);
1400 		if (ret) {
1401 			DRM_ERROR("failed to attach phys overlay regs\n");
1402 			goto out_free_bo;
1403 		}
1404 		overlay->flip_addr = reg_bo->phys_handle->busaddr;
1405 	} else {
1406 		ret = i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, PIN_MAPPABLE);
1407 		if (ret) {
1408 			DRM_ERROR("failed to pin overlay register bo\n");
1409 			goto out_free_bo;
1410 		}
1411 		overlay->flip_addr = i915_gem_obj_ggtt_offset(reg_bo);
1412 
1413 		ret = i915_gem_object_set_to_gtt_domain(reg_bo, true);
1414 		if (ret) {
1415 			DRM_ERROR("failed to move overlay register bo into the GTT\n");
1416 			goto out_unpin_bo;
1417 		}
1418 	}
1419 
1420 	/* init all values */
1421 	overlay->color_key = 0x0101fe;
1422 	overlay->color_key_enabled = true;
1423 	overlay->brightness = -19;
1424 	overlay->contrast = 75;
1425 	overlay->saturation = 146;
1426 
1427 	regs = intel_overlay_map_regs(overlay);
1428 	if (!regs)
1429 		goto out_unpin_bo;
1430 
1431 	memset_io(regs, 0, sizeof(struct overlay_registers));
1432 	update_polyphase_filter(regs);
1433 	update_reg_attrs(overlay, regs);
1434 
1435 	intel_overlay_unmap_regs(overlay, regs);
1436 
1437 	dev_priv->overlay = overlay;
1438 	mutex_unlock(&dev_priv->drm.struct_mutex);
1439 	DRM_INFO("initialized overlay support\n");
1440 	return;
1441 
1442 out_unpin_bo:
1443 	if (!OVERLAY_NEEDS_PHYSICAL(dev_priv))
1444 		i915_gem_object_ggtt_unpin(reg_bo);
1445 out_free_bo:
1446 	i915_gem_object_put(reg_bo);
1447 out_free:
1448 	mutex_unlock(&dev_priv->drm.struct_mutex);
1449 	kfree(overlay);
1450 	return;
1451 }
1452 
1453 void intel_cleanup_overlay(struct drm_i915_private *dev_priv)
1454 {
1455 	if (!dev_priv->overlay)
1456 		return;
1457 
1458 	/* The bo's should be free'd by the generic code already.
1459 	 * Furthermore modesetting teardown happens beforehand so the
1460 	 * hardware should be off already */
1461 	WARN_ON(dev_priv->overlay->active);
1462 
1463 	i915_gem_object_put_unlocked(dev_priv->overlay->reg_bo);
1464 	kfree(dev_priv->overlay);
1465 }
1466 
1467 struct intel_overlay_error_state {
1468 	struct overlay_registers regs;
1469 	unsigned long base;
1470 	u32 dovsta;
1471 	u32 isr;
1472 };
1473 
1474 static struct overlay_registers __iomem *
1475 intel_overlay_map_regs_atomic(struct intel_overlay *overlay)
1476 {
1477 	struct drm_i915_private *dev_priv = overlay->i915;
1478 	struct overlay_registers __iomem *regs;
1479 
1480 	if (OVERLAY_NEEDS_PHYSICAL(dev_priv))
1481 		/* Cast to make sparse happy, but it's wc memory anyway, so
1482 		 * equivalent to the wc io mapping on X86. */
1483 		regs = (struct overlay_registers __iomem *)
1484 			overlay->reg_bo->phys_handle->vaddr;
1485 	else
1486 		regs = io_mapping_map_atomic_wc(dev_priv->ggtt.mappable,
1487 						overlay->flip_addr);
1488 
1489 	return regs;
1490 }
1491 
1492 static void intel_overlay_unmap_regs_atomic(struct intel_overlay *overlay,
1493 					struct overlay_registers __iomem *regs)
1494 {
1495 	if (!OVERLAY_NEEDS_PHYSICAL(overlay->i915))
1496 		io_mapping_unmap_atomic(regs);
1497 }
1498 
1499 struct intel_overlay_error_state *
1500 intel_overlay_capture_error_state(struct drm_i915_private *dev_priv)
1501 {
1502 	struct intel_overlay *overlay = dev_priv->overlay;
1503 	struct intel_overlay_error_state *error;
1504 	struct overlay_registers __iomem *regs;
1505 
1506 	if (!overlay || !overlay->active)
1507 		return NULL;
1508 
1509 	error = kmalloc(sizeof(*error), M_DRM, GFP_ATOMIC);
1510 	if (error == NULL)
1511 		return NULL;
1512 
1513 	error->dovsta = I915_READ(DOVSTA);
1514 	error->isr = I915_READ(ISR);
1515 	error->base = overlay->flip_addr;
1516 
1517 	regs = intel_overlay_map_regs_atomic(overlay);
1518 	if (!regs)
1519 		goto err;
1520 
1521 	memcpy_fromio(&error->regs, regs, sizeof(struct overlay_registers));
1522 	intel_overlay_unmap_regs_atomic(overlay, regs);
1523 
1524 	return error;
1525 
1526 err:
1527 	kfree(error);
1528 	return NULL;
1529 }
1530 
1531 void
1532 intel_overlay_print_error_state(struct drm_i915_error_state_buf *m,
1533 				struct intel_overlay_error_state *error)
1534 {
1535 	i915_error_printf(m, "Overlay, status: 0x%08x, interrupt: 0x%08x\n",
1536 			  error->dovsta, error->isr);
1537 	i915_error_printf(m, "  Register file at 0x%08lx:\n",
1538 			  error->base);
1539 
1540 #define P(x) i915_error_printf(m, "    " #x ":	0x%08x\n", error->regs.x)
1541 	P(OBUF_0Y);
1542 	P(OBUF_1Y);
1543 	P(OBUF_0U);
1544 	P(OBUF_0V);
1545 	P(OBUF_1U);
1546 	P(OBUF_1V);
1547 	P(OSTRIDE);
1548 	P(YRGB_VPH);
1549 	P(UV_VPH);
1550 	P(HORZ_PH);
1551 	P(INIT_PHS);
1552 	P(DWINPOS);
1553 	P(DWINSZ);
1554 	P(SWIDTH);
1555 	P(SWIDTHSW);
1556 	P(SHEIGHT);
1557 	P(YRGBSCALE);
1558 	P(UVSCALE);
1559 	P(OCLRC0);
1560 	P(OCLRC1);
1561 	P(DCLRKV);
1562 	P(DCLRKM);
1563 	P(SCLRKVH);
1564 	P(SCLRKVL);
1565 	P(SCLRKEN);
1566 	P(OCONFIG);
1567 	P(OCMD);
1568 	P(OSTART_0Y);
1569 	P(OSTART_1Y);
1570 	P(OSTART_0U);
1571 	P(OSTART_0V);
1572 	P(OSTART_1U);
1573 	P(OSTART_1V);
1574 	P(OTILEOFF_0Y);
1575 	P(OTILEOFF_1Y);
1576 	P(OTILEOFF_0U);
1577 	P(OTILEOFF_0V);
1578 	P(OTILEOFF_1U);
1579 	P(OTILEOFF_1V);
1580 	P(FASTHSCALE);
1581 	P(UVSCALEV);
1582 #undef P
1583 }
1584