xref: /dragonfly/sys/dev/drm/i915/intel_overlay.c (revision 6a3cbbc2)
1 /*
2  * Copyright © 2009
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Daniel Vetter <daniel@ffwll.ch>
25  *
26  * Derived from Xorg ddx, xf86-video-intel, src/i830_video.c
27  */
28 #include <drm/drmP.h>
29 #include <drm/i915_drm.h>
30 #include "i915_drv.h"
31 #include "i915_reg.h"
32 #include "intel_drv.h"
33 #include "intel_frontbuffer.h"
34 
/* Limits for overlay size. According to the Intel docs, the real limits are:
 * Y width: 4095, UV width (planar): 2047, Y height: 2047,
 * UV height (planar): 1023. But the xorg driver assumes 2048 for height and
 * width. Use the minimum of both.  */
#define IMAGE_MAX_WIDTH		2048
#define IMAGE_MAX_HEIGHT	2046 /* 2 * 1023 */
/* on 830 and 845 these large limits result in the card hanging */
#define IMAGE_MAX_WIDTH_LEGACY	1024
#define IMAGE_MAX_HEIGHT_LEGACY	1088
44 
/* overlay register definitions */
/* OCMD register: bit fields of the overlay command word */
#define OCMD_TILED_SURFACE	(0x1<<19)
#define OCMD_MIRROR_MASK	(0x3<<17)
#define OCMD_MIRROR_MODE	(0x3<<17)
#define OCMD_MIRROR_HORIZONTAL	(0x1<<17)
#define OCMD_MIRROR_VERTICAL	(0x2<<17)
#define OCMD_MIRROR_BOTH	(0x3<<17)
#define OCMD_BYTEORDER_MASK	(0x3<<14) /* zero for YUYV or FOURCC YUY2 */
#define OCMD_UV_SWAP		(0x1<<14) /* YVYU */
#define OCMD_Y_SWAP		(0x2<<14) /* UYVY or FOURCC UYVY */
#define OCMD_Y_AND_UV_SWAP	(0x3<<14) /* VYUY */
#define OCMD_SOURCE_FORMAT_MASK (0xf<<10)
#define OCMD_RGB_888		(0x1<<10) /* not in i965 Intel docs */
#define OCMD_RGB_555		(0x2<<10) /* not in i965 Intel docs */
#define OCMD_RGB_565		(0x3<<10) /* not in i965 Intel docs */
#define OCMD_YUV_422_PACKED	(0x8<<10)
#define OCMD_YUV_411_PACKED	(0x9<<10) /* not in i965 Intel docs */
#define OCMD_YUV_420_PLANAR	(0xc<<10)
#define OCMD_YUV_422_PLANAR	(0xd<<10)
#define OCMD_YUV_410_PLANAR	(0xe<<10) /* also 411 */
#define OCMD_TVSYNCFLIP_PARITY	(0x1<<9)
#define OCMD_TVSYNCFLIP_ENABLE	(0x1<<7)
#define OCMD_BUF_TYPE_MASK	(0x1<<5)
#define OCMD_BUF_TYPE_FRAME	(0x0<<5)
#define OCMD_BUF_TYPE_FIELD	(0x1<<5)
#define OCMD_TEST_MODE		(0x1<<4)
#define OCMD_BUFFER_SELECT	(0x3<<2)
#define OCMD_BUFFER0		(0x0<<2)
#define OCMD_BUFFER1		(0x1<<2)
/* NOTE(review): OCMD_FIELD0/OCMD_FIELD1 below are encoded at bit 1, yet this
 * mask is at bit 2 (the same value as OCMD_BUFFER1) - confirm against the
 * hardware documentation before relying on it. */
#define OCMD_FIELD_SELECT	(0x1<<2)
#define OCMD_FIELD0		(0x0<<1)
#define OCMD_FIELD1		(0x1<<1)
#define OCMD_ENABLE		(0x1<<0)

/* OCONFIG register: bit fields of the overlay configuration word */
#define OCONF_PIPE_MASK		(0x1<<18)
#define OCONF_PIPE_A		(0x0<<18)
#define OCONF_PIPE_B		(0x1<<18)
#define OCONF_GAMMA2_ENABLE	(0x1<<16)
#define OCONF_CSC_MODE_BT601	(0x0<<5)
#define OCONF_CSC_MODE_BT709	(0x1<<5)
#define OCONF_CSC_BYPASS	(0x1<<4)
#define OCONF_CC_OUT_8BIT	(0x1<<3)
#define OCONF_TEST_MODE		(0x1<<2)
#define OCONF_THREE_LINE_BUFFER	(0x1<<0)
#define OCONF_TWO_LINE_BUFFER	(0x0<<0)

/* DCLRKM (dst-key) register: enable bit plus a per-framebuffer-format mask
 * that is OR'ed into the value written to DCLRKM */
#define DST_KEY_ENABLE		(0x1<<31)
#define CLK_RGB24_MASK		0x0
#define CLK_RGB16_MASK		0x070307
#define CLK_RGB15_MASK		0x070707
#define CLK_RGB8I_MASK		0xffffff
99 
/*
 * Expand a packed RGB565 / RGB555 pixel value into the 24-bit layout
 * (8 bits per channel, low bits of each channel zero) written to the
 * destination colour-key register.
 *
 * The argument is parenthesized so that an expression such as `a | b`
 * is masked as a whole; note that it is still evaluated three times, so
 * it must not have side effects.
 */
#define RGB16_TO_COLORKEY(c) \
	((((c) & 0xF800) << 8) | (((c) & 0x07E0) << 5) | (((c) & 0x001F) << 3))
#define RGB15_TO_COLORKEY(c) \
	((((c) & 0x7c00) << 9) | (((c) & 0x03E0) << 6) | (((c) & 0x001F) << 3))
104 
/* overlay flip addr flag: OR'ed into the flip address emitted to the ring;
 * the callers in this file set it when the filter coefficients should be
 * (re)loaded */
#define OFC_UPDATE		0x1

/* polyphase filter coefficients: tap and phase counts that size the
 * coefficient tables in struct overlay_registers */
#define N_HORIZ_Y_TAPS          5
#define N_VERT_Y_TAPS           3
#define N_HORIZ_UV_TAPS         3
#define N_VERT_UV_TAPS          3
#define N_PHASES                17
#define MAX_TAPS                5
115 
/*
 * Memory-buffered overlay registers.
 *
 * The driver maps this block (see intel_overlay_map_regs()) and fills it
 * with iowrite32()/memcpy_toio(). The field order is the layout; the
 * trailing comments give byte offsets and the RESERVED* members are
 * padding that keeps the coefficient tables at their offsets.
 */
struct overlay_registers {
	u32 OBUF_0Y;
	u32 OBUF_1Y;
	u32 OBUF_0U;
	u32 OBUF_0V;
	u32 OBUF_1U;
	u32 OBUF_1V;
	u32 OSTRIDE;
	u32 YRGB_VPH;
	u32 UV_VPH;
	u32 HORZ_PH;
	u32 INIT_PHS;
	u32 DWINPOS;
	u32 DWINSZ;
	u32 SWIDTH;
	u32 SWIDTHSW;
	u32 SHEIGHT;
	u32 YRGBSCALE;
	u32 UVSCALE;
	u32 OCLRC0;
	u32 OCLRC1;
	u32 DCLRKV;
	u32 DCLRKM;
	u32 SCLRKVH;
	u32 SCLRKVL;
	u32 SCLRKEN;
	u32 OCONFIG;
	u32 OCMD;
	u32 RESERVED1; /* 0x6C */
	u32 OSTART_0Y;
	u32 OSTART_1Y;
	u32 OSTART_0U;
	u32 OSTART_0V;
	u32 OSTART_1U;
	u32 OSTART_1V;
	u32 OTILEOFF_0Y;
	u32 OTILEOFF_1Y;
	u32 OTILEOFF_0U;
	u32 OTILEOFF_0V;
	u32 OTILEOFF_1U;
	u32 OTILEOFF_1V;
	u32 FASTHSCALE; /* 0xA0 */
	u32 UVSCALEV; /* 0xA4 */
	u32 RESERVEDC[(0x200 - 0xA8) / 4]; /* 0xA8 - 0x1FC */
	u16 Y_VCOEFS[N_VERT_Y_TAPS * N_PHASES]; /* 0x200 */
	u16 RESERVEDD[0x100 / 2 - N_VERT_Y_TAPS * N_PHASES];
	u16 Y_HCOEFS[N_HORIZ_Y_TAPS * N_PHASES]; /* 0x300 */
	u16 RESERVEDE[0x200 / 2 - N_HORIZ_Y_TAPS * N_PHASES];
	u16 UV_VCOEFS[N_VERT_UV_TAPS * N_PHASES]; /* 0x500 */
	u16 RESERVEDF[0x100 / 2 - N_VERT_UV_TAPS * N_PHASES];
	u16 UV_HCOEFS[N_HORIZ_UV_TAPS * N_PHASES]; /* 0x600 */
	u16 RESERVEDG[0x100 / 2 - N_HORIZ_UV_TAPS * N_PHASES];
};
170 
/* Driver-side state of the overlay plane. */
struct intel_overlay {
	struct drm_i915_private *i915;
	/* crtc the overlay is shown on; cleared when the overlay is turned off */
	struct intel_crtc *crtc;
	/* pinned buffer of the frame currently being shown */
	struct i915_vma *vma;
	/* previous frame, kept pinned until the pending flip has completed */
	struct i915_vma *old_vma;
	/* true between intel_overlay_on() and intel_overlay_off_tail() */
	bool active;
	bool pfit_active;
	u32 pfit_vscale_ratio; /* shifted-point number, (1<<12) == 1.0 */
	/* destination colour key and its enable bit, see update_colorkey() */
	u32 color_key:24;
	u32 color_key_enabled:1;
	u32 brightness, contrast, saturation;
	/* last programmed scale factors, used to detect a scale change */
	u32 old_xscale, old_yscale;
	/* register access */
	u32 flip_addr;
	struct drm_i915_gem_object *reg_bo;
	/* flip handling */
	struct i915_gem_active last_flip;
};
189 
190 static struct overlay_registers __iomem *
191 intel_overlay_map_regs(struct intel_overlay *overlay)
192 {
193 	struct drm_i915_private *dev_priv = overlay->i915;
194 	struct overlay_registers __iomem *regs;
195 
196 	if (OVERLAY_NEEDS_PHYSICAL(dev_priv))
197 		regs = (struct overlay_registers __iomem *)overlay->reg_bo->phys_handle->vaddr;
198 	else
199 		regs = io_mapping_map_wc(&dev_priv->ggtt.mappable,
200 					 overlay->flip_addr,
201 					 PAGE_SIZE);
202 
203 	return regs;
204 }
205 
206 static void intel_overlay_unmap_regs(struct intel_overlay *overlay,
207 				     struct overlay_registers __iomem *regs)
208 {
209 	if (!OVERLAY_NEEDS_PHYSICAL(overlay->i915))
210 		io_mapping_unmap(regs);
211 }
212 
213 static void intel_overlay_submit_request(struct intel_overlay *overlay,
214 					 struct drm_i915_gem_request *req,
215 					 i915_gem_retire_fn retire)
216 {
217 	GEM_BUG_ON(i915_gem_active_peek(&overlay->last_flip,
218 					&overlay->i915->drm.struct_mutex));
219 	overlay->last_flip.retire = retire;
220 	i915_gem_active_set(&overlay->last_flip, req);
221 	i915_add_request(req);
222 }
223 
224 static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
225 					 struct drm_i915_gem_request *req,
226 					 i915_gem_retire_fn retire)
227 {
228 	intel_overlay_submit_request(overlay, req, retire);
229 	return i915_gem_active_retire(&overlay->last_flip,
230 				      &overlay->i915->drm.struct_mutex);
231 }
232 
233 static struct drm_i915_gem_request *alloc_request(struct intel_overlay *overlay)
234 {
235 	struct drm_i915_private *dev_priv = overlay->i915;
236 	struct intel_engine_cs *engine = dev_priv->engine[RCS];
237 
238 	return i915_gem_request_alloc(engine, dev_priv->kernel_context);
239 }
240 
241 /* overlay needs to be disable in OCMD reg */
242 static int intel_overlay_on(struct intel_overlay *overlay)
243 {
244 	struct drm_i915_private *dev_priv = overlay->i915;
245 	struct drm_i915_gem_request *req;
246 	struct intel_ring *ring;
247 	int ret;
248 
249 	WARN_ON(overlay->active);
250 	WARN_ON(IS_I830(dev_priv) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE));
251 
252 	req = alloc_request(overlay);
253 	if (IS_ERR(req))
254 		return PTR_ERR(req);
255 
256 	ret = intel_ring_begin(req, 4);
257 	if (ret) {
258 		i915_add_request_no_flush(req);
259 		return ret;
260 	}
261 
262 	overlay->active = true;
263 
264 	ring = req->ring;
265 	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON);
266 	intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE);
267 	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
268 	intel_ring_emit(ring, MI_NOOP);
269 	intel_ring_advance(ring);
270 
271 	return intel_overlay_do_wait_request(overlay, req, NULL);
272 }
273 
274 /* overlay needs to be enabled in OCMD reg */
275 static int intel_overlay_continue(struct intel_overlay *overlay,
276 				  bool load_polyphase_filter)
277 {
278 	struct drm_i915_private *dev_priv = overlay->i915;
279 	struct drm_i915_gem_request *req;
280 	struct intel_ring *ring;
281 	u32 flip_addr = overlay->flip_addr;
282 	u32 tmp;
283 	int ret;
284 
285 	WARN_ON(!overlay->active);
286 
287 	if (load_polyphase_filter)
288 		flip_addr |= OFC_UPDATE;
289 
290 	/* check for underruns */
291 	tmp = I915_READ(DOVSTA);
292 	if (tmp & (1 << 17))
293 		DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp);
294 
295 	req = alloc_request(overlay);
296 	if (IS_ERR(req))
297 		return PTR_ERR(req);
298 
299 	ret = intel_ring_begin(req, 2);
300 	if (ret) {
301 		i915_add_request_no_flush(req);
302 		return ret;
303 	}
304 
305 	ring = req->ring;
306 	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
307 	intel_ring_emit(ring, flip_addr);
308 	intel_ring_advance(ring);
309 
310 	intel_overlay_submit_request(overlay, req, NULL);
311 
312 	return 0;
313 }
314 
315 static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active,
316 					       struct drm_i915_gem_request *req)
317 {
318 	struct intel_overlay *overlay =
319 		container_of(active, typeof(*overlay), last_flip);
320 	struct i915_vma *vma;
321 
322 	vma = fetch_and_zero(&overlay->old_vma);
323 	if (WARN_ON(!vma))
324 		return;
325 
326 	i915_gem_track_fb(vma->obj, NULL,
327 			  INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
328 
329 	i915_gem_object_unpin_from_display_plane(vma);
330 	i915_vma_put(vma);
331 }
332 
333 static void intel_overlay_off_tail(struct i915_gem_active *active,
334 				   struct drm_i915_gem_request *req)
335 {
336 	struct intel_overlay *overlay =
337 		container_of(active, typeof(*overlay), last_flip);
338 	struct i915_vma *vma;
339 
340 	/* never have the overlay hw on without showing a frame */
341 	vma = fetch_and_zero(&overlay->vma);
342 	if (WARN_ON(!vma))
343 		return;
344 
345 	i915_gem_object_unpin_from_display_plane(vma);
346 	i915_vma_put(vma);
347 
348 	overlay->crtc->overlay = NULL;
349 	overlay->crtc = NULL;
350 	overlay->active = false;
351 }
352 
353 /* overlay needs to be disabled in OCMD reg */
354 static int intel_overlay_off(struct intel_overlay *overlay)
355 {
356 	struct drm_i915_private *dev_priv = overlay->i915;
357 	struct drm_i915_gem_request *req;
358 	struct intel_ring *ring;
359 	u32 flip_addr = overlay->flip_addr;
360 	int ret;
361 
362 	WARN_ON(!overlay->active);
363 
364 	/* According to intel docs the overlay hw may hang (when switching
365 	 * off) without loading the filter coeffs. It is however unclear whether
366 	 * this applies to the disabling of the overlay or to the switching off
367 	 * of the hw. Do it in both cases */
368 	flip_addr |= OFC_UPDATE;
369 
370 	req = alloc_request(overlay);
371 	if (IS_ERR(req))
372 		return PTR_ERR(req);
373 
374 	ret = intel_ring_begin(req, 6);
375 	if (ret) {
376 		i915_add_request_no_flush(req);
377 		return ret;
378 	}
379 
380 	ring = req->ring;
381 	/* wait for overlay to go idle */
382 	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
383 	intel_ring_emit(ring, flip_addr);
384 	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
385 	/* turn overlay off */
386 	if (IS_I830(dev_priv)) {
387 		/* Workaround: Don't disable the overlay fully, since otherwise
388 		 * it dies on the next OVERLAY_ON cmd. */
389 		intel_ring_emit(ring, MI_NOOP);
390 		intel_ring_emit(ring, MI_NOOP);
391 		intel_ring_emit(ring, MI_NOOP);
392 	} else {
393 		intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_OFF);
394 		intel_ring_emit(ring, flip_addr);
395 		intel_ring_emit(ring,
396 				MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
397 	}
398 	intel_ring_advance(ring);
399 
400 	return intel_overlay_do_wait_request(overlay, req,
401 					     intel_overlay_off_tail);
402 }
403 
404 /* recover from an interruption due to a signal
405  * We have to be careful not to repeat work forever an make forward progess. */
406 static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
407 {
408 	return i915_gem_active_retire(&overlay->last_flip,
409 				      &overlay->i915->drm.struct_mutex);
410 }
411 
412 /* Wait for pending overlay flip and release old frame.
413  * Needs to be called before the overlay register are changed
414  * via intel_overlay_(un)map_regs
415  */
416 static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
417 {
418 	struct drm_i915_private *dev_priv = overlay->i915;
419 	int ret;
420 
421 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
422 
423 	/* Only wait if there is actually an old frame to release to
424 	 * guarantee forward progress.
425 	 */
426 	if (!overlay->old_vma)
427 		return 0;
428 
429 	if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
430 		/* synchronous slowpath */
431 		struct drm_i915_gem_request *req;
432 		struct intel_ring *ring;
433 
434 		req = alloc_request(overlay);
435 		if (IS_ERR(req))
436 			return PTR_ERR(req);
437 
438 		ret = intel_ring_begin(req, 2);
439 		if (ret) {
440 			i915_add_request_no_flush(req);
441 			return ret;
442 		}
443 
444 		ring = req->ring;
445 		intel_ring_emit(ring,
446 				MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
447 		intel_ring_emit(ring, MI_NOOP);
448 		intel_ring_advance(ring);
449 
450 		ret = intel_overlay_do_wait_request(overlay, req,
451 						    intel_overlay_release_old_vid_tail);
452 		if (ret)
453 			return ret;
454 	} else
455 		intel_overlay_release_old_vid_tail(&overlay->last_flip, NULL);
456 
457 	return 0;
458 }
459 
460 void intel_overlay_reset(struct drm_i915_private *dev_priv)
461 {
462 	struct intel_overlay *overlay = dev_priv->overlay;
463 
464 	if (!overlay)
465 		return;
466 
467 	intel_overlay_release_old_vid(overlay);
468 
469 	overlay->old_xscale = 0;
470 	overlay->old_yscale = 0;
471 	overlay->crtc = NULL;
472 	overlay->active = false;
473 }
474 
/* Parameters of one put-image operation, as consumed by
 * intel_overlay_do_put_image() and its helpers. */
struct put_image_params {
	/* I915_OVERLAY_* format flags (type, depth, swap) */
	int format;
	/* destination window position and size */
	short dst_x;
	short dst_y;
	short dst_w;
	short dst_h;
	/* source size, and the scanned-out part of it used for scaling */
	short src_w;
	short src_scan_h;
	short src_scan_w;
	short src_h;
	/* strides and offsets of the Y and U/V data within the buffer object */
	short stride_Y;
	short stride_UV;
	int offset_Y;
	int offset_U;
	int offset_V;
};
491 
492 static int packed_depth_bytes(u32 format)
493 {
494 	switch (format & I915_OVERLAY_DEPTH_MASK) {
495 	case I915_OVERLAY_YUV422:
496 		return 4;
497 	case I915_OVERLAY_YUV411:
498 		/* return 6; not implemented */
499 	default:
500 		return -EINVAL;
501 	}
502 }
503 
504 static int packed_width_bytes(u32 format, short width)
505 {
506 	switch (format & I915_OVERLAY_DEPTH_MASK) {
507 	case I915_OVERLAY_YUV422:
508 		return width << 1;
509 	default:
510 		return -EINVAL;
511 	}
512 }
513 
514 static int uv_hsubsampling(u32 format)
515 {
516 	switch (format & I915_OVERLAY_DEPTH_MASK) {
517 	case I915_OVERLAY_YUV422:
518 	case I915_OVERLAY_YUV420:
519 		return 2;
520 	case I915_OVERLAY_YUV411:
521 	case I915_OVERLAY_YUV410:
522 		return 4;
523 	default:
524 		return -EINVAL;
525 	}
526 }
527 
528 static int uv_vsubsampling(u32 format)
529 {
530 	switch (format & I915_OVERLAY_DEPTH_MASK) {
531 	case I915_OVERLAY_YUV420:
532 	case I915_OVERLAY_YUV410:
533 		return 2;
534 	case I915_OVERLAY_YUV422:
535 	case I915_OVERLAY_YUV411:
536 		return 1;
537 	default:
538 		return -EINVAL;
539 	}
540 }
541 
542 static u32 calc_swidthsw(struct drm_i915_private *dev_priv, u32 offset, u32 width)
543 {
544 	u32 mask, shift, ret;
545 	if (IS_GEN2(dev_priv)) {
546 		mask = 0x1f;
547 		shift = 5;
548 	} else {
549 		mask = 0x3f;
550 		shift = 6;
551 	}
552 	ret = ((offset + width + mask) >> shift) - (offset >> shift);
553 	if (!IS_GEN2(dev_priv))
554 		ret <<= 1;
555 	ret -= 1;
556 	return ret << 2;
557 }
558 
/*
 * Static horizontal filter coefficients for the Y plane, copied into
 * overlay_registers.Y_HCOEFS by update_polyphase_filter(). One row per
 * phase (N_PHASES rows of N_HORIZ_Y_TAPS values).
 */
static const u16 y_static_hcoeffs[N_HORIZ_Y_TAPS * N_PHASES] = {
	0x3000, 0xb4a0, 0x1930, 0x1920, 0xb4a0,
	0x3000, 0xb500, 0x19d0, 0x1880, 0xb440,
	0x3000, 0xb540, 0x1a88, 0x2f80, 0xb3e0,
	0x3000, 0xb580, 0x1b30, 0x2e20, 0xb380,
	0x3000, 0xb5c0, 0x1bd8, 0x2cc0, 0xb320,
	0x3020, 0xb5e0, 0x1c60, 0x2b80, 0xb2c0,
	0x3020, 0xb5e0, 0x1cf8, 0x2a20, 0xb260,
	0x3020, 0xb5e0, 0x1d80, 0x28e0, 0xb200,
	0x3020, 0xb5c0, 0x1e08, 0x3f40, 0xb1c0,
	0x3020, 0xb580, 0x1e78, 0x3ce0, 0xb160,
	0x3040, 0xb520, 0x1ed8, 0x3aa0, 0xb120,
	0x3040, 0xb4a0, 0x1f30, 0x3880, 0xb0e0,
	0x3040, 0xb400, 0x1f78, 0x3680, 0xb0a0,
	0x3020, 0xb340, 0x1fb8, 0x34a0, 0xb060,
	0x3020, 0xb240, 0x1fe0, 0x32e0, 0xb040,
	0x3020, 0xb140, 0x1ff8, 0x3160, 0xb020,
	0xb000, 0x3000, 0x0800, 0x3000, 0xb000
};
578 
/*
 * Static horizontal filter coefficients for the U/V planes, copied into
 * overlay_registers.UV_HCOEFS by update_polyphase_filter(). Holds
 * N_PHASES groups of N_HORIZ_UV_TAPS values; the rows below are laid out
 * two groups per line.
 */
static const u16 uv_static_hcoeffs[N_HORIZ_UV_TAPS * N_PHASES] = {
	0x3000, 0x1800, 0x1800, 0xb000, 0x18d0, 0x2e60,
	0xb000, 0x1990, 0x2ce0, 0xb020, 0x1a68, 0x2b40,
	0xb040, 0x1b20, 0x29e0, 0xb060, 0x1bd8, 0x2880,
	0xb080, 0x1c88, 0x3e60, 0xb0a0, 0x1d28, 0x3c00,
	0xb0c0, 0x1db8, 0x39e0, 0xb0e0, 0x1e40, 0x37e0,
	0xb100, 0x1eb8, 0x3620, 0xb100, 0x1f18, 0x34a0,
	0xb100, 0x1f68, 0x3360, 0xb0e0, 0x1fa8, 0x3240,
	0xb0c0, 0x1fe0, 0x3140, 0xb060, 0x1ff0, 0x30a0,
	0x3000, 0x0800, 0x3000
};
590 
591 static void update_polyphase_filter(struct overlay_registers __iomem *regs)
592 {
593 	memcpy_toio(regs->Y_HCOEFS, y_static_hcoeffs, sizeof(y_static_hcoeffs));
594 	memcpy_toio(regs->UV_HCOEFS, uv_static_hcoeffs,
595 		    sizeof(uv_static_hcoeffs));
596 }
597 
/*
 * Compute the Y and U/V scale factors for @params and write them to the
 * YRGBSCALE, UVSCALE and UVSCALEV registers.
 *
 * The new factors are cached in @overlay; when they differ from the
 * previously cached ones the polyphase filter coefficients are reloaded
 * as well. Returns true when the scale changed.
 */
static bool update_scaling_factors(struct intel_overlay *overlay,
				   struct overlay_registers __iomem *regs,
				   struct put_image_params *params)
{
	/* fixed point with a 12 bit shift */
	u32 xscale, yscale, xscale_UV, yscale_UV;
#define FP_SHIFT 12
#define FRACT_MASK 0xfff
	bool scale_changed = false;
	int uv_hscale = uv_hsubsampling(params->format);
	int uv_vscale = uv_vsubsampling(params->format);

	/* a destination of 0 or 1 pixels falls back to a scale of 1.0 */
	if (params->dst_w > 1)
		xscale = ((params->src_scan_w - 1) << FP_SHIFT)
			/(params->dst_w);
	else
		xscale = 1 << FP_SHIFT;

	if (params->dst_h > 1)
		yscale = ((params->src_scan_h - 1) << FP_SHIFT)
			/(params->dst_h);
	else
		yscale = 1 << FP_SHIFT;

	/*if (params->format & I915_OVERLAY_YUV_PLANAR) {*/
	xscale_UV = xscale/uv_hscale;
	yscale_UV = yscale/uv_vscale;
	/* make the Y scale to UV scale ratio an exact multiply */
	xscale = xscale_UV * uv_hscale;
	yscale = yscale_UV * uv_vscale;
	/*} else {
	  xscale_UV = 0;
	  yscale_UV = 0;
	  }*/

	if (xscale != overlay->old_xscale || yscale != overlay->old_yscale)
		scale_changed = true;
	overlay->old_xscale = xscale;
	overlay->old_yscale = yscale;

	/* vertical fraction, horizontal integer part, horizontal fraction */
	iowrite32(((yscale & FRACT_MASK) << 20) |
		  ((xscale >> FP_SHIFT)  << 16) |
		  ((xscale & FRACT_MASK) << 3),
		 &regs->YRGBSCALE);

	iowrite32(((yscale_UV & FRACT_MASK) << 20) |
		  ((xscale_UV >> FP_SHIFT)  << 16) |
		  ((xscale_UV & FRACT_MASK) << 3),
		 &regs->UVSCALE);

	/* integer parts of the vertical Y and U/V scale */
	iowrite32((((yscale    >> FP_SHIFT) << 16) |
		   ((yscale_UV >> FP_SHIFT) << 0)),
		 &regs->UVSCALEV);

	if (scale_changed)
		update_polyphase_filter(regs);

	return scale_changed;
}
657 
658 static void update_colorkey(struct intel_overlay *overlay,
659 			    struct overlay_registers __iomem *regs)
660 {
661 	u32 key = overlay->color_key;
662 	u32 flags;
663 
664 	flags = 0;
665 	if (overlay->color_key_enabled)
666 		flags |= DST_KEY_ENABLE;
667 
668 	switch (overlay->crtc->base.primary->fb->bits_per_pixel) {
669 	case 8:
670 		key = 0;
671 		flags |= CLK_RGB8I_MASK;
672 		break;
673 
674 	case 16:
675 		if (overlay->crtc->base.primary->fb->depth == 15) {
676 			key = RGB15_TO_COLORKEY(key);
677 			flags |= CLK_RGB15_MASK;
678 		} else {
679 			key = RGB16_TO_COLORKEY(key);
680 			flags |= CLK_RGB16_MASK;
681 		}
682 		break;
683 
684 	case 24:
685 	case 32:
686 		flags |= CLK_RGB24_MASK;
687 		break;
688 	}
689 
690 	iowrite32(key, &regs->DCLRKV);
691 	iowrite32(flags, &regs->DCLRKM);
692 }
693 
694 static u32 overlay_cmd_reg(struct put_image_params *params)
695 {
696 	u32 cmd = OCMD_ENABLE | OCMD_BUF_TYPE_FRAME | OCMD_BUFFER0;
697 
698 	if (params->format & I915_OVERLAY_YUV_PLANAR) {
699 		switch (params->format & I915_OVERLAY_DEPTH_MASK) {
700 		case I915_OVERLAY_YUV422:
701 			cmd |= OCMD_YUV_422_PLANAR;
702 			break;
703 		case I915_OVERLAY_YUV420:
704 			cmd |= OCMD_YUV_420_PLANAR;
705 			break;
706 		case I915_OVERLAY_YUV411:
707 		case I915_OVERLAY_YUV410:
708 			cmd |= OCMD_YUV_410_PLANAR;
709 			break;
710 		}
711 	} else { /* YUV packed */
712 		switch (params->format & I915_OVERLAY_DEPTH_MASK) {
713 		case I915_OVERLAY_YUV422:
714 			cmd |= OCMD_YUV_422_PACKED;
715 			break;
716 		case I915_OVERLAY_YUV411:
717 			cmd |= OCMD_YUV_411_PACKED;
718 			break;
719 		}
720 
721 		switch (params->format & I915_OVERLAY_SWAP_MASK) {
722 		case I915_OVERLAY_NO_SWAP:
723 			break;
724 		case I915_OVERLAY_UV_SWAP:
725 			cmd |= OCMD_UV_SWAP;
726 			break;
727 		case I915_OVERLAY_Y_SWAP:
728 			cmd |= OCMD_Y_SWAP;
729 			break;
730 		case I915_OVERLAY_Y_AND_UV_SWAP:
731 			cmd |= OCMD_Y_AND_UV_SWAP;
732 			break;
733 		}
734 	}
735 
736 	return cmd;
737 }
738 
739 static int intel_overlay_do_put_image(struct intel_overlay *overlay,
740 				      struct drm_i915_gem_object *new_bo,
741 				      struct put_image_params *params)
742 {
743 	int ret, tmp_width;
744 	struct overlay_registers __iomem *regs;
745 	bool scale_changed = false;
746 	struct drm_i915_private *dev_priv = overlay->i915;
747 	u32 swidth, swidthsw, sheight, ostride;
748 	enum i915_pipe pipe = overlay->crtc->pipe;
749 	struct i915_vma *vma;
750 
751 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
752 	WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex));
753 
754 	ret = intel_overlay_release_old_vid(overlay);
755 	if (ret != 0)
756 		return ret;
757 
758 	vma = i915_gem_object_pin_to_display_plane(new_bo, 0,
759 						   &i915_ggtt_view_normal);
760 	if (IS_ERR(vma))
761 		return PTR_ERR(vma);
762 
763 	ret = i915_vma_put_fence(vma);
764 	if (ret)
765 		goto out_unpin;
766 
767 	if (!overlay->active) {
768 		u32 oconfig;
769 		regs = intel_overlay_map_regs(overlay);
770 		if (!regs) {
771 			ret = -ENOMEM;
772 			goto out_unpin;
773 		}
774 		oconfig = OCONF_CC_OUT_8BIT;
775 		if (IS_GEN4(dev_priv))
776 			oconfig |= OCONF_CSC_MODE_BT709;
777 		oconfig |= pipe == 0 ?
778 			OCONF_PIPE_A : OCONF_PIPE_B;
779 		iowrite32(oconfig, &regs->OCONFIG);
780 		intel_overlay_unmap_regs(overlay, regs);
781 
782 		ret = intel_overlay_on(overlay);
783 		if (ret != 0)
784 			goto out_unpin;
785 	}
786 
787 	regs = intel_overlay_map_regs(overlay);
788 	if (!regs) {
789 		ret = -ENOMEM;
790 		goto out_unpin;
791 	}
792 
793 	iowrite32((params->dst_y << 16) | params->dst_x, &regs->DWINPOS);
794 	iowrite32((params->dst_h << 16) | params->dst_w, &regs->DWINSZ);
795 
796 	if (params->format & I915_OVERLAY_YUV_PACKED)
797 		tmp_width = packed_width_bytes(params->format, params->src_w);
798 	else
799 		tmp_width = params->src_w;
800 
801 	swidth = params->src_w;
802 	swidthsw = calc_swidthsw(dev_priv, params->offset_Y, tmp_width);
803 	sheight = params->src_h;
804 	iowrite32(i915_ggtt_offset(vma) + params->offset_Y, &regs->OBUF_0Y);
805 	ostride = params->stride_Y;
806 
807 	if (params->format & I915_OVERLAY_YUV_PLANAR) {
808 		int uv_hscale = uv_hsubsampling(params->format);
809 		int uv_vscale = uv_vsubsampling(params->format);
810 		u32 tmp_U, tmp_V;
811 		swidth |= (params->src_w/uv_hscale) << 16;
812 		tmp_U = calc_swidthsw(dev_priv, params->offset_U,
813 				      params->src_w/uv_hscale);
814 		tmp_V = calc_swidthsw(dev_priv, params->offset_V,
815 				      params->src_w/uv_hscale);
816 		swidthsw |= max_t(u32, tmp_U, tmp_V) << 16;
817 		sheight |= (params->src_h/uv_vscale) << 16;
818 		iowrite32(i915_ggtt_offset(vma) + params->offset_U,
819 			  &regs->OBUF_0U);
820 		iowrite32(i915_ggtt_offset(vma) + params->offset_V,
821 			  &regs->OBUF_0V);
822 		ostride |= params->stride_UV << 16;
823 	}
824 
825 	iowrite32(swidth, &regs->SWIDTH);
826 	iowrite32(swidthsw, &regs->SWIDTHSW);
827 	iowrite32(sheight, &regs->SHEIGHT);
828 	iowrite32(ostride, &regs->OSTRIDE);
829 
830 	scale_changed = update_scaling_factors(overlay, regs, params);
831 
832 	update_colorkey(overlay, regs);
833 
834 	iowrite32(overlay_cmd_reg(params), &regs->OCMD);
835 
836 	intel_overlay_unmap_regs(overlay, regs);
837 
838 	ret = intel_overlay_continue(overlay, scale_changed);
839 	if (ret)
840 		goto out_unpin;
841 
842 	i915_gem_track_fb(overlay->vma->obj, new_bo,
843 			  INTEL_FRONTBUFFER_OVERLAY(pipe));
844 
845 	overlay->old_vma = overlay->vma;
846 	overlay->vma = vma;
847 
848 	intel_frontbuffer_flip(dev_priv, INTEL_FRONTBUFFER_OVERLAY(pipe));
849 
850 	return 0;
851 
852 out_unpin:
853 	i915_gem_object_unpin_from_display_plane(vma);
854 	return ret;
855 }
856 
857 int intel_overlay_switch_off(struct intel_overlay *overlay)
858 {
859 	struct drm_i915_private *dev_priv = overlay->i915;
860 	struct overlay_registers __iomem *regs;
861 	int ret;
862 
863 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
864 	WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex));
865 
866 	ret = intel_overlay_recover_from_interrupt(overlay);
867 	if (ret != 0)
868 		return ret;
869 
870 	if (!overlay->active)
871 		return 0;
872 
873 	ret = intel_overlay_release_old_vid(overlay);
874 	if (ret != 0)
875 		return ret;
876 
877 	regs = intel_overlay_map_regs(overlay);
878 	iowrite32(0, &regs->OCMD);
879 	intel_overlay_unmap_regs(overlay, regs);
880 
881 	return intel_overlay_off(overlay);
882 }
883 
884 static int check_overlay_possible_on_crtc(struct intel_overlay *overlay,
885 					  struct intel_crtc *crtc)
886 {
887 	if (!crtc->active)
888 		return -EINVAL;
889 
890 	/* can't use the overlay with double wide pipe */
891 	if (crtc->config->double_wide)
892 		return -EINVAL;
893 
894 	return 0;
895 }
896 
897 static void update_pfit_vscale_ratio(struct intel_overlay *overlay)
898 {
899 	struct drm_i915_private *dev_priv = overlay->i915;
900 	u32 pfit_control = I915_READ(PFIT_CONTROL);
901 	u32 ratio;
902 
903 	/* XXX: This is not the same logic as in the xorg driver, but more in
904 	 * line with the intel documentation for the i965
905 	 */
906 	if (INTEL_GEN(dev_priv) >= 4) {
907 		/* on i965 use the PGM reg to read out the autoscaler values */
908 		ratio = I915_READ(PFIT_PGM_RATIOS) >> PFIT_VERT_SCALE_SHIFT_965;
909 	} else {
910 		if (pfit_control & VERT_AUTO_SCALE)
911 			ratio = I915_READ(PFIT_AUTO_RATIOS);
912 		else
913 			ratio = I915_READ(PFIT_PGM_RATIOS);
914 		ratio >>= PFIT_VERT_SCALE_SHIFT;
915 	}
916 
917 	overlay->pfit_vscale_ratio = ratio;
918 }
919 
920 static int check_overlay_dst(struct intel_overlay *overlay,
921 			     struct drm_intel_overlay_put_image *rec)
922 {
923 	struct drm_display_mode *mode = &overlay->crtc->base.mode;
924 
925 	if (rec->dst_x < mode->hdisplay &&
926 	    rec->dst_x + rec->dst_width <= mode->hdisplay &&
927 	    rec->dst_y < mode->vdisplay &&
928 	    rec->dst_y + rec->dst_height <= mode->vdisplay)
929 		return 0;
930 	else
931 		return -EINVAL;
932 }
933 
934 static int check_overlay_scaling(struct put_image_params *rec)
935 {
936 	u32 tmp;
937 
938 	/* downscaling limit is 8.0 */
939 	tmp = ((rec->src_scan_h << 16) / rec->dst_h) >> 16;
940 	if (tmp > 7)
941 		return -EINVAL;
942 	tmp = ((rec->src_scan_w << 16) / rec->dst_w) >> 16;
943 	if (tmp > 7)
944 		return -EINVAL;
945 
946 	return 0;
947 }
948 
949 static int check_overlay_src(struct drm_i915_private *dev_priv,
950 			     struct drm_intel_overlay_put_image *rec,
951 			     struct drm_i915_gem_object *new_bo)
952 {
953 	int uv_hscale = uv_hsubsampling(rec->flags);
954 	int uv_vscale = uv_vsubsampling(rec->flags);
955 	u32 stride_mask;
956 	int depth;
957 	u32 tmp;
958 
959 	/* check src dimensions */
960 	if (IS_845G(dev_priv) || IS_I830(dev_priv)) {
961 		if (rec->src_height > IMAGE_MAX_HEIGHT_LEGACY ||
962 		    rec->src_width  > IMAGE_MAX_WIDTH_LEGACY)
963 			return -EINVAL;
964 	} else {
965 		if (rec->src_height > IMAGE_MAX_HEIGHT ||
966 		    rec->src_width  > IMAGE_MAX_WIDTH)
967 			return -EINVAL;
968 	}
969 
970 	/* better safe than sorry, use 4 as the maximal subsampling ratio */
971 	if (rec->src_height < N_VERT_Y_TAPS*4 ||
972 	    rec->src_width  < N_HORIZ_Y_TAPS*4)
973 		return -EINVAL;
974 
975 	/* check alignment constraints */
976 	switch (rec->flags & I915_OVERLAY_TYPE_MASK) {
977 	case I915_OVERLAY_RGB:
978 		/* not implemented */
979 		return -EINVAL;
980 
981 	case I915_OVERLAY_YUV_PACKED:
982 		if (uv_vscale != 1)
983 			return -EINVAL;
984 
985 		depth = packed_depth_bytes(rec->flags);
986 		if (depth < 0)
987 			return depth;
988 
989 		/* ignore UV planes */
990 		rec->stride_UV = 0;
991 		rec->offset_U = 0;
992 		rec->offset_V = 0;
993 		/* check pixel alignment */
994 		if (rec->offset_Y % depth)
995 			return -EINVAL;
996 		break;
997 
998 	case I915_OVERLAY_YUV_PLANAR:
999 		if (uv_vscale < 0 || uv_hscale < 0)
1000 			return -EINVAL;
1001 		/* no offset restrictions for planar formats */
1002 		break;
1003 
1004 	default:
1005 		return -EINVAL;
1006 	}
1007 
1008 	if (rec->src_width % uv_hscale)
1009 		return -EINVAL;
1010 
1011 	/* stride checking */
1012 	if (IS_I830(dev_priv) || IS_845G(dev_priv))
1013 		stride_mask = 255;
1014 	else
1015 		stride_mask = 63;
1016 
1017 	if (rec->stride_Y & stride_mask || rec->stride_UV & stride_mask)
1018 		return -EINVAL;
1019 	if (IS_GEN4(dev_priv) && rec->stride_Y < 512)
1020 		return -EINVAL;
1021 
1022 	tmp = (rec->flags & I915_OVERLAY_TYPE_MASK) == I915_OVERLAY_YUV_PLANAR ?
1023 		4096 : 8192;
1024 	if (rec->stride_Y > tmp || rec->stride_UV > 2*1024)
1025 		return -EINVAL;
1026 
1027 	/* check buffer dimensions */
1028 	switch (rec->flags & I915_OVERLAY_TYPE_MASK) {
1029 	case I915_OVERLAY_RGB:
1030 	case I915_OVERLAY_YUV_PACKED:
1031 		/* always 4 Y values per depth pixels */
1032 		if (packed_width_bytes(rec->flags, rec->src_width) > rec->stride_Y)
1033 			return -EINVAL;
1034 
1035 		tmp = rec->stride_Y*rec->src_height;
1036 		if (rec->offset_Y + tmp > new_bo->base.size)
1037 			return -EINVAL;
1038 		break;
1039 
1040 	case I915_OVERLAY_YUV_PLANAR:
1041 		if (rec->src_width > rec->stride_Y)
1042 			return -EINVAL;
1043 		if (rec->src_width/uv_hscale > rec->stride_UV)
1044 			return -EINVAL;
1045 
1046 		tmp = rec->stride_Y * rec->src_height;
1047 		if (rec->offset_Y + tmp > new_bo->base.size)
1048 			return -EINVAL;
1049 
1050 		tmp = rec->stride_UV * (rec->src_height / uv_vscale);
1051 		if (rec->offset_U + tmp > new_bo->base.size ||
1052 		    rec->offset_V + tmp > new_bo->base.size)
1053 			return -EINVAL;
1054 		break;
1055 	}
1056 
1057 	return 0;
1058 }
1059 
1060 /**
1061  * Return the pipe currently connected to the panel fitter,
1062  * or -1 if the panel fitter is not present or not in use
1063  */
1064 static int intel_panel_fitter_pipe(struct drm_i915_private *dev_priv)
1065 {
1066 	u32  pfit_control;
1067 
1068 	/* i830 doesn't have a panel fitter */
1069 	if (INTEL_GEN(dev_priv) <= 3 &&
1070 	    (IS_I830(dev_priv) || !IS_MOBILE(dev_priv)))
1071 		return -1;
1072 
1073 	pfit_control = I915_READ(PFIT_CONTROL);
1074 
1075 	/* See if the panel fitter is in use */
1076 	if ((pfit_control & PFIT_ENABLE) == 0)
1077 		return -1;
1078 
1079 	/* 965 can place panel fitter on either pipe */
1080 	if (IS_GEN4(dev_priv))
1081 		return (pfit_control >> 29) & 0x3;
1082 
1083 	/* older chips can only use pipe 1 */
1084 	return 1;
1085 }
1086 
1087 int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data,
1088 				  struct drm_file *file_priv)
1089 {
1090 	struct drm_intel_overlay_put_image *put_image_rec = data;
1091 	struct drm_i915_private *dev_priv = to_i915(dev);
1092 	struct intel_overlay *overlay;
1093 	struct drm_crtc *drmmode_crtc;
1094 	struct intel_crtc *crtc;
1095 	struct drm_i915_gem_object *new_bo;
1096 	struct put_image_params *params;
1097 	int ret;
1098 
1099 	overlay = dev_priv->overlay;
1100 	if (!overlay) {
1101 		DRM_DEBUG("userspace bug: no overlay\n");
1102 		return -ENODEV;
1103 	}
1104 
1105 	if (!(put_image_rec->flags & I915_OVERLAY_ENABLE)) {
1106 		drm_modeset_lock_all(dev);
1107 		mutex_lock(&dev->struct_mutex);
1108 
1109 		ret = intel_overlay_switch_off(overlay);
1110 
1111 		mutex_unlock(&dev->struct_mutex);
1112 		drm_modeset_unlock_all(dev);
1113 
1114 		return ret;
1115 	}
1116 
1117 	params = kmalloc(sizeof(*params), M_DRM, GFP_KERNEL);
1118 	if (!params)
1119 		return -ENOMEM;
1120 
1121 	drmmode_crtc = drm_crtc_find(dev, put_image_rec->crtc_id);
1122 	if (!drmmode_crtc) {
1123 		ret = -ENOENT;
1124 		goto out_free;
1125 	}
1126 	crtc = to_intel_crtc(drmmode_crtc);
1127 
1128 	new_bo = i915_gem_object_lookup(file_priv, put_image_rec->bo_handle);
1129 	if (!new_bo) {
1130 		ret = -ENOENT;
1131 		goto out_free;
1132 	}
1133 
1134 	drm_modeset_lock_all(dev);
1135 	mutex_lock(&dev->struct_mutex);
1136 
1137 	if (i915_gem_object_is_tiled(new_bo)) {
1138 		DRM_DEBUG_KMS("buffer used for overlay image can not be tiled\n");
1139 		ret = -EINVAL;
1140 		goto out_unlock;
1141 	}
1142 
1143 	ret = intel_overlay_recover_from_interrupt(overlay);
1144 	if (ret != 0)
1145 		goto out_unlock;
1146 
1147 	if (overlay->crtc != crtc) {
1148 		struct drm_display_mode *mode = &crtc->base.mode;
1149 		ret = intel_overlay_switch_off(overlay);
1150 		if (ret != 0)
1151 			goto out_unlock;
1152 
1153 		ret = check_overlay_possible_on_crtc(overlay, crtc);
1154 		if (ret != 0)
1155 			goto out_unlock;
1156 
1157 		overlay->crtc = crtc;
1158 		crtc->overlay = overlay;
1159 
1160 		/* line too wide, i.e. one-line-mode */
1161 		if (mode->hdisplay > 1024 &&
1162 		    intel_panel_fitter_pipe(dev_priv) == crtc->pipe) {
1163 			overlay->pfit_active = true;
1164 			update_pfit_vscale_ratio(overlay);
1165 		} else
1166 			overlay->pfit_active = false;
1167 	}
1168 
1169 	ret = check_overlay_dst(overlay, put_image_rec);
1170 	if (ret != 0)
1171 		goto out_unlock;
1172 
1173 	if (overlay->pfit_active) {
1174 		params->dst_y = ((((u32)put_image_rec->dst_y) << 12) /
1175 				 overlay->pfit_vscale_ratio);
1176 		/* shifting right rounds downwards, so add 1 */
1177 		params->dst_h = ((((u32)put_image_rec->dst_height) << 12) /
1178 				 overlay->pfit_vscale_ratio) + 1;
1179 	} else {
1180 		params->dst_y = put_image_rec->dst_y;
1181 		params->dst_h = put_image_rec->dst_height;
1182 	}
1183 	params->dst_x = put_image_rec->dst_x;
1184 	params->dst_w = put_image_rec->dst_width;
1185 
1186 	params->src_w = put_image_rec->src_width;
1187 	params->src_h = put_image_rec->src_height;
1188 	params->src_scan_w = put_image_rec->src_scan_width;
1189 	params->src_scan_h = put_image_rec->src_scan_height;
1190 	if (params->src_scan_h > params->src_h ||
1191 	    params->src_scan_w > params->src_w) {
1192 		ret = -EINVAL;
1193 		goto out_unlock;
1194 	}
1195 
1196 	ret = check_overlay_src(dev_priv, put_image_rec, new_bo);
1197 	if (ret != 0)
1198 		goto out_unlock;
1199 	params->format = put_image_rec->flags & ~I915_OVERLAY_FLAGS_MASK;
1200 	params->stride_Y = put_image_rec->stride_Y;
1201 	params->stride_UV = put_image_rec->stride_UV;
1202 	params->offset_Y = put_image_rec->offset_Y;
1203 	params->offset_U = put_image_rec->offset_U;
1204 	params->offset_V = put_image_rec->offset_V;
1205 
1206 	/* Check scaling after src size to prevent a divide-by-zero. */
1207 	ret = check_overlay_scaling(params);
1208 	if (ret != 0)
1209 		goto out_unlock;
1210 
1211 	ret = intel_overlay_do_put_image(overlay, new_bo, params);
1212 	if (ret != 0)
1213 		goto out_unlock;
1214 
1215 	mutex_unlock(&dev->struct_mutex);
1216 	drm_modeset_unlock_all(dev);
1217 
1218 	kfree(params);
1219 
1220 	return 0;
1221 
1222 out_unlock:
1223 	mutex_unlock(&dev->struct_mutex);
1224 	drm_modeset_unlock_all(dev);
1225 	i915_gem_object_put_unlocked(new_bo);
1226 out_free:
1227 	kfree(params);
1228 
1229 	return ret;
1230 }
1231 
1232 static void update_reg_attrs(struct intel_overlay *overlay,
1233 			     struct overlay_registers __iomem *regs)
1234 {
1235 	iowrite32((overlay->contrast << 18) | (overlay->brightness & 0xff),
1236 		  &regs->OCLRC0);
1237 	iowrite32(overlay->saturation, &regs->OCLRC1);
1238 }
1239 
/*
 * Validate a pair of adjacent gamma ramp points.
 *
 * Both values must have their top byte clear, and each of the three low
 * byte channels of @gamma1 must be strictly smaller than the matching
 * channel of @gamma2.  Returns true when the pair is acceptable.
 */
static bool check_gamma_bounds(u32 gamma1, u32 gamma2)
{
	unsigned int shift;

	/* Only the low 24 bits may be used by either point. */
	if ((gamma1 | gamma2) & 0xff000000)
		return false;

	for (shift = 0; shift < 24; shift += 8) {
		u32 lower = (gamma1 >> shift) & 0xff;
		u32 upper = (gamma2 >> shift) & 0xff;

		if (lower >= upper)
			return false;
	}

	return true;
}
1254 
/*
 * Reject gamma5 values in which any of the three low byte channels equals
 * 0x80.  The top byte is not inspected.  Returns true when the value is
 * usable.
 */
static bool check_gamma5_errata(u32 gamma5)
{
	u32 remaining = gamma5;
	int channel;

	for (channel = 0; channel < 3; channel++, remaining >>= 8) {
		if ((remaining & 0xff) == 0x80)
			return false;
	}

	return true;
}
1266 
1267 static int check_gamma(struct drm_intel_overlay_attrs *attrs)
1268 {
1269 	if (!check_gamma_bounds(0, attrs->gamma0) ||
1270 	    !check_gamma_bounds(attrs->gamma0, attrs->gamma1) ||
1271 	    !check_gamma_bounds(attrs->gamma1, attrs->gamma2) ||
1272 	    !check_gamma_bounds(attrs->gamma2, attrs->gamma3) ||
1273 	    !check_gamma_bounds(attrs->gamma3, attrs->gamma4) ||
1274 	    !check_gamma_bounds(attrs->gamma4, attrs->gamma5) ||
1275 	    !check_gamma_bounds(attrs->gamma5, 0x00ffffff))
1276 		return -EINVAL;
1277 
1278 	if (!check_gamma5_errata(attrs->gamma5))
1279 		return -EINVAL;
1280 
1281 	return 0;
1282 }
1283 
1284 int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data,
1285 			      struct drm_file *file_priv)
1286 {
1287 	struct drm_intel_overlay_attrs *attrs = data;
1288 	struct drm_i915_private *dev_priv = to_i915(dev);
1289 	struct intel_overlay *overlay;
1290 	struct overlay_registers __iomem *regs;
1291 	int ret;
1292 
1293 	overlay = dev_priv->overlay;
1294 	if (!overlay) {
1295 		DRM_DEBUG("userspace bug: no overlay\n");
1296 		return -ENODEV;
1297 	}
1298 
1299 	drm_modeset_lock_all(dev);
1300 	mutex_lock(&dev->struct_mutex);
1301 
1302 	ret = -EINVAL;
1303 	if (!(attrs->flags & I915_OVERLAY_UPDATE_ATTRS)) {
1304 		attrs->color_key  = overlay->color_key;
1305 		attrs->brightness = overlay->brightness;
1306 		attrs->contrast   = overlay->contrast;
1307 		attrs->saturation = overlay->saturation;
1308 
1309 		if (!IS_GEN2(dev_priv)) {
1310 			attrs->gamma0 = I915_READ(OGAMC0);
1311 			attrs->gamma1 = I915_READ(OGAMC1);
1312 			attrs->gamma2 = I915_READ(OGAMC2);
1313 			attrs->gamma3 = I915_READ(OGAMC3);
1314 			attrs->gamma4 = I915_READ(OGAMC4);
1315 			attrs->gamma5 = I915_READ(OGAMC5);
1316 		}
1317 	} else {
1318 		if (attrs->brightness < -128 || attrs->brightness > 127)
1319 			goto out_unlock;
1320 		if (attrs->contrast > 255)
1321 			goto out_unlock;
1322 		if (attrs->saturation > 1023)
1323 			goto out_unlock;
1324 
1325 		overlay->color_key  = attrs->color_key;
1326 		overlay->brightness = attrs->brightness;
1327 		overlay->contrast   = attrs->contrast;
1328 		overlay->saturation = attrs->saturation;
1329 
1330 		regs = intel_overlay_map_regs(overlay);
1331 		if (!regs) {
1332 			ret = -ENOMEM;
1333 			goto out_unlock;
1334 		}
1335 
1336 		update_reg_attrs(overlay, regs);
1337 
1338 		intel_overlay_unmap_regs(overlay, regs);
1339 
1340 		if (attrs->flags & I915_OVERLAY_UPDATE_GAMMA) {
1341 			if (IS_GEN2(dev_priv))
1342 				goto out_unlock;
1343 
1344 			if (overlay->active) {
1345 				ret = -EBUSY;
1346 				goto out_unlock;
1347 			}
1348 
1349 			ret = check_gamma(attrs);
1350 			if (ret)
1351 				goto out_unlock;
1352 
1353 			I915_WRITE(OGAMC0, attrs->gamma0);
1354 			I915_WRITE(OGAMC1, attrs->gamma1);
1355 			I915_WRITE(OGAMC2, attrs->gamma2);
1356 			I915_WRITE(OGAMC3, attrs->gamma3);
1357 			I915_WRITE(OGAMC4, attrs->gamma4);
1358 			I915_WRITE(OGAMC5, attrs->gamma5);
1359 		}
1360 	}
1361 	overlay->color_key_enabled = (attrs->flags & I915_OVERLAY_DISABLE_DEST_COLORKEY) == 0;
1362 
1363 	ret = 0;
1364 out_unlock:
1365 	mutex_unlock(&dev->struct_mutex);
1366 	drm_modeset_unlock_all(dev);
1367 
1368 	return ret;
1369 }
1370 
1371 void intel_setup_overlay(struct drm_i915_private *dev_priv)
1372 {
1373 	struct intel_overlay *overlay;
1374 	struct drm_i915_gem_object *reg_bo;
1375 	struct overlay_registers __iomem *regs;
1376 	struct i915_vma *vma = NULL;
1377 	int ret;
1378 
1379 	if (!HAS_OVERLAY(dev_priv))
1380 		return;
1381 
1382 	overlay = kzalloc(sizeof(*overlay), GFP_KERNEL);
1383 	if (!overlay)
1384 		return;
1385 
1386 	mutex_lock(&dev_priv->drm.struct_mutex);
1387 	if (WARN_ON(dev_priv->overlay))
1388 		goto out_free;
1389 
1390 	overlay->i915 = dev_priv;
1391 
1392 	reg_bo = NULL;
1393 	if (!OVERLAY_NEEDS_PHYSICAL(dev_priv))
1394 		reg_bo = i915_gem_object_create_stolen(&dev_priv->drm,
1395 						       PAGE_SIZE);
1396 	if (reg_bo == NULL)
1397 		reg_bo = i915_gem_object_create(&dev_priv->drm, PAGE_SIZE);
1398 	if (IS_ERR(reg_bo))
1399 		goto out_free;
1400 	overlay->reg_bo = reg_bo;
1401 
1402 	if (OVERLAY_NEEDS_PHYSICAL(dev_priv)) {
1403 		ret = i915_gem_object_attach_phys(reg_bo, PAGE_SIZE);
1404 		if (ret) {
1405 			DRM_ERROR("failed to attach phys overlay regs\n");
1406 			goto out_free_bo;
1407 		}
1408 		overlay->flip_addr = reg_bo->phys_handle->busaddr;
1409 	} else {
1410 		vma = i915_gem_object_ggtt_pin(reg_bo, NULL,
1411 					       0, PAGE_SIZE, PIN_MAPPABLE);
1412 		if (IS_ERR(vma)) {
1413 			DRM_ERROR("failed to pin overlay register bo\n");
1414 			ret = PTR_ERR(vma);
1415 			goto out_free_bo;
1416 		}
1417 		overlay->flip_addr = i915_ggtt_offset(vma);
1418 
1419 		ret = i915_gem_object_set_to_gtt_domain(reg_bo, true);
1420 		if (ret) {
1421 			DRM_ERROR("failed to move overlay register bo into the GTT\n");
1422 			goto out_unpin_bo;
1423 		}
1424 	}
1425 
1426 	/* init all values */
1427 	overlay->color_key = 0x0101fe;
1428 	overlay->color_key_enabled = true;
1429 	overlay->brightness = -19;
1430 	overlay->contrast = 75;
1431 	overlay->saturation = 146;
1432 
1433 	regs = intel_overlay_map_regs(overlay);
1434 	if (!regs)
1435 		goto out_unpin_bo;
1436 
1437 	memset_io(regs, 0, sizeof(struct overlay_registers));
1438 	update_polyphase_filter(regs);
1439 	update_reg_attrs(overlay, regs);
1440 
1441 	intel_overlay_unmap_regs(overlay, regs);
1442 
1443 	dev_priv->overlay = overlay;
1444 	mutex_unlock(&dev_priv->drm.struct_mutex);
1445 	DRM_INFO("initialized overlay support\n");
1446 	return;
1447 
1448 out_unpin_bo:
1449 	if (vma)
1450 		i915_vma_unpin(vma);
1451 out_free_bo:
1452 	i915_gem_object_put(reg_bo);
1453 out_free:
1454 	mutex_unlock(&dev_priv->drm.struct_mutex);
1455 	kfree(overlay);
1456 	return;
1457 }
1458 
1459 void intel_cleanup_overlay(struct drm_i915_private *dev_priv)
1460 {
1461 	if (!dev_priv->overlay)
1462 		return;
1463 
1464 	/* The bo's should be free'd by the generic code already.
1465 	 * Furthermore modesetting teardown happens beforehand so the
1466 	 * hardware should be off already */
1467 	WARN_ON(dev_priv->overlay->active);
1468 
1469 	i915_gem_object_put_unlocked(dev_priv->overlay->reg_bo);
1470 	kfree(dev_priv->overlay);
1471 }
1472 
1473 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
1474 
/* Snapshot of the overlay filled in by intel_overlay_capture_error_state(). */
struct intel_overlay_error_state {
	struct overlay_registers regs;	/* copy of the mapped overlay register block */
	unsigned long base;		/* overlay->flip_addr at capture time */
	u32 dovsta;			/* value read from the DOVSTA register */
	u32 isr;			/* value read from the ISR register */
};
1481 
1482 static struct overlay_registers __iomem *
1483 intel_overlay_map_regs_atomic(struct intel_overlay *overlay)
1484 {
1485 	struct drm_i915_private *dev_priv = overlay->i915;
1486 	struct overlay_registers __iomem *regs;
1487 
1488 	if (OVERLAY_NEEDS_PHYSICAL(dev_priv))
1489 		/* Cast to make sparse happy, but it's wc memory anyway, so
1490 		 * equivalent to the wc io mapping on X86. */
1491 		regs = (struct overlay_registers __iomem *)
1492 			overlay->reg_bo->phys_handle->vaddr;
1493 	else
1494 		regs = io_mapping_map_atomic_wc(&dev_priv->ggtt.mappable,
1495 						overlay->flip_addr);
1496 
1497 	return regs;
1498 }
1499 
1500 static void intel_overlay_unmap_regs_atomic(struct intel_overlay *overlay,
1501 					struct overlay_registers __iomem *regs)
1502 {
1503 	if (!OVERLAY_NEEDS_PHYSICAL(overlay->i915))
1504 		io_mapping_unmap_atomic(regs);
1505 }
1506 
1507 struct intel_overlay_error_state *
1508 intel_overlay_capture_error_state(struct drm_i915_private *dev_priv)
1509 {
1510 	struct intel_overlay *overlay = dev_priv->overlay;
1511 	struct intel_overlay_error_state *error;
1512 	struct overlay_registers __iomem *regs;
1513 
1514 	if (!overlay || !overlay->active)
1515 		return NULL;
1516 
1517 	error = kmalloc(sizeof(*error), M_DRM, GFP_ATOMIC);
1518 	if (error == NULL)
1519 		return NULL;
1520 
1521 	error->dovsta = I915_READ(DOVSTA);
1522 	error->isr = I915_READ(ISR);
1523 	error->base = overlay->flip_addr;
1524 
1525 	regs = intel_overlay_map_regs_atomic(overlay);
1526 	if (!regs)
1527 		goto err;
1528 
1529 	memcpy_fromio(&error->regs, regs, sizeof(struct overlay_registers));
1530 	intel_overlay_unmap_regs_atomic(overlay, regs);
1531 
1532 	return error;
1533 
1534 err:
1535 	kfree(error);
1536 	return NULL;
1537 }
1538 
1539 void
1540 intel_overlay_print_error_state(struct drm_i915_error_state_buf *m,
1541 				struct intel_overlay_error_state *error)
1542 {
1543 	i915_error_printf(m, "Overlay, status: 0x%08x, interrupt: 0x%08x\n",
1544 			  error->dovsta, error->isr);
1545 	i915_error_printf(m, "  Register file at 0x%08lx:\n",
1546 			  error->base);
1547 
1548 #define P(x) i915_error_printf(m, "    " #x ":	0x%08x\n", error->regs.x)
1549 	P(OBUF_0Y);
1550 	P(OBUF_1Y);
1551 	P(OBUF_0U);
1552 	P(OBUF_0V);
1553 	P(OBUF_1U);
1554 	P(OBUF_1V);
1555 	P(OSTRIDE);
1556 	P(YRGB_VPH);
1557 	P(UV_VPH);
1558 	P(HORZ_PH);
1559 	P(INIT_PHS);
1560 	P(DWINPOS);
1561 	P(DWINSZ);
1562 	P(SWIDTH);
1563 	P(SWIDTHSW);
1564 	P(SHEIGHT);
1565 	P(YRGBSCALE);
1566 	P(UVSCALE);
1567 	P(OCLRC0);
1568 	P(OCLRC1);
1569 	P(DCLRKV);
1570 	P(DCLRKM);
1571 	P(SCLRKVH);
1572 	P(SCLRKVL);
1573 	P(SCLRKEN);
1574 	P(OCONFIG);
1575 	P(OCMD);
1576 	P(OSTART_0Y);
1577 	P(OSTART_1Y);
1578 	P(OSTART_0U);
1579 	P(OSTART_0V);
1580 	P(OSTART_1U);
1581 	P(OSTART_1V);
1582 	P(OTILEOFF_0Y);
1583 	P(OTILEOFF_1Y);
1584 	P(OTILEOFF_0U);
1585 	P(OTILEOFF_0V);
1586 	P(OTILEOFF_1U);
1587 	P(OTILEOFF_1V);
1588 	P(FASTHSCALE);
1589 	P(UVSCALEV);
1590 #undef P
1591 }
1592 
1593 #endif
1594