xref: /dragonfly/sys/dev/drm/i915/intel_overlay.c (revision ef3ac1d1)
1 /*
2  * Copyright © 2009
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Daniel Vetter <daniel@ffwll.ch>
25  *
26  * Derived from Xorg ddx, xf86-video-intel, src/i830_video.c
27  */
28 #include <drm/drmP.h>
29 #include <drm/i915_drm.h>
30 #include "i915_drv.h"
31 #include "i915_reg.h"
32 #include "intel_drv.h"
33 
/*
 * Limits for overlay size. According to the Intel docs, the real limits are:
 * Y width: 4095, UV width (planar): 2047, Y height: 2047,
 * UV height (planar): 1023. But the xorg driver assumes 2048 for both height
 * and width. Use the minimum of both.
 *
 * NOTE(review): the last entry originally read "UV width (planar): * 1023";
 * "UV height" is presumed from the 2 * 1023 height limit below - confirm
 * against the hardware documentation.
 */
#define IMAGE_MAX_WIDTH		2048
#define IMAGE_MAX_HEIGHT	2046 /* 2 * 1023 */
/* on 830 and 845 these large limits result in the card hanging */
#define IMAGE_MAX_WIDTH_LEGACY	1024
#define IMAGE_MAX_HEIGHT_LEGACY	1088
43 
/* overlay register definitions */
/*
 * OCMD register: bit fields of the overlay command word. In this file only
 * OCMD_ENABLE, OCMD_BUF_TYPE_FRAME, OCMD_BUFFER0, the source format values
 * and the byte-order (swap) values are combined, in overlay_cmd_reg().
 *
 * NOTE(review): OCMD_FIELD_SELECT is defined as (0x1<<2) while
 * OCMD_FIELD0/OCMD_FIELD1 use bit 1 - confirm against the register spec.
 */
#define OCMD_TILED_SURFACE	(0x1<<19)
#define OCMD_MIRROR_MASK	(0x3<<17)
#define OCMD_MIRROR_MODE	(0x3<<17)
#define OCMD_MIRROR_HORIZONTAL	(0x1<<17)
#define OCMD_MIRROR_VERTICAL	(0x2<<17)
#define OCMD_MIRROR_BOTH	(0x3<<17)
#define OCMD_BYTEORDER_MASK	(0x3<<14) /* zero for YUYV or FOURCC YUY2 */
#define OCMD_UV_SWAP		(0x1<<14) /* YVYU */
#define OCMD_Y_SWAP		(0x2<<14) /* UYVY or FOURCC UYVY */
#define OCMD_Y_AND_UV_SWAP	(0x3<<14) /* VYUY */
#define OCMD_SOURCE_FORMAT_MASK (0xf<<10)
#define OCMD_RGB_888		(0x1<<10) /* not in i965 Intel docs */
#define OCMD_RGB_555		(0x2<<10) /* not in i965 Intel docs */
#define OCMD_RGB_565		(0x3<<10) /* not in i965 Intel docs */
#define OCMD_YUV_422_PACKED	(0x8<<10)
#define OCMD_YUV_411_PACKED	(0x9<<10) /* not in i965 Intel docs */
#define OCMD_YUV_420_PLANAR	(0xc<<10)
#define OCMD_YUV_422_PLANAR	(0xd<<10)
#define OCMD_YUV_410_PLANAR	(0xe<<10) /* also 411 */
#define OCMD_TVSYNCFLIP_PARITY	(0x1<<9)
#define OCMD_TVSYNCFLIP_ENABLE	(0x1<<7)
#define OCMD_BUF_TYPE_MASK	(0x1<<5)
#define OCMD_BUF_TYPE_FRAME	(0x0<<5)
#define OCMD_BUF_TYPE_FIELD	(0x1<<5)
#define OCMD_TEST_MODE		(0x1<<4)
#define OCMD_BUFFER_SELECT	(0x3<<2)
#define OCMD_BUFFER0		(0x0<<2)
#define OCMD_BUFFER1		(0x1<<2)
#define OCMD_FIELD_SELECT	(0x1<<2)
#define OCMD_FIELD0		(0x0<<1)
#define OCMD_FIELD1		(0x1<<1)
#define OCMD_ENABLE		(0x1<<0)
78 
/*
 * OCONFIG register: overlay configuration bits. intel_overlay_do_put_image()
 * writes OCONF_CC_OUT_8BIT, optionally OCONF_CSC_MODE_BT709 (gen4) and the
 * pipe select bit before switching the overlay on.
 */
#define OCONF_PIPE_MASK		(0x1<<18)
#define OCONF_PIPE_A		(0x0<<18)
#define OCONF_PIPE_B		(0x1<<18)
#define OCONF_GAMMA2_ENABLE	(0x1<<16)
#define OCONF_CSC_MODE_BT601	(0x0<<5)
#define OCONF_CSC_MODE_BT709	(0x1<<5)
#define OCONF_CSC_BYPASS	(0x1<<4)
#define OCONF_CC_OUT_8BIT	(0x1<<3)
#define OCONF_TEST_MODE		(0x1<<2)
#define OCONF_THREE_LINE_BUFFER	(0x1<<0)
#define OCONF_TWO_LINE_BUFFER	(0x0<<0)
91 
/*
 * DCLRKM (dst-key) register: destination colour-key enable bit plus the
 * per-framebuffer-depth key masks written by update_colorkey().
 */
/* unsigned constant: shifting a signed 1 into bit 31 is undefined behaviour */
#define DST_KEY_ENABLE		(0x1U<<31)
#define CLK_RGB24_MASK		0x0
#define CLK_RGB16_MASK		0x070307
#define CLK_RGB15_MASK		0x070707
#define CLK_RGB8I_MASK		0xffffff

/*
 * Expand a 16bpp (5:6:5) or 15bpp (5:5:5) pixel value into the 8:8:8 layout
 * written to DCLRKV. The argument is parenthesised so that expression
 * arguments such as `a | b` are masked as a whole.
 */
#define RGB16_TO_COLORKEY(c) \
	((((c) & 0xF800) << 8) | (((c) & 0x07E0) << 5) | (((c) & 0x001F) << 3))
#define RGB15_TO_COLORKEY(c) \
	((((c) & 0x7c00) << 9) | (((c) & 0x03E0) << 6) | (((c) & 0x001F) << 3))
103 
/*
 * overlay flip addr flag: OR-ed into the flip address emitted after
 * MI_OVERLAY_FLIP. intel_overlay_continue() sets it when the polyphase
 * filter should be (re)loaded.
 */
#define OFC_UPDATE		0x1
106 
/*
 * polyphase filter coefficients: tap counts per direction/plane and the
 * number of phases. They size the coefficient arrays in
 * struct overlay_registers and the static tables below.
 */
#define N_HORIZ_Y_TAPS          5
#define N_VERT_Y_TAPS           3
#define N_HORIZ_UV_TAPS         3
#define N_VERT_UV_TAPS          3
#define N_PHASES                17
#define MAX_TAPS                5
114 
/*
 * memory buffered overlay registers
 *
 * Layout of the register block obtained from intel_overlay_map_regs().
 * The reserved arrays pad the structure so that the coefficient tables land
 * on the offsets noted in the trailing comments.
 */
struct overlay_registers {
	u32 OBUF_0Y;		/* 0x00 */
	u32 OBUF_1Y;
	u32 OBUF_0U;
	u32 OBUF_0V;
	u32 OBUF_1U;
	u32 OBUF_1V;
	u32 OSTRIDE;
	u32 YRGB_VPH;
	u32 UV_VPH;
	u32 HORZ_PH;
	u32 INIT_PHS;
	u32 DWINPOS;
	u32 DWINSZ;
	u32 SWIDTH;
	u32 SWIDTHSW;
	u32 SHEIGHT;
	u32 YRGBSCALE;
	u32 UVSCALE;
	u32 OCLRC0;
	u32 OCLRC1;
	u32 DCLRKV;
	u32 DCLRKM;
	u32 SCLRKVH;
	u32 SCLRKVL;
	u32 SCLRKEN;
	u32 OCONFIG;
	u32 OCMD;
	u32 RESERVED1; /* 0x6C */
	u32 OSTART_0Y;		/* 0x70 */
	u32 OSTART_1Y;
	u32 OSTART_0U;
	u32 OSTART_0V;
	u32 OSTART_1U;
	u32 OSTART_1V;
	u32 OTILEOFF_0Y;	/* 0x88 */
	u32 OTILEOFF_1Y;
	u32 OTILEOFF_0U;
	u32 OTILEOFF_0V;
	u32 OTILEOFF_1U;
	u32 OTILEOFF_1V;
	u32 FASTHSCALE; /* 0xA0 */
	u32 UVSCALEV; /* 0xA4 */
	u32 RESERVEDC[(0x200 - 0xA8) / 4]; /* 0xA8 - 0x1FC */
	u16 Y_VCOEFS[N_VERT_Y_TAPS * N_PHASES]; /* 0x200 */
	u16 RESERVEDD[0x100 / 2 - N_VERT_Y_TAPS * N_PHASES];
	u16 Y_HCOEFS[N_HORIZ_Y_TAPS * N_PHASES]; /* 0x300 */
	u16 RESERVEDE[0x200 / 2 - N_HORIZ_Y_TAPS * N_PHASES];
	u16 UV_VCOEFS[N_VERT_UV_TAPS * N_PHASES]; /* 0x500 */
	u16 RESERVEDF[0x100 / 2 - N_VERT_UV_TAPS * N_PHASES];
	u16 UV_HCOEFS[N_HORIZ_UV_TAPS * N_PHASES]; /* 0x600 */
	u16 RESERVEDG[0x100 / 2 - N_HORIZ_UV_TAPS * N_PHASES];
};
169 
/*
 * Software state of the overlay plane.
 */
struct intel_overlay {
	struct drm_device *dev;
	/* crtc the overlay is bound to; cleared in intel_overlay_off_tail() */
	struct intel_crtc *crtc;
	/* buffer currently shown; pinned while in use */
	struct drm_i915_gem_object *vid_bo;
	/* previously shown buffer, released by intel_overlay_release_old_vid() */
	struct drm_i915_gem_object *old_vid_bo;
	/* set in intel_overlay_on(), cleared in intel_overlay_off_tail() */
	int active;
	int pfit_active;
	u32 pfit_vscale_ratio; /* shifted-point number, (1<<12) == 1.0 */
	u32 color_key;
	u32 brightness, contrast, saturation;
	/* last scale factors programmed by update_scaling_factors() */
	u32 old_xscale, old_yscale;
	/* register access */
	u32 flip_addr;
	struct drm_i915_gem_object *reg_bo;
	/* flip handling */
	/* seqno of the outstanding flip request, 0 when none is pending */
	uint32_t last_flip_req;
	/* run by intel_overlay_recover_from_interrupt() once the request is done */
	void (*flip_tail)(struct intel_overlay *);
};
188 
189 static struct overlay_registers __iomem *
190 intel_overlay_map_regs(struct intel_overlay *overlay)
191 {
192 	struct overlay_registers __iomem *regs;
193 
194 	if (OVERLAY_NEEDS_PHYSICAL(overlay->dev))
195 		regs = (struct overlay_registers __iomem *)overlay->reg_bo->phys_obj->handle->vaddr;
196 	else
197 		regs = pmap_mapdev_attr(overlay->dev->agp->base +
198 		    overlay->reg_bo->gtt_offset, PAGE_SIZE,
199 		    PAT_WRITE_COMBINING);
200 
201 	return regs;
202 }
203 
204 static void intel_overlay_unmap_regs(struct intel_overlay *overlay,
205 				     struct overlay_registers __iomem *regs)
206 {
207 	if (!OVERLAY_NEEDS_PHYSICAL(overlay->dev))
208 		pmap_unmapdev((vm_offset_t)regs, PAGE_SIZE);
209 }
210 
/*
 * Add a request on the render ring for the commands emitted so far and wait
 * for it to complete.
 *
 * @tail is stored in overlay->flip_tail. It is NOT called here: on success
 * the caller runs its own tail work. If the wait fails, last_flip_req stays
 * set, so intel_overlay_recover_from_interrupt() can later finish the wait
 * and invoke the stored tail.
 *
 * Returns 0 on success or the error from i915_add_request() /
 * i915_wait_seqno().
 */
static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
					 void (*tail)(struct intel_overlay *))
{
	struct drm_device *dev = overlay->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
	int ret;

	/* only one flip request may be outstanding at a time */
	BUG_ON(overlay->last_flip_req);
	ret = i915_add_request(ring, NULL, &overlay->last_flip_req);
	if (ret)
		return ret;

	overlay->flip_tail = tail;
	ret = i915_wait_seqno(ring, overlay->last_flip_req);
	if (ret)
		return ret;
	i915_gem_retire_requests(dev);

	/* request completed: nothing left to recover */
	overlay->last_flip_req = 0;
	return 0;
}
233 
234 /* overlay needs to be disable in OCMD reg */
235 static int intel_overlay_on(struct intel_overlay *overlay)
236 {
237 	struct drm_device *dev = overlay->dev;
238 	struct drm_i915_private *dev_priv = dev->dev_private;
239 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
240 	int ret;
241 
242 	BUG_ON(overlay->active);
243 	overlay->active = 1;
244 
245 	WARN_ON(IS_I830(dev) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE));
246 
247 	ret = intel_ring_begin(ring, 4);
248 	if (ret)
249 		return ret;
250 
251 	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON);
252 	intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE);
253 	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
254 	intel_ring_emit(ring, MI_NOOP);
255 	intel_ring_advance(ring);
256 
257 	return intel_overlay_do_wait_request(overlay, NULL);
258 }
259 
/*
 * Queue a flip to the register values currently in the register block.
 * The overlay needs to be enabled in the OCMD reg.
 *
 * @load_polyphase_filter: when true, OFC_UPDATE is set in the flip address.
 *
 * Does not wait: the request seqno is stored in overlay->last_flip_req.
 * Returns 0 on success or a negative error code.
 */
static int intel_overlay_continue(struct intel_overlay *overlay,
				  bool load_polyphase_filter)
{
	struct drm_device *dev = overlay->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
	u32 flip_addr = overlay->flip_addr;
	u32 tmp;
	int ret;

	BUG_ON(!overlay->active);

	if (load_polyphase_filter)
		flip_addr |= OFC_UPDATE;

	/* check for underruns */
	tmp = I915_READ(DOVSTA);
	if (tmp & (1 << 17))
		DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp);

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
	intel_ring_emit(ring, flip_addr);
	intel_ring_advance(ring);

	return i915_add_request(ring, NULL, &overlay->last_flip_req);
}
291 
292 static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
293 {
294 	struct drm_i915_gem_object *obj = overlay->old_vid_bo;
295 
296 	i915_gem_object_unpin(obj);
297 	drm_gem_object_unreference(&obj->base);
298 
299 	overlay->old_vid_bo = NULL;
300 }
301 
302 static void intel_overlay_off_tail(struct intel_overlay *overlay)
303 {
304 	struct drm_i915_gem_object *obj = overlay->vid_bo;
305 
306 	/* never have the overlay hw on without showing a frame */
307 	BUG_ON(!overlay->vid_bo);
308 
309 	i915_gem_object_unpin(obj);
310 	drm_gem_object_unreference(&obj->base);
311 	overlay->vid_bo = NULL;
312 
313 	overlay->crtc->overlay = NULL;
314 	overlay->crtc = NULL;
315 	overlay->active = 0;
316 }
317 
/*
 * Switch the overlay hardware off and wait for completion.
 * The overlay needs to be disabled in the OCMD reg.
 *
 * intel_overlay_off_tail is stored as the flip tail, so an interrupted wait
 * can be finished by intel_overlay_recover_from_interrupt(). On success the
 * caller runs the tail itself.
 *
 * Returns 0 on success or a negative error code.
 */
static int intel_overlay_off(struct intel_overlay *overlay)
{
	struct drm_device *dev = overlay->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
	u32 flip_addr = overlay->flip_addr;
	int ret;

	BUG_ON(!overlay->active);

	/* According to intel docs the overlay hw may hang (when switching
	 * off) without loading the filter coeffs. It is however unclear whether
	 * this applies to the disabling of the overlay or to the switching off
	 * of the hw. Do it in both cases */
	flip_addr |= OFC_UPDATE;

	/* six dwords are emitted on either branch below */
	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	/* wait for overlay to go idle */
	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
	intel_ring_emit(ring, flip_addr);
	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
	/* turn overlay off */
	if (IS_I830(dev)) {
		/* Workaround: Don't disable the overlay fully, since otherwise
		 * it dies on the next OVERLAY_ON cmd. */
		intel_ring_emit(ring, MI_NOOP);
		intel_ring_emit(ring, MI_NOOP);
		intel_ring_emit(ring, MI_NOOP);
	} else {
		intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_OFF);
		intel_ring_emit(ring, flip_addr);
		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
	}
	intel_ring_advance(ring);

	return intel_overlay_do_wait_request(overlay, intel_overlay_off_tail);
}
359 
/*
 * Recover from an interruption due to a signal.
 * We have to be careful not to repeat work forever and to make forward
 * progress.
 *
 * If a flip request is still outstanding (last_flip_req != 0), wait for it,
 * retire requests, run the stored flip_tail (if any) and clear
 * last_flip_req. Returns 0 on success or the error from i915_wait_seqno().
 */
static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
{
	struct drm_device *dev = overlay->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
	int ret;

	/* nothing pending */
	if (overlay->last_flip_req == 0)
		return 0;

	ret = i915_wait_seqno(ring, overlay->last_flip_req);
	if (ret)
		return ret;
	i915_gem_retire_requests(dev);

	/* finish the work the interrupted waiter could not do */
	if (overlay->flip_tail)
		overlay->flip_tail(overlay);

	overlay->last_flip_req = 0;
	return 0;
}
383 
/* Wait for pending overlay flip and release old frame.
 * Needs to be called before the overlay registers are changed
 * via intel_overlay_(un)map_regs.
 *
 * Returns 0 on success (including when there is no old frame) or a negative
 * error code from the ring/request helpers.
 */
static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
{
	struct drm_device *dev = overlay->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
	int ret;

	/* Only wait if there is actually an old frame to release to
	 * guarantee forward progress.
	 */
	if (!overlay->old_vid_bo)
		return 0;

	if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
		/* synchronous slowpath */
		ret = intel_ring_begin(ring, 2);
		if (ret)
			return ret;

		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
		intel_ring_emit(ring, MI_NOOP);
		intel_ring_advance(ring);

		/* the tail is stored so an interrupted wait can be recovered */
		ret = intel_overlay_do_wait_request(overlay,
						    intel_overlay_release_old_vid_tail);
		if (ret)
			return ret;
	}

	/* flip is done (or was never pending): drop the old frame now */
	intel_overlay_release_old_vid_tail(overlay);
	return 0;
}
420 
/*
 * Parameters of one put-image operation as consumed by
 * intel_overlay_do_put_image() and the scaling helpers.
 */
struct put_image_params {
	int format;		/* I915_OVERLAY_* type/depth/swap flags */
	short dst_x;		/* destination window, written to DWINPOS/DWINSZ */
	short dst_y;
	short dst_w;
	short dst_h;
	short src_w;		/* source size, written to SWIDTH/SHEIGHT */
	short src_scan_h;	/* scanned-out source size used for scaling */
	short src_scan_w;
	short src_h;
	short stride_Y;		/* written to OSTRIDE (UV in the upper half) */
	short stride_UV;
	int offset_Y;		/* plane offsets added to the buffer's GTT offset */
	int offset_U;
	int offset_V;
};
437 
438 static int packed_depth_bytes(u32 format)
439 {
440 	switch (format & I915_OVERLAY_DEPTH_MASK) {
441 	case I915_OVERLAY_YUV422:
442 		return 4;
443 	case I915_OVERLAY_YUV411:
444 		/* return 6; not implemented */
445 	default:
446 		return -EINVAL;
447 	}
448 }
449 
450 static int packed_width_bytes(u32 format, short width)
451 {
452 	switch (format & I915_OVERLAY_DEPTH_MASK) {
453 	case I915_OVERLAY_YUV422:
454 		return width << 1;
455 	default:
456 		return -EINVAL;
457 	}
458 }
459 
460 static int uv_hsubsampling(u32 format)
461 {
462 	switch (format & I915_OVERLAY_DEPTH_MASK) {
463 	case I915_OVERLAY_YUV422:
464 	case I915_OVERLAY_YUV420:
465 		return 2;
466 	case I915_OVERLAY_YUV411:
467 	case I915_OVERLAY_YUV410:
468 		return 4;
469 	default:
470 		return -EINVAL;
471 	}
472 }
473 
474 static int uv_vsubsampling(u32 format)
475 {
476 	switch (format & I915_OVERLAY_DEPTH_MASK) {
477 	case I915_OVERLAY_YUV420:
478 	case I915_OVERLAY_YUV410:
479 		return 2;
480 	case I915_OVERLAY_YUV422:
481 	case I915_OVERLAY_YUV411:
482 		return 1;
483 	default:
484 		return -EINVAL;
485 	}
486 }
487 
488 static u32 calc_swidthsw(struct drm_device *dev, u32 offset, u32 width)
489 {
490 	u32 mask, shift, ret;
491 	if (IS_GEN2(dev)) {
492 		mask = 0x1f;
493 		shift = 5;
494 	} else {
495 		mask = 0x3f;
496 		shift = 6;
497 	}
498 	ret = ((offset + width + mask) >> shift) - (offset >> shift);
499 	if (!IS_GEN2(dev))
500 		ret <<= 1;
501 	ret -= 1;
502 	return ret << 2;
503 }
504 
/*
 * Static horizontal filter coefficients for the Y plane: one row of
 * N_HORIZ_Y_TAPS values per phase, N_PHASES rows. Copied into
 * overlay_registers.Y_HCOEFS by update_polyphase_filter().
 */
static const u16 y_static_hcoeffs[N_HORIZ_Y_TAPS * N_PHASES] = {
	0x3000, 0xb4a0, 0x1930, 0x1920, 0xb4a0,
	0x3000, 0xb500, 0x19d0, 0x1880, 0xb440,
	0x3000, 0xb540, 0x1a88, 0x2f80, 0xb3e0,
	0x3000, 0xb580, 0x1b30, 0x2e20, 0xb380,
	0x3000, 0xb5c0, 0x1bd8, 0x2cc0, 0xb320,
	0x3020, 0xb5e0, 0x1c60, 0x2b80, 0xb2c0,
	0x3020, 0xb5e0, 0x1cf8, 0x2a20, 0xb260,
	0x3020, 0xb5e0, 0x1d80, 0x28e0, 0xb200,
	0x3020, 0xb5c0, 0x1e08, 0x3f40, 0xb1c0,
	0x3020, 0xb580, 0x1e78, 0x3ce0, 0xb160,
	0x3040, 0xb520, 0x1ed8, 0x3aa0, 0xb120,
	0x3040, 0xb4a0, 0x1f30, 0x3880, 0xb0e0,
	0x3040, 0xb400, 0x1f78, 0x3680, 0xb0a0,
	0x3020, 0xb340, 0x1fb8, 0x34a0, 0xb060,
	0x3020, 0xb240, 0x1fe0, 0x32e0, 0xb040,
	0x3020, 0xb140, 0x1ff8, 0x3160, 0xb020,
	0xb000, 0x3000, 0x0800, 0x3000, 0xb000
};
524 
/*
 * Static horizontal filter coefficients for the UV planes:
 * N_HORIZ_UV_TAPS values per phase for N_PHASES phases, laid out here six
 * values (two phases) per source line. Copied into
 * overlay_registers.UV_HCOEFS by update_polyphase_filter().
 */
static const u16 uv_static_hcoeffs[N_HORIZ_UV_TAPS * N_PHASES] = {
	0x3000, 0x1800, 0x1800, 0xb000, 0x18d0, 0x2e60,
	0xb000, 0x1990, 0x2ce0, 0xb020, 0x1a68, 0x2b40,
	0xb040, 0x1b20, 0x29e0, 0xb060, 0x1bd8, 0x2880,
	0xb080, 0x1c88, 0x3e60, 0xb0a0, 0x1d28, 0x3c00,
	0xb0c0, 0x1db8, 0x39e0, 0xb0e0, 0x1e40, 0x37e0,
	0xb100, 0x1eb8, 0x3620, 0xb100, 0x1f18, 0x34a0,
	0xb100, 0x1f68, 0x3360, 0xb0e0, 0x1fa8, 0x3240,
	0xb0c0, 0x1fe0, 0x3140, 0xb060, 0x1ff0, 0x30a0,
	0x3000, 0x0800, 0x3000
};
536 
537 static void update_polyphase_filter(struct overlay_registers __iomem *regs)
538 {
539 	memcpy_toio(regs->Y_HCOEFS, y_static_hcoeffs, sizeof(y_static_hcoeffs));
540 	memcpy_toio(regs->UV_HCOEFS, uv_static_hcoeffs,
541 		    sizeof(uv_static_hcoeffs));
542 }
543 
544 static bool update_scaling_factors(struct intel_overlay *overlay,
545 				   struct overlay_registers __iomem *regs,
546 				   struct put_image_params *params)
547 {
548 	/* fixed point with a 12 bit shift */
549 	u32 xscale, yscale, xscale_UV, yscale_UV;
550 #define FP_SHIFT 12
551 #define FRACT_MASK 0xfff
552 	bool scale_changed = false;
553 	int uv_hscale = uv_hsubsampling(params->format);
554 	int uv_vscale = uv_vsubsampling(params->format);
555 
556 	if (params->dst_w > 1)
557 		xscale = ((params->src_scan_w - 1) << FP_SHIFT)
558 			/(params->dst_w);
559 	else
560 		xscale = 1 << FP_SHIFT;
561 
562 	if (params->dst_h > 1)
563 		yscale = ((params->src_scan_h - 1) << FP_SHIFT)
564 			/(params->dst_h);
565 	else
566 		yscale = 1 << FP_SHIFT;
567 
568 	/*if (params->format & I915_OVERLAY_YUV_PLANAR) {*/
569 	xscale_UV = xscale/uv_hscale;
570 	yscale_UV = yscale/uv_vscale;
571 	/* make the Y scale to UV scale ratio an exact multiply */
572 	xscale = xscale_UV * uv_hscale;
573 	yscale = yscale_UV * uv_vscale;
574 	/*} else {
575 	  xscale_UV = 0;
576 	  yscale_UV = 0;
577 	  }*/
578 
579 	if (xscale != overlay->old_xscale || yscale != overlay->old_yscale)
580 		scale_changed = true;
581 	overlay->old_xscale = xscale;
582 	overlay->old_yscale = yscale;
583 
584 	iowrite32(((yscale & FRACT_MASK) << 20) |
585 		  ((xscale >> FP_SHIFT)  << 16) |
586 		  ((xscale & FRACT_MASK) << 3),
587 		 &regs->YRGBSCALE);
588 
589 	iowrite32(((yscale_UV & FRACT_MASK) << 20) |
590 		  ((xscale_UV >> FP_SHIFT)  << 16) |
591 		  ((xscale_UV & FRACT_MASK) << 3),
592 		 &regs->UVSCALE);
593 
594 	iowrite32((((yscale    >> FP_SHIFT) << 16) |
595 		   ((yscale_UV >> FP_SHIFT) << 0)),
596 		 &regs->UVSCALEV);
597 
598 	if (scale_changed)
599 		update_polyphase_filter(regs);
600 
601 	return scale_changed;
602 }
603 
604 static void update_colorkey(struct intel_overlay *overlay,
605 			    struct overlay_registers __iomem *regs)
606 {
607 	u32 key = overlay->color_key;
608 
609 	switch (overlay->crtc->base.fb->bits_per_pixel) {
610 	case 8:
611 		iowrite32(0, &regs->DCLRKV);
612 		iowrite32(CLK_RGB8I_MASK | DST_KEY_ENABLE, &regs->DCLRKM);
613 		break;
614 
615 	case 16:
616 		if (overlay->crtc->base.fb->depth == 15) {
617 			iowrite32(RGB15_TO_COLORKEY(key), &regs->DCLRKV);
618 			iowrite32(CLK_RGB15_MASK | DST_KEY_ENABLE,
619 				  &regs->DCLRKM);
620 		} else {
621 			iowrite32(RGB16_TO_COLORKEY(key), &regs->DCLRKV);
622 			iowrite32(CLK_RGB16_MASK | DST_KEY_ENABLE,
623 				  &regs->DCLRKM);
624 		}
625 		break;
626 
627 	case 24:
628 	case 32:
629 		iowrite32(key, &regs->DCLRKV);
630 		iowrite32(CLK_RGB24_MASK | DST_KEY_ENABLE, &regs->DCLRKM);
631 		break;
632 	}
633 }
634 
635 static u32 overlay_cmd_reg(struct put_image_params *params)
636 {
637 	u32 cmd = OCMD_ENABLE | OCMD_BUF_TYPE_FRAME | OCMD_BUFFER0;
638 
639 	if (params->format & I915_OVERLAY_YUV_PLANAR) {
640 		switch (params->format & I915_OVERLAY_DEPTH_MASK) {
641 		case I915_OVERLAY_YUV422:
642 			cmd |= OCMD_YUV_422_PLANAR;
643 			break;
644 		case I915_OVERLAY_YUV420:
645 			cmd |= OCMD_YUV_420_PLANAR;
646 			break;
647 		case I915_OVERLAY_YUV411:
648 		case I915_OVERLAY_YUV410:
649 			cmd |= OCMD_YUV_410_PLANAR;
650 			break;
651 		}
652 	} else { /* YUV packed */
653 		switch (params->format & I915_OVERLAY_DEPTH_MASK) {
654 		case I915_OVERLAY_YUV422:
655 			cmd |= OCMD_YUV_422_PACKED;
656 			break;
657 		case I915_OVERLAY_YUV411:
658 			cmd |= OCMD_YUV_411_PACKED;
659 			break;
660 		}
661 
662 		switch (params->format & I915_OVERLAY_SWAP_MASK) {
663 		case I915_OVERLAY_NO_SWAP:
664 			break;
665 		case I915_OVERLAY_UV_SWAP:
666 			cmd |= OCMD_UV_SWAP;
667 			break;
668 		case I915_OVERLAY_Y_SWAP:
669 			cmd |= OCMD_Y_SWAP;
670 			break;
671 		case I915_OVERLAY_Y_AND_UV_SWAP:
672 			cmd |= OCMD_Y_AND_UV_SWAP;
673 			break;
674 		}
675 	}
676 
677 	return cmd;
678 }
679 
680 static int intel_overlay_do_put_image(struct intel_overlay *overlay,
681 				      struct drm_i915_gem_object *new_bo,
682 				      struct put_image_params *params)
683 {
684 	int ret, tmp_width;
685 	struct overlay_registers __iomem *regs;
686 	bool scale_changed = false;
687 	struct drm_device *dev = overlay->dev;
688 	u32 swidth, swidthsw, sheight, ostride;
689 
690 	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
691 	BUG_ON(!mutex_is_locked(&dev->mode_config.mutex));
692 	BUG_ON(!overlay);
693 
694 	ret = intel_overlay_release_old_vid(overlay);
695 	if (ret != 0)
696 		return ret;
697 
698 	ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL);
699 	if (ret != 0)
700 		return ret;
701 
702 	ret = i915_gem_object_put_fence(new_bo);
703 	if (ret)
704 		goto out_unpin;
705 
706 	if (!overlay->active) {
707 		u32 oconfig;
708 		regs = intel_overlay_map_regs(overlay);
709 		if (!regs) {
710 			ret = -ENOMEM;
711 			goto out_unpin;
712 		}
713 		oconfig = OCONF_CC_OUT_8BIT;
714 		if (IS_GEN4(overlay->dev))
715 			oconfig |= OCONF_CSC_MODE_BT709;
716 		oconfig |= overlay->crtc->pipe == 0 ?
717 			OCONF_PIPE_A : OCONF_PIPE_B;
718 		iowrite32(oconfig, &regs->OCONFIG);
719 		intel_overlay_unmap_regs(overlay, regs);
720 
721 		ret = intel_overlay_on(overlay);
722 		if (ret != 0)
723 			goto out_unpin;
724 	}
725 
726 	regs = intel_overlay_map_regs(overlay);
727 	if (!regs) {
728 		ret = -ENOMEM;
729 		goto out_unpin;
730 	}
731 
732 	iowrite32((params->dst_y << 16) | params->dst_x, &regs->DWINPOS);
733 	iowrite32((params->dst_h << 16) | params->dst_w, &regs->DWINSZ);
734 
735 	if (params->format & I915_OVERLAY_YUV_PACKED)
736 		tmp_width = packed_width_bytes(params->format, params->src_w);
737 	else
738 		tmp_width = params->src_w;
739 
740 	swidth = params->src_w;
741 	swidthsw = calc_swidthsw(overlay->dev, params->offset_Y, tmp_width);
742 	sheight = params->src_h;
743 	iowrite32(new_bo->gtt_offset + params->offset_Y, &regs->OBUF_0Y);
744 	ostride = params->stride_Y;
745 
746 	if (params->format & I915_OVERLAY_YUV_PLANAR) {
747 		int uv_hscale = uv_hsubsampling(params->format);
748 		int uv_vscale = uv_vsubsampling(params->format);
749 		u32 tmp_U, tmp_V;
750 		swidth |= (params->src_w/uv_hscale) << 16;
751 		tmp_U = calc_swidthsw(overlay->dev, params->offset_U,
752 				      params->src_w/uv_hscale);
753 		tmp_V = calc_swidthsw(overlay->dev, params->offset_V,
754 				      params->src_w/uv_hscale);
755 		swidthsw |= max_t(u32, tmp_U, tmp_V) << 16;
756 		sheight |= (params->src_h/uv_vscale) << 16;
757 		iowrite32(new_bo->gtt_offset + params->offset_U, &regs->OBUF_0U);
758 		iowrite32(new_bo->gtt_offset + params->offset_V, &regs->OBUF_0V);
759 		ostride |= params->stride_UV << 16;
760 	}
761 
762 	iowrite32(swidth, &regs->SWIDTH);
763 	iowrite32(swidthsw, &regs->SWIDTHSW);
764 	iowrite32(sheight, &regs->SHEIGHT);
765 	iowrite32(ostride, &regs->OSTRIDE);
766 
767 	scale_changed = update_scaling_factors(overlay, regs, params);
768 
769 	update_colorkey(overlay, regs);
770 
771 	iowrite32(overlay_cmd_reg(params), &regs->OCMD);
772 
773 	intel_overlay_unmap_regs(overlay, regs);
774 
775 	ret = intel_overlay_continue(overlay, scale_changed);
776 	if (ret)
777 		goto out_unpin;
778 
779 	overlay->old_vid_bo = overlay->vid_bo;
780 	overlay->vid_bo = new_bo;
781 
782 	return 0;
783 
784 out_unpin:
785 	i915_gem_object_unpin(new_bo);
786 	return ret;
787 }
788 
789 int intel_overlay_switch_off(struct intel_overlay *overlay)
790 {
791 	struct overlay_registers __iomem *regs;
792 	struct drm_device *dev = overlay->dev;
793 	int ret;
794 
795 	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
796 	BUG_ON(!mutex_is_locked(&dev->mode_config.mutex));
797 
798 	ret = intel_overlay_recover_from_interrupt(overlay);
799 	if (ret != 0)
800 		return ret;
801 
802 	if (!overlay->active)
803 		return 0;
804 
805 	ret = intel_overlay_release_old_vid(overlay);
806 	if (ret != 0)
807 		return ret;
808 
809 	regs = intel_overlay_map_regs(overlay);
810 	iowrite32(0, &regs->OCMD);
811 	intel_overlay_unmap_regs(overlay, regs);
812 
813 	ret = intel_overlay_off(overlay);
814 	if (ret != 0)
815 		return ret;
816 
817 	intel_overlay_off_tail(overlay);
818 	return 0;
819 }
820 
821 static int check_overlay_possible_on_crtc(struct intel_overlay *overlay,
822 					  struct intel_crtc *crtc)
823 {
824 	drm_i915_private_t *dev_priv = overlay->dev->dev_private;
825 
826 	if (!crtc->active)
827 		return -EINVAL;
828 
829 	/* can't use the overlay with double wide pipe */
830 	if (INTEL_INFO(overlay->dev)->gen < 4 &&
831 	    (I915_READ(PIPECONF(crtc->pipe)) & (PIPECONF_DOUBLE_WIDE | PIPECONF_ENABLE)) != PIPECONF_ENABLE)
832 		return -EINVAL;
833 
834 	return 0;
835 }
836 
837 static void update_pfit_vscale_ratio(struct intel_overlay *overlay)
838 {
839 	struct drm_device *dev = overlay->dev;
840 	drm_i915_private_t *dev_priv = dev->dev_private;
841 	u32 pfit_control = I915_READ(PFIT_CONTROL);
842 	u32 ratio;
843 
844 	/* XXX: This is not the same logic as in the xorg driver, but more in
845 	 * line with the intel documentation for the i965
846 	 */
847 	if (INTEL_INFO(dev)->gen >= 4) {
848 		/* on i965 use the PGM reg to read out the autoscaler values */
849 		ratio = I915_READ(PFIT_PGM_RATIOS) >> PFIT_VERT_SCALE_SHIFT_965;
850 	} else {
851 		if (pfit_control & VERT_AUTO_SCALE)
852 			ratio = I915_READ(PFIT_AUTO_RATIOS);
853 		else
854 			ratio = I915_READ(PFIT_PGM_RATIOS);
855 		ratio >>= PFIT_VERT_SCALE_SHIFT;
856 	}
857 
858 	overlay->pfit_vscale_ratio = ratio;
859 }
860 
861 static int check_overlay_dst(struct intel_overlay *overlay,
862 			     struct drm_intel_overlay_put_image *rec)
863 {
864 	struct drm_display_mode *mode = &overlay->crtc->base.mode;
865 
866 	if (rec->dst_x < mode->hdisplay &&
867 	    rec->dst_x + rec->dst_width <= mode->hdisplay &&
868 	    rec->dst_y < mode->vdisplay &&
869 	    rec->dst_y + rec->dst_height <= mode->vdisplay)
870 		return 0;
871 	else
872 		return -EINVAL;
873 }
874 
875 static int check_overlay_scaling(struct put_image_params *rec)
876 {
877 	u32 tmp;
878 
879 	/* downscaling limit is 8.0 */
880 	tmp = ((rec->src_scan_h << 16) / rec->dst_h) >> 16;
881 	if (tmp > 7)
882 		return -EINVAL;
883 	tmp = ((rec->src_scan_w << 16) / rec->dst_w) >> 16;
884 	if (tmp > 7)
885 		return -EINVAL;
886 
887 	return 0;
888 }
889 
890 static int check_overlay_src(struct drm_device *dev,
891 			     struct drm_intel_overlay_put_image *rec,
892 			     struct drm_i915_gem_object *new_bo)
893 {
894 	int uv_hscale = uv_hsubsampling(rec->flags);
895 	int uv_vscale = uv_vsubsampling(rec->flags);
896 	u32 stride_mask;
897 	int depth;
898 	u32 tmp;
899 
900 	/* check src dimensions */
901 	if (IS_845G(dev) || IS_I830(dev)) {
902 		if (rec->src_height > IMAGE_MAX_HEIGHT_LEGACY ||
903 		    rec->src_width  > IMAGE_MAX_WIDTH_LEGACY)
904 			return -EINVAL;
905 	} else {
906 		if (rec->src_height > IMAGE_MAX_HEIGHT ||
907 		    rec->src_width  > IMAGE_MAX_WIDTH)
908 			return -EINVAL;
909 	}
910 
911 	/* better safe than sorry, use 4 as the maximal subsampling ratio */
912 	if (rec->src_height < N_VERT_Y_TAPS*4 ||
913 	    rec->src_width  < N_HORIZ_Y_TAPS*4)
914 		return -EINVAL;
915 
916 	/* check alignment constraints */
917 	switch (rec->flags & I915_OVERLAY_TYPE_MASK) {
918 	case I915_OVERLAY_RGB:
919 		/* not implemented */
920 		return -EINVAL;
921 
922 	case I915_OVERLAY_YUV_PACKED:
923 		if (uv_vscale != 1)
924 			return -EINVAL;
925 
926 		depth = packed_depth_bytes(rec->flags);
927 		if (depth < 0)
928 			return depth;
929 
930 		/* ignore UV planes */
931 		rec->stride_UV = 0;
932 		rec->offset_U = 0;
933 		rec->offset_V = 0;
934 		/* check pixel alignment */
935 		if (rec->offset_Y % depth)
936 			return -EINVAL;
937 		break;
938 
939 	case I915_OVERLAY_YUV_PLANAR:
940 		if (uv_vscale < 0 || uv_hscale < 0)
941 			return -EINVAL;
942 		/* no offset restrictions for planar formats */
943 		break;
944 
945 	default:
946 		return -EINVAL;
947 	}
948 
949 	if (rec->src_width % uv_hscale)
950 		return -EINVAL;
951 
952 	/* stride checking */
953 	if (IS_I830(dev) || IS_845G(dev))
954 		stride_mask = 255;
955 	else
956 		stride_mask = 63;
957 
958 	if (rec->stride_Y & stride_mask || rec->stride_UV & stride_mask)
959 		return -EINVAL;
960 	if (IS_GEN4(dev) && rec->stride_Y < 512)
961 		return -EINVAL;
962 
963 	tmp = (rec->flags & I915_OVERLAY_TYPE_MASK) == I915_OVERLAY_YUV_PLANAR ?
964 		4096 : 8192;
965 	if (rec->stride_Y > tmp || rec->stride_UV > 2*1024)
966 		return -EINVAL;
967 
968 	/* check buffer dimensions */
969 	switch (rec->flags & I915_OVERLAY_TYPE_MASK) {
970 	case I915_OVERLAY_RGB:
971 	case I915_OVERLAY_YUV_PACKED:
972 		/* always 4 Y values per depth pixels */
973 		if (packed_width_bytes(rec->flags, rec->src_width) > rec->stride_Y)
974 			return -EINVAL;
975 
976 		tmp = rec->stride_Y*rec->src_height;
977 		if (rec->offset_Y + tmp > new_bo->base.size)
978 			return -EINVAL;
979 		break;
980 
981 	case I915_OVERLAY_YUV_PLANAR:
982 		if (rec->src_width > rec->stride_Y)
983 			return -EINVAL;
984 		if (rec->src_width/uv_hscale > rec->stride_UV)
985 			return -EINVAL;
986 
987 		tmp = rec->stride_Y * rec->src_height;
988 		if (rec->offset_Y + tmp > new_bo->base.size)
989 			return -EINVAL;
990 
991 		tmp = rec->stride_UV * (rec->src_height / uv_vscale);
992 		if (rec->offset_U + tmp > new_bo->base.size ||
993 		    rec->offset_V + tmp > new_bo->base.size)
994 			return -EINVAL;
995 		break;
996 	}
997 
998 	return 0;
999 }
1000 
1001 /**
1002  * Return the pipe currently connected to the panel fitter,
1003  * or -1 if the panel fitter is not present or not in use
1004  */
1005 static int intel_panel_fitter_pipe(struct drm_device *dev)
1006 {
1007 	struct drm_i915_private *dev_priv = dev->dev_private;
1008 	u32  pfit_control;
1009 
1010 	/* i830 doesn't have a panel fitter */
1011 	if (IS_I830(dev))
1012 		return -1;
1013 
1014 	pfit_control = I915_READ(PFIT_CONTROL);
1015 
1016 	/* See if the panel fitter is in use */
1017 	if ((pfit_control & PFIT_ENABLE) == 0)
1018 		return -1;
1019 
1020 	/* 965 can place panel fitter on either pipe */
1021 	if (IS_GEN4(dev))
1022 		return (pfit_control >> 29) & 0x3;
1023 
1024 	/* older chips can only use pipe 1 */
1025 	return 1;
1026 }
1027 
1028 int intel_overlay_put_image(struct drm_device *dev, void *data,
1029 			    struct drm_file *file_priv)
1030 {
1031 	struct drm_intel_overlay_put_image *put_image_rec = data;
1032 	drm_i915_private_t *dev_priv = dev->dev_private;
1033 	struct intel_overlay *overlay;
1034 	struct drm_mode_object *drmmode_obj;
1035 	struct intel_crtc *crtc;
1036 	struct drm_i915_gem_object *new_bo;
1037 	struct put_image_params *params;
1038 	int ret;
1039 
1040 	/* No need to check for DRIVER_MODESET - we don't set it up then. */
1041 	overlay = dev_priv->overlay;
1042 	if (!overlay) {
1043 		DRM_DEBUG("userspace bug: no overlay\n");
1044 		return -ENODEV;
1045 	}
1046 
1047 	if (!(put_image_rec->flags & I915_OVERLAY_ENABLE)) {
1048 		lockmgr(&dev->mode_config.mutex, LK_EXCLUSIVE);
1049 		DRM_LOCK(dev);
1050 
1051 		ret = intel_overlay_switch_off(overlay);
1052 
1053 		DRM_UNLOCK(dev);
1054 		lockmgr(&dev->mode_config.mutex, LK_RELEASE);
1055 
1056 		return ret;
1057 	}
1058 
1059 	params = kmalloc(sizeof(struct put_image_params), M_DRM,
1060 	    M_WAITOK | M_ZERO);
1061 	if (!params)
1062 		return -ENOMEM;
1063 
1064 	drmmode_obj = drm_mode_object_find(dev, put_image_rec->crtc_id,
1065 					   DRM_MODE_OBJECT_CRTC);
1066 	if (!drmmode_obj) {
1067 		ret = -ENOENT;
1068 		goto out_free;
1069 	}
1070 	crtc = to_intel_crtc(obj_to_crtc(drmmode_obj));
1071 
1072 	new_bo = to_intel_bo(drm_gem_object_lookup(dev, file_priv,
1073 						   put_image_rec->bo_handle));
1074 	if (&new_bo->base == NULL) {
1075 		ret = -ENOENT;
1076 		goto out_free;
1077 	}
1078 
1079 	lockmgr(&dev->mode_config.mutex, LK_EXCLUSIVE);
1080 	DRM_LOCK(dev);
1081 
1082 	if (new_bo->tiling_mode) {
1083 		DRM_ERROR("buffer used for overlay image can not be tiled\n");
1084 		ret = -EINVAL;
1085 		goto out_unlock;
1086 	}
1087 
1088 	ret = intel_overlay_recover_from_interrupt(overlay);
1089 	if (ret != 0)
1090 		goto out_unlock;
1091 
1092 	if (overlay->crtc != crtc) {
1093 		struct drm_display_mode *mode = &crtc->base.mode;
1094 		ret = intel_overlay_switch_off(overlay);
1095 		if (ret != 0)
1096 			goto out_unlock;
1097 
1098 		ret = check_overlay_possible_on_crtc(overlay, crtc);
1099 		if (ret != 0)
1100 			goto out_unlock;
1101 
1102 		overlay->crtc = crtc;
1103 		crtc->overlay = overlay;
1104 
1105 		/* line too wide, i.e. one-line-mode */
1106 		if (mode->hdisplay > 1024 &&
1107 		    intel_panel_fitter_pipe(dev) == crtc->pipe) {
1108 			overlay->pfit_active = 1;
1109 			update_pfit_vscale_ratio(overlay);
1110 		} else
1111 			overlay->pfit_active = 0;
1112 	}
1113 
1114 	ret = check_overlay_dst(overlay, put_image_rec);
1115 	if (ret != 0)
1116 		goto out_unlock;
1117 
1118 	if (overlay->pfit_active) {
1119 		params->dst_y = ((((u32)put_image_rec->dst_y) << 12) /
1120 				 overlay->pfit_vscale_ratio);
1121 		/* shifting right rounds downwards, so add 1 */
1122 		params->dst_h = ((((u32)put_image_rec->dst_height) << 12) /
1123 				 overlay->pfit_vscale_ratio) + 1;
1124 	} else {
1125 		params->dst_y = put_image_rec->dst_y;
1126 		params->dst_h = put_image_rec->dst_height;
1127 	}
1128 	params->dst_x = put_image_rec->dst_x;
1129 	params->dst_w = put_image_rec->dst_width;
1130 
1131 	params->src_w = put_image_rec->src_width;
1132 	params->src_h = put_image_rec->src_height;
1133 	params->src_scan_w = put_image_rec->src_scan_width;
1134 	params->src_scan_h = put_image_rec->src_scan_height;
1135 	if (params->src_scan_h > params->src_h ||
1136 	    params->src_scan_w > params->src_w) {
1137 		ret = -EINVAL;
1138 		goto out_unlock;
1139 	}
1140 
1141 	ret = check_overlay_src(dev, put_image_rec, new_bo);
1142 	if (ret != 0)
1143 		goto out_unlock;
1144 	params->format = put_image_rec->flags & ~I915_OVERLAY_FLAGS_MASK;
1145 	params->stride_Y = put_image_rec->stride_Y;
1146 	params->stride_UV = put_image_rec->stride_UV;
1147 	params->offset_Y = put_image_rec->offset_Y;
1148 	params->offset_U = put_image_rec->offset_U;
1149 	params->offset_V = put_image_rec->offset_V;
1150 
1151 	/* Check scaling after src size to prevent a divide-by-zero. */
1152 	ret = check_overlay_scaling(params);
1153 	if (ret != 0)
1154 		goto out_unlock;
1155 
1156 	ret = intel_overlay_do_put_image(overlay, new_bo, params);
1157 	if (ret != 0)
1158 		goto out_unlock;
1159 
1160 	DRM_UNLOCK(dev);
1161 	lockmgr(&dev->mode_config.mutex, LK_RELEASE);
1162 
1163 	kfree(params, M_DRM);
1164 
1165 	return 0;
1166 
1167 out_unlock:
1168 	DRM_UNLOCK(dev);
1169 	lockmgr(&dev->mode_config.mutex, LK_RELEASE);
1170 	drm_gem_object_unreference_unlocked(&new_bo->base);
1171 out_free:
1172 	kfree(params, M_DRM);
1173 
1174 	return ret;
1175 }
1176 
1177 static void update_reg_attrs(struct intel_overlay *overlay,
1178 			     struct overlay_registers __iomem *regs)
1179 {
1180 	iowrite32((overlay->contrast << 18) | (overlay->brightness & 0xff),
1181 		  &regs->OCLRC0);
1182 	iowrite32(overlay->saturation, &regs->OCLRC1);
1183 }
1184 
1185 static bool check_gamma_bounds(u32 gamma1, u32 gamma2)
1186 {
1187 	int i;
1188 
1189 	if (gamma1 & 0xff000000 || gamma2 & 0xff000000)
1190 		return false;
1191 
1192 	for (i = 0; i < 3; i++) {
1193 		if (((gamma1 >> i*8) & 0xff) >= ((gamma2 >> i*8) & 0xff))
1194 			return false;
1195 	}
1196 
1197 	return true;
1198 }
1199 
1200 static bool check_gamma5_errata(u32 gamma5)
1201 {
1202 	int i;
1203 
1204 	for (i = 0; i < 3; i++) {
1205 		if (((gamma5 >> i*8) & 0xff) == 0x80)
1206 			return false;
1207 	}
1208 
1209 	return true;
1210 }
1211 
1212 static int check_gamma(struct drm_intel_overlay_attrs *attrs)
1213 {
1214 	if (!check_gamma_bounds(0, attrs->gamma0) ||
1215 	    !check_gamma_bounds(attrs->gamma0, attrs->gamma1) ||
1216 	    !check_gamma_bounds(attrs->gamma1, attrs->gamma2) ||
1217 	    !check_gamma_bounds(attrs->gamma2, attrs->gamma3) ||
1218 	    !check_gamma_bounds(attrs->gamma3, attrs->gamma4) ||
1219 	    !check_gamma_bounds(attrs->gamma4, attrs->gamma5) ||
1220 	    !check_gamma_bounds(attrs->gamma5, 0x00ffffff))
1221 		return -EINVAL;
1222 
1223 	if (!check_gamma5_errata(attrs->gamma5))
1224 		return -EINVAL;
1225 
1226 	return 0;
1227 }
1228 
1229 int intel_overlay_attrs(struct drm_device *dev, void *data,
1230 			struct drm_file *file_priv)
1231 {
1232 	struct drm_intel_overlay_attrs *attrs = data;
1233 	drm_i915_private_t *dev_priv = dev->dev_private;
1234 	struct intel_overlay *overlay;
1235 	struct overlay_registers __iomem *regs;
1236 	int ret;
1237 
1238 	/* No need to check for DRIVER_MODESET - we don't set it up then. */
1239 	overlay = dev_priv->overlay;
1240 	if (!overlay) {
1241 		DRM_DEBUG("userspace bug: no overlay\n");
1242 		return -ENODEV;
1243 	}
1244 
1245 	lockmgr(&dev->mode_config.mutex, LK_EXCLUSIVE);
1246 	DRM_LOCK(dev);
1247 
1248 	ret = -EINVAL;
1249 	if (!(attrs->flags & I915_OVERLAY_UPDATE_ATTRS)) {
1250 		attrs->color_key  = overlay->color_key;
1251 		attrs->brightness = overlay->brightness;
1252 		attrs->contrast   = overlay->contrast;
1253 		attrs->saturation = overlay->saturation;
1254 
1255 		if (!IS_GEN2(dev)) {
1256 			attrs->gamma0 = I915_READ(OGAMC0);
1257 			attrs->gamma1 = I915_READ(OGAMC1);
1258 			attrs->gamma2 = I915_READ(OGAMC2);
1259 			attrs->gamma3 = I915_READ(OGAMC3);
1260 			attrs->gamma4 = I915_READ(OGAMC4);
1261 			attrs->gamma5 = I915_READ(OGAMC5);
1262 		}
1263 	} else {
1264 		if (attrs->brightness < -128 || attrs->brightness > 127)
1265 			goto out_unlock;
1266 		if (attrs->contrast > 255)
1267 			goto out_unlock;
1268 		if (attrs->saturation > 1023)
1269 			goto out_unlock;
1270 
1271 		overlay->color_key  = attrs->color_key;
1272 		overlay->brightness = attrs->brightness;
1273 		overlay->contrast   = attrs->contrast;
1274 		overlay->saturation = attrs->saturation;
1275 
1276 		regs = intel_overlay_map_regs(overlay);
1277 		if (!regs) {
1278 			ret = -ENOMEM;
1279 			goto out_unlock;
1280 		}
1281 
1282 		update_reg_attrs(overlay, regs);
1283 
1284 		intel_overlay_unmap_regs(overlay, regs);
1285 
1286 		if (attrs->flags & I915_OVERLAY_UPDATE_GAMMA) {
1287 			if (IS_GEN2(dev))
1288 				goto out_unlock;
1289 
1290 			if (overlay->active) {
1291 				ret = -EBUSY;
1292 				goto out_unlock;
1293 			}
1294 
1295 			ret = check_gamma(attrs);
1296 			if (ret)
1297 				goto out_unlock;
1298 
1299 			I915_WRITE(OGAMC0, attrs->gamma0);
1300 			I915_WRITE(OGAMC1, attrs->gamma1);
1301 			I915_WRITE(OGAMC2, attrs->gamma2);
1302 			I915_WRITE(OGAMC3, attrs->gamma3);
1303 			I915_WRITE(OGAMC4, attrs->gamma4);
1304 			I915_WRITE(OGAMC5, attrs->gamma5);
1305 		}
1306 	}
1307 
1308 	ret = 0;
1309 out_unlock:
1310 	DRM_UNLOCK(dev);
1311 	lockmgr(&dev->mode_config.mutex, LK_RELEASE);
1312 
1313 	return ret;
1314 }
1315 
1316 void intel_setup_overlay(struct drm_device *dev)
1317 {
1318 	drm_i915_private_t *dev_priv = dev->dev_private;
1319 	struct intel_overlay *overlay;
1320 	struct drm_i915_gem_object *reg_bo;
1321 	struct overlay_registers __iomem *regs;
1322 	int ret;
1323 
1324 	if (!HAS_OVERLAY(dev))
1325 		return;
1326 
1327 	overlay = kmalloc(sizeof(struct intel_overlay), M_DRM,
1328 	    M_WAITOK | M_ZERO);
1329 	if (!overlay)
1330 		return;
1331 
1332 	DRM_LOCK(dev);
1333 	if (WARN_ON(dev_priv->overlay))
1334 		goto out_free;
1335 
1336 	overlay->dev = dev;
1337 
1338 	reg_bo = i915_gem_alloc_object(dev, PAGE_SIZE);
1339 	if (!reg_bo)
1340 		goto out_free;
1341 	overlay->reg_bo = reg_bo;
1342 
1343 	if (OVERLAY_NEEDS_PHYSICAL(dev)) {
1344 		ret = i915_gem_attach_phys_object(dev, reg_bo,
1345 						  I915_GEM_PHYS_OVERLAY_REGS,
1346 						  PAGE_SIZE);
1347 		if (ret) {
1348 			DRM_ERROR("failed to attach phys overlay regs\n");
1349 			goto out_free_bo;
1350 		}
1351 		overlay->flip_addr = reg_bo->phys_obj->handle->busaddr;
1352 	} else {
1353 		ret = i915_gem_object_pin(reg_bo, PAGE_SIZE, true, false);
1354 		if (ret) {
1355 			DRM_ERROR("failed to pin overlay register bo\n");
1356 			goto out_free_bo;
1357 		}
1358 		overlay->flip_addr = reg_bo->gtt_offset;
1359 
1360 		ret = i915_gem_object_set_to_gtt_domain(reg_bo, true);
1361 		if (ret) {
1362 			DRM_ERROR("failed to move overlay register bo into the GTT\n");
1363 			goto out_unpin_bo;
1364 		}
1365 	}
1366 
1367 	/* init all values */
1368 	overlay->color_key = 0x0101fe;
1369 	overlay->brightness = -19;
1370 	overlay->contrast = 75;
1371 	overlay->saturation = 146;
1372 
1373 	regs = intel_overlay_map_regs(overlay);
1374 	if (!regs)
1375 		goto out_unpin_bo;
1376 
1377 	memset_io(regs, 0, sizeof(struct overlay_registers));
1378 	update_polyphase_filter(regs);
1379 	update_reg_attrs(overlay, regs);
1380 
1381 	intel_overlay_unmap_regs(overlay, regs);
1382 
1383 	dev_priv->overlay = overlay;
1384 	DRM_UNLOCK(dev);
1385 	DRM_INFO("initialized overlay support\n");
1386 	return;
1387 
1388 out_unpin_bo:
1389 	if (!OVERLAY_NEEDS_PHYSICAL(dev))
1390 		i915_gem_object_unpin(reg_bo);
1391 out_free_bo:
1392 	drm_gem_object_unreference(&reg_bo->base);
1393 out_free:
1394 	DRM_UNLOCK(dev);
1395 	kfree(overlay, M_DRM);
1396 	return;
1397 }
1398 
1399 void intel_cleanup_overlay(struct drm_device *dev)
1400 {
1401 	drm_i915_private_t *dev_priv = dev->dev_private;
1402 
1403 	if (!dev_priv->overlay)
1404 		return;
1405 
1406 	/* The bo's should be free'd by the generic code already.
1407 	 * Furthermore modesetting teardown happens beforehand so the
1408 	 * hardware should be off already */
1409 	BUG_ON(dev_priv->overlay->active);
1410 
1411 	drm_gem_object_unreference_unlocked(&dev_priv->overlay->reg_bo->base);
1412 	kfree(dev_priv->overlay, M_DRM);
1413 }
1414 
1415 #ifdef CONFIG_DEBUG_FS
1416 #include <linux/seq_file.h>
1417 
1418 struct intel_overlay_error_state {
1419 	struct overlay_registers regs;
1420 	unsigned long base;
1421 	u32 dovsta;
1422 	u32 isr;
1423 };
1424 
1425 static struct overlay_registers __iomem *
1426 intel_overlay_map_regs_atomic(struct intel_overlay *overlay)
1427 {
1428 	drm_i915_private_t *dev_priv = overlay->dev->dev_private;
1429 	struct overlay_registers __iomem *regs;
1430 
1431 	if (OVERLAY_NEEDS_PHYSICAL(overlay->dev))
1432 		/* Cast to make sparse happy, but it's wc memory anyway, so
1433 		 * equivalent to the wc io mapping on X86. */
1434 		regs = (struct overlay_registers __iomem *)
1435 			overlay->reg_bo->phys_obj->handle->vaddr;
1436 	else
1437 		regs = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
1438 						overlay->reg_bo->gtt_offset);
1439 
1440 	return regs;
1441 }
1442 
1443 static void intel_overlay_unmap_regs_atomic(struct intel_overlay *overlay,
1444 					struct overlay_registers __iomem *regs)
1445 {
1446 	if (!OVERLAY_NEEDS_PHYSICAL(overlay->dev))
1447 		io_mapping_unmap_atomic(regs);
1448 }
1449 
1450 
1451 struct intel_overlay_error_state *
1452 intel_overlay_capture_error_state(struct drm_device *dev)
1453 {
1454 	drm_i915_private_t *dev_priv = dev->dev_private;
1455 	struct intel_overlay *overlay = dev_priv->overlay;
1456 	struct intel_overlay_error_state *error;
1457 	struct overlay_registers __iomem *regs;
1458 
1459 	if (!overlay || !overlay->active)
1460 		return NULL;
1461 
1462 	error = kmalloc(sizeof(*error), M_DRM, M_NOWAIT);
1463 	if (error == NULL)
1464 		return NULL;
1465 
1466 	error->dovsta = I915_READ(DOVSTA);
1467 	error->isr = I915_READ(ISR);
1468 	if (OVERLAY_NEEDS_PHYSICAL(overlay->dev))
1469 		error->base = (__force long)overlay->reg_bo->phys_obj->handle->vaddr;
1470 	else
1471 		error->base = overlay->reg_bo->gtt_offset;
1472 
1473 	regs = intel_overlay_map_regs_atomic(overlay);
1474 	if (!regs)
1475 		goto err;
1476 
1477 	memcpy_fromio(&error->regs, regs, sizeof(struct overlay_registers));
1478 	intel_overlay_unmap_regs_atomic(overlay, regs);
1479 
1480 	return error;
1481 
1482 err:
1483 	kfree(error, M_DRM);
1484 	return NULL;
1485 }
1486 
1487 void
1488 intel_overlay_print_error_state(struct seq_file *m, struct intel_overlay_error_state *error)
1489 {
1490 	seq_printf(m, "Overlay, status: 0x%08x, interrupt: 0x%08x\n",
1491 		   error->dovsta, error->isr);
1492 	seq_printf(m, "  Register file at 0x%08lx:\n",
1493 		   error->base);
1494 
1495 #define P(x) seq_printf(m, "    " #x ":	0x%08x\n", error->regs.x)
1496 	P(OBUF_0Y);
1497 	P(OBUF_1Y);
1498 	P(OBUF_0U);
1499 	P(OBUF_0V);
1500 	P(OBUF_1U);
1501 	P(OBUF_1V);
1502 	P(OSTRIDE);
1503 	P(YRGB_VPH);
1504 	P(UV_VPH);
1505 	P(HORZ_PH);
1506 	P(INIT_PHS);
1507 	P(DWINPOS);
1508 	P(DWINSZ);
1509 	P(SWIDTH);
1510 	P(SWIDTHSW);
1511 	P(SHEIGHT);
1512 	P(YRGBSCALE);
1513 	P(UVSCALE);
1514 	P(OCLRC0);
1515 	P(OCLRC1);
1516 	P(DCLRKV);
1517 	P(DCLRKM);
1518 	P(SCLRKVH);
1519 	P(SCLRKVL);
1520 	P(SCLRKEN);
1521 	P(OCONFIG);
1522 	P(OCMD);
1523 	P(OSTART_0Y);
1524 	P(OSTART_1Y);
1525 	P(OSTART_0U);
1526 	P(OSTART_0V);
1527 	P(OSTART_1U);
1528 	P(OSTART_1V);
1529 	P(OTILEOFF_0Y);
1530 	P(OTILEOFF_1Y);
1531 	P(OTILEOFF_0U);
1532 	P(OTILEOFF_0V);
1533 	P(OTILEOFF_1U);
1534 	P(OTILEOFF_1V);
1535 	P(FASTHSCALE);
1536 	P(UVSCALEV);
1537 #undef P
1538 }
1539 #endif
1540