xref: /dragonfly/sys/dev/drm/i915/intel_overlay.c (revision f00b70b8)
1 /*
2  * Copyright © 2009
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Daniel Vetter <daniel@ffwll.ch>
25  *
26  * Derived from Xorg ddx, xf86-video-intel, src/i830_video.c
27  * $FreeBSD: src/sys/dev/drm2/i915/intel_overlay.c,v 1.1 2012/05/22 11:07:44 kib Exp $
28  */
29 
30 #include <drm/drmP.h>
31 #include <drm/i915_drm.h>
32 #include "i915_drv.h"
33 #include "i915_reg.h"
34 #include "intel_drv.h"
35 
/* Limits for overlay size. According to intel doc, the real limits are:
 * Y width: 4095, UV width (planar): 2047, Y height: 2047,
 * UV height (planar): 1023. But the xorg thinks 2048 for height and width. Use
 * the minimum of both.  */
40 #define IMAGE_MAX_WIDTH		2048
41 #define IMAGE_MAX_HEIGHT	2046 /* 2 * 1023 */
42 /* on 830 and 845 these large limits result in the card hanging */
43 #define IMAGE_MAX_WIDTH_LEGACY	1024
44 #define IMAGE_MAX_HEIGHT_LEGACY	1088
45 
46 /* overlay register definitions */
47 /* OCMD register */
48 #define OCMD_TILED_SURFACE	(0x1<<19)
49 #define OCMD_MIRROR_MASK	(0x3<<17)
50 #define OCMD_MIRROR_MODE	(0x3<<17)
51 #define OCMD_MIRROR_HORIZONTAL	(0x1<<17)
52 #define OCMD_MIRROR_VERTICAL	(0x2<<17)
53 #define OCMD_MIRROR_BOTH	(0x3<<17)
54 #define OCMD_BYTEORDER_MASK	(0x3<<14) /* zero for YUYV or FOURCC YUY2 */
55 #define OCMD_UV_SWAP		(0x1<<14) /* YVYU */
56 #define OCMD_Y_SWAP		(0x2<<14) /* UYVY or FOURCC UYVY */
57 #define OCMD_Y_AND_UV_SWAP	(0x3<<14) /* VYUY */
58 #define OCMD_SOURCE_FORMAT_MASK (0xf<<10)
59 #define OCMD_RGB_888		(0x1<<10) /* not in i965 Intel docs */
60 #define OCMD_RGB_555		(0x2<<10) /* not in i965 Intel docs */
61 #define OCMD_RGB_565		(0x3<<10) /* not in i965 Intel docs */
62 #define OCMD_YUV_422_PACKED	(0x8<<10)
63 #define OCMD_YUV_411_PACKED	(0x9<<10) /* not in i965 Intel docs */
64 #define OCMD_YUV_420_PLANAR	(0xc<<10)
65 #define OCMD_YUV_422_PLANAR	(0xd<<10)
66 #define OCMD_YUV_410_PLANAR	(0xe<<10) /* also 411 */
67 #define OCMD_TVSYNCFLIP_PARITY	(0x1<<9)
68 #define OCMD_TVSYNCFLIP_ENABLE	(0x1<<7)
69 #define OCMD_BUF_TYPE_MASK	(0x1<<5)
70 #define OCMD_BUF_TYPE_FRAME	(0x0<<5)
71 #define OCMD_BUF_TYPE_FIELD	(0x1<<5)
72 #define OCMD_TEST_MODE		(0x1<<4)
73 #define OCMD_BUFFER_SELECT	(0x3<<2)
74 #define OCMD_BUFFER0		(0x0<<2)
75 #define OCMD_BUFFER1		(0x1<<2)
76 #define OCMD_FIELD_SELECT	(0x1<<2)
77 #define OCMD_FIELD0		(0x0<<1)
78 #define OCMD_FIELD1		(0x1<<1)
79 #define OCMD_ENABLE		(0x1<<0)
80 
81 /* OCONFIG register */
82 #define OCONF_PIPE_MASK		(0x1<<18)
83 #define OCONF_PIPE_A		(0x0<<18)
84 #define OCONF_PIPE_B		(0x1<<18)
85 #define OCONF_GAMMA2_ENABLE	(0x1<<16)
86 #define OCONF_CSC_MODE_BT601	(0x0<<5)
87 #define OCONF_CSC_MODE_BT709	(0x1<<5)
88 #define OCONF_CSC_BYPASS	(0x1<<4)
89 #define OCONF_CC_OUT_8BIT	(0x1<<3)
90 #define OCONF_TEST_MODE		(0x1<<2)
91 #define OCONF_THREE_LINE_BUFFER	(0x1<<0)
92 #define OCONF_TWO_LINE_BUFFER	(0x0<<0)
93 
94 /* DCLRKM (dst-key) register */
95 #define DST_KEY_ENABLE		(0x1<<31)
96 #define CLK_RGB24_MASK		0x0
97 #define CLK_RGB16_MASK		0x070307
98 #define CLK_RGB15_MASK		0x070707
99 #define CLK_RGB8I_MASK		0xffffff
100 
/*
 * Expand a 16bpp (5:6:5) or 15bpp (5:5:5) pixel value into the 8:8:8 layout
 * written to the DCLRKV register.  Each colour field is moved so that its most
 * significant bit lands on bit 7 of the matching 8-bit channel; the low bits
 * of every channel are left at zero.
 *
 * The argument is parenthesised so that an expression such as (a | b) is
 * masked as a whole.  Note that it is still evaluated three times, so it must
 * not have side effects.
 */
#define RGB16_TO_COLORKEY(c) \
	((((c) & 0xF800) << 8) | (((c) & 0x07E0) << 5) | (((c) & 0x001F) << 3))
#define RGB15_TO_COLORKEY(c) \
	((((c) & 0x7c00) << 9) | (((c) & 0x03E0) << 6) | (((c) & 0x001F) << 3))
105 
106 /* overlay flip addr flag */
107 #define OFC_UPDATE		0x1
108 
109 /* polyphase filter coefficients */
110 #define N_HORIZ_Y_TAPS          5
111 #define N_VERT_Y_TAPS           3
112 #define N_HORIZ_UV_TAPS         3
113 #define N_VERT_UV_TAPS          3
114 #define N_PHASES                17
115 #define MAX_TAPS                5
116 
/*
 * Memory-buffered overlay registers.
 *
 * The driver fills this block in memory (see intel_overlay_map_regs()) and
 * hands its address to the hardware through an MI_OVERLAY_FLIP command.
 * The reserved arrays pad the structure so that the coefficient tables sit at
 * the offsets noted in the comments; the whole structure is 0x700 bytes.
 * Field order and sizes must not be changed.
 */
struct overlay_registers {
	u32 OBUF_0Y;
	u32 OBUF_1Y;
	u32 OBUF_0U;
	u32 OBUF_0V;
	u32 OBUF_1U;
	u32 OBUF_1V;
	u32 OSTRIDE;
	u32 YRGB_VPH;
	u32 UV_VPH;
	u32 HORZ_PH;
	u32 INIT_PHS;
	u32 DWINPOS;
	u32 DWINSZ;
	u32 SWIDTH;
	u32 SWIDTHSW;
	u32 SHEIGHT;
	u32 YRGBSCALE;
	u32 UVSCALE;
	u32 OCLRC0;
	u32 OCLRC1;
	u32 DCLRKV;
	u32 DCLRKM;
	u32 SCLRKVH;
	u32 SCLRKVL;
	u32 SCLRKEN;
	u32 OCONFIG;
	u32 OCMD;
	u32 RESERVED1; /* 0x6C */
	u32 OSTART_0Y;
	u32 OSTART_1Y;
	u32 OSTART_0U;
	u32 OSTART_0V;
	u32 OSTART_1U;
	u32 OSTART_1V;
	u32 OTILEOFF_0Y;
	u32 OTILEOFF_1Y;
	u32 OTILEOFF_0U;
	u32 OTILEOFF_0V;
	u32 OTILEOFF_1U;
	u32 OTILEOFF_1V;
	u32 FASTHSCALE; /* 0xA0 */
	u32 UVSCALEV; /* 0xA4 */
	u32 RESERVEDC[(0x200 - 0xA8) / 4]; /* 0xA8 - 0x1FC */
	/* polyphase filter coefficient tables, one u16 per tap and phase */
	u16 Y_VCOEFS[N_VERT_Y_TAPS * N_PHASES]; /* 0x200 */
	u16 RESERVEDD[0x100 / 2 - N_VERT_Y_TAPS * N_PHASES];
	u16 Y_HCOEFS[N_HORIZ_Y_TAPS * N_PHASES]; /* 0x300 */
	u16 RESERVEDE[0x200 / 2 - N_HORIZ_Y_TAPS * N_PHASES];
	u16 UV_VCOEFS[N_VERT_UV_TAPS * N_PHASES]; /* 0x500 */
	u16 RESERVEDF[0x100 / 2 - N_VERT_UV_TAPS * N_PHASES];
	u16 UV_HCOEFS[N_HORIZ_UV_TAPS * N_PHASES]; /* 0x600 */
	u16 RESERVEDG[0x100 / 2 - N_HORIZ_UV_TAPS * N_PHASES];
};
171 
/*
 * Software state of the overlay plane.
 */
struct intel_overlay {
	struct drm_device *dev;
	/* crtc the overlay is currently bound to; cleared when switched off */
	struct intel_crtc *crtc;
	/* frame currently being shown (pinned while in use) */
	struct drm_i915_gem_object *vid_bo;
	/* previous frame, released once the pending flip has completed */
	struct drm_i915_gem_object *old_vid_bo;
	/* set by intel_overlay_on(), cleared by intel_overlay_off_tail() */
	int active;
	int pfit_active;
	u32 pfit_vscale_ratio; /* shifted-point number, (1<<12) == 1.0 */
	u32 color_key;
	u32 brightness, contrast, saturation;
	/* last scale factors programmed, used to detect scale changes */
	u32 old_xscale, old_yscale;
	/* register access */
	u32 flip_addr;
	struct drm_i915_gem_object *reg_bo;
	/* flip handling */
	/* seqno of the outstanding flip request, 0 when none is pending */
	uint32_t last_flip_req;
	/* cleanup to run after last_flip_req completes; may be NULL */
	void (*flip_tail)(struct intel_overlay *);
};
190 
191 static struct overlay_registers *
192 intel_overlay_map_regs(struct intel_overlay *overlay)
193 {
194 	struct overlay_registers *regs;
195 
196 	if (OVERLAY_NEEDS_PHYSICAL(overlay->dev)) {
197 		regs = overlay->reg_bo->phys_obj->handle->vaddr;
198 	} else {
199 		regs = pmap_mapdev_attr(overlay->dev->agp->base +
200 		    overlay->reg_bo->gtt_offset, PAGE_SIZE,
201 		    PAT_WRITE_COMBINING);
202 	}
203 	return (regs);
204 }
205 
206 static void intel_overlay_unmap_regs(struct intel_overlay *overlay,
207 				     struct overlay_registers *regs)
208 {
209 	if (!OVERLAY_NEEDS_PHYSICAL(overlay->dev))
210 		pmap_unmapdev((vm_offset_t)regs, PAGE_SIZE);
211 }
212 
213 static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
214 					 struct drm_i915_gem_request *request,
215 					 void (*tail)(struct intel_overlay *))
216 {
217 	struct drm_device *dev = overlay->dev;
218 	drm_i915_private_t *dev_priv = dev->dev_private;
219 	int ret;
220 
221 	BUG_ON(overlay->last_flip_req);
222 	ret = i915_add_request(LP_RING(dev_priv), NULL, request);
223 	if (ret) {
224 		drm_free(request, DRM_I915_GEM);
225 		return ret;
226 	}
227 	overlay->last_flip_req = request->seqno;
228 	overlay->flip_tail = tail;
229 	ret = i915_wait_seqno(LP_RING(dev_priv), overlay->last_flip_req);
230 	if (ret)
231 		return ret;
232 
233 	overlay->last_flip_req = 0;
234 	return 0;
235 }
236 
237 /* overlay needs to be disable in OCMD reg */
238 static int intel_overlay_on(struct intel_overlay *overlay)
239 {
240 	struct drm_device *dev = overlay->dev;
241 	struct drm_i915_private *dev_priv = dev->dev_private;
242 	struct drm_i915_gem_request *request;
243 	int ret;
244 
245 	BUG_ON(overlay->active);
246 	overlay->active = 1;
247 
248 	WARN_ON(IS_I830(dev) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE));
249 
250 	request = kmalloc(sizeof(*request), DRM_I915_GEM, M_WAITOK | M_ZERO);
251 
252 	ret = BEGIN_LP_RING(4);
253 	if (ret) {
254 		drm_free(request, DRM_I915_GEM);
255 		goto out;
256 	}
257 
258 	OUT_RING(MI_OVERLAY_FLIP | MI_OVERLAY_ON);
259 	OUT_RING(overlay->flip_addr | OFC_UPDATE);
260 	OUT_RING(MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
261 	OUT_RING(MI_NOOP);
262 	ADVANCE_LP_RING();
263 
264 	ret = intel_overlay_do_wait_request(overlay, request, NULL);
265 out:
266 
267 	return ret;
268 }
269 
270 /* overlay needs to be enabled in OCMD reg */
271 static int intel_overlay_continue(struct intel_overlay *overlay,
272 				  bool load_polyphase_filter)
273 {
274 	struct drm_device *dev = overlay->dev;
275 	drm_i915_private_t *dev_priv = dev->dev_private;
276 	struct drm_i915_gem_request *request;
277 	u32 flip_addr = overlay->flip_addr;
278 	u32 tmp;
279 	int ret;
280 
281 	KASSERT(overlay->active, ("Overlay not active"));
282 
283 	request = kmalloc(sizeof(*request), DRM_I915_GEM, M_WAITOK | M_ZERO);
284 
285 	if (load_polyphase_filter)
286 		flip_addr |= OFC_UPDATE;
287 
288 	/* check for underruns */
289 	tmp = I915_READ(DOVSTA);
290 	if (tmp & (1 << 17))
291 		DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp);
292 
293 	ret = BEGIN_LP_RING(2);
294 	if (ret) {
295 		drm_free(request, DRM_I915_GEM);
296 		return ret;
297 	}
298 	OUT_RING(MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
299 	OUT_RING(flip_addr);
300 	ADVANCE_LP_RING();
301 
302 	ret = i915_add_request(LP_RING(dev_priv), NULL, request);
303 	if (ret) {
304 		drm_free(request, DRM_I915_GEM);
305 		return ret;
306 	}
307 
308 	overlay->last_flip_req = request->seqno;
309 	return 0;
310 }
311 
312 static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
313 {
314 	struct drm_i915_gem_object *obj = overlay->old_vid_bo;
315 
316 	i915_gem_object_unpin(obj);
317 	drm_gem_object_unreference(&obj->base);
318 
319 	overlay->old_vid_bo = NULL;
320 }
321 
322 static void intel_overlay_off_tail(struct intel_overlay *overlay)
323 {
324 	struct drm_i915_gem_object *obj = overlay->vid_bo;
325 
326 	/* never have the overlay hw on without showing a frame */
327 	KASSERT(overlay->vid_bo != NULL, ("No vid_bo"));
328 
329 	i915_gem_object_unpin(obj);
330 	drm_gem_object_unreference(&obj->base);
331 	overlay->vid_bo = NULL;
332 
333 	overlay->crtc->overlay = NULL;
334 	overlay->crtc = NULL;
335 	overlay->active = 0;
336 }
337 
338 /* overlay needs to be disabled in OCMD reg */
339 static int intel_overlay_off(struct intel_overlay *overlay)
340 {
341 	struct drm_device *dev = overlay->dev;
342 	struct drm_i915_private *dev_priv = dev->dev_private;
343 	u32 flip_addr = overlay->flip_addr;
344 	struct drm_i915_gem_request *request;
345 	int ret;
346 
347 	KASSERT(overlay->active, ("Overlay is not active"));
348 
349 	request = kmalloc(sizeof(*request), DRM_I915_GEM, M_WAITOK | M_ZERO);
350 
351 	/* According to intel docs the overlay hw may hang (when switching
352 	 * off) without loading the filter coeffs. It is however unclear whether
353 	 * this applies to the disabling of the overlay or to the switching off
354 	 * of the hw. Do it in both cases */
355 	flip_addr |= OFC_UPDATE;
356 
357 	ret = BEGIN_LP_RING(6);
358 	if (ret) {
359 		drm_free(request, DRM_I915_GEM);
360 		return ret;
361 	}
362 	/* wait for overlay to go idle */
363 	OUT_RING(MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
364 	OUT_RING(flip_addr);
365 	OUT_RING(MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
366 	/* turn overlay off */
367 	OUT_RING(MI_OVERLAY_FLIP | MI_OVERLAY_OFF);
368 	OUT_RING(flip_addr);
369 	OUT_RING(MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
370 	ADVANCE_LP_RING();
371 
372 	return intel_overlay_do_wait_request(overlay, request,
373 					     intel_overlay_off_tail);
374 }
375 
376 /* recover from an interruption due to a signal
377  * We have to be careful not to repeat work forever an make forward progess. */
378 static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
379 {
380 	struct drm_device *dev = overlay->dev;
381 	drm_i915_private_t *dev_priv = dev->dev_private;
382 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
383 	int ret;
384 
385 	if (overlay->last_flip_req == 0)
386 		return 0;
387 
388 	ret = i915_wait_seqno(ring, overlay->last_flip_req);
389 	if (ret)
390 		return ret;
391 	i915_gem_retire_requests(dev);
392 
393 	if (overlay->flip_tail)
394 		overlay->flip_tail(overlay);
395 
396 	overlay->last_flip_req = 0;
397 	return 0;
398 }
399 
400 /* Wait for pending overlay flip and release old frame.
401  * Needs to be called before the overlay register are changed
402  * via intel_overlay_(un)map_regs
403  */
404 static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
405 {
406 	struct drm_device *dev = overlay->dev;
407 	drm_i915_private_t *dev_priv = dev->dev_private;
408 	int ret;
409 
410 	/* Only wait if there is actually an old frame to release to
411 	 * guarantee forward progress.
412 	 */
413 	if (!overlay->old_vid_bo)
414 		return 0;
415 
416 	if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
417 		struct drm_i915_gem_request *request;
418 
419 		/* synchronous slowpath */
420 		request = kmalloc(sizeof(*request), DRM_I915_GEM, M_WAITOK | M_ZERO);
421 
422 		ret = BEGIN_LP_RING(2);
423 		if (ret) {
424 			drm_free(request, DRM_I915_GEM);
425 			return ret;
426 		}
427 
428 		OUT_RING(MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
429 		OUT_RING(MI_NOOP);
430 		ADVANCE_LP_RING();
431 
432 		ret = intel_overlay_do_wait_request(overlay, request,
433 						    intel_overlay_release_old_vid_tail);
434 		if (ret)
435 			return ret;
436 	}
437 
438 	intel_overlay_release_old_vid_tail(overlay);
439 	return 0;
440 }
441 
/*
 * Parameters for one frame, as consumed by intel_overlay_do_put_image()
 * and the helpers it calls.
 */
struct put_image_params {
	int format;		/* I915_OVERLAY_* type/depth/swap flags */
	/* destination window, written to DWINPOS / DWINSZ */
	short dst_x;
	short dst_y;
	short dst_w;
	short dst_h;
	short src_w;		/* written to SWIDTH */
	/* scanned-out source size, used for the scale factors */
	short src_scan_h;
	short src_scan_w;
	short src_h;		/* written to SHEIGHT */
	/* strides, written to OSTRIDE */
	short stride_Y;
	short stride_UV;
	/* plane offsets, added to the buffer's GTT offset */
	int offset_Y;
	int offset_U;
	int offset_V;
};
458 
459 static int packed_depth_bytes(u32 format)
460 {
461 	switch (format & I915_OVERLAY_DEPTH_MASK) {
462 	case I915_OVERLAY_YUV422:
463 		return 4;
464 	case I915_OVERLAY_YUV411:
465 		/* return 6; not implemented */
466 	default:
467 		return -EINVAL;
468 	}
469 }
470 
471 static int packed_width_bytes(u32 format, short width)
472 {
473 	switch (format & I915_OVERLAY_DEPTH_MASK) {
474 	case I915_OVERLAY_YUV422:
475 		return width << 1;
476 	default:
477 		return -EINVAL;
478 	}
479 }
480 
481 static int uv_hsubsampling(u32 format)
482 {
483 	switch (format & I915_OVERLAY_DEPTH_MASK) {
484 	case I915_OVERLAY_YUV422:
485 	case I915_OVERLAY_YUV420:
486 		return 2;
487 	case I915_OVERLAY_YUV411:
488 	case I915_OVERLAY_YUV410:
489 		return 4;
490 	default:
491 		return -EINVAL;
492 	}
493 }
494 
495 static int uv_vsubsampling(u32 format)
496 {
497 	switch (format & I915_OVERLAY_DEPTH_MASK) {
498 	case I915_OVERLAY_YUV420:
499 	case I915_OVERLAY_YUV410:
500 		return 2;
501 	case I915_OVERLAY_YUV422:
502 	case I915_OVERLAY_YUV411:
503 		return 1;
504 	default:
505 		return -EINVAL;
506 	}
507 }
508 
509 static u32 calc_swidthsw(struct drm_device *dev, u32 offset, u32 width)
510 {
511 	u32 mask, shift, ret;
512 	if (IS_GEN2(dev)) {
513 		mask = 0x1f;
514 		shift = 5;
515 	} else {
516 		mask = 0x3f;
517 		shift = 6;
518 	}
519 	ret = ((offset + width + mask) >> shift) - (offset >> shift);
520 	if (!IS_GEN2(dev))
521 		ret <<= 1;
522 	ret -= 1;
523 	return ret << 2;
524 }
525 
/*
 * Fixed horizontal filter coefficients for the Y plane, copied verbatim into
 * overlay_registers.Y_HCOEFS by update_polyphase_filter().
 * The table holds N_HORIZ_Y_TAPS (5) values for each of the N_PHASES (17)
 * phases; each source line below carries five values.
 */
static const u16 y_static_hcoeffs[N_HORIZ_Y_TAPS * N_PHASES] = {
	0x3000, 0xb4a0, 0x1930, 0x1920, 0xb4a0,
	0x3000, 0xb500, 0x19d0, 0x1880, 0xb440,
	0x3000, 0xb540, 0x1a88, 0x2f80, 0xb3e0,
	0x3000, 0xb580, 0x1b30, 0x2e20, 0xb380,
	0x3000, 0xb5c0, 0x1bd8, 0x2cc0, 0xb320,
	0x3020, 0xb5e0, 0x1c60, 0x2b80, 0xb2c0,
	0x3020, 0xb5e0, 0x1cf8, 0x2a20, 0xb260,
	0x3020, 0xb5e0, 0x1d80, 0x28e0, 0xb200,
	0x3020, 0xb5c0, 0x1e08, 0x3f40, 0xb1c0,
	0x3020, 0xb580, 0x1e78, 0x3ce0, 0xb160,
	0x3040, 0xb520, 0x1ed8, 0x3aa0, 0xb120,
	0x3040, 0xb4a0, 0x1f30, 0x3880, 0xb0e0,
	0x3040, 0xb400, 0x1f78, 0x3680, 0xb0a0,
	0x3020, 0xb340, 0x1fb8, 0x34a0, 0xb060,
	0x3020, 0xb240, 0x1fe0, 0x32e0, 0xb040,
	0x3020, 0xb140, 0x1ff8, 0x3160, 0xb020,
	0xb000, 0x3000, 0x0800, 0x3000, 0xb000
};
545 
/*
 * Fixed horizontal filter coefficients for the U/V planes, copied verbatim
 * into overlay_registers.UV_HCOEFS by update_polyphase_filter().
 * The table holds N_HORIZ_UV_TAPS (3) values for each of the N_PHASES (17)
 * phases; note that the source lines below carry six values each, not three.
 */
static const u16 uv_static_hcoeffs[N_HORIZ_UV_TAPS * N_PHASES] = {
	0x3000, 0x1800, 0x1800, 0xb000, 0x18d0, 0x2e60,
	0xb000, 0x1990, 0x2ce0, 0xb020, 0x1a68, 0x2b40,
	0xb040, 0x1b20, 0x29e0, 0xb060, 0x1bd8, 0x2880,
	0xb080, 0x1c88, 0x3e60, 0xb0a0, 0x1d28, 0x3c00,
	0xb0c0, 0x1db8, 0x39e0, 0xb0e0, 0x1e40, 0x37e0,
	0xb100, 0x1eb8, 0x3620, 0xb100, 0x1f18, 0x34a0,
	0xb100, 0x1f68, 0x3360, 0xb0e0, 0x1fa8, 0x3240,
	0xb0c0, 0x1fe0, 0x3140, 0xb060, 0x1ff0, 0x30a0,
	0x3000, 0x0800, 0x3000
};
557 
558 static void update_polyphase_filter(struct overlay_registers *regs)
559 {
560 	memcpy(regs->Y_HCOEFS, y_static_hcoeffs, sizeof(y_static_hcoeffs));
561 	memcpy(regs->UV_HCOEFS, uv_static_hcoeffs, sizeof(uv_static_hcoeffs));
562 }
563 
564 static bool update_scaling_factors(struct intel_overlay *overlay,
565 				   struct overlay_registers *regs,
566 				   struct put_image_params *params)
567 {
568 	/* fixed point with a 12 bit shift */
569 	u32 xscale, yscale, xscale_UV, yscale_UV;
570 #define FP_SHIFT 12
571 #define FRACT_MASK 0xfff
572 	bool scale_changed = false;
573 	int uv_hscale = uv_hsubsampling(params->format);
574 	int uv_vscale = uv_vsubsampling(params->format);
575 
576 	if (params->dst_w > 1)
577 		xscale = ((params->src_scan_w - 1) << FP_SHIFT)
578 			/(params->dst_w);
579 	else
580 		xscale = 1 << FP_SHIFT;
581 
582 	if (params->dst_h > 1)
583 		yscale = ((params->src_scan_h - 1) << FP_SHIFT)
584 			/(params->dst_h);
585 	else
586 		yscale = 1 << FP_SHIFT;
587 
588 	/*if (params->format & I915_OVERLAY_YUV_PLANAR) {*/
589 	xscale_UV = xscale/uv_hscale;
590 	yscale_UV = yscale/uv_vscale;
591 	/* make the Y scale to UV scale ratio an exact multiply */
592 	xscale = xscale_UV * uv_hscale;
593 	yscale = yscale_UV * uv_vscale;
594 	/*} else {
595 	  xscale_UV = 0;
596 	  yscale_UV = 0;
597 	  }*/
598 
599 	if (xscale != overlay->old_xscale || yscale != overlay->old_yscale)
600 		scale_changed = true;
601 	overlay->old_xscale = xscale;
602 	overlay->old_yscale = yscale;
603 
604 	regs->YRGBSCALE = (((yscale & FRACT_MASK) << 20) |
605 			   ((xscale >> FP_SHIFT)  << 16) |
606 			   ((xscale & FRACT_MASK) << 3));
607 
608 	regs->UVSCALE = (((yscale_UV & FRACT_MASK) << 20) |
609 			 ((xscale_UV >> FP_SHIFT)  << 16) |
610 			 ((xscale_UV & FRACT_MASK) << 3));
611 
612 	regs->UVSCALEV = ((((yscale    >> FP_SHIFT) << 16) |
613 			   ((yscale_UV >> FP_SHIFT) << 0)));
614 
615 	if (scale_changed)
616 		update_polyphase_filter(regs);
617 
618 	return scale_changed;
619 }
620 
621 static void update_colorkey(struct intel_overlay *overlay,
622 			    struct overlay_registers *regs)
623 {
624 	u32 key = overlay->color_key;
625 
626 	switch (overlay->crtc->base.fb->bits_per_pixel) {
627 	case 8:
628 		regs->DCLRKV = 0;
629 		regs->DCLRKM = CLK_RGB8I_MASK | DST_KEY_ENABLE;
630 		break;
631 
632 	case 16:
633 		if (overlay->crtc->base.fb->depth == 15) {
634 			regs->DCLRKV = RGB15_TO_COLORKEY(key);
635 			regs->DCLRKM = CLK_RGB15_MASK | DST_KEY_ENABLE;
636 		} else {
637 			regs->DCLRKV = RGB16_TO_COLORKEY(key);
638 			regs->DCLRKM = CLK_RGB16_MASK | DST_KEY_ENABLE;
639 		}
640 		break;
641 
642 	case 24:
643 	case 32:
644 		regs->DCLRKV = key;
645 		regs->DCLRKM = CLK_RGB24_MASK | DST_KEY_ENABLE;
646 		break;
647 	}
648 }
649 
650 static u32 overlay_cmd_reg(struct put_image_params *params)
651 {
652 	u32 cmd = OCMD_ENABLE | OCMD_BUF_TYPE_FRAME | OCMD_BUFFER0;
653 
654 	if (params->format & I915_OVERLAY_YUV_PLANAR) {
655 		switch (params->format & I915_OVERLAY_DEPTH_MASK) {
656 		case I915_OVERLAY_YUV422:
657 			cmd |= OCMD_YUV_422_PLANAR;
658 			break;
659 		case I915_OVERLAY_YUV420:
660 			cmd |= OCMD_YUV_420_PLANAR;
661 			break;
662 		case I915_OVERLAY_YUV411:
663 		case I915_OVERLAY_YUV410:
664 			cmd |= OCMD_YUV_410_PLANAR;
665 			break;
666 		}
667 	} else { /* YUV packed */
668 		switch (params->format & I915_OVERLAY_DEPTH_MASK) {
669 		case I915_OVERLAY_YUV422:
670 			cmd |= OCMD_YUV_422_PACKED;
671 			break;
672 		case I915_OVERLAY_YUV411:
673 			cmd |= OCMD_YUV_411_PACKED;
674 			break;
675 		}
676 
677 		switch (params->format & I915_OVERLAY_SWAP_MASK) {
678 		case I915_OVERLAY_NO_SWAP:
679 			break;
680 		case I915_OVERLAY_UV_SWAP:
681 			cmd |= OCMD_UV_SWAP;
682 			break;
683 		case I915_OVERLAY_Y_SWAP:
684 			cmd |= OCMD_Y_SWAP;
685 			break;
686 		case I915_OVERLAY_Y_AND_UV_SWAP:
687 			cmd |= OCMD_Y_AND_UV_SWAP;
688 			break;
689 		}
690 	}
691 
692 	return cmd;
693 }
694 
695 static u32
696 max_u32(u32 a, u32 b)
697 {
698 
699 	return (a > b ? a : b);
700 }
701 
702 static int intel_overlay_do_put_image(struct intel_overlay *overlay,
703 				      struct drm_i915_gem_object *new_bo,
704 				      struct put_image_params *params)
705 {
706 	int ret, tmp_width;
707 	struct overlay_registers *regs;
708 	bool scale_changed = false;
709 
710 	KASSERT(overlay != NULL, ("No overlay ?"));
711 	DRM_LOCK_ASSERT(overlay->dev);
712 
713 	ret = intel_overlay_release_old_vid(overlay);
714 	if (ret != 0)
715 		return ret;
716 
717 	ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL);
718 	if (ret != 0)
719 		goto out_unpin;
720 
721 	ret = i915_gem_object_put_fence(new_bo);
722 	if (ret)
723 		goto out_unpin;
724 
725 	if (!overlay->active) {
726 		regs = intel_overlay_map_regs(overlay);
727 		if (!regs) {
728 			ret = -ENOMEM;
729 			goto out_unpin;
730 		}
731 		regs->OCONFIG = OCONF_CC_OUT_8BIT;
732 		if (IS_GEN4(overlay->dev))
733 			regs->OCONFIG |= OCONF_CSC_MODE_BT709;
734 		regs->OCONFIG |= overlay->crtc->pipe == 0 ?
735 			OCONF_PIPE_A : OCONF_PIPE_B;
736 		intel_overlay_unmap_regs(overlay, regs);
737 
738 		ret = intel_overlay_on(overlay);
739 		if (ret != 0)
740 			goto out_unpin;
741 	}
742 
743 	regs = intel_overlay_map_regs(overlay);
744 	if (!regs) {
745 		ret = -ENOMEM;
746 		goto out_unpin;
747 	}
748 
749 	regs->DWINPOS = (params->dst_y << 16) | params->dst_x;
750 	regs->DWINSZ = (params->dst_h << 16) | params->dst_w;
751 
752 	if (params->format & I915_OVERLAY_YUV_PACKED)
753 		tmp_width = packed_width_bytes(params->format, params->src_w);
754 	else
755 		tmp_width = params->src_w;
756 
757 	regs->SWIDTH = params->src_w;
758 	regs->SWIDTHSW = calc_swidthsw(overlay->dev,
759 				       params->offset_Y, tmp_width);
760 	regs->SHEIGHT = params->src_h;
761 	regs->OBUF_0Y = new_bo->gtt_offset + params->offset_Y;
762 	regs->OSTRIDE = params->stride_Y;
763 
764 	if (params->format & I915_OVERLAY_YUV_PLANAR) {
765 		int uv_hscale = uv_hsubsampling(params->format);
766 		int uv_vscale = uv_vsubsampling(params->format);
767 		u32 tmp_U, tmp_V;
768 		regs->SWIDTH |= (params->src_w/uv_hscale) << 16;
769 		tmp_U = calc_swidthsw(overlay->dev, params->offset_U,
770 				      params->src_w/uv_hscale);
771 		tmp_V = calc_swidthsw(overlay->dev, params->offset_V,
772 				      params->src_w/uv_hscale);
773 		regs->SWIDTHSW |= max_u32(tmp_U, tmp_V) << 16;
774 		regs->SHEIGHT |= (params->src_h/uv_vscale) << 16;
775 		regs->OBUF_0U = new_bo->gtt_offset + params->offset_U;
776 		regs->OBUF_0V = new_bo->gtt_offset + params->offset_V;
777 		regs->OSTRIDE |= params->stride_UV << 16;
778 	}
779 
780 	scale_changed = update_scaling_factors(overlay, regs, params);
781 
782 	update_colorkey(overlay, regs);
783 
784 	regs->OCMD = overlay_cmd_reg(params);
785 
786 	intel_overlay_unmap_regs(overlay, regs);
787 
788 	ret = intel_overlay_continue(overlay, scale_changed);
789 	if (ret)
790 		goto out_unpin;
791 
792 	overlay->old_vid_bo = overlay->vid_bo;
793 	overlay->vid_bo = new_bo;
794 
795 	return 0;
796 
797 out_unpin:
798 	i915_gem_object_unpin(new_bo);
799 	return ret;
800 }
801 
802 int intel_overlay_switch_off(struct intel_overlay *overlay)
803 {
804 	struct overlay_registers *regs;
805 	int ret;
806 
807 	DRM_LOCK_ASSERT(overlay->dev);
808 
809 	ret = intel_overlay_recover_from_interrupt(overlay);
810 	if (ret != 0)
811 		return ret;
812 
813 	if (!overlay->active)
814 		return 0;
815 
816 	ret = intel_overlay_release_old_vid(overlay);
817 	if (ret != 0)
818 		return ret;
819 
820 	regs = intel_overlay_map_regs(overlay);
821 	regs->OCMD = 0;
822 	intel_overlay_unmap_regs(overlay, regs);
823 
824 	ret = intel_overlay_off(overlay);
825 	if (ret != 0)
826 		return ret;
827 
828 	intel_overlay_off_tail(overlay);
829 	return 0;
830 }
831 
832 static int check_overlay_possible_on_crtc(struct intel_overlay *overlay,
833 					  struct intel_crtc *crtc)
834 {
835 	drm_i915_private_t *dev_priv = overlay->dev->dev_private;
836 
837 	if (!crtc->active)
838 		return -EINVAL;
839 
840 	/* can't use the overlay with double wide pipe */
841 	if (INTEL_INFO(overlay->dev)->gen < 4 &&
842 	    (I915_READ(PIPECONF(crtc->pipe)) & (PIPECONF_DOUBLE_WIDE | PIPECONF_ENABLE)) != PIPECONF_ENABLE)
843 		return -EINVAL;
844 
845 	return 0;
846 }
847 
848 static void update_pfit_vscale_ratio(struct intel_overlay *overlay)
849 {
850 	struct drm_device *dev = overlay->dev;
851 	drm_i915_private_t *dev_priv = dev->dev_private;
852 	u32 pfit_control = I915_READ(PFIT_CONTROL);
853 	u32 ratio;
854 
855 	/* XXX: This is not the same logic as in the xorg driver, but more in
856 	 * line with the intel documentation for the i965
857 	 */
858 	if (INTEL_INFO(dev)->gen >= 4) {
859 		/* on i965 use the PGM reg to read out the autoscaler values */
860 		ratio = I915_READ(PFIT_PGM_RATIOS) >> PFIT_VERT_SCALE_SHIFT_965;
861 	} else {
862 		if (pfit_control & VERT_AUTO_SCALE)
863 			ratio = I915_READ(PFIT_AUTO_RATIOS);
864 		else
865 			ratio = I915_READ(PFIT_PGM_RATIOS);
866 		ratio >>= PFIT_VERT_SCALE_SHIFT;
867 	}
868 
869 	overlay->pfit_vscale_ratio = ratio;
870 }
871 
872 static int check_overlay_dst(struct intel_overlay *overlay,
873 			     struct drm_intel_overlay_put_image *rec)
874 {
875 	struct drm_display_mode *mode = &overlay->crtc->base.mode;
876 
877 	if (rec->dst_x < mode->hdisplay &&
878 	    rec->dst_x + rec->dst_width <= mode->hdisplay &&
879 	    rec->dst_y < mode->vdisplay &&
880 	    rec->dst_y + rec->dst_height <= mode->vdisplay)
881 		return 0;
882 	else
883 		return -EINVAL;
884 }
885 
886 static int check_overlay_scaling(struct put_image_params *rec)
887 {
888 	u32 tmp;
889 
890 	/* downscaling limit is 8.0 */
891 	tmp = ((rec->src_scan_h << 16) / rec->dst_h) >> 16;
892 	if (tmp > 7)
893 		return -EINVAL;
894 	tmp = ((rec->src_scan_w << 16) / rec->dst_w) >> 16;
895 	if (tmp > 7)
896 		return -EINVAL;
897 
898 	return 0;
899 }
900 
901 static int check_overlay_src(struct drm_device *dev,
902 			     struct drm_intel_overlay_put_image *rec,
903 			     struct drm_i915_gem_object *new_bo)
904 {
905 	int uv_hscale = uv_hsubsampling(rec->flags);
906 	int uv_vscale = uv_vsubsampling(rec->flags);
907 	u32 stride_mask;
908 	int depth;
909 	u32 tmp;
910 
911 	/* check src dimensions */
912 	if (IS_845G(dev) || IS_I830(dev)) {
913 		if (rec->src_height > IMAGE_MAX_HEIGHT_LEGACY ||
914 		    rec->src_width  > IMAGE_MAX_WIDTH_LEGACY)
915 			return -EINVAL;
916 	} else {
917 		if (rec->src_height > IMAGE_MAX_HEIGHT ||
918 		    rec->src_width  > IMAGE_MAX_WIDTH)
919 			return -EINVAL;
920 	}
921 
922 	/* better safe than sorry, use 4 as the maximal subsampling ratio */
923 	if (rec->src_height < N_VERT_Y_TAPS*4 ||
924 	    rec->src_width  < N_HORIZ_Y_TAPS*4)
925 		return -EINVAL;
926 
927 	/* check alignment constraints */
928 	switch (rec->flags & I915_OVERLAY_TYPE_MASK) {
929 	case I915_OVERLAY_RGB:
930 		/* not implemented */
931 		return -EINVAL;
932 
933 	case I915_OVERLAY_YUV_PACKED:
934 		if (uv_vscale != 1)
935 			return -EINVAL;
936 
937 		depth = packed_depth_bytes(rec->flags);
938 		if (depth < 0)
939 			return depth;
940 
941 		/* ignore UV planes */
942 		rec->stride_UV = 0;
943 		rec->offset_U = 0;
944 		rec->offset_V = 0;
945 		/* check pixel alignment */
946 		if (rec->offset_Y % depth)
947 			return -EINVAL;
948 		break;
949 
950 	case I915_OVERLAY_YUV_PLANAR:
951 		if (uv_vscale < 0 || uv_hscale < 0)
952 			return -EINVAL;
953 		/* no offset restrictions for planar formats */
954 		break;
955 
956 	default:
957 		return -EINVAL;
958 	}
959 
960 	if (rec->src_width % uv_hscale)
961 		return -EINVAL;
962 
963 	/* stride checking */
964 	if (IS_I830(dev) || IS_845G(dev))
965 		stride_mask = 255;
966 	else
967 		stride_mask = 63;
968 
969 	if (rec->stride_Y & stride_mask || rec->stride_UV & stride_mask)
970 		return -EINVAL;
971 	if (IS_GEN4(dev) && rec->stride_Y < 512)
972 		return -EINVAL;
973 
974 	tmp = (rec->flags & I915_OVERLAY_TYPE_MASK) == I915_OVERLAY_YUV_PLANAR ?
975 		4096 : 8192;
976 	if (rec->stride_Y > tmp || rec->stride_UV > 2*1024)
977 		return -EINVAL;
978 
979 	/* check buffer dimensions */
980 	switch (rec->flags & I915_OVERLAY_TYPE_MASK) {
981 	case I915_OVERLAY_RGB:
982 	case I915_OVERLAY_YUV_PACKED:
983 		/* always 4 Y values per depth pixels */
984 		if (packed_width_bytes(rec->flags, rec->src_width) > rec->stride_Y)
985 			return -EINVAL;
986 
987 		tmp = rec->stride_Y*rec->src_height;
988 		if (rec->offset_Y + tmp > new_bo->base.size)
989 			return -EINVAL;
990 		break;
991 
992 	case I915_OVERLAY_YUV_PLANAR:
993 		if (rec->src_width > rec->stride_Y)
994 			return -EINVAL;
995 		if (rec->src_width/uv_hscale > rec->stride_UV)
996 			return -EINVAL;
997 
998 		tmp = rec->stride_Y * rec->src_height;
999 		if (rec->offset_Y + tmp > new_bo->base.size)
1000 			return -EINVAL;
1001 
1002 		tmp = rec->stride_UV * (rec->src_height / uv_vscale);
1003 		if (rec->offset_U + tmp > new_bo->base.size ||
1004 		    rec->offset_V + tmp > new_bo->base.size)
1005 			return -EINVAL;
1006 		break;
1007 	}
1008 
1009 	return 0;
1010 }
1011 
1012 /**
1013  * Return the pipe currently connected to the panel fitter,
1014  * or -1 if the panel fitter is not present or not in use
1015  */
1016 static int intel_panel_fitter_pipe(struct drm_device *dev)
1017 {
1018 	struct drm_i915_private *dev_priv = dev->dev_private;
1019 	u32  pfit_control;
1020 
1021 	/* i830 doesn't have a panel fitter */
1022 	if (IS_I830(dev))
1023 		return -1;
1024 
1025 	pfit_control = I915_READ(PFIT_CONTROL);
1026 
1027 	/* See if the panel fitter is in use */
1028 	if ((pfit_control & PFIT_ENABLE) == 0)
1029 		return -1;
1030 
1031 	/* 965 can place panel fitter on either pipe */
1032 	if (IS_GEN4(dev))
1033 		return (pfit_control >> 29) & 0x3;
1034 
1035 	/* older chips can only use pipe 1 */
1036 	return 1;
1037 }
1038 
1039 int intel_overlay_put_image(struct drm_device *dev, void *data,
1040 			    struct drm_file *file_priv)
1041 {
1042 	struct drm_intel_overlay_put_image *put_image_rec = data;
1043 	drm_i915_private_t *dev_priv = dev->dev_private;
1044 	struct intel_overlay *overlay;
1045 	struct drm_mode_object *drmmode_obj;
1046 	struct intel_crtc *crtc;
1047 	struct drm_i915_gem_object *new_bo;
1048 	struct put_image_params *params;
1049 	int ret;
1050 
1051 	if (!dev_priv) {
1052 		DRM_ERROR("called with no initialization\n");
1053 		return -EINVAL;
1054 	}
1055 
1056 	overlay = dev_priv->overlay;
1057 	if (!overlay) {
1058 		DRM_DEBUG("userspace bug: no overlay\n");
1059 		return -ENODEV;
1060 	}
1061 
1062 	if (!(put_image_rec->flags & I915_OVERLAY_ENABLE)) {
1063 		lockmgr(&dev->mode_config.mutex, LK_EXCLUSIVE);
1064 		DRM_LOCK(dev);
1065 
1066 		ret = intel_overlay_switch_off(overlay);
1067 
1068 		DRM_UNLOCK(dev);
1069 		lockmgr(&dev->mode_config.mutex, LK_RELEASE);
1070 
1071 		return ret;
1072 	}
1073 
1074 	params = kmalloc(sizeof(struct put_image_params), DRM_I915_GEM,
1075 	    M_WAITOK | M_ZERO);
1076 
1077 	drmmode_obj = drm_mode_object_find(dev, put_image_rec->crtc_id,
1078 					   DRM_MODE_OBJECT_CRTC);
1079 	if (!drmmode_obj) {
1080 		ret = -ENOENT;
1081 		goto out_free;
1082 	}
1083 	crtc = to_intel_crtc(obj_to_crtc(drmmode_obj));
1084 
1085 	new_bo = to_intel_bo(drm_gem_object_lookup(dev, file_priv,
1086 						   put_image_rec->bo_handle));
1087 	if (&new_bo->base == NULL) {
1088 		ret = -ENOENT;
1089 		goto out_free;
1090 	}
1091 
1092 	lockmgr(&dev->mode_config.mutex, LK_EXCLUSIVE);
1093 	DRM_LOCK(dev);
1094 
1095 	if (new_bo->tiling_mode) {
1096 		DRM_ERROR("buffer used for overlay image can not be tiled\n");
1097 		ret = -EINVAL;
1098 		goto out_unlock;
1099 	}
1100 
1101 	ret = intel_overlay_recover_from_interrupt(overlay);
1102 	if (ret != 0)
1103 		goto out_unlock;
1104 
1105 	if (overlay->crtc != crtc) {
1106 		struct drm_display_mode *mode = &crtc->base.mode;
1107 		ret = intel_overlay_switch_off(overlay);
1108 		if (ret != 0)
1109 			goto out_unlock;
1110 
1111 		ret = check_overlay_possible_on_crtc(overlay, crtc);
1112 		if (ret != 0)
1113 			goto out_unlock;
1114 
1115 		overlay->crtc = crtc;
1116 		crtc->overlay = overlay;
1117 
1118 		/* line too wide, i.e. one-line-mode */
1119 		if (mode->hdisplay > 1024 &&
1120 		    intel_panel_fitter_pipe(dev) == crtc->pipe) {
1121 			overlay->pfit_active = 1;
1122 			update_pfit_vscale_ratio(overlay);
1123 		} else
1124 			overlay->pfit_active = 0;
1125 	}
1126 
1127 	ret = check_overlay_dst(overlay, put_image_rec);
1128 	if (ret != 0)
1129 		goto out_unlock;
1130 
1131 	if (overlay->pfit_active) {
1132 		params->dst_y = ((((u32)put_image_rec->dst_y) << 12) /
1133 				 overlay->pfit_vscale_ratio);
1134 		/* shifting right rounds downwards, so add 1 */
1135 		params->dst_h = ((((u32)put_image_rec->dst_height) << 12) /
1136 				 overlay->pfit_vscale_ratio) + 1;
1137 	} else {
1138 		params->dst_y = put_image_rec->dst_y;
1139 		params->dst_h = put_image_rec->dst_height;
1140 	}
1141 	params->dst_x = put_image_rec->dst_x;
1142 	params->dst_w = put_image_rec->dst_width;
1143 
1144 	params->src_w = put_image_rec->src_width;
1145 	params->src_h = put_image_rec->src_height;
1146 	params->src_scan_w = put_image_rec->src_scan_width;
1147 	params->src_scan_h = put_image_rec->src_scan_height;
1148 	if (params->src_scan_h > params->src_h ||
1149 	    params->src_scan_w > params->src_w) {
1150 		ret = -EINVAL;
1151 		goto out_unlock;
1152 	}
1153 
1154 	ret = check_overlay_src(dev, put_image_rec, new_bo);
1155 	if (ret != 0)
1156 		goto out_unlock;
1157 	params->format = put_image_rec->flags & ~I915_OVERLAY_FLAGS_MASK;
1158 	params->stride_Y = put_image_rec->stride_Y;
1159 	params->stride_UV = put_image_rec->stride_UV;
1160 	params->offset_Y = put_image_rec->offset_Y;
1161 	params->offset_U = put_image_rec->offset_U;
1162 	params->offset_V = put_image_rec->offset_V;
1163 
1164 	/* Check scaling after src size to prevent a divide-by-zero. */
1165 	ret = check_overlay_scaling(params);
1166 	if (ret != 0)
1167 		goto out_unlock;
1168 
1169 	ret = intel_overlay_do_put_image(overlay, new_bo, params);
1170 	if (ret != 0)
1171 		goto out_unlock;
1172 
1173 	DRM_UNLOCK(dev);
1174 	lockmgr(&dev->mode_config.mutex, LK_RELEASE);
1175 
1176 	drm_free(params, DRM_I915_GEM);
1177 
1178 	return 0;
1179 
1180 out_unlock:
1181 	DRM_UNLOCK(dev);
1182 	lockmgr(&dev->mode_config.mutex, LK_RELEASE);
1183 	drm_gem_object_unreference_unlocked(&new_bo->base);
1184 out_free:
1185 	drm_free(params, DRM_I915_GEM);
1186 
1187 	return ret;
1188 }
1189 
1190 static void update_reg_attrs(struct intel_overlay *overlay,
1191 			     struct overlay_registers *regs)
1192 {
1193 	regs->OCLRC0 = (overlay->contrast << 18) | (overlay->brightness & 0xff);
1194 	regs->OCLRC1 = overlay->saturation;
1195 }
1196 
/*
 * Check that two packed gamma values are well formed and ordered.
 *
 * Each value holds three 8-bit channels in bits 0-23.  Returns false when
 * either value has any of bits 24-31 set, or when any channel of gamma1 is
 * not strictly smaller than the same channel of gamma2; true otherwise.
 */
static bool check_gamma_bounds(u32 gamma1, u32 gamma2)
{
	unsigned int shift;

	/* bits 24-31 must be clear in both values */
	if ((gamma1 | gamma2) & 0xff000000)
		return false;

	/* compare the three channels byte by byte */
	for (shift = 0; shift < 24; shift += 8) {
		u32 lower = (gamma1 >> shift) & 0xff;
		u32 upper = (gamma2 >> shift) & 0xff;

		if (lower >= upper)
			return false;
	}

	return true;
}
1211 
/*
 * Reject a gamma5 value in which any of the three low bytes equals 0x80.
 *
 * Returns true when the value is acceptable, false otherwise.  Bits 24-31
 * are not examined.  The function name suggests this works around a
 * hardware erratum; the details are not visible here.
 */
static bool check_gamma5_errata(u32 gamma5)
{
	return ((gamma5 >> 0) & 0xff) != 0x80 &&
	       ((gamma5 >> 8) & 0xff) != 0x80 &&
	       ((gamma5 >> 16) & 0xff) != 0x80;
}
1223 
1224 static int check_gamma(struct drm_intel_overlay_attrs *attrs)
1225 {
1226 	if (!check_gamma_bounds(0, attrs->gamma0) ||
1227 	    !check_gamma_bounds(attrs->gamma0, attrs->gamma1) ||
1228 	    !check_gamma_bounds(attrs->gamma1, attrs->gamma2) ||
1229 	    !check_gamma_bounds(attrs->gamma2, attrs->gamma3) ||
1230 	    !check_gamma_bounds(attrs->gamma3, attrs->gamma4) ||
1231 	    !check_gamma_bounds(attrs->gamma4, attrs->gamma5) ||
1232 	    !check_gamma_bounds(attrs->gamma5, 0x00ffffff))
1233 		return -EINVAL;
1234 
1235 	if (!check_gamma5_errata(attrs->gamma5))
1236 		return -EINVAL;
1237 
1238 	return 0;
1239 }
1240 
1241 int intel_overlay_attrs(struct drm_device *dev, void *data,
1242 			struct drm_file *file_priv)
1243 {
1244 	struct drm_intel_overlay_attrs *attrs = data;
1245 	drm_i915_private_t *dev_priv = dev->dev_private;
1246 	struct intel_overlay *overlay;
1247 	struct overlay_registers *regs;
1248 	int ret;
1249 
1250 	if (!dev_priv) {
1251 		DRM_ERROR("called with no initialization\n");
1252 		return -EINVAL;
1253 	}
1254 
1255 	overlay = dev_priv->overlay;
1256 	if (!overlay) {
1257 		DRM_DEBUG("userspace bug: no overlay\n");
1258 		return -ENODEV;
1259 	}
1260 
1261 	lockmgr(&dev->mode_config.mutex, LK_EXCLUSIVE);
1262 	DRM_LOCK(dev);
1263 
1264 	ret = -EINVAL;
1265 	if (!(attrs->flags & I915_OVERLAY_UPDATE_ATTRS)) {
1266 		attrs->color_key  = overlay->color_key;
1267 		attrs->brightness = overlay->brightness;
1268 		attrs->contrast   = overlay->contrast;
1269 		attrs->saturation = overlay->saturation;
1270 
1271 		if (!IS_GEN2(dev)) {
1272 			attrs->gamma0 = I915_READ(OGAMC0);
1273 			attrs->gamma1 = I915_READ(OGAMC1);
1274 			attrs->gamma2 = I915_READ(OGAMC2);
1275 			attrs->gamma3 = I915_READ(OGAMC3);
1276 			attrs->gamma4 = I915_READ(OGAMC4);
1277 			attrs->gamma5 = I915_READ(OGAMC5);
1278 		}
1279 	} else {
1280 		if (attrs->brightness < -128 || attrs->brightness > 127)
1281 			goto out_unlock;
1282 		if (attrs->contrast > 255)
1283 			goto out_unlock;
1284 		if (attrs->saturation > 1023)
1285 			goto out_unlock;
1286 
1287 		overlay->color_key  = attrs->color_key;
1288 		overlay->brightness = attrs->brightness;
1289 		overlay->contrast   = attrs->contrast;
1290 		overlay->saturation = attrs->saturation;
1291 
1292 		regs = intel_overlay_map_regs(overlay);
1293 		if (!regs) {
1294 			ret = -ENOMEM;
1295 			goto out_unlock;
1296 		}
1297 
1298 		update_reg_attrs(overlay, regs);
1299 
1300 		intel_overlay_unmap_regs(overlay, regs);
1301 
1302 		if (attrs->flags & I915_OVERLAY_UPDATE_GAMMA) {
1303 			if (IS_GEN2(dev))
1304 				goto out_unlock;
1305 
1306 			if (overlay->active) {
1307 				ret = -EBUSY;
1308 				goto out_unlock;
1309 			}
1310 
1311 			ret = check_gamma(attrs);
1312 			if (ret)
1313 				goto out_unlock;
1314 
1315 			I915_WRITE(OGAMC0, attrs->gamma0);
1316 			I915_WRITE(OGAMC1, attrs->gamma1);
1317 			I915_WRITE(OGAMC2, attrs->gamma2);
1318 			I915_WRITE(OGAMC3, attrs->gamma3);
1319 			I915_WRITE(OGAMC4, attrs->gamma4);
1320 			I915_WRITE(OGAMC5, attrs->gamma5);
1321 		}
1322 	}
1323 
1324 	ret = 0;
1325 out_unlock:
1326 	DRM_UNLOCK(dev);
1327 	lockmgr(&dev->mode_config.mutex, LK_RELEASE);
1328 
1329 	return ret;
1330 }
1331 
1332 void intel_setup_overlay(struct drm_device *dev)
1333 {
1334 	drm_i915_private_t *dev_priv = dev->dev_private;
1335 	struct intel_overlay *overlay;
1336 	struct drm_i915_gem_object *reg_bo;
1337 	struct overlay_registers *regs;
1338 	int ret;
1339 
1340 	if (!HAS_OVERLAY(dev))
1341 		return;
1342 
1343 	overlay = kmalloc(sizeof(struct intel_overlay), DRM_I915_GEM,
1344 	    M_WAITOK | M_ZERO);
1345 	DRM_LOCK(dev);
1346 	if (dev_priv->overlay != NULL)
1347 		goto out_free;
1348 	overlay->dev = dev;
1349 
1350 	reg_bo = i915_gem_alloc_object(dev, PAGE_SIZE);
1351 	if (!reg_bo)
1352 		goto out_free;
1353 	overlay->reg_bo = reg_bo;
1354 
1355 	if (OVERLAY_NEEDS_PHYSICAL(dev)) {
1356 		ret = i915_gem_attach_phys_object(dev, reg_bo,
1357 						  I915_GEM_PHYS_OVERLAY_REGS,
1358 						  PAGE_SIZE);
1359 		if (ret) {
1360 			DRM_ERROR("failed to attach phys overlay regs\n");
1361 			goto out_free_bo;
1362 		}
1363 		overlay->flip_addr = reg_bo->phys_obj->handle->busaddr;
1364 	} else {
1365 		ret = i915_gem_object_pin(reg_bo, PAGE_SIZE, true);
1366 		if (ret) {
1367 			DRM_ERROR("failed to pin overlay register bo\n");
1368 			goto out_free_bo;
1369 		}
1370 		overlay->flip_addr = reg_bo->gtt_offset;
1371 
1372 		ret = i915_gem_object_set_to_gtt_domain(reg_bo, true);
1373 		if (ret) {
1374 			DRM_ERROR("failed to move overlay register bo into the GTT\n");
1375 			goto out_unpin_bo;
1376 		}
1377 	}
1378 
1379 	/* init all values */
1380 	overlay->color_key = 0x0101fe;
1381 	overlay->brightness = -19;
1382 	overlay->contrast = 75;
1383 	overlay->saturation = 146;
1384 
1385 	regs = intel_overlay_map_regs(overlay);
1386 	if (!regs)
1387 		goto out_unpin_bo;
1388 
1389 	memset(regs, 0, sizeof(struct overlay_registers));
1390 	update_polyphase_filter(regs);
1391 	update_reg_attrs(overlay, regs);
1392 
1393 	intel_overlay_unmap_regs(overlay, regs);
1394 
1395 	dev_priv->overlay = overlay;
1396 	DRM_INFO("initialized overlay support\n");
1397 	DRM_UNLOCK(dev);
1398 	return;
1399 
1400 out_unpin_bo:
1401 	if (!OVERLAY_NEEDS_PHYSICAL(dev))
1402 		i915_gem_object_unpin(reg_bo);
1403 out_free_bo:
1404 	drm_gem_object_unreference(&reg_bo->base);
1405 out_free:
1406 	DRM_UNLOCK(dev);
1407 	drm_free(overlay, DRM_I915_GEM);
1408 	return;
1409 }
1410 
1411 void intel_cleanup_overlay(struct drm_device *dev)
1412 {
1413 	drm_i915_private_t *dev_priv = dev->dev_private;
1414 
1415 	if (!dev_priv->overlay)
1416 		return;
1417 
1418 	/* The bo's should be free'd by the generic code already.
1419 	 * Furthermore modesetting teardown happens beforehand so the
1420 	 * hardware should be off already */
1421 	KASSERT(!dev_priv->overlay->active, ("Overlay still active"));
1422 
1423 	drm_gem_object_unreference_unlocked(&dev_priv->overlay->reg_bo->base);
1424 	drm_free(dev_priv->overlay, DRM_I915_GEM);
1425 }
1426 
1427 struct intel_overlay_error_state {
1428 	struct overlay_registers regs;
1429 	unsigned long base;
1430 	u32 dovsta;
1431 	u32 isr;
1432 };
1433 
1434 struct intel_overlay_error_state *
1435 intel_overlay_capture_error_state(struct drm_device *dev)
1436 {
1437 	drm_i915_private_t *dev_priv = dev->dev_private;
1438 	struct intel_overlay *overlay = dev_priv->overlay;
1439 	struct intel_overlay_error_state *error;
1440 	struct overlay_registers __iomem *regs;
1441 
1442 	if (!overlay || !overlay->active)
1443 		return NULL;
1444 
1445 	error = kmalloc(sizeof(*error), DRM_I915_GEM, M_NOWAIT);
1446 	if (error == NULL)
1447 		return NULL;
1448 
1449 	error->dovsta = I915_READ(DOVSTA);
1450 	error->isr = I915_READ(ISR);
1451 	if (OVERLAY_NEEDS_PHYSICAL(overlay->dev))
1452 		error->base = (long) overlay->reg_bo->phys_obj->handle->vaddr;
1453 	else
1454 		error->base = (long) overlay->reg_bo->gtt_offset;
1455 
1456 	regs = intel_overlay_map_regs(overlay);
1457 	if (!regs)
1458 		goto err;
1459 
1460 	memcpy(&error->regs, regs, sizeof(struct overlay_registers));
1461 	intel_overlay_unmap_regs(overlay, regs);
1462 
1463 	return (error);
1464 
1465 err:
1466 	drm_free(error, DRM_I915_GEM);
1467 	return (NULL);
1468 }
1469 
1470 void
1471 intel_overlay_print_error_state(struct sbuf *m,
1472     struct intel_overlay_error_state *error)
1473 {
1474 	sbuf_printf(m, "Overlay, status: 0x%08x, interrupt: 0x%08x\n",
1475 	    error->dovsta, error->isr);
1476 	sbuf_printf(m, "  Register file at 0x%08lx:\n",
1477 	    error->base);
1478 
1479 #define P(x) sbuf_printf(m, "    " #x ":	0x%08x\n", error->regs.x)
1480 	P(OBUF_0Y);
1481 	P(OBUF_1Y);
1482 	P(OBUF_0U);
1483 	P(OBUF_0V);
1484 	P(OBUF_1U);
1485 	P(OBUF_1V);
1486 	P(OSTRIDE);
1487 	P(YRGB_VPH);
1488 	P(UV_VPH);
1489 	P(HORZ_PH);
1490 	P(INIT_PHS);
1491 	P(DWINPOS);
1492 	P(DWINSZ);
1493 	P(SWIDTH);
1494 	P(SWIDTHSW);
1495 	P(SHEIGHT);
1496 	P(YRGBSCALE);
1497 	P(UVSCALE);
1498 	P(OCLRC0);
1499 	P(OCLRC1);
1500 	P(DCLRKV);
1501 	P(DCLRKM);
1502 	P(SCLRKVH);
1503 	P(SCLRKVL);
1504 	P(SCLRKEN);
1505 	P(OCONFIG);
1506 	P(OCMD);
1507 	P(OSTART_0Y);
1508 	P(OSTART_1Y);
1509 	P(OSTART_0U);
1510 	P(OSTART_0V);
1511 	P(OSTART_1U);
1512 	P(OSTART_1V);
1513 	P(OTILEOFF_0Y);
1514 	P(OTILEOFF_1Y);
1515 	P(OTILEOFF_0U);
1516 	P(OTILEOFF_0V);
1517 	P(OTILEOFF_1U);
1518 	P(OTILEOFF_1V);
1519 	P(FASTHSCALE);
1520 	P(UVSCALEV);
1521 #undef P
1522 }
1523