1 /* radeon_state.c -- State support for Radeon -*- linux-c -*- */
2 /*
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  */
29 
30 #include "drmP.h"
31 #include "drm.h"
32 #include "drm_sarea.h"
33 #include "radeon_drm.h"
34 #include "radeon_drv.h"
35 
36 /* ================================================================
37  * Helper functions for client state checking and fixup
38  */
39 
/* Validate a client-supplied card offset, repairing it when possible.
 *
 * *offset is accepted untouched when radeon_check_offset() already likes
 * it.  Otherwise two legacy conventions are tried: a zero-based offset
 * (relative to the start of the framebuffer) and an offset that treats the
 * AGP/GART aperture as if it were appended to the framebuffer.  When the
 * repaired value passes radeon_check_offset() it is written back through
 * 'offset'.
 *
 * Returns 0 on success (with *offset possibly rewritten) or -EINVAL when
 * no interpretation yields an acceptable offset.
 *
 * Note: it might be a good idea to also make sure the offset lands in some
 * "allowed" area, to protect things like the PCIE GART.
 */
static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
						    dev_priv,
						    struct drm_file *file_priv,
						    u32 * offset)
{
	const u32 fb_last = dev_priv->fb_location + dev_priv->fb_size - 1;
	u64 fixed = *offset;

	/* Sane client: the value is already usable as it stands. */
	if (radeon_check_offset(dev_priv, fixed))
		return 0;

	/* A value small enough to fit in framebuffer + GART space is taken
	 * to be zero based; shift it by the per-file delta (obtained from
	 * SETPARAM or calculated from fb_location).
	 */
	if (fixed < (dev_priv->fb_size + dev_priv->gart_size)) {
		struct drm_radeon_driver_file_fields *fields =
		    file_priv->driver_priv;

		fixed += fields->radeon_fb_delta;
	}

	/* Anything past the end of the framebuffer is assumed to have been
	 * aimed at the GART, so rebase it onto the GART start.
	 */
	if (fixed > fb_last)
		fixed = fixed - fb_last - 1 + dev_priv->gart_vm_start;

	/* Final verdict on the repaired value. */
	if (!radeon_check_offset(dev_priv, fixed))
		return -EINVAL;

	DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)fixed);
	*offset = fixed;
	return 0;
}
90 
/* Validate the memory offsets carried by one state packet.
 *
 * 'id' selects the packet layout (a RADEON_EMIT_* or R200_EMIT_* value) and
 * 'data' points at the packet payload.  Each offset found in the payload is
 * run through radeon_check_and_fixup_offset(), which may rewrite it in
 * place.  R200_EMIT_VAP_CTL carries no offset but has a TCL state flush
 * register write queued on the ring.
 *
 * Returns 0 when the packet is acceptable, -EINVAL when an offset is
 * rejected or the id is not recognised.
 */
static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
						     dev_priv,
						     struct drm_file *file_priv,
						     int id, u32 *data)
{
	u32 *offp;	/* offset dword currently being validated */
	int face;	/* index within a block of cubic map offsets */

	switch (id) {

	case RADEON_EMIT_PP_MISC:
		/* The depth offset sits at its register distance from PP_MISC */
		offp = &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4];
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  offp) != 0) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return -EINVAL;
		}
		return 0;

	case RADEON_EMIT_PP_CNTL:
		/* The colour offset sits at its register distance from PP_CNTL */
		offp = &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4];
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  offp) != 0) {
			DRM_ERROR("Invalid colour buffer offset\n");
			return -EINVAL;
		}
		return 0;

	case R200_EMIT_PP_TXOFFSET_0:
	case R200_EMIT_PP_TXOFFSET_1:
	case R200_EMIT_PP_TXOFFSET_2:
	case R200_EMIT_PP_TXOFFSET_3:
	case R200_EMIT_PP_TXOFFSET_4:
	case R200_EMIT_PP_TXOFFSET_5:
		/* The first payload dword is the texture offset */
		offp = &data[0];
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  offp) != 0) {
			DRM_ERROR("Invalid R200 texture offset\n");
			return -EINVAL;
		}
		return 0;

	case RADEON_EMIT_PP_TXFILTER_0:
	case RADEON_EMIT_PP_TXFILTER_1:
	case RADEON_EMIT_PP_TXFILTER_2:
		/* The texture offset sits at its register distance from TXFILTER */
		offp = &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4];
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  offp) != 0) {
			DRM_ERROR("Invalid R100 texture offset\n");
			return -EINVAL;
		}
		return 0;

	case R200_EMIT_PP_CUBIC_OFFSETS_0:
	case R200_EMIT_PP_CUBIC_OFFSETS_1:
	case R200_EMIT_PP_CUBIC_OFFSETS_2:
	case R200_EMIT_PP_CUBIC_OFFSETS_3:
	case R200_EMIT_PP_CUBIC_OFFSETS_4:
	case R200_EMIT_PP_CUBIC_OFFSETS_5:
		/* Five consecutive offsets, checked in order */
		for (face = 0, offp = data; face < 5; face++, offp++) {
			if (radeon_check_and_fixup_offset(dev_priv, file_priv,
							  offp) != 0) {
				DRM_ERROR("Invalid R200 cubic texture offset\n");
				return -EINVAL;
			}
		}
		return 0;

	case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:
		/* Five consecutive offsets, checked in order */
		for (face = 0, offp = data; face < 5; face++, offp++) {
			if (radeon_check_and_fixup_offset(dev_priv, file_priv,
							  offp) != 0) {
				DRM_ERROR("Invalid R100 cubic texture offset\n");
				return -EINVAL;
			}
		}
		return 0;

	case R200_EMIT_VAP_CTL: {
			/* No offsets here; queue a write of 0 to the TCL
			 * state flush register.
			 */
			RING_LOCALS;
			BEGIN_RING(2);
			OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
			ADVANCE_RING();
		}
		return 0;

	case RADEON_EMIT_RB3D_COLORPITCH:
	case RADEON_EMIT_RE_LINE_PATTERN:
	case RADEON_EMIT_SE_LINE_WIDTH:
	case RADEON_EMIT_PP_LUM_MATRIX:
	case RADEON_EMIT_PP_ROT_MATRIX_0:
	case RADEON_EMIT_RB3D_STENCILREFMASK:
	case RADEON_EMIT_SE_VPORT_XSCALE:
	case RADEON_EMIT_SE_CNTL:
	case RADEON_EMIT_SE_CNTL_STATUS:
	case RADEON_EMIT_RE_MISC:
	case RADEON_EMIT_PP_BORDER_COLOR_0:
	case RADEON_EMIT_PP_BORDER_COLOR_1:
	case RADEON_EMIT_PP_BORDER_COLOR_2:
	case RADEON_EMIT_SE_ZBIAS_FACTOR:
	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
	case R200_EMIT_PP_TXCBLEND_0:
	case R200_EMIT_PP_TXCBLEND_1:
	case R200_EMIT_PP_TXCBLEND_2:
	case R200_EMIT_PP_TXCBLEND_3:
	case R200_EMIT_PP_TXCBLEND_4:
	case R200_EMIT_PP_TXCBLEND_5:
	case R200_EMIT_PP_TXCBLEND_6:
	case R200_EMIT_PP_TXCBLEND_7:
	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
	case R200_EMIT_TFACTOR_0:
	case R200_EMIT_VTX_FMT_0:
	case R200_EMIT_MATRIX_SELECT_0:
	case R200_EMIT_TEX_PROC_CTL_2:
	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
	case R200_EMIT_PP_TXFILTER_0:
	case R200_EMIT_PP_TXFILTER_1:
	case R200_EMIT_PP_TXFILTER_2:
	case R200_EMIT_PP_TXFILTER_3:
	case R200_EMIT_PP_TXFILTER_4:
	case R200_EMIT_PP_TXFILTER_5:
	case R200_EMIT_VTE_CNTL:
	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
	case R200_EMIT_PP_TAM_DEBUG3:
	case R200_EMIT_PP_CNTL_X:
	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
	case R200_EMIT_RE_SCISSOR_TL_0:
	case R200_EMIT_RE_SCISSOR_TL_1:
	case R200_EMIT_RE_SCISSOR_TL_2:
	case R200_EMIT_SE_VAP_CNTL_STATUS:
	case R200_EMIT_SE_VTX_STATE_CNTL:
	case R200_EMIT_RE_POINTSIZE:
	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
	case R200_EMIT_PP_CUBIC_FACES_0:
	case R200_EMIT_PP_CUBIC_FACES_1:
	case R200_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_CUBIC_FACES_3:
	case R200_EMIT_PP_CUBIC_FACES_4:
	case R200_EMIT_PP_CUBIC_FACES_5:
	case RADEON_EMIT_PP_TEX_SIZE_0:
	case RADEON_EMIT_PP_TEX_SIZE_1:
	case RADEON_EMIT_PP_TEX_SIZE_2:
	case R200_EMIT_RB3D_BLENDCOLOR:
	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
	case RADEON_EMIT_PP_CUBIC_FACES_0:
	case RADEON_EMIT_PP_CUBIC_FACES_1:
	case RADEON_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_TRI_PERF_CNTL:
	case R200_EMIT_PP_AFS_0:
	case R200_EMIT_PP_AFS_1:
	case R200_EMIT_ATF_TFACTOR:
	case R200_EMIT_PP_TXCTLALL_0:
	case R200_EMIT_PP_TXCTLALL_1:
	case R200_EMIT_PP_TXCTLALL_2:
	case R200_EMIT_PP_TXCTLALL_3:
	case R200_EMIT_PP_TXCTLALL_4:
	case R200_EMIT_PP_TXCTLALL_5:
	case R200_EMIT_VAP_PVS_CNTL:
		/* Nothing to validate: no memory offsets in these packets */
		return 0;

	default:
		DRM_ERROR("Unknown state packet ID %d\n", id);
		return -EINVAL;
	}
}
264 
/* Validate one client-supplied type-3 CP packet and fix up its offsets.
 *
 * The packet starts at cmdbuf->buf.  Its length in dwords (header included)
 * is decoded from the header and stored in *cmdsz before any validation, so
 * *cmdsz is written even when the function fails.
 *
 * Only a whitelist of opcodes is accepted.  For opcodes that carry memory
 * offsets, each offset is passed through radeon_check_and_fixup_offset(),
 * which may rewrite it in place inside the command buffer.  No dword at or
 * beyond index *cmdsz is ever read or written: a packet too short to hold
 * the dwords its opcode requires is rejected.
 *
 * Returns 0 if the packet may be submitted, -EINVAL otherwise.
 */
static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
						     dev_priv,
						     struct drm_file *file_priv,
						     drm_radeon_kcmd_buffer_t *
						     cmdbuf,
						     unsigned int *cmdsz)
{
	u32 *cmd = (u32 *) cmdbuf->buf;
	u32 offset, narrays;
	int count, i, k;

	/* header dword + (count field + 1) payload dwords */
	*cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);

	if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
		DRM_ERROR("Not a type 3 packet\n");
		return -EINVAL;
	}

	if (4 * *cmdsz > cmdbuf->bufsz) {
		DRM_ERROR("Packet size larger than size of data provided\n");
		return -EINVAL;
	}

	switch(cmd[0] & 0xff00) {
	/* XXX Are there old drivers needing other packets? */

	case RADEON_3D_DRAW_IMMD:
	case RADEON_3D_DRAW_VBUF:
	case RADEON_3D_DRAW_INDX:
	case RADEON_WAIT_FOR_IDLE:
	case RADEON_CP_NOP:
	case RADEON_3D_CLEAR_ZMASK:
/*	case RADEON_CP_NEXT_CHAR:
	case RADEON_CP_PLY_NEXTSCAN:
	case RADEON_CP_SET_SCISSORS: */ /* probably safe but will never need them? */
		/* these packets are safe */
		break;

	case RADEON_CP_3D_DRAW_IMMD_2:
	case RADEON_CP_3D_DRAW_VBUF_2:
	case RADEON_CP_3D_DRAW_INDX_2:
	case RADEON_3D_CLEAR_HIZ:
		/* safe but r200 only */
		if ((dev_priv->chip_family < CHIP_R200) ||
		    (dev_priv->chip_family > CHIP_RV280)) {
			DRM_ERROR("Invalid 3d packet for non r200-class chip\n");
			return -EINVAL;
		}
		break;

	case RADEON_3D_LOAD_VBPNTR:
		count = (cmd[0] >> 16) & 0x3fff;

		if (count > 18) { /* 12 arrays max */
			DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
				  count);
			return -EINVAL;
		}

		/* carefully check packet contents */
		narrays = cmd[1] & ~0xc000;
		k = 0;
		i = 2;
		/* Payload layout: one attribute dword followed by up to two
		 * array offsets, repeated.  Valid indices are below count + 2.
		 */
		while ((k < narrays) && (i < (count + 2))) {
			i++;		/* skip attribute field */
			/* Truncated packet: no offset follows the attribute.
			 * Leave the loop so the consistency check below
			 * rejects it without touching cmd[i].
			 */
			if (i >= (count + 2))
				break;
			if (radeon_check_and_fixup_offset(dev_priv, file_priv,
							  &cmd[i])) {
				DRM_ERROR
				    ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
				     k, i);
				return -EINVAL;
			}
			k++;
			i++;
			if (k == narrays)
				break;
			/* Another array is expected but the packet ends here:
			 * again let the consistency check reject it.
			 */
			if (i >= (count + 2))
				break;
			/* have one more to process, they come in pairs */
			if (radeon_check_and_fixup_offset(dev_priv,
							  file_priv, &cmd[i]))
			{
				DRM_ERROR
				    ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
				     k, i);
				return -EINVAL;
			}
			k++;
			i++;
		}
		/* do the counts match what we expect ? */
		if ((k != narrays) || (i != (count + 2))) {
			DRM_ERROR
			    ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
			      k, i, narrays, count + 1);
			return -EINVAL;
		}
		break;

	case RADEON_3D_RNDR_GEN_INDX_PRIM:
		if (dev_priv->chip_family > CHIP_RS200) {
			DRM_ERROR("Invalid 3d packet for non-r100-class chip\n");
			return -EINVAL;
		}
		/* cmd[1] always exists: *cmdsz is at least 2 */
		if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[1])) {
				DRM_ERROR("Invalid rndr_gen_indx offset\n");
				return -EINVAL;
		}
		break;

	case RADEON_CP_INDX_BUFFER:
		/* safe but r200 only */
		if ((dev_priv->chip_family < CHIP_R200) ||
		    (dev_priv->chip_family > CHIP_RV280)) {
			DRM_ERROR("Invalid 3d packet for non-r200-class chip\n");
			return -EINVAL;
		}
		if ((cmd[1] & 0x8000ffff) != 0x80000810) {
			DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
			return -EINVAL;
		}
		/* The offset lives in cmd[2]; make sure the packet has it */
		if (*cmdsz < 3) {
			DRM_ERROR("Truncated indx_buffer packet\n");
			return -EINVAL;
		}
		if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[2])) {
			DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
			return -EINVAL;
		}
		break;

	case RADEON_CNTL_HOSTDATA_BLT:
	case RADEON_CNTL_PAINT_MULTI:
	case RADEON_CNTL_BITBLT_MULTI:
		/* MSB of opcode: next DWORD GUI_CNTL */
		if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			/* A pitch/offset dword is announced in cmd[2] */
			if (*cmdsz < 3) {
				DRM_ERROR("Truncated packet, no first offset\n");
				return -EINVAL;
			}
			/* low 22 bits hold the offset in 1KB units */
			offset = cmd[2] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, file_priv, &offset)) {
				DRM_ERROR("Invalid first packet offset\n");
				return -EINVAL;
			}
			cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
		}

		if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
		    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			/* Both source and destination: second one in cmd[3] */
			if (*cmdsz < 4) {
				DRM_ERROR("Truncated packet, no second offset\n");
				return -EINVAL;
			}
			offset = cmd[3] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, file_priv, &offset)) {
				DRM_ERROR("Invalid second packet offset\n");
				return -EINVAL;
			}
			cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
		}
		break;

	default:
		DRM_ERROR("Invalid packet type %x\n", cmd[0] & 0xff00);
		return -EINVAL;
	}

	return 0;
}
424 
425 /* ================================================================
426  * CP hardware state programming functions
427  */
428 
/* Queue the register writes that set the clip rectangle to 'box'.
 *
 * RADEON_RE_TOP_LEFT receives (y1, x1) and RADEON_RE_WIDTH_HEIGHT receives
 * (y2 - 1, x2 - 1), each packed with the y value in the upper 16 bits.
 */
static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
					     struct drm_clip_rect * box)
{
	u32 top_left;
	u32 bottom_right;
	RING_LOCALS;

	DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
		  box->x1, box->y1, box->x2, box->y2);

	top_left = (box->y1 << 16) | box->x1;
	bottom_right = ((box->y2 - 1) << 16) | (box->x2 - 1);

	/* Two single-register writes: header + value each */
	BEGIN_RING(4);
	OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
	OUT_RING(top_left);
	OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
	OUT_RING(bottom_right);
	ADVANCE_RING();
}
444 
445 /* Emit 1.1 state
446  */
/* Queue register writes for every state group flagged in 'dirty'.
 *
 * 'ctx' supplies the context registers and 'tex' the per-unit texture
 * registers (units 0..2 are read).  Groups are emitted in a fixed order,
 * one ring transaction per group.  The depth, colour and texture offsets
 * are validated with radeon_check_and_fixup_offset() first and may be
 * rewritten in place in 'ctx' / 'tex'.
 *
 * Returns 0 on success, or -EINVAL as soon as an offset is rejected.  Groups
 * emitted before the failing one have already been queued at that point.
 */
static int radeon_emit_state(drm_radeon_private_t * dev_priv,
			     struct drm_file *file_priv,
			     drm_radeon_context_regs_t * ctx,
			     drm_radeon_texture_regs_t * tex,
			     unsigned int dirty)
{
	RING_LOCALS;
	DRM_DEBUG("dirty=0x%08x\n", dirty);

	if (dirty & RADEON_UPLOAD_CONTEXT) {
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  &ctx->rb3d_depthoffset)) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return -EINVAL;
		}

		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  &ctx->rb3d_coloroffset)) {
			/* This is the colour buffer, not the depth buffer */
			DRM_ERROR("Invalid colour buffer offset\n");
			return -EINVAL;
		}

		/* Three register runs: 1+7, 1+3 and 1+1 dwords */
		BEGIN_RING(14);
		OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
		OUT_RING(ctx->pp_misc);
		OUT_RING(ctx->pp_fog_color);
		OUT_RING(ctx->re_solid_color);
		OUT_RING(ctx->rb3d_blendcntl);
		OUT_RING(ctx->rb3d_depthoffset);
		OUT_RING(ctx->rb3d_depthpitch);
		OUT_RING(ctx->rb3d_zstencilcntl);
		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
		OUT_RING(ctx->pp_cntl);
		OUT_RING(ctx->rb3d_cntl);
		OUT_RING(ctx->rb3d_coloroffset);
		OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
		OUT_RING(ctx->rb3d_colorpitch);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_VERTFMT) {
		BEGIN_RING(2);
		OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
		OUT_RING(ctx->se_coord_fmt);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_LINE) {
		BEGIN_RING(5);
		OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
		OUT_RING(ctx->re_line_pattern);
		OUT_RING(ctx->re_line_state);
		OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
		OUT_RING(ctx->se_line_width);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_BUMPMAP) {
		BEGIN_RING(5);
		OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
		OUT_RING(ctx->pp_lum_matrix);
		OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
		OUT_RING(ctx->pp_rot_matrix_0);
		OUT_RING(ctx->pp_rot_matrix_1);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_MASKS) {
		BEGIN_RING(4);
		OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
		OUT_RING(ctx->rb3d_stencilrefmask);
		OUT_RING(ctx->rb3d_ropcntl);
		OUT_RING(ctx->rb3d_planemask);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_VIEWPORT) {
		BEGIN_RING(7);
		OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
		OUT_RING(ctx->se_vport_xscale);
		OUT_RING(ctx->se_vport_xoffset);
		OUT_RING(ctx->se_vport_yscale);
		OUT_RING(ctx->se_vport_yoffset);
		OUT_RING(ctx->se_vport_zscale);
		OUT_RING(ctx->se_vport_zoffset);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_SETUP) {
		BEGIN_RING(4);
		OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
		OUT_RING(ctx->se_cntl);
		OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
		OUT_RING(ctx->se_cntl_status);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_MISC) {
		BEGIN_RING(2);
		OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
		OUT_RING(ctx->re_misc);
		ADVANCE_RING();
	}

	/* Texture units: validate the unit's offset, then emit its six
	 * filter/format/offset/blend registers and its border colour.
	 */
	if (dirty & RADEON_UPLOAD_TEX0) {
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  &tex[0].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 0\n");
			return -EINVAL;
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
		OUT_RING(tex[0].pp_txfilter);
		OUT_RING(tex[0].pp_txformat);
		OUT_RING(tex[0].pp_txoffset);
		OUT_RING(tex[0].pp_txcblend);
		OUT_RING(tex[0].pp_txablend);
		OUT_RING(tex[0].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
		OUT_RING(tex[0].pp_border_color);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX1) {
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  &tex[1].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 1\n");
			return -EINVAL;
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
		OUT_RING(tex[1].pp_txfilter);
		OUT_RING(tex[1].pp_txformat);
		OUT_RING(tex[1].pp_txoffset);
		OUT_RING(tex[1].pp_txcblend);
		OUT_RING(tex[1].pp_txablend);
		OUT_RING(tex[1].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
		OUT_RING(tex[1].pp_border_color);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX2) {
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  &tex[2].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 2\n");
			return -EINVAL;
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
		OUT_RING(tex[2].pp_txfilter);
		OUT_RING(tex[2].pp_txformat);
		OUT_RING(tex[2].pp_txoffset);
		OUT_RING(tex[2].pp_txcblend);
		OUT_RING(tex[2].pp_txablend);
		OUT_RING(tex[2].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
		OUT_RING(tex[2].pp_border_color);
		ADVANCE_RING();
	}

	return 0;
}
613 
614 /* Emit 1.2 state
615  */
/* Queue the z-bias registers when flagged dirty, then hand the context and
 * texture state over to radeon_emit_state().  The return value is whatever
 * radeon_emit_state() reports.
 */
static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
			      struct drm_file *file_priv,
			      drm_radeon_state_t * state)
{
	const int zbias_dirty = (state->dirty & RADEON_UPLOAD_ZBIAS) != 0;
	RING_LOCALS;

	if (zbias_dirty) {
		/* Header plus the factor/constant register pair */
		BEGIN_RING(3);
		OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
		OUT_RING(state->context2.se_zbias_factor);
		OUT_RING(state->context2.se_zbias_constant);
		ADVANCE_RING();
	}

	return radeon_emit_state(dev_priv, file_priv, &state->context,
				 state->tex, state->dirty);
}
633 
634 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
635  * 1.3 cmdbuffers allow all previous state to be updated as well as
636  * the tcl scalar and vector areas.
637  */
/* Table describing each state packet: the first register it writes, how
 * many entries it covers, and a printable name.
 *
 * NOTE(review): the row order looks like it mirrors the RADEON_EMIT_* /
 * R200_EMIT_* packet ids (the numbered comments below suggest the table is
 * indexed by id) - confirm against radeon_drm.h before reordering rows.
 */
static struct {
	int start;		/* first register of the packet */
	int len;		/* presumably the number of consecutive dwords - TODO confirm */
	const char *name;	/* packet name as a string */
} packet[RADEON_MAX_STATE_PACKETS] = {
	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
		    "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
		    "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
	{R200_PP_AFS_0, 32, "R200_PP_AFS_0"},     /* 85 */
	{R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
	/* The rows below reuse start registers that already appear above,
	 * with a longer len and a different name.
	 */
	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
	{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
};
742 
743 /* ================================================================
744  * Performance monitoring functions
745  */
746 
/* Paint a solid w x h rectangle with colour (r, g, b).
 *
 * (x, y) is relative to the top-left corner of the first clip rectangle in
 * the shared area.  The destination is the front buffer when the shared
 * area reports pfCurrentPage == 1, the back buffer otherwise.
 */
static void radeon_clear_box(drm_radeon_private_t * dev_priv,
			     int x, int y, int w, int h, int r, int g, int b)
{
	u32 color;
	u32 gui_cntl;
	u32 pitch_offset;
	RING_LOCALS;

	x += dev_priv->sarea_priv->boxes[0].x1;
	y += dev_priv->sarea_priv->boxes[0].y1;

	/* Pack the colour for the framebuffer format; anything that is not
	 * RGB565 is treated as ARGB8888 with alpha 0xff.
	 */
	if (dev_priv->color_fmt == RADEON_COLOR_FORMAT_RGB565) {
		color = (((r & 0xf8) << 8) |
			 ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
	} else {
		color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
	}

	gui_cntl = RADEON_GMC_DST_PITCH_OFFSET_CNTL |
	    RADEON_GMC_BRUSH_SOLID_COLOR |
	    (dev_priv->color_fmt << 8) |
	    RADEON_GMC_SRC_DATATYPE_COLOR |
	    RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS;

	/* Wait for 3D idle, then set the write mask to all ones */
	BEGIN_RING(4);
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
	OUT_RING(0xffffffff);
	ADVANCE_RING();

	/* The fill itself: one PAINT_MULTI packet */
	BEGIN_RING(6);

	OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
	OUT_RING(gui_cntl);

	pitch_offset = (dev_priv->sarea_priv->pfCurrentPage == 1) ?
	    dev_priv->front_pitch_offset : dev_priv->back_pitch_offset;
	OUT_RING(pitch_offset);

	OUT_RING(color);

	OUT_RING((x << 16) | y);
	OUT_RING((w << 16) | h);

	ADVANCE_RING();
}
795 
/* Draw the performance indicator boxes from the collected statistics, then
 * reset all statistics to zero.
 */
static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
{
	/* Collapse various things into a wait flag -- trying to guess if
	 * userspace slept -- better just to have them tell us.
	 */
	if (dev_priv->stats.last_frame_reads > 1 ||
	    dev_priv->stats.last_clear_reads > dev_priv->stats.clears ||
	    dev_priv->stats.freelist_loops) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	/* Purple: page flipping */
	if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
		radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);

	/* Red: had to wait for idle at some point */
	if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
		radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);

	/* Blue box: lost context? (not drawn) */

	/* Yellow: texture swaps */
	if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
		radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);

	/* Green: hardware never idled (as far as we can tell) */
	if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
		radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);

	/* Bar whose width is the number of buffers requested, capped at
	 * 100 (not a great measure, easily confused).  The cap is applied
	 * to a local copy; the statistics are wiped just below anyway.
	 */
	if (dev_priv->stats.requested_bufs) {
		int bar_width = dev_priv->stats.requested_bufs > 100 ?
		    100 : dev_priv->stats.requested_bufs;

		radeon_clear_box(dev_priv, 4, 16, bar_width, 4,
				 196, 128, 128);
	}

	memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
}
848 
849 /* ================================================================
850  * CP command dispatch functions
851  */
852 
/*
 * Emit the CP commands that clear the buffers selected by clear->flags.
 *
 * dev         - device whose dev_private holds the driver state used here.
 * clear       - clear request; flags, color_mask, clear_color, clear_depth
 *               and depth_mask are read.
 * depth_boxes - per-cliprect vertex data (indexed with CLEAR_X1, CLEAR_Y1,
 *               CLEAR_X2, CLEAR_Y2 and CLEAR_DEPTH) for the depth/stencil
 *               quads; it is indexed from 0 to sarea nbox - 1, so it must
 *               hold at least that many entries.
 *
 * Three stages, each applied to every cliprect in the sarea:
 *   1. front/back colour buffers: a 2D solid fill (RADEON_CNTL_PAINT_MULTI);
 *   2. depth/stencil with RADEON_CLEAR_FASTZ: RADEON_3D_CLEAR_ZMASK packets;
 *   3. depth/stencil otherwise: a rectangle rendered with the register
 *      values saved in dev_priv->depth_clear (R200..RV280 and the remaining
 *      chips use different packets).
 * Afterwards sarea last_clear is incremented and emitted with
 * RADEON_CLEAR_AGE.
 */
static void radeon_cp_dispatch_clear(struct drm_device * dev,
				     drm_radeon_clear_t * clear,
				     drm_radeon_clear_rect_t * depth_boxes)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
	int nbox = sarea_priv->nbox;
	struct drm_clip_rect *pbox = sarea_priv->boxes;
	unsigned int flags = clear->flags;
	u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
	int i;
	RING_LOCALS;
	DRM_DEBUG("flags = 0x%x\n", flags);

	dev_priv->stats.clears++;

	/* When page 1 is the current page, exchange the FRONT and BACK bits
	 * of the request (pfCurrentPage is toggled by
	 * radeon_cp_dispatch_flip()).
	 */
	if (dev_priv->sarea_priv->pfCurrentPage == 1) {
		unsigned int tmp = flags;

		flags &= ~(RADEON_FRONT | RADEON_BACK);
		if (tmp & RADEON_FRONT)
			flags |= RADEON_BACK;
		if (tmp & RADEON_BACK)
			flags |= RADEON_FRONT;
	}

	/* Stage 1: colour buffers, cleared with a 2D solid fill. */
	if (flags & (RADEON_FRONT | RADEON_BACK)) {

		BEGIN_RING(4);

		/* Ensure the 3D stream is idle before doing a
		 * 2D fill to clear the front or back buffer.
		 */
		RADEON_WAIT_UNTIL_3D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
		OUT_RING(clear->color_mask);

		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {
			/* Convert the cliprect corners into origin + size. */
			int x = pbox[i].x1;
			int y = pbox[i].y1;
			int w = pbox[i].x2 - x;
			int h = pbox[i].y2 - y;

			DRM_DEBUG("%d,%d-%d,%d flags 0x%x\n",
				  x, y, w, h, flags);

			if (flags & RADEON_FRONT) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->
					  color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->front_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}

			/* Same fill as above, aimed at the back buffer. */
			if (flags & RADEON_BACK) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->
					  color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->back_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}
		}
	}

	/* Stage 2: hyper z clear */
	/* no docs available, based on reverse engineering by Stephane Marchesin */
	if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
	    && (flags & RADEON_CLEAR_FASTZ)) {

		/* Depth pitch converted from bytes to pixels: 2 bytes per
		 * pixel for the 16-bit integer Z format, 4 otherwise.
		 */
		int depthpixperline =
		    dev_priv->depth_fmt ==
		    RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
						       2) : (dev_priv->
							     depth_pitch / 4);

		u32 clearmask;

		/* The low 8 bits of depth_mask are placed in bits 31:24,
		 * above the depth clear value.
		 */
		u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
		    ((clear->depth_mask & 0xff) << 24);

		/* Make sure we restore the 3D state next time.
		 * we haven't touched any "normal" state - still need this?
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		if ((dev_priv->flags & RADEON_HAS_HIERZ)
		    && (flags & RADEON_USE_HIERZ)) {
			/* FIXME : reverse engineer that for Rx00 cards */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
			/* pattern seems to work for r100, though get slight
			   rendering errors with glxgears. If hierz is not enabled for r100,
			   only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
			   other ones are ignored, and the same clear mask can be used. That's
			   very different behaviour than R200 which needs different clear mask
			   and different number of tiles to clear if hierz is enabled or not !?!
			 */
			clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
		} else {
			/* clear mask : chooses the clearing pattern.
			   rv250: could be used to clear only parts of macrotiles
			   (but that would get really complicated...)?
			   bit 0 and 1 (either or both of them ?!?!) are used to
			   not clear tile (or maybe one of the bits indicates if the tile is
			   compressed or not), bit 2 and 3 to not clear tile 1,...,.
			   Pattern is as follows:
			   | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
			   bits -------------------------------------------------
			   | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
			   rv100: clearmask covers 2x8 4x1 tiles, but one clear still
			   covers 256 pixels ?!?
			 */
			clearmask = 0x0;
		}

		BEGIN_RING(8);
		RADEON_WAIT_UNTIL_2D_IDLE();
		OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
			     tempRB3D_DEPTHCLEARVALUE);
		/* what offset is this exactly ? */
		OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
		/* need ctlstat, otherwise get some strange black flickering */
		OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
			     RADEON_RB3D_ZC_FLUSH_ALL);
		ADVANCE_RING();

		/* One CLEAR_ZMASK packet per tile row of each cliprect; the
		 * tile geometry differs between the three chip groups below.
		 */
		for (i = 0; i < nbox; i++) {
			int tileoffset, nrtilesx, nrtilesy, j;
			/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
			if ((dev_priv->flags & RADEON_HAS_HIERZ)
			    && (dev_priv->chip_family < CHIP_R200)) {
				/* FIXME : figure this out for r200 (when hierz is enabled). Or
				   maybe r200 actually doesn't need to put the low-res z value into
				   the tile cache like r100, but just needs to clear the hi-level z-buffer?
				   Works for R100, both with hierz and without.
				   R100 seems to operate on 2x1 8x8 tiles, but...
				   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
				   problematic with resolutions which are not 64 pix aligned? */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				/* "<=": the row containing y2 is cleared too */
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					OUT_RING(tileoffset * 8);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					/* advance to the next tile row */
					tileoffset += depthpixperline >> 6;
				}
			} else if ((dev_priv->chip_family >= CHIP_R200) &&
				   (dev_priv->chip_family <= CHIP_RV280)) {
				/* works for rv250. */
				/* find first macro tile (8x2 4x4 z-pixels on rv250) */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 5;
				nrtilesx =
				    (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					/* judging by the first tile offset needed, could possibly
					   directly address/clear 4x4 tiles instead of 8x2 * 4x4
					   macro tiles, though would still need clear mask for
					   right/bottom if truly 4x4 granularity is desired ? */
					OUT_RING(tileoffset * 16);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 1);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 5;
				}
			} else {	/* rv 100 */
				/* rv100 might not need 64 pix alignment, who knows */
				/* offsets are, hmm, weird */
				tileoffset =
				    ((pbox[i].y1 >> 4) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					OUT_RING(tileoffset * 128);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			}
		}

		/* TODO don't always clear all hi-level z tiles */
		if ((dev_priv->flags & RADEON_HAS_HIERZ)
		    && ((dev_priv->chip_family >= CHIP_R200) &&
			(dev_priv->chip_family <= CHIP_RV280))
		    && (flags & RADEON_USE_HIERZ))
			/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
		{
			BEGIN_RING(4);
			OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
			OUT_RING(0x0);	/* First tile */
			OUT_RING(0x3cc0);
			OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
			ADVANCE_RING();
		}
	}

	/* Stage 3 (only reached when FASTZ was not requested):
	 * We have to clear the depth and/or stencil buffers by
	 * rendering a quad into just those buffers.  Thus, we have to
	 * make sure the 3D engine is configured correctly.
	 */
	else if ((dev_priv->chip_family >= CHIP_R200) &&
		 (dev_priv->chip_family <= CHIP_RV280) &&
		 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		int tempPP_CNTL;
		int tempRE_CNTL;
		int tempRB3D_CNTL;
		int tempRB3D_ZSTENCILCNTL;
		int tempRB3D_STENCILREFMASK;
		int tempRB3D_PLANEMASK;
		int tempSE_CNTL;
		int tempSE_VTE_CNTL;
		int tempSE_VTX_FMT_0;
		int tempSE_VTX_FMT_1;
		int tempSE_VAP_CNTL;
		int tempRE_AUX_SCISSOR_CNTL;

		tempPP_CNTL = 0;
		tempRE_CNTL = 0;

		/* Start from the register values saved in
		 * dev_priv->depth_clear and adjust them below.
		 */
		tempRB3D_CNTL = depth_clear->rb3d_cntl;

		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
		tempRB3D_STENCILREFMASK = 0x0;

		tempSE_CNTL = depth_clear->se_cntl;

		/* Disable TCL */

		tempSE_VAP_CNTL = (	/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
					  (0x9 <<
					   SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

		/* A zero plane mask is written to RB3D_PLANEMASK below. */
		tempRB3D_PLANEMASK = 0x0;

		tempRE_AUX_SCISSOR_CNTL = 0x0;

		tempSE_VTE_CNTL =
		    SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;

		/* Vertex format (X, Y, Z, W) */
		tempSE_VTX_FMT_0 =
		    SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
		    SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
		tempSE_VTX_FMT_1 = 0x0;

		/*
		 * Depth buffer specific enables
		 */
		if (flags & RADEON_DEPTH) {
			/* Enable depth buffer */
			tempRB3D_CNTL |= RADEON_Z_ENABLE;
		} else {
			/* Disable depth buffer */
			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
		}

		/*
		 * Stencil buffer specific enables
		 */
		if (flags & RADEON_STENCIL) {
			tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
			/* depth_mask is what gets written to STENCILREFMASK */
			tempRB3D_STENCILREFMASK = clear->depth_mask;
		} else {
			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		/* Emit the state assembled above in a single ring section. */
		BEGIN_RING(26);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
		OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
		OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
			     tempRB3D_STENCILREFMASK);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
		OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
		OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
		OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
		OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
		OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
		OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			/* Three vertices of a RECT_LIST primitive, four dwords
			 * each: x, y, depth and 0x3f800000 (the bit pattern of
			 * 1.0f; the format above is X, Y, Z, W).
			 */
			BEGIN_RING(14);
			OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			ADVANCE_RING();
		}
	} else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
		/* Same quad-based clear for chips outside R200..RV280. */

		int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;

		rb3d_cntl = depth_clear->rb3d_cntl;

		if (flags & RADEON_DEPTH) {
			rb3d_cntl |= RADEON_Z_ENABLE;
		} else {
			rb3d_cntl &= ~RADEON_Z_ENABLE;
		}

		if (flags & RADEON_STENCIL) {
			rb3d_cntl |= RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = clear->depth_mask;	/* misnamed field */
		} else {
			rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(13);
		RADEON_WAIT_UNTIL_2D_IDLE();

		/* One packet carrying two values, starting at PP_CNTL: zero,
		 * then rb3d_cntl.
		 */
		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
		OUT_RING(0x00000000);
		OUT_RING(rb3d_cntl);

		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
		OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(15);

			/* Three vertices of x, y, depth plus a zero dword
			 * (the vertex format requests Z and a packed colour).
			 */
			OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
			OUT_RING(RADEON_VTX_Z_PRESENT |
				 RADEON_VTX_PKCOLOR_PRESENT);
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  RADEON_MAOS_ENABLE |
				  RADEON_VTX_FMT_RADEON_MODE |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			ADVANCE_RING();
		}
	}

	/* Increment the clear counter.  The client-side 3D driver must
	 * wait on this value before performing the clear ioctl.  We
	 * need this because the card's so damned fast...
	 */
	dev_priv->sarea_priv->last_clear++;

	BEGIN_RING(4);

	RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear);
	RADEON_WAIT_UNTIL_IDLE();

	ADVANCE_RING();
}
1345 
radeon_cp_dispatch_swap(struct drm_device * dev)1346 static void radeon_cp_dispatch_swap(struct drm_device * dev)
1347 {
1348 	drm_radeon_private_t *dev_priv = dev->dev_private;
1349 	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1350 	int nbox = sarea_priv->nbox;
1351 	struct drm_clip_rect *pbox = sarea_priv->boxes;
1352 	int i;
1353 	RING_LOCALS;
1354 	DRM_DEBUG("\n");
1355 
1356 	/* Do some trivial performance monitoring...
1357 	 */
1358 	if (dev_priv->do_boxes)
1359 		radeon_cp_performance_boxes(dev_priv);
1360 
1361 	/* Wait for the 3D stream to idle before dispatching the bitblt.
1362 	 * This will prevent data corruption between the two streams.
1363 	 */
1364 	BEGIN_RING(2);
1365 
1366 	RADEON_WAIT_UNTIL_3D_IDLE();
1367 
1368 	ADVANCE_RING();
1369 
1370 	for (i = 0; i < nbox; i++) {
1371 		int x = pbox[i].x1;
1372 		int y = pbox[i].y1;
1373 		int w = pbox[i].x2 - x;
1374 		int h = pbox[i].y2 - y;
1375 
1376 		DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);
1377 
1378 		BEGIN_RING(9);
1379 
1380 		OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
1381 		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1382 			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1383 			 RADEON_GMC_BRUSH_NONE |
1384 			 (dev_priv->color_fmt << 8) |
1385 			 RADEON_GMC_SRC_DATATYPE_COLOR |
1386 			 RADEON_ROP3_S |
1387 			 RADEON_DP_SRC_SOURCE_MEMORY |
1388 			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1389 
1390 		/* Make this work even if front & back are flipped:
1391 		 */
1392 		OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
1393 		if (dev_priv->sarea_priv->pfCurrentPage == 0) {
1394 			OUT_RING(dev_priv->back_pitch_offset);
1395 			OUT_RING(dev_priv->front_pitch_offset);
1396 		} else {
1397 			OUT_RING(dev_priv->front_pitch_offset);
1398 			OUT_RING(dev_priv->back_pitch_offset);
1399 		}
1400 
1401 		OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
1402 		OUT_RING((x << 16) | y);
1403 		OUT_RING((x << 16) | y);
1404 		OUT_RING((w << 16) | h);
1405 
1406 		ADVANCE_RING();
1407 	}
1408 
1409 	/* Increment the frame counter.  The client-side 3D driver must
1410 	 * throttle the framerate by waiting for this value before
1411 	 * performing the swapbuffer ioctl.
1412 	 */
1413 	dev_priv->sarea_priv->last_frame++;
1414 
1415 	BEGIN_RING(4);
1416 
1417 	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1418 	RADEON_WAIT_UNTIL_2D_IDLE();
1419 
1420 	ADVANCE_RING();
1421 }
1422 
radeon_cp_dispatch_flip(struct drm_device * dev)1423 static void radeon_cp_dispatch_flip(struct drm_device * dev)
1424 {
1425 	drm_radeon_private_t *dev_priv = dev->dev_private;
1426 	struct drm_sarea *sarea = (struct drm_sarea *) dev_priv->sarea->handle;
1427 	int offset = (dev_priv->sarea_priv->pfCurrentPage == 1)
1428 	    ? dev_priv->front_offset : dev_priv->back_offset;
1429 	RING_LOCALS;
1430 	DRM_DEBUG("pfCurrentPage=%d\n",
1431 		  dev_priv->sarea_priv->pfCurrentPage);
1432 
1433 	/* Do some trivial performance monitoring...
1434 	 */
1435 	if (dev_priv->do_boxes) {
1436 		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
1437 		radeon_cp_performance_boxes(dev_priv);
1438 	}
1439 
1440 	/* Update the frame offsets for both CRTCs
1441 	 */
1442 	BEGIN_RING(6);
1443 
1444 	RADEON_WAIT_UNTIL_3D_IDLE();
1445 	OUT_RING_REG(RADEON_CRTC_OFFSET,
1446 		     ((sarea->frame.y * dev_priv->front_pitch +
1447 		       sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
1448 		     + offset);
1449 	OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
1450 		     + offset);
1451 
1452 	ADVANCE_RING();
1453 
1454 	/* Increment the frame counter.  The client-side 3D driver must
1455 	 * throttle the framerate by waiting for this value before
1456 	 * performing the swapbuffer ioctl.
1457 	 */
1458 	dev_priv->sarea_priv->last_frame++;
1459 	dev_priv->sarea_priv->pfCurrentPage =
1460 		1 - dev_priv->sarea_priv->pfCurrentPage;
1461 
1462 	BEGIN_RING(2);
1463 
1464 	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1465 
1466 	ADVANCE_RING();
1467 }
1468 
bad_prim_vertex_nr(int primitive,int nr)1469 static int bad_prim_vertex_nr(int primitive, int nr)
1470 {
1471 	switch (primitive & RADEON_PRIM_TYPE_MASK) {
1472 	case RADEON_PRIM_TYPE_NONE:
1473 	case RADEON_PRIM_TYPE_POINT:
1474 		return nr < 1;
1475 	case RADEON_PRIM_TYPE_LINE:
1476 		return (nr & 1) || nr == 0;
1477 	case RADEON_PRIM_TYPE_LINE_STRIP:
1478 		return nr < 2;
1479 	case RADEON_PRIM_TYPE_TRI_LIST:
1480 	case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1481 	case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1482 	case RADEON_PRIM_TYPE_RECT_LIST:
1483 		return nr % 3 || nr == 0;
1484 	case RADEON_PRIM_TYPE_TRI_FAN:
1485 	case RADEON_PRIM_TYPE_TRI_STRIP:
1486 		return nr < 3;
1487 	default:
1488 		return 1;
1489 	}
1490 }
1491 
/*
 * Description of one primitive handed to radeon_cp_dispatch_vertex() or
 * radeon_cp_dispatch_indices().
 */
typedef struct {
	unsigned int start;	/* byte offset of the data within the buffer */
	unsigned int finish;	/* byte offset of the end of the data */
	unsigned int prim;	/* hardware primitive word (type in RADEON_PRIM_TYPE_MASK) */
	unsigned int numverts;	/* vertex count emitted in the render packet */
	unsigned int offset;	/* added to gart_buffers_offset by the indexed path */
	unsigned int vc_format;	/* vertex format dword, emitted unchanged */
} drm_radeon_tcl_prim_t;
1500 
/*
 * Queue rendering of a vertex buffer, once per sarea cliprect.
 *
 * dev  - device owning the ring.
 * buf  - buffer holding the vertices; its offset plus prim->start, added to
 *        gart_buffers_offset, forms the address given to the hardware.
 * prim - primitive description (type, vertex format, vertex count).
 *
 * A primitive whose vertex count fails bad_prim_vertex_nr() is reported
 * with DRM_ERROR and dropped.  With no cliprects the primitive is still
 * emitted exactly once, without any cliprect being set.
 */
static void radeon_cp_dispatch_vertex(struct drm_device * dev,
				      struct drm_buf * buf,
				      drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int vb_addr = dev_priv->gart_buffers_offset + buf->offset + prim->start;
	int vert_count = (int)prim->numverts;
	int cliprects = sarea_priv->nbox;
	int passes;
	int pass;
	RING_LOCALS;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
		  prim->prim,
		  prim->vc_format, prim->start, prim->finish, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
		DRM_ERROR("bad prim %x numverts %d\n",
			  prim->prim, prim->numverts);
		return;
	}

	/* Always make at least one pass, even without cliprects. */
	passes = (cliprects > 0) ? cliprects : 1;

	for (pass = 0; pass < passes; pass++) {
		/* Select the cliprect for this pass, if there is one. */
		if (pass < cliprects)
			radeon_emit_clip_rect(dev_priv,
					      &sarea_priv->boxes[pass]);

		/* Vertex buffer rendering command. */
		BEGIN_RING(5);

		OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
		OUT_RING(vb_addr);
		OUT_RING(vert_count);
		OUT_RING(prim->vc_format);
		OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
			 RADEON_COLOR_ORDER_RGBA |
			 RADEON_VTX_FMT_RADEON_MODE |
			 (vert_count << RADEON_NUM_VERTICES_SHIFT));

		ADVANCE_RING();
	}
}
1546 
radeon_cp_discard_buffer(struct drm_device * dev,struct drm_buf * buf)1547 void radeon_cp_discard_buffer(struct drm_device * dev, struct drm_buf * buf)
1548 {
1549 	drm_radeon_private_t *dev_priv = dev->dev_private;
1550 	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1551 	RING_LOCALS;
1552 
1553 	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
1554 
1555 	/* Emit the vertex buffer age */
1556 	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600) {
1557 		BEGIN_RING(3);
1558 		R600_DISPATCH_AGE(buf_priv->age);
1559 		ADVANCE_RING();
1560 	} else {
1561 		BEGIN_RING(2);
1562 		RADEON_DISPATCH_AGE(buf_priv->age);
1563 		ADVANCE_RING();
1564 	}
1565 
1566 	buf->pending = 1;
1567 	buf->used = 0;
1568 }
1569 
radeon_cp_dispatch_indirect(struct drm_device * dev,struct drm_buf * buf,int start,int end)1570 static void radeon_cp_dispatch_indirect(struct drm_device * dev,
1571 					struct drm_buf * buf, int start, int end)
1572 {
1573 	drm_radeon_private_t *dev_priv = dev->dev_private;
1574 	RING_LOCALS;
1575 	DRM_DEBUG("buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);
1576 
1577 	if (start != end) {
1578 		int offset = (dev_priv->gart_buffers_offset
1579 			      + buf->offset + start);
1580 		int dwords = (end - start + 3) / sizeof(u32);
1581 
1582 		/* Indirect buffer data must be an even number of
1583 		 * dwords, so if we've been given an odd number we must
1584 		 * pad the data with a Type-2 CP packet.
1585 		 */
1586 		if (dwords & 1) {
1587 			u32 *data = (u32 *)
1588 			    ((char *)dev->agp_buffer_map->handle
1589 			     + buf->offset + start);
1590 			data[dwords++] = RADEON_CP_PACKET2;
1591 		}
1592 
1593 		/* Fire off the indirect buffer */
1594 		BEGIN_RING(3);
1595 
1596 		OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
1597 		OUT_RING(offset);
1598 		OUT_RING(dwords);
1599 
1600 		ADVANCE_RING();
1601 	}
1602 }
1603 
/*
 * Queue rendering of an indexed primitive, once per sarea cliprect.
 *
 * dev     - device owning the AGP buffer mapping.
 * elt_buf - buffer holding the element data; a five-dword render packet
 *           header is written in place at prim->start, ahead of the
 *           indices.
 * prim    - primitive description; prim->offset (plus gart_buffers_offset)
 *           is the vertex address stored in the header.
 *
 * The index count is the number of 16-bit values between
 * prim->start + RADEON_INDEX_PRIM_OFFSET and prim->finish.  The primitive
 * is reported with DRM_ERROR and dropped when that count fails
 * bad_prim_vertex_nr(), when no index data follows the header, or when
 * prim->start is not a multiple of 8.  With no cliprects the buffer is
 * still dispatched exactly once.
 */
static void radeon_cp_dispatch_indices(struct drm_device * dev,
				       struct drm_buf * elt_buf,
				       drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int vtx_addr = dev_priv->gart_buffers_offset + prim->offset;
	int idx_start = prim->start + RADEON_INDEX_PRIM_OFFSET;
	int idx_count = (prim->finish - idx_start) / sizeof(u16);
	int cliprects = sarea_priv->nbox;
	int cmd_dwords;
	int passes;
	int pass;
	u32 *cmd;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start, prim->finish, prim->offset, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, idx_count)) {
		DRM_ERROR("bad prim %x count %d\n", prim->prim, idx_count);
		return;
	}

	/* Need index data after the header and an 8-byte aligned start. */
	if (idx_start >= prim->finish || (prim->start & 0x7)) {
		DRM_ERROR("buffer prim %d\n", prim->prim);
		return;
	}

	/* Whole packet length, header included, rounded up to dwords. */
	cmd_dwords = (prim->finish - prim->start + 3) / sizeof(u32);

	cmd = (u32 *) ((char *)dev->agp_buffer_map->handle +
		       elt_buf->offset + prim->start);

	/* Build the render packet header in front of the indices. */
	cmd[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, cmd_dwords - 2);
	cmd[1] = vtx_addr;
	cmd[2] = prim->numverts;
	cmd[3] = prim->vc_format;
	cmd[4] = (prim->prim |
		  RADEON_PRIM_WALK_IND |
		  RADEON_COLOR_ORDER_RGBA |
		  RADEON_VTX_FMT_RADEON_MODE |
		  (idx_count << RADEON_NUM_VERTICES_SHIFT));

	/* Always make at least one pass, even without cliprects. */
	passes = (cliprects > 0) ? cliprects : 1;

	for (pass = 0; pass < passes; pass++) {
		if (pass < cliprects)
			radeon_emit_clip_rect(dev_priv,
					      &sarea_priv->boxes[pass]);

		radeon_cp_dispatch_indirect(dev, elt_buf,
					    prim->start, prim->finish);
	}
}
1659 
/* Largest amount of texture data, in bytes, that radeon_cp_dispatch_texture()
 * uploads in one pass; a bigger image has its height reduced to fit.
 */
#define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
1661 
radeon_cp_dispatch_texture(struct drm_device * dev,struct drm_file * file_priv,drm_radeon_texture_t * tex,drm_radeon_tex_image_t * image)1662 static int radeon_cp_dispatch_texture(struct drm_device * dev,
1663 				      struct drm_file *file_priv,
1664 				      drm_radeon_texture_t * tex,
1665 				      drm_radeon_tex_image_t * image)
1666 {
1667 	drm_radeon_private_t *dev_priv = dev->dev_private;
1668 	struct drm_buf *buf;
1669 	u32 format;
1670 	u32 *buffer;
1671 	const u8 __user *data;
1672 	int size, dwords, tex_width, blit_width, spitch;
1673 	u32 height;
1674 	int i;
1675 	u32 texpitch, microtile;
1676 	u32 offset, byte_offset;
1677 	RING_LOCALS;
1678 
1679 	if (radeon_check_and_fixup_offset(dev_priv, file_priv, &tex->offset)) {
1680 		DRM_ERROR("Invalid destination offset\n");
1681 		return -EINVAL;
1682 	}
1683 
1684 	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
1685 
1686 	/* Flush the pixel cache.  This ensures no pixel data gets mixed
1687 	 * up with the texture data from the host data blit, otherwise
1688 	 * part of the texture image may be corrupted.
1689 	 */
1690 	BEGIN_RING(4);
1691 	RADEON_FLUSH_CACHE();
1692 	RADEON_WAIT_UNTIL_IDLE();
1693 	ADVANCE_RING();
1694 
1695 	/* The compiler won't optimize away a division by a variable,
1696 	 * even if the only legal values are powers of two.  Thus, we'll
1697 	 * use a shift instead.
1698 	 */
1699 	switch (tex->format) {
1700 	case RADEON_TXFORMAT_ARGB8888:
1701 	case RADEON_TXFORMAT_RGBA8888:
1702 		format = RADEON_COLOR_FORMAT_ARGB8888;
1703 		tex_width = tex->width * 4;
1704 		blit_width = image->width * 4;
1705 		break;
1706 	case RADEON_TXFORMAT_AI88:
1707 	case RADEON_TXFORMAT_ARGB1555:
1708 	case RADEON_TXFORMAT_RGB565:
1709 	case RADEON_TXFORMAT_ARGB4444:
1710 	case RADEON_TXFORMAT_VYUY422:
1711 	case RADEON_TXFORMAT_YVYU422:
1712 		format = RADEON_COLOR_FORMAT_RGB565;
1713 		tex_width = tex->width * 2;
1714 		blit_width = image->width * 2;
1715 		break;
1716 	case RADEON_TXFORMAT_I8:
1717 	case RADEON_TXFORMAT_RGB332:
1718 		format = RADEON_COLOR_FORMAT_CI8;
1719 		tex_width = tex->width * 1;
1720 		blit_width = image->width * 1;
1721 		break;
1722 	default:
1723 		DRM_ERROR("invalid texture format %d\n", tex->format);
1724 		return -EINVAL;
1725 	}
1726 	spitch = blit_width >> 6;
1727 	if (spitch == 0 && image->height > 1)
1728 		return -EINVAL;
1729 
1730 	texpitch = tex->pitch;
1731 	if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
1732 		microtile = 1;
1733 		if (tex_width < 64) {
1734 			texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
1735 			/* we got tiled coordinates, untile them */
1736 			image->x *= 2;
1737 		}
1738 	} else
1739 		microtile = 0;
1740 
1741 	/* this might fail for zero-sized uploads - are those illegal? */
1742 	if (!radeon_check_offset(dev_priv, tex->offset + image->height *
1743 				blit_width - 1)) {
1744 		DRM_ERROR("Invalid final destination offset\n");
1745 		return -EINVAL;
1746 	}
1747 
1748 	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);
1749 
1750 	do {
1751 		DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%u y=%u w=%u h=%u\n",
1752 			  tex->offset >> 10, tex->pitch, tex->format,
1753 			  image->x, image->y, image->width, image->height);
1754 
1755 		/* Make a copy of some parameters in case we have to
1756 		 * update them for a multi-pass texture blit.
1757 		 */
1758 		height = image->height;
1759 		data = (const u8 __user *)image->data;
1760 
1761 		size = height * blit_width;
1762 
1763 		if (size > RADEON_MAX_TEXTURE_SIZE) {
1764 			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
1765 			size = height * blit_width;
1766 		} else if (size < 4 && size > 0) {
1767 			size = 4;
1768 		} else if (size == 0) {
1769 			return 0;
1770 		}
1771 
1772 		buf = radeon_freelist_get(dev);
1773 		if (0 && !buf) {
1774 			radeon_do_cp_idle(dev_priv);
1775 			buf = radeon_freelist_get(dev);
1776 		}
1777 		if (!buf) {
1778 			DRM_DEBUG("EAGAIN\n");
1779 			if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
1780 				return -EFAULT;
1781 			return -EAGAIN;
1782 		}
1783 
1784 		/* Dispatch the indirect buffer.
1785 		 */
1786 		buffer =
1787 		    (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
1788 		dwords = size / 4;
1789 
1790 #define RADEON_COPY_MT(_buf, _data, _width) \
1791 	do { \
1792 		if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
1793 			DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
1794 			return -EFAULT; \
1795 		} \
1796 	} while(0)
1797 
1798 		if (microtile) {
1799 			/* texture micro tiling in use, minimum texture width is thus 16 bytes.
1800 			   however, we cannot use blitter directly for texture width < 64 bytes,
1801 			   since minimum tex pitch is 64 bytes and we need this to match
1802 			   the texture width, otherwise the blitter will tile it wrong.
1803 			   Thus, tiling manually in this case. Additionally, need to special
1804 			   case tex height = 1, since our actual image will have height 2
1805 			   and we need to ensure we don't read beyond the texture size
1806 			   from user space. */
1807 			if (tex->height == 1) {
1808 				if (tex_width >= 64 || tex_width <= 16) {
1809 					RADEON_COPY_MT(buffer, data,
1810 						(int)(tex_width * sizeof(u32)));
1811 				} else if (tex_width == 32) {
1812 					RADEON_COPY_MT(buffer, data, 16);
1813 					RADEON_COPY_MT(buffer + 8,
1814 						       data + 16, 16);
1815 				}
1816 			} else if (tex_width >= 64 || tex_width == 16) {
1817 				RADEON_COPY_MT(buffer, data,
1818 					       (int)(dwords * sizeof(u32)));
1819 			} else if (tex_width < 16) {
1820 				for (i = 0; i < tex->height; i++) {
1821 					RADEON_COPY_MT(buffer, data, tex_width);
1822 					buffer += 4;
1823 					data += tex_width;
1824 				}
1825 			} else if (tex_width == 32) {
1826 				/* TODO: make sure this works when not fitting in one buffer
1827 				   (i.e. 32bytes x 2048...) */
1828 				for (i = 0; i < tex->height; i += 2) {
1829 					RADEON_COPY_MT(buffer, data, 16);
1830 					data += 16;
1831 					RADEON_COPY_MT(buffer + 8, data, 16);
1832 					data += 16;
1833 					RADEON_COPY_MT(buffer + 4, data, 16);
1834 					data += 16;
1835 					RADEON_COPY_MT(buffer + 12, data, 16);
1836 					data += 16;
1837 					buffer += 16;
1838 				}
1839 			}
1840 		} else {
1841 			if (tex_width >= 32) {
1842 				/* Texture image width is larger than the minimum, so we
1843 				 * can upload it directly.
1844 				 */
1845 				RADEON_COPY_MT(buffer, data,
1846 					       (int)(dwords * sizeof(u32)));
1847 			} else {
1848 				/* Texture image width is less than the minimum, so we
1849 				 * need to pad out each image scanline to the minimum
1850 				 * width.
1851 				 */
1852 				for (i = 0; i < tex->height; i++) {
1853 					RADEON_COPY_MT(buffer, data, tex_width);
1854 					buffer += 8;
1855 					data += tex_width;
1856 				}
1857 			}
1858 		}
1859 
1860 #undef RADEON_COPY_MT
1861 		byte_offset = (image->y & ~2047) * blit_width;
1862 		buf->file_priv = file_priv;
1863 		buf->used = size;
1864 		offset = dev_priv->gart_buffers_offset + buf->offset;
1865 		BEGIN_RING(9);
1866 		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1867 		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1868 			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1869 			 RADEON_GMC_BRUSH_NONE |
1870 			 (format << 8) |
1871 			 RADEON_GMC_SRC_DATATYPE_COLOR |
1872 			 RADEON_ROP3_S |
1873 			 RADEON_DP_SRC_SOURCE_MEMORY |
1874 			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1875 		OUT_RING((spitch << 22) | (offset >> 10));
1876 		OUT_RING((texpitch << 22) | ((tex->offset >> 10) + (byte_offset >> 10)));
1877 		OUT_RING(0);
1878 		OUT_RING((image->x << 16) | (image->y % 2048));
1879 		OUT_RING((image->width << 16) | height);
1880 		RADEON_WAIT_UNTIL_2D_IDLE();
1881 		ADVANCE_RING();
1882 		COMMIT_RING();
1883 
1884 		radeon_cp_discard_buffer(dev, buf);
1885 
1886 		/* Update the input parameters for next time */
1887 		image->y += height;
1888 		image->height -= height;
1889 		image->data = (const u8 __user *)image->data + size;
1890 	} while (image->height > 0);
1891 
1892 	/* Flush the pixel cache after the blit completes.  This ensures
1893 	 * the texture data is written out to memory before rendering
1894 	 * continues.
1895 	 */
1896 	BEGIN_RING(4);
1897 	RADEON_FLUSH_CACHE();
1898 	RADEON_WAIT_UNTIL_2D_IDLE();
1899 	ADVANCE_RING();
1900 	COMMIT_RING();
1901 
1902 	return 0;
1903 }
1904 
/* Queue a 32-dword stipple pattern on the CP ring.
 *
 * The stipple address register is reset to 0 and the 32 words of
 * 'stipple' are then streamed into the stipple data register.  The ring
 * is advanced here but not committed.
 */
static void radeon_cp_dispatch_stipple(struct drm_device * dev, u32 * stipple)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	int word;
	RING_LOCALS;
	DRM_DEBUG("\n");

	/* 2 dwords for the address write, 1 table header, 32 data dwords */
	BEGIN_RING(35);

	OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
	OUT_RING(0x00000000);

	OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
	for (word = 0; word != 32; ++word) {
		OUT_RING(stipple[word]);
	}

	ADVANCE_RING();
}
1924 
/* Write the cached state of surface 'surf_index' to the hardware.
 *
 * Nothing is done when dev_priv->mmio is not set.  Otherwise
 * radeon_do_cp_idle() is called first, and then the INFO, LOWER_BOUND
 * and UPPER_BOUND registers of that surface are written from
 * dev_priv->surfaces[surf_index].
 */
static void radeon_apply_surface_regs(int surf_index,
				      drm_radeon_private_t *dev_priv)
{
	/* register sets of consecutive surfaces are 16 bytes apart */
	const int reg_offset = 16 * surf_index;

	if (!dev_priv->mmio)
		return;

	radeon_do_cp_idle(dev_priv);

	RADEON_WRITE(RADEON_SURFACE0_INFO + reg_offset,
		     dev_priv->surfaces[surf_index].flags);
	RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + reg_offset,
		     dev_priv->surfaces[surf_index].lower);
	RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + reg_offset,
		     dev_priv->surfaces[surf_index].upper);
}
1940 
1941 /* Allocates a virtual surface
1942  * doesn't always allocate a real surface, will stretch an existing
1943  * surface when possible.
1944  *
1945  * Note that refcount can be at most 2, since during a free refcount=3
1946  * might mean we have to allocate a new surface which might not always
1947  * be available.
1948  * For example : we allocate three contigous surfaces ABC. If B is
1949  * freed, we suddenly need two surfaces to store A and C, which might
1950  * not always be available.
1951  */
alloc_surface(drm_radeon_surface_alloc_t * new,drm_radeon_private_t * dev_priv,struct drm_file * file_priv)1952 static int alloc_surface(drm_radeon_surface_alloc_t *new,
1953 			 drm_radeon_private_t *dev_priv,
1954 			 struct drm_file *file_priv)
1955 {
1956 	struct radeon_virt_surface *s;
1957 	int i;
1958 	int virt_surface_index;
1959 	uint32_t new_upper, new_lower;
1960 
1961 	new_lower = new->address;
1962 	new_upper = new_lower + new->size - 1;
1963 
1964 	/* sanity check */
1965 	if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1966 	    ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
1967 	     RADEON_SURF_ADDRESS_FIXED_MASK)
1968 	    || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
1969 		return -1;
1970 
1971 	/* make sure there is no overlap with existing surfaces */
1972 	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1973 		if ((dev_priv->surfaces[i].refcount != 0) &&
1974 		    (((new_lower >= dev_priv->surfaces[i].lower) &&
1975 		      (new_lower < dev_priv->surfaces[i].upper)) ||
1976 		     ((new_lower < dev_priv->surfaces[i].lower) &&
1977 		      (new_upper > dev_priv->surfaces[i].lower)))) {
1978 			return -1;
1979 		}
1980 	}
1981 
1982 	/* find a virtual surface */
1983 	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
1984 		if (dev_priv->virt_surfaces[i].file_priv == 0)
1985 			break;
1986 	if (i == 2 * RADEON_MAX_SURFACES) {
1987 		return -1;
1988 	}
1989 	virt_surface_index = i;
1990 
1991 	/* try to reuse an existing surface */
1992 	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1993 		/* extend before */
1994 		if ((dev_priv->surfaces[i].refcount == 1) &&
1995 		    (new->flags == dev_priv->surfaces[i].flags) &&
1996 		    (new_upper + 1 == dev_priv->surfaces[i].lower)) {
1997 			s = &(dev_priv->virt_surfaces[virt_surface_index]);
1998 			s->surface_index = i;
1999 			s->lower = new_lower;
2000 			s->upper = new_upper;
2001 			s->flags = new->flags;
2002 			s->file_priv = file_priv;
2003 			dev_priv->surfaces[i].refcount++;
2004 			dev_priv->surfaces[i].lower = s->lower;
2005 			radeon_apply_surface_regs(s->surface_index, dev_priv);
2006 			return virt_surface_index;
2007 		}
2008 
2009 		/* extend after */
2010 		if ((dev_priv->surfaces[i].refcount == 1) &&
2011 		    (new->flags == dev_priv->surfaces[i].flags) &&
2012 		    (new_lower == dev_priv->surfaces[i].upper + 1)) {
2013 			s = &(dev_priv->virt_surfaces[virt_surface_index]);
2014 			s->surface_index = i;
2015 			s->lower = new_lower;
2016 			s->upper = new_upper;
2017 			s->flags = new->flags;
2018 			s->file_priv = file_priv;
2019 			dev_priv->surfaces[i].refcount++;
2020 			dev_priv->surfaces[i].upper = s->upper;
2021 			radeon_apply_surface_regs(s->surface_index, dev_priv);
2022 			return virt_surface_index;
2023 		}
2024 	}
2025 
2026 	/* okay, we need a new one */
2027 	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2028 		if (dev_priv->surfaces[i].refcount == 0) {
2029 			s = &(dev_priv->virt_surfaces[virt_surface_index]);
2030 			s->surface_index = i;
2031 			s->lower = new_lower;
2032 			s->upper = new_upper;
2033 			s->flags = new->flags;
2034 			s->file_priv = file_priv;
2035 			dev_priv->surfaces[i].refcount = 1;
2036 			dev_priv->surfaces[i].lower = s->lower;
2037 			dev_priv->surfaces[i].upper = s->upper;
2038 			dev_priv->surfaces[i].flags = s->flags;
2039 			radeon_apply_surface_regs(s->surface_index, dev_priv);
2040 			return virt_surface_index;
2041 		}
2042 	}
2043 
2044 	/* we didn't find anything */
2045 	return -1;
2046 }
2047 
/* Release the virtual surface owned by 'file_priv' whose lower bound is
 * 'lower'.
 *
 * The bounds of the backing real surface are pulled in where they
 * coincide with the freed virtual surface, its refcount is dropped (and
 * its flags cleared once the refcount reaches 0), the virtual slot is
 * marked unused and the surface registers are rewritten.
 *
 * Returns 0 when a matching virtual surface was found, 1 otherwise.
 */
static int free_surface(struct drm_file *file_priv,
			drm_radeon_private_t * dev_priv,
			int lower)
{
	int slot;

	/* find the virtual surface */
	for (slot = 0; slot < 2 * RADEON_MAX_SURFACES; slot++) {
		struct radeon_virt_surface *vs = &dev_priv->virt_surfaces[slot];
		int real;

		if (!vs->file_priv)
			continue;	/* slot not in use */
		if (lower != vs->lower || file_priv != vs->file_priv)
			continue;	/* somebody else's surface */

		real = vs->surface_index;

		if (dev_priv->surfaces[real].lower == vs->lower)
			dev_priv->surfaces[real].lower = vs->upper;

		if (dev_priv->surfaces[real].upper == vs->upper)
			dev_priv->surfaces[real].upper = vs->lower;

		dev_priv->surfaces[real].refcount--;
		if (dev_priv->surfaces[real].refcount == 0)
			dev_priv->surfaces[real].flags = 0;

		vs->file_priv = NULL;
		radeon_apply_surface_regs(vs->surface_index, dev_priv);
		return 0;
	}

	return 1;
}
2084 
/* Free every virtual surface that is still recorded as belonging to
 * 'file_priv'.
 */
static void radeon_surfaces_release(struct drm_file *file_priv,
				    drm_radeon_private_t * dev_priv)
{
	int slot;

	for (slot = 0; slot < 2 * RADEON_MAX_SURFACES; slot++) {
		if (dev_priv->virt_surfaces[slot].file_priv != file_priv)
			continue;

		free_surface(file_priv, dev_priv,
			     dev_priv->virt_surfaces[slot].lower);
	}
}
2095 
2096 /* ================================================================
2097  * IOCTL functions
2098  */
radeon_surface_alloc(struct drm_device * dev,void * data,struct drm_file * file_priv)2099 static int radeon_surface_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv)
2100 {
2101 	drm_radeon_private_t *dev_priv = dev->dev_private;
2102 	drm_radeon_surface_alloc_t *alloc = data;
2103 
2104 	if (!dev_priv) {
2105 		DRM_ERROR("called with no initialization\n");
2106 		return -EINVAL;
2107 	}
2108 
2109 	if (alloc_surface(alloc, dev_priv, file_priv) == -1)
2110 		return -EINVAL;
2111 	else
2112 		return 0;
2113 }
2114 
radeon_surface_free(struct drm_device * dev,void * data,struct drm_file * file_priv)2115 static int radeon_surface_free(struct drm_device *dev, void *data, struct drm_file *file_priv)
2116 {
2117 	drm_radeon_private_t *dev_priv = dev->dev_private;
2118 	drm_radeon_surface_free_t *memfree = data;
2119 
2120 	if (!dev_priv) {
2121 		DRM_ERROR("called with no initialization\n");
2122 		return -EINVAL;
2123 	}
2124 
2125 	if (free_surface(file_priv, dev_priv, memfree->address))
2126 		return -EINVAL;
2127 	else
2128 		return 0;
2129 }
2130 
radeon_cp_clear(struct drm_device * dev,void * data,struct drm_file * file_priv)2131 static int radeon_cp_clear(struct drm_device *dev, void *data, struct drm_file *file_priv)
2132 {
2133 	drm_radeon_private_t *dev_priv = dev->dev_private;
2134 	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2135 	drm_radeon_clear_t *clear = data;
2136 	drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
2137 	DRM_DEBUG("\n");
2138 
2139 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2140 
2141 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2142 
2143 	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2144 		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2145 
2146 	if (DRM_COPY_FROM_USER(&depth_boxes, clear->depth_boxes,
2147 			       sarea_priv->nbox * sizeof(depth_boxes[0])))
2148 		return -EFAULT;
2149 
2150 	radeon_cp_dispatch_clear(dev, clear, depth_boxes);
2151 
2152 	COMMIT_RING();
2153 	return 0;
2154 }
2155 
2156 /* Not sure why this isn't set all the time:
2157  */
radeon_do_init_pageflip(struct drm_device * dev)2158 static int radeon_do_init_pageflip(struct drm_device * dev)
2159 {
2160 	drm_radeon_private_t *dev_priv = dev->dev_private;
2161 	RING_LOCALS;
2162 
2163 	DRM_DEBUG("\n");
2164 
2165 	BEGIN_RING(6);
2166 	RADEON_WAIT_UNTIL_3D_IDLE();
2167 	OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
2168 	OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
2169 		 RADEON_CRTC_OFFSET_FLIP_CNTL);
2170 	OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
2171 	OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
2172 		 RADEON_CRTC_OFFSET_FLIP_CNTL);
2173 	ADVANCE_RING();
2174 
2175 	dev_priv->page_flipping = 1;
2176 
2177 	if (dev_priv->sarea_priv->pfCurrentPage != 1)
2178 		dev_priv->sarea_priv->pfCurrentPage = 0;
2179 
2180 	return 0;
2181 }
2182 
2183 /* Swapping and flipping are different operations, need different ioctls.
2184  * They can & should be intermixed to support multiple 3d windows.
2185  */
radeon_cp_flip(struct drm_device * dev,void * data,struct drm_file * file_priv)2186 static int radeon_cp_flip(struct drm_device *dev, void *data, struct drm_file *file_priv)
2187 {
2188 	drm_radeon_private_t *dev_priv = dev->dev_private;
2189 	DRM_DEBUG("\n");
2190 
2191 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2192 
2193 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2194 
2195 	if (!dev_priv->page_flipping)
2196 		radeon_do_init_pageflip(dev);
2197 
2198 	radeon_cp_dispatch_flip(dev);
2199 
2200 	COMMIT_RING();
2201 	return 0;
2202 }
2203 
radeon_cp_swap(struct drm_device * dev,void * data,struct drm_file * file_priv)2204 static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *file_priv)
2205 {
2206 	drm_radeon_private_t *dev_priv = dev->dev_private;
2207 	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2208 	DRM_DEBUG("\n");
2209 
2210 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2211 
2212 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2213 
2214 	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2215 		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2216 
2217 	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2218 		r600_cp_dispatch_swap(dev);
2219 	else
2220 		radeon_cp_dispatch_swap(dev);
2221 	dev_priv->sarea_priv->ctx_owner = 0;
2222 
2223 	COMMIT_RING();
2224 	return 0;
2225 }
2226 
radeon_cp_vertex(struct drm_device * dev,void * data,struct drm_file * file_priv)2227 static int radeon_cp_vertex(struct drm_device *dev, void *data, struct drm_file *file_priv)
2228 {
2229 	drm_radeon_private_t *dev_priv = dev->dev_private;
2230 	drm_radeon_sarea_t *sarea_priv;
2231 	struct drm_device_dma *dma = dev->dma;
2232 	struct drm_buf *buf;
2233 	drm_radeon_vertex_t *vertex = data;
2234 	drm_radeon_tcl_prim_t prim;
2235 
2236 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2237 
2238 	if (!dev_priv) {
2239 		DRM_ERROR("called with no initialization\n");
2240 		return -EINVAL;
2241 	}
2242 
2243 	sarea_priv = dev_priv->sarea_priv;
2244 
2245 	DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
2246 		  DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard);
2247 
2248 	if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2249 		DRM_ERROR("buffer index %d (of %d max)\n",
2250 			  vertex->idx, dma->buf_count - 1);
2251 		return -EINVAL;
2252 	}
2253 	if (vertex->prim < 0 || vertex->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2254 		DRM_ERROR("buffer prim %d\n", vertex->prim);
2255 		return -EINVAL;
2256 	}
2257 
2258 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2259 	VB_AGE_TEST_WITH_RETURN(dev_priv);
2260 
2261 	buf = dma->buflist[vertex->idx];
2262 
2263 	if (buf->file_priv != file_priv) {
2264 		DRM_ERROR("process %d using buffer owned by %p\n",
2265 			  DRM_CURRENTPID, buf->file_priv);
2266 		return -EINVAL;
2267 	}
2268 	if (buf->pending) {
2269 		DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2270 		return -EINVAL;
2271 	}
2272 
2273 	/* Build up a prim_t record:
2274 	 */
2275 	if (vertex->count) {
2276 		buf->used = vertex->count;	/* not used? */
2277 
2278 		if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2279 			if (radeon_emit_state(dev_priv, file_priv,
2280 					      &sarea_priv->context_state,
2281 					      sarea_priv->tex_state,
2282 					      sarea_priv->dirty)) {
2283 				DRM_ERROR("radeon_emit_state failed\n");
2284 				return -EINVAL;
2285 			}
2286 
2287 			sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2288 					       RADEON_UPLOAD_TEX1IMAGES |
2289 					       RADEON_UPLOAD_TEX2IMAGES |
2290 					       RADEON_REQUIRE_QUIESCENCE);
2291 		}
2292 
2293 		prim.start = 0;
2294 		prim.finish = vertex->count;	/* unused */
2295 		prim.prim = vertex->prim;
2296 		prim.numverts = vertex->count;
2297 		prim.vc_format = dev_priv->sarea_priv->vc_format;
2298 
2299 		radeon_cp_dispatch_vertex(dev, buf, &prim);
2300 	}
2301 
2302 	if (vertex->discard) {
2303 		radeon_cp_discard_buffer(dev, buf);
2304 	}
2305 
2306 	COMMIT_RING();
2307 	return 0;
2308 }
2309 
radeon_cp_indices(struct drm_device * dev,void * data,struct drm_file * file_priv)2310 static int radeon_cp_indices(struct drm_device *dev, void *data, struct drm_file *file_priv)
2311 {
2312 	drm_radeon_private_t *dev_priv = dev->dev_private;
2313 	drm_radeon_sarea_t *sarea_priv;
2314 	struct drm_device_dma *dma = dev->dma;
2315 	struct drm_buf *buf;
2316 	drm_radeon_indices_t *elts = data;
2317 	drm_radeon_tcl_prim_t prim;
2318 
2319 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2320 
2321 	if (!dev_priv) {
2322 		DRM_ERROR("called with no initialization\n");
2323 		return -EINVAL;
2324 	}
2325 	sarea_priv = dev_priv->sarea_priv;
2326 
2327 	DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2328 		  DRM_CURRENTPID, elts->idx, elts->start, elts->end,
2329 		  elts->discard);
2330 
2331 	if (elts->idx < 0 || elts->idx >= dma->buf_count) {
2332 		DRM_ERROR("buffer index %d (of %d max)\n",
2333 			  elts->idx, dma->buf_count - 1);
2334 		return -EINVAL;
2335 	}
2336 	if (elts->prim < 0 || elts->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2337 		DRM_ERROR("buffer prim %d\n", elts->prim);
2338 		return -EINVAL;
2339 	}
2340 
2341 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2342 	VB_AGE_TEST_WITH_RETURN(dev_priv);
2343 
2344 	buf = dma->buflist[elts->idx];
2345 
2346 	if (buf->file_priv != file_priv) {
2347 		DRM_ERROR("process %d using buffer owned by %p\n",
2348 			  DRM_CURRENTPID, buf->file_priv);
2349 		return -EINVAL;
2350 	}
2351 	if (buf->pending) {
2352 		DRM_ERROR("sending pending buffer %d\n", elts->idx);
2353 		return -EINVAL;
2354 	}
2355 
2356 	elts->start -= RADEON_INDEX_PRIM_OFFSET;
2357 
2358 	if (elts->start & 0x7) {
2359 		DRM_ERROR("misaligned buffer 0x%x\n", elts->start);
2360 		return -EINVAL;
2361 	}
2362 	if (elts->start < buf->used) {
2363 		DRM_ERROR("no header 0x%x - 0x%x\n", elts->start, buf->used);
2364 		return -EINVAL;
2365 	}
2366 
2367 	buf->used = elts->end;
2368 
2369 	if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2370 		if (radeon_emit_state(dev_priv, file_priv,
2371 				      &sarea_priv->context_state,
2372 				      sarea_priv->tex_state,
2373 				      sarea_priv->dirty)) {
2374 			DRM_ERROR("radeon_emit_state failed\n");
2375 			return -EINVAL;
2376 		}
2377 
2378 		sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2379 				       RADEON_UPLOAD_TEX1IMAGES |
2380 				       RADEON_UPLOAD_TEX2IMAGES |
2381 				       RADEON_REQUIRE_QUIESCENCE);
2382 	}
2383 
2384 	/* Build up a prim_t record:
2385 	 */
2386 	prim.start = elts->start;
2387 	prim.finish = elts->end;
2388 	prim.prim = elts->prim;
2389 	prim.offset = 0;	/* offset from start of dma buffers */
2390 	prim.numverts = RADEON_MAX_VB_VERTS;	/* duh */
2391 	prim.vc_format = dev_priv->sarea_priv->vc_format;
2392 
2393 	radeon_cp_dispatch_indices(dev, buf, &prim);
2394 	if (elts->discard) {
2395 		radeon_cp_discard_buffer(dev, buf);
2396 	}
2397 
2398 	COMMIT_RING();
2399 	return 0;
2400 }
2401 
radeon_cp_texture(struct drm_device * dev,void * data,struct drm_file * file_priv)2402 static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file *file_priv)
2403 {
2404 	drm_radeon_private_t *dev_priv = dev->dev_private;
2405 	drm_radeon_texture_t *tex = data;
2406 	drm_radeon_tex_image_t image;
2407 	int ret;
2408 
2409 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2410 
2411 	if (tex->image == NULL) {
2412 		DRM_ERROR("null texture image!\n");
2413 		return -EINVAL;
2414 	}
2415 
2416 	if (DRM_COPY_FROM_USER(&image,
2417 			       (drm_radeon_tex_image_t __user *) tex->image,
2418 			       sizeof(image)))
2419 		return -EFAULT;
2420 
2421 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2422 	VB_AGE_TEST_WITH_RETURN(dev_priv);
2423 
2424 	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2425 		ret = r600_cp_dispatch_texture(dev, file_priv, tex, &image);
2426 	else
2427 		ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image);
2428 
2429 	return ret;
2430 }
2431 
radeon_cp_stipple(struct drm_device * dev,void * data,struct drm_file * file_priv)2432 static int radeon_cp_stipple(struct drm_device *dev, void *data, struct drm_file *file_priv)
2433 {
2434 	drm_radeon_private_t *dev_priv = dev->dev_private;
2435 	drm_radeon_stipple_t *stipple = data;
2436 	u32 mask[32];
2437 
2438 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2439 
2440 	if (DRM_COPY_FROM_USER(&mask, stipple->mask, 32 * sizeof(u32)))
2441 		return -EFAULT;
2442 
2443 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2444 
2445 	radeon_cp_dispatch_stipple(dev, mask);
2446 
2447 	COMMIT_RING();
2448 	return 0;
2449 }
2450 
radeon_cp_indirect(struct drm_device * dev,void * data,struct drm_file * file_priv)2451 static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_file *file_priv)
2452 {
2453 	drm_radeon_private_t *dev_priv = dev->dev_private;
2454 	struct drm_device_dma *dma = dev->dma;
2455 	struct drm_buf *buf;
2456 	drm_radeon_indirect_t *indirect = data;
2457 	RING_LOCALS;
2458 
2459 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2460 
2461 	if (!dev_priv) {
2462 		DRM_ERROR("called with no initialization\n");
2463 		return -EINVAL;
2464 	}
2465 
2466 	DRM_DEBUG("idx=%d s=%d e=%d d=%d\n",
2467 		  indirect->idx, indirect->start, indirect->end,
2468 		  indirect->discard);
2469 
2470 	if (indirect->idx < 0 || indirect->idx >= dma->buf_count) {
2471 		DRM_ERROR("buffer index %d (of %d max)\n",
2472 			  indirect->idx, dma->buf_count - 1);
2473 		return -EINVAL;
2474 	}
2475 
2476 	buf = dma->buflist[indirect->idx];
2477 
2478 	if (buf->file_priv != file_priv) {
2479 		DRM_ERROR("process %d using buffer owned by %p\n",
2480 			  DRM_CURRENTPID, buf->file_priv);
2481 		return -EINVAL;
2482 	}
2483 	if (buf->pending) {
2484 		DRM_ERROR("sending pending buffer %d\n", indirect->idx);
2485 		return -EINVAL;
2486 	}
2487 
2488 	if (indirect->start < buf->used) {
2489 		DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
2490 			  indirect->start, buf->used);
2491 		return -EINVAL;
2492 	}
2493 
2494 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2495 	VB_AGE_TEST_WITH_RETURN(dev_priv);
2496 
2497 	buf->used = indirect->end;
2498 
2499 	/* Dispatch the indirect buffer full of commands from the
2500 	 * X server.  This is insecure and is thus only available to
2501 	 * privileged clients.
2502 	 */
2503 	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2504 		r600_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2505 	else {
2506 		/* Wait for the 3D stream to idle before the indirect buffer
2507 		 * containing 2D acceleration commands is processed.
2508 		 */
2509 		BEGIN_RING(2);
2510 		RADEON_WAIT_UNTIL_3D_IDLE();
2511 		ADVANCE_RING();
2512 		radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2513 	}
2514 
2515 	if (indirect->discard)
2516 		radeon_cp_discard_buffer(dev, buf);
2517 
2518 	COMMIT_RING();
2519 	return 0;
2520 }
2521 
radeon_cp_vertex2(struct drm_device * dev,void * data,struct drm_file * file_priv)2522 static int radeon_cp_vertex2(struct drm_device *dev, void *data, struct drm_file *file_priv)
2523 {
2524 	drm_radeon_private_t *dev_priv = dev->dev_private;
2525 	drm_radeon_sarea_t *sarea_priv;
2526 	struct drm_device_dma *dma = dev->dma;
2527 	struct drm_buf *buf;
2528 	drm_radeon_vertex2_t *vertex = data;
2529 	int i;
2530 	unsigned char laststate;
2531 
2532 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2533 
2534 	if (!dev_priv) {
2535 		DRM_ERROR("called with no initialization\n");
2536 		return -EINVAL;
2537 	}
2538 
2539 	sarea_priv = dev_priv->sarea_priv;
2540 
2541 	DRM_DEBUG("pid=%d index=%d discard=%d\n",
2542 		  DRM_CURRENTPID, vertex->idx, vertex->discard);
2543 
2544 	if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2545 		DRM_ERROR("buffer index %d (of %d max)\n",
2546 			  vertex->idx, dma->buf_count - 1);
2547 		return -EINVAL;
2548 	}
2549 
2550 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2551 	VB_AGE_TEST_WITH_RETURN(dev_priv);
2552 
2553 	buf = dma->buflist[vertex->idx];
2554 
2555 	if (buf->file_priv != file_priv) {
2556 		DRM_ERROR("process %d using buffer owned by %p\n",
2557 			  DRM_CURRENTPID, buf->file_priv);
2558 		return -EINVAL;
2559 	}
2560 
2561 	if (buf->pending) {
2562 		DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2563 		return -EINVAL;
2564 	}
2565 
2566 	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2567 		return -EINVAL;
2568 
2569 	for (laststate = 0xff, i = 0; i < vertex->nr_prims; i++) {
2570 		drm_radeon_prim_t prim;
2571 		drm_radeon_tcl_prim_t tclprim;
2572 
2573 		if (DRM_COPY_FROM_USER(&prim, &vertex->prim[i], sizeof(prim)))
2574 			return -EFAULT;
2575 
2576 		if (prim.stateidx != laststate) {
2577 			drm_radeon_state_t state;
2578 
2579 			if (DRM_COPY_FROM_USER(&state,
2580 					       &vertex->state[prim.stateidx],
2581 					       sizeof(state)))
2582 				return -EFAULT;
2583 
2584 			if (radeon_emit_state2(dev_priv, file_priv, &state)) {
2585 				DRM_ERROR("radeon_emit_state2 failed\n");
2586 				return -EINVAL;
2587 			}
2588 
2589 			laststate = prim.stateidx;
2590 		}
2591 
2592 		tclprim.start = prim.start;
2593 		tclprim.finish = prim.finish;
2594 		tclprim.prim = prim.prim;
2595 		tclprim.vc_format = prim.vc_format;
2596 
2597 		if (prim.prim & RADEON_PRIM_WALK_IND) {
2598 			tclprim.offset = prim.numverts * 64;
2599 			tclprim.numverts = RADEON_MAX_VB_VERTS;	/* duh */
2600 
2601 			radeon_cp_dispatch_indices(dev, buf, &tclprim);
2602 		} else {
2603 			tclprim.numverts = prim.numverts;
2604 			tclprim.offset = 0;	/* not used */
2605 
2606 			radeon_cp_dispatch_vertex(dev, buf, &tclprim);
2607 		}
2608 
2609 		if (sarea_priv->nbox == 1)
2610 			sarea_priv->nbox = 0;
2611 	}
2612 
2613 	if (vertex->discard) {
2614 		radeon_cp_discard_buffer(dev, buf);
2615 	}
2616 
2617 	COMMIT_RING();
2618 	return 0;
2619 }
2620 
/* Emit one state packet taken from the command buffer.
 *
 * header.packet.packet_id selects an entry of the packet[] table, which
 * supplies the start register and the length in dwords.  The payload is
 * read from cmdbuf->buf, checked by radeon_check_and_fixup_packets() and
 * written to the ring; cmdbuf is advanced past it on success.
 *
 * Returns 0 on success, -EINVAL for an out-of-range id, a payload that
 * is larger than the remaining buffer, or a failed verification.
 */
static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
			       struct drm_file *file_priv,
			       drm_radeon_cmd_header_t header,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	const int id = (int)header.packet.packet_id;
	int *payload = (int *)cmdbuf->buf;
	int ndwords, first_reg;
	RING_LOCALS;

	if (id >= RADEON_MAX_STATE_PACKETS)
		return -EINVAL;

	first_reg = packet[id].start;
	ndwords = packet[id].len;

	if (ndwords * sizeof(int) > cmdbuf->bufsz) {
		DRM_ERROR("Packet size provided larger than data provided\n");
		return -EINVAL;
	}

	if (radeon_check_and_fixup_packets(dev_priv, file_priv, id, payload)) {
		DRM_ERROR("Packet verification failed\n");
		return -EINVAL;
	}

	/* one header dword followed by the payload */
	BEGIN_RING(ndwords + 1);
	OUT_RING(CP_PACKET0(first_reg, (ndwords - 1)));
	OUT_RING_TABLE(payload, ndwords);
	ADVANCE_RING();

	cmdbuf->buf += ndwords * sizeof(int);
	cmdbuf->bufsz -= ndwords * sizeof(int);
	return 0;
}
2656 
/* Emit a block of TCL scalar state taken from the command buffer.
 *
 * header.scalars supplies the start index, the stride and the number of
 * dwords.  The dwords are read from cmdbuf->buf, and cmdbuf is advanced
 * past them on success.
 *
 * Returns 0 on success (a zero count emits nothing) or -EINVAL when the
 * command buffer holds fewer dwords than the header asks for.
 */
static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.scalars.count;
	int start = header.scalars.offset;
	int stride = header.scalars.stride;
	RING_LOCALS;

	/* A zero count would put (sz - 1) == -1 into the table packet
	 * header below; there is nothing to upload, so emit nothing.
	 */
	if (!sz)
		return 0;
	/* Never read past the end of the data that was supplied. */
	if (sz * (int)sizeof(int) > cmdbuf->bufsz)
		return -EINVAL;

	BEGIN_RING(3 + sz);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();
	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}
2676 
2677 /* God this is ugly
2678  */
radeon_emit_scalars2(drm_radeon_private_t * dev_priv,drm_radeon_cmd_header_t header,drm_radeon_kcmd_buffer_t * cmdbuf)2679 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2680 					   drm_radeon_cmd_header_t header,
2681 					   drm_radeon_kcmd_buffer_t *cmdbuf)
2682 {
2683 	int sz = header.scalars.count;
2684 	int start = ((unsigned int)header.scalars.offset) + 0x100;
2685 	int stride = header.scalars.stride;
2686 	RING_LOCALS;
2687 
2688 	BEGIN_RING(3 + sz);
2689 	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2690 	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2691 	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2692 	OUT_RING_TABLE(cmdbuf->buf, sz);
2693 	ADVANCE_RING();
2694 	cmdbuf->buf += sz * sizeof(int);
2695 	cmdbuf->bufsz -= sz * sizeof(int);
2696 	return 0;
2697 }
2698 
/* Emit a block of TCL vector state taken from the command buffer.
 *
 * header.vectors supplies the start index, the stride and the number of
 * dwords.  A write of 0 to RADEON_SE_TCL_STATE_FLUSH is queued ahead of
 * the data.  The dwords are read from cmdbuf->buf, and cmdbuf is
 * advanced past them on success.
 *
 * Returns 0 on success (a zero count emits nothing) or -EINVAL when the
 * command buffer holds fewer dwords than the header asks for.
 */
static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.vectors.count;
	int start = header.vectors.offset;
	int stride = header.vectors.stride;
	RING_LOCALS;

	/* A zero count would put (sz - 1) == -1 into the table packet
	 * header below; there is nothing to upload, so emit nothing.
	 */
	if (!sz)
		return 0;
	/* Never read past the end of the data that was supplied. */
	if (sz * (int)sizeof(int) > cmdbuf->bufsz)
		return -EINVAL;

	BEGIN_RING(5 + sz);
	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}
2720 
/* Emit a linear block of TCL vector state taken from the command buffer.
 *
 * header.veclinear.count is multiplied by 4 to get the number of dwords,
 * and the start index is assembled from addr_lo and addr_hi.  The data
 * is written with a stride of 1; cmdbuf is advanced past it on success.
 *
 * Returns 0 on success (a zero count emits nothing) or -EINVAL when the
 * data would not fit in the remaining command buffer.
 */
static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int ndwords = header.veclinear.count * 4;
	int first = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
	RING_LOCALS;

	if (ndwords == 0)
		return 0;
	if (ndwords * 4 > cmdbuf->bufsz)
		return -EINVAL;

	BEGIN_RING(5 + ndwords);
	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
	OUT_RING(first | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (ndwords - 1)));
	OUT_RING_TABLE(cmdbuf->buf, ndwords);
	ADVANCE_RING();

	cmdbuf->buf += ndwords * sizeof(int);
	cmdbuf->bufsz -= ndwords * sizeof(int);
	return 0;
}
2746 
/* Emit one type-3 packet taken from the command buffer.
 *
 * radeon_check_and_fixup_packet3() verifies the packet and reports its
 * size in dwords; the packet is then copied to the ring and cmdbuf is
 * advanced past it.
 *
 * Returns 0 on success, otherwise the error from the verification.
 */
static int radeon_emit_packet3(struct drm_device * dev,
			       struct drm_file *file_priv,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	unsigned int cmdsz;
	int ret;
	RING_LOCALS;

	DRM_DEBUG("\n");

	ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
					     cmdbuf, &cmdsz);
	if (ret) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	BEGIN_RING(cmdsz);
	OUT_RING_TABLE(cmdbuf->buf, cmdsz);
	ADVANCE_RING();

	/* cmdsz counts dwords, the buffer cursor counts bytes */
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}
2772 
/* Emit one type-3 packet once per cliprect.
 *
 * The packet is verified by radeon_check_and_fixup_packet3().  When
 * 'orig_nbox' is zero nothing is emitted and the packet is only
 * skipped.  Otherwise the packet is written to the ring at least once,
 * each time preceded by the next cliprect copied from cmdbuf->boxes
 * while boxes remain.  cmdbuf is advanced past the packet in both cases.
 *
 * Returns 0 on success, the verification error, or -EFAULT when a
 * cliprect cannot be copied from user space.
 */
static int radeon_emit_packet3_cliprect(struct drm_device *dev,
					struct drm_file *file_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					int orig_nbox)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_clip_rect __user *boxes = cmdbuf->boxes;
	struct drm_clip_rect box;
	unsigned int cmdsz;
	int pass = 0;
	int ret;
	RING_LOCALS;

	DRM_DEBUG("\n");

	ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
					     cmdbuf, &cmdsz);
	if (ret) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	if (orig_nbox) {
		do {
			if (pass < cmdbuf->nbox) {
				if (DRM_COPY_FROM_USER(&box, &boxes[pass],
						       sizeof(box)))
					return -EFAULT;
				/* FIXME The second and subsequent times round
				 * this loop, send a WAIT_UNTIL_3D_IDLE before
				 * calling emit_clip_rect(). This fixes a
				 * lockup on fast machines when sending
				 * several cliprects with a cmdbuf, as when
				 * waving a 2D window over a 3D
				 * window. Something in the commands from user
				 * space seems to hang the card when they're
				 * sent several times in a row. That would be
				 * the correct place to fix it but this works
				 * around it until I can figure that out - Tim
				 * Smith */
				if (pass) {
					BEGIN_RING(2);
					RADEON_WAIT_UNTIL_3D_IDLE();
					ADVANCE_RING();
				}
				radeon_emit_clip_rect(dev_priv, &box);
			}

			BEGIN_RING(cmdsz);
			OUT_RING_TABLE(cmdbuf->buf, cmdsz);
			ADVANCE_RING();

		} while (++pass < cmdbuf->nbox);

		if (cmdbuf->nbox == 1)
			cmdbuf->nbox = 0;
	}

	/* skip the packet; cmdsz counts dwords, the cursor counts bytes */
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}
2834 
radeon_emit_wait(struct drm_device * dev,int flags)2835 static int radeon_emit_wait(struct drm_device * dev, int flags)
2836 {
2837 	drm_radeon_private_t *dev_priv = dev->dev_private;
2838 	RING_LOCALS;
2839 
2840 	DRM_DEBUG("%x\n", flags);
2841 	switch (flags) {
2842 	case RADEON_WAIT_2D:
2843 		BEGIN_RING(2);
2844 		RADEON_WAIT_UNTIL_2D_IDLE();
2845 		ADVANCE_RING();
2846 		break;
2847 	case RADEON_WAIT_3D:
2848 		BEGIN_RING(2);
2849 		RADEON_WAIT_UNTIL_3D_IDLE();
2850 		ADVANCE_RING();
2851 		break;
2852 	case RADEON_WAIT_2D | RADEON_WAIT_3D:
2853 		BEGIN_RING(2);
2854 		RADEON_WAIT_UNTIL_IDLE();
2855 		ADVANCE_RING();
2856 		break;
2857 	default:
2858 		return -EINVAL;
2859 	}
2860 
2861 	return 0;
2862 }
2863 
radeon_cp_cmdbuf(struct drm_device * dev,void * data,struct drm_file * file_priv)2864 static int radeon_cp_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
2865 {
2866 	drm_radeon_private_t *dev_priv = dev->dev_private;
2867 	struct drm_device_dma *dma = dev->dma;
2868 	struct drm_buf *buf = NULL;
2869 	int idx;
2870 	drm_radeon_kcmd_buffer_t *cmdbuf = data;
2871 	drm_radeon_cmd_header_t header;
2872 	int orig_nbox, orig_bufsz;
2873 	char *kbuf = NULL;
2874 
2875 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2876 
2877 	if (!dev_priv) {
2878 		DRM_ERROR("called with no initialization\n");
2879 		return -EINVAL;
2880 	}
2881 
2882 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2883 	VB_AGE_TEST_WITH_RETURN(dev_priv);
2884 
2885 	if (cmdbuf->bufsz > 64 * 1024 || cmdbuf->bufsz < 0) {
2886 		return -EINVAL;
2887 	}
2888 
2889 	/* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
2890 	 * races between checking values and using those values in other code,
2891 	 * and simply to avoid a lot of function calls to copy in data.
2892 	 */
2893 	orig_bufsz = cmdbuf->bufsz;
2894 	if (orig_bufsz != 0) {
2895 		kbuf = drm_alloc(cmdbuf->bufsz, DRM_MEM_DRIVER);
2896 		if (kbuf == NULL)
2897 			return -ENOMEM;
2898 		if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf->buf,
2899 				       cmdbuf->bufsz)) {
2900 			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2901 			return -EFAULT;
2902 		}
2903 		cmdbuf->buf = kbuf;
2904 	}
2905 
2906 	orig_nbox = cmdbuf->nbox;
2907 
2908 	if (dev_priv->chip_family >= CHIP_R300) {
2909 		int temp;
2910 		temp = r300_do_cp_cmdbuf(dev, file_priv, cmdbuf);
2911 
2912 		if (orig_bufsz != 0)
2913 			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2914 
2915 		return temp;
2916 	}
2917 
2918 	/* microcode_version != r300 */
2919 	while (cmdbuf->bufsz >= sizeof(header)) {
2920 
2921 		header.i = *(int *)cmdbuf->buf;
2922 		cmdbuf->buf += sizeof(header);
2923 		cmdbuf->bufsz -= sizeof(header);
2924 
2925 		switch (header.header.cmd_type) {
2926 		case RADEON_CMD_PACKET:
2927 			DRM_DEBUG("RADEON_CMD_PACKET\n");
2928 			if (radeon_emit_packets
2929 			    (dev_priv, file_priv, header, cmdbuf)) {
2930 				DRM_ERROR("radeon_emit_packets failed\n");
2931 				goto err;
2932 			}
2933 			break;
2934 
2935 		case RADEON_CMD_SCALARS:
2936 			DRM_DEBUG("RADEON_CMD_SCALARS\n");
2937 			if (radeon_emit_scalars(dev_priv, header, cmdbuf)) {
2938 				DRM_ERROR("radeon_emit_scalars failed\n");
2939 				goto err;
2940 			}
2941 			break;
2942 
2943 		case RADEON_CMD_VECTORS:
2944 			DRM_DEBUG("RADEON_CMD_VECTORS\n");
2945 			if (radeon_emit_vectors(dev_priv, header, cmdbuf)) {
2946 				DRM_ERROR("radeon_emit_vectors failed\n");
2947 				goto err;
2948 			}
2949 			break;
2950 
2951 		case RADEON_CMD_DMA_DISCARD:
2952 			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
2953 			idx = header.dma.buf_idx;
2954 			if (idx < 0 || idx >= dma->buf_count) {
2955 				DRM_ERROR("buffer index %d (of %d max)\n",
2956 					  idx, dma->buf_count - 1);
2957 				goto err;
2958 			}
2959 
2960 			buf = dma->buflist[idx];
2961 			if (buf->file_priv != file_priv || buf->pending) {
2962 				DRM_ERROR("bad buffer %p %p %d\n",
2963 					  buf->file_priv, file_priv,
2964 					  buf->pending);
2965 				goto err;
2966 			}
2967 
2968 			radeon_cp_discard_buffer(dev, buf);
2969 			break;
2970 
2971 		case RADEON_CMD_PACKET3:
2972 			DRM_DEBUG("RADEON_CMD_PACKET3\n");
2973 			if (radeon_emit_packet3(dev, file_priv, cmdbuf)) {
2974 				DRM_ERROR("radeon_emit_packet3 failed\n");
2975 				goto err;
2976 			}
2977 			break;
2978 
2979 		case RADEON_CMD_PACKET3_CLIP:
2980 			DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
2981 			if (radeon_emit_packet3_cliprect
2982 			    (dev, file_priv, cmdbuf, orig_nbox)) {
2983 				DRM_ERROR("radeon_emit_packet3_clip failed\n");
2984 				goto err;
2985 			}
2986 			break;
2987 
2988 		case RADEON_CMD_SCALARS2:
2989 			DRM_DEBUG("RADEON_CMD_SCALARS2\n");
2990 			if (radeon_emit_scalars2(dev_priv, header, cmdbuf)) {
2991 				DRM_ERROR("radeon_emit_scalars2 failed\n");
2992 				goto err;
2993 			}
2994 			break;
2995 
2996 		case RADEON_CMD_WAIT:
2997 			DRM_DEBUG("RADEON_CMD_WAIT\n");
2998 			if (radeon_emit_wait(dev, header.wait.flags)) {
2999 				DRM_ERROR("radeon_emit_wait failed\n");
3000 				goto err;
3001 			}
3002 			break;
3003 		case RADEON_CMD_VECLINEAR:
3004 			DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
3005 			if (radeon_emit_veclinear(dev_priv, header, cmdbuf)) {
3006 				DRM_ERROR("radeon_emit_veclinear failed\n");
3007 				goto err;
3008 			}
3009 			break;
3010 
3011 		default:
3012 			DRM_ERROR("bad cmd_type %d at %p\n",
3013 				  header.header.cmd_type,
3014 				  cmdbuf->buf - sizeof(header));
3015 			goto err;
3016 		}
3017 	}
3018 
3019 	if (orig_bufsz != 0)
3020 		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
3021 
3022 	DRM_DEBUG("DONE\n");
3023 	COMMIT_RING();
3024 	return 0;
3025 
3026       err:
3027 	if (orig_bufsz != 0)
3028 		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
3029 	return -EINVAL;
3030 }
3031 
radeon_cp_getparam(struct drm_device * dev,void * data,struct drm_file * file_priv)3032 static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3033 {
3034 	drm_radeon_private_t *dev_priv = dev->dev_private;
3035 	drm_radeon_getparam_t *param = data;
3036 	int value;
3037 
3038 	if (!dev_priv) {
3039 		DRM_ERROR("called with no initialization\n");
3040 		return -EINVAL;
3041 	}
3042 
3043 	DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
3044 
3045 	switch (param->param) {
3046 	case RADEON_PARAM_GART_BUFFER_OFFSET:
3047 		value = dev_priv->gart_buffers_offset;
3048 		break;
3049 	case RADEON_PARAM_LAST_FRAME:
3050 		dev_priv->stats.last_frame_reads++;
3051 		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3052 			value = GET_R600_SCRATCH(0);
3053 		else
3054 			value = GET_SCRATCH(0);
3055 		break;
3056 	case RADEON_PARAM_LAST_DISPATCH:
3057 		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3058 			value = GET_R600_SCRATCH(1);
3059 		else
3060 			value = GET_SCRATCH(1);
3061 		break;
3062 	case RADEON_PARAM_LAST_CLEAR:
3063 		dev_priv->stats.last_clear_reads++;
3064 		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3065 			value = GET_R600_SCRATCH(2);
3066 		else
3067 			value = GET_SCRATCH(2);
3068 		break;
3069 	case RADEON_PARAM_IRQ_NR:
3070 		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3071 			value = 0;
3072 		else
3073 			value = dev->irq;
3074 		break;
3075 	case RADEON_PARAM_GART_BASE:
3076 		value = dev_priv->gart_vm_start;
3077 		break;
3078 	case RADEON_PARAM_REGISTER_HANDLE:
3079 		value = dev_priv->mmio->offset;
3080 		break;
3081 	case RADEON_PARAM_STATUS_HANDLE:
3082 		value = dev_priv->ring_rptr_offset;
3083 		break;
3084 #ifndef __LP64__
3085 		/*
3086 		 * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
3087 		 * pointer which can't fit into an int-sized variable.  According to
3088 		 * Michel Dänzer, the ioctl() is only used on embedded platforms, so
3089 		 * not supporting it shouldn't be a problem.  If the same functionality
3090 		 * is needed on 64-bit platforms, a new ioctl() would have to be added,
3091 		 * so backwards-compatibility for the embedded platforms can be
3092 		 * maintained.  --davidm 4-Feb-2004.
3093 		 */
3094 	case RADEON_PARAM_SAREA_HANDLE:
3095 		/* The lock is the first dword in the sarea. */
3096 		value = (long)dev->lock.hw_lock;
3097 		break;
3098 #endif
3099 	case RADEON_PARAM_GART_TEX_HANDLE:
3100 		value = dev_priv->gart_textures_offset;
3101 		break;
3102 	case RADEON_PARAM_SCRATCH_OFFSET:
3103 		if (!dev_priv->writeback_works)
3104 			return -EINVAL;
3105 		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3106 			value = R600_SCRATCH_REG_OFFSET;
3107 		else
3108 			value = RADEON_SCRATCH_REG_OFFSET;
3109 		break;
3110 
3111 	case RADEON_PARAM_CARD_TYPE:
3112 		if (dev_priv->flags & RADEON_IS_PCIE)
3113 			value = RADEON_CARD_PCIE;
3114 		else if (dev_priv->flags & RADEON_IS_AGP)
3115 			value = RADEON_CARD_AGP;
3116 		else
3117 			value = RADEON_CARD_PCI;
3118 		break;
3119 	case RADEON_PARAM_VBLANK_CRTC:
3120 		value = radeon_vblank_crtc_get(dev);
3121 		break;
3122 	case RADEON_PARAM_FB_LOCATION:
3123 		value = radeon_read_fb_location(dev_priv);
3124 		break;
3125 	case RADEON_PARAM_NUM_GB_PIPES:
3126 		value = dev_priv->num_gb_pipes;
3127 		break;
3128 	case RADEON_PARAM_NUM_Z_PIPES:
3129 		value = dev_priv->num_z_pipes;
3130 		break;
3131 	default:
3132 		DRM_DEBUG( "Invalid parameter %d\n", param->param );
3133 		return -EINVAL;
3134 	}
3135 
3136 	if (DRM_COPY_TO_USER(param->value, &value, sizeof(int))) {
3137 		DRM_ERROR("copy_to_user\n");
3138 		return -EFAULT;
3139 	}
3140 
3141 	return 0;
3142 }
3143 
radeon_cp_setparam(struct drm_device * dev,void * data,struct drm_file * file_priv)3144 static int radeon_cp_setparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3145 {
3146 	drm_radeon_private_t *dev_priv = dev->dev_private;
3147 	drm_radeon_setparam_t *sp = data;
3148 	struct drm_radeon_driver_file_fields *radeon_priv;
3149 
3150 	if (!dev_priv) {
3151 		DRM_ERROR("called with no initialization\n");
3152 		return -EINVAL;
3153 	}
3154 
3155 	switch (sp->param) {
3156 	case RADEON_SETPARAM_FB_LOCATION:
3157 		radeon_priv = file_priv->driver_priv;
3158 		radeon_priv->radeon_fb_delta = dev_priv->fb_location -
3159 		    sp->value;
3160 		break;
3161 	case RADEON_SETPARAM_SWITCH_TILING:
3162 		if (sp->value == 0) {
3163 			DRM_DEBUG("color tiling disabled\n");
3164 			dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3165 			dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3166 			if (dev_priv->sarea_priv)
3167 				dev_priv->sarea_priv->tiling_enabled = 0;
3168 		} else if (sp->value == 1) {
3169 			DRM_DEBUG("color tiling enabled\n");
3170 			dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3171 			dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3172 			if (dev_priv->sarea_priv)
3173 				dev_priv->sarea_priv->tiling_enabled = 1;
3174 		}
3175 		break;
3176 	case RADEON_SETPARAM_PCIGART_LOCATION:
3177 		dev_priv->pcigart_offset = sp->value;
3178 		dev_priv->pcigart_offset_set = 1;
3179 		break;
3180 	case RADEON_SETPARAM_NEW_MEMMAP:
3181 		dev_priv->new_memmap = sp->value;
3182 		break;
3183 	case RADEON_SETPARAM_PCIGART_TABLE_SIZE:
3184 		dev_priv->gart_info.table_size = sp->value;
3185 		if (dev_priv->gart_info.table_size < RADEON_PCIGART_TABLE_SIZE)
3186 			dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE;
3187 		break;
3188 	case RADEON_SETPARAM_VBLANK_CRTC:
3189 		return radeon_vblank_crtc_set(dev, sp->value);
3190 		break;
3191 	default:
3192 		DRM_DEBUG("Invalid parameter %d\n", sp->param);
3193 		return -EINVAL;
3194 	}
3195 
3196 	return 0;
3197 }
3198 
3199 /* When a client dies:
3200  *    - Check for and clean up flipped page state
 *    - Free any allocated GART and framebuffer heap memory.
3202  *    - Free any alloced radeon surfaces.
3203  *
3204  * DRM infrastructure takes care of reclaiming dma buffers.
3205  */
radeon_driver_preclose(struct drm_device * dev,struct drm_file * file_priv)3206 void radeon_driver_preclose(struct drm_device *dev,
3207 			    struct drm_file *file_priv)
3208 {
3209 	if (dev->dev_private) {
3210 		drm_radeon_private_t *dev_priv = dev->dev_private;
3211 		dev_priv->page_flipping = 0;
3212 		radeon_mem_release(file_priv, dev_priv->gart_heap);
3213 		radeon_mem_release(file_priv, dev_priv->fb_heap);
3214 		radeon_surfaces_release(file_priv, dev_priv);
3215 	}
3216 }
3217 
radeon_driver_lastclose(struct drm_device * dev)3218 void radeon_driver_lastclose(struct drm_device *dev)
3219 {
3220   	//radeon_surfaces_release(PCIGART_FILE_PRIV, dev->dev_private);
3221 	if (dev->dev_private) {
3222 		drm_radeon_private_t *dev_priv = dev->dev_private;
3223 
3224 		if (dev_priv->sarea_priv &&
3225 		    dev_priv->sarea_priv->pfCurrentPage != 0)
3226 			radeon_cp_dispatch_flip(dev);
3227 	}
3228 
3229 	radeon_do_release(dev);
3230 }
3231 
radeon_driver_open(struct drm_device * dev,struct drm_file * file_priv)3232 int radeon_driver_open(struct drm_device *dev, struct drm_file *file_priv)
3233 {
3234 	drm_radeon_private_t *dev_priv = dev->dev_private;
3235 	struct drm_radeon_driver_file_fields *radeon_priv;
3236 
3237 	DRM_DEBUG("\n");
3238 	radeon_priv =
3239 	    (struct drm_radeon_driver_file_fields *)
3240 	    drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3241 
3242 	if (!radeon_priv)
3243 		return -ENOMEM;
3244 
3245 	file_priv->driver_priv = radeon_priv;
3246 
3247 	if (dev_priv)
3248 		radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3249 	else
3250 		radeon_priv->radeon_fb_delta = 0;
3251 	return 0;
3252 }
3253 
radeon_driver_postclose(struct drm_device * dev,struct drm_file * file_priv)3254 void radeon_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
3255 {
3256 	struct drm_radeon_driver_file_fields *radeon_priv =
3257 	    file_priv->driver_priv;
3258 
3259 	drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
3260 }
3261 
/*
 * Driver ioctl table.  Each DRM_IOCTL_DEF entry pairs an ioctl number with
 * its handler and a set of permission flags (DRM_AUTH, DRM_MASTER,
 * DRM_ROOT_ONLY).
 * NOTE(review): how the flags are enforced is decided by the DRM core and
 * is not visible in this file.
 */
struct drm_ioctl_desc radeon_ioctls[] = {
	DRM_IOCTL_DEF(DRM_RADEON_CP_INIT, radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_CP_RESET, radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_RESET, radeon_engine_reset, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_SWAP, radeon_cp_swap, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_CLEAR, radeon_cp_clear, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_INDICES, radeon_cp_indices, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_FLIP, radeon_cp_flip, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_FREE, radeon_mem_free, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_CS, radeon_cs_ioctl, DRM_AUTH)
};

/* Number of entries in radeon_ioctls[]. */
int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);
3294