1 /* radeon_state.c -- State support for Radeon -*- linux-c -*- */
2 /*
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  */
29 
30 #include "drmP.h"
31 #include "drm.h"
32 #include "drm_sarea.h"
33 #include "radeon_drm.h"
34 #include "radeon_drv.h"
35 
36 /* ================================================================
37  * Helper functions for client state checking and fixup
38  */
39 
40 static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
41 						    dev_priv,
42 						    struct drm_file *file_priv,
43 						    u32 * offset)
44 {
45 	u64 off = *offset;
46 	u32 fb_end = dev_priv->fb_location + dev_priv->fb_size - 1;
47 	struct drm_radeon_driver_file_fields *radeon_priv;
48 
49 	/* Hrm ... the story of the offset ... So this function converts
50 	 * the various ideas of what userland clients might have for an
51 	 * offset in the card address space into an offset into the card
52 	 * address space :) So with a sane client, it should just keep
53 	 * the value intact and just do some boundary checking. However,
54 	 * not all clients are sane. Some older clients pass us 0 based
55 	 * offsets relative to the start of the framebuffer and some may
56 	 * assume the AGP aperture it appended to the framebuffer, so we
57 	 * try to detect those cases and fix them up.
58 	 *
59 	 * Note: It might be a good idea here to make sure the offset lands
60 	 * in some "allowed" area to protect things like the PCIE GART...
61 	 */
62 
63 	/* First, the best case, the offset already lands in either the
64 	 * framebuffer or the GART mapped space
65 	 */
66 	if (radeon_check_offset(dev_priv, off))
67 		return 0;
68 
69 	/* Ok, that didn't happen... now check if we have a zero based
70 	 * offset that fits in the framebuffer + gart space, apply the
71 	 * magic offset we get from SETPARAM or calculated from fb_location
72 	 */
73 	if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
74 		radeon_priv = file_priv->driver_priv;
75 		off += radeon_priv->radeon_fb_delta;
76 	}
77 
78 	/* Finally, assume we aimed at a GART offset if beyond the fb */
79 	if (off > fb_end)
80 		off = off - fb_end - 1 + dev_priv->gart_vm_start;
81 
82 	/* Now recheck and fail if out of bounds */
83 	if (radeon_check_offset(dev_priv, off)) {
84 		DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)off);
85 		*offset = off;
86 		return 0;
87 	}
88 	return -EINVAL;
89 }
90 
91 static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
92 						     dev_priv,
93 						     struct drm_file *file_priv,
94 						     int id, u32 *data)
95 {
96 	switch (id) {
97 
98 	case RADEON_EMIT_PP_MISC:
99 		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
100 		    &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
101 			DRM_ERROR("Invalid depth buffer offset\n");
102 			return -EINVAL;
103 		}
104 		break;
105 
106 	case RADEON_EMIT_PP_CNTL:
107 		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
108 		    &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
109 			DRM_ERROR("Invalid colour buffer offset\n");
110 			return -EINVAL;
111 		}
112 		break;
113 
114 	case R200_EMIT_PP_TXOFFSET_0:
115 	case R200_EMIT_PP_TXOFFSET_1:
116 	case R200_EMIT_PP_TXOFFSET_2:
117 	case R200_EMIT_PP_TXOFFSET_3:
118 	case R200_EMIT_PP_TXOFFSET_4:
119 	case R200_EMIT_PP_TXOFFSET_5:
120 		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
121 						  &data[0])) {
122 			DRM_ERROR("Invalid R200 texture offset\n");
123 			return -EINVAL;
124 		}
125 		break;
126 
127 	case RADEON_EMIT_PP_TXFILTER_0:
128 	case RADEON_EMIT_PP_TXFILTER_1:
129 	case RADEON_EMIT_PP_TXFILTER_2:
130 		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
131 		    &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
132 			DRM_ERROR("Invalid R100 texture offset\n");
133 			return -EINVAL;
134 		}
135 		break;
136 
137 	case R200_EMIT_PP_CUBIC_OFFSETS_0:
138 	case R200_EMIT_PP_CUBIC_OFFSETS_1:
139 	case R200_EMIT_PP_CUBIC_OFFSETS_2:
140 	case R200_EMIT_PP_CUBIC_OFFSETS_3:
141 	case R200_EMIT_PP_CUBIC_OFFSETS_4:
142 	case R200_EMIT_PP_CUBIC_OFFSETS_5:{
143 			int i;
144 			for (i = 0; i < 5; i++) {
145 				if (radeon_check_and_fixup_offset(dev_priv,
146 								  file_priv,
147 								  &data[i])) {
148 					DRM_ERROR
149 					    ("Invalid R200 cubic texture offset\n");
150 					return -EINVAL;
151 				}
152 			}
153 			break;
154 		}
155 
156 	case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
157 	case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
158 	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
159 			int i;
160 			for (i = 0; i < 5; i++) {
161 				if (radeon_check_and_fixup_offset(dev_priv,
162 								  file_priv,
163 								  &data[i])) {
164 					DRM_ERROR
165 					    ("Invalid R100 cubic texture offset\n");
166 					return -EINVAL;
167 				}
168 			}
169 		}
170 		break;
171 
172 	case R200_EMIT_VAP_CTL: {
173 			RING_LOCALS;
174 			BEGIN_RING(2);
175 			OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
176 			ADVANCE_RING();
177 		}
178 		break;
179 
180 	case RADEON_EMIT_RB3D_COLORPITCH:
181 	case RADEON_EMIT_RE_LINE_PATTERN:
182 	case RADEON_EMIT_SE_LINE_WIDTH:
183 	case RADEON_EMIT_PP_LUM_MATRIX:
184 	case RADEON_EMIT_PP_ROT_MATRIX_0:
185 	case RADEON_EMIT_RB3D_STENCILREFMASK:
186 	case RADEON_EMIT_SE_VPORT_XSCALE:
187 	case RADEON_EMIT_SE_CNTL:
188 	case RADEON_EMIT_SE_CNTL_STATUS:
189 	case RADEON_EMIT_RE_MISC:
190 	case RADEON_EMIT_PP_BORDER_COLOR_0:
191 	case RADEON_EMIT_PP_BORDER_COLOR_1:
192 	case RADEON_EMIT_PP_BORDER_COLOR_2:
193 	case RADEON_EMIT_SE_ZBIAS_FACTOR:
194 	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
195 	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
196 	case R200_EMIT_PP_TXCBLEND_0:
197 	case R200_EMIT_PP_TXCBLEND_1:
198 	case R200_EMIT_PP_TXCBLEND_2:
199 	case R200_EMIT_PP_TXCBLEND_3:
200 	case R200_EMIT_PP_TXCBLEND_4:
201 	case R200_EMIT_PP_TXCBLEND_5:
202 	case R200_EMIT_PP_TXCBLEND_6:
203 	case R200_EMIT_PP_TXCBLEND_7:
204 	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
205 	case R200_EMIT_TFACTOR_0:
206 	case R200_EMIT_VTX_FMT_0:
207 	case R200_EMIT_MATRIX_SELECT_0:
208 	case R200_EMIT_TEX_PROC_CTL_2:
209 	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
210 	case R200_EMIT_PP_TXFILTER_0:
211 	case R200_EMIT_PP_TXFILTER_1:
212 	case R200_EMIT_PP_TXFILTER_2:
213 	case R200_EMIT_PP_TXFILTER_3:
214 	case R200_EMIT_PP_TXFILTER_4:
215 	case R200_EMIT_PP_TXFILTER_5:
216 	case R200_EMIT_VTE_CNTL:
217 	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
218 	case R200_EMIT_PP_TAM_DEBUG3:
219 	case R200_EMIT_PP_CNTL_X:
220 	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
221 	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
222 	case R200_EMIT_RE_SCISSOR_TL_0:
223 	case R200_EMIT_RE_SCISSOR_TL_1:
224 	case R200_EMIT_RE_SCISSOR_TL_2:
225 	case R200_EMIT_SE_VAP_CNTL_STATUS:
226 	case R200_EMIT_SE_VTX_STATE_CNTL:
227 	case R200_EMIT_RE_POINTSIZE:
228 	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
229 	case R200_EMIT_PP_CUBIC_FACES_0:
230 	case R200_EMIT_PP_CUBIC_FACES_1:
231 	case R200_EMIT_PP_CUBIC_FACES_2:
232 	case R200_EMIT_PP_CUBIC_FACES_3:
233 	case R200_EMIT_PP_CUBIC_FACES_4:
234 	case R200_EMIT_PP_CUBIC_FACES_5:
235 	case RADEON_EMIT_PP_TEX_SIZE_0:
236 	case RADEON_EMIT_PP_TEX_SIZE_1:
237 	case RADEON_EMIT_PP_TEX_SIZE_2:
238 	case R200_EMIT_RB3D_BLENDCOLOR:
239 	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
240 	case RADEON_EMIT_PP_CUBIC_FACES_0:
241 	case RADEON_EMIT_PP_CUBIC_FACES_1:
242 	case RADEON_EMIT_PP_CUBIC_FACES_2:
243 	case R200_EMIT_PP_TRI_PERF_CNTL:
244 	case R200_EMIT_PP_AFS_0:
245 	case R200_EMIT_PP_AFS_1:
246 	case R200_EMIT_ATF_TFACTOR:
247 	case R200_EMIT_PP_TXCTLALL_0:
248 	case R200_EMIT_PP_TXCTLALL_1:
249 	case R200_EMIT_PP_TXCTLALL_2:
250 	case R200_EMIT_PP_TXCTLALL_3:
251 	case R200_EMIT_PP_TXCTLALL_4:
252 	case R200_EMIT_PP_TXCTLALL_5:
253 	case R200_EMIT_VAP_PVS_CNTL:
254 		/* These packets don't contain memory offsets */
255 		break;
256 
257 	default:
258 		DRM_ERROR("Unknown state packet ID %d\n", id);
259 		return -EINVAL;
260 	}
261 
262 	return 0;
263 }
264 
265 static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
266 						     dev_priv,
267 						     struct drm_file *file_priv,
268 						     drm_radeon_kcmd_buffer_t *
269 						     cmdbuf,
270 						     unsigned int *cmdsz)
271 {
272 	u32 *cmd = (u32 *) cmdbuf->buf;
273 	u32 offset, narrays;
274 	int count, i, k;
275 
276 	*cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
277 
278 	if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
279 		DRM_ERROR("Not a type 3 packet\n");
280 		return -EINVAL;
281 	}
282 
283 	if (4 * *cmdsz > cmdbuf->bufsz) {
284 		DRM_ERROR("Packet size larger than size of data provided\n");
285 		return -EINVAL;
286 	}
287 
288 	switch(cmd[0] & 0xff00) {
289 	/* XXX Are there old drivers needing other packets? */
290 
291 	case RADEON_3D_DRAW_IMMD:
292 	case RADEON_3D_DRAW_VBUF:
293 	case RADEON_3D_DRAW_INDX:
294 	case RADEON_WAIT_FOR_IDLE:
295 	case RADEON_CP_NOP:
296 	case RADEON_3D_CLEAR_ZMASK:
297 /*	case RADEON_CP_NEXT_CHAR:
298 	case RADEON_CP_PLY_NEXTSCAN:
299 	case RADEON_CP_SET_SCISSORS: */ /* probably safe but will never need them? */
300 		/* these packets are safe */
301 		break;
302 
303 	case RADEON_CP_3D_DRAW_IMMD_2:
304 	case RADEON_CP_3D_DRAW_VBUF_2:
305 	case RADEON_CP_3D_DRAW_INDX_2:
306 	case RADEON_3D_CLEAR_HIZ:
307 		/* safe but r200 only */
308 		if ((dev_priv->chip_family < CHIP_R200) ||
309 		    (dev_priv->chip_family > CHIP_RV280)) {
310 			DRM_ERROR("Invalid 3d packet for non r200-class chip\n");
311 			return -EINVAL;
312 		}
313 		break;
314 
315 	case RADEON_3D_LOAD_VBPNTR:
316 		count = (cmd[0] >> 16) & 0x3fff;
317 
318 		if (count > 18) { /* 12 arrays max */
319 			DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
320 				  count);
321 			return -EINVAL;
322 		}
323 
324 		/* carefully check packet contents */
325 		narrays = cmd[1] & ~0xc000;
326 		k = 0;
327 		i = 2;
328 		while ((k < narrays) && (i < (count + 2))) {
329 			i++;		/* skip attribute field */
330 			if (radeon_check_and_fixup_offset(dev_priv, file_priv,
331 							  &cmd[i])) {
332 				DRM_ERROR
333 				    ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
334 				     k, i);
335 				return -EINVAL;
336 			}
337 			k++;
338 			i++;
339 			if (k == narrays)
340 				break;
341 			/* have one more to process, they come in pairs */
342 			if (radeon_check_and_fixup_offset(dev_priv,
343 							  file_priv, &cmd[i]))
344 			{
345 				DRM_ERROR
346 				    ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
347 				     k, i);
348 				return -EINVAL;
349 			}
350 			k++;
351 			i++;
352 		}
353 		/* do the counts match what we expect ? */
354 		if ((k != narrays) || (i != (count + 2))) {
355 			DRM_ERROR
356 			    ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
357 			      k, i, narrays, count + 1);
358 			return -EINVAL;
359 		}
360 		break;
361 
362 	case RADEON_3D_RNDR_GEN_INDX_PRIM:
363 		if (dev_priv->chip_family > CHIP_RS200) {
364 			DRM_ERROR("Invalid 3d packet for non-r100-class chip\n");
365 			return -EINVAL;
366 		}
367 		if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[1])) {
368 				DRM_ERROR("Invalid rndr_gen_indx offset\n");
369 				return -EINVAL;
370 		}
371 		break;
372 
373 	case RADEON_CP_INDX_BUFFER:
374 		/* safe but r200 only */
375 		if ((dev_priv->chip_family < CHIP_R200) ||
376 		    (dev_priv->chip_family > CHIP_RV280)) {
377 			DRM_ERROR("Invalid 3d packet for non-r200-class chip\n");
378 			return -EINVAL;
379 		}
380 		if ((cmd[1] & 0x8000ffff) != 0x80000810) {
381 			DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
382 			return -EINVAL;
383 		}
384 		if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[2])) {
385 			DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
386 			return -EINVAL;
387 		}
388 		break;
389 
390 	case RADEON_CNTL_HOSTDATA_BLT:
391 	case RADEON_CNTL_PAINT_MULTI:
392 	case RADEON_CNTL_BITBLT_MULTI:
393 		/* MSB of opcode: next DWORD GUI_CNTL */
394 		if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
395 			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
396 			offset = cmd[2] << 10;
397 			if (radeon_check_and_fixup_offset
398 			    (dev_priv, file_priv, &offset)) {
399 				DRM_ERROR("Invalid first packet offset\n");
400 				return -EINVAL;
401 			}
402 			cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
403 		}
404 
405 		if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
406 		    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
407 			offset = cmd[3] << 10;
408 			if (radeon_check_and_fixup_offset
409 			    (dev_priv, file_priv, &offset)) {
410 				DRM_ERROR("Invalid second packet offset\n");
411 				return -EINVAL;
412 			}
413 			cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
414 		}
415 		break;
416 
417 	default:
418 		DRM_ERROR("Invalid packet type %x\n", cmd[0] & 0xff00);
419 		return -EINVAL;
420 	}
421 
422 	return 0;
423 }
424 
425 /* ================================================================
426  * CP hardware state programming functions
427  */
428 
429 static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
430 					     struct drm_clip_rect * box)
431 {
432 	RING_LOCALS;
433 
434 	DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
435 		  box->x1, box->y1, box->x2, box->y2);
436 
437 	BEGIN_RING(4);
438 	OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
439 	OUT_RING((box->y1 << 16) | box->x1);
440 	OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
441 	OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
442 	ADVANCE_RING();
443 }
444 
445 /* Emit 1.1 state
446  */
447 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
448 			     struct drm_file *file_priv,
449 			     drm_radeon_context_regs_t * ctx,
450 			     drm_radeon_texture_regs_t * tex,
451 			     unsigned int dirty)
452 {
453 	RING_LOCALS;
454 	DRM_DEBUG("dirty=0x%08x\n", dirty);
455 
456 	if (dirty & RADEON_UPLOAD_CONTEXT) {
457 		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
458 						  &ctx->rb3d_depthoffset)) {
459 			DRM_ERROR("Invalid depth buffer offset\n");
460 			return -EINVAL;
461 		}
462 
463 		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
464 						  &ctx->rb3d_coloroffset)) {
465 			DRM_ERROR("Invalid depth buffer offset\n");
466 			return -EINVAL;
467 		}
468 
469 		BEGIN_RING(14);
470 		OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
471 		OUT_RING(ctx->pp_misc);
472 		OUT_RING(ctx->pp_fog_color);
473 		OUT_RING(ctx->re_solid_color);
474 		OUT_RING(ctx->rb3d_blendcntl);
475 		OUT_RING(ctx->rb3d_depthoffset);
476 		OUT_RING(ctx->rb3d_depthpitch);
477 		OUT_RING(ctx->rb3d_zstencilcntl);
478 		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
479 		OUT_RING(ctx->pp_cntl);
480 		OUT_RING(ctx->rb3d_cntl);
481 		OUT_RING(ctx->rb3d_coloroffset);
482 		OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
483 		OUT_RING(ctx->rb3d_colorpitch);
484 		ADVANCE_RING();
485 	}
486 
487 	if (dirty & RADEON_UPLOAD_VERTFMT) {
488 		BEGIN_RING(2);
489 		OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
490 		OUT_RING(ctx->se_coord_fmt);
491 		ADVANCE_RING();
492 	}
493 
494 	if (dirty & RADEON_UPLOAD_LINE) {
495 		BEGIN_RING(5);
496 		OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
497 		OUT_RING(ctx->re_line_pattern);
498 		OUT_RING(ctx->re_line_state);
499 		OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
500 		OUT_RING(ctx->se_line_width);
501 		ADVANCE_RING();
502 	}
503 
504 	if (dirty & RADEON_UPLOAD_BUMPMAP) {
505 		BEGIN_RING(5);
506 		OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
507 		OUT_RING(ctx->pp_lum_matrix);
508 		OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
509 		OUT_RING(ctx->pp_rot_matrix_0);
510 		OUT_RING(ctx->pp_rot_matrix_1);
511 		ADVANCE_RING();
512 	}
513 
514 	if (dirty & RADEON_UPLOAD_MASKS) {
515 		BEGIN_RING(4);
516 		OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
517 		OUT_RING(ctx->rb3d_stencilrefmask);
518 		OUT_RING(ctx->rb3d_ropcntl);
519 		OUT_RING(ctx->rb3d_planemask);
520 		ADVANCE_RING();
521 	}
522 
523 	if (dirty & RADEON_UPLOAD_VIEWPORT) {
524 		BEGIN_RING(7);
525 		OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
526 		OUT_RING(ctx->se_vport_xscale);
527 		OUT_RING(ctx->se_vport_xoffset);
528 		OUT_RING(ctx->se_vport_yscale);
529 		OUT_RING(ctx->se_vport_yoffset);
530 		OUT_RING(ctx->se_vport_zscale);
531 		OUT_RING(ctx->se_vport_zoffset);
532 		ADVANCE_RING();
533 	}
534 
535 	if (dirty & RADEON_UPLOAD_SETUP) {
536 		BEGIN_RING(4);
537 		OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
538 		OUT_RING(ctx->se_cntl);
539 		OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
540 		OUT_RING(ctx->se_cntl_status);
541 		ADVANCE_RING();
542 	}
543 
544 	if (dirty & RADEON_UPLOAD_MISC) {
545 		BEGIN_RING(2);
546 		OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
547 		OUT_RING(ctx->re_misc);
548 		ADVANCE_RING();
549 	}
550 
551 	if (dirty & RADEON_UPLOAD_TEX0) {
552 		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
553 						  &tex[0].pp_txoffset)) {
554 			DRM_ERROR("Invalid texture offset for unit 0\n");
555 			return -EINVAL;
556 		}
557 
558 		BEGIN_RING(9);
559 		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
560 		OUT_RING(tex[0].pp_txfilter);
561 		OUT_RING(tex[0].pp_txformat);
562 		OUT_RING(tex[0].pp_txoffset);
563 		OUT_RING(tex[0].pp_txcblend);
564 		OUT_RING(tex[0].pp_txablend);
565 		OUT_RING(tex[0].pp_tfactor);
566 		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
567 		OUT_RING(tex[0].pp_border_color);
568 		ADVANCE_RING();
569 	}
570 
571 	if (dirty & RADEON_UPLOAD_TEX1) {
572 		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
573 						  &tex[1].pp_txoffset)) {
574 			DRM_ERROR("Invalid texture offset for unit 1\n");
575 			return -EINVAL;
576 		}
577 
578 		BEGIN_RING(9);
579 		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
580 		OUT_RING(tex[1].pp_txfilter);
581 		OUT_RING(tex[1].pp_txformat);
582 		OUT_RING(tex[1].pp_txoffset);
583 		OUT_RING(tex[1].pp_txcblend);
584 		OUT_RING(tex[1].pp_txablend);
585 		OUT_RING(tex[1].pp_tfactor);
586 		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
587 		OUT_RING(tex[1].pp_border_color);
588 		ADVANCE_RING();
589 	}
590 
591 	if (dirty & RADEON_UPLOAD_TEX2) {
592 		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
593 						  &tex[2].pp_txoffset)) {
594 			DRM_ERROR("Invalid texture offset for unit 2\n");
595 			return -EINVAL;
596 		}
597 
598 		BEGIN_RING(9);
599 		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
600 		OUT_RING(tex[2].pp_txfilter);
601 		OUT_RING(tex[2].pp_txformat);
602 		OUT_RING(tex[2].pp_txoffset);
603 		OUT_RING(tex[2].pp_txcblend);
604 		OUT_RING(tex[2].pp_txablend);
605 		OUT_RING(tex[2].pp_tfactor);
606 		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
607 		OUT_RING(tex[2].pp_border_color);
608 		ADVANCE_RING();
609 	}
610 
611 	return 0;
612 }
613 
614 /* Emit 1.2 state
615  */
616 static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
617 			      struct drm_file *file_priv,
618 			      drm_radeon_state_t * state)
619 {
620 	RING_LOCALS;
621 
622 	if (state->dirty & RADEON_UPLOAD_ZBIAS) {
623 		BEGIN_RING(3);
624 		OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
625 		OUT_RING(state->context2.se_zbias_factor);
626 		OUT_RING(state->context2.se_zbias_constant);
627 		ADVANCE_RING();
628 	}
629 
630 	return radeon_emit_state(dev_priv, file_priv, &state->context,
631 				 state->tex, state->dirty);
632 }
633 
634 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
635  * 1.3 cmdbuffers allow all previous state to be updated as well as
636  * the tcl scalar and vector areas.
637  */
638 static struct {
639 	int start;
640 	int len;
641 	const char *name;
642 } packet[RADEON_MAX_STATE_PACKETS] = {
643 	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
644 	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
645 	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
646 	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
647 	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
648 	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
649 	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
650 	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
651 	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
652 	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
653 	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
654 	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
655 	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
656 	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
657 	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
658 	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
659 	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
660 	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
661 	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
662 	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
663 	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
664 		    "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
665 	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
666 	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
667 	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
668 	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
669 	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
670 	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
671 	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
672 	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
673 	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
674 	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
675 	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
676 	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
677 	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
678 	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
679 	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
680 	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
681 	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
682 	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
683 	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
684 	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
685 	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
686 	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
687 	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
688 	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
689 	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
690 	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
691 	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
692 	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
693 	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
694 	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
695 	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
696 	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
697 	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
698 	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
699 	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
700 	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
701 	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
702 	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
703 	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
704 	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
705 	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
706 		    "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
707 	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
708 	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
709 	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
710 	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
711 	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
712 	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
713 	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
714 	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
715 	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
716 	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
717 	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
718 	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
719 	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
720 	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
721 	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
722 	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
723 	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
724 	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
725 	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
726 	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
727 	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
728 	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
729 	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
730 	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
731 	{R200_PP_AFS_0, 32, "R200_PP_AFS_0"},     /* 85 */
732 	{R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
733 	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
734 	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
735 	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
736 	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
737 	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
738 	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
739 	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
740 	{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
741 };
742 
743 /* ================================================================
744  * Performance monitoring functions
745  */
746 
747 static void radeon_clear_box(drm_radeon_private_t * dev_priv,
748 			     int x, int y, int w, int h, int r, int g, int b)
749 {
750 	u32 color;
751 	RING_LOCALS;
752 
753 	x += dev_priv->sarea_priv->boxes[0].x1;
754 	y += dev_priv->sarea_priv->boxes[0].y1;
755 
756 	switch (dev_priv->color_fmt) {
757 	case RADEON_COLOR_FORMAT_RGB565:
758 		color = (((r & 0xf8) << 8) |
759 			 ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
760 		break;
761 	case RADEON_COLOR_FORMAT_ARGB8888:
762 	default:
763 		color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
764 		break;
765 	}
766 
767 	BEGIN_RING(4);
768 	RADEON_WAIT_UNTIL_3D_IDLE();
769 	OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
770 	OUT_RING(0xffffffff);
771 	ADVANCE_RING();
772 
773 	BEGIN_RING(6);
774 
775 	OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
776 	OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
777 		 RADEON_GMC_BRUSH_SOLID_COLOR |
778 		 (dev_priv->color_fmt << 8) |
779 		 RADEON_GMC_SRC_DATATYPE_COLOR |
780 		 RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);
781 
782 	if (dev_priv->sarea_priv->pfCurrentPage == 1) {
783 		OUT_RING(dev_priv->front_pitch_offset);
784 	} else {
785 		OUT_RING(dev_priv->back_pitch_offset);
786 	}
787 
788 	OUT_RING(color);
789 
790 	OUT_RING((x << 16) | y);
791 	OUT_RING((w << 16) | h);
792 
793 	ADVANCE_RING();
794 }
795 
796 static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
797 {
798 	/* Collapse various things into a wait flag -- trying to
799 	 * guess if userspase slept -- better just to have them tell us.
800 	 */
801 	if (dev_priv->stats.last_frame_reads > 1 ||
802 	    dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
803 		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
804 	}
805 
806 	if (dev_priv->stats.freelist_loops) {
807 		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
808 	}
809 
810 	/* Purple box for page flipping
811 	 */
812 	if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
813 		radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);
814 
815 	/* Red box if we have to wait for idle at any point
816 	 */
817 	if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
818 		radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);
819 
820 	/* Blue box: lost context?
821 	 */
822 
823 	/* Yellow box for texture swaps
824 	 */
825 	if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
826 		radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);
827 
828 	/* Green box if hardware never idles (as far as we can tell)
829 	 */
830 	if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
831 		radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);
832 
833 	/* Draw bars indicating number of buffers allocated
834 	 * (not a great measure, easily confused)
835 	 */
836 	if (dev_priv->stats.requested_bufs) {
837 		if (dev_priv->stats.requested_bufs > 100)
838 			dev_priv->stats.requested_bufs = 100;
839 
840 		radeon_clear_box(dev_priv, 4, 16,
841 				 dev_priv->stats.requested_bufs, 4,
842 				 196, 128, 128);
843 	}
844 
845 	memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
846 
847 }
848 
849 /* ================================================================
850  * CP command dispatch functions
851  */
852 
853 static void radeon_cp_dispatch_clear(struct drm_device * dev,
854 				     drm_radeon_clear_t * clear,
855 				     drm_radeon_clear_rect_t * depth_boxes)
856 {
857 	drm_radeon_private_t *dev_priv = dev->dev_private;
858 	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
859 	drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
860 	int nbox = sarea_priv->nbox;
861 	struct drm_clip_rect *pbox = sarea_priv->boxes;
862 	unsigned int flags = clear->flags;
863 	u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
864 	int i;
865 	RING_LOCALS;
866 	DRM_DEBUG("flags = 0x%x\n", flags);
867 
868 	dev_priv->stats.clears++;
869 
870 	if (dev_priv->sarea_priv->pfCurrentPage == 1) {
871 		unsigned int tmp = flags;
872 
873 		flags &= ~(RADEON_FRONT | RADEON_BACK);
874 		if (tmp & RADEON_FRONT)
875 			flags |= RADEON_BACK;
876 		if (tmp & RADEON_BACK)
877 			flags |= RADEON_FRONT;
878 	}
879 
880 	if (flags & (RADEON_FRONT | RADEON_BACK)) {
881 
882 		BEGIN_RING(4);
883 
884 		/* Ensure the 3D stream is idle before doing a
885 		 * 2D fill to clear the front or back buffer.
886 		 */
887 		RADEON_WAIT_UNTIL_3D_IDLE();
888 
889 		OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
890 		OUT_RING(clear->color_mask);
891 
892 		ADVANCE_RING();
893 
894 		/* Make sure we restore the 3D state next time.
895 		 */
896 		dev_priv->sarea_priv->ctx_owner = 0;
897 
898 		for (i = 0; i < nbox; i++) {
899 			int x = pbox[i].x1;
900 			int y = pbox[i].y1;
901 			int w = pbox[i].x2 - x;
902 			int h = pbox[i].y2 - y;
903 
904 			DRM_DEBUG("%d,%d-%d,%d flags 0x%x\n",
905 				  x, y, w, h, flags);
906 
907 			if (flags & RADEON_FRONT) {
908 				BEGIN_RING(6);
909 
910 				OUT_RING(CP_PACKET3
911 					 (RADEON_CNTL_PAINT_MULTI, 4));
912 				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
913 					 RADEON_GMC_BRUSH_SOLID_COLOR |
914 					 (dev_priv->
915 					  color_fmt << 8) |
916 					 RADEON_GMC_SRC_DATATYPE_COLOR |
917 					 RADEON_ROP3_P |
918 					 RADEON_GMC_CLR_CMP_CNTL_DIS);
919 
920 				OUT_RING(dev_priv->front_pitch_offset);
921 				OUT_RING(clear->clear_color);
922 
923 				OUT_RING((x << 16) | y);
924 				OUT_RING((w << 16) | h);
925 
926 				ADVANCE_RING();
927 			}
928 
929 			if (flags & RADEON_BACK) {
930 				BEGIN_RING(6);
931 
932 				OUT_RING(CP_PACKET3
933 					 (RADEON_CNTL_PAINT_MULTI, 4));
934 				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
935 					 RADEON_GMC_BRUSH_SOLID_COLOR |
936 					 (dev_priv->
937 					  color_fmt << 8) |
938 					 RADEON_GMC_SRC_DATATYPE_COLOR |
939 					 RADEON_ROP3_P |
940 					 RADEON_GMC_CLR_CMP_CNTL_DIS);
941 
942 				OUT_RING(dev_priv->back_pitch_offset);
943 				OUT_RING(clear->clear_color);
944 
945 				OUT_RING((x << 16) | y);
946 				OUT_RING((w << 16) | h);
947 
948 				ADVANCE_RING();
949 			}
950 		}
951 	}
952 
953 	/* hyper z clear */
	/* no docs available, based on reverse engineering by Stephane Marchesin */
955 	if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
956 	    && (flags & RADEON_CLEAR_FASTZ)) {
957 
958 		int depthpixperline =
959 		    dev_priv->depth_fmt ==
960 		    RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
961 						       2) : (dev_priv->
962 							     depth_pitch / 4);
963 
964 		u32 clearmask;
965 
966 		u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
967 		    ((clear->depth_mask & 0xff) << 24);
968 
969 		/* Make sure we restore the 3D state next time.
970 		 * we haven't touched any "normal" state - still need this?
971 		 */
972 		dev_priv->sarea_priv->ctx_owner = 0;
973 
974 		if ((dev_priv->flags & RADEON_HAS_HIERZ)
975 		    && (flags & RADEON_USE_HIERZ)) {
976 			/* FIXME : reverse engineer that for Rx00 cards */
977 			/* FIXME : the mask supposedly contains low-res z values. So can't set
978 			   just to the max (0xff? or actually 0x3fff?), need to take z clear
979 			   value into account? */
980 			/* pattern seems to work for r100, though get slight
981 			   rendering errors with glxgears. If hierz is not enabled for r100,
982 			   only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
983 			   other ones are ignored, and the same clear mask can be used. That's
984 			   very different behaviour than R200 which needs different clear mask
985 			   and different number of tiles to clear if hierz is enabled or not !?!
986 			 */
987 			clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
988 		} else {
989 			/* clear mask : chooses the clearing pattern.
990 			   rv250: could be used to clear only parts of macrotiles
991 			   (but that would get really complicated...)?
992 			   bit 0 and 1 (either or both of them ?!?!) are used to
993 			   not clear tile (or maybe one of the bits indicates if the tile is
994 			   compressed or not), bit 2 and 3 to not clear tile 1,...,.
995 			   Pattern is as follows:
996 			   | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
997 			   bits -------------------------------------------------
998 			   | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
999 			   rv100: clearmask covers 2x8 4x1 tiles, but one clear still
1000 			   covers 256 pixels ?!?
1001 			 */
1002 			clearmask = 0x0;
1003 		}
1004 
1005 		BEGIN_RING(8);
1006 		RADEON_WAIT_UNTIL_2D_IDLE();
1007 		OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
1008 			     tempRB3D_DEPTHCLEARVALUE);
1009 		/* what offset is this exactly ? */
1010 		OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
1011 		/* need ctlstat, otherwise get some strange black flickering */
1012 		OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
1013 			     RADEON_RB3D_ZC_FLUSH_ALL);
1014 		ADVANCE_RING();
1015 
1016 		for (i = 0; i < nbox; i++) {
1017 			int tileoffset, nrtilesx, nrtilesy, j;
1018 			/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
1019 			if ((dev_priv->flags & RADEON_HAS_HIERZ)
1020 			    && (dev_priv->chip_family < CHIP_R200)) {
1021 				/* FIXME : figure this out for r200 (when hierz is enabled). Or
1022 				   maybe r200 actually doesn't need to put the low-res z value into
1023 				   the tile cache like r100, but just needs to clear the hi-level z-buffer?
1024 				   Works for R100, both with hierz and without.
1025 				   R100 seems to operate on 2x1 8x8 tiles, but...
1026 				   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
1027 				   problematic with resolutions which are not 64 pix aligned? */
1028 				tileoffset =
1029 				    ((pbox[i].y1 >> 3) * depthpixperline +
1030 				     pbox[i].x1) >> 6;
1031 				nrtilesx =
1032 				    ((pbox[i].x2 & ~63) -
1033 				     (pbox[i].x1 & ~63)) >> 4;
1034 				nrtilesy =
1035 				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1036 				for (j = 0; j <= nrtilesy; j++) {
1037 					BEGIN_RING(4);
1038 					OUT_RING(CP_PACKET3
1039 						 (RADEON_3D_CLEAR_ZMASK, 2));
1040 					/* first tile */
1041 					OUT_RING(tileoffset * 8);
1042 					/* the number of tiles to clear */
1043 					OUT_RING(nrtilesx + 4);
1044 					/* clear mask : chooses the clearing pattern. */
1045 					OUT_RING(clearmask);
1046 					ADVANCE_RING();
1047 					tileoffset += depthpixperline >> 6;
1048 				}
1049 			} else if ((dev_priv->chip_family >= CHIP_R200) &&
1050 				   (dev_priv->chip_family <= CHIP_RV280)) {
1051 				/* works for rv250. */
1052 				/* find first macro tile (8x2 4x4 z-pixels on rv250) */
1053 				tileoffset =
1054 				    ((pbox[i].y1 >> 3) * depthpixperline +
1055 				     pbox[i].x1) >> 5;
1056 				nrtilesx =
1057 				    (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
1058 				nrtilesy =
1059 				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1060 				for (j = 0; j <= nrtilesy; j++) {
1061 					BEGIN_RING(4);
1062 					OUT_RING(CP_PACKET3
1063 						 (RADEON_3D_CLEAR_ZMASK, 2));
1064 					/* first tile */
1065 					/* judging by the first tile offset needed, could possibly
1066 					   directly address/clear 4x4 tiles instead of 8x2 * 4x4
1067 					   macro tiles, though would still need clear mask for
					   right/bottom if truly 4x4 granularity is desired ? */
1069 					OUT_RING(tileoffset * 16);
1070 					/* the number of tiles to clear */
1071 					OUT_RING(nrtilesx + 1);
1072 					/* clear mask : chooses the clearing pattern. */
1073 					OUT_RING(clearmask);
1074 					ADVANCE_RING();
1075 					tileoffset += depthpixperline >> 5;
1076 				}
1077 			} else {	/* rv 100 */
1078 				/* rv100 might not need 64 pix alignment, who knows */
1079 				/* offsets are, hmm, weird */
1080 				tileoffset =
1081 				    ((pbox[i].y1 >> 4) * depthpixperline +
1082 				     pbox[i].x1) >> 6;
1083 				nrtilesx =
1084 				    ((pbox[i].x2 & ~63) -
1085 				     (pbox[i].x1 & ~63)) >> 4;
1086 				nrtilesy =
1087 				    (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
1088 				for (j = 0; j <= nrtilesy; j++) {
1089 					BEGIN_RING(4);
1090 					OUT_RING(CP_PACKET3
1091 						 (RADEON_3D_CLEAR_ZMASK, 2));
1092 					OUT_RING(tileoffset * 128);
1093 					/* the number of tiles to clear */
1094 					OUT_RING(nrtilesx + 4);
1095 					/* clear mask : chooses the clearing pattern. */
1096 					OUT_RING(clearmask);
1097 					ADVANCE_RING();
1098 					tileoffset += depthpixperline >> 6;
1099 				}
1100 			}
1101 		}
1102 
1103 		/* TODO don't always clear all hi-level z tiles */
1104 		if ((dev_priv->flags & RADEON_HAS_HIERZ)
1105 		    && ((dev_priv->chip_family >= CHIP_R200) &&
1106 			(dev_priv->chip_family <= CHIP_RV280))
1107 		    && (flags & RADEON_USE_HIERZ))
1108 			/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
1109 			/* FIXME : the mask supposedly contains low-res z values. So can't set
1110 			   just to the max (0xff? or actually 0x3fff?), need to take z clear
1111 			   value into account? */
1112 		{
1113 			BEGIN_RING(4);
1114 			OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
1115 			OUT_RING(0x0);	/* First tile */
1116 			OUT_RING(0x3cc0);
1117 			OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
1118 			ADVANCE_RING();
1119 		}
1120 	}
1121 
1122 	/* We have to clear the depth and/or stencil buffers by
1123 	 * rendering a quad into just those buffers.  Thus, we have to
1124 	 * make sure the 3D engine is configured correctly.
1125 	 */
1126 	else if ((dev_priv->chip_family >= CHIP_R200) &&
1127 		 (dev_priv->chip_family <= CHIP_RV280) &&
1128 		 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1129 
1130 		int tempPP_CNTL;
1131 		int tempRE_CNTL;
1132 		int tempRB3D_CNTL;
1133 		int tempRB3D_ZSTENCILCNTL;
1134 		int tempRB3D_STENCILREFMASK;
1135 		int tempRB3D_PLANEMASK;
1136 		int tempSE_CNTL;
1137 		int tempSE_VTE_CNTL;
1138 		int tempSE_VTX_FMT_0;
1139 		int tempSE_VTX_FMT_1;
1140 		int tempSE_VAP_CNTL;
1141 		int tempRE_AUX_SCISSOR_CNTL;
1142 
1143 		tempPP_CNTL = 0;
1144 		tempRE_CNTL = 0;
1145 
1146 		tempRB3D_CNTL = depth_clear->rb3d_cntl;
1147 
1148 		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1149 		tempRB3D_STENCILREFMASK = 0x0;
1150 
1151 		tempSE_CNTL = depth_clear->se_cntl;
1152 
1153 		/* Disable TCL */
1154 
1155 		tempSE_VAP_CNTL = (	/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
1156 					  (0x9 <<
1157 					   SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));
1158 
1159 		tempRB3D_PLANEMASK = 0x0;
1160 
1161 		tempRE_AUX_SCISSOR_CNTL = 0x0;
1162 
1163 		tempSE_VTE_CNTL =
1164 		    SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;
1165 
1166 		/* Vertex format (X, Y, Z, W) */
1167 		tempSE_VTX_FMT_0 =
1168 		    SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
1169 		    SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
1170 		tempSE_VTX_FMT_1 = 0x0;
1171 
1172 		/*
1173 		 * Depth buffer specific enables
1174 		 */
1175 		if (flags & RADEON_DEPTH) {
1176 			/* Enable depth buffer */
1177 			tempRB3D_CNTL |= RADEON_Z_ENABLE;
1178 		} else {
1179 			/* Disable depth buffer */
1180 			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
1181 		}
1182 
1183 		/*
1184 		 * Stencil buffer specific enables
1185 		 */
1186 		if (flags & RADEON_STENCIL) {
1187 			tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
1188 			tempRB3D_STENCILREFMASK = clear->depth_mask;
1189 		} else {
1190 			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
1191 			tempRB3D_STENCILREFMASK = 0x00000000;
1192 		}
1193 
1194 		if (flags & RADEON_USE_COMP_ZBUF) {
1195 			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1196 			    RADEON_Z_DECOMPRESSION_ENABLE;
1197 		}
1198 		if (flags & RADEON_USE_HIERZ) {
1199 			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1200 		}
1201 
1202 		BEGIN_RING(26);
1203 		RADEON_WAIT_UNTIL_2D_IDLE();
1204 
1205 		OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
1206 		OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
1207 		OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
1208 		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1209 		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
1210 			     tempRB3D_STENCILREFMASK);
1211 		OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
1212 		OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
1213 		OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
1214 		OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
1215 		OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
1216 		OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
1217 		OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
1218 		ADVANCE_RING();
1219 
1220 		/* Make sure we restore the 3D state next time.
1221 		 */
1222 		dev_priv->sarea_priv->ctx_owner = 0;
1223 
1224 		for (i = 0; i < nbox; i++) {
1225 
1226 			/* Funny that this should be required --
1227 			 *  sets top-left?
1228 			 */
1229 			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1230 
1231 			BEGIN_RING(14);
1232 			OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
1233 			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1234 				  RADEON_PRIM_WALK_RING |
1235 				  (3 << RADEON_NUM_VERTICES_SHIFT)));
1236 			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1237 			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1238 			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1239 			OUT_RING(0x3f800000);
1240 			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1241 			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1242 			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1243 			OUT_RING(0x3f800000);
1244 			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1245 			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1246 			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1247 			OUT_RING(0x3f800000);
1248 			ADVANCE_RING();
1249 		}
1250 	} else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1251 
1252 		int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1253 
1254 		rb3d_cntl = depth_clear->rb3d_cntl;
1255 
1256 		if (flags & RADEON_DEPTH) {
1257 			rb3d_cntl |= RADEON_Z_ENABLE;
1258 		} else {
1259 			rb3d_cntl &= ~RADEON_Z_ENABLE;
1260 		}
1261 
1262 		if (flags & RADEON_STENCIL) {
1263 			rb3d_cntl |= RADEON_STENCIL_ENABLE;
1264 			rb3d_stencilrefmask = clear->depth_mask;	/* misnamed field */
1265 		} else {
1266 			rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
1267 			rb3d_stencilrefmask = 0x00000000;
1268 		}
1269 
1270 		if (flags & RADEON_USE_COMP_ZBUF) {
1271 			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1272 			    RADEON_Z_DECOMPRESSION_ENABLE;
1273 		}
1274 		if (flags & RADEON_USE_HIERZ) {
1275 			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1276 		}
1277 
1278 		BEGIN_RING(13);
1279 		RADEON_WAIT_UNTIL_2D_IDLE();
1280 
1281 		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
1282 		OUT_RING(0x00000000);
1283 		OUT_RING(rb3d_cntl);
1284 
1285 		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1286 		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
1287 		OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
1288 		OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
1289 		ADVANCE_RING();
1290 
1291 		/* Make sure we restore the 3D state next time.
1292 		 */
1293 		dev_priv->sarea_priv->ctx_owner = 0;
1294 
1295 		for (i = 0; i < nbox; i++) {
1296 
1297 			/* Funny that this should be required --
1298 			 *  sets top-left?
1299 			 */
1300 			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1301 
1302 			BEGIN_RING(15);
1303 
1304 			OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
1305 			OUT_RING(RADEON_VTX_Z_PRESENT |
1306 				 RADEON_VTX_PKCOLOR_PRESENT);
1307 			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1308 				  RADEON_PRIM_WALK_RING |
1309 				  RADEON_MAOS_ENABLE |
1310 				  RADEON_VTX_FMT_RADEON_MODE |
1311 				  (3 << RADEON_NUM_VERTICES_SHIFT)));
1312 
1313 			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1314 			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1315 			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1316 			OUT_RING(0x0);
1317 
1318 			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1319 			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1320 			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1321 			OUT_RING(0x0);
1322 
1323 			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1324 			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1325 			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1326 			OUT_RING(0x0);
1327 
1328 			ADVANCE_RING();
1329 		}
1330 	}
1331 
1332 	/* Increment the clear counter.  The client-side 3D driver must
1333 	 * wait on this value before performing the clear ioctl.  We
1334 	 * need this because the card's so damned fast...
1335 	 */
1336 	dev_priv->sarea_priv->last_clear++;
1337 
1338 	BEGIN_RING(4);
1339 
1340 	RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear);
1341 	RADEON_WAIT_UNTIL_IDLE();
1342 
1343 	ADVANCE_RING();
1344 }
1345 
1346 static void radeon_cp_dispatch_swap(struct drm_device * dev)
1347 {
1348 	drm_radeon_private_t *dev_priv = dev->dev_private;
1349 	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1350 	int nbox = sarea_priv->nbox;
1351 	struct drm_clip_rect *pbox = sarea_priv->boxes;
1352 	int i;
1353 	RING_LOCALS;
1354 	DRM_DEBUG("\n");
1355 
1356 	/* Do some trivial performance monitoring...
1357 	 */
1358 	if (dev_priv->do_boxes)
1359 		radeon_cp_performance_boxes(dev_priv);
1360 
1361 	/* Wait for the 3D stream to idle before dispatching the bitblt.
1362 	 * This will prevent data corruption between the two streams.
1363 	 */
1364 	BEGIN_RING(2);
1365 
1366 	RADEON_WAIT_UNTIL_3D_IDLE();
1367 
1368 	ADVANCE_RING();
1369 
1370 	for (i = 0; i < nbox; i++) {
1371 		int x = pbox[i].x1;
1372 		int y = pbox[i].y1;
1373 		int w = pbox[i].x2 - x;
1374 		int h = pbox[i].y2 - y;
1375 
1376 		DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);
1377 
1378 		BEGIN_RING(9);
1379 
1380 		OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
1381 		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1382 			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1383 			 RADEON_GMC_BRUSH_NONE |
1384 			 (dev_priv->color_fmt << 8) |
1385 			 RADEON_GMC_SRC_DATATYPE_COLOR |
1386 			 RADEON_ROP3_S |
1387 			 RADEON_DP_SRC_SOURCE_MEMORY |
1388 			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1389 
1390 		/* Make this work even if front & back are flipped:
1391 		 */
1392 		OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
1393 		if (dev_priv->sarea_priv->pfCurrentPage == 0) {
1394 			OUT_RING(dev_priv->back_pitch_offset);
1395 			OUT_RING(dev_priv->front_pitch_offset);
1396 		} else {
1397 			OUT_RING(dev_priv->front_pitch_offset);
1398 			OUT_RING(dev_priv->back_pitch_offset);
1399 		}
1400 
1401 		OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
1402 		OUT_RING((x << 16) | y);
1403 		OUT_RING((x << 16) | y);
1404 		OUT_RING((w << 16) | h);
1405 
1406 		ADVANCE_RING();
1407 	}
1408 
1409 	/* Increment the frame counter.  The client-side 3D driver must
1410 	 * throttle the framerate by waiting for this value before
1411 	 * performing the swapbuffer ioctl.
1412 	 */
1413 	dev_priv->sarea_priv->last_frame++;
1414 
1415 	BEGIN_RING(4);
1416 
1417 	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1418 	RADEON_WAIT_UNTIL_2D_IDLE();
1419 
1420 	ADVANCE_RING();
1421 }
1422 
1423 static void radeon_cp_dispatch_flip(struct drm_device * dev)
1424 {
1425 	drm_radeon_private_t *dev_priv = dev->dev_private;
1426 	struct drm_sarea *sarea = (struct drm_sarea *) dev_priv->sarea->handle;
1427 	int offset = (dev_priv->sarea_priv->pfCurrentPage == 1)
1428 	    ? dev_priv->front_offset : dev_priv->back_offset;
1429 	RING_LOCALS;
1430 	DRM_DEBUG("pfCurrentPage=%d\n",
1431 		  dev_priv->sarea_priv->pfCurrentPage);
1432 
1433 	/* Do some trivial performance monitoring...
1434 	 */
1435 	if (dev_priv->do_boxes) {
1436 		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
1437 		radeon_cp_performance_boxes(dev_priv);
1438 	}
1439 
1440 	/* Update the frame offsets for both CRTCs
1441 	 */
1442 	BEGIN_RING(6);
1443 
1444 	RADEON_WAIT_UNTIL_3D_IDLE();
1445 	OUT_RING_REG(RADEON_CRTC_OFFSET,
1446 		     ((sarea->frame.y * dev_priv->front_pitch +
1447 		       sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
1448 		     + offset);
1449 	OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
1450 		     + offset);
1451 
1452 	ADVANCE_RING();
1453 
1454 	/* Increment the frame counter.  The client-side 3D driver must
1455 	 * throttle the framerate by waiting for this value before
1456 	 * performing the swapbuffer ioctl.
1457 	 */
1458 	dev_priv->sarea_priv->last_frame++;
1459 	dev_priv->sarea_priv->pfCurrentPage =
1460 		1 - dev_priv->sarea_priv->pfCurrentPage;
1461 
1462 	BEGIN_RING(2);
1463 
1464 	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1465 
1466 	ADVANCE_RING();
1467 }
1468 
1469 static int bad_prim_vertex_nr(int primitive, int nr)
1470 {
1471 	switch (primitive & RADEON_PRIM_TYPE_MASK) {
1472 	case RADEON_PRIM_TYPE_NONE:
1473 	case RADEON_PRIM_TYPE_POINT:
1474 		return nr < 1;
1475 	case RADEON_PRIM_TYPE_LINE:
1476 		return (nr & 1) || nr == 0;
1477 	case RADEON_PRIM_TYPE_LINE_STRIP:
1478 		return nr < 2;
1479 	case RADEON_PRIM_TYPE_TRI_LIST:
1480 	case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1481 	case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1482 	case RADEON_PRIM_TYPE_RECT_LIST:
1483 		return nr % 3 || nr == 0;
1484 	case RADEON_PRIM_TYPE_TRI_FAN:
1485 	case RADEON_PRIM_TYPE_TRI_STRIP:
1486 		return nr < 3;
1487 	default:
1488 		return 1;
1489 	}
1490 }
1491 
/* Description of one primitive handed to radeon_cp_dispatch_vertex() or
 * radeon_cp_dispatch_indices().
 */
typedef struct {
	unsigned int start;	/* offset of the primitive's data within the buffer */
	unsigned int finish;	/* offset just past the end of that data */
	unsigned int prim;	/* primitive word, OR'ed into the draw command */
	unsigned int numverts;	/* number of vertices */
	unsigned int offset;	/* added to gart_buffers_offset by the indexed path */
	unsigned int vc_format;	/* vertex format word, emitted unchanged */
} drm_radeon_tcl_prim_t;
1500 
1501 static void radeon_cp_dispatch_vertex(struct drm_device * dev,
1502 				      struct drm_buf * buf,
1503 				      drm_radeon_tcl_prim_t * prim)
1504 {
1505 	drm_radeon_private_t *dev_priv = dev->dev_private;
1506 	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1507 	int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
1508 	int numverts = (int)prim->numverts;
1509 	int nbox = sarea_priv->nbox;
1510 	int i = 0;
1511 	RING_LOCALS;
1512 
1513 	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
1514 		  prim->prim,
1515 		  prim->vc_format, prim->start, prim->finish, prim->numverts);
1516 
1517 	if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
1518 		DRM_ERROR("bad prim %x numverts %d\n",
1519 			  prim->prim, prim->numverts);
1520 		return;
1521 	}
1522 
1523 	do {
1524 		/* Emit the next cliprect */
1525 		if (i < nbox) {
1526 			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1527 		}
1528 
1529 		/* Emit the vertex buffer rendering commands */
1530 		BEGIN_RING(5);
1531 
1532 		OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
1533 		OUT_RING(offset);
1534 		OUT_RING(numverts);
1535 		OUT_RING(prim->vc_format);
1536 		OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
1537 			 RADEON_COLOR_ORDER_RGBA |
1538 			 RADEON_VTX_FMT_RADEON_MODE |
1539 			 (numverts << RADEON_NUM_VERTICES_SHIFT));
1540 
1541 		ADVANCE_RING();
1542 
1543 		i++;
1544 	} while (i < nbox);
1545 }
1546 
1547 void radeon_cp_discard_buffer(struct drm_device * dev, struct drm_buf * buf)
1548 {
1549 	drm_radeon_private_t *dev_priv = dev->dev_private;
1550 	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1551 	RING_LOCALS;
1552 
1553 	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
1554 
1555 	/* Emit the vertex buffer age */
1556 	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600) {
1557 		BEGIN_RING(3);
1558 		R600_DISPATCH_AGE(buf_priv->age);
1559 		ADVANCE_RING();
1560 	} else {
1561 		BEGIN_RING(2);
1562 		RADEON_DISPATCH_AGE(buf_priv->age);
1563 		ADVANCE_RING();
1564 	}
1565 
1566 	buf->pending = 1;
1567 	buf->used = 0;
1568 }
1569 
1570 static void radeon_cp_dispatch_indirect(struct drm_device * dev,
1571 					struct drm_buf * buf, int start, int end)
1572 {
1573 	drm_radeon_private_t *dev_priv = dev->dev_private;
1574 	RING_LOCALS;
1575 	DRM_DEBUG("buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);
1576 
1577 	if (start != end) {
1578 		int offset = (dev_priv->gart_buffers_offset
1579 			      + buf->offset + start);
1580 		int dwords = (end - start + 3) / sizeof(u32);
1581 
1582 		/* Indirect buffer data must be an even number of
1583 		 * dwords, so if we've been given an odd number we must
1584 		 * pad the data with a Type-2 CP packet.
1585 		 */
1586 		if (dwords & 1) {
1587 			u32 *data = (u32 *)
1588 			    ((char *)dev->agp_buffer_map->handle
1589 			     + buf->offset + start);
1590 			data[dwords++] = RADEON_CP_PACKET2;
1591 		}
1592 
1593 		/* Fire off the indirect buffer */
1594 		BEGIN_RING(3);
1595 
1596 		OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
1597 		OUT_RING(offset);
1598 		OUT_RING(dwords);
1599 
1600 		ADVANCE_RING();
1601 	}
1602 }
1603 
1604 static void radeon_cp_dispatch_indices(struct drm_device * dev,
1605 				       struct drm_buf * elt_buf,
1606 				       drm_radeon_tcl_prim_t * prim)
1607 {
1608 	drm_radeon_private_t *dev_priv = dev->dev_private;
1609 	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1610 	int offset = dev_priv->gart_buffers_offset + prim->offset;
1611 	u32 *data;
1612 	int dwords;
1613 	int i = 0;
1614 	int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
1615 	int count = (prim->finish - start) / sizeof(u16);
1616 	int nbox = sarea_priv->nbox;
1617 
1618 	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
1619 		  prim->prim,
1620 		  prim->vc_format,
1621 		  prim->start, prim->finish, prim->offset, prim->numverts);
1622 
1623 	if (bad_prim_vertex_nr(prim->prim, count)) {
1624 		DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
1625 		return;
1626 	}
1627 
1628 	if (start >= prim->finish || (prim->start & 0x7)) {
1629 		DRM_ERROR("buffer prim %d\n", prim->prim);
1630 		return;
1631 	}
1632 
1633 	dwords = (prim->finish - prim->start + 3) / sizeof(u32);
1634 
1635 	data = (u32 *) ((char *)dev->agp_buffer_map->handle +
1636 			elt_buf->offset + prim->start);
1637 
1638 	data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
1639 	data[1] = offset;
1640 	data[2] = prim->numverts;
1641 	data[3] = prim->vc_format;
1642 	data[4] = (prim->prim |
1643 		   RADEON_PRIM_WALK_IND |
1644 		   RADEON_COLOR_ORDER_RGBA |
1645 		   RADEON_VTX_FMT_RADEON_MODE |
1646 		   (count << RADEON_NUM_VERTICES_SHIFT));
1647 
1648 	do {
1649 		if (i < nbox)
1650 			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1651 
1652 		radeon_cp_dispatch_indirect(dev, elt_buf,
1653 					    prim->start, prim->finish);
1654 
1655 		i++;
1656 	} while (i < nbox);
1657 
1658 }
1659 
/* Largest number of bytes uploaded by a single blit pass of
 * radeon_cp_dispatch_texture(); larger images are split into several
 * passes.
 */
#define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE

/*
 * Upload a texture sub-image from user space with the 2D blitter.
 *
 * The image is copied from user memory into DMA buffers taken from the
 * freelist and blitted to tex->offset, in as many passes as needed to
 * keep each pass within RADEON_MAX_TEXTURE_SIZE bytes.
 *
 * dev:       DRM device.
 * file_priv: file the upload is done for; recorded in each buffer used.
 * tex:       destination description (offset, pitch, format, size).
 *            tex->offset may be rewritten by
 *            radeon_check_and_fixup_offset().
 * image:     sub-image to upload.  Updated in place: y, height and data
 *            advance after every pass, and x is doubled for micro-tiled
 *            textures narrower than 64 bytes.
 *
 * Returns 0 on success (including when nothing is left to upload),
 * -EINVAL for a bad offset, an unknown format, or a multi-row image
 * narrower than 64 bytes, -EFAULT when a user copy fails, and -EAGAIN
 * when no free buffer is available (after copying the updated image
 * back to tex->image).
 */
static int radeon_cp_dispatch_texture(struct drm_device * dev,
				      struct drm_file *file_priv,
				      drm_radeon_texture_t * tex,
				      drm_radeon_tex_image_t * image)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_buf *buf;
	u32 format;
	u32 *buffer;
	const u8 __user *data;
	int size, dwords, tex_width, blit_width, spitch;
	u32 height;
	int i;
	u32 texpitch, microtile;
	u32 offset, byte_offset;
	RING_LOCALS;

	if (radeon_check_and_fixup_offset(dev_priv, file_priv, &tex->offset)) {
		DRM_ERROR("Invalid destination offset\n");
		return -EINVAL;
	}

	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

	/* Flush the pixel cache.  This ensures no pixel data gets mixed
	 * up with the texture data from the host data blit, otherwise
	 * part of the texture image may be corrupted.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_IDLE();
	ADVANCE_RING();

	/* The compiler won't optimize away a division by a variable,
	 * even if the only legal values are powers of two.  Thus, we'll
	 * use a shift instead.
	 */
	/* Map the texture format to a blit color format and convert the
	 * texture and image widths from texels to bytes (4, 2 or 1 bytes
	 * per texel).
	 */
	switch (tex->format) {
	case RADEON_TXFORMAT_ARGB8888:
	case RADEON_TXFORMAT_RGBA8888:
		format = RADEON_COLOR_FORMAT_ARGB8888;
		tex_width = tex->width * 4;
		blit_width = image->width * 4;
		break;
	case RADEON_TXFORMAT_AI88:
	case RADEON_TXFORMAT_ARGB1555:
	case RADEON_TXFORMAT_RGB565:
	case RADEON_TXFORMAT_ARGB4444:
	case RADEON_TXFORMAT_VYUY422:
	case RADEON_TXFORMAT_YVYU422:
		format = RADEON_COLOR_FORMAT_RGB565;
		tex_width = tex->width * 2;
		blit_width = image->width * 2;
		break;
	case RADEON_TXFORMAT_I8:
	case RADEON_TXFORMAT_RGB332:
		format = RADEON_COLOR_FORMAT_CI8;
		tex_width = tex->width * 1;
		blit_width = image->width * 1;
		break;
	default:
		DRM_ERROR("invalid texture format %d\n", tex->format);
		return -EINVAL;
	}
	/* Blit source pitch: the image width in 64-byte units.  A pitch
	 * of zero is only accepted for images of at most one row.
	 */
	spitch = blit_width >> 6;
	if (spitch == 0 && image->height > 1)
		return -EINVAL;

	/* Detect micro tiling from the tile bit carried in tex->pitch. */
	texpitch = tex->pitch;
	if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
		microtile = 1;
		if (tex_width < 64) {
			texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
			/* we got tiled coordinates, untile them */
			image->x *= 2;
		}
	} else
		microtile = 0;

	/* this might fail for zero-sized uploads - are those illegal? */
	if (!radeon_check_offset(dev_priv, tex->offset + image->height *
				blit_width - 1)) {
		DRM_ERROR("Invalid final destination offset\n");
		return -EINVAL;
	}

	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);

	/* One iteration per blit pass. */
	do {
		DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
			  tex->offset >> 10, tex->pitch, tex->format,
			  image->x, image->y, image->width, image->height);

		/* Make a copy of some parameters in case we have to
		 * update them for a multi-pass texture blit.
		 */
		height = image->height;
		data = (const u8 __user *)image->data;

		size = height * blit_width;

		/* Clamp this pass to whole rows that fit in one buffer,
		 * round tiny uploads up to one dword, and finish early
		 * when nothing is left.
		 */
		if (size > RADEON_MAX_TEXTURE_SIZE) {
			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
			size = height * blit_width;
		} else if (size < 4 && size > 0) {
			size = 4;
		} else if (size == 0) {
			return 0;
		}

		buf = radeon_freelist_get(dev);
		/* Disabled retry path: the constant 0 keeps this branch
		 * from ever running.
		 */
		if (0 && !buf) {
			radeon_do_cp_idle(dev_priv);
			buf = radeon_freelist_get(dev);
		}
		if (!buf) {
			/* Hand the updated image description back to
			 * user space before reporting -EAGAIN.
			 */
			DRM_DEBUG("EAGAIN\n");
			if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
				return -EFAULT;
			return -EAGAIN;
		}

		/* Dispatch the indirect buffer.
		 */
		buffer =
		    (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
		dwords = size / 4;

/* Copy _width bytes from user space; on failure this returns -EFAULT
 * from the enclosing function.
 */
#define RADEON_COPY_MT(_buf, _data, _width) \
	do { \
		if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
			DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
			return -EFAULT; \
		} \
	} while(0)

		if (microtile) {
			/* texture micro tiling in use, minimum texture width is thus 16 bytes.
			   however, we cannot use blitter directly for texture width < 64 bytes,
			   since minimum tex pitch is 64 bytes and we need this to match
			   the texture width, otherwise the blitter will tile it wrong.
			   Thus, tiling manually in this case. Additionally, need to special
			   case tex height = 1, since our actual image will have height 2
			   and we need to ensure we don't read beyond the texture size
			   from user space. */
			if (tex->height == 1) {
				if (tex_width >= 64 || tex_width <= 16) {
					RADEON_COPY_MT(buffer, data,
						(int)(tex_width * sizeof(u32)));
				} else if (tex_width == 32) {
					RADEON_COPY_MT(buffer, data, 16);
					RADEON_COPY_MT(buffer + 8,
						       data + 16, 16);
				}
			} else if (tex_width >= 64 || tex_width == 16) {
				RADEON_COPY_MT(buffer, data,
					       (int)(dwords * sizeof(u32)));
			} else if (tex_width < 16) {
				/* One row per 4-dword slot of the buffer. */
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					buffer += 4;
					data += tex_width;
				}
			} else if (tex_width == 32) {
				/* TODO: make sure this works when not fitting in one buffer
				   (i.e. 32bytes x 2048...) */
				/* Two rows per iteration: the four 16-byte
				 * pieces are stored at dword offsets 0, 8,
				 * 4 and 12, in that order.
				 */
				for (i = 0; i < tex->height; i += 2) {
					RADEON_COPY_MT(buffer, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 8, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 4, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 12, data, 16);
					data += 16;
					buffer += 16;
				}
			}
		} else {
			if (tex_width >= 32) {
				/* Texture image width is larger than the minimum, so we
				 * can upload it directly.
				 */
				RADEON_COPY_MT(buffer, data,
					       (int)(dwords * sizeof(u32)));
			} else {
				/* Texture image width is less than the minimum, so we
				 * need to pad out each image scanline to the minimum
				 * width.
				 */
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					buffer += 8;
					data += tex_width;
				}
			}
		}

#undef RADEON_COPY_MT
		/* Whole groups of 2048 rows are folded into the destination
		 * offset; only the row within the group is passed as the
		 * blit's y coordinate below.
		 */
		byte_offset = (image->y & ~2047) * blit_width;
		buf->file_priv = file_priv;
		buf->used = size;
		offset = dev_priv->gart_buffers_offset + buf->offset;
		BEGIN_RING(9);
		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (format << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
		OUT_RING((spitch << 22) | (offset >> 10));
		OUT_RING((texpitch << 22) | ((tex->offset >> 10) + (byte_offset >> 10)));
		OUT_RING(0);
		OUT_RING((image->x << 16) | (image->y % 2048));
		OUT_RING((image->width << 16) | height);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();
		COMMIT_RING();

		radeon_cp_discard_buffer(dev, buf);

		/* Update the input parameters for next time */
		image->y += height;
		image->height -= height;
		image->data = (const u8 __user *)image->data + size;
	} while (image->height > 0);

	/* Flush the pixel cache after the blit completes.  This ensures
	 * the texture data is written out to memory before rendering
	 * continues.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_2D_IDLE();
	ADVANCE_RING();
	COMMIT_RING();

	return 0;
}
1904 
1905 static void radeon_cp_dispatch_stipple(struct drm_device * dev, u32 * stipple)
1906 {
1907 	drm_radeon_private_t *dev_priv = dev->dev_private;
1908 	int i;
1909 	RING_LOCALS;
1910 	DRM_DEBUG("\n");
1911 
1912 	BEGIN_RING(35);
1913 
1914 	OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
1915 	OUT_RING(0x00000000);
1916 
1917 	OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
1918 	for (i = 0; i < 32; i++) {
1919 		OUT_RING(stipple[i]);
1920 	}
1921 
1922 	ADVANCE_RING();
1923 }
1924 
1925 static void radeon_apply_surface_regs(int surf_index,
1926 				      drm_radeon_private_t *dev_priv)
1927 {
1928 	if (!dev_priv->mmio)
1929 		return;
1930 
1931 	radeon_do_cp_idle(dev_priv);
1932 
1933 	RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
1934 		     dev_priv->surfaces[surf_index].flags);
1935 	RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
1936 		     dev_priv->surfaces[surf_index].lower);
1937 	RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
1938 		     dev_priv->surfaces[surf_index].upper);
1939 }
1940 
/* Allocates a virtual surface.
 * This doesn't always allocate a real surface; an existing surface is
 * stretched when possible.
 *
 * Note that refcount can be at most 2, since during a free a refcount of 3
 * might mean we have to allocate a new surface, which might not always
 * be available.
 * For example: we allocate three contiguous surfaces ABC. If B is
 * freed, we suddenly need two surfaces to store A and C, which might
 * not always be available.
 */
1952 static int alloc_surface(drm_radeon_surface_alloc_t *new,
1953 			 drm_radeon_private_t *dev_priv,
1954 			 struct drm_file *file_priv)
1955 {
1956 	struct radeon_virt_surface *s;
1957 	int i;
1958 	int virt_surface_index;
1959 	uint32_t new_upper, new_lower;
1960 
1961 	new_lower = new->address;
1962 	new_upper = new_lower + new->size - 1;
1963 
1964 	/* sanity check */
1965 	if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1966 	    ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
1967 	     RADEON_SURF_ADDRESS_FIXED_MASK)
1968 	    || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
1969 		return -1;
1970 
1971 	/* make sure there is no overlap with existing surfaces */
1972 	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1973 		if ((dev_priv->surfaces[i].refcount != 0) &&
1974 		    (((new_lower >= dev_priv->surfaces[i].lower) &&
1975 		      (new_lower < dev_priv->surfaces[i].upper)) ||
1976 		     ((new_lower < dev_priv->surfaces[i].lower) &&
1977 		      (new_upper > dev_priv->surfaces[i].lower)))) {
1978 			return -1;
1979 		}
1980 	}
1981 
1982 	/* find a virtual surface */
1983 	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
1984 		if (dev_priv->virt_surfaces[i].file_priv == 0)
1985 			break;
1986 	if (i == 2 * RADEON_MAX_SURFACES) {
1987 		return -1;
1988 	}
1989 	virt_surface_index = i;
1990 
1991 	/* try to reuse an existing surface */
1992 	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1993 		/* extend before */
1994 		if ((dev_priv->surfaces[i].refcount == 1) &&
1995 		    (new->flags == dev_priv->surfaces[i].flags) &&
1996 		    (new_upper + 1 == dev_priv->surfaces[i].lower)) {
1997 			s = &(dev_priv->virt_surfaces[virt_surface_index]);
1998 			s->surface_index = i;
1999 			s->lower = new_lower;
2000 			s->upper = new_upper;
2001 			s->flags = new->flags;
2002 			s->file_priv = file_priv;
2003 			dev_priv->surfaces[i].refcount++;
2004 			dev_priv->surfaces[i].lower = s->lower;
2005 			radeon_apply_surface_regs(s->surface_index, dev_priv);
2006 			return virt_surface_index;
2007 		}
2008 
2009 		/* extend after */
2010 		if ((dev_priv->surfaces[i].refcount == 1) &&
2011 		    (new->flags == dev_priv->surfaces[i].flags) &&
2012 		    (new_lower == dev_priv->surfaces[i].upper + 1)) {
2013 			s = &(dev_priv->virt_surfaces[virt_surface_index]);
2014 			s->surface_index = i;
2015 			s->lower = new_lower;
2016 			s->upper = new_upper;
2017 			s->flags = new->flags;
2018 			s->file_priv = file_priv;
2019 			dev_priv->surfaces[i].refcount++;
2020 			dev_priv->surfaces[i].upper = s->upper;
2021 			radeon_apply_surface_regs(s->surface_index, dev_priv);
2022 			return virt_surface_index;
2023 		}
2024 	}
2025 
2026 	/* okay, we need a new one */
2027 	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2028 		if (dev_priv->surfaces[i].refcount == 0) {
2029 			s = &(dev_priv->virt_surfaces[virt_surface_index]);
2030 			s->surface_index = i;
2031 			s->lower = new_lower;
2032 			s->upper = new_upper;
2033 			s->flags = new->flags;
2034 			s->file_priv = file_priv;
2035 			dev_priv->surfaces[i].refcount = 1;
2036 			dev_priv->surfaces[i].lower = s->lower;
2037 			dev_priv->surfaces[i].upper = s->upper;
2038 			dev_priv->surfaces[i].flags = s->flags;
2039 			radeon_apply_surface_regs(s->surface_index, dev_priv);
2040 			return virt_surface_index;
2041 		}
2042 	}
2043 
2044 	/* we didn't find anything */
2045 	return -1;
2046 }
2047 
2048 static int free_surface(struct drm_file *file_priv,
2049 			drm_radeon_private_t * dev_priv,
2050 			int lower)
2051 {
2052 	struct radeon_virt_surface *s;
2053 	int i;
2054 	/* find the virtual surface */
2055 	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2056 		s = &(dev_priv->virt_surfaces[i]);
2057 		if (s->file_priv) {
2058 			if ((lower == s->lower) && (file_priv == s->file_priv))
2059 			{
2060 				if (dev_priv->surfaces[s->surface_index].
2061 				    lower == s->lower)
2062 					dev_priv->surfaces[s->surface_index].
2063 					    lower = s->upper;
2064 
2065 				if (dev_priv->surfaces[s->surface_index].
2066 				    upper == s->upper)
2067 					dev_priv->surfaces[s->surface_index].
2068 					    upper = s->lower;
2069 
2070 				dev_priv->surfaces[s->surface_index].refcount--;
2071 				if (dev_priv->surfaces[s->surface_index].
2072 				    refcount == 0)
2073 					dev_priv->surfaces[s->surface_index].
2074 					    flags = 0;
2075 				s->file_priv = NULL;
2076 				radeon_apply_surface_regs(s->surface_index,
2077 							  dev_priv);
2078 				return 0;
2079 			}
2080 		}
2081 	}
2082 	return 1;
2083 }
2084 
2085 static void radeon_surfaces_release(struct drm_file *file_priv,
2086 				    drm_radeon_private_t * dev_priv)
2087 {
2088 	int i;
2089 	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2090 		if (dev_priv->virt_surfaces[i].file_priv == file_priv)
2091 			free_surface(file_priv, dev_priv,
2092 				     dev_priv->virt_surfaces[i].lower);
2093 	}
2094 }
2095 
2096 /* ================================================================
2097  * IOCTL functions
2098  */
2099 static int radeon_surface_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv)
2100 {
2101 	drm_radeon_private_t *dev_priv = dev->dev_private;
2102 	drm_radeon_surface_alloc_t *alloc = data;
2103 
2104 	if (!dev_priv) {
2105 		DRM_ERROR("called with no initialization\n");
2106 		return -EINVAL;
2107 	}
2108 
2109 	if (alloc_surface(alloc, dev_priv, file_priv) == -1)
2110 		return -EINVAL;
2111 	else
2112 		return 0;
2113 }
2114 
2115 static int radeon_surface_free(struct drm_device *dev, void *data, struct drm_file *file_priv)
2116 {
2117 	drm_radeon_private_t *dev_priv = dev->dev_private;
2118 	drm_radeon_surface_free_t *memfree = data;
2119 
2120 	if (!dev_priv) {
2121 		DRM_ERROR("called with no initialization\n");
2122 		return -EINVAL;
2123 	}
2124 
2125 	if (free_surface(file_priv, dev_priv, memfree->address))
2126 		return -EINVAL;
2127 	else
2128 		return 0;
2129 }
2130 
2131 static int radeon_cp_clear(struct drm_device *dev, void *data, struct drm_file *file_priv)
2132 {
2133 	drm_radeon_private_t *dev_priv = dev->dev_private;
2134 	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2135 	drm_radeon_clear_t *clear = data;
2136 	drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
2137 	DRM_DEBUG("\n");
2138 
2139 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2140 
2141 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2142 
2143 	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2144 		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2145 
2146 	if (DRM_COPY_FROM_USER(&depth_boxes, clear->depth_boxes,
2147 			       sarea_priv->nbox * sizeof(depth_boxes[0])))
2148 		return -EFAULT;
2149 
2150 	radeon_cp_dispatch_clear(dev, clear, depth_boxes);
2151 
2152 	COMMIT_RING();
2153 	return 0;
2154 }
2155 
2156 /* Not sure why this isn't set all the time:
2157  */
2158 static int radeon_do_init_pageflip(struct drm_device * dev)
2159 {
2160 	drm_radeon_private_t *dev_priv = dev->dev_private;
2161 	RING_LOCALS;
2162 
2163 	DRM_DEBUG("\n");
2164 
2165 	BEGIN_RING(6);
2166 	RADEON_WAIT_UNTIL_3D_IDLE();
2167 	OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
2168 	OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
2169 		 RADEON_CRTC_OFFSET_FLIP_CNTL);
2170 	OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
2171 	OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
2172 		 RADEON_CRTC_OFFSET_FLIP_CNTL);
2173 	ADVANCE_RING();
2174 
2175 	dev_priv->page_flipping = 1;
2176 
2177 	if (dev_priv->sarea_priv->pfCurrentPage != 1)
2178 		dev_priv->sarea_priv->pfCurrentPage = 0;
2179 
2180 	return 0;
2181 }
2182 
2183 /* Swapping and flipping are different operations, need different ioctls.
2184  * They can & should be intermixed to support multiple 3d windows.
2185  */
2186 static int radeon_cp_flip(struct drm_device *dev, void *data, struct drm_file *file_priv)
2187 {
2188 	drm_radeon_private_t *dev_priv = dev->dev_private;
2189 	DRM_DEBUG("\n");
2190 
2191 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2192 
2193 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2194 
2195 	if (!dev_priv->page_flipping)
2196 		radeon_do_init_pageflip(dev);
2197 
2198 	radeon_cp_dispatch_flip(dev);
2199 
2200 	COMMIT_RING();
2201 	return 0;
2202 }
2203 
2204 static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *file_priv)
2205 {
2206 	drm_radeon_private_t *dev_priv = dev->dev_private;
2207 	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2208 	DRM_DEBUG("\n");
2209 
2210 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2211 
2212 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2213 
2214 	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2215 		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2216 
2217 	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2218 		r600_cp_dispatch_swap(dev);
2219 	else
2220 		radeon_cp_dispatch_swap(dev);
2221 	dev_priv->sarea_priv->ctx_owner = 0;
2222 
2223 	COMMIT_RING();
2224 	return 0;
2225 }
2226 
2227 static int radeon_cp_vertex(struct drm_device *dev, void *data, struct drm_file *file_priv)
2228 {
2229 	drm_radeon_private_t *dev_priv = dev->dev_private;
2230 	drm_radeon_sarea_t *sarea_priv;
2231 	struct drm_device_dma *dma = dev->dma;
2232 	struct drm_buf *buf;
2233 	drm_radeon_vertex_t *vertex = data;
2234 	drm_radeon_tcl_prim_t prim;
2235 
2236 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2237 
2238 	if (!dev_priv) {
2239 		DRM_ERROR("called with no initialization\n");
2240 		return -EINVAL;
2241 	}
2242 
2243 	sarea_priv = dev_priv->sarea_priv;
2244 
2245 	DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
2246 		  DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard);
2247 
2248 	if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2249 		DRM_ERROR("buffer index %d (of %d max)\n",
2250 			  vertex->idx, dma->buf_count - 1);
2251 		return -EINVAL;
2252 	}
2253 	if (vertex->prim < 0 || vertex->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2254 		DRM_ERROR("buffer prim %d\n", vertex->prim);
2255 		return -EINVAL;
2256 	}
2257 
2258 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2259 	VB_AGE_TEST_WITH_RETURN(dev_priv);
2260 
2261 	buf = dma->buflist[vertex->idx];
2262 
2263 	if (buf->file_priv != file_priv) {
2264 		DRM_ERROR("process %d using buffer owned by %p\n",
2265 			  DRM_CURRENTPID, buf->file_priv);
2266 		return -EINVAL;
2267 	}
2268 	if (buf->pending) {
2269 		DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2270 		return -EINVAL;
2271 	}
2272 
2273 	/* Build up a prim_t record:
2274 	 */
2275 	if (vertex->count) {
2276 		buf->used = vertex->count;	/* not used? */
2277 
2278 		if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2279 			if (radeon_emit_state(dev_priv, file_priv,
2280 					      &sarea_priv->context_state,
2281 					      sarea_priv->tex_state,
2282 					      sarea_priv->dirty)) {
2283 				DRM_ERROR("radeon_emit_state failed\n");
2284 				return -EINVAL;
2285 			}
2286 
2287 			sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2288 					       RADEON_UPLOAD_TEX1IMAGES |
2289 					       RADEON_UPLOAD_TEX2IMAGES |
2290 					       RADEON_REQUIRE_QUIESCENCE);
2291 		}
2292 
2293 		prim.start = 0;
2294 		prim.finish = vertex->count;	/* unused */
2295 		prim.prim = vertex->prim;
2296 		prim.numverts = vertex->count;
2297 		prim.vc_format = dev_priv->sarea_priv->vc_format;
2298 
2299 		radeon_cp_dispatch_vertex(dev, buf, &prim);
2300 	}
2301 
2302 	if (vertex->discard) {
2303 		radeon_cp_discard_buffer(dev, buf);
2304 	}
2305 
2306 	COMMIT_RING();
2307 	return 0;
2308 }
2309 
2310 static int radeon_cp_indices(struct drm_device *dev, void *data, struct drm_file *file_priv)
2311 {
2312 	drm_radeon_private_t *dev_priv = dev->dev_private;
2313 	drm_radeon_sarea_t *sarea_priv;
2314 	struct drm_device_dma *dma = dev->dma;
2315 	struct drm_buf *buf;
2316 	drm_radeon_indices_t *elts = data;
2317 	drm_radeon_tcl_prim_t prim;
2318 	int count;
2319 
2320 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2321 
2322 	if (!dev_priv) {
2323 		DRM_ERROR("called with no initialization\n");
2324 		return -EINVAL;
2325 	}
2326 	sarea_priv = dev_priv->sarea_priv;
2327 
2328 	DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2329 		  DRM_CURRENTPID, elts->idx, elts->start, elts->end,
2330 		  elts->discard);
2331 
2332 	if (elts->idx < 0 || elts->idx >= dma->buf_count) {
2333 		DRM_ERROR("buffer index %d (of %d max)\n",
2334 			  elts->idx, dma->buf_count - 1);
2335 		return -EINVAL;
2336 	}
2337 	if (elts->prim < 0 || elts->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2338 		DRM_ERROR("buffer prim %d\n", elts->prim);
2339 		return -EINVAL;
2340 	}
2341 
2342 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2343 	VB_AGE_TEST_WITH_RETURN(dev_priv);
2344 
2345 	buf = dma->buflist[elts->idx];
2346 
2347 	if (buf->file_priv != file_priv) {
2348 		DRM_ERROR("process %d using buffer owned by %p\n",
2349 			  DRM_CURRENTPID, buf->file_priv);
2350 		return -EINVAL;
2351 	}
2352 	if (buf->pending) {
2353 		DRM_ERROR("sending pending buffer %d\n", elts->idx);
2354 		return -EINVAL;
2355 	}
2356 
2357 	count = (elts->end - elts->start) / sizeof(u16);
2358 	elts->start -= RADEON_INDEX_PRIM_OFFSET;
2359 
2360 	if (elts->start & 0x7) {
2361 		DRM_ERROR("misaligned buffer 0x%x\n", elts->start);
2362 		return -EINVAL;
2363 	}
2364 	if (elts->start < buf->used) {
2365 		DRM_ERROR("no header 0x%x - 0x%x\n", elts->start, buf->used);
2366 		return -EINVAL;
2367 	}
2368 
2369 	buf->used = elts->end;
2370 
2371 	if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2372 		if (radeon_emit_state(dev_priv, file_priv,
2373 				      &sarea_priv->context_state,
2374 				      sarea_priv->tex_state,
2375 				      sarea_priv->dirty)) {
2376 			DRM_ERROR("radeon_emit_state failed\n");
2377 			return -EINVAL;
2378 		}
2379 
2380 		sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2381 				       RADEON_UPLOAD_TEX1IMAGES |
2382 				       RADEON_UPLOAD_TEX2IMAGES |
2383 				       RADEON_REQUIRE_QUIESCENCE);
2384 	}
2385 
2386 	/* Build up a prim_t record:
2387 	 */
2388 	prim.start = elts->start;
2389 	prim.finish = elts->end;
2390 	prim.prim = elts->prim;
2391 	prim.offset = 0;	/* offset from start of dma buffers */
2392 	prim.numverts = RADEON_MAX_VB_VERTS;	/* duh */
2393 	prim.vc_format = dev_priv->sarea_priv->vc_format;
2394 
2395 	radeon_cp_dispatch_indices(dev, buf, &prim);
2396 	if (elts->discard) {
2397 		radeon_cp_discard_buffer(dev, buf);
2398 	}
2399 
2400 	COMMIT_RING();
2401 	return 0;
2402 }
2403 
2404 static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file *file_priv)
2405 {
2406 	drm_radeon_private_t *dev_priv = dev->dev_private;
2407 	drm_radeon_texture_t *tex = data;
2408 	drm_radeon_tex_image_t image;
2409 	int ret;
2410 
2411 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2412 
2413 	if (tex->image == NULL) {
2414 		DRM_ERROR("null texture image!\n");
2415 		return -EINVAL;
2416 	}
2417 
2418 	if (DRM_COPY_FROM_USER(&image,
2419 			       (drm_radeon_tex_image_t __user *) tex->image,
2420 			       sizeof(image)))
2421 		return -EFAULT;
2422 
2423 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2424 	VB_AGE_TEST_WITH_RETURN(dev_priv);
2425 
2426 	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2427 		ret = r600_cp_dispatch_texture(dev, file_priv, tex, &image);
2428 	else
2429 		ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image);
2430 
2431 	return ret;
2432 }
2433 
2434 static int radeon_cp_stipple(struct drm_device *dev, void *data, struct drm_file *file_priv)
2435 {
2436 	drm_radeon_private_t *dev_priv = dev->dev_private;
2437 	drm_radeon_stipple_t *stipple = data;
2438 	u32 mask[32];
2439 
2440 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2441 
2442 	if (DRM_COPY_FROM_USER(&mask, stipple->mask, 32 * sizeof(u32)))
2443 		return -EFAULT;
2444 
2445 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2446 
2447 	radeon_cp_dispatch_stipple(dev, mask);
2448 
2449 	COMMIT_RING();
2450 	return 0;
2451 }
2452 
2453 static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_file *file_priv)
2454 {
2455 	drm_radeon_private_t *dev_priv = dev->dev_private;
2456 	struct drm_device_dma *dma = dev->dma;
2457 	struct drm_buf *buf;
2458 	drm_radeon_indirect_t *indirect = data;
2459 	RING_LOCALS;
2460 
2461 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2462 
2463 	if (!dev_priv) {
2464 		DRM_ERROR("called with no initialization\n");
2465 		return -EINVAL;
2466 	}
2467 
2468 	DRM_DEBUG("idx=%d s=%d e=%d d=%d\n",
2469 		  indirect->idx, indirect->start, indirect->end,
2470 		  indirect->discard);
2471 
2472 	if (indirect->idx < 0 || indirect->idx >= dma->buf_count) {
2473 		DRM_ERROR("buffer index %d (of %d max)\n",
2474 			  indirect->idx, dma->buf_count - 1);
2475 		return -EINVAL;
2476 	}
2477 
2478 	buf = dma->buflist[indirect->idx];
2479 
2480 	if (buf->file_priv != file_priv) {
2481 		DRM_ERROR("process %d using buffer owned by %p\n",
2482 			  DRM_CURRENTPID, buf->file_priv);
2483 		return -EINVAL;
2484 	}
2485 	if (buf->pending) {
2486 		DRM_ERROR("sending pending buffer %d\n", indirect->idx);
2487 		return -EINVAL;
2488 	}
2489 
2490 	if (indirect->start < buf->used) {
2491 		DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
2492 			  indirect->start, buf->used);
2493 		return -EINVAL;
2494 	}
2495 
2496 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2497 	VB_AGE_TEST_WITH_RETURN(dev_priv);
2498 
2499 	buf->used = indirect->end;
2500 
2501 	/* Dispatch the indirect buffer full of commands from the
2502 	 * X server.  This is insecure and is thus only available to
2503 	 * privileged clients.
2504 	 */
2505 	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2506 		r600_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2507 	else {
2508 		/* Wait for the 3D stream to idle before the indirect buffer
2509 		 * containing 2D acceleration commands is processed.
2510 		 */
2511 		BEGIN_RING(2);
2512 		RADEON_WAIT_UNTIL_3D_IDLE();
2513 		ADVANCE_RING();
2514 		radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2515 	}
2516 
2517 	if (indirect->discard)
2518 		radeon_cp_discard_buffer(dev, buf);
2519 
2520 	COMMIT_RING();
2521 	return 0;
2522 }
2523 
2524 static int radeon_cp_vertex2(struct drm_device *dev, void *data, struct drm_file *file_priv)
2525 {
2526 	drm_radeon_private_t *dev_priv = dev->dev_private;
2527 	drm_radeon_sarea_t *sarea_priv;
2528 	struct drm_device_dma *dma = dev->dma;
2529 	struct drm_buf *buf;
2530 	drm_radeon_vertex2_t *vertex = data;
2531 	int i;
2532 	unsigned char laststate;
2533 
2534 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2535 
2536 	if (!dev_priv) {
2537 		DRM_ERROR("called with no initialization\n");
2538 		return -EINVAL;
2539 	}
2540 
2541 	sarea_priv = dev_priv->sarea_priv;
2542 
2543 	DRM_DEBUG("pid=%d index=%d discard=%d\n",
2544 		  DRM_CURRENTPID, vertex->idx, vertex->discard);
2545 
2546 	if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2547 		DRM_ERROR("buffer index %d (of %d max)\n",
2548 			  vertex->idx, dma->buf_count - 1);
2549 		return -EINVAL;
2550 	}
2551 
2552 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2553 	VB_AGE_TEST_WITH_RETURN(dev_priv);
2554 
2555 	buf = dma->buflist[vertex->idx];
2556 
2557 	if (buf->file_priv != file_priv) {
2558 		DRM_ERROR("process %d using buffer owned by %p\n",
2559 			  DRM_CURRENTPID, buf->file_priv);
2560 		return -EINVAL;
2561 	}
2562 
2563 	if (buf->pending) {
2564 		DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2565 		return -EINVAL;
2566 	}
2567 
2568 	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2569 		return -EINVAL;
2570 
2571 	for (laststate = 0xff, i = 0; i < vertex->nr_prims; i++) {
2572 		drm_radeon_prim_t prim;
2573 		drm_radeon_tcl_prim_t tclprim;
2574 
2575 		if (DRM_COPY_FROM_USER(&prim, &vertex->prim[i], sizeof(prim)))
2576 			return -EFAULT;
2577 
2578 		if (prim.stateidx != laststate) {
2579 			drm_radeon_state_t state;
2580 
2581 			if (DRM_COPY_FROM_USER(&state,
2582 					       &vertex->state[prim.stateidx],
2583 					       sizeof(state)))
2584 				return -EFAULT;
2585 
2586 			if (radeon_emit_state2(dev_priv, file_priv, &state)) {
2587 				DRM_ERROR("radeon_emit_state2 failed\n");
2588 				return -EINVAL;
2589 			}
2590 
2591 			laststate = prim.stateidx;
2592 		}
2593 
2594 		tclprim.start = prim.start;
2595 		tclprim.finish = prim.finish;
2596 		tclprim.prim = prim.prim;
2597 		tclprim.vc_format = prim.vc_format;
2598 
2599 		if (prim.prim & RADEON_PRIM_WALK_IND) {
2600 			tclprim.offset = prim.numverts * 64;
2601 			tclprim.numverts = RADEON_MAX_VB_VERTS;	/* duh */
2602 
2603 			radeon_cp_dispatch_indices(dev, buf, &tclprim);
2604 		} else {
2605 			tclprim.numverts = prim.numverts;
2606 			tclprim.offset = 0;	/* not used */
2607 
2608 			radeon_cp_dispatch_vertex(dev, buf, &tclprim);
2609 		}
2610 
2611 		if (sarea_priv->nbox == 1)
2612 			sarea_priv->nbox = 0;
2613 	}
2614 
2615 	if (vertex->discard) {
2616 		radeon_cp_discard_buffer(dev, buf);
2617 	}
2618 
2619 	COMMIT_RING();
2620 	return 0;
2621 }
2622 
2623 static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
2624 			       struct drm_file *file_priv,
2625 			       drm_radeon_cmd_header_t header,
2626 			       drm_radeon_kcmd_buffer_t *cmdbuf)
2627 {
2628 	int id = (int)header.packet.packet_id;
2629 	int sz, reg;
2630 	int *data = (int *)cmdbuf->buf;
2631 	RING_LOCALS;
2632 
2633 	if (id >= RADEON_MAX_STATE_PACKETS)
2634 		return -EINVAL;
2635 
2636 	sz = packet[id].len;
2637 	reg = packet[id].start;
2638 
2639 	if (sz * sizeof(int) > cmdbuf->bufsz) {
2640 		DRM_ERROR("Packet size provided larger than data provided\n");
2641 		return -EINVAL;
2642 	}
2643 
2644 	if (radeon_check_and_fixup_packets(dev_priv, file_priv, id, data)) {
2645 		DRM_ERROR("Packet verification failed\n");
2646 		return -EINVAL;
2647 	}
2648 
2649 	BEGIN_RING(sz + 1);
2650 	OUT_RING(CP_PACKET0(reg, (sz - 1)));
2651 	OUT_RING_TABLE(data, sz);
2652 	ADVANCE_RING();
2653 
2654 	cmdbuf->buf += sz * sizeof(int);
2655 	cmdbuf->bufsz -= sz * sizeof(int);
2656 	return 0;
2657 }
2658 
2659 static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
2660 					  drm_radeon_cmd_header_t header,
2661 					  drm_radeon_kcmd_buffer_t *cmdbuf)
2662 {
2663 	int sz = header.scalars.count;
2664 	int start = header.scalars.offset;
2665 	int stride = header.scalars.stride;
2666 	RING_LOCALS;
2667 
2668 	BEGIN_RING(3 + sz);
2669 	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2670 	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2671 	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2672 	OUT_RING_TABLE(cmdbuf->buf, sz);
2673 	ADVANCE_RING();
2674 	cmdbuf->buf += sz * sizeof(int);
2675 	cmdbuf->bufsz -= sz * sizeof(int);
2676 	return 0;
2677 }
2678 
2679 /* God this is ugly
2680  */
2681 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2682 					   drm_radeon_cmd_header_t header,
2683 					   drm_radeon_kcmd_buffer_t *cmdbuf)
2684 {
2685 	int sz = header.scalars.count;
2686 	int start = ((unsigned int)header.scalars.offset) + 0x100;
2687 	int stride = header.scalars.stride;
2688 	RING_LOCALS;
2689 
2690 	BEGIN_RING(3 + sz);
2691 	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2692 	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2693 	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2694 	OUT_RING_TABLE(cmdbuf->buf, sz);
2695 	ADVANCE_RING();
2696 	cmdbuf->buf += sz * sizeof(int);
2697 	cmdbuf->bufsz -= sz * sizeof(int);
2698 	return 0;
2699 }
2700 
2701 static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
2702 					  drm_radeon_cmd_header_t header,
2703 					  drm_radeon_kcmd_buffer_t *cmdbuf)
2704 {
2705 	int sz = header.vectors.count;
2706 	int start = header.vectors.offset;
2707 	int stride = header.vectors.stride;
2708 	RING_LOCALS;
2709 
2710 	BEGIN_RING(5 + sz);
2711 	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2712 	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2713 	OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2714 	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2715 	OUT_RING_TABLE(cmdbuf->buf, sz);
2716 	ADVANCE_RING();
2717 
2718 	cmdbuf->buf += sz * sizeof(int);
2719 	cmdbuf->bufsz -= sz * sizeof(int);
2720 	return 0;
2721 }
2722 
2723 static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
2724 					  drm_radeon_cmd_header_t header,
2725 					  drm_radeon_kcmd_buffer_t *cmdbuf)
2726 {
2727 	int sz = header.veclinear.count * 4;
2728 	int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
2729 	RING_LOCALS;
2730 
2731 	if (!sz)
2732 		return 0;
2733 	if (sz * 4 > cmdbuf->bufsz)
2734 		return -EINVAL;
2735 
2736 	BEGIN_RING(5 + sz);
2737 	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2738 	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2739 	OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2740 	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2741 	OUT_RING_TABLE(cmdbuf->buf, sz);
2742 	ADVANCE_RING();
2743 
2744 	cmdbuf->buf += sz * sizeof(int);
2745 	cmdbuf->bufsz -= sz * sizeof(int);
2746 	return 0;
2747 }
2748 
2749 static int radeon_emit_packet3(struct drm_device * dev,
2750 			       struct drm_file *file_priv,
2751 			       drm_radeon_kcmd_buffer_t *cmdbuf)
2752 {
2753 	drm_radeon_private_t *dev_priv = dev->dev_private;
2754 	unsigned int cmdsz;
2755 	int ret;
2756 	RING_LOCALS;
2757 
2758 	DRM_DEBUG("\n");
2759 
2760 	if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
2761 						  cmdbuf, &cmdsz))) {
2762 		DRM_ERROR("Packet verification failed\n");
2763 		return ret;
2764 	}
2765 
2766 	BEGIN_RING(cmdsz);
2767 	OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2768 	ADVANCE_RING();
2769 
2770 	cmdbuf->buf += cmdsz * 4;
2771 	cmdbuf->bufsz -= cmdsz * 4;
2772 	return 0;
2773 }
2774 
2775 static int radeon_emit_packet3_cliprect(struct drm_device *dev,
2776 					struct drm_file *file_priv,
2777 					drm_radeon_kcmd_buffer_t *cmdbuf,
2778 					int orig_nbox)
2779 {
2780 	drm_radeon_private_t *dev_priv = dev->dev_private;
2781 	struct drm_clip_rect box;
2782 	unsigned int cmdsz;
2783 	int ret;
2784 	struct drm_clip_rect __user *boxes = cmdbuf->boxes;
2785 	int i = 0;
2786 	RING_LOCALS;
2787 
2788 	DRM_DEBUG("\n");
2789 
2790 	if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
2791 						  cmdbuf, &cmdsz))) {
2792 		DRM_ERROR("Packet verification failed\n");
2793 		return ret;
2794 	}
2795 
2796 	if (!orig_nbox)
2797 		goto out;
2798 
2799 	do {
2800 		if (i < cmdbuf->nbox) {
2801 			if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
2802 				return -EFAULT;
2803 			/* FIXME The second and subsequent times round
2804 			 * this loop, send a WAIT_UNTIL_3D_IDLE before
2805 			 * calling emit_clip_rect(). This fixes a
2806 			 * lockup on fast machines when sending
2807 			 * several cliprects with a cmdbuf, as when
2808 			 * waving a 2D window over a 3D
2809 			 * window. Something in the commands from user
2810 			 * space seems to hang the card when they're
2811 			 * sent several times in a row. That would be
2812 			 * the correct place to fix it but this works
2813 			 * around it until I can figure that out - Tim
2814 			 * Smith */
2815 			if (i) {
2816 				BEGIN_RING(2);
2817 				RADEON_WAIT_UNTIL_3D_IDLE();
2818 				ADVANCE_RING();
2819 			}
2820 			radeon_emit_clip_rect(dev_priv, &box);
2821 		}
2822 
2823 		BEGIN_RING(cmdsz);
2824 		OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2825 		ADVANCE_RING();
2826 
2827 	} while (++i < cmdbuf->nbox);
2828 	if (cmdbuf->nbox == 1)
2829 		cmdbuf->nbox = 0;
2830 
2831       out:
2832 	cmdbuf->buf += cmdsz * 4;
2833 	cmdbuf->bufsz -= cmdsz * 4;
2834 	return 0;
2835 }
2836 
2837 static int radeon_emit_wait(struct drm_device * dev, int flags)
2838 {
2839 	drm_radeon_private_t *dev_priv = dev->dev_private;
2840 	RING_LOCALS;
2841 
2842 	DRM_DEBUG("%x\n", flags);
2843 	switch (flags) {
2844 	case RADEON_WAIT_2D:
2845 		BEGIN_RING(2);
2846 		RADEON_WAIT_UNTIL_2D_IDLE();
2847 		ADVANCE_RING();
2848 		break;
2849 	case RADEON_WAIT_3D:
2850 		BEGIN_RING(2);
2851 		RADEON_WAIT_UNTIL_3D_IDLE();
2852 		ADVANCE_RING();
2853 		break;
2854 	case RADEON_WAIT_2D | RADEON_WAIT_3D:
2855 		BEGIN_RING(2);
2856 		RADEON_WAIT_UNTIL_IDLE();
2857 		ADVANCE_RING();
2858 		break;
2859 	default:
2860 		return -EINVAL;
2861 	}
2862 
2863 	return 0;
2864 }
2865 
2866 static int radeon_cp_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
2867 {
2868 	drm_radeon_private_t *dev_priv = dev->dev_private;
2869 	struct drm_device_dma *dma = dev->dma;
2870 	struct drm_buf *buf = NULL;
2871 	int idx;
2872 	drm_radeon_kcmd_buffer_t *cmdbuf = data;
2873 	drm_radeon_cmd_header_t header;
2874 	int orig_nbox, orig_bufsz;
2875 	char *kbuf = NULL;
2876 
2877 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2878 
2879 	if (!dev_priv) {
2880 		DRM_ERROR("called with no initialization\n");
2881 		return -EINVAL;
2882 	}
2883 
2884 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2885 	VB_AGE_TEST_WITH_RETURN(dev_priv);
2886 
2887 	if (cmdbuf->bufsz > 64 * 1024 || cmdbuf->bufsz < 0) {
2888 		return -EINVAL;
2889 	}
2890 
2891 	/* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
2892 	 * races between checking values and using those values in other code,
2893 	 * and simply to avoid a lot of function calls to copy in data.
2894 	 */
2895 	orig_bufsz = cmdbuf->bufsz;
2896 	if (orig_bufsz != 0) {
2897 		kbuf = drm_alloc(cmdbuf->bufsz, DRM_MEM_DRIVER);
2898 		if (kbuf == NULL)
2899 			return -ENOMEM;
2900 		if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf->buf,
2901 				       cmdbuf->bufsz)) {
2902 			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2903 			return -EFAULT;
2904 		}
2905 		cmdbuf->buf = kbuf;
2906 	}
2907 
2908 	orig_nbox = cmdbuf->nbox;
2909 
2910 	if (dev_priv->chip_family >= CHIP_R300) {
2911 		int temp;
2912 		temp = r300_do_cp_cmdbuf(dev, file_priv, cmdbuf);
2913 
2914 		if (orig_bufsz != 0)
2915 			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2916 
2917 		return temp;
2918 	}
2919 
2920 	/* microcode_version != r300 */
2921 	while (cmdbuf->bufsz >= sizeof(header)) {
2922 
2923 		header.i = *(int *)cmdbuf->buf;
2924 		cmdbuf->buf += sizeof(header);
2925 		cmdbuf->bufsz -= sizeof(header);
2926 
2927 		switch (header.header.cmd_type) {
2928 		case RADEON_CMD_PACKET:
2929 			DRM_DEBUG("RADEON_CMD_PACKET\n");
2930 			if (radeon_emit_packets
2931 			    (dev_priv, file_priv, header, cmdbuf)) {
2932 				DRM_ERROR("radeon_emit_packets failed\n");
2933 				goto err;
2934 			}
2935 			break;
2936 
2937 		case RADEON_CMD_SCALARS:
2938 			DRM_DEBUG("RADEON_CMD_SCALARS\n");
2939 			if (radeon_emit_scalars(dev_priv, header, cmdbuf)) {
2940 				DRM_ERROR("radeon_emit_scalars failed\n");
2941 				goto err;
2942 			}
2943 			break;
2944 
2945 		case RADEON_CMD_VECTORS:
2946 			DRM_DEBUG("RADEON_CMD_VECTORS\n");
2947 			if (radeon_emit_vectors(dev_priv, header, cmdbuf)) {
2948 				DRM_ERROR("radeon_emit_vectors failed\n");
2949 				goto err;
2950 			}
2951 			break;
2952 
2953 		case RADEON_CMD_DMA_DISCARD:
2954 			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
2955 			idx = header.dma.buf_idx;
2956 			if (idx < 0 || idx >= dma->buf_count) {
2957 				DRM_ERROR("buffer index %d (of %d max)\n",
2958 					  idx, dma->buf_count - 1);
2959 				goto err;
2960 			}
2961 
2962 			buf = dma->buflist[idx];
2963 			if (buf->file_priv != file_priv || buf->pending) {
2964 				DRM_ERROR("bad buffer %p %p %d\n",
2965 					  buf->file_priv, file_priv,
2966 					  buf->pending);
2967 				goto err;
2968 			}
2969 
2970 			radeon_cp_discard_buffer(dev, buf);
2971 			break;
2972 
2973 		case RADEON_CMD_PACKET3:
2974 			DRM_DEBUG("RADEON_CMD_PACKET3\n");
2975 			if (radeon_emit_packet3(dev, file_priv, cmdbuf)) {
2976 				DRM_ERROR("radeon_emit_packet3 failed\n");
2977 				goto err;
2978 			}
2979 			break;
2980 
2981 		case RADEON_CMD_PACKET3_CLIP:
2982 			DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
2983 			if (radeon_emit_packet3_cliprect
2984 			    (dev, file_priv, cmdbuf, orig_nbox)) {
2985 				DRM_ERROR("radeon_emit_packet3_clip failed\n");
2986 				goto err;
2987 			}
2988 			break;
2989 
2990 		case RADEON_CMD_SCALARS2:
2991 			DRM_DEBUG("RADEON_CMD_SCALARS2\n");
2992 			if (radeon_emit_scalars2(dev_priv, header, cmdbuf)) {
2993 				DRM_ERROR("radeon_emit_scalars2 failed\n");
2994 				goto err;
2995 			}
2996 			break;
2997 
2998 		case RADEON_CMD_WAIT:
2999 			DRM_DEBUG("RADEON_CMD_WAIT\n");
3000 			if (radeon_emit_wait(dev, header.wait.flags)) {
3001 				DRM_ERROR("radeon_emit_wait failed\n");
3002 				goto err;
3003 			}
3004 			break;
3005 		case RADEON_CMD_VECLINEAR:
3006 			DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
3007 			if (radeon_emit_veclinear(dev_priv, header, cmdbuf)) {
3008 				DRM_ERROR("radeon_emit_veclinear failed\n");
3009 				goto err;
3010 			}
3011 			break;
3012 
3013 		default:
3014 			DRM_ERROR("bad cmd_type %d at %p\n",
3015 				  header.header.cmd_type,
3016 				  cmdbuf->buf - sizeof(header));
3017 			goto err;
3018 		}
3019 	}
3020 
3021 	if (orig_bufsz != 0)
3022 		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
3023 
3024 	DRM_DEBUG("DONE\n");
3025 	COMMIT_RING();
3026 	return 0;
3027 
3028       err:
3029 	if (orig_bufsz != 0)
3030 		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
3031 	return -EINVAL;
3032 }
3033 
3034 static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3035 {
3036 	drm_radeon_private_t *dev_priv = dev->dev_private;
3037 	drm_radeon_getparam_t *param = data;
3038 	int value;
3039 
3040 	if (!dev_priv) {
3041 		DRM_ERROR("called with no initialization\n");
3042 		return -EINVAL;
3043 	}
3044 
3045 	DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
3046 
3047 	switch (param->param) {
3048 	case RADEON_PARAM_GART_BUFFER_OFFSET:
3049 		value = dev_priv->gart_buffers_offset;
3050 		break;
3051 	case RADEON_PARAM_LAST_FRAME:
3052 		dev_priv->stats.last_frame_reads++;
3053 		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3054 			value = GET_R600_SCRATCH(0);
3055 		else
3056 			value = GET_SCRATCH(0);
3057 		break;
3058 	case RADEON_PARAM_LAST_DISPATCH:
3059 		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3060 			value = GET_R600_SCRATCH(1);
3061 		else
3062 			value = GET_SCRATCH(1);
3063 		break;
3064 	case RADEON_PARAM_LAST_CLEAR:
3065 		dev_priv->stats.last_clear_reads++;
3066 		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3067 			value = GET_R600_SCRATCH(2);
3068 		else
3069 			value = GET_SCRATCH(2);
3070 		break;
3071 	case RADEON_PARAM_IRQ_NR:
3072 		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3073 			value = 0;
3074 		else
3075 			value = dev->irq;
3076 		break;
3077 	case RADEON_PARAM_GART_BASE:
3078 		value = dev_priv->gart_vm_start;
3079 		break;
3080 	case RADEON_PARAM_REGISTER_HANDLE:
3081 		value = dev_priv->mmio->offset;
3082 		break;
3083 	case RADEON_PARAM_STATUS_HANDLE:
3084 		value = dev_priv->ring_rptr_offset;
3085 		break;
3086 #ifndef __LP64__
3087 		/*
3088 		 * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
3089 		 * pointer which can't fit into an int-sized variable.  According to
3090 		 * Michel Dänzer, the ioctl() is only used on embedded platforms, so
3091 		 * not supporting it shouldn't be a problem.  If the same functionality
3092 		 * is needed on 64-bit platforms, a new ioctl() would have to be added,
3093 		 * so backwards-compatibility for the embedded platforms can be
3094 		 * maintained.  --davidm 4-Feb-2004.
3095 		 */
3096 	case RADEON_PARAM_SAREA_HANDLE:
3097 		/* The lock is the first dword in the sarea. */
3098 		value = (long)dev->lock.hw_lock;
3099 		break;
3100 #endif
3101 	case RADEON_PARAM_GART_TEX_HANDLE:
3102 		value = dev_priv->gart_textures_offset;
3103 		break;
3104 	case RADEON_PARAM_SCRATCH_OFFSET:
3105 		if (!dev_priv->writeback_works)
3106 			return -EINVAL;
3107 		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3108 			value = R600_SCRATCH_REG_OFFSET;
3109 		else
3110 			value = RADEON_SCRATCH_REG_OFFSET;
3111 		break;
3112 
3113 	case RADEON_PARAM_CARD_TYPE:
3114 		if (dev_priv->flags & RADEON_IS_PCIE)
3115 			value = RADEON_CARD_PCIE;
3116 		else if (dev_priv->flags & RADEON_IS_AGP)
3117 			value = RADEON_CARD_AGP;
3118 		else
3119 			value = RADEON_CARD_PCI;
3120 		break;
3121 	case RADEON_PARAM_VBLANK_CRTC:
3122 		value = radeon_vblank_crtc_get(dev);
3123 		break;
3124 	case RADEON_PARAM_FB_LOCATION:
3125 		value = radeon_read_fb_location(dev_priv);
3126 		break;
3127 	case RADEON_PARAM_NUM_GB_PIPES:
3128 		value = dev_priv->num_gb_pipes;
3129 		break;
3130 	case RADEON_PARAM_NUM_Z_PIPES:
3131 		value = dev_priv->num_z_pipes;
3132 		break;
3133 	default:
3134 		DRM_DEBUG( "Invalid parameter %d\n", param->param );
3135 		return -EINVAL;
3136 	}
3137 
3138 	if (DRM_COPY_TO_USER(param->value, &value, sizeof(int))) {
3139 		DRM_ERROR("copy_to_user\n");
3140 		return -EFAULT;
3141 	}
3142 
3143 	return 0;
3144 }
3145 
3146 static int radeon_cp_setparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3147 {
3148 	drm_radeon_private_t *dev_priv = dev->dev_private;
3149 	drm_radeon_setparam_t *sp = data;
3150 	struct drm_radeon_driver_file_fields *radeon_priv;
3151 
3152 	if (!dev_priv) {
3153 		DRM_ERROR("called with no initialization\n");
3154 		return -EINVAL;
3155 	}
3156 
3157 	switch (sp->param) {
3158 	case RADEON_SETPARAM_FB_LOCATION:
3159 		radeon_priv = file_priv->driver_priv;
3160 		radeon_priv->radeon_fb_delta = dev_priv->fb_location -
3161 		    sp->value;
3162 		break;
3163 	case RADEON_SETPARAM_SWITCH_TILING:
3164 		if (sp->value == 0) {
3165 			DRM_DEBUG("color tiling disabled\n");
3166 			dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3167 			dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3168 			if (dev_priv->sarea_priv)
3169 				dev_priv->sarea_priv->tiling_enabled = 0;
3170 		} else if (sp->value == 1) {
3171 			DRM_DEBUG("color tiling enabled\n");
3172 			dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3173 			dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3174 			if (dev_priv->sarea_priv)
3175 				dev_priv->sarea_priv->tiling_enabled = 1;
3176 		}
3177 		break;
3178 	case RADEON_SETPARAM_PCIGART_LOCATION:
3179 		dev_priv->pcigart_offset = sp->value;
3180 		dev_priv->pcigart_offset_set = 1;
3181 		break;
3182 	case RADEON_SETPARAM_NEW_MEMMAP:
3183 		dev_priv->new_memmap = sp->value;
3184 		break;
3185 	case RADEON_SETPARAM_PCIGART_TABLE_SIZE:
3186 		dev_priv->gart_info.table_size = sp->value;
3187 		if (dev_priv->gart_info.table_size < RADEON_PCIGART_TABLE_SIZE)
3188 			dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE;
3189 		break;
3190 	case RADEON_SETPARAM_VBLANK_CRTC:
3191 		return radeon_vblank_crtc_set(dev, sp->value);
3192 		break;
3193 	default:
3194 		DRM_DEBUG("Invalid parameter %d\n", sp->param);
3195 		return -EINVAL;
3196 	}
3197 
3198 	return 0;
3199 }
3200 
3201 /* When a client dies:
3202  *    - Check for and clean up flipped page state
3203  *    - Free any alloced GART memory.
3204  *    - Free any alloced radeon surfaces.
3205  *
3206  * DRM infrastructure takes care of reclaiming dma buffers.
3207  */
3208 void radeon_driver_preclose(struct drm_device *dev,
3209 			    struct drm_file *file_priv)
3210 {
3211 	if (dev->dev_private) {
3212 		drm_radeon_private_t *dev_priv = dev->dev_private;
3213 		dev_priv->page_flipping = 0;
3214 		radeon_mem_release(file_priv, dev_priv->gart_heap);
3215 		radeon_mem_release(file_priv, dev_priv->fb_heap);
3216 		radeon_surfaces_release(file_priv, dev_priv);
3217 	}
3218 }
3219 
3220 void radeon_driver_lastclose(struct drm_device *dev)
3221 {
3222   	//radeon_surfaces_release(PCIGART_FILE_PRIV, dev->dev_private);
3223 	if (dev->dev_private) {
3224 		drm_radeon_private_t *dev_priv = dev->dev_private;
3225 
3226 		if (dev_priv->sarea_priv &&
3227 		    dev_priv->sarea_priv->pfCurrentPage != 0)
3228 			radeon_cp_dispatch_flip(dev);
3229 	}
3230 
3231 	radeon_do_release(dev);
3232 }
3233 
3234 int radeon_driver_open(struct drm_device *dev, struct drm_file *file_priv)
3235 {
3236 	drm_radeon_private_t *dev_priv = dev->dev_private;
3237 	struct drm_radeon_driver_file_fields *radeon_priv;
3238 
3239 	DRM_DEBUG("\n");
3240 	radeon_priv =
3241 	    (struct drm_radeon_driver_file_fields *)
3242 	    drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3243 
3244 	if (!radeon_priv)
3245 		return -ENOMEM;
3246 
3247 	file_priv->driver_priv = radeon_priv;
3248 
3249 	if (dev_priv)
3250 		radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3251 	else
3252 		radeon_priv->radeon_fb_delta = 0;
3253 	return 0;
3254 }
3255 
3256 void radeon_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
3257 {
3258 	struct drm_radeon_driver_file_fields *radeon_priv =
3259 	    file_priv->driver_priv;
3260 
3261 	drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
3262 }
3263 
/* Driver ioctl table.
 *
 * Each DRM_IOCTL_DEF() entry names a DRM_RADEON_* ioctl, the function that
 * handles it and the DRM_* permission flags required of the caller.  Every
 * entry carries DRM_AUTH; the CP init/start/stop/reset and heap-init
 * entries additionally carry DRM_MASTER|DRM_ROOT_ONLY, and the indirect
 * entry carries DRM_ROOT_ONLY.
 */
struct drm_ioctl_desc radeon_ioctls[] = {
	DRM_IOCTL_DEF(DRM_RADEON_CP_INIT, radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_CP_RESET, radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_RESET, radeon_engine_reset, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_SWAP, radeon_cp_swap, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_CLEAR, radeon_cp_clear, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_INDICES, radeon_cp_indices, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_FLIP, radeon_cp_flip, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_FREE, radeon_mem_free, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_CS, radeon_cs_ioctl, DRM_AUTH)
};

/* Number of slots in radeon_ioctls[], computed from the array itself. */
int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);
3296