1 /*
2  * Copyright 2010 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <drm/drmP.h>
29 #include "radeon.h"
30 #include "evergreend.h"
31 #include "evergreen_reg_safe.h"
32 #include "cayman_reg_safe.h"
33 
34 #ifndef __NetBSD__
35 #define MAX(a,b)			(((a)>(b))?(a):(b))
36 #define MIN(a,b)			(((a)<(b))?(a):(b))
37 #endif
38 
39 int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
40 			   struct radeon_cs_reloc **cs_reloc);
41 struct evergreen_cs_track {
42 	u32			group_size;
43 	u32			nbanks;
44 	u32			npipes;
45 	u32			row_size;
46 	/* value we track */
47 	u32			nsamples;		/* unused */
48 	struct radeon_bo	*cb_color_bo[12];
49 	u32			cb_color_bo_offset[12];
50 	struct radeon_bo	*cb_color_fmask_bo[8];	/* unused */
51 	struct radeon_bo	*cb_color_cmask_bo[8];	/* unused */
52 	u32			cb_color_info[12];
53 	u32			cb_color_view[12];
54 	u32			cb_color_pitch[12];
55 	u32			cb_color_slice[12];
56 	u32			cb_color_slice_idx[12];
57 	u32			cb_color_attrib[12];
58 	u32			cb_color_cmask_slice[8];/* unused */
59 	u32			cb_color_fmask_slice[8];/* unused */
60 	u32			cb_target_mask;
61 	u32			cb_shader_mask; /* unused */
62 	u32			vgt_strmout_config;
63 	u32			vgt_strmout_buffer_config;
64 	struct radeon_bo	*vgt_strmout_bo[4];
65 	u32			vgt_strmout_bo_offset[4];
66 	u32			vgt_strmout_size[4];
67 	u32			db_depth_control;
68 	u32			db_depth_view;
69 	u32			db_depth_slice;
70 	u32			db_depth_size;
71 	u32			db_z_info;
72 	u32			db_z_read_offset;
73 	u32			db_z_write_offset;
74 	struct radeon_bo	*db_z_read_bo;
75 	struct radeon_bo	*db_z_write_bo;
76 	u32			db_s_info;
77 	u32			db_s_read_offset;
78 	u32			db_s_write_offset;
79 	struct radeon_bo	*db_s_read_bo;
80 	struct radeon_bo	*db_s_write_bo;
81 	bool			sx_misc_kill_all_prims;
82 	bool			cb_dirty;
83 	bool			db_dirty;
84 	bool			streamout_dirty;
85 	u32			htile_offset;
86 	u32			htile_surface;
87 	struct radeon_bo	*htile_bo;
88 };
89 
evergreen_cs_get_aray_mode(u32 tiling_flags)90 static u32 evergreen_cs_get_aray_mode(u32 tiling_flags)
91 {
92 	if (tiling_flags & RADEON_TILING_MACRO)
93 		return ARRAY_2D_TILED_THIN1;
94 	else if (tiling_flags & RADEON_TILING_MICRO)
95 		return ARRAY_1D_TILED_THIN1;
96 	else
97 		return ARRAY_LINEAR_GENERAL;
98 }
99 
evergreen_cs_get_num_banks(u32 nbanks)100 static u32 evergreen_cs_get_num_banks(u32 nbanks)
101 {
102 	switch (nbanks) {
103 	case 2:
104 		return ADDR_SURF_2_BANK;
105 	case 4:
106 		return ADDR_SURF_4_BANK;
107 	case 8:
108 	default:
109 		return ADDR_SURF_8_BANK;
110 	case 16:
111 		return ADDR_SURF_16_BANK;
112 	}
113 }
114 
evergreen_cs_track_init(struct evergreen_cs_track * track)115 static void evergreen_cs_track_init(struct evergreen_cs_track *track)
116 {
117 	int i;
118 
119 	for (i = 0; i < 8; i++) {
120 		track->cb_color_fmask_bo[i] = NULL;
121 		track->cb_color_cmask_bo[i] = NULL;
122 		track->cb_color_cmask_slice[i] = 0;
123 		track->cb_color_fmask_slice[i] = 0;
124 	}
125 
126 	for (i = 0; i < 12; i++) {
127 		track->cb_color_bo[i] = NULL;
128 		track->cb_color_bo_offset[i] = 0xFFFFFFFF;
129 		track->cb_color_info[i] = 0;
130 		track->cb_color_view[i] = 0xFFFFFFFF;
131 		track->cb_color_pitch[i] = 0;
132 		track->cb_color_slice[i] = 0xfffffff;
133 		track->cb_color_slice_idx[i] = 0;
134 	}
135 	track->cb_target_mask = 0xFFFFFFFF;
136 	track->cb_shader_mask = 0xFFFFFFFF;
137 	track->cb_dirty = true;
138 
139 	track->db_depth_slice = 0xffffffff;
140 	track->db_depth_view = 0xFFFFC000;
141 	track->db_depth_size = 0xFFFFFFFF;
142 	track->db_depth_control = 0xFFFFFFFF;
143 	track->db_z_info = 0xFFFFFFFF;
144 	track->db_z_read_offset = 0xFFFFFFFF;
145 	track->db_z_write_offset = 0xFFFFFFFF;
146 	track->db_z_read_bo = NULL;
147 	track->db_z_write_bo = NULL;
148 	track->db_s_info = 0xFFFFFFFF;
149 	track->db_s_read_offset = 0xFFFFFFFF;
150 	track->db_s_write_offset = 0xFFFFFFFF;
151 	track->db_s_read_bo = NULL;
152 	track->db_s_write_bo = NULL;
153 	track->db_dirty = true;
154 	track->htile_bo = NULL;
155 	track->htile_offset = 0xFFFFFFFF;
156 	track->htile_surface = 0;
157 
158 	for (i = 0; i < 4; i++) {
159 		track->vgt_strmout_size[i] = 0;
160 		track->vgt_strmout_bo[i] = NULL;
161 		track->vgt_strmout_bo_offset[i] = 0xFFFFFFFF;
162 	}
163 	track->streamout_dirty = true;
164 	track->sx_misc_kill_all_prims = false;
165 }
166 
167 struct eg_surface {
168 	/* value gathered from cs */
169 	unsigned	nbx;
170 	unsigned	nby;
171 	unsigned	format;
172 	unsigned	mode;
173 	unsigned	nbanks;
174 	unsigned	bankw;
175 	unsigned	bankh;
176 	unsigned	tsplit;
177 	unsigned	mtilea;
178 	unsigned	nsamples;
179 	/* output value */
180 	unsigned	bpe;
181 	unsigned	layer_size;
182 	unsigned	palign;
183 	unsigned	halign;
184 	unsigned long	base_align;
185 };
186 
evergreen_surface_check_linear(struct radeon_cs_parser * p,struct eg_surface * surf,const char * prefix)187 static int evergreen_surface_check_linear(struct radeon_cs_parser *p,
188 					  struct eg_surface *surf,
189 					  const char *prefix)
190 {
191 	surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
192 	surf->base_align = surf->bpe;
193 	surf->palign = 1;
194 	surf->halign = 1;
195 	return 0;
196 }
197 
evergreen_surface_check_linear_aligned(struct radeon_cs_parser * p,struct eg_surface * surf,const char * prefix)198 static int evergreen_surface_check_linear_aligned(struct radeon_cs_parser *p,
199 						  struct eg_surface *surf,
200 						  const char *prefix)
201 {
202 	struct evergreen_cs_track *track = p->track;
203 	unsigned palign;
204 
205 	palign = MAX(64, track->group_size / surf->bpe);
206 	surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
207 	surf->base_align = track->group_size;
208 	surf->palign = palign;
209 	surf->halign = 1;
210 	if (surf->nbx & (palign - 1)) {
211 		if (prefix) {
212 			dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
213 				 __func__, __LINE__, prefix, surf->nbx, palign);
214 		}
215 		return -EINVAL;
216 	}
217 	return 0;
218 }
219 
evergreen_surface_check_1d(struct radeon_cs_parser * p,struct eg_surface * surf,const char * prefix)220 static int evergreen_surface_check_1d(struct radeon_cs_parser *p,
221 				      struct eg_surface *surf,
222 				      const char *prefix)
223 {
224 	struct evergreen_cs_track *track = p->track;
225 	unsigned palign;
226 
227 	palign = track->group_size / (8 * surf->bpe * surf->nsamples);
228 	palign = MAX(8, palign);
229 	surf->layer_size = surf->nbx * surf->nby * surf->bpe;
230 	surf->base_align = track->group_size;
231 	surf->palign = palign;
232 	surf->halign = 8;
233 	if ((surf->nbx & (palign - 1))) {
234 		if (prefix) {
235 			dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d (%d %d %d)\n",
236 				 __func__, __LINE__, prefix, surf->nbx, palign,
237 				 track->group_size, surf->bpe, surf->nsamples);
238 		}
239 		return -EINVAL;
240 	}
241 	if ((surf->nby & (8 - 1))) {
242 		if (prefix) {
243 			dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with 8\n",
244 				 __func__, __LINE__, prefix, surf->nby);
245 		}
246 		return -EINVAL;
247 	}
248 	return 0;
249 }
250 
evergreen_surface_check_2d(struct radeon_cs_parser * p,struct eg_surface * surf,const char * prefix)251 static int evergreen_surface_check_2d(struct radeon_cs_parser *p,
252 				      struct eg_surface *surf,
253 				      const char *prefix)
254 {
255 	struct evergreen_cs_track *track = p->track;
256 	unsigned palign, halign, tileb, slice_pt;
257 	unsigned mtile_pr, mtile_ps, mtileb;
258 
259 	tileb = 64 * surf->bpe * surf->nsamples;
260 	slice_pt = 1;
261 	if (tileb > surf->tsplit) {
262 		slice_pt = tileb / surf->tsplit;
263 	}
264 	tileb = tileb / slice_pt;
265 	/* macro tile width & height */
266 	palign = (8 * surf->bankw * track->npipes) * surf->mtilea;
267 	halign = (8 * surf->bankh * surf->nbanks) / surf->mtilea;
268 	mtileb = (palign / 8) * (halign / 8) * tileb;
269 	mtile_pr = surf->nbx / palign;
270 	mtile_ps = (mtile_pr * surf->nby) / halign;
271 	surf->layer_size = mtile_ps * mtileb * slice_pt;
272 	surf->base_align = (palign / 8) * (halign / 8) * tileb;
273 	surf->palign = palign;
274 	surf->halign = halign;
275 
276 	if ((surf->nbx & (palign - 1))) {
277 		if (prefix) {
278 			dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
279 				 __func__, __LINE__, prefix, surf->nbx, palign);
280 		}
281 		return -EINVAL;
282 	}
283 	if ((surf->nby & (halign - 1))) {
284 		if (prefix) {
285 			dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with %d\n",
286 				 __func__, __LINE__, prefix, surf->nby, halign);
287 		}
288 		return -EINVAL;
289 	}
290 
291 	return 0;
292 }
293 
evergreen_surface_check(struct radeon_cs_parser * p,struct eg_surface * surf,const char * prefix)294 static int evergreen_surface_check(struct radeon_cs_parser *p,
295 				   struct eg_surface *surf,
296 				   const char *prefix)
297 {
298 	/* some common value computed here */
299 	surf->bpe = r600_fmt_get_blocksize(surf->format);
300 
301 	switch (surf->mode) {
302 	case ARRAY_LINEAR_GENERAL:
303 		return evergreen_surface_check_linear(p, surf, prefix);
304 	case ARRAY_LINEAR_ALIGNED:
305 		return evergreen_surface_check_linear_aligned(p, surf, prefix);
306 	case ARRAY_1D_TILED_THIN1:
307 		return evergreen_surface_check_1d(p, surf, prefix);
308 	case ARRAY_2D_TILED_THIN1:
309 		return evergreen_surface_check_2d(p, surf, prefix);
310 	default:
311 		dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
312 				__func__, __LINE__, prefix, surf->mode);
313 		return -EINVAL;
314 	}
315 	return -EINVAL;
316 }
317 
evergreen_surface_value_conv_check(struct radeon_cs_parser * p,struct eg_surface * surf,const char * prefix)318 static int evergreen_surface_value_conv_check(struct radeon_cs_parser *p,
319 					      struct eg_surface *surf,
320 					      const char *prefix)
321 {
322 	switch (surf->mode) {
323 	case ARRAY_2D_TILED_THIN1:
324 		break;
325 	case ARRAY_LINEAR_GENERAL:
326 	case ARRAY_LINEAR_ALIGNED:
327 	case ARRAY_1D_TILED_THIN1:
328 		return 0;
329 	default:
330 		dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
331 				__func__, __LINE__, prefix, surf->mode);
332 		return -EINVAL;
333 	}
334 
335 	switch (surf->nbanks) {
336 	case 0: surf->nbanks = 2; break;
337 	case 1: surf->nbanks = 4; break;
338 	case 2: surf->nbanks = 8; break;
339 	case 3: surf->nbanks = 16; break;
340 	default:
341 		dev_warn(p->dev, "%s:%d %s invalid number of banks %d\n",
342 			 __func__, __LINE__, prefix, surf->nbanks);
343 		return -EINVAL;
344 	}
345 	switch (surf->bankw) {
346 	case 0: surf->bankw = 1; break;
347 	case 1: surf->bankw = 2; break;
348 	case 2: surf->bankw = 4; break;
349 	case 3: surf->bankw = 8; break;
350 	default:
351 		dev_warn(p->dev, "%s:%d %s invalid bankw %d\n",
352 			 __func__, __LINE__, prefix, surf->bankw);
353 		return -EINVAL;
354 	}
355 	switch (surf->bankh) {
356 	case 0: surf->bankh = 1; break;
357 	case 1: surf->bankh = 2; break;
358 	case 2: surf->bankh = 4; break;
359 	case 3: surf->bankh = 8; break;
360 	default:
361 		dev_warn(p->dev, "%s:%d %s invalid bankh %d\n",
362 			 __func__, __LINE__, prefix, surf->bankh);
363 		return -EINVAL;
364 	}
365 	switch (surf->mtilea) {
366 	case 0: surf->mtilea = 1; break;
367 	case 1: surf->mtilea = 2; break;
368 	case 2: surf->mtilea = 4; break;
369 	case 3: surf->mtilea = 8; break;
370 	default:
371 		dev_warn(p->dev, "%s:%d %s invalid macro tile aspect %d\n",
372 			 __func__, __LINE__, prefix, surf->mtilea);
373 		return -EINVAL;
374 	}
375 	switch (surf->tsplit) {
376 	case 0: surf->tsplit = 64; break;
377 	case 1: surf->tsplit = 128; break;
378 	case 2: surf->tsplit = 256; break;
379 	case 3: surf->tsplit = 512; break;
380 	case 4: surf->tsplit = 1024; break;
381 	case 5: surf->tsplit = 2048; break;
382 	case 6: surf->tsplit = 4096; break;
383 	default:
384 		dev_warn(p->dev, "%s:%d %s invalid tile split %d\n",
385 			 __func__, __LINE__, prefix, surf->tsplit);
386 		return -EINVAL;
387 	}
388 	return 0;
389 }
390 
evergreen_cs_track_validate_cb(struct radeon_cs_parser * p,unsigned id)391 static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned id)
392 {
393 	struct evergreen_cs_track *track = p->track;
394 	struct eg_surface surf;
395 	unsigned pitch, slice, mslice;
396 	unsigned long offset;
397 	int r;
398 
399 	mslice = G_028C6C_SLICE_MAX(track->cb_color_view[id]) + 1;
400 	pitch = track->cb_color_pitch[id];
401 	slice = track->cb_color_slice[id];
402 	surf.nbx = (pitch + 1) * 8;
403 	surf.nby = ((slice + 1) * 64) / surf.nbx;
404 	surf.mode = G_028C70_ARRAY_MODE(track->cb_color_info[id]);
405 	surf.format = G_028C70_FORMAT(track->cb_color_info[id]);
406 	surf.tsplit = G_028C74_TILE_SPLIT(track->cb_color_attrib[id]);
407 	surf.nbanks = G_028C74_NUM_BANKS(track->cb_color_attrib[id]);
408 	surf.bankw = G_028C74_BANK_WIDTH(track->cb_color_attrib[id]);
409 	surf.bankh = G_028C74_BANK_HEIGHT(track->cb_color_attrib[id]);
410 	surf.mtilea = G_028C74_MACRO_TILE_ASPECT(track->cb_color_attrib[id]);
411 	surf.nsamples = 1;
412 
413 	if (!r600_fmt_is_valid_color(surf.format)) {
414 		dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08x)\n",
415 			 __func__, __LINE__, surf.format,
416 			id, track->cb_color_info[id]);
417 		return -EINVAL;
418 	}
419 
420 	r = evergreen_surface_value_conv_check(p, &surf, "cb");
421 	if (r) {
422 		return r;
423 	}
424 
425 	r = evergreen_surface_check(p, &surf, "cb");
426 	if (r) {
427 		dev_warn(p->dev, "%s:%d cb[%d] invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
428 			 __func__, __LINE__, id, track->cb_color_pitch[id],
429 			 track->cb_color_slice[id], track->cb_color_attrib[id],
430 			 track->cb_color_info[id]);
431 		return r;
432 	}
433 
434 	offset = track->cb_color_bo_offset[id] << 8;
435 	if (offset & (surf.base_align - 1)) {
436 		dev_warn(p->dev, "%s:%d cb[%d] bo base %ld not aligned with %ld\n",
437 			 __func__, __LINE__, id, offset, surf.base_align);
438 		return -EINVAL;
439 	}
440 
441 	offset += surf.layer_size * mslice;
442 	if (offset > radeon_bo_size(track->cb_color_bo[id])) {
443 		/* old ddx are broken they allocate bo with w*h*bpp but
444 		 * program slice with ALIGN(h, 8), catch this and patch
445 		 * command stream.
446 		 */
447 		if (!surf.mode) {
448 			volatile u32 *ib = p->ib.ptr;
449 			unsigned long tmp, nby, bsize, size, vmin = 0;
450 
451 			/* find the height the ddx wants */
452 			if (surf.nby > 8) {
453 				vmin = surf.nby - 8;
454 			}
455 			bsize = radeon_bo_size(track->cb_color_bo[id]);
456 			tmp = track->cb_color_bo_offset[id] << 8;
457 			for (nby = surf.nby; nby > vmin; nby--) {
458 				size = nby * surf.nbx * surf.bpe * surf.nsamples;
459 				if ((tmp + size * mslice) <= bsize) {
460 					break;
461 				}
462 			}
463 			if (nby > vmin) {
464 				surf.nby = nby;
465 				slice = ((nby * surf.nbx) / 64) - 1;
466 				if (!evergreen_surface_check(p, &surf, "cb")) {
467 					/* check if this one works */
468 					tmp += surf.layer_size * mslice;
469 					if (tmp <= bsize) {
470 						ib[track->cb_color_slice_idx[id]] = slice;
471 						goto old_ddx_ok;
472 					}
473 				}
474 			}
475 		}
476 		dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, "
477 			 "offset %d, max layer %d, bo size %ld, slice %d)\n",
478 			 __func__, __LINE__, id, surf.layer_size,
479 			track->cb_color_bo_offset[id] << 8, mslice,
480 			radeon_bo_size(track->cb_color_bo[id]), slice);
481 		dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
482 			 __func__, __LINE__, surf.nbx, surf.nby,
483 			surf.mode, surf.bpe, surf.nsamples,
484 			surf.bankw, surf.bankh,
485 			surf.tsplit, surf.mtilea);
486 		return -EINVAL;
487 	}
488 old_ddx_ok:
489 
490 	return 0;
491 }
492 
evergreen_cs_track_validate_htile(struct radeon_cs_parser * p,unsigned nbx,unsigned nby)493 static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
494 						unsigned nbx, unsigned nby)
495 {
496 	struct evergreen_cs_track *track = p->track;
497 	unsigned long size;
498 
499 	if (track->htile_bo == NULL) {
500 		dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n",
501 				__func__, __LINE__, track->db_z_info);
502 		return -EINVAL;
503 	}
504 
505 	if (G_028ABC_LINEAR(track->htile_surface)) {
506 		/* pitch must be 16 htiles aligned == 16 * 8 pixel aligned */
507 		nbx = round_up(nbx, 16 * 8);
508 		/* height is npipes htiles aligned == npipes * 8 pixel aligned */
509 		nby = round_up(nby, track->npipes * 8);
510 	} else {
511 		/* always assume 8x8 htile */
512 		/* align is htile align * 8, htile align vary according to
513 		 * number of pipe and tile width and nby
514 		 */
515 		switch (track->npipes) {
516 		case 8:
517 			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
518 			nbx = round_up(nbx, 64 * 8);
519 			nby = round_up(nby, 64 * 8);
520 			break;
521 		case 4:
522 			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
523 			nbx = round_up(nbx, 64 * 8);
524 			nby = round_up(nby, 32 * 8);
525 			break;
526 		case 2:
527 			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
528 			nbx = round_up(nbx, 32 * 8);
529 			nby = round_up(nby, 32 * 8);
530 			break;
531 		case 1:
532 			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
533 			nbx = round_up(nbx, 32 * 8);
534 			nby = round_up(nby, 16 * 8);
535 			break;
536 		default:
537 			dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
538 					__func__, __LINE__, track->npipes);
539 			return -EINVAL;
540 		}
541 	}
542 	/* compute number of htile */
543 	nbx = nbx >> 3;
544 	nby = nby >> 3;
545 	/* size must be aligned on npipes * 2K boundary */
546 	size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
547 	size += track->htile_offset;
548 
549 	if (!track->htile_bo) {
550 		dev_warn(p->dev, "%s:%d htile_bo not set", __func__, __LINE__);
551 		return -EINVAL;
552 	}
553 	if (size > radeon_bo_size(track->htile_bo)) {
554 		dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n",
555 				__func__, __LINE__, radeon_bo_size(track->htile_bo),
556 				size, nbx, nby);
557 		return -EINVAL;
558 	}
559 	return 0;
560 }
561 
evergreen_cs_track_validate_stencil(struct radeon_cs_parser * p)562 static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p)
563 {
564 	struct evergreen_cs_track *track = p->track;
565 	struct eg_surface surf;
566 	unsigned pitch, slice, mslice;
567 	unsigned long offset;
568 	int r;
569 
570 	mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
571 	pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
572 	slice = track->db_depth_slice;
573 	surf.nbx = (pitch + 1) * 8;
574 	surf.nby = ((slice + 1) * 64) / surf.nbx;
575 	surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
576 	surf.format = G_028044_FORMAT(track->db_s_info);
577 	surf.tsplit = G_028044_TILE_SPLIT(track->db_s_info);
578 	surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
579 	surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
580 	surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
581 	surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
582 	surf.nsamples = 1;
583 
584 	if (surf.format != 1) {
585 		dev_warn(p->dev, "%s:%d stencil invalid format %d\n",
586 			 __func__, __LINE__, surf.format);
587 		return -EINVAL;
588 	}
589 	/* replace by color format so we can use same code */
590 	surf.format = V_028C70_COLOR_8;
591 
592 	r = evergreen_surface_value_conv_check(p, &surf, "stencil");
593 	if (r) {
594 		return r;
595 	}
596 
597 	r = evergreen_surface_check(p, &surf, NULL);
598 	if (r) {
599 		/* old userspace doesn't compute proper depth/stencil alignment
600 		 * check that alignment against a bigger byte per elements and
601 		 * only report if that alignment is wrong too.
602 		 */
603 		surf.format = V_028C70_COLOR_8_8_8_8;
604 		r = evergreen_surface_check(p, &surf, "stencil");
605 		if (r) {
606 			dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
607 				 __func__, __LINE__, track->db_depth_size,
608 				 track->db_depth_slice, track->db_s_info, track->db_z_info);
609 		}
610 		return r;
611 	}
612 
613 	offset = track->db_s_read_offset << 8;
614 	if (offset & (surf.base_align - 1)) {
615 		dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n",
616 			 __func__, __LINE__, offset, surf.base_align);
617 		return -EINVAL;
618 	}
619 	offset += surf.layer_size * mslice;
620 	if (!track->db_s_read_bo) {
621 		dev_warn(p->dev, "%s:%d db_s_read_bo not set", __func__, __LINE__);
622 		return -EINVAL;
623 	}
624 	if (offset > radeon_bo_size(track->db_s_read_bo)) {
625 		dev_warn(p->dev, "%s:%d stencil read bo too small (layer size %d, "
626 			 "offset %ld, max layer %d, bo size %ld)\n",
627 			 __func__, __LINE__, surf.layer_size,
628 			(unsigned long)track->db_s_read_offset << 8, mslice,
629 			radeon_bo_size(track->db_s_read_bo));
630 		dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
631 			 __func__, __LINE__, track->db_depth_size,
632 			 track->db_depth_slice, track->db_s_info, track->db_z_info);
633 		return -EINVAL;
634 	}
635 
636 	offset = track->db_s_write_offset << 8;
637 	if (offset & (surf.base_align - 1)) {
638 		dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n",
639 			 __func__, __LINE__, offset, surf.base_align);
640 		return -EINVAL;
641 	}
642 	offset += surf.layer_size * mslice;
643 	if (!track->db_s_write_bo) {
644 		dev_warn(p->dev, "%s:%d db_s_write_bo not set", __func__, __LINE__);
645 		return -EINVAL;
646 	}
647 	if (offset > radeon_bo_size(track->db_s_write_bo)) {
648 		dev_warn(p->dev, "%s:%d stencil write bo too small (layer size %d, "
649 			 "offset %ld, max layer %d, bo size %ld)\n",
650 			 __func__, __LINE__, surf.layer_size,
651 			(unsigned long)track->db_s_write_offset << 8, mslice,
652 			radeon_bo_size(track->db_s_write_bo));
653 		return -EINVAL;
654 	}
655 
656 	/* hyperz */
657 	if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
658 		r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
659 		if (r) {
660 			return r;
661 		}
662 	}
663 
664 	return 0;
665 }
666 
evergreen_cs_track_validate_depth(struct radeon_cs_parser * p)667 static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p)
668 {
669 	struct evergreen_cs_track *track = p->track;
670 	struct eg_surface surf;
671 	unsigned pitch, slice, mslice;
672 	unsigned long offset;
673 	int r;
674 
675 	mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
676 	pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
677 	slice = track->db_depth_slice;
678 	surf.nbx = (pitch + 1) * 8;
679 	surf.nby = ((slice + 1) * 64) / surf.nbx;
680 	surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
681 	surf.format = G_028040_FORMAT(track->db_z_info);
682 	surf.tsplit = G_028040_TILE_SPLIT(track->db_z_info);
683 	surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
684 	surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
685 	surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
686 	surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
687 	surf.nsamples = 1;
688 
689 	switch (surf.format) {
690 	case V_028040_Z_16:
691 		surf.format = V_028C70_COLOR_16;
692 		break;
693 	case V_028040_Z_24:
694 	case V_028040_Z_32_FLOAT:
695 		surf.format = V_028C70_COLOR_8_8_8_8;
696 		break;
697 	default:
698 		dev_warn(p->dev, "%s:%d depth invalid format %d\n",
699 			 __func__, __LINE__, surf.format);
700 		return -EINVAL;
701 	}
702 
703 	r = evergreen_surface_value_conv_check(p, &surf, "depth");
704 	if (r) {
705 		dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
706 			 __func__, __LINE__, track->db_depth_size,
707 			 track->db_depth_slice, track->db_z_info);
708 		return r;
709 	}
710 
711 	r = evergreen_surface_check(p, &surf, "depth");
712 	if (r) {
713 		dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
714 			 __func__, __LINE__, track->db_depth_size,
715 			 track->db_depth_slice, track->db_z_info);
716 		return r;
717 	}
718 
719 	offset = track->db_z_read_offset << 8;
720 	if (offset & (surf.base_align - 1)) {
721 		dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n",
722 			 __func__, __LINE__, offset, surf.base_align);
723 		return -EINVAL;
724 	}
725 	offset += surf.layer_size * mslice;
726 	if (!track->db_z_read_bo) {
727 		dev_warn(p->dev, "%s:%d db_z_read_bo not set", __func__, __LINE__);
728 		return -EINVAL;
729 	}
730 	if (offset > radeon_bo_size(track->db_z_read_bo)) {
731 		dev_warn(p->dev, "%s:%d depth read bo too small (layer size %d, "
732 			 "offset %ld, max layer %d, bo size %ld)\n",
733 			 __func__, __LINE__, surf.layer_size,
734 			(unsigned long)track->db_z_read_offset << 8, mslice,
735 			radeon_bo_size(track->db_z_read_bo));
736 		return -EINVAL;
737 	}
738 
739 	offset = track->db_z_write_offset << 8;
740 	if (offset & (surf.base_align - 1)) {
741 		dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n",
742 			 __func__, __LINE__, offset, surf.base_align);
743 		return -EINVAL;
744 	}
745 	offset += surf.layer_size * mslice;
746 	if (!track->db_z_write_bo) {
747 		dev_warn(p->dev, "%s:%d db_z_write_bo not set", __func__, __LINE__);
748 		return -EINVAL;
749 	}
750 	if (offset > radeon_bo_size(track->db_z_write_bo)) {
751 		dev_warn(p->dev, "%s:%d depth write bo too small (layer size %d, "
752 			 "offset %ld, max layer %d, bo size %ld)\n",
753 			 __func__, __LINE__, surf.layer_size,
754 			(unsigned long)track->db_z_write_offset << 8, mslice,
755 			radeon_bo_size(track->db_z_write_bo));
756 		return -EINVAL;
757 	}
758 
759 	/* hyperz */
760 	if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
761 		r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
762 		if (r) {
763 			return r;
764 		}
765 	}
766 
767 	return 0;
768 }
769 
evergreen_cs_track_validate_texture(struct radeon_cs_parser * p,struct radeon_bo * texture,struct radeon_bo * mipmap,unsigned idx)770 static int evergreen_cs_track_validate_texture(struct radeon_cs_parser *p,
771 					       struct radeon_bo *texture,
772 					       struct radeon_bo *mipmap,
773 					       unsigned idx)
774 {
775 	struct eg_surface surf;
776 	unsigned long toffset, moffset;
777 	unsigned dim, llevel, mslice, width, height, depth, i;
778 	u32 texdw[8];
779 	int r;
780 
781 	texdw[0] = radeon_get_ib_value(p, idx + 0);
782 	texdw[1] = radeon_get_ib_value(p, idx + 1);
783 	texdw[2] = radeon_get_ib_value(p, idx + 2);
784 	texdw[3] = radeon_get_ib_value(p, idx + 3);
785 	texdw[4] = radeon_get_ib_value(p, idx + 4);
786 	texdw[5] = radeon_get_ib_value(p, idx + 5);
787 	texdw[6] = radeon_get_ib_value(p, idx + 6);
788 	texdw[7] = radeon_get_ib_value(p, idx + 7);
789 	dim = G_030000_DIM(texdw[0]);
790 	llevel = G_030014_LAST_LEVEL(texdw[5]);
791 	mslice = G_030014_LAST_ARRAY(texdw[5]) + 1;
792 	width = G_030000_TEX_WIDTH(texdw[0]) + 1;
793 	height =  G_030004_TEX_HEIGHT(texdw[1]) + 1;
794 	depth = G_030004_TEX_DEPTH(texdw[1]) + 1;
795 	surf.format = G_03001C_DATA_FORMAT(texdw[7]);
796 	surf.nbx = (G_030000_PITCH(texdw[0]) + 1) * 8;
797 	surf.nbx = r600_fmt_get_nblocksx(surf.format, surf.nbx);
798 	surf.nby = r600_fmt_get_nblocksy(surf.format, height);
799 	surf.mode = G_030004_ARRAY_MODE(texdw[1]);
800 	surf.tsplit = G_030018_TILE_SPLIT(texdw[6]);
801 	surf.nbanks = G_03001C_NUM_BANKS(texdw[7]);
802 	surf.bankw = G_03001C_BANK_WIDTH(texdw[7]);
803 	surf.bankh = G_03001C_BANK_HEIGHT(texdw[7]);
804 	surf.mtilea = G_03001C_MACRO_TILE_ASPECT(texdw[7]);
805 	surf.nsamples = 1;
806 	toffset = texdw[2] << 8;
807 	moffset = texdw[3] << 8;
808 
809 	if (!r600_fmt_is_valid_texture(surf.format, p->family)) {
810 		dev_warn(p->dev, "%s:%d texture invalid format %d\n",
811 			 __func__, __LINE__, surf.format);
812 		return -EINVAL;
813 	}
814 	switch (dim) {
815 	case V_030000_SQ_TEX_DIM_1D:
816 	case V_030000_SQ_TEX_DIM_2D:
817 	case V_030000_SQ_TEX_DIM_CUBEMAP:
818 	case V_030000_SQ_TEX_DIM_1D_ARRAY:
819 	case V_030000_SQ_TEX_DIM_2D_ARRAY:
820 		depth = 1;
821 		break;
822 	case V_030000_SQ_TEX_DIM_2D_MSAA:
823 	case V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA:
824 		surf.nsamples = 1 << llevel;
825 		llevel = 0;
826 		depth = 1;
827 		break;
828 	case V_030000_SQ_TEX_DIM_3D:
829 		break;
830 	default:
831 		dev_warn(p->dev, "%s:%d texture invalid dimension %d\n",
832 			 __func__, __LINE__, dim);
833 		return -EINVAL;
834 	}
835 
836 	r = evergreen_surface_value_conv_check(p, &surf, "texture");
837 	if (r) {
838 		return r;
839 	}
840 
841 	/* align height */
842 	evergreen_surface_check(p, &surf, NULL);
843 #ifdef __NetBSD__		/* XXX ALIGN means something else */
844 	surf.nby = round_up(surf.nby, surf.halign);
845 #else
846 	surf.nby = ALIGN(surf.nby, surf.halign);
847 #endif
848 
849 	r = evergreen_surface_check(p, &surf, "texture");
850 	if (r) {
851 		dev_warn(p->dev, "%s:%d texture invalid 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
852 			 __func__, __LINE__, texdw[0], texdw[1], texdw[4],
853 			 texdw[5], texdw[6], texdw[7]);
854 		return r;
855 	}
856 
857 	/* check texture size */
858 	if (toffset & (surf.base_align - 1)) {
859 		dev_warn(p->dev, "%s:%d texture bo base %ld not aligned with %ld\n",
860 			 __func__, __LINE__, toffset, surf.base_align);
861 		return -EINVAL;
862 	}
863 	if (surf.nsamples <= 1 && moffset & (surf.base_align - 1)) {
864 		dev_warn(p->dev, "%s:%d mipmap bo base %ld not aligned with %ld\n",
865 			 __func__, __LINE__, moffset, surf.base_align);
866 		return -EINVAL;
867 	}
868 	if (dim == SQ_TEX_DIM_3D) {
869 		toffset += surf.layer_size * depth;
870 	} else {
871 		toffset += surf.layer_size * mslice;
872 	}
873 	if (toffset > radeon_bo_size(texture)) {
874 		dev_warn(p->dev, "%s:%d texture bo too small (layer size %d, "
875 			 "offset %ld, max layer %d, depth %d, bo size %ld) (%d %d)\n",
876 			 __func__, __LINE__, surf.layer_size,
877 			(unsigned long)texdw[2] << 8, mslice,
878 			depth, radeon_bo_size(texture),
879 			surf.nbx, surf.nby);
880 		return -EINVAL;
881 	}
882 
883 	if (!mipmap) {
884 		if (llevel) {
885 			dev_warn(p->dev, "%s:%i got NULL MIP_ADDRESS relocation\n",
886 				 __func__, __LINE__);
887 			return -EINVAL;
888 		} else {
889 			return 0; /* everything's ok */
890 		}
891 	}
892 
893 	/* check mipmap size */
894 	for (i = 1; i <= llevel; i++) {
895 		unsigned w, h, d;
896 
897 		w = r600_mip_minify(width, i);
898 		h = r600_mip_minify(height, i);
899 		d = r600_mip_minify(depth, i);
900 		surf.nbx = r600_fmt_get_nblocksx(surf.format, w);
901 		surf.nby = r600_fmt_get_nblocksy(surf.format, h);
902 
903 		switch (surf.mode) {
904 		case ARRAY_2D_TILED_THIN1:
905 			if (surf.nbx < surf.palign || surf.nby < surf.halign) {
906 				surf.mode = ARRAY_1D_TILED_THIN1;
907 			}
908 			/* recompute alignment */
909 			evergreen_surface_check(p, &surf, NULL);
910 			break;
911 		case ARRAY_LINEAR_GENERAL:
912 		case ARRAY_LINEAR_ALIGNED:
913 		case ARRAY_1D_TILED_THIN1:
914 			break;
915 		default:
916 			dev_warn(p->dev, "%s:%d invalid array mode %d\n",
917 				 __func__, __LINE__, surf.mode);
918 			return -EINVAL;
919 		}
920 #ifdef __NetBSD__		/* XXX ALIGN means something else.  */
921 		surf.nbx = round_up(surf.nbx, surf.palign);
922 		surf.nby = round_up(surf.nby, surf.halign);
923 #else
924 		surf.nbx = ALIGN(surf.nbx, surf.palign);
925 		surf.nby = ALIGN(surf.nby, surf.halign);
926 #endif
927 
928 		r = evergreen_surface_check(p, &surf, "mipmap");
929 		if (r) {
930 			return r;
931 		}
932 
933 		if (dim == SQ_TEX_DIM_3D) {
934 			moffset += surf.layer_size * d;
935 		} else {
936 			moffset += surf.layer_size * mslice;
937 		}
938 		if (moffset > radeon_bo_size(mipmap)) {
939 			dev_warn(p->dev, "%s:%d mipmap [%d] bo too small (layer size %d, "
940 					"offset %ld, coffset %ld, max layer %d, depth %d, "
941 					"bo size %ld) level0 (%d %d %d)\n",
942 					__func__, __LINE__, i, surf.layer_size,
943 					(unsigned long)texdw[3] << 8, moffset, mslice,
944 					d, radeon_bo_size(mipmap),
945 					width, height, depth);
946 			dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
947 				 __func__, __LINE__, surf.nbx, surf.nby,
948 				surf.mode, surf.bpe, surf.nsamples,
949 				surf.bankw, surf.bankh,
950 				surf.tsplit, surf.mtilea);
951 			return -EINVAL;
952 		}
953 	}
954 
955 	return 0;
956 }
957 
evergreen_cs_track_check(struct radeon_cs_parser * p)958 static int evergreen_cs_track_check(struct radeon_cs_parser *p)
959 {
960 	struct evergreen_cs_track *track = p->track;
961 	unsigned tmp, i;
962 	int r;
963 	unsigned buffer_mask = 0;
964 
965 	/* check streamout */
966 	if (track->streamout_dirty && track->vgt_strmout_config) {
967 		for (i = 0; i < 4; i++) {
968 			if (track->vgt_strmout_config & (1 << i)) {
969 				buffer_mask |= (track->vgt_strmout_buffer_config >> (i * 4)) & 0xf;
970 			}
971 		}
972 
973 		for (i = 0; i < 4; i++) {
974 			if (buffer_mask & (1 << i)) {
975 				if (track->vgt_strmout_bo[i]) {
976 					u64 offset = (u64)track->vgt_strmout_bo_offset[i] +
977 							(u64)track->vgt_strmout_size[i];
978 					if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) {
979 						DRM_ERROR("streamout %d bo too small: 0x%"PRIx64", 0x%lx\n",
980 							  i, offset,
981 							  radeon_bo_size(track->vgt_strmout_bo[i]));
982 						return -EINVAL;
983 					}
984 				} else {
985 					dev_warn(p->dev, "No buffer for streamout %d\n", i);
986 					return -EINVAL;
987 				}
988 			}
989 		}
990 		track->streamout_dirty = false;
991 	}
992 
993 	if (track->sx_misc_kill_all_prims)
994 		return 0;
995 
996 	/* check that we have a cb for each enabled target
997 	 */
998 	if (track->cb_dirty) {
999 		tmp = track->cb_target_mask;
1000 		for (i = 0; i < 8; i++) {
1001 			u32 format = G_028C70_FORMAT(track->cb_color_info[i]);
1002 
1003 			if (format != V_028C70_COLOR_INVALID &&
1004 			    (tmp >> (i * 4)) & 0xF) {
1005 				/* at least one component is enabled */
1006 				if (track->cb_color_bo[i] == NULL) {
1007 					dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
1008 						__func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i);
1009 					return -EINVAL;
1010 				}
1011 				/* check cb */
1012 				r = evergreen_cs_track_validate_cb(p, i);
1013 				if (r) {
1014 					return r;
1015 				}
1016 			}
1017 		}
1018 		track->cb_dirty = false;
1019 	}
1020 
1021 	if (track->db_dirty) {
1022 		/* Check stencil buffer */
1023 		if (G_028044_FORMAT(track->db_s_info) != V_028044_STENCIL_INVALID &&
1024 		    G_028800_STENCIL_ENABLE(track->db_depth_control)) {
1025 			r = evergreen_cs_track_validate_stencil(p);
1026 			if (r)
1027 				return r;
1028 		}
1029 		/* Check depth buffer */
1030 		if (G_028040_FORMAT(track->db_z_info) != V_028040_Z_INVALID &&
1031 		    G_028800_Z_ENABLE(track->db_depth_control)) {
1032 			r = evergreen_cs_track_validate_depth(p);
1033 			if (r)
1034 				return r;
1035 		}
1036 		track->db_dirty = false;
1037 	}
1038 
1039 	return 0;
1040 }
1041 
1042 /**
1043  * evergreen_cs_packet_parse_vline() - parse userspace VLINE packet
1044  * @parser:		parser structure holding parsing context.
1045  *
1046  * This is an Evergreen(+)-specific function for parsing VLINE packets.
1047  * Real work is done by r600_cs_common_vline_parse function.
1048  * Here we just set up ASIC-specific register table and call
1049  * the common implementation function.
1050  */
evergreen_cs_packet_parse_vline(struct radeon_cs_parser * p)1051 static int evergreen_cs_packet_parse_vline(struct radeon_cs_parser *p)
1052 {
1053 
1054 	static uint32_t vline_start_end[6] = {
1055 		EVERGREEN_VLINE_START_END + EVERGREEN_CRTC0_REGISTER_OFFSET,
1056 		EVERGREEN_VLINE_START_END + EVERGREEN_CRTC1_REGISTER_OFFSET,
1057 		EVERGREEN_VLINE_START_END + EVERGREEN_CRTC2_REGISTER_OFFSET,
1058 		EVERGREEN_VLINE_START_END + EVERGREEN_CRTC3_REGISTER_OFFSET,
1059 		EVERGREEN_VLINE_START_END + EVERGREEN_CRTC4_REGISTER_OFFSET,
1060 		EVERGREEN_VLINE_START_END + EVERGREEN_CRTC5_REGISTER_OFFSET
1061 	};
1062 	static uint32_t vline_status[6] = {
1063 		EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
1064 		EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
1065 		EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
1066 		EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
1067 		EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
1068 		EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET
1069 	};
1070 
1071 	return r600_cs_common_vline_parse(p, vline_start_end, vline_status);
1072 }
1073 
evergreen_packet0_check(struct radeon_cs_parser * p,struct radeon_cs_packet * pkt,unsigned idx,unsigned reg)1074 static int evergreen_packet0_check(struct radeon_cs_parser *p,
1075 				   struct radeon_cs_packet *pkt,
1076 				   unsigned idx, unsigned reg)
1077 {
1078 	int r;
1079 
1080 	switch (reg) {
1081 	case EVERGREEN_VLINE_START_END:
1082 		r = evergreen_cs_packet_parse_vline(p);
1083 		if (r) {
1084 			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1085 					idx, reg);
1086 			return r;
1087 		}
1088 		break;
1089 	default:
1090 		printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
1091 		       reg, idx);
1092 		return -EINVAL;
1093 	}
1094 	return 0;
1095 }
1096 
evergreen_cs_parse_packet0(struct radeon_cs_parser * p,struct radeon_cs_packet * pkt)1097 static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p,
1098 				      struct radeon_cs_packet *pkt)
1099 {
1100 	unsigned reg, i;
1101 	unsigned idx;
1102 	int r;
1103 
1104 	idx = pkt->idx + 1;
1105 	reg = pkt->reg;
1106 	for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
1107 		r = evergreen_packet0_check(p, pkt, idx, reg);
1108 		if (r) {
1109 			return r;
1110 		}
1111 	}
1112 	return 0;
1113 }
1114 
1115 /**
1116  * evergreen_cs_check_reg() - check if register is authorized or not
1117  * @parser: parser structure holding parsing context
1118  * @reg: register we are testing
1119  * @idx: index into the cs buffer
1120  *
1121  * This function will test against evergreen_reg_safe_bm and return 0
1122  * if register is safe. If register is not flag as safe this function
1123  * will test it against a list of register needind special handling.
1124  */
evergreen_cs_check_reg(struct radeon_cs_parser * p,u32 reg,u32 idx)1125 static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
1126 {
1127 	struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track;
1128 	struct radeon_cs_reloc *reloc;
1129 	u32 last_reg;
1130 	u32 m, i, tmp, *ib;
1131 	int r;
1132 
1133 	if (p->rdev->family >= CHIP_CAYMAN)
1134 		last_reg = ARRAY_SIZE(cayman_reg_safe_bm);
1135 	else
1136 		last_reg = ARRAY_SIZE(evergreen_reg_safe_bm);
1137 
1138 	i = (reg >> 7);
1139 	if (i >= last_reg) {
1140 		dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1141 		return -EINVAL;
1142 	}
1143 	m = 1 << ((reg >> 2) & 31);
1144 	if (p->rdev->family >= CHIP_CAYMAN) {
1145 		if (!(cayman_reg_safe_bm[i] & m))
1146 			return 0;
1147 	} else {
1148 		if (!(evergreen_reg_safe_bm[i] & m))
1149 			return 0;
1150 	}
1151 	ib = p->ib.ptr;
1152 	switch (reg) {
1153 	/* force following reg to 0 in an attempt to disable out buffer
1154 	 * which will need us to better understand how it works to perform
1155 	 * security check on it (Jerome)
1156 	 */
1157 	case SQ_ESGS_RING_SIZE:
1158 	case SQ_GSVS_RING_SIZE:
1159 	case SQ_ESTMP_RING_SIZE:
1160 	case SQ_GSTMP_RING_SIZE:
1161 	case SQ_HSTMP_RING_SIZE:
1162 	case SQ_LSTMP_RING_SIZE:
1163 	case SQ_PSTMP_RING_SIZE:
1164 	case SQ_VSTMP_RING_SIZE:
1165 	case SQ_ESGS_RING_ITEMSIZE:
1166 	case SQ_ESTMP_RING_ITEMSIZE:
1167 	case SQ_GSTMP_RING_ITEMSIZE:
1168 	case SQ_GSVS_RING_ITEMSIZE:
1169 	case SQ_GS_VERT_ITEMSIZE:
1170 	case SQ_GS_VERT_ITEMSIZE_1:
1171 	case SQ_GS_VERT_ITEMSIZE_2:
1172 	case SQ_GS_VERT_ITEMSIZE_3:
1173 	case SQ_GSVS_RING_OFFSET_1:
1174 	case SQ_GSVS_RING_OFFSET_2:
1175 	case SQ_GSVS_RING_OFFSET_3:
1176 	case SQ_HSTMP_RING_ITEMSIZE:
1177 	case SQ_LSTMP_RING_ITEMSIZE:
1178 	case SQ_PSTMP_RING_ITEMSIZE:
1179 	case SQ_VSTMP_RING_ITEMSIZE:
1180 	case VGT_TF_RING_SIZE:
1181 		/* get value to populate the IB don't remove */
1182 		/*tmp =radeon_get_ib_value(p, idx);
1183 		  ib[idx] = 0;*/
1184 		break;
1185 	case SQ_ESGS_RING_BASE:
1186 	case SQ_GSVS_RING_BASE:
1187 	case SQ_ESTMP_RING_BASE:
1188 	case SQ_GSTMP_RING_BASE:
1189 	case SQ_HSTMP_RING_BASE:
1190 	case SQ_LSTMP_RING_BASE:
1191 	case SQ_PSTMP_RING_BASE:
1192 	case SQ_VSTMP_RING_BASE:
1193 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1194 		if (r) {
1195 			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1196 					"0x%04X\n", reg);
1197 			return -EINVAL;
1198 		}
1199 		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1200 		break;
1201 	case DB_DEPTH_CONTROL:
1202 		track->db_depth_control = radeon_get_ib_value(p, idx);
1203 		track->db_dirty = true;
1204 		break;
1205 	case CAYMAN_DB_EQAA:
1206 		if (p->rdev->family < CHIP_CAYMAN) {
1207 			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1208 				 "0x%04X\n", reg);
1209 			return -EINVAL;
1210 		}
1211 		break;
1212 	case CAYMAN_DB_DEPTH_INFO:
1213 		if (p->rdev->family < CHIP_CAYMAN) {
1214 			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1215 				 "0x%04X\n", reg);
1216 			return -EINVAL;
1217 		}
1218 		break;
1219 	case DB_Z_INFO:
1220 		track->db_z_info = radeon_get_ib_value(p, idx);
1221 		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1222 			r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1223 			if (r) {
1224 				dev_warn(p->dev, "bad SET_CONTEXT_REG "
1225 						"0x%04X\n", reg);
1226 				return -EINVAL;
1227 			}
1228 			ib[idx] &= ~Z_ARRAY_MODE(0xf);
1229 			track->db_z_info &= ~Z_ARRAY_MODE(0xf);
1230 			ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1231 			track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1232 			if (reloc->tiling_flags & RADEON_TILING_MACRO) {
1233 				unsigned bankw, bankh, mtaspect, tile_split;
1234 
1235 				evergreen_tiling_fields(reloc->tiling_flags,
1236 							&bankw, &bankh, &mtaspect,
1237 							&tile_split);
1238 				ib[idx] |= DB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1239 				ib[idx] |= DB_TILE_SPLIT(tile_split) |
1240 						DB_BANK_WIDTH(bankw) |
1241 						DB_BANK_HEIGHT(bankh) |
1242 						DB_MACRO_TILE_ASPECT(mtaspect);
1243 			}
1244 		}
1245 		track->db_dirty = true;
1246 		break;
1247 	case DB_STENCIL_INFO:
1248 		track->db_s_info = radeon_get_ib_value(p, idx);
1249 		track->db_dirty = true;
1250 		break;
1251 	case DB_DEPTH_VIEW:
1252 		track->db_depth_view = radeon_get_ib_value(p, idx);
1253 		track->db_dirty = true;
1254 		break;
1255 	case DB_DEPTH_SIZE:
1256 		track->db_depth_size = radeon_get_ib_value(p, idx);
1257 		track->db_dirty = true;
1258 		break;
1259 	case R_02805C_DB_DEPTH_SLICE:
1260 		track->db_depth_slice = radeon_get_ib_value(p, idx);
1261 		track->db_dirty = true;
1262 		break;
1263 	case DB_Z_READ_BASE:
1264 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1265 		if (r) {
1266 			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1267 					"0x%04X\n", reg);
1268 			return -EINVAL;
1269 		}
1270 		track->db_z_read_offset = radeon_get_ib_value(p, idx);
1271 		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1272 		track->db_z_read_bo = reloc->robj;
1273 		track->db_dirty = true;
1274 		break;
1275 	case DB_Z_WRITE_BASE:
1276 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1277 		if (r) {
1278 			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1279 					"0x%04X\n", reg);
1280 			return -EINVAL;
1281 		}
1282 		track->db_z_write_offset = radeon_get_ib_value(p, idx);
1283 		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1284 		track->db_z_write_bo = reloc->robj;
1285 		track->db_dirty = true;
1286 		break;
1287 	case DB_STENCIL_READ_BASE:
1288 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1289 		if (r) {
1290 			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1291 					"0x%04X\n", reg);
1292 			return -EINVAL;
1293 		}
1294 		track->db_s_read_offset = radeon_get_ib_value(p, idx);
1295 		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1296 		track->db_s_read_bo = reloc->robj;
1297 		track->db_dirty = true;
1298 		break;
1299 	case DB_STENCIL_WRITE_BASE:
1300 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1301 		if (r) {
1302 			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1303 					"0x%04X\n", reg);
1304 			return -EINVAL;
1305 		}
1306 		track->db_s_write_offset = radeon_get_ib_value(p, idx);
1307 		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1308 		track->db_s_write_bo = reloc->robj;
1309 		track->db_dirty = true;
1310 		break;
1311 	case VGT_STRMOUT_CONFIG:
1312 		track->vgt_strmout_config = radeon_get_ib_value(p, idx);
1313 		track->streamout_dirty = true;
1314 		break;
1315 	case VGT_STRMOUT_BUFFER_CONFIG:
1316 		track->vgt_strmout_buffer_config = radeon_get_ib_value(p, idx);
1317 		track->streamout_dirty = true;
1318 		break;
1319 	case VGT_STRMOUT_BUFFER_BASE_0:
1320 	case VGT_STRMOUT_BUFFER_BASE_1:
1321 	case VGT_STRMOUT_BUFFER_BASE_2:
1322 	case VGT_STRMOUT_BUFFER_BASE_3:
1323 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1324 		if (r) {
1325 			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1326 					"0x%04X\n", reg);
1327 			return -EINVAL;
1328 		}
1329 		tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16;
1330 		track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
1331 		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1332 		track->vgt_strmout_bo[tmp] = reloc->robj;
1333 		track->streamout_dirty = true;
1334 		break;
1335 	case VGT_STRMOUT_BUFFER_SIZE_0:
1336 	case VGT_STRMOUT_BUFFER_SIZE_1:
1337 	case VGT_STRMOUT_BUFFER_SIZE_2:
1338 	case VGT_STRMOUT_BUFFER_SIZE_3:
1339 		tmp = (reg - VGT_STRMOUT_BUFFER_SIZE_0) / 16;
1340 		/* size in register is DWs, convert to bytes */
1341 		track->vgt_strmout_size[tmp] = radeon_get_ib_value(p, idx) * 4;
1342 		track->streamout_dirty = true;
1343 		break;
1344 	case CP_COHER_BASE:
1345 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1346 		if (r) {
1347 			dev_warn(p->dev, "missing reloc for CP_COHER_BASE "
1348 					"0x%04X\n", reg);
1349 			return -EINVAL;
1350 		}
1351 		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1352 	case CB_TARGET_MASK:
1353 		track->cb_target_mask = radeon_get_ib_value(p, idx);
1354 		track->cb_dirty = true;
1355 		break;
1356 	case CB_SHADER_MASK:
1357 		track->cb_shader_mask = radeon_get_ib_value(p, idx);
1358 		track->cb_dirty = true;
1359 		break;
1360 	case PA_SC_AA_CONFIG:
1361 		if (p->rdev->family >= CHIP_CAYMAN) {
1362 			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1363 				 "0x%04X\n", reg);
1364 			return -EINVAL;
1365 		}
1366 		tmp = radeon_get_ib_value(p, idx) & MSAA_NUM_SAMPLES_MASK;
1367 		track->nsamples = 1 << tmp;
1368 		break;
1369 	case CAYMAN_PA_SC_AA_CONFIG:
1370 		if (p->rdev->family < CHIP_CAYMAN) {
1371 			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1372 				 "0x%04X\n", reg);
1373 			return -EINVAL;
1374 		}
1375 		tmp = radeon_get_ib_value(p, idx) & CAYMAN_MSAA_NUM_SAMPLES_MASK;
1376 		track->nsamples = 1 << tmp;
1377 		break;
1378 	case CB_COLOR0_VIEW:
1379 	case CB_COLOR1_VIEW:
1380 	case CB_COLOR2_VIEW:
1381 	case CB_COLOR3_VIEW:
1382 	case CB_COLOR4_VIEW:
1383 	case CB_COLOR5_VIEW:
1384 	case CB_COLOR6_VIEW:
1385 	case CB_COLOR7_VIEW:
1386 		tmp = (reg - CB_COLOR0_VIEW) / 0x3c;
1387 		track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
1388 		track->cb_dirty = true;
1389 		break;
1390 	case CB_COLOR8_VIEW:
1391 	case CB_COLOR9_VIEW:
1392 	case CB_COLOR10_VIEW:
1393 	case CB_COLOR11_VIEW:
1394 		tmp = ((reg - CB_COLOR8_VIEW) / 0x1c) + 8;
1395 		track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
1396 		track->cb_dirty = true;
1397 		break;
1398 	case CB_COLOR0_INFO:
1399 	case CB_COLOR1_INFO:
1400 	case CB_COLOR2_INFO:
1401 	case CB_COLOR3_INFO:
1402 	case CB_COLOR4_INFO:
1403 	case CB_COLOR5_INFO:
1404 	case CB_COLOR6_INFO:
1405 	case CB_COLOR7_INFO:
1406 		tmp = (reg - CB_COLOR0_INFO) / 0x3c;
1407 		track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1408 		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1409 			r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1410 			if (r) {
1411 				dev_warn(p->dev, "bad SET_CONTEXT_REG "
1412 						"0x%04X\n", reg);
1413 				return -EINVAL;
1414 			}
1415 			ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1416 			track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1417 		}
1418 		track->cb_dirty = true;
1419 		break;
1420 	case CB_COLOR8_INFO:
1421 	case CB_COLOR9_INFO:
1422 	case CB_COLOR10_INFO:
1423 	case CB_COLOR11_INFO:
1424 		tmp = ((reg - CB_COLOR8_INFO) / 0x1c) + 8;
1425 		track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1426 		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1427 			r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1428 			if (r) {
1429 				dev_warn(p->dev, "bad SET_CONTEXT_REG "
1430 						"0x%04X\n", reg);
1431 				return -EINVAL;
1432 			}
1433 			ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1434 			track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1435 		}
1436 		track->cb_dirty = true;
1437 		break;
1438 	case CB_COLOR0_PITCH:
1439 	case CB_COLOR1_PITCH:
1440 	case CB_COLOR2_PITCH:
1441 	case CB_COLOR3_PITCH:
1442 	case CB_COLOR4_PITCH:
1443 	case CB_COLOR5_PITCH:
1444 	case CB_COLOR6_PITCH:
1445 	case CB_COLOR7_PITCH:
1446 		tmp = (reg - CB_COLOR0_PITCH) / 0x3c;
1447 		track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
1448 		track->cb_dirty = true;
1449 		break;
1450 	case CB_COLOR8_PITCH:
1451 	case CB_COLOR9_PITCH:
1452 	case CB_COLOR10_PITCH:
1453 	case CB_COLOR11_PITCH:
1454 		tmp = ((reg - CB_COLOR8_PITCH) / 0x1c) + 8;
1455 		track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
1456 		track->cb_dirty = true;
1457 		break;
1458 	case CB_COLOR0_SLICE:
1459 	case CB_COLOR1_SLICE:
1460 	case CB_COLOR2_SLICE:
1461 	case CB_COLOR3_SLICE:
1462 	case CB_COLOR4_SLICE:
1463 	case CB_COLOR5_SLICE:
1464 	case CB_COLOR6_SLICE:
1465 	case CB_COLOR7_SLICE:
1466 		tmp = (reg - CB_COLOR0_SLICE) / 0x3c;
1467 		track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
1468 		track->cb_color_slice_idx[tmp] = idx;
1469 		track->cb_dirty = true;
1470 		break;
1471 	case CB_COLOR8_SLICE:
1472 	case CB_COLOR9_SLICE:
1473 	case CB_COLOR10_SLICE:
1474 	case CB_COLOR11_SLICE:
1475 		tmp = ((reg - CB_COLOR8_SLICE) / 0x1c) + 8;
1476 		track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
1477 		track->cb_color_slice_idx[tmp] = idx;
1478 		track->cb_dirty = true;
1479 		break;
1480 	case CB_COLOR0_ATTRIB:
1481 	case CB_COLOR1_ATTRIB:
1482 	case CB_COLOR2_ATTRIB:
1483 	case CB_COLOR3_ATTRIB:
1484 	case CB_COLOR4_ATTRIB:
1485 	case CB_COLOR5_ATTRIB:
1486 	case CB_COLOR6_ATTRIB:
1487 	case CB_COLOR7_ATTRIB:
1488 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1489 		if (r) {
1490 			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1491 					"0x%04X\n", reg);
1492 			return -EINVAL;
1493 		}
1494 		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1495 			if (reloc->tiling_flags & RADEON_TILING_MACRO) {
1496 				unsigned bankw, bankh, mtaspect, tile_split;
1497 
1498 				evergreen_tiling_fields(reloc->tiling_flags,
1499 							&bankw, &bankh, &mtaspect,
1500 							&tile_split);
1501 				ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1502 				ib[idx] |= CB_TILE_SPLIT(tile_split) |
1503 					   CB_BANK_WIDTH(bankw) |
1504 					   CB_BANK_HEIGHT(bankh) |
1505 					   CB_MACRO_TILE_ASPECT(mtaspect);
1506 			}
1507 		}
1508 		tmp = ((reg - CB_COLOR0_ATTRIB) / 0x3c);
1509 		track->cb_color_attrib[tmp] = ib[idx];
1510 		track->cb_dirty = true;
1511 		break;
1512 	case CB_COLOR8_ATTRIB:
1513 	case CB_COLOR9_ATTRIB:
1514 	case CB_COLOR10_ATTRIB:
1515 	case CB_COLOR11_ATTRIB:
1516 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1517 		if (r) {
1518 			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1519 					"0x%04X\n", reg);
1520 			return -EINVAL;
1521 		}
1522 		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1523 			if (reloc->tiling_flags & RADEON_TILING_MACRO) {
1524 				unsigned bankw, bankh, mtaspect, tile_split;
1525 
1526 				evergreen_tiling_fields(reloc->tiling_flags,
1527 							&bankw, &bankh, &mtaspect,
1528 							&tile_split);
1529 				ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1530 				ib[idx] |= CB_TILE_SPLIT(tile_split) |
1531 					   CB_BANK_WIDTH(bankw) |
1532 					   CB_BANK_HEIGHT(bankh) |
1533 					   CB_MACRO_TILE_ASPECT(mtaspect);
1534 			}
1535 		}
1536 		tmp = ((reg - CB_COLOR8_ATTRIB) / 0x1c) + 8;
1537 		track->cb_color_attrib[tmp] = ib[idx];
1538 		track->cb_dirty = true;
1539 		break;
1540 	case CB_COLOR0_FMASK:
1541 	case CB_COLOR1_FMASK:
1542 	case CB_COLOR2_FMASK:
1543 	case CB_COLOR3_FMASK:
1544 	case CB_COLOR4_FMASK:
1545 	case CB_COLOR5_FMASK:
1546 	case CB_COLOR6_FMASK:
1547 	case CB_COLOR7_FMASK:
1548 		tmp = (reg - CB_COLOR0_FMASK) / 0x3c;
1549 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1550 		if (r) {
1551 			dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1552 			return -EINVAL;
1553 		}
1554 		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1555 		track->cb_color_fmask_bo[tmp] = reloc->robj;
1556 		break;
1557 	case CB_COLOR0_CMASK:
1558 	case CB_COLOR1_CMASK:
1559 	case CB_COLOR2_CMASK:
1560 	case CB_COLOR3_CMASK:
1561 	case CB_COLOR4_CMASK:
1562 	case CB_COLOR5_CMASK:
1563 	case CB_COLOR6_CMASK:
1564 	case CB_COLOR7_CMASK:
1565 		tmp = (reg - CB_COLOR0_CMASK) / 0x3c;
1566 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1567 		if (r) {
1568 			dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1569 			return -EINVAL;
1570 		}
1571 		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1572 		track->cb_color_cmask_bo[tmp] = reloc->robj;
1573 		break;
1574 	case CB_COLOR0_FMASK_SLICE:
1575 	case CB_COLOR1_FMASK_SLICE:
1576 	case CB_COLOR2_FMASK_SLICE:
1577 	case CB_COLOR3_FMASK_SLICE:
1578 	case CB_COLOR4_FMASK_SLICE:
1579 	case CB_COLOR5_FMASK_SLICE:
1580 	case CB_COLOR6_FMASK_SLICE:
1581 	case CB_COLOR7_FMASK_SLICE:
1582 		tmp = (reg - CB_COLOR0_FMASK_SLICE) / 0x3c;
1583 		track->cb_color_fmask_slice[tmp] = radeon_get_ib_value(p, idx);
1584 		break;
1585 	case CB_COLOR0_CMASK_SLICE:
1586 	case CB_COLOR1_CMASK_SLICE:
1587 	case CB_COLOR2_CMASK_SLICE:
1588 	case CB_COLOR3_CMASK_SLICE:
1589 	case CB_COLOR4_CMASK_SLICE:
1590 	case CB_COLOR5_CMASK_SLICE:
1591 	case CB_COLOR6_CMASK_SLICE:
1592 	case CB_COLOR7_CMASK_SLICE:
1593 		tmp = (reg - CB_COLOR0_CMASK_SLICE) / 0x3c;
1594 		track->cb_color_cmask_slice[tmp] = radeon_get_ib_value(p, idx);
1595 		break;
1596 	case CB_COLOR0_BASE:
1597 	case CB_COLOR1_BASE:
1598 	case CB_COLOR2_BASE:
1599 	case CB_COLOR3_BASE:
1600 	case CB_COLOR4_BASE:
1601 	case CB_COLOR5_BASE:
1602 	case CB_COLOR6_BASE:
1603 	case CB_COLOR7_BASE:
1604 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1605 		if (r) {
1606 			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1607 					"0x%04X\n", reg);
1608 			return -EINVAL;
1609 		}
1610 		tmp = (reg - CB_COLOR0_BASE) / 0x3c;
1611 		track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
1612 		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1613 		track->cb_color_bo[tmp] = reloc->robj;
1614 		track->cb_dirty = true;
1615 		break;
1616 	case CB_COLOR8_BASE:
1617 	case CB_COLOR9_BASE:
1618 	case CB_COLOR10_BASE:
1619 	case CB_COLOR11_BASE:
1620 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1621 		if (r) {
1622 			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1623 					"0x%04X\n", reg);
1624 			return -EINVAL;
1625 		}
1626 		tmp = ((reg - CB_COLOR8_BASE) / 0x1c) + 8;
1627 		track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
1628 		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1629 		track->cb_color_bo[tmp] = reloc->robj;
1630 		track->cb_dirty = true;
1631 		break;
1632 	case DB_HTILE_DATA_BASE:
1633 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1634 		if (r) {
1635 			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1636 					"0x%04X\n", reg);
1637 			return -EINVAL;
1638 		}
1639 		track->htile_offset = radeon_get_ib_value(p, idx);
1640 		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1641 		track->htile_bo = reloc->robj;
1642 		track->db_dirty = true;
1643 		break;
1644 	case DB_HTILE_SURFACE:
1645 		/* 8x8 only */
1646 		track->htile_surface = radeon_get_ib_value(p, idx);
1647 		/* force 8x8 htile width and height */
1648 		ib[idx] |= 3;
1649 		track->db_dirty = true;
1650 		break;
1651 	case CB_IMMED0_BASE:
1652 	case CB_IMMED1_BASE:
1653 	case CB_IMMED2_BASE:
1654 	case CB_IMMED3_BASE:
1655 	case CB_IMMED4_BASE:
1656 	case CB_IMMED5_BASE:
1657 	case CB_IMMED6_BASE:
1658 	case CB_IMMED7_BASE:
1659 	case CB_IMMED8_BASE:
1660 	case CB_IMMED9_BASE:
1661 	case CB_IMMED10_BASE:
1662 	case CB_IMMED11_BASE:
1663 	case SQ_PGM_START_FS:
1664 	case SQ_PGM_START_ES:
1665 	case SQ_PGM_START_VS:
1666 	case SQ_PGM_START_GS:
1667 	case SQ_PGM_START_PS:
1668 	case SQ_PGM_START_HS:
1669 	case SQ_PGM_START_LS:
1670 	case SQ_CONST_MEM_BASE:
1671 	case SQ_ALU_CONST_CACHE_GS_0:
1672 	case SQ_ALU_CONST_CACHE_GS_1:
1673 	case SQ_ALU_CONST_CACHE_GS_2:
1674 	case SQ_ALU_CONST_CACHE_GS_3:
1675 	case SQ_ALU_CONST_CACHE_GS_4:
1676 	case SQ_ALU_CONST_CACHE_GS_5:
1677 	case SQ_ALU_CONST_CACHE_GS_6:
1678 	case SQ_ALU_CONST_CACHE_GS_7:
1679 	case SQ_ALU_CONST_CACHE_GS_8:
1680 	case SQ_ALU_CONST_CACHE_GS_9:
1681 	case SQ_ALU_CONST_CACHE_GS_10:
1682 	case SQ_ALU_CONST_CACHE_GS_11:
1683 	case SQ_ALU_CONST_CACHE_GS_12:
1684 	case SQ_ALU_CONST_CACHE_GS_13:
1685 	case SQ_ALU_CONST_CACHE_GS_14:
1686 	case SQ_ALU_CONST_CACHE_GS_15:
1687 	case SQ_ALU_CONST_CACHE_PS_0:
1688 	case SQ_ALU_CONST_CACHE_PS_1:
1689 	case SQ_ALU_CONST_CACHE_PS_2:
1690 	case SQ_ALU_CONST_CACHE_PS_3:
1691 	case SQ_ALU_CONST_CACHE_PS_4:
1692 	case SQ_ALU_CONST_CACHE_PS_5:
1693 	case SQ_ALU_CONST_CACHE_PS_6:
1694 	case SQ_ALU_CONST_CACHE_PS_7:
1695 	case SQ_ALU_CONST_CACHE_PS_8:
1696 	case SQ_ALU_CONST_CACHE_PS_9:
1697 	case SQ_ALU_CONST_CACHE_PS_10:
1698 	case SQ_ALU_CONST_CACHE_PS_11:
1699 	case SQ_ALU_CONST_CACHE_PS_12:
1700 	case SQ_ALU_CONST_CACHE_PS_13:
1701 	case SQ_ALU_CONST_CACHE_PS_14:
1702 	case SQ_ALU_CONST_CACHE_PS_15:
1703 	case SQ_ALU_CONST_CACHE_VS_0:
1704 	case SQ_ALU_CONST_CACHE_VS_1:
1705 	case SQ_ALU_CONST_CACHE_VS_2:
1706 	case SQ_ALU_CONST_CACHE_VS_3:
1707 	case SQ_ALU_CONST_CACHE_VS_4:
1708 	case SQ_ALU_CONST_CACHE_VS_5:
1709 	case SQ_ALU_CONST_CACHE_VS_6:
1710 	case SQ_ALU_CONST_CACHE_VS_7:
1711 	case SQ_ALU_CONST_CACHE_VS_8:
1712 	case SQ_ALU_CONST_CACHE_VS_9:
1713 	case SQ_ALU_CONST_CACHE_VS_10:
1714 	case SQ_ALU_CONST_CACHE_VS_11:
1715 	case SQ_ALU_CONST_CACHE_VS_12:
1716 	case SQ_ALU_CONST_CACHE_VS_13:
1717 	case SQ_ALU_CONST_CACHE_VS_14:
1718 	case SQ_ALU_CONST_CACHE_VS_15:
1719 	case SQ_ALU_CONST_CACHE_HS_0:
1720 	case SQ_ALU_CONST_CACHE_HS_1:
1721 	case SQ_ALU_CONST_CACHE_HS_2:
1722 	case SQ_ALU_CONST_CACHE_HS_3:
1723 	case SQ_ALU_CONST_CACHE_HS_4:
1724 	case SQ_ALU_CONST_CACHE_HS_5:
1725 	case SQ_ALU_CONST_CACHE_HS_6:
1726 	case SQ_ALU_CONST_CACHE_HS_7:
1727 	case SQ_ALU_CONST_CACHE_HS_8:
1728 	case SQ_ALU_CONST_CACHE_HS_9:
1729 	case SQ_ALU_CONST_CACHE_HS_10:
1730 	case SQ_ALU_CONST_CACHE_HS_11:
1731 	case SQ_ALU_CONST_CACHE_HS_12:
1732 	case SQ_ALU_CONST_CACHE_HS_13:
1733 	case SQ_ALU_CONST_CACHE_HS_14:
1734 	case SQ_ALU_CONST_CACHE_HS_15:
1735 	case SQ_ALU_CONST_CACHE_LS_0:
1736 	case SQ_ALU_CONST_CACHE_LS_1:
1737 	case SQ_ALU_CONST_CACHE_LS_2:
1738 	case SQ_ALU_CONST_CACHE_LS_3:
1739 	case SQ_ALU_CONST_CACHE_LS_4:
1740 	case SQ_ALU_CONST_CACHE_LS_5:
1741 	case SQ_ALU_CONST_CACHE_LS_6:
1742 	case SQ_ALU_CONST_CACHE_LS_7:
1743 	case SQ_ALU_CONST_CACHE_LS_8:
1744 	case SQ_ALU_CONST_CACHE_LS_9:
1745 	case SQ_ALU_CONST_CACHE_LS_10:
1746 	case SQ_ALU_CONST_CACHE_LS_11:
1747 	case SQ_ALU_CONST_CACHE_LS_12:
1748 	case SQ_ALU_CONST_CACHE_LS_13:
1749 	case SQ_ALU_CONST_CACHE_LS_14:
1750 	case SQ_ALU_CONST_CACHE_LS_15:
1751 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1752 		if (r) {
1753 			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1754 					"0x%04X\n", reg);
1755 			return -EINVAL;
1756 		}
1757 		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1758 		break;
1759 	case SX_MEMORY_EXPORT_BASE:
1760 		if (p->rdev->family >= CHIP_CAYMAN) {
1761 			dev_warn(p->dev, "bad SET_CONFIG_REG "
1762 				 "0x%04X\n", reg);
1763 			return -EINVAL;
1764 		}
1765 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1766 		if (r) {
1767 			dev_warn(p->dev, "bad SET_CONFIG_REG "
1768 					"0x%04X\n", reg);
1769 			return -EINVAL;
1770 		}
1771 		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1772 		break;
1773 	case CAYMAN_SX_SCATTER_EXPORT_BASE:
1774 		if (p->rdev->family < CHIP_CAYMAN) {
1775 			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1776 				 "0x%04X\n", reg);
1777 			return -EINVAL;
1778 		}
1779 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1780 		if (r) {
1781 			dev_warn(p->dev, "bad SET_CONTEXT_REG "
1782 					"0x%04X\n", reg);
1783 			return -EINVAL;
1784 		}
1785 		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1786 		break;
1787 	case SX_MISC:
1788 		track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0;
1789 		break;
1790 	default:
1791 		dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1792 		return -EINVAL;
1793 	}
1794 	return 0;
1795 }
1796 
evergreen_is_safe_reg(struct radeon_cs_parser * p,u32 reg,u32 idx)1797 static bool evergreen_is_safe_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
1798 {
1799 	u32 last_reg, m, i;
1800 
1801 	if (p->rdev->family >= CHIP_CAYMAN)
1802 		last_reg = ARRAY_SIZE(cayman_reg_safe_bm);
1803 	else
1804 		last_reg = ARRAY_SIZE(evergreen_reg_safe_bm);
1805 
1806 	i = (reg >> 7);
1807 	if (i >= last_reg) {
1808 		dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1809 		return false;
1810 	}
1811 	m = 1 << ((reg >> 2) & 31);
1812 	if (p->rdev->family >= CHIP_CAYMAN) {
1813 		if (!(cayman_reg_safe_bm[i] & m))
1814 			return true;
1815 	} else {
1816 		if (!(evergreen_reg_safe_bm[i] & m))
1817 			return true;
1818 	}
1819 	dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1820 	return false;
1821 }
1822 
evergreen_packet3_check(struct radeon_cs_parser * p,struct radeon_cs_packet * pkt)1823 static int evergreen_packet3_check(struct radeon_cs_parser *p,
1824 				   struct radeon_cs_packet *pkt)
1825 {
1826 	struct radeon_cs_reloc *reloc;
1827 	struct evergreen_cs_track *track;
1828 	volatile u32 *ib;
1829 	unsigned idx;
1830 	unsigned i;
1831 	unsigned start_reg, end_reg, reg;
1832 	int r;
1833 	u32 idx_value;
1834 
1835 	track = (struct evergreen_cs_track *)p->track;
1836 	ib = p->ib.ptr;
1837 	idx = pkt->idx + 1;
1838 	idx_value = radeon_get_ib_value(p, idx);
1839 
1840 	switch (pkt->opcode) {
1841 	case PACKET3_SET_PREDICATION:
1842 	{
1843 		int pred_op;
1844 		int tmp;
1845 		uint64_t offset;
1846 
1847 		if (pkt->count != 1) {
1848 			DRM_ERROR("bad SET PREDICATION\n");
1849 			return -EINVAL;
1850 		}
1851 
1852 		tmp = radeon_get_ib_value(p, idx + 1);
1853 		pred_op = (tmp >> 16) & 0x7;
1854 
1855 		/* for the clear predicate operation */
1856 		if (pred_op == 0)
1857 			return 0;
1858 
1859 		if (pred_op > 2) {
1860 			DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op);
1861 			return -EINVAL;
1862 		}
1863 
1864 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1865 		if (r) {
1866 			DRM_ERROR("bad SET PREDICATION\n");
1867 			return -EINVAL;
1868 		}
1869 
1870 		offset = reloc->gpu_offset +
1871 		         (idx_value & 0xfffffff0) +
1872 		         ((u64)(tmp & 0xff) << 32);
1873 
1874 		ib[idx + 0] = offset;
1875 		ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff);
1876 	}
1877 	break;
1878 	case PACKET3_CONTEXT_CONTROL:
1879 		if (pkt->count != 1) {
1880 			DRM_ERROR("bad CONTEXT_CONTROL\n");
1881 			return -EINVAL;
1882 		}
1883 		break;
1884 	case PACKET3_INDEX_TYPE:
1885 	case PACKET3_NUM_INSTANCES:
1886 	case PACKET3_CLEAR_STATE:
1887 		if (pkt->count) {
1888 			DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
1889 			return -EINVAL;
1890 		}
1891 		break;
1892 	case CAYMAN_PACKET3_DEALLOC_STATE:
1893 		if (p->rdev->family < CHIP_CAYMAN) {
1894 			DRM_ERROR("bad PACKET3_DEALLOC_STATE\n");
1895 			return -EINVAL;
1896 		}
1897 		if (pkt->count) {
1898 			DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
1899 			return -EINVAL;
1900 		}
1901 		break;
1902 	case PACKET3_INDEX_BASE:
1903 	{
1904 		uint64_t offset;
1905 
1906 		if (pkt->count != 1) {
1907 			DRM_ERROR("bad INDEX_BASE\n");
1908 			return -EINVAL;
1909 		}
1910 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1911 		if (r) {
1912 			DRM_ERROR("bad INDEX_BASE\n");
1913 			return -EINVAL;
1914 		}
1915 
1916 		offset = reloc->gpu_offset +
1917 		         idx_value +
1918 		         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
1919 
1920 		ib[idx+0] = offset;
1921 		ib[idx+1] = upper_32_bits(offset) & 0xff;
1922 
1923 		r = evergreen_cs_track_check(p);
1924 		if (r) {
1925 			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1926 			return r;
1927 		}
1928 		break;
1929 	}
1930 	case PACKET3_DRAW_INDEX:
1931 	{
1932 		uint64_t offset;
1933 		if (pkt->count != 3) {
1934 			DRM_ERROR("bad DRAW_INDEX\n");
1935 			return -EINVAL;
1936 		}
1937 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1938 		if (r) {
1939 			DRM_ERROR("bad DRAW_INDEX\n");
1940 			return -EINVAL;
1941 		}
1942 
1943 		offset = reloc->gpu_offset +
1944 		         idx_value +
1945 		         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
1946 
1947 		ib[idx+0] = offset;
1948 		ib[idx+1] = upper_32_bits(offset) & 0xff;
1949 
1950 		r = evergreen_cs_track_check(p);
1951 		if (r) {
1952 			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1953 			return r;
1954 		}
1955 		break;
1956 	}
1957 	case PACKET3_DRAW_INDEX_2:
1958 	{
1959 		uint64_t offset;
1960 
1961 		if (pkt->count != 4) {
1962 			DRM_ERROR("bad DRAW_INDEX_2\n");
1963 			return -EINVAL;
1964 		}
1965 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1966 		if (r) {
1967 			DRM_ERROR("bad DRAW_INDEX_2\n");
1968 			return -EINVAL;
1969 		}
1970 
1971 		offset = reloc->gpu_offset +
1972 		         radeon_get_ib_value(p, idx+1) +
1973 		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
1974 
1975 		ib[idx+1] = offset;
1976 		ib[idx+2] = upper_32_bits(offset) & 0xff;
1977 
1978 		r = evergreen_cs_track_check(p);
1979 		if (r) {
1980 			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1981 			return r;
1982 		}
1983 		break;
1984 	}
1985 	case PACKET3_DRAW_INDEX_AUTO:
1986 		if (pkt->count != 1) {
1987 			DRM_ERROR("bad DRAW_INDEX_AUTO\n");
1988 			return -EINVAL;
1989 		}
1990 		r = evergreen_cs_track_check(p);
1991 		if (r) {
1992 			dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
1993 			return r;
1994 		}
1995 		break;
1996 	case PACKET3_DRAW_INDEX_MULTI_AUTO:
1997 		if (pkt->count != 2) {
1998 			DRM_ERROR("bad DRAW_INDEX_MULTI_AUTO\n");
1999 			return -EINVAL;
2000 		}
2001 		r = evergreen_cs_track_check(p);
2002 		if (r) {
2003 			dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
2004 			return r;
2005 		}
2006 		break;
2007 	case PACKET3_DRAW_INDEX_IMMD:
2008 		if (pkt->count < 2) {
2009 			DRM_ERROR("bad DRAW_INDEX_IMMD\n");
2010 			return -EINVAL;
2011 		}
2012 		r = evergreen_cs_track_check(p);
2013 		if (r) {
2014 			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2015 			return r;
2016 		}
2017 		break;
2018 	case PACKET3_DRAW_INDEX_OFFSET:
2019 		if (pkt->count != 2) {
2020 			DRM_ERROR("bad DRAW_INDEX_OFFSET\n");
2021 			return -EINVAL;
2022 		}
2023 		r = evergreen_cs_track_check(p);
2024 		if (r) {
2025 			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2026 			return r;
2027 		}
2028 		break;
2029 	case PACKET3_DRAW_INDEX_OFFSET_2:
2030 		if (pkt->count != 3) {
2031 			DRM_ERROR("bad DRAW_INDEX_OFFSET_2\n");
2032 			return -EINVAL;
2033 		}
2034 		r = evergreen_cs_track_check(p);
2035 		if (r) {
2036 			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2037 			return r;
2038 		}
2039 		break;
2040 	case PACKET3_DISPATCH_DIRECT:
2041 		if (pkt->count != 3) {
2042 			DRM_ERROR("bad DISPATCH_DIRECT\n");
2043 			return -EINVAL;
2044 		}
2045 		r = evergreen_cs_track_check(p);
2046 		if (r) {
2047 			dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
2048 			return r;
2049 		}
2050 		break;
2051 	case PACKET3_DISPATCH_INDIRECT:
2052 		if (pkt->count != 1) {
2053 			DRM_ERROR("bad DISPATCH_INDIRECT\n");
2054 			return -EINVAL;
2055 		}
2056 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2057 		if (r) {
2058 			DRM_ERROR("bad DISPATCH_INDIRECT\n");
2059 			return -EINVAL;
2060 		}
2061 		ib[idx+0] = idx_value + (u32)(reloc->gpu_offset & 0xffffffff);
2062 		r = evergreen_cs_track_check(p);
2063 		if (r) {
2064 			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2065 			return r;
2066 		}
2067 		break;
2068 	case PACKET3_WAIT_REG_MEM:
2069 		if (pkt->count != 5) {
2070 			DRM_ERROR("bad WAIT_REG_MEM\n");
2071 			return -EINVAL;
2072 		}
2073 		/* bit 4 is reg (0) or mem (1) */
2074 		if (idx_value & 0x10) {
2075 			uint64_t offset;
2076 
2077 			r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2078 			if (r) {
2079 				DRM_ERROR("bad WAIT_REG_MEM\n");
2080 				return -EINVAL;
2081 			}
2082 
2083 			offset = reloc->gpu_offset +
2084 			         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2085 			         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2086 
2087 			ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc);
2088 			ib[idx+2] = upper_32_bits(offset) & 0xff;
2089 		} else if (idx_value & 0x100) {
2090 			DRM_ERROR("cannot use PFP on REG wait\n");
2091 			return -EINVAL;
2092 		}
2093 		break;
2094 	case PACKET3_CP_DMA:
2095 	{
2096 		u32 command, size, info;
2097 		u64 offset, tmp;
2098 		if (pkt->count != 4) {
2099 			DRM_ERROR("bad CP DMA\n");
2100 			return -EINVAL;
2101 		}
2102 		command = radeon_get_ib_value(p, idx+4);
2103 		size = command & 0x1fffff;
2104 		info = radeon_get_ib_value(p, idx+1);
2105 		if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
2106 		    (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
2107 		    ((((info & 0x00300000) >> 20) == 0) &&
2108 		     (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
2109 		    ((((info & 0x60000000) >> 29) == 0) &&
2110 		     (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
2111 			/* non mem to mem copies requires dw aligned count */
2112 			if (size % 4) {
2113 				DRM_ERROR("CP DMA command requires dw count alignment\n");
2114 				return -EINVAL;
2115 			}
2116 		}
2117 		if (command & PACKET3_CP_DMA_CMD_SAS) {
2118 			/* src address space is register */
2119 			/* GDS is ok */
2120 			if (((info & 0x60000000) >> 29) != 1) {
2121 				DRM_ERROR("CP DMA SAS not supported\n");
2122 				return -EINVAL;
2123 			}
2124 		} else {
2125 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
2126 				DRM_ERROR("CP DMA SAIC only supported for registers\n");
2127 				return -EINVAL;
2128 			}
2129 			/* src address space is memory */
2130 			if (((info & 0x60000000) >> 29) == 0) {
2131 				r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2132 				if (r) {
2133 					DRM_ERROR("bad CP DMA SRC\n");
2134 					return -EINVAL;
2135 				}
2136 
2137 				tmp = radeon_get_ib_value(p, idx) +
2138 					((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
2139 
2140 				offset = reloc->gpu_offset + tmp;
2141 
2142 				if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2143 					dev_warn(p->dev, "CP DMA src buffer too small (%"PRIu64" %lu)\n",
2144 						 tmp + size, radeon_bo_size(reloc->robj));
2145 					return -EINVAL;
2146 				}
2147 
2148 				ib[idx] = offset;
2149 				ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2150 			} else if (((info & 0x60000000) >> 29) != 2) {
2151 				DRM_ERROR("bad CP DMA SRC_SEL\n");
2152 				return -EINVAL;
2153 			}
2154 		}
2155 		if (command & PACKET3_CP_DMA_CMD_DAS) {
2156 			/* dst address space is register */
2157 			/* GDS is ok */
2158 			if (((info & 0x00300000) >> 20) != 1) {
2159 				DRM_ERROR("CP DMA DAS not supported\n");
2160 				return -EINVAL;
2161 			}
2162 		} else {
2163 			/* dst address space is memory */
2164 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
2165 				DRM_ERROR("CP DMA DAIC only supported for registers\n");
2166 				return -EINVAL;
2167 			}
2168 			if (((info & 0x00300000) >> 20) == 0) {
2169 				r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2170 				if (r) {
2171 					DRM_ERROR("bad CP DMA DST\n");
2172 					return -EINVAL;
2173 				}
2174 
2175 				tmp = radeon_get_ib_value(p, idx+2) +
2176 					((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);
2177 
2178 				offset = reloc->gpu_offset + tmp;
2179 
2180 				if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2181 					dev_warn(p->dev, "CP DMA dst buffer too small (%"PRIu64" %lu)\n",
2182 						 tmp + size, radeon_bo_size(reloc->robj));
2183 					return -EINVAL;
2184 				}
2185 
2186 				ib[idx+2] = offset;
2187 				ib[idx+3] = upper_32_bits(offset) & 0xff;
2188 			} else {
2189 				DRM_ERROR("bad CP DMA DST_SEL\n");
2190 				return -EINVAL;
2191 			}
2192 		}
2193 		break;
2194 	}
2195 	case PACKET3_SURFACE_SYNC:
2196 		if (pkt->count != 3) {
2197 			DRM_ERROR("bad SURFACE_SYNC\n");
2198 			return -EINVAL;
2199 		}
2200 		/* 0xffffffff/0x0 is flush all cache flag */
2201 		if (radeon_get_ib_value(p, idx + 1) != 0xffffffff ||
2202 		    radeon_get_ib_value(p, idx + 2) != 0) {
2203 			r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2204 			if (r) {
2205 				DRM_ERROR("bad SURFACE_SYNC\n");
2206 				return -EINVAL;
2207 			}
2208 			ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2209 		}
2210 		break;
2211 	case PACKET3_EVENT_WRITE:
2212 		if (pkt->count != 2 && pkt->count != 0) {
2213 			DRM_ERROR("bad EVENT_WRITE\n");
2214 			return -EINVAL;
2215 		}
2216 		if (pkt->count) {
2217 			uint64_t offset;
2218 
2219 			r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2220 			if (r) {
2221 				DRM_ERROR("bad EVENT_WRITE\n");
2222 				return -EINVAL;
2223 			}
2224 			offset = reloc->gpu_offset +
2225 			         (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
2226 			         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2227 
2228 			ib[idx+1] = offset & 0xfffffff8;
2229 			ib[idx+2] = upper_32_bits(offset) & 0xff;
2230 		}
2231 		break;
2232 	case PACKET3_EVENT_WRITE_EOP:
2233 	{
2234 		uint64_t offset;
2235 
2236 		if (pkt->count != 4) {
2237 			DRM_ERROR("bad EVENT_WRITE_EOP\n");
2238 			return -EINVAL;
2239 		}
2240 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2241 		if (r) {
2242 			DRM_ERROR("bad EVENT_WRITE_EOP\n");
2243 			return -EINVAL;
2244 		}
2245 
2246 		offset = reloc->gpu_offset +
2247 		         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2248 		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2249 
2250 		ib[idx+1] = offset & 0xfffffffc;
2251 		ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2252 		break;
2253 	}
2254 	case PACKET3_EVENT_WRITE_EOS:
2255 	{
2256 		uint64_t offset;
2257 
2258 		if (pkt->count != 3) {
2259 			DRM_ERROR("bad EVENT_WRITE_EOS\n");
2260 			return -EINVAL;
2261 		}
2262 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2263 		if (r) {
2264 			DRM_ERROR("bad EVENT_WRITE_EOS\n");
2265 			return -EINVAL;
2266 		}
2267 
2268 		offset = reloc->gpu_offset +
2269 		         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2270 		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2271 
2272 		ib[idx+1] = offset & 0xfffffffc;
2273 		ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2274 		break;
2275 	}
2276 	case PACKET3_SET_CONFIG_REG:
2277 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
2278 		end_reg = 4 * pkt->count + start_reg - 4;
2279 		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
2280 		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
2281 		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
2282 			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2283 			return -EINVAL;
2284 		}
2285 		for (i = 0; i < pkt->count; i++) {
2286 			reg = start_reg + (4 * i);
2287 			r = evergreen_cs_check_reg(p, reg, idx+1+i);
2288 			if (r)
2289 				return r;
2290 		}
2291 		break;
2292 	case PACKET3_SET_CONTEXT_REG:
2293 		start_reg = (idx_value << 2) + PACKET3_SET_CONTEXT_REG_START;
2294 		end_reg = 4 * pkt->count + start_reg - 4;
2295 		if ((start_reg < PACKET3_SET_CONTEXT_REG_START) ||
2296 		    (start_reg >= PACKET3_SET_CONTEXT_REG_END) ||
2297 		    (end_reg >= PACKET3_SET_CONTEXT_REG_END)) {
2298 			DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n");
2299 			return -EINVAL;
2300 		}
2301 		for (i = 0; i < pkt->count; i++) {
2302 			reg = start_reg + (4 * i);
2303 			r = evergreen_cs_check_reg(p, reg, idx+1+i);
2304 			if (r)
2305 				return r;
2306 		}
2307 		break;
2308 	case PACKET3_SET_RESOURCE:
2309 		if (pkt->count % 8) {
2310 			DRM_ERROR("bad SET_RESOURCE\n");
2311 			return -EINVAL;
2312 		}
2313 		start_reg = (idx_value << 2) + PACKET3_SET_RESOURCE_START;
2314 		end_reg = 4 * pkt->count + start_reg - 4;
2315 		if ((start_reg < PACKET3_SET_RESOURCE_START) ||
2316 		    (start_reg >= PACKET3_SET_RESOURCE_END) ||
2317 		    (end_reg >= PACKET3_SET_RESOURCE_END)) {
2318 			DRM_ERROR("bad SET_RESOURCE\n");
2319 			return -EINVAL;
2320 		}
2321 		for (i = 0; i < (pkt->count / 8); i++) {
2322 			struct radeon_bo *texture, *mipmap;
2323 			u32 toffset, moffset;
2324 			u32 size, offset, mip_address, tex_dim;
2325 
2326 			switch (G__SQ_CONSTANT_TYPE(radeon_get_ib_value(p, idx+1+(i*8)+7))) {
2327 			case SQ_TEX_VTX_VALID_TEXTURE:
2328 				/* tex base */
2329 				r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2330 				if (r) {
2331 					DRM_ERROR("bad SET_RESOURCE (tex)\n");
2332 					return -EINVAL;
2333 				}
2334 				if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
2335 					ib[idx+1+(i*8)+1] |=
2336 						TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
2337 					if (reloc->tiling_flags & RADEON_TILING_MACRO) {
2338 						unsigned bankw, bankh, mtaspect, tile_split;
2339 
2340 						evergreen_tiling_fields(reloc->tiling_flags,
2341 									&bankw, &bankh, &mtaspect,
2342 									&tile_split);
2343 						ib[idx+1+(i*8)+6] |= TEX_TILE_SPLIT(tile_split);
2344 						ib[idx+1+(i*8)+7] |=
2345 							TEX_BANK_WIDTH(bankw) |
2346 							TEX_BANK_HEIGHT(bankh) |
2347 							MACRO_TILE_ASPECT(mtaspect) |
2348 							TEX_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
2349 					}
2350 				}
2351 				texture = reloc->robj;
2352 				toffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2353 
2354 				/* tex mip base */
2355 				tex_dim = ib[idx+1+(i*8)+0] & 0x7;
2356 				mip_address = ib[idx+1+(i*8)+3];
2357 
2358 				if ((tex_dim == SQ_TEX_DIM_2D_MSAA || tex_dim == SQ_TEX_DIM_2D_ARRAY_MSAA) &&
2359 				    !mip_address &&
2360 				    !radeon_cs_packet_next_is_pkt3_nop(p)) {
2361 					/* MIP_ADDRESS should point to FMASK for an MSAA texture.
2362 					 * It should be 0 if FMASK is disabled. */
2363 					moffset = 0;
2364 					mipmap = NULL;
2365 				} else {
2366 					r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2367 					if (r) {
2368 						DRM_ERROR("bad SET_RESOURCE (tex)\n");
2369 						return -EINVAL;
2370 					}
2371 					moffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2372 					mipmap = reloc->robj;
2373 				}
2374 
2375 				r = evergreen_cs_track_validate_texture(p, texture, mipmap, idx+1+(i*8));
2376 				if (r)
2377 					return r;
2378 				ib[idx+1+(i*8)+2] += toffset;
2379 				ib[idx+1+(i*8)+3] += moffset;
2380 				break;
2381 			case SQ_TEX_VTX_VALID_BUFFER:
2382 			{
2383 				uint64_t offset64;
2384 				/* vtx base */
2385 				r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2386 				if (r) {
2387 					DRM_ERROR("bad SET_RESOURCE (vtx)\n");
2388 					return -EINVAL;
2389 				}
2390 				offset = radeon_get_ib_value(p, idx+1+(i*8)+0);
2391 				size = radeon_get_ib_value(p, idx+1+(i*8)+1);
2392 				if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) {
2393 					/* force size to size of the buffer */
2394 					dev_warn(p->dev, "vbo resource seems too big for the bo\n");
2395 					ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset;
2396 				}
2397 
2398 				offset64 = reloc->gpu_offset + offset;
2399 				ib[idx+1+(i*8)+0] = offset64;
2400 				ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) |
2401 						    (upper_32_bits(offset64) & 0xff);
2402 				break;
2403 			}
2404 			case SQ_TEX_VTX_INVALID_TEXTURE:
2405 			case SQ_TEX_VTX_INVALID_BUFFER:
2406 			default:
2407 				DRM_ERROR("bad SET_RESOURCE\n");
2408 				return -EINVAL;
2409 			}
2410 		}
2411 		break;
2412 	case PACKET3_SET_ALU_CONST:
2413 		/* XXX fix me ALU const buffers only */
2414 		break;
2415 	case PACKET3_SET_BOOL_CONST:
2416 		start_reg = (idx_value << 2) + PACKET3_SET_BOOL_CONST_START;
2417 		end_reg = 4 * pkt->count + start_reg - 4;
2418 		if ((start_reg < PACKET3_SET_BOOL_CONST_START) ||
2419 		    (start_reg >= PACKET3_SET_BOOL_CONST_END) ||
2420 		    (end_reg >= PACKET3_SET_BOOL_CONST_END)) {
2421 			DRM_ERROR("bad SET_BOOL_CONST\n");
2422 			return -EINVAL;
2423 		}
2424 		break;
2425 	case PACKET3_SET_LOOP_CONST:
2426 		start_reg = (idx_value << 2) + PACKET3_SET_LOOP_CONST_START;
2427 		end_reg = 4 * pkt->count + start_reg - 4;
2428 		if ((start_reg < PACKET3_SET_LOOP_CONST_START) ||
2429 		    (start_reg >= PACKET3_SET_LOOP_CONST_END) ||
2430 		    (end_reg >= PACKET3_SET_LOOP_CONST_END)) {
2431 			DRM_ERROR("bad SET_LOOP_CONST\n");
2432 			return -EINVAL;
2433 		}
2434 		break;
2435 	case PACKET3_SET_CTL_CONST:
2436 		start_reg = (idx_value << 2) + PACKET3_SET_CTL_CONST_START;
2437 		end_reg = 4 * pkt->count + start_reg - 4;
2438 		if ((start_reg < PACKET3_SET_CTL_CONST_START) ||
2439 		    (start_reg >= PACKET3_SET_CTL_CONST_END) ||
2440 		    (end_reg >= PACKET3_SET_CTL_CONST_END)) {
2441 			DRM_ERROR("bad SET_CTL_CONST\n");
2442 			return -EINVAL;
2443 		}
2444 		break;
2445 	case PACKET3_SET_SAMPLER:
2446 		if (pkt->count % 3) {
2447 			DRM_ERROR("bad SET_SAMPLER\n");
2448 			return -EINVAL;
2449 		}
2450 		start_reg = (idx_value << 2) + PACKET3_SET_SAMPLER_START;
2451 		end_reg = 4 * pkt->count + start_reg - 4;
2452 		if ((start_reg < PACKET3_SET_SAMPLER_START) ||
2453 		    (start_reg >= PACKET3_SET_SAMPLER_END) ||
2454 		    (end_reg >= PACKET3_SET_SAMPLER_END)) {
2455 			DRM_ERROR("bad SET_SAMPLER\n");
2456 			return -EINVAL;
2457 		}
2458 		break;
2459 	case PACKET3_STRMOUT_BUFFER_UPDATE:
2460 		if (pkt->count != 4) {
2461 			DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n");
2462 			return -EINVAL;
2463 		}
2464 		/* Updating memory at DST_ADDRESS. */
2465 		if (idx_value & 0x1) {
2466 			u64 offset;
2467 			r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2468 			if (r) {
2469 				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n");
2470 				return -EINVAL;
2471 			}
2472 			offset = radeon_get_ib_value(p, idx+1);
2473 			offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2474 			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2475 				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%"PRIx64", 0x%lx\n",
2476 					  offset + 4, radeon_bo_size(reloc->robj));
2477 				return -EINVAL;
2478 			}
2479 			offset += reloc->gpu_offset;
2480 			ib[idx+1] = offset;
2481 			ib[idx+2] = upper_32_bits(offset) & 0xff;
2482 		}
2483 		/* Reading data from SRC_ADDRESS. */
2484 		if (((idx_value >> 1) & 0x3) == 2) {
2485 			u64 offset;
2486 			r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2487 			if (r) {
2488 				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n");
2489 				return -EINVAL;
2490 			}
2491 			offset = radeon_get_ib_value(p, idx+3);
2492 			offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2493 			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2494 				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%"PRIx64", 0x%lx\n",
2495 					  offset + 4, radeon_bo_size(reloc->robj));
2496 				return -EINVAL;
2497 			}
2498 			offset += reloc->gpu_offset;
2499 			ib[idx+3] = offset;
2500 			ib[idx+4] = upper_32_bits(offset) & 0xff;
2501 		}
2502 		break;
2503 	case PACKET3_MEM_WRITE:
2504 	{
2505 		u64 offset;
2506 
2507 		if (pkt->count != 3) {
2508 			DRM_ERROR("bad MEM_WRITE (invalid count)\n");
2509 			return -EINVAL;
2510 		}
2511 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2512 		if (r) {
2513 			DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
2514 			return -EINVAL;
2515 		}
2516 		offset = radeon_get_ib_value(p, idx+0);
2517 		offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
2518 		if (offset & 0x7) {
2519 			DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n");
2520 			return -EINVAL;
2521 		}
2522 		if ((offset + 8) > radeon_bo_size(reloc->robj)) {
2523 			DRM_ERROR("bad MEM_WRITE bo too small: 0x%"PRIx64", 0x%lx\n",
2524 				  offset + 8, radeon_bo_size(reloc->robj));
2525 			return -EINVAL;
2526 		}
2527 		offset += reloc->gpu_offset;
2528 		ib[idx+0] = offset;
2529 		ib[idx+1] = upper_32_bits(offset) & 0xff;
2530 		break;
2531 	}
2532 	case PACKET3_COPY_DW:
2533 		if (pkt->count != 4) {
2534 			DRM_ERROR("bad COPY_DW (invalid count)\n");
2535 			return -EINVAL;
2536 		}
2537 		if (idx_value & 0x1) {
2538 			u64 offset;
2539 			/* SRC is memory. */
2540 			r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2541 			if (r) {
2542 				DRM_ERROR("bad COPY_DW (missing src reloc)\n");
2543 				return -EINVAL;
2544 			}
2545 			offset = radeon_get_ib_value(p, idx+1);
2546 			offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2547 			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2548 				DRM_ERROR("bad COPY_DW src bo too small: 0x%"PRIx64", 0x%lx\n",
2549 					  offset + 4, radeon_bo_size(reloc->robj));
2550 				return -EINVAL;
2551 			}
2552 			offset += reloc->gpu_offset;
2553 			ib[idx+1] = offset;
2554 			ib[idx+2] = upper_32_bits(offset) & 0xff;
2555 		} else {
2556 			/* SRC is a reg. */
2557 			reg = radeon_get_ib_value(p, idx+1) << 2;
2558 			if (!evergreen_is_safe_reg(p, reg, idx+1))
2559 				return -EINVAL;
2560 		}
2561 		if (idx_value & 0x2) {
2562 			u64 offset;
2563 			/* DST is memory. */
2564 			r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2565 			if (r) {
2566 				DRM_ERROR("bad COPY_DW (missing dst reloc)\n");
2567 				return -EINVAL;
2568 			}
2569 			offset = radeon_get_ib_value(p, idx+3);
2570 			offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2571 			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2572 				DRM_ERROR("bad COPY_DW dst bo too small: 0x%"PRIx64", 0x%lx\n",
2573 					  offset + 4, radeon_bo_size(reloc->robj));
2574 				return -EINVAL;
2575 			}
2576 			offset += reloc->gpu_offset;
2577 			ib[idx+3] = offset;
2578 			ib[idx+4] = upper_32_bits(offset) & 0xff;
2579 		} else {
2580 			/* DST is a reg. */
2581 			reg = radeon_get_ib_value(p, idx+3) << 2;
2582 			if (!evergreen_is_safe_reg(p, reg, idx+3))
2583 				return -EINVAL;
2584 		}
2585 		break;
2586 	case PACKET3_NOP:
2587 		break;
2588 	default:
2589 		DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
2590 		return -EINVAL;
2591 	}
2592 	return 0;
2593 }
2594 
evergreen_cs_parse(struct radeon_cs_parser * p)2595 int evergreen_cs_parse(struct radeon_cs_parser *p)
2596 {
2597 	struct radeon_cs_packet pkt;
2598 	struct evergreen_cs_track *track;
2599 	u32 tmp;
2600 	int r;
2601 
2602 	if (p->track == NULL) {
2603 		/* initialize tracker, we are in kms */
2604 		track = kzalloc(sizeof(*track), GFP_KERNEL);
2605 		if (track == NULL)
2606 			return -ENOMEM;
2607 		evergreen_cs_track_init(track);
2608 		if (p->rdev->family >= CHIP_CAYMAN)
2609 			tmp = p->rdev->config.cayman.tile_config;
2610 		else
2611 			tmp = p->rdev->config.evergreen.tile_config;
2612 
2613 		switch (tmp & 0xf) {
2614 		case 0:
2615 			track->npipes = 1;
2616 			break;
2617 		case 1:
2618 		default:
2619 			track->npipes = 2;
2620 			break;
2621 		case 2:
2622 			track->npipes = 4;
2623 			break;
2624 		case 3:
2625 			track->npipes = 8;
2626 			break;
2627 		}
2628 
2629 		switch ((tmp & 0xf0) >> 4) {
2630 		case 0:
2631 			track->nbanks = 4;
2632 			break;
2633 		case 1:
2634 		default:
2635 			track->nbanks = 8;
2636 			break;
2637 		case 2:
2638 			track->nbanks = 16;
2639 			break;
2640 		}
2641 
2642 		switch ((tmp & 0xf00) >> 8) {
2643 		case 0:
2644 			track->group_size = 256;
2645 			break;
2646 		case 1:
2647 		default:
2648 			track->group_size = 512;
2649 			break;
2650 		}
2651 
2652 		switch ((tmp & 0xf000) >> 12) {
2653 		case 0:
2654 			track->row_size = 1;
2655 			break;
2656 		case 1:
2657 		default:
2658 			track->row_size = 2;
2659 			break;
2660 		case 2:
2661 			track->row_size = 4;
2662 			break;
2663 		}
2664 
2665 		p->track = track;
2666 	}
2667 	do {
2668 		r = radeon_cs_packet_parse(p, &pkt, p->idx);
2669 		if (r) {
2670 			kfree(p->track);
2671 			p->track = NULL;
2672 			return r;
2673 		}
2674 		p->idx += pkt.count + 2;
2675 		switch (pkt.type) {
2676 		case RADEON_PACKET_TYPE0:
2677 			r = evergreen_cs_parse_packet0(p, &pkt);
2678 			break;
2679 		case RADEON_PACKET_TYPE2:
2680 			break;
2681 		case RADEON_PACKET_TYPE3:
2682 			r = evergreen_packet3_check(p, &pkt);
2683 			break;
2684 		default:
2685 			DRM_ERROR("Unknown packet type %d !\n", pkt.type);
2686 			kfree(p->track);
2687 			p->track = NULL;
2688 			return -EINVAL;
2689 		}
2690 		if (r) {
2691 			kfree(p->track);
2692 			p->track = NULL;
2693 			return r;
2694 		}
2695 	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
2696 #if 0
2697 	for (r = 0; r < p->ib.length_dw; r++) {
2698 		printk(KERN_INFO "%05d  0x%08X\n", r, p->ib.ptr[r]);
2699 		mdelay(1);
2700 	}
2701 #endif
2702 	kfree(p->track);
2703 	p->track = NULL;
2704 	return 0;
2705 }
2706 
2707 /**
2708  * evergreen_dma_cs_parse() - parse the DMA IB
2709  * @p:		parser structure holding parsing context.
2710  *
2711  * Parses the DMA IB from the CS ioctl and updates
2712  * the GPU addresses based on the reloc information and
2713  * checks for errors. (Evergreen-Cayman)
2714  * Returns 0 for success and an error on failure.
2715  **/
evergreen_dma_cs_parse(struct radeon_cs_parser * p)2716 int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
2717 {
2718 	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
2719 	struct radeon_cs_reloc *src_reloc, *dst_reloc, *dst2_reloc;
2720 	u32 header, cmd, count, sub_cmd;
2721 	volatile u32 *ib = p->ib.ptr;
2722 	u32 idx;
2723 	u64 src_offset, dst_offset, dst2_offset;
2724 	int r;
2725 
2726 	do {
2727 		if (p->idx >= ib_chunk->length_dw) {
2728 			DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
2729 				  p->idx, ib_chunk->length_dw);
2730 			return -EINVAL;
2731 		}
2732 		idx = p->idx;
2733 		header = radeon_get_ib_value(p, idx);
2734 		cmd = GET_DMA_CMD(header);
2735 		count = GET_DMA_COUNT(header);
2736 		sub_cmd = GET_DMA_SUB_CMD(header);
2737 
2738 		switch (cmd) {
2739 		case DMA_PACKET_WRITE:
2740 			r = r600_dma_cs_next_reloc(p, &dst_reloc);
2741 			if (r) {
2742 				DRM_ERROR("bad DMA_PACKET_WRITE\n");
2743 				return -EINVAL;
2744 			}
2745 			switch (sub_cmd) {
2746 			/* tiled */
2747 			case 8:
2748 				dst_offset = radeon_get_ib_value(p, idx+1);
2749 				dst_offset <<= 8;
2750 
2751 				ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2752 				p->idx += count + 7;
2753 				break;
2754 			/* linear */
2755 			case 0:
2756 				dst_offset = radeon_get_ib_value(p, idx+1);
2757 				dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2758 
2759 				ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2760 				ib[idx+2] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2761 				p->idx += count + 3;
2762 				break;
2763 			default:
2764 				DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, header);
2765 				return -EINVAL;
2766 			}
2767 			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2768 				dev_warn(p->dev, "DMA write buffer too small (%"PRIu64" %lu)\n",
2769 					 dst_offset, radeon_bo_size(dst_reloc->robj));
2770 				return -EINVAL;
2771 			}
2772 			break;
2773 		case DMA_PACKET_COPY:
2774 			r = r600_dma_cs_next_reloc(p, &src_reloc);
2775 			if (r) {
2776 				DRM_ERROR("bad DMA_PACKET_COPY\n");
2777 				return -EINVAL;
2778 			}
2779 			r = r600_dma_cs_next_reloc(p, &dst_reloc);
2780 			if (r) {
2781 				DRM_ERROR("bad DMA_PACKET_COPY\n");
2782 				return -EINVAL;
2783 			}
2784 			switch (sub_cmd) {
2785 			/* Copy L2L, DW aligned */
2786 			case 0x00:
2787 				/* L2L, dw */
2788 				src_offset = radeon_get_ib_value(p, idx+2);
2789 				src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2790 				dst_offset = radeon_get_ib_value(p, idx+1);
2791 				dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
2792 				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2793 					dev_warn(p->dev, "DMA L2L, dw src buffer too small (%"PRIu64" %lu)\n",
2794 							src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2795 					return -EINVAL;
2796 				}
2797 				if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2798 					dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%"PRIu64" %lu)\n",
2799 							dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2800 					return -EINVAL;
2801 				}
2802 				ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2803 				ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2804 				ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2805 				ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2806 				p->idx += 5;
2807 				break;
2808 			/* Copy L2T/T2L */
2809 			case 0x08:
2810 				/* detile bit */
2811 				if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
2812 					/* tiled src, linear dst */
2813 					src_offset = radeon_get_ib_value(p, idx+1);
2814 					src_offset <<= 8;
2815 					ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
2816 
2817 					dst_offset = radeon_get_ib_value(p, idx + 7);
2818 					dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
2819 					ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2820 					ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2821 				} else {
2822 					/* linear src, tiled dst */
2823 					src_offset = radeon_get_ib_value(p, idx+7);
2824 					src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
2825 					ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2826 					ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2827 
2828 					dst_offset = radeon_get_ib_value(p, idx+1);
2829 					dst_offset <<= 8;
2830 					ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2831 				}
2832 				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2833 					dev_warn(p->dev, "DMA L2T, src buffer too small (%"PRIu64" %lu)\n",
2834 							src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2835 					return -EINVAL;
2836 				}
2837 				if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2838 					dev_warn(p->dev, "DMA L2T, dst buffer too small (%"PRIu64" %lu)\n",
2839 							dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2840 					return -EINVAL;
2841 				}
2842 				p->idx += 9;
2843 				break;
2844 			/* Copy L2L, byte aligned */
2845 			case 0x40:
2846 				/* L2L, byte */
2847 				src_offset = radeon_get_ib_value(p, idx+2);
2848 				src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2849 				dst_offset = radeon_get_ib_value(p, idx+1);
2850 				dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
2851 				if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
2852 					dev_warn(p->dev, "DMA L2L, byte src buffer too small (%"PRIu64" %lu)\n",
2853 							src_offset + count, radeon_bo_size(src_reloc->robj));
2854 					return -EINVAL;
2855 				}
2856 				if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
2857 					dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%"PRIu64" %lu)\n",
2858 							dst_offset + count, radeon_bo_size(dst_reloc->robj));
2859 					return -EINVAL;
2860 				}
2861 				ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xffffffff);
2862 				ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xffffffff);
2863 				ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2864 				ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2865 				p->idx += 5;
2866 				break;
2867 			/* Copy L2L, partial */
2868 			case 0x41:
2869 				/* L2L, partial */
2870 				if (p->family < CHIP_CAYMAN) {
2871 					DRM_ERROR("L2L Partial is cayman only !\n");
2872 					return -EINVAL;
2873 				}
2874 				ib[idx+1] += (u32)(src_reloc->gpu_offset & 0xffffffff);
2875 				ib[idx+2] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2876 				ib[idx+4] += (u32)(dst_reloc->gpu_offset & 0xffffffff);
2877 				ib[idx+5] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2878 
2879 				p->idx += 9;
2880 				break;
2881 			/* Copy L2L, DW aligned, broadcast */
2882 			case 0x44:
2883 				/* L2L, dw, broadcast */
2884 				r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2885 				if (r) {
2886 					DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
2887 					return -EINVAL;
2888 				}
2889 				dst_offset = radeon_get_ib_value(p, idx+1);
2890 				dst_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2891 				dst2_offset = radeon_get_ib_value(p, idx+2);
2892 				dst2_offset |= ((u64)(radeon_get_ib_value(p, idx+5) & 0xff)) << 32;
2893 				src_offset = radeon_get_ib_value(p, idx+3);
2894 				src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
2895 				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2896 					dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%"PRIu64" %lu)\n",
2897 							src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2898 					return -EINVAL;
2899 				}
2900 				if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2901 					dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%"PRIu64" %lu)\n",
2902 							dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2903 					return -EINVAL;
2904 				}
2905 				if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
2906 					dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%"PRIu64" %lu)\n",
2907 							dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
2908 					return -EINVAL;
2909 				}
2910 				ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2911 				ib[idx+2] += (u32)(dst2_reloc->gpu_offset & 0xfffffffc);
2912 				ib[idx+3] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2913 				ib[idx+4] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2914 				ib[idx+5] += upper_32_bits(dst2_reloc->gpu_offset) & 0xff;
2915 				ib[idx+6] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2916 				p->idx += 7;
2917 				break;
2918 			/* Copy L2T Frame to Field */
2919 			case 0x48:
2920 				if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
2921 					DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2922 					return -EINVAL;
2923 				}
2924 				r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2925 				if (r) {
2926 					DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2927 					return -EINVAL;
2928 				}
2929 				dst_offset = radeon_get_ib_value(p, idx+1);
2930 				dst_offset <<= 8;
2931 				dst2_offset = radeon_get_ib_value(p, idx+2);
2932 				dst2_offset <<= 8;
2933 				src_offset = radeon_get_ib_value(p, idx+8);
2934 				src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
2935 				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2936 					dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%"PRIu64" %lu)\n",
2937 							src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2938 					return -EINVAL;
2939 				}
2940 				if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2941 					dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%"PRIu64" %lu)\n",
2942 							dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2943 					return -EINVAL;
2944 				}
2945 				if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
2946 					dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%"PRIu64" %lu)\n",
2947 							dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
2948 					return -EINVAL;
2949 				}
2950 				ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2951 				ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
2952 				ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2953 				ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2954 				p->idx += 10;
2955 				break;
2956 			/* Copy L2T/T2L, partial */
2957 			case 0x49:
2958 				/* L2T, T2L partial */
2959 				if (p->family < CHIP_CAYMAN) {
2960 					DRM_ERROR("L2T, T2L Partial is cayman only !\n");
2961 					return -EINVAL;
2962 				}
2963 				/* detile bit */
2964 				if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
2965 					/* tiled src, linear dst */
2966 					ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
2967 
2968 					ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2969 					ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2970 				} else {
2971 					/* linear src, tiled dst */
2972 					ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2973 					ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2974 
2975 					ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2976 				}
2977 				p->idx += 12;
2978 				break;
2979 			/* Copy L2T broadcast */
2980 			case 0x4b:
2981 				/* L2T, broadcast */
2982 				if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
2983 					DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
2984 					return -EINVAL;
2985 				}
2986 				r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2987 				if (r) {
2988 					DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
2989 					return -EINVAL;
2990 				}
2991 				dst_offset = radeon_get_ib_value(p, idx+1);
2992 				dst_offset <<= 8;
2993 				dst2_offset = radeon_get_ib_value(p, idx+2);
2994 				dst2_offset <<= 8;
2995 				src_offset = radeon_get_ib_value(p, idx+8);
2996 				src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
2997 				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2998 					dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%"PRIu64" %lu)\n",
2999 							src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3000 					return -EINVAL;
3001 				}
3002 				if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3003 					dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%"PRIu64" %lu)\n",
3004 							dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3005 					return -EINVAL;
3006 				}
3007 				if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3008 					dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%"PRIu64" %lu)\n",
3009 							dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3010 					return -EINVAL;
3011 				}
3012 				ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3013 				ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
3014 				ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3015 				ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3016 				p->idx += 10;
3017 				break;
3018 			/* Copy L2T/T2L (tile units) */
3019 			case 0x4c:
3020 				/* L2T, T2L */
3021 				/* detile bit */
3022 				if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3023 					/* tiled src, linear dst */
3024 					src_offset = radeon_get_ib_value(p, idx+1);
3025 					src_offset <<= 8;
3026 					ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
3027 
3028 					dst_offset = radeon_get_ib_value(p, idx+7);
3029 					dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3030 					ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3031 					ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
3032 				} else {
3033 					/* linear src, tiled dst */
3034 					src_offset = radeon_get_ib_value(p, idx+7);
3035 					src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3036 					ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3037 					ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3038 
3039 					dst_offset = radeon_get_ib_value(p, idx+1);
3040 					dst_offset <<= 8;
3041 					ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3042 				}
3043 				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3044 					dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%"PRIu64" %lu)\n",
3045 							src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3046 					return -EINVAL;
3047 				}
3048 				if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3049 					dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%"PRIu64" %lu)\n",
3050 							dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3051 					return -EINVAL;
3052 				}
3053 				p->idx += 9;
3054 				break;
3055 			/* Copy T2T, partial (tile units) */
3056 			case 0x4d:
3057 				/* T2T partial */
3058 				if (p->family < CHIP_CAYMAN) {
3059 					DRM_ERROR("L2T, T2L Partial is cayman only !\n");
3060 					return -EINVAL;
3061 				}
3062 				ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
3063 				ib[idx+4] += (u32)(dst_reloc->gpu_offset >> 8);
3064 				p->idx += 13;
3065 				break;
3066 			/* Copy L2T broadcast (tile units) */
3067 			case 0x4f:
3068 				/* L2T, broadcast */
3069 				if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3070 					DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3071 					return -EINVAL;
3072 				}
3073 				r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3074 				if (r) {
3075 					DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3076 					return -EINVAL;
3077 				}
3078 				dst_offset = radeon_get_ib_value(p, idx+1);
3079 				dst_offset <<= 8;
3080 				dst2_offset = radeon_get_ib_value(p, idx+2);
3081 				dst2_offset <<= 8;
3082 				src_offset = radeon_get_ib_value(p, idx+8);
3083 				src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3084 				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3085 					dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%"PRIu64" %lu)\n",
3086 							src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3087 					return -EINVAL;
3088 				}
3089 				if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3090 					dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%"PRIu64" %lu)\n",
3091 							dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3092 					return -EINVAL;
3093 				}
3094 				if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3095 					dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%"PRIu64" %lu)\n",
3096 							dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3097 					return -EINVAL;
3098 				}
3099 				ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3100 				ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
3101 				ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3102 				ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3103 				p->idx += 10;
3104 				break;
3105 			default:
3106 				DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, header);
3107 				return -EINVAL;
3108 			}
3109 			break;
3110 		case DMA_PACKET_CONSTANT_FILL:
3111 			r = r600_dma_cs_next_reloc(p, &dst_reloc);
3112 			if (r) {
3113 				DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
3114 				return -EINVAL;
3115 			}
3116 			dst_offset = radeon_get_ib_value(p, idx+1);
3117 			dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16;
3118 			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3119 				dev_warn(p->dev, "DMA constant fill buffer too small (%"PRIu64" %lu)\n",
3120 					 dst_offset, radeon_bo_size(dst_reloc->robj));
3121 				return -EINVAL;
3122 			}
3123 			ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3124 			ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) << 16) & 0x00ff0000;
3125 			p->idx += 4;
3126 			break;
3127 		case DMA_PACKET_NOP:
3128 			p->idx += 1;
3129 			break;
3130 		default:
3131 			DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
3132 			return -EINVAL;
3133 		}
3134 	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
3135 #if 0
3136 	for (r = 0; r < p->ib->length_dw; r++) {
3137 		printk(KERN_INFO "%05d  0x%08X\n", r, p->ib.ptr[r]);
3138 		mdelay(1);
3139 	}
3140 #endif
3141 	return 0;
3142 }
3143 
3144 /* vm parser */
evergreen_vm_reg_valid(u32 reg)3145 static bool evergreen_vm_reg_valid(u32 reg)
3146 {
3147 	/* context regs are fine */
3148 	if (reg >= 0x28000)
3149 		return true;
3150 
3151 	/* check config regs */
3152 	switch (reg) {
3153 	case WAIT_UNTIL:
3154 	case GRBM_GFX_INDEX:
3155 	case CP_STRMOUT_CNTL:
3156 	case CP_COHER_CNTL:
3157 	case CP_COHER_SIZE:
3158 	case VGT_VTX_VECT_EJECT_REG:
3159 	case VGT_CACHE_INVALIDATION:
3160 	case VGT_GS_VERTEX_REUSE:
3161 	case VGT_PRIMITIVE_TYPE:
3162 	case VGT_INDEX_TYPE:
3163 	case VGT_NUM_INDICES:
3164 	case VGT_NUM_INSTANCES:
3165 	case VGT_COMPUTE_DIM_X:
3166 	case VGT_COMPUTE_DIM_Y:
3167 	case VGT_COMPUTE_DIM_Z:
3168 	case VGT_COMPUTE_START_X:
3169 	case VGT_COMPUTE_START_Y:
3170 	case VGT_COMPUTE_START_Z:
3171 	case VGT_COMPUTE_INDEX:
3172 	case VGT_COMPUTE_THREAD_GROUP_SIZE:
3173 	case VGT_HS_OFFCHIP_PARAM:
3174 	case PA_CL_ENHANCE:
3175 	case PA_SU_LINE_STIPPLE_VALUE:
3176 	case PA_SC_LINE_STIPPLE_STATE:
3177 	case PA_SC_ENHANCE:
3178 	case SQ_DYN_GPR_CNTL_PS_FLUSH_REQ:
3179 	case SQ_DYN_GPR_SIMD_LOCK_EN:
3180 	case SQ_CONFIG:
3181 	case SQ_GPR_RESOURCE_MGMT_1:
3182 	case SQ_GLOBAL_GPR_RESOURCE_MGMT_1:
3183 	case SQ_GLOBAL_GPR_RESOURCE_MGMT_2:
3184 	case SQ_CONST_MEM_BASE:
3185 	case SQ_STATIC_THREAD_MGMT_1:
3186 	case SQ_STATIC_THREAD_MGMT_2:
3187 	case SQ_STATIC_THREAD_MGMT_3:
3188 	case SPI_CONFIG_CNTL:
3189 	case SPI_CONFIG_CNTL_1:
3190 	case TA_CNTL_AUX:
3191 	case DB_DEBUG:
3192 	case DB_DEBUG2:
3193 	case DB_DEBUG3:
3194 	case DB_DEBUG4:
3195 	case DB_WATERMARKS:
3196 	case TD_PS_BORDER_COLOR_INDEX:
3197 	case TD_PS_BORDER_COLOR_RED:
3198 	case TD_PS_BORDER_COLOR_GREEN:
3199 	case TD_PS_BORDER_COLOR_BLUE:
3200 	case TD_PS_BORDER_COLOR_ALPHA:
3201 	case TD_VS_BORDER_COLOR_INDEX:
3202 	case TD_VS_BORDER_COLOR_RED:
3203 	case TD_VS_BORDER_COLOR_GREEN:
3204 	case TD_VS_BORDER_COLOR_BLUE:
3205 	case TD_VS_BORDER_COLOR_ALPHA:
3206 	case TD_GS_BORDER_COLOR_INDEX:
3207 	case TD_GS_BORDER_COLOR_RED:
3208 	case TD_GS_BORDER_COLOR_GREEN:
3209 	case TD_GS_BORDER_COLOR_BLUE:
3210 	case TD_GS_BORDER_COLOR_ALPHA:
3211 	case TD_HS_BORDER_COLOR_INDEX:
3212 	case TD_HS_BORDER_COLOR_RED:
3213 	case TD_HS_BORDER_COLOR_GREEN:
3214 	case TD_HS_BORDER_COLOR_BLUE:
3215 	case TD_HS_BORDER_COLOR_ALPHA:
3216 	case TD_LS_BORDER_COLOR_INDEX:
3217 	case TD_LS_BORDER_COLOR_RED:
3218 	case TD_LS_BORDER_COLOR_GREEN:
3219 	case TD_LS_BORDER_COLOR_BLUE:
3220 	case TD_LS_BORDER_COLOR_ALPHA:
3221 	case TD_CS_BORDER_COLOR_INDEX:
3222 	case TD_CS_BORDER_COLOR_RED:
3223 	case TD_CS_BORDER_COLOR_GREEN:
3224 	case TD_CS_BORDER_COLOR_BLUE:
3225 	case TD_CS_BORDER_COLOR_ALPHA:
3226 	case SQ_ESGS_RING_SIZE:
3227 	case SQ_GSVS_RING_SIZE:
3228 	case SQ_ESTMP_RING_SIZE:
3229 	case SQ_GSTMP_RING_SIZE:
3230 	case SQ_HSTMP_RING_SIZE:
3231 	case SQ_LSTMP_RING_SIZE:
3232 	case SQ_PSTMP_RING_SIZE:
3233 	case SQ_VSTMP_RING_SIZE:
3234 	case SQ_ESGS_RING_ITEMSIZE:
3235 	case SQ_ESTMP_RING_ITEMSIZE:
3236 	case SQ_GSTMP_RING_ITEMSIZE:
3237 	case SQ_GSVS_RING_ITEMSIZE:
3238 	case SQ_GS_VERT_ITEMSIZE:
3239 	case SQ_GS_VERT_ITEMSIZE_1:
3240 	case SQ_GS_VERT_ITEMSIZE_2:
3241 	case SQ_GS_VERT_ITEMSIZE_3:
3242 	case SQ_GSVS_RING_OFFSET_1:
3243 	case SQ_GSVS_RING_OFFSET_2:
3244 	case SQ_GSVS_RING_OFFSET_3:
3245 	case SQ_HSTMP_RING_ITEMSIZE:
3246 	case SQ_LSTMP_RING_ITEMSIZE:
3247 	case SQ_PSTMP_RING_ITEMSIZE:
3248 	case SQ_VSTMP_RING_ITEMSIZE:
3249 	case VGT_TF_RING_SIZE:
3250 	case SQ_ESGS_RING_BASE:
3251 	case SQ_GSVS_RING_BASE:
3252 	case SQ_ESTMP_RING_BASE:
3253 	case SQ_GSTMP_RING_BASE:
3254 	case SQ_HSTMP_RING_BASE:
3255 	case SQ_LSTMP_RING_BASE:
3256 	case SQ_PSTMP_RING_BASE:
3257 	case SQ_VSTMP_RING_BASE:
3258 	case CAYMAN_VGT_OFFCHIP_LDS_BASE:
3259 	case CAYMAN_SQ_EX_ALLOC_TABLE_SLOTS:
3260 		return true;
3261 	default:
3262 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
3263 		return false;
3264 	}
3265 }
3266 
evergreen_vm_packet3_check(struct radeon_device * rdev,u32 * ib,struct radeon_cs_packet * pkt)3267 static int evergreen_vm_packet3_check(struct radeon_device *rdev,
3268 				      u32 *ib, struct radeon_cs_packet *pkt)
3269 {
3270 	u32 idx = pkt->idx + 1;
3271 	u32 idx_value = ib[idx];
3272 	u32 start_reg, end_reg, reg, i;
3273 	u32 command, info;
3274 
3275 	switch (pkt->opcode) {
3276 	case PACKET3_NOP:
3277 	case PACKET3_SET_BASE:
3278 	case PACKET3_CLEAR_STATE:
3279 	case PACKET3_INDEX_BUFFER_SIZE:
3280 	case PACKET3_DISPATCH_DIRECT:
3281 	case PACKET3_DISPATCH_INDIRECT:
3282 	case PACKET3_MODE_CONTROL:
3283 	case PACKET3_SET_PREDICATION:
3284 	case PACKET3_COND_EXEC:
3285 	case PACKET3_PRED_EXEC:
3286 	case PACKET3_DRAW_INDIRECT:
3287 	case PACKET3_DRAW_INDEX_INDIRECT:
3288 	case PACKET3_INDEX_BASE:
3289 	case PACKET3_DRAW_INDEX_2:
3290 	case PACKET3_CONTEXT_CONTROL:
3291 	case PACKET3_DRAW_INDEX_OFFSET:
3292 	case PACKET3_INDEX_TYPE:
3293 	case PACKET3_DRAW_INDEX:
3294 	case PACKET3_DRAW_INDEX_AUTO:
3295 	case PACKET3_DRAW_INDEX_IMMD:
3296 	case PACKET3_NUM_INSTANCES:
3297 	case PACKET3_DRAW_INDEX_MULTI_AUTO:
3298 	case PACKET3_STRMOUT_BUFFER_UPDATE:
3299 	case PACKET3_DRAW_INDEX_OFFSET_2:
3300 	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
3301 	case PACKET3_MPEG_INDEX:
3302 	case PACKET3_WAIT_REG_MEM:
3303 	case PACKET3_MEM_WRITE:
3304 	case PACKET3_SURFACE_SYNC:
3305 	case PACKET3_EVENT_WRITE:
3306 	case PACKET3_EVENT_WRITE_EOP:
3307 	case PACKET3_EVENT_WRITE_EOS:
3308 	case PACKET3_SET_CONTEXT_REG:
3309 	case PACKET3_SET_BOOL_CONST:
3310 	case PACKET3_SET_LOOP_CONST:
3311 	case PACKET3_SET_RESOURCE:
3312 	case PACKET3_SET_SAMPLER:
3313 	case PACKET3_SET_CTL_CONST:
3314 	case PACKET3_SET_RESOURCE_OFFSET:
3315 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
3316 	case PACKET3_SET_RESOURCE_INDIRECT:
3317 	case CAYMAN_PACKET3_DEALLOC_STATE:
3318 		break;
3319 	case PACKET3_COND_WRITE:
3320 		if (idx_value & 0x100) {
3321 			reg = ib[idx + 5] * 4;
3322 			if (!evergreen_vm_reg_valid(reg))
3323 				return -EINVAL;
3324 		}
3325 		break;
3326 	case PACKET3_COPY_DW:
3327 		if (idx_value & 0x2) {
3328 			reg = ib[idx + 3] * 4;
3329 			if (!evergreen_vm_reg_valid(reg))
3330 				return -EINVAL;
3331 		}
3332 		break;
3333 	case PACKET3_SET_CONFIG_REG:
3334 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
3335 		end_reg = 4 * pkt->count + start_reg - 4;
3336 		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
3337 		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
3338 		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
3339 			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
3340 			return -EINVAL;
3341 		}
3342 		for (i = 0; i < pkt->count; i++) {
3343 			reg = start_reg + (4 * i);
3344 			if (!evergreen_vm_reg_valid(reg))
3345 				return -EINVAL;
3346 		}
3347 		break;
3348 	case PACKET3_CP_DMA:
3349 		command = ib[idx + 4];
3350 		info = ib[idx + 1];
3351 		if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
3352 		    (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
3353 		    ((((info & 0x00300000) >> 20) == 0) &&
3354 		     (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
3355 		    ((((info & 0x60000000) >> 29) == 0) &&
3356 		     (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
3357 			/* non mem to mem copies requires dw aligned count */
3358 			if ((command & 0x1fffff) % 4) {
3359 				DRM_ERROR("CP DMA command requires dw count alignment\n");
3360 				return -EINVAL;
3361 			}
3362 		}
3363 		if (command & PACKET3_CP_DMA_CMD_SAS) {
3364 			/* src address space is register */
3365 			if (((info & 0x60000000) >> 29) == 0) {
3366 				start_reg = idx_value << 2;
3367 				if (command & PACKET3_CP_DMA_CMD_SAIC) {
3368 					reg = start_reg;
3369 					if (!evergreen_vm_reg_valid(reg)) {
3370 						DRM_ERROR("CP DMA Bad SRC register\n");
3371 						return -EINVAL;
3372 					}
3373 				} else {
3374 					for (i = 0; i < (command & 0x1fffff); i++) {
3375 						reg = start_reg + (4 * i);
3376 						if (!evergreen_vm_reg_valid(reg)) {
3377 							DRM_ERROR("CP DMA Bad SRC register\n");
3378 							return -EINVAL;
3379 						}
3380 					}
3381 				}
3382 			}
3383 		}
3384 		if (command & PACKET3_CP_DMA_CMD_DAS) {
3385 			/* dst address space is register */
3386 			if (((info & 0x00300000) >> 20) == 0) {
3387 				start_reg = ib[idx + 2];
3388 				if (command & PACKET3_CP_DMA_CMD_DAIC) {
3389 					reg = start_reg;
3390 					if (!evergreen_vm_reg_valid(reg)) {
3391 						DRM_ERROR("CP DMA Bad DST register\n");
3392 						return -EINVAL;
3393 					}
3394 				} else {
3395 					for (i = 0; i < (command & 0x1fffff); i++) {
3396 						reg = start_reg + (4 * i);
3397 						if (!evergreen_vm_reg_valid(reg)) {
3398 							DRM_ERROR("CP DMA Bad DST register\n");
3399 							return -EINVAL;
3400 						}
3401 					}
3402 				}
3403 			}
3404 		}
3405 		break;
3406 	default:
3407 		return -EINVAL;
3408 	}
3409 	return 0;
3410 }
3411 
evergreen_ib_parse(struct radeon_device * rdev,struct radeon_ib * ib)3412 int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3413 {
3414 	int ret = 0;
3415 	u32 idx = 0;
3416 	struct radeon_cs_packet pkt;
3417 
3418 	do {
3419 		pkt.idx = idx;
3420 		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
3421 		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
3422 		pkt.one_reg_wr = 0;
3423 		switch (pkt.type) {
3424 		case RADEON_PACKET_TYPE0:
3425 			dev_err(rdev->dev, "Packet0 not allowed!\n");
3426 			ret = -EINVAL;
3427 			break;
3428 		case RADEON_PACKET_TYPE2:
3429 			idx += 1;
3430 			break;
3431 		case RADEON_PACKET_TYPE3:
3432 			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
3433 			ret = evergreen_vm_packet3_check(rdev, ib->ptr, &pkt);
3434 			idx += pkt.count + 2;
3435 			break;
3436 		default:
3437 			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
3438 			ret = -EINVAL;
3439 			break;
3440 		}
3441 		if (ret)
3442 			break;
3443 	} while (idx < ib->length_dw);
3444 
3445 	return ret;
3446 }
3447 
3448 /**
3449  * evergreen_dma_ib_parse() - parse the DMA IB for VM
3450  * @rdev: radeon_device pointer
3451  * @ib:	radeon_ib pointer
3452  *
3453  * Parses the DMA IB from the VM CS ioctl
3454  * checks for errors. (Cayman-SI)
3455  * Returns 0 for success and an error on failure.
3456  **/
evergreen_dma_ib_parse(struct radeon_device * rdev,struct radeon_ib * ib)3457 int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3458 {
3459 	u32 idx = 0;
3460 	u32 header, cmd, count, sub_cmd;
3461 
3462 	do {
3463 		header = ib->ptr[idx];
3464 		cmd = GET_DMA_CMD(header);
3465 		count = GET_DMA_COUNT(header);
3466 		sub_cmd = GET_DMA_SUB_CMD(header);
3467 
3468 		switch (cmd) {
3469 		case DMA_PACKET_WRITE:
3470 			switch (sub_cmd) {
3471 			/* tiled */
3472 			case 8:
3473 				idx += count + 7;
3474 				break;
3475 			/* linear */
3476 			case 0:
3477 				idx += count + 3;
3478 				break;
3479 			default:
3480 				DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, ib->ptr[idx]);
3481 				return -EINVAL;
3482 			}
3483 			break;
3484 		case DMA_PACKET_COPY:
3485 			switch (sub_cmd) {
3486 			/* Copy L2L, DW aligned */
3487 			case 0x00:
3488 				idx += 5;
3489 				break;
3490 			/* Copy L2T/T2L */
3491 			case 0x08:
3492 				idx += 9;
3493 				break;
3494 			/* Copy L2L, byte aligned */
3495 			case 0x40:
3496 				idx += 5;
3497 				break;
3498 			/* Copy L2L, partial */
3499 			case 0x41:
3500 				idx += 9;
3501 				break;
3502 			/* Copy L2L, DW aligned, broadcast */
3503 			case 0x44:
3504 				idx += 7;
3505 				break;
3506 			/* Copy L2T Frame to Field */
3507 			case 0x48:
3508 				idx += 10;
3509 				break;
3510 			/* Copy L2T/T2L, partial */
3511 			case 0x49:
3512 				idx += 12;
3513 				break;
3514 			/* Copy L2T broadcast */
3515 			case 0x4b:
3516 				idx += 10;
3517 				break;
3518 			/* Copy L2T/T2L (tile units) */
3519 			case 0x4c:
3520 				idx += 9;
3521 				break;
3522 			/* Copy T2T, partial (tile units) */
3523 			case 0x4d:
3524 				idx += 13;
3525 				break;
3526 			/* Copy L2T broadcast (tile units) */
3527 			case 0x4f:
3528 				idx += 10;
3529 				break;
3530 			default:
3531 				DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, ib->ptr[idx]);
3532 				return -EINVAL;
3533 			}
3534 			break;
3535 		case DMA_PACKET_CONSTANT_FILL:
3536 			idx += 4;
3537 			break;
3538 		case DMA_PACKET_NOP:
3539 			idx += 1;
3540 			break;
3541 		default:
3542 			DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
3543 			return -EINVAL;
3544 		}
3545 	} while (idx < ib->length_dw);
3546 
3547 	return 0;
3548 }
3549