1 /*
2 * Copyright 2010 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
28 #include <drm/drmP.h>
29 #include "radeon.h"
30 #include "evergreend.h"
31 #include "evergreen_reg_safe.h"
32 #include "cayman_reg_safe.h"
33
#ifndef __NetBSD__
/*
 * NOTE(review): guarded because NetBSD presumably already provides
 * MAX()/MIN() (via <sys/param.h>) -- confirm.  Classic function-like
 * macros: each argument is evaluated twice, so avoid side effects.
 */
#define MAX(a,b) (((a)>(b))?(a):(b))
#define MIN(a,b) (((a)<(b))?(a):(b))
#endif
38
39 int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
40 struct radeon_cs_reloc **cs_reloc);
/*
 * Per-parser checker state for an evergreen command stream: the chip's
 * tiling configuration plus the most recent values written to the color
 * buffer, depth/stencil, streamout and htile related registers.  The
 * *_dirty flags cause re-validation on the next evergreen_cs_track_check().
 */
struct evergreen_cs_track {
	u32 group_size;		/* tiling group size in bytes (used for base/pitch alignment) */
	u32 nbanks;		/* number of memory banks */
	u32 npipes;		/* number of pipes */
	u32 row_size;
	/* value we track */
	u32 nsamples; /* unused */
	struct radeon_bo *cb_color_bo[12];	/* bound color buffer BOs */
	u32 cb_color_bo_offset[12];	/* color base offsets (in 256-byte units) */
	struct radeon_bo *cb_color_fmask_bo[8]; /* unused */
	struct radeon_bo *cb_color_cmask_bo[8]; /* unused */
	u32 cb_color_info[12];
	u32 cb_color_view[12];
	u32 cb_color_pitch[12];
	u32 cb_color_slice[12];
	u32 cb_color_slice_idx[12];	/* ib index of the slice dword, for the old-ddx patch */
	u32 cb_color_attrib[12];
	u32 cb_color_cmask_slice[8];/* unused */
	u32 cb_color_fmask_slice[8];/* unused */
	u32 cb_target_mask;
	u32 cb_shader_mask; /* unused */
	u32 vgt_strmout_config;
	u32 vgt_strmout_buffer_config;
	struct radeon_bo *vgt_strmout_bo[4];	/* streamout buffer BOs */
	u32 vgt_strmout_bo_offset[4];
	u32 vgt_strmout_size[4];
	u32 db_depth_control;
	u32 db_depth_view;
	u32 db_depth_slice;
	u32 db_depth_size;
	u32 db_z_info;
	u32 db_z_read_offset;	/* depth read base (in 256-byte units) */
	u32 db_z_write_offset;	/* depth write base (in 256-byte units) */
	struct radeon_bo *db_z_read_bo;
	struct radeon_bo *db_z_write_bo;
	u32 db_s_info;
	u32 db_s_read_offset;	/* stencil read base (in 256-byte units) */
	u32 db_s_write_offset;	/* stencil write base (in 256-byte units) */
	struct radeon_bo *db_s_read_bo;
	struct radeon_bo *db_s_write_bo;
	bool sx_misc_kill_all_prims;	/* when set, nothing is drawn so checks are skipped */
	bool cb_dirty;		/* re-validate color buffers on next check */
	bool db_dirty;		/* re-validate depth/stencil on next check */
	bool streamout_dirty;	/* re-validate streamout buffers on next check */
	u32 htile_offset;
	u32 htile_surface;
	struct radeon_bo *htile_bo;
};
89
/*
 * Map BO tiling flags onto the hardware surface array mode.
 * Macro tiling wins over micro tiling; neither means linear-general.
 */
static u32 evergreen_cs_get_aray_mode(u32 tiling_flags)
{
	u32 mode = ARRAY_LINEAR_GENERAL;

	if (tiling_flags & RADEON_TILING_MACRO)
		mode = ARRAY_2D_TILED_THIN1;
	else if (tiling_flags & RADEON_TILING_MICRO)
		mode = ARRAY_1D_TILED_THIN1;
	return mode;
}
99
/*
 * Translate a bank count (2/4/8/16) into the ADDR_SURF_*_BANK encoding;
 * any unexpected count falls back to the 8-bank encoding.
 */
static u32 evergreen_cs_get_num_banks(u32 nbanks)
{
	switch (nbanks) {
	case 2:
		return ADDR_SURF_2_BANK;
	case 4:
		return ADDR_SURF_4_BANK;
	case 16:
		return ADDR_SURF_16_BANK;
	case 8:
	default:
		return ADDR_SURF_8_BANK;
	}
}
114
evergreen_cs_track_init(struct evergreen_cs_track * track)115 static void evergreen_cs_track_init(struct evergreen_cs_track *track)
116 {
117 int i;
118
119 for (i = 0; i < 8; i++) {
120 track->cb_color_fmask_bo[i] = NULL;
121 track->cb_color_cmask_bo[i] = NULL;
122 track->cb_color_cmask_slice[i] = 0;
123 track->cb_color_fmask_slice[i] = 0;
124 }
125
126 for (i = 0; i < 12; i++) {
127 track->cb_color_bo[i] = NULL;
128 track->cb_color_bo_offset[i] = 0xFFFFFFFF;
129 track->cb_color_info[i] = 0;
130 track->cb_color_view[i] = 0xFFFFFFFF;
131 track->cb_color_pitch[i] = 0;
132 track->cb_color_slice[i] = 0xfffffff;
133 track->cb_color_slice_idx[i] = 0;
134 }
135 track->cb_target_mask = 0xFFFFFFFF;
136 track->cb_shader_mask = 0xFFFFFFFF;
137 track->cb_dirty = true;
138
139 track->db_depth_slice = 0xffffffff;
140 track->db_depth_view = 0xFFFFC000;
141 track->db_depth_size = 0xFFFFFFFF;
142 track->db_depth_control = 0xFFFFFFFF;
143 track->db_z_info = 0xFFFFFFFF;
144 track->db_z_read_offset = 0xFFFFFFFF;
145 track->db_z_write_offset = 0xFFFFFFFF;
146 track->db_z_read_bo = NULL;
147 track->db_z_write_bo = NULL;
148 track->db_s_info = 0xFFFFFFFF;
149 track->db_s_read_offset = 0xFFFFFFFF;
150 track->db_s_write_offset = 0xFFFFFFFF;
151 track->db_s_read_bo = NULL;
152 track->db_s_write_bo = NULL;
153 track->db_dirty = true;
154 track->htile_bo = NULL;
155 track->htile_offset = 0xFFFFFFFF;
156 track->htile_surface = 0;
157
158 for (i = 0; i < 4; i++) {
159 track->vgt_strmout_size[i] = 0;
160 track->vgt_strmout_bo[i] = NULL;
161 track->vgt_strmout_bo_offset[i] = 0xFFFFFFFF;
162 }
163 track->streamout_dirty = true;
164 track->sx_misc_kill_all_prims = false;
165 }
166
/*
 * Decoded description of one surface (color, depth, stencil or texture)
 * as programmed by the command stream, plus the alignment requirements
 * computed from it by the evergreen_surface_check_*() helpers.
 */
struct eg_surface {
	/* value gathered from cs */
	unsigned nbx;		/* width in blocks */
	unsigned nby;		/* height in blocks */
	unsigned format;
	unsigned mode;		/* array mode: linear / 1D / 2D tiled */
	unsigned nbanks;
	unsigned bankw;
	unsigned bankh;
	unsigned tsplit;	/* tile split (converted to bytes by value_conv_check) */
	unsigned mtilea;	/* macro tile aspect */
	unsigned nsamples;
	/* output value */
	unsigned bpe;		/* bytes per element */
	unsigned layer_size;	/* one layer/slice in bytes */
	unsigned palign;	/* required pitch alignment (blocks) */
	unsigned halign;	/* required height alignment (blocks) */
	unsigned long base_align;	/* required base offset alignment (bytes) */
};
186
evergreen_surface_check_linear(struct radeon_cs_parser * p,struct eg_surface * surf,const char * prefix)187 static int evergreen_surface_check_linear(struct radeon_cs_parser *p,
188 struct eg_surface *surf,
189 const char *prefix)
190 {
191 surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
192 surf->base_align = surf->bpe;
193 surf->palign = 1;
194 surf->halign = 1;
195 return 0;
196 }
197
evergreen_surface_check_linear_aligned(struct radeon_cs_parser * p,struct eg_surface * surf,const char * prefix)198 static int evergreen_surface_check_linear_aligned(struct radeon_cs_parser *p,
199 struct eg_surface *surf,
200 const char *prefix)
201 {
202 struct evergreen_cs_track *track = p->track;
203 unsigned palign;
204
205 palign = MAX(64, track->group_size / surf->bpe);
206 surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
207 surf->base_align = track->group_size;
208 surf->palign = palign;
209 surf->halign = 1;
210 if (surf->nbx & (palign - 1)) {
211 if (prefix) {
212 dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
213 __func__, __LINE__, prefix, surf->nbx, palign);
214 }
215 return -EINVAL;
216 }
217 return 0;
218 }
219
evergreen_surface_check_1d(struct radeon_cs_parser * p,struct eg_surface * surf,const char * prefix)220 static int evergreen_surface_check_1d(struct radeon_cs_parser *p,
221 struct eg_surface *surf,
222 const char *prefix)
223 {
224 struct evergreen_cs_track *track = p->track;
225 unsigned palign;
226
227 palign = track->group_size / (8 * surf->bpe * surf->nsamples);
228 palign = MAX(8, palign);
229 surf->layer_size = surf->nbx * surf->nby * surf->bpe;
230 surf->base_align = track->group_size;
231 surf->palign = palign;
232 surf->halign = 8;
233 if ((surf->nbx & (palign - 1))) {
234 if (prefix) {
235 dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d (%d %d %d)\n",
236 __func__, __LINE__, prefix, surf->nbx, palign,
237 track->group_size, surf->bpe, surf->nsamples);
238 }
239 return -EINVAL;
240 }
241 if ((surf->nby & (8 - 1))) {
242 if (prefix) {
243 dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with 8\n",
244 __func__, __LINE__, prefix, surf->nby);
245 }
246 return -EINVAL;
247 }
248 return 0;
249 }
250
evergreen_surface_check_2d(struct radeon_cs_parser * p,struct eg_surface * surf,const char * prefix)251 static int evergreen_surface_check_2d(struct radeon_cs_parser *p,
252 struct eg_surface *surf,
253 const char *prefix)
254 {
255 struct evergreen_cs_track *track = p->track;
256 unsigned palign, halign, tileb, slice_pt;
257 unsigned mtile_pr, mtile_ps, mtileb;
258
259 tileb = 64 * surf->bpe * surf->nsamples;
260 slice_pt = 1;
261 if (tileb > surf->tsplit) {
262 slice_pt = tileb / surf->tsplit;
263 }
264 tileb = tileb / slice_pt;
265 /* macro tile width & height */
266 palign = (8 * surf->bankw * track->npipes) * surf->mtilea;
267 halign = (8 * surf->bankh * surf->nbanks) / surf->mtilea;
268 mtileb = (palign / 8) * (halign / 8) * tileb;
269 mtile_pr = surf->nbx / palign;
270 mtile_ps = (mtile_pr * surf->nby) / halign;
271 surf->layer_size = mtile_ps * mtileb * slice_pt;
272 surf->base_align = (palign / 8) * (halign / 8) * tileb;
273 surf->palign = palign;
274 surf->halign = halign;
275
276 if ((surf->nbx & (palign - 1))) {
277 if (prefix) {
278 dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
279 __func__, __LINE__, prefix, surf->nbx, palign);
280 }
281 return -EINVAL;
282 }
283 if ((surf->nby & (halign - 1))) {
284 if (prefix) {
285 dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with %d\n",
286 __func__, __LINE__, prefix, surf->nby, halign);
287 }
288 return -EINVAL;
289 }
290
291 return 0;
292 }
293
evergreen_surface_check(struct radeon_cs_parser * p,struct eg_surface * surf,const char * prefix)294 static int evergreen_surface_check(struct radeon_cs_parser *p,
295 struct eg_surface *surf,
296 const char *prefix)
297 {
298 /* some common value computed here */
299 surf->bpe = r600_fmt_get_blocksize(surf->format);
300
301 switch (surf->mode) {
302 case ARRAY_LINEAR_GENERAL:
303 return evergreen_surface_check_linear(p, surf, prefix);
304 case ARRAY_LINEAR_ALIGNED:
305 return evergreen_surface_check_linear_aligned(p, surf, prefix);
306 case ARRAY_1D_TILED_THIN1:
307 return evergreen_surface_check_1d(p, surf, prefix);
308 case ARRAY_2D_TILED_THIN1:
309 return evergreen_surface_check_2d(p, surf, prefix);
310 default:
311 dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
312 __func__, __LINE__, prefix, surf->mode);
313 return -EINVAL;
314 }
315 return -EINVAL;
316 }
317
evergreen_surface_value_conv_check(struct radeon_cs_parser * p,struct eg_surface * surf,const char * prefix)318 static int evergreen_surface_value_conv_check(struct radeon_cs_parser *p,
319 struct eg_surface *surf,
320 const char *prefix)
321 {
322 switch (surf->mode) {
323 case ARRAY_2D_TILED_THIN1:
324 break;
325 case ARRAY_LINEAR_GENERAL:
326 case ARRAY_LINEAR_ALIGNED:
327 case ARRAY_1D_TILED_THIN1:
328 return 0;
329 default:
330 dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
331 __func__, __LINE__, prefix, surf->mode);
332 return -EINVAL;
333 }
334
335 switch (surf->nbanks) {
336 case 0: surf->nbanks = 2; break;
337 case 1: surf->nbanks = 4; break;
338 case 2: surf->nbanks = 8; break;
339 case 3: surf->nbanks = 16; break;
340 default:
341 dev_warn(p->dev, "%s:%d %s invalid number of banks %d\n",
342 __func__, __LINE__, prefix, surf->nbanks);
343 return -EINVAL;
344 }
345 switch (surf->bankw) {
346 case 0: surf->bankw = 1; break;
347 case 1: surf->bankw = 2; break;
348 case 2: surf->bankw = 4; break;
349 case 3: surf->bankw = 8; break;
350 default:
351 dev_warn(p->dev, "%s:%d %s invalid bankw %d\n",
352 __func__, __LINE__, prefix, surf->bankw);
353 return -EINVAL;
354 }
355 switch (surf->bankh) {
356 case 0: surf->bankh = 1; break;
357 case 1: surf->bankh = 2; break;
358 case 2: surf->bankh = 4; break;
359 case 3: surf->bankh = 8; break;
360 default:
361 dev_warn(p->dev, "%s:%d %s invalid bankh %d\n",
362 __func__, __LINE__, prefix, surf->bankh);
363 return -EINVAL;
364 }
365 switch (surf->mtilea) {
366 case 0: surf->mtilea = 1; break;
367 case 1: surf->mtilea = 2; break;
368 case 2: surf->mtilea = 4; break;
369 case 3: surf->mtilea = 8; break;
370 default:
371 dev_warn(p->dev, "%s:%d %s invalid macro tile aspect %d\n",
372 __func__, __LINE__, prefix, surf->mtilea);
373 return -EINVAL;
374 }
375 switch (surf->tsplit) {
376 case 0: surf->tsplit = 64; break;
377 case 1: surf->tsplit = 128; break;
378 case 2: surf->tsplit = 256; break;
379 case 3: surf->tsplit = 512; break;
380 case 4: surf->tsplit = 1024; break;
381 case 5: surf->tsplit = 2048; break;
382 case 6: surf->tsplit = 4096; break;
383 default:
384 dev_warn(p->dev, "%s:%d %s invalid tile split %d\n",
385 __func__, __LINE__, prefix, surf->tsplit);
386 return -EINVAL;
387 }
388 return 0;
389 }
390
/*
 * Validate color buffer 'id' against the last CB_COLOR* register values:
 * decode the surface geometry and tiling, verify the format, alignment
 * and that the bound BO is large enough.  Also contains a workaround for
 * old broken ddx which allocated w*h*bpp sized BOs while programming
 * slice as if the height were aligned to 8: when the BO is too small for
 * a linear surface, the checker searches for a smaller height that fits
 * and patches the slice dword directly in the ib.
 *
 * Returns 0 on success, negative error code on invalid state.
 */
static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned id)
{
	struct evergreen_cs_track *track = p->track;
	struct eg_surface surf;
	unsigned pitch, slice, mslice;
	unsigned long offset;
	int r;

	/* decode surface geometry from the tracked register values */
	mslice = G_028C6C_SLICE_MAX(track->cb_color_view[id]) + 1;
	pitch = track->cb_color_pitch[id];
	slice = track->cb_color_slice[id];
	surf.nbx = (pitch + 1) * 8;
	surf.nby = ((slice + 1) * 64) / surf.nbx;
	surf.mode = G_028C70_ARRAY_MODE(track->cb_color_info[id]);
	surf.format = G_028C70_FORMAT(track->cb_color_info[id]);
	surf.tsplit = G_028C74_TILE_SPLIT(track->cb_color_attrib[id]);
	surf.nbanks = G_028C74_NUM_BANKS(track->cb_color_attrib[id]);
	surf.bankw = G_028C74_BANK_WIDTH(track->cb_color_attrib[id]);
	surf.bankh = G_028C74_BANK_HEIGHT(track->cb_color_attrib[id]);
	surf.mtilea = G_028C74_MACRO_TILE_ASPECT(track->cb_color_attrib[id]);
	surf.nsamples = 1;

	if (!r600_fmt_is_valid_color(surf.format)) {
		dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08x)\n",
			 __func__, __LINE__, surf.format,
			 id, track->cb_color_info[id]);
		return -EINVAL;
	}

	r = evergreen_surface_value_conv_check(p, &surf, "cb");
	if (r) {
		return r;
	}

	r = evergreen_surface_check(p, &surf, "cb");
	if (r) {
		dev_warn(p->dev, "%s:%d cb[%d] invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
			 __func__, __LINE__, id, track->cb_color_pitch[id],
			 track->cb_color_slice[id], track->cb_color_attrib[id],
			 track->cb_color_info[id]);
		return r;
	}

	/* base offset alignment (offsets are in 256-byte units) */
	offset = track->cb_color_bo_offset[id] << 8;
	if (offset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d cb[%d] bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, id, offset, surf.base_align);
		return -EINVAL;
	}

	offset += surf.layer_size * mslice;
	if (offset > radeon_bo_size(track->cb_color_bo[id])) {
		/* old ddx are broken they allocate bo with w*h*bpp but
		 * program slice with ALIGN(h, 8), catch this and patch
		 * command stream.
		 */
		if (!surf.mode) {
			volatile u32 *ib = p->ib.ptr;
			unsigned long tmp, nby, bsize, size, vmin = 0;

			/* find the height the ddx wants */
			if (surf.nby > 8) {
				vmin = surf.nby - 8;
			}
			bsize = radeon_bo_size(track->cb_color_bo[id]);
			tmp = track->cb_color_bo_offset[id] << 8;
			/* search (downward, at most 8 rows) for a height
			 * whose layers fit in the BO */
			for (nby = surf.nby; nby > vmin; nby--) {
				size = nby * surf.nbx * surf.bpe * surf.nsamples;
				if ((tmp + size * mslice) <= bsize) {
					break;
				}
			}
			if (nby > vmin) {
				surf.nby = nby;
				slice = ((nby * surf.nbx) / 64) - 1;
				if (!evergreen_surface_check(p, &surf, "cb")) {
					/* check if this one works */
					tmp += surf.layer_size * mslice;
					if (tmp <= bsize) {
						/* patch the slice dword in the ib */
						ib[track->cb_color_slice_idx[id]] = slice;
						goto old_ddx_ok;
					}
				}
			}
		}
		dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, "
			 "offset %d, max layer %d, bo size %ld, slice %d)\n",
			 __func__, __LINE__, id, surf.layer_size,
			 track->cb_color_bo_offset[id] << 8, mslice,
			 radeon_bo_size(track->cb_color_bo[id]), slice);
		dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
			 __func__, __LINE__, surf.nbx, surf.nby,
			 surf.mode, surf.bpe, surf.nsamples,
			 surf.bankw, surf.bankh,
			 surf.tsplit, surf.mtilea);
		return -EINVAL;
	}
old_ddx_ok:

	return 0;
}
492
evergreen_cs_track_validate_htile(struct radeon_cs_parser * p,unsigned nbx,unsigned nby)493 static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
494 unsigned nbx, unsigned nby)
495 {
496 struct evergreen_cs_track *track = p->track;
497 unsigned long size;
498
499 if (track->htile_bo == NULL) {
500 dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n",
501 __func__, __LINE__, track->db_z_info);
502 return -EINVAL;
503 }
504
505 if (G_028ABC_LINEAR(track->htile_surface)) {
506 /* pitch must be 16 htiles aligned == 16 * 8 pixel aligned */
507 nbx = round_up(nbx, 16 * 8);
508 /* height is npipes htiles aligned == npipes * 8 pixel aligned */
509 nby = round_up(nby, track->npipes * 8);
510 } else {
511 /* always assume 8x8 htile */
512 /* align is htile align * 8, htile align vary according to
513 * number of pipe and tile width and nby
514 */
515 switch (track->npipes) {
516 case 8:
517 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
518 nbx = round_up(nbx, 64 * 8);
519 nby = round_up(nby, 64 * 8);
520 break;
521 case 4:
522 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
523 nbx = round_up(nbx, 64 * 8);
524 nby = round_up(nby, 32 * 8);
525 break;
526 case 2:
527 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
528 nbx = round_up(nbx, 32 * 8);
529 nby = round_up(nby, 32 * 8);
530 break;
531 case 1:
532 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
533 nbx = round_up(nbx, 32 * 8);
534 nby = round_up(nby, 16 * 8);
535 break;
536 default:
537 dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
538 __func__, __LINE__, track->npipes);
539 return -EINVAL;
540 }
541 }
542 /* compute number of htile */
543 nbx = nbx >> 3;
544 nby = nby >> 3;
545 /* size must be aligned on npipes * 2K boundary */
546 size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
547 size += track->htile_offset;
548
549 if (!track->htile_bo) {
550 dev_warn(p->dev, "%s:%d htile_bo not set", __func__, __LINE__);
551 return -EINVAL;
552 }
553 if (size > radeon_bo_size(track->htile_bo)) {
554 dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n",
555 __func__, __LINE__, radeon_bo_size(track->htile_bo),
556 size, nbx, nby);
557 return -EINVAL;
558 }
559 return 0;
560 }
561
/*
 * Validate the bound stencil buffer against the last DB_* register
 * values.  The stencil surface shares the depth surface's geometry
 * (pitch/slice/tiling come from the depth registers) but takes format
 * and tile split from the stencil info register.  Both the read and
 * write BOs are checked for base alignment and size; the htile buffer
 * is checked too when hyperz tiling is enabled.
 *
 * Returns 0 on success, negative error code on invalid state.
 */
static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p)
{
	struct evergreen_cs_track *track = p->track;
	struct eg_surface surf;
	unsigned pitch, slice, mslice;
	unsigned long offset;
	int r;

	/* decode surface geometry from the tracked register values */
	mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
	pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
	slice = track->db_depth_slice;
	surf.nbx = (pitch + 1) * 8;
	surf.nby = ((slice + 1) * 64) / surf.nbx;
	surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
	surf.format = G_028044_FORMAT(track->db_s_info);
	surf.tsplit = G_028044_TILE_SPLIT(track->db_s_info);
	surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
	surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
	surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
	surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
	surf.nsamples = 1;

	/* only format 1 (presumably 8-bit stencil -- see register defs) is valid */
	if (surf.format != 1) {
		dev_warn(p->dev, "%s:%d stencil invalid format %d\n",
			 __func__, __LINE__, surf.format);
		return -EINVAL;
	}
	/* replace by color format so we can use same code */
	surf.format = V_028C70_COLOR_8;

	r = evergreen_surface_value_conv_check(p, &surf, "stencil");
	if (r) {
		return r;
	}

	/* first try with the 1-byte format; prefix NULL suppresses warnings */
	r = evergreen_surface_check(p, &surf, NULL);
	if (r) {
		/* old userspace doesn't compute proper depth/stencil alignment
		 * check that alignment against a bigger byte per elements and
		 * only report if that alignment is wrong too.
		 */
		surf.format = V_028C70_COLOR_8_8_8_8;
		r = evergreen_surface_check(p, &surf, "stencil");
		if (r) {
			dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
				 __func__, __LINE__, track->db_depth_size,
				 track->db_depth_slice, track->db_s_info, track->db_z_info);
		}
		return r;
	}

	/* stencil read buffer: base alignment then size */
	offset = track->db_s_read_offset << 8;
	if (offset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, offset, surf.base_align);
		return -EINVAL;
	}
	offset += surf.layer_size * mslice;
	if (!track->db_s_read_bo) {
		dev_warn(p->dev, "%s:%d db_s_read_bo not set", __func__, __LINE__);
		return -EINVAL;
	}
	if (offset > radeon_bo_size(track->db_s_read_bo)) {
		dev_warn(p->dev, "%s:%d stencil read bo too small (layer size %d, "
			 "offset %ld, max layer %d, bo size %ld)\n",
			 __func__, __LINE__, surf.layer_size,
			 (unsigned long)track->db_s_read_offset << 8, mslice,
			 radeon_bo_size(track->db_s_read_bo));
		dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
			 __func__, __LINE__, track->db_depth_size,
			 track->db_depth_slice, track->db_s_info, track->db_z_info);
		return -EINVAL;
	}

	/* stencil write buffer: base alignment then size */
	offset = track->db_s_write_offset << 8;
	if (offset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, offset, surf.base_align);
		return -EINVAL;
	}
	offset += surf.layer_size * mslice;
	if (!track->db_s_write_bo) {
		dev_warn(p->dev, "%s:%d db_s_write_bo not set", __func__, __LINE__);
		return -EINVAL;
	}
	if (offset > radeon_bo_size(track->db_s_write_bo)) {
		dev_warn(p->dev, "%s:%d stencil write bo too small (layer size %d, "
			 "offset %ld, max layer %d, bo size %ld)\n",
			 __func__, __LINE__, surf.layer_size,
			 (unsigned long)track->db_s_write_offset << 8, mslice,
			 radeon_bo_size(track->db_s_write_bo));
		return -EINVAL;
	}

	/* hyperz */
	if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
		r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
		if (r) {
			return r;
		}
	}

	return 0;
}
666
evergreen_cs_track_validate_depth(struct radeon_cs_parser * p)667 static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p)
668 {
669 struct evergreen_cs_track *track = p->track;
670 struct eg_surface surf;
671 unsigned pitch, slice, mslice;
672 unsigned long offset;
673 int r;
674
675 mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
676 pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
677 slice = track->db_depth_slice;
678 surf.nbx = (pitch + 1) * 8;
679 surf.nby = ((slice + 1) * 64) / surf.nbx;
680 surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
681 surf.format = G_028040_FORMAT(track->db_z_info);
682 surf.tsplit = G_028040_TILE_SPLIT(track->db_z_info);
683 surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
684 surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
685 surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
686 surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
687 surf.nsamples = 1;
688
689 switch (surf.format) {
690 case V_028040_Z_16:
691 surf.format = V_028C70_COLOR_16;
692 break;
693 case V_028040_Z_24:
694 case V_028040_Z_32_FLOAT:
695 surf.format = V_028C70_COLOR_8_8_8_8;
696 break;
697 default:
698 dev_warn(p->dev, "%s:%d depth invalid format %d\n",
699 __func__, __LINE__, surf.format);
700 return -EINVAL;
701 }
702
703 r = evergreen_surface_value_conv_check(p, &surf, "depth");
704 if (r) {
705 dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
706 __func__, __LINE__, track->db_depth_size,
707 track->db_depth_slice, track->db_z_info);
708 return r;
709 }
710
711 r = evergreen_surface_check(p, &surf, "depth");
712 if (r) {
713 dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
714 __func__, __LINE__, track->db_depth_size,
715 track->db_depth_slice, track->db_z_info);
716 return r;
717 }
718
719 offset = track->db_z_read_offset << 8;
720 if (offset & (surf.base_align - 1)) {
721 dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n",
722 __func__, __LINE__, offset, surf.base_align);
723 return -EINVAL;
724 }
725 offset += surf.layer_size * mslice;
726 if (!track->db_z_read_bo) {
727 dev_warn(p->dev, "%s:%d db_z_read_bo not set", __func__, __LINE__);
728 return -EINVAL;
729 }
730 if (offset > radeon_bo_size(track->db_z_read_bo)) {
731 dev_warn(p->dev, "%s:%d depth read bo too small (layer size %d, "
732 "offset %ld, max layer %d, bo size %ld)\n",
733 __func__, __LINE__, surf.layer_size,
734 (unsigned long)track->db_z_read_offset << 8, mslice,
735 radeon_bo_size(track->db_z_read_bo));
736 return -EINVAL;
737 }
738
739 offset = track->db_z_write_offset << 8;
740 if (offset & (surf.base_align - 1)) {
741 dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n",
742 __func__, __LINE__, offset, surf.base_align);
743 return -EINVAL;
744 }
745 offset += surf.layer_size * mslice;
746 if (!track->db_z_write_bo) {
747 dev_warn(p->dev, "%s:%d db_z_write_bo not set", __func__, __LINE__);
748 return -EINVAL;
749 }
750 if (offset > radeon_bo_size(track->db_z_write_bo)) {
751 dev_warn(p->dev, "%s:%d depth write bo too small (layer size %d, "
752 "offset %ld, max layer %d, bo size %ld)\n",
753 __func__, __LINE__, surf.layer_size,
754 (unsigned long)track->db_z_write_offset << 8, mslice,
755 radeon_bo_size(track->db_z_write_bo));
756 return -EINVAL;
757 }
758
759 /* hyperz */
760 if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
761 r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
762 if (r) {
763 return r;
764 }
765 }
766
767 return 0;
768 }
769
/*
 * Validate a texture resource descriptor (8 dwords starting at ib index
 * idx) against the BOs bound for the base texture and its mipmap chain:
 * decode dimension/format/tiling, run the per-level alignment checks,
 * and verify both BOs are large enough for every level.
 *
 * Returns 0 on success, negative error code on invalid state.
 */
static int evergreen_cs_track_validate_texture(struct radeon_cs_parser *p,
					       struct radeon_bo *texture,
					       struct radeon_bo *mipmap,
					       unsigned idx)
{
	struct eg_surface surf;
	unsigned long toffset, moffset;
	unsigned dim, llevel, mslice, width, height, depth, i;
	u32 texdw[8];
	int r;

	/* fetch the 8 resource dwords from the ib */
	texdw[0] = radeon_get_ib_value(p, idx + 0);
	texdw[1] = radeon_get_ib_value(p, idx + 1);
	texdw[2] = radeon_get_ib_value(p, idx + 2);
	texdw[3] = radeon_get_ib_value(p, idx + 3);
	texdw[4] = radeon_get_ib_value(p, idx + 4);
	texdw[5] = radeon_get_ib_value(p, idx + 5);
	texdw[6] = radeon_get_ib_value(p, idx + 6);
	texdw[7] = radeon_get_ib_value(p, idx + 7);
	/* decode the descriptor fields */
	dim = G_030000_DIM(texdw[0]);
	llevel = G_030014_LAST_LEVEL(texdw[5]);
	mslice = G_030014_LAST_ARRAY(texdw[5]) + 1;
	width = G_030000_TEX_WIDTH(texdw[0]) + 1;
	height = G_030004_TEX_HEIGHT(texdw[1]) + 1;
	depth = G_030004_TEX_DEPTH(texdw[1]) + 1;
	surf.format = G_03001C_DATA_FORMAT(texdw[7]);
	surf.nbx = (G_030000_PITCH(texdw[0]) + 1) * 8;
	surf.nbx = r600_fmt_get_nblocksx(surf.format, surf.nbx);
	surf.nby = r600_fmt_get_nblocksy(surf.format, height);
	surf.mode = G_030004_ARRAY_MODE(texdw[1]);
	surf.tsplit = G_030018_TILE_SPLIT(texdw[6]);
	surf.nbanks = G_03001C_NUM_BANKS(texdw[7]);
	surf.bankw = G_03001C_BANK_WIDTH(texdw[7]);
	surf.bankh = G_03001C_BANK_HEIGHT(texdw[7]);
	surf.mtilea = G_03001C_MACRO_TILE_ASPECT(texdw[7]);
	surf.nsamples = 1;
	/* base offsets are in 256-byte units */
	toffset = texdw[2] << 8;
	moffset = texdw[3] << 8;

	if (!r600_fmt_is_valid_texture(surf.format, p->family)) {
		dev_warn(p->dev, "%s:%d texture invalid format %d\n",
			 __func__, __LINE__, surf.format);
		return -EINVAL;
	}
	/* normalize depth / sample count per texture dimension */
	switch (dim) {
	case V_030000_SQ_TEX_DIM_1D:
	case V_030000_SQ_TEX_DIM_2D:
	case V_030000_SQ_TEX_DIM_CUBEMAP:
	case V_030000_SQ_TEX_DIM_1D_ARRAY:
	case V_030000_SQ_TEX_DIM_2D_ARRAY:
		depth = 1;
		break;
	case V_030000_SQ_TEX_DIM_2D_MSAA:
	case V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA:
		/* for MSAA the LAST_LEVEL field encodes log2(nsamples) */
		surf.nsamples = 1 << llevel;
		llevel = 0;
		depth = 1;
		break;
	case V_030000_SQ_TEX_DIM_3D:
		break;
	default:
		dev_warn(p->dev, "%s:%d texture invalid dimension %d\n",
			 __func__, __LINE__, dim);
		return -EINVAL;
	}

	r = evergreen_surface_value_conv_check(p, &surf, "texture");
	if (r) {
		return r;
	}

	/* align height */
	/* run the checker once (warnings suppressed) just to compute halign */
	evergreen_surface_check(p, &surf, NULL);
#ifdef __NetBSD__	/* XXX ALIGN means something else */
	surf.nby = round_up(surf.nby, surf.halign);
#else
	surf.nby = ALIGN(surf.nby, surf.halign);
#endif

	r = evergreen_surface_check(p, &surf, "texture");
	if (r) {
		dev_warn(p->dev, "%s:%d texture invalid 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
			 __func__, __LINE__, texdw[0], texdw[1], texdw[4],
			 texdw[5], texdw[6], texdw[7]);
		return r;
	}

	/* check texture size */
	if (toffset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d texture bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, toffset, surf.base_align);
		return -EINVAL;
	}
	if (surf.nsamples <= 1 && moffset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d mipmap bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, moffset, surf.base_align);
		return -EINVAL;
	}
	/* 3D textures stack layers by depth, array textures by array slice */
	if (dim == SQ_TEX_DIM_3D) {
		toffset += surf.layer_size * depth;
	} else {
		toffset += surf.layer_size * mslice;
	}
	if (toffset > radeon_bo_size(texture)) {
		dev_warn(p->dev, "%s:%d texture bo too small (layer size %d, "
			 "offset %ld, max layer %d, depth %d, bo size %ld) (%d %d)\n",
			 __func__, __LINE__, surf.layer_size,
			 (unsigned long)texdw[2] << 8, mslice,
			 depth, radeon_bo_size(texture),
			 surf.nbx, surf.nby);
		return -EINVAL;
	}

	if (!mipmap) {
		if (llevel) {
			dev_warn(p->dev, "%s:%i got NULL MIP_ADDRESS relocation\n",
				 __func__, __LINE__);
			return -EINVAL;
		} else {
			return 0; /* everything's ok */
		}
	}

	/* check mipmap size */
	for (i = 1; i <= llevel; i++) {
		unsigned w, h, d;

		/* minified dimensions of this mip level */
		w = r600_mip_minify(width, i);
		h = r600_mip_minify(height, i);
		d = r600_mip_minify(depth, i);
		surf.nbx = r600_fmt_get_nblocksx(surf.format, w);
		surf.nby = r600_fmt_get_nblocksy(surf.format, h);

		switch (surf.mode) {
		case ARRAY_2D_TILED_THIN1:
			/* small mips of 2D-tiled textures degrade to 1D tiling */
			if (surf.nbx < surf.palign || surf.nby < surf.halign) {
				surf.mode = ARRAY_1D_TILED_THIN1;
			}
			/* recompute alignment */
			evergreen_surface_check(p, &surf, NULL);
			break;
		case ARRAY_LINEAR_GENERAL:
		case ARRAY_LINEAR_ALIGNED:
		case ARRAY_1D_TILED_THIN1:
			break;
		default:
			dev_warn(p->dev, "%s:%d invalid array mode %d\n",
				 __func__, __LINE__, surf.mode);
			return -EINVAL;
		}
#ifdef __NetBSD__	/* XXX ALIGN means something else. */
		surf.nbx = round_up(surf.nbx, surf.palign);
		surf.nby = round_up(surf.nby, surf.halign);
#else
		surf.nbx = ALIGN(surf.nbx, surf.palign);
		surf.nby = ALIGN(surf.nby, surf.halign);
#endif

		r = evergreen_surface_check(p, &surf, "mipmap");
		if (r) {
			return r;
		}

		if (dim == SQ_TEX_DIM_3D) {
			moffset += surf.layer_size * d;
		} else {
			moffset += surf.layer_size * mslice;
		}
		if (moffset > radeon_bo_size(mipmap)) {
			dev_warn(p->dev, "%s:%d mipmap [%d] bo too small (layer size %d, "
				 "offset %ld, coffset %ld, max layer %d, depth %d, "
				 "bo size %ld) level0 (%d %d %d)\n",
				 __func__, __LINE__, i, surf.layer_size,
				 (unsigned long)texdw[3] << 8, moffset, mslice,
				 d, radeon_bo_size(mipmap),
				 width, height, depth);
			dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
				 __func__, __LINE__, surf.nbx, surf.nby,
				 surf.mode, surf.bpe, surf.nsamples,
				 surf.bankw, surf.bankh,
				 surf.tsplit, surf.mtilea);
			return -EINVAL;
		}
	}

	return 0;
}
957
evergreen_cs_track_check(struct radeon_cs_parser * p)958 static int evergreen_cs_track_check(struct radeon_cs_parser *p)
959 {
960 struct evergreen_cs_track *track = p->track;
961 unsigned tmp, i;
962 int r;
963 unsigned buffer_mask = 0;
964
965 /* check streamout */
966 if (track->streamout_dirty && track->vgt_strmout_config) {
967 for (i = 0; i < 4; i++) {
968 if (track->vgt_strmout_config & (1 << i)) {
969 buffer_mask |= (track->vgt_strmout_buffer_config >> (i * 4)) & 0xf;
970 }
971 }
972
973 for (i = 0; i < 4; i++) {
974 if (buffer_mask & (1 << i)) {
975 if (track->vgt_strmout_bo[i]) {
976 u64 offset = (u64)track->vgt_strmout_bo_offset[i] +
977 (u64)track->vgt_strmout_size[i];
978 if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) {
979 DRM_ERROR("streamout %d bo too small: 0x%"PRIx64", 0x%lx\n",
980 i, offset,
981 radeon_bo_size(track->vgt_strmout_bo[i]));
982 return -EINVAL;
983 }
984 } else {
985 dev_warn(p->dev, "No buffer for streamout %d\n", i);
986 return -EINVAL;
987 }
988 }
989 }
990 track->streamout_dirty = false;
991 }
992
993 if (track->sx_misc_kill_all_prims)
994 return 0;
995
996 /* check that we have a cb for each enabled target
997 */
998 if (track->cb_dirty) {
999 tmp = track->cb_target_mask;
1000 for (i = 0; i < 8; i++) {
1001 u32 format = G_028C70_FORMAT(track->cb_color_info[i]);
1002
1003 if (format != V_028C70_COLOR_INVALID &&
1004 (tmp >> (i * 4)) & 0xF) {
1005 /* at least one component is enabled */
1006 if (track->cb_color_bo[i] == NULL) {
1007 dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
1008 __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i);
1009 return -EINVAL;
1010 }
1011 /* check cb */
1012 r = evergreen_cs_track_validate_cb(p, i);
1013 if (r) {
1014 return r;
1015 }
1016 }
1017 }
1018 track->cb_dirty = false;
1019 }
1020
1021 if (track->db_dirty) {
1022 /* Check stencil buffer */
1023 if (G_028044_FORMAT(track->db_s_info) != V_028044_STENCIL_INVALID &&
1024 G_028800_STENCIL_ENABLE(track->db_depth_control)) {
1025 r = evergreen_cs_track_validate_stencil(p);
1026 if (r)
1027 return r;
1028 }
1029 /* Check depth buffer */
1030 if (G_028040_FORMAT(track->db_z_info) != V_028040_Z_INVALID &&
1031 G_028800_Z_ENABLE(track->db_depth_control)) {
1032 r = evergreen_cs_track_validate_depth(p);
1033 if (r)
1034 return r;
1035 }
1036 track->db_dirty = false;
1037 }
1038
1039 return 0;
1040 }
1041
/**
 * evergreen_cs_packet_parse_vline() - parse userspace VLINE packet
 * @p: parser structure holding parsing context.
 *
 * This is an Evergreen(+)-specific function for parsing VLINE packets.
 * Real work is done by the r600_cs_common_vline_parse function.
 * Here we just set up the ASIC-specific register table and call
 * the common implementation function.
 */
evergreen_cs_packet_parse_vline(struct radeon_cs_parser * p)1051 static int evergreen_cs_packet_parse_vline(struct radeon_cs_parser *p)
1052 {
1053
1054 static uint32_t vline_start_end[6] = {
1055 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC0_REGISTER_OFFSET,
1056 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC1_REGISTER_OFFSET,
1057 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC2_REGISTER_OFFSET,
1058 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC3_REGISTER_OFFSET,
1059 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC4_REGISTER_OFFSET,
1060 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC5_REGISTER_OFFSET
1061 };
1062 static uint32_t vline_status[6] = {
1063 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
1064 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
1065 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
1066 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
1067 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
1068 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET
1069 };
1070
1071 return r600_cs_common_vline_parse(p, vline_start_end, vline_status);
1072 }
1073
evergreen_packet0_check(struct radeon_cs_parser * p,struct radeon_cs_packet * pkt,unsigned idx,unsigned reg)1074 static int evergreen_packet0_check(struct radeon_cs_parser *p,
1075 struct radeon_cs_packet *pkt,
1076 unsigned idx, unsigned reg)
1077 {
1078 int r;
1079
1080 switch (reg) {
1081 case EVERGREEN_VLINE_START_END:
1082 r = evergreen_cs_packet_parse_vline(p);
1083 if (r) {
1084 DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1085 idx, reg);
1086 return r;
1087 }
1088 break;
1089 default:
1090 printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
1091 reg, idx);
1092 return -EINVAL;
1093 }
1094 return 0;
1095 }
1096
evergreen_cs_parse_packet0(struct radeon_cs_parser * p,struct radeon_cs_packet * pkt)1097 static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p,
1098 struct radeon_cs_packet *pkt)
1099 {
1100 unsigned reg, i;
1101 unsigned idx;
1102 int r;
1103
1104 idx = pkt->idx + 1;
1105 reg = pkt->reg;
1106 for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
1107 r = evergreen_packet0_check(p, pkt, idx, reg);
1108 if (r) {
1109 return r;
1110 }
1111 }
1112 return 0;
1113 }
1114
/**
 * evergreen_cs_check_reg() - check if register is authorized or not
 * @p: parser structure holding parsing context
 * @reg: register we are testing
 * @idx: index into the cs buffer
 *
 * This function will test against evergreen_reg_safe_bm and return 0
 * if the register is safe. If the register is not flagged as safe this
 * function will test it against a list of registers needing special
 * handling.
 */
static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
{
	struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track;
	struct radeon_cs_reloc *reloc;
	u32 last_reg;
	u32 m, i, tmp, *ib;
	int r;

	if (p->rdev->family >= CHIP_CAYMAN)
		last_reg = ARRAY_SIZE(cayman_reg_safe_bm);
	else
		last_reg = ARRAY_SIZE(evergreen_reg_safe_bm);

	/* One bitmap bit per dword register: word = reg >> 7, bit = (reg >> 2) & 31.
	 * A clear bit means the register is safe and needs no further checks. */
	i = (reg >> 7);
	if (i >= last_reg) {
		dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
		return -EINVAL;
	}
	m = 1 << ((reg >> 2) & 31);
	if (p->rdev->family >= CHIP_CAYMAN) {
		if (!(cayman_reg_safe_bm[i] & m))
			return 0;
	} else {
		if (!(evergreen_reg_safe_bm[i] & m))
			return 0;
	}
	ib = p->ib.ptr;
	switch (reg) {
	/* force following reg to 0 in an attempt to disable out buffer
	 * which will need us to better understand how it works to perform
	 * security check on it (Jerome)
	 */
	case SQ_ESGS_RING_SIZE:
	case SQ_GSVS_RING_SIZE:
	case SQ_ESTMP_RING_SIZE:
	case SQ_GSTMP_RING_SIZE:
	case SQ_HSTMP_RING_SIZE:
	case SQ_LSTMP_RING_SIZE:
	case SQ_PSTMP_RING_SIZE:
	case SQ_VSTMP_RING_SIZE:
	case SQ_ESGS_RING_ITEMSIZE:
	case SQ_ESTMP_RING_ITEMSIZE:
	case SQ_GSTMP_RING_ITEMSIZE:
	case SQ_GSVS_RING_ITEMSIZE:
	case SQ_GS_VERT_ITEMSIZE:
	case SQ_GS_VERT_ITEMSIZE_1:
	case SQ_GS_VERT_ITEMSIZE_2:
	case SQ_GS_VERT_ITEMSIZE_3:
	case SQ_GSVS_RING_OFFSET_1:
	case SQ_GSVS_RING_OFFSET_2:
	case SQ_GSVS_RING_OFFSET_3:
	case SQ_HSTMP_RING_ITEMSIZE:
	case SQ_LSTMP_RING_ITEMSIZE:
	case SQ_PSTMP_RING_ITEMSIZE:
	case SQ_VSTMP_RING_ITEMSIZE:
	case VGT_TF_RING_SIZE:
		/* get value to populate the IB don't remove */
		/*tmp =radeon_get_ib_value(p, idx);
		ib[idx] = 0;*/
		break;
	case SQ_ESGS_RING_BASE:
	case SQ_GSVS_RING_BASE:
	case SQ_ESTMP_RING_BASE:
	case SQ_GSTMP_RING_BASE:
	case SQ_HSTMP_RING_BASE:
	case SQ_LSTMP_RING_BASE:
	case SQ_PSTMP_RING_BASE:
	case SQ_VSTMP_RING_BASE:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		/* base registers hold a 256-byte-aligned address, hence >> 8 */
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		break;
	case DB_DEPTH_CONTROL:
		track->db_depth_control = radeon_get_ib_value(p, idx);
		track->db_dirty = true;
		break;
	case CAYMAN_DB_EQAA:
		if (p->rdev->family < CHIP_CAYMAN) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
				 "0x%04X\n", reg);
			return -EINVAL;
		}
		break;
	case CAYMAN_DB_DEPTH_INFO:
		if (p->rdev->family < CHIP_CAYMAN) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
				 "0x%04X\n", reg);
			return -EINVAL;
		}
		break;
	case DB_Z_INFO:
		track->db_z_info = radeon_get_ib_value(p, idx);
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
			r = radeon_cs_packet_next_reloc(p, &reloc, 0);
			if (r) {
				dev_warn(p->dev, "bad SET_CONTEXT_REG "
						"0x%04X\n", reg);
				return -EINVAL;
			}
			/* rewrite the array mode from the BO's tiling flags */
			ib[idx] &= ~Z_ARRAY_MODE(0xf);
			track->db_z_info &= ~Z_ARRAY_MODE(0xf);
			ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
			track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
			if (reloc->tiling_flags & RADEON_TILING_MACRO) {
				unsigned bankw, bankh, mtaspect, tile_split;

				evergreen_tiling_fields(reloc->tiling_flags,
							&bankw, &bankh, &mtaspect,
							&tile_split);
				ib[idx] |= DB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
				ib[idx] |= DB_TILE_SPLIT(tile_split) |
						DB_BANK_WIDTH(bankw) |
						DB_BANK_HEIGHT(bankh) |
						DB_MACRO_TILE_ASPECT(mtaspect);
			}
		}
		track->db_dirty = true;
		break;
	case DB_STENCIL_INFO:
		track->db_s_info = radeon_get_ib_value(p, idx);
		track->db_dirty = true;
		break;
	case DB_DEPTH_VIEW:
		track->db_depth_view = radeon_get_ib_value(p, idx);
		track->db_dirty = true;
		break;
	case DB_DEPTH_SIZE:
		track->db_depth_size = radeon_get_ib_value(p, idx);
		track->db_dirty = true;
		break;
	case R_02805C_DB_DEPTH_SLICE:
		track->db_depth_slice = radeon_get_ib_value(p, idx);
		track->db_dirty = true;
		break;
	case DB_Z_READ_BASE:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		track->db_z_read_offset = radeon_get_ib_value(p, idx);
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		track->db_z_read_bo = reloc->robj;
		track->db_dirty = true;
		break;
	case DB_Z_WRITE_BASE:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		track->db_z_write_offset = radeon_get_ib_value(p, idx);
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		track->db_z_write_bo = reloc->robj;
		track->db_dirty = true;
		break;
	case DB_STENCIL_READ_BASE:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		track->db_s_read_offset = radeon_get_ib_value(p, idx);
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		track->db_s_read_bo = reloc->robj;
		track->db_dirty = true;
		break;
	case DB_STENCIL_WRITE_BASE:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		track->db_s_write_offset = radeon_get_ib_value(p, idx);
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		track->db_s_write_bo = reloc->robj;
		track->db_dirty = true;
		break;
	case VGT_STRMOUT_CONFIG:
		track->vgt_strmout_config = radeon_get_ib_value(p, idx);
		track->streamout_dirty = true;
		break;
	case VGT_STRMOUT_BUFFER_CONFIG:
		track->vgt_strmout_buffer_config = radeon_get_ib_value(p, idx);
		track->streamout_dirty = true;
		break;
	case VGT_STRMOUT_BUFFER_BASE_0:
	case VGT_STRMOUT_BUFFER_BASE_1:
	case VGT_STRMOUT_BUFFER_BASE_2:
	case VGT_STRMOUT_BUFFER_BASE_3:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16;
		track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		track->vgt_strmout_bo[tmp] = reloc->robj;
		track->streamout_dirty = true;
		break;
	case VGT_STRMOUT_BUFFER_SIZE_0:
	case VGT_STRMOUT_BUFFER_SIZE_1:
	case VGT_STRMOUT_BUFFER_SIZE_2:
	case VGT_STRMOUT_BUFFER_SIZE_3:
		tmp = (reg - VGT_STRMOUT_BUFFER_SIZE_0) / 16;
		/* size in register is DWs, convert to bytes */
		track->vgt_strmout_size[tmp] = radeon_get_ib_value(p, idx) * 4;
		track->streamout_dirty = true;
		break;
	case CP_COHER_BASE:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "missing reloc for CP_COHER_BASE "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		/* This break was missing: the fallthrough into CB_TARGET_MASK
		 * made a CP_COHER_BASE write clobber the tracked target mask
		 * (and spuriously set cb_dirty). Matches the upstream fix. */
		break;
	case CB_TARGET_MASK:
		track->cb_target_mask = radeon_get_ib_value(p, idx);
		track->cb_dirty = true;
		break;
	case CB_SHADER_MASK:
		track->cb_shader_mask = radeon_get_ib_value(p, idx);
		track->cb_dirty = true;
		break;
	case PA_SC_AA_CONFIG:
		if (p->rdev->family >= CHIP_CAYMAN) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
				 "0x%04X\n", reg);
			return -EINVAL;
		}
		tmp = radeon_get_ib_value(p, idx) & MSAA_NUM_SAMPLES_MASK;
		track->nsamples = 1 << tmp;
		break;
	case CAYMAN_PA_SC_AA_CONFIG:
		if (p->rdev->family < CHIP_CAYMAN) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
				 "0x%04X\n", reg);
			return -EINVAL;
		}
		tmp = radeon_get_ib_value(p, idx) & CAYMAN_MSAA_NUM_SAMPLES_MASK;
		track->nsamples = 1 << tmp;
		break;
	case CB_COLOR0_VIEW:
	case CB_COLOR1_VIEW:
	case CB_COLOR2_VIEW:
	case CB_COLOR3_VIEW:
	case CB_COLOR4_VIEW:
	case CB_COLOR5_VIEW:
	case CB_COLOR6_VIEW:
	case CB_COLOR7_VIEW:
		/* CB0-7 registers are 0x3c apart; CB8-11 use a 0x1c stride */
		tmp = (reg - CB_COLOR0_VIEW) / 0x3c;
		track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
		track->cb_dirty = true;
		break;
	case CB_COLOR8_VIEW:
	case CB_COLOR9_VIEW:
	case CB_COLOR10_VIEW:
	case CB_COLOR11_VIEW:
		tmp = ((reg - CB_COLOR8_VIEW) / 0x1c) + 8;
		track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
		track->cb_dirty = true;
		break;
	case CB_COLOR0_INFO:
	case CB_COLOR1_INFO:
	case CB_COLOR2_INFO:
	case CB_COLOR3_INFO:
	case CB_COLOR4_INFO:
	case CB_COLOR5_INFO:
	case CB_COLOR6_INFO:
	case CB_COLOR7_INFO:
		tmp = (reg - CB_COLOR0_INFO) / 0x3c;
		track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
			r = radeon_cs_packet_next_reloc(p, &reloc, 0);
			if (r) {
				dev_warn(p->dev, "bad SET_CONTEXT_REG "
						"0x%04X\n", reg);
				return -EINVAL;
			}
			ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
			track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
		}
		track->cb_dirty = true;
		break;
	case CB_COLOR8_INFO:
	case CB_COLOR9_INFO:
	case CB_COLOR10_INFO:
	case CB_COLOR11_INFO:
		tmp = ((reg - CB_COLOR8_INFO) / 0x1c) + 8;
		track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
			r = radeon_cs_packet_next_reloc(p, &reloc, 0);
			if (r) {
				dev_warn(p->dev, "bad SET_CONTEXT_REG "
						"0x%04X\n", reg);
				return -EINVAL;
			}
			ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
			track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
		}
		track->cb_dirty = true;
		break;
	case CB_COLOR0_PITCH:
	case CB_COLOR1_PITCH:
	case CB_COLOR2_PITCH:
	case CB_COLOR3_PITCH:
	case CB_COLOR4_PITCH:
	case CB_COLOR5_PITCH:
	case CB_COLOR6_PITCH:
	case CB_COLOR7_PITCH:
		tmp = (reg - CB_COLOR0_PITCH) / 0x3c;
		track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
		track->cb_dirty = true;
		break;
	case CB_COLOR8_PITCH:
	case CB_COLOR9_PITCH:
	case CB_COLOR10_PITCH:
	case CB_COLOR11_PITCH:
		tmp = ((reg - CB_COLOR8_PITCH) / 0x1c) + 8;
		track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
		track->cb_dirty = true;
		break;
	case CB_COLOR0_SLICE:
	case CB_COLOR1_SLICE:
	case CB_COLOR2_SLICE:
	case CB_COLOR3_SLICE:
	case CB_COLOR4_SLICE:
	case CB_COLOR5_SLICE:
	case CB_COLOR6_SLICE:
	case CB_COLOR7_SLICE:
		tmp = (reg - CB_COLOR0_SLICE) / 0x3c;
		track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
		track->cb_color_slice_idx[tmp] = idx;
		track->cb_dirty = true;
		break;
	case CB_COLOR8_SLICE:
	case CB_COLOR9_SLICE:
	case CB_COLOR10_SLICE:
	case CB_COLOR11_SLICE:
		tmp = ((reg - CB_COLOR8_SLICE) / 0x1c) + 8;
		track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
		track->cb_color_slice_idx[tmp] = idx;
		track->cb_dirty = true;
		break;
	case CB_COLOR0_ATTRIB:
	case CB_COLOR1_ATTRIB:
	case CB_COLOR2_ATTRIB:
	case CB_COLOR3_ATTRIB:
	case CB_COLOR4_ATTRIB:
	case CB_COLOR5_ATTRIB:
	case CB_COLOR6_ATTRIB:
	case CB_COLOR7_ATTRIB:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
			if (reloc->tiling_flags & RADEON_TILING_MACRO) {
				unsigned bankw, bankh, mtaspect, tile_split;

				evergreen_tiling_fields(reloc->tiling_flags,
							&bankw, &bankh, &mtaspect,
							&tile_split);
				ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
				ib[idx] |= CB_TILE_SPLIT(tile_split) |
					   CB_BANK_WIDTH(bankw) |
					   CB_BANK_HEIGHT(bankh) |
					   CB_MACRO_TILE_ASPECT(mtaspect);
			}
		}
		tmp = ((reg - CB_COLOR0_ATTRIB) / 0x3c);
		track->cb_color_attrib[tmp] = ib[idx];
		track->cb_dirty = true;
		break;
	case CB_COLOR8_ATTRIB:
	case CB_COLOR9_ATTRIB:
	case CB_COLOR10_ATTRIB:
	case CB_COLOR11_ATTRIB:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
			if (reloc->tiling_flags & RADEON_TILING_MACRO) {
				unsigned bankw, bankh, mtaspect, tile_split;

				evergreen_tiling_fields(reloc->tiling_flags,
							&bankw, &bankh, &mtaspect,
							&tile_split);
				ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
				ib[idx] |= CB_TILE_SPLIT(tile_split) |
					   CB_BANK_WIDTH(bankw) |
					   CB_BANK_HEIGHT(bankh) |
					   CB_MACRO_TILE_ASPECT(mtaspect);
			}
		}
		tmp = ((reg - CB_COLOR8_ATTRIB) / 0x1c) + 8;
		track->cb_color_attrib[tmp] = ib[idx];
		track->cb_dirty = true;
		break;
	case CB_COLOR0_FMASK:
	case CB_COLOR1_FMASK:
	case CB_COLOR2_FMASK:
	case CB_COLOR3_FMASK:
	case CB_COLOR4_FMASK:
	case CB_COLOR5_FMASK:
	case CB_COLOR6_FMASK:
	case CB_COLOR7_FMASK:
		tmp = (reg - CB_COLOR0_FMASK) / 0x3c;
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
			return -EINVAL;
		}
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		track->cb_color_fmask_bo[tmp] = reloc->robj;
		break;
	case CB_COLOR0_CMASK:
	case CB_COLOR1_CMASK:
	case CB_COLOR2_CMASK:
	case CB_COLOR3_CMASK:
	case CB_COLOR4_CMASK:
	case CB_COLOR5_CMASK:
	case CB_COLOR6_CMASK:
	case CB_COLOR7_CMASK:
		tmp = (reg - CB_COLOR0_CMASK) / 0x3c;
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
			return -EINVAL;
		}
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		track->cb_color_cmask_bo[tmp] = reloc->robj;
		break;
	case CB_COLOR0_FMASK_SLICE:
	case CB_COLOR1_FMASK_SLICE:
	case CB_COLOR2_FMASK_SLICE:
	case CB_COLOR3_FMASK_SLICE:
	case CB_COLOR4_FMASK_SLICE:
	case CB_COLOR5_FMASK_SLICE:
	case CB_COLOR6_FMASK_SLICE:
	case CB_COLOR7_FMASK_SLICE:
		tmp = (reg - CB_COLOR0_FMASK_SLICE) / 0x3c;
		track->cb_color_fmask_slice[tmp] = radeon_get_ib_value(p, idx);
		break;
	case CB_COLOR0_CMASK_SLICE:
	case CB_COLOR1_CMASK_SLICE:
	case CB_COLOR2_CMASK_SLICE:
	case CB_COLOR3_CMASK_SLICE:
	case CB_COLOR4_CMASK_SLICE:
	case CB_COLOR5_CMASK_SLICE:
	case CB_COLOR6_CMASK_SLICE:
	case CB_COLOR7_CMASK_SLICE:
		tmp = (reg - CB_COLOR0_CMASK_SLICE) / 0x3c;
		track->cb_color_cmask_slice[tmp] = radeon_get_ib_value(p, idx);
		break;
	case CB_COLOR0_BASE:
	case CB_COLOR1_BASE:
	case CB_COLOR2_BASE:
	case CB_COLOR3_BASE:
	case CB_COLOR4_BASE:
	case CB_COLOR5_BASE:
	case CB_COLOR6_BASE:
	case CB_COLOR7_BASE:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		tmp = (reg - CB_COLOR0_BASE) / 0x3c;
		track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		track->cb_color_bo[tmp] = reloc->robj;
		track->cb_dirty = true;
		break;
	case CB_COLOR8_BASE:
	case CB_COLOR9_BASE:
	case CB_COLOR10_BASE:
	case CB_COLOR11_BASE:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		tmp = ((reg - CB_COLOR8_BASE) / 0x1c) + 8;
		track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		track->cb_color_bo[tmp] = reloc->robj;
		track->cb_dirty = true;
		break;
	case DB_HTILE_DATA_BASE:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		track->htile_offset = radeon_get_ib_value(p, idx);
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		track->htile_bo = reloc->robj;
		track->db_dirty = true;
		break;
	case DB_HTILE_SURFACE:
		/* 8x8 only */
		track->htile_surface = radeon_get_ib_value(p, idx);
		/* force 8x8 htile width and height */
		ib[idx] |= 3;
		track->db_dirty = true;
		break;
	case CB_IMMED0_BASE:
	case CB_IMMED1_BASE:
	case CB_IMMED2_BASE:
	case CB_IMMED3_BASE:
	case CB_IMMED4_BASE:
	case CB_IMMED5_BASE:
	case CB_IMMED6_BASE:
	case CB_IMMED7_BASE:
	case CB_IMMED8_BASE:
	case CB_IMMED9_BASE:
	case CB_IMMED10_BASE:
	case CB_IMMED11_BASE:
	case SQ_PGM_START_FS:
	case SQ_PGM_START_ES:
	case SQ_PGM_START_VS:
	case SQ_PGM_START_GS:
	case SQ_PGM_START_PS:
	case SQ_PGM_START_HS:
	case SQ_PGM_START_LS:
	case SQ_CONST_MEM_BASE:
	case SQ_ALU_CONST_CACHE_GS_0:
	case SQ_ALU_CONST_CACHE_GS_1:
	case SQ_ALU_CONST_CACHE_GS_2:
	case SQ_ALU_CONST_CACHE_GS_3:
	case SQ_ALU_CONST_CACHE_GS_4:
	case SQ_ALU_CONST_CACHE_GS_5:
	case SQ_ALU_CONST_CACHE_GS_6:
	case SQ_ALU_CONST_CACHE_GS_7:
	case SQ_ALU_CONST_CACHE_GS_8:
	case SQ_ALU_CONST_CACHE_GS_9:
	case SQ_ALU_CONST_CACHE_GS_10:
	case SQ_ALU_CONST_CACHE_GS_11:
	case SQ_ALU_CONST_CACHE_GS_12:
	case SQ_ALU_CONST_CACHE_GS_13:
	case SQ_ALU_CONST_CACHE_GS_14:
	case SQ_ALU_CONST_CACHE_GS_15:
	case SQ_ALU_CONST_CACHE_PS_0:
	case SQ_ALU_CONST_CACHE_PS_1:
	case SQ_ALU_CONST_CACHE_PS_2:
	case SQ_ALU_CONST_CACHE_PS_3:
	case SQ_ALU_CONST_CACHE_PS_4:
	case SQ_ALU_CONST_CACHE_PS_5:
	case SQ_ALU_CONST_CACHE_PS_6:
	case SQ_ALU_CONST_CACHE_PS_7:
	case SQ_ALU_CONST_CACHE_PS_8:
	case SQ_ALU_CONST_CACHE_PS_9:
	case SQ_ALU_CONST_CACHE_PS_10:
	case SQ_ALU_CONST_CACHE_PS_11:
	case SQ_ALU_CONST_CACHE_PS_12:
	case SQ_ALU_CONST_CACHE_PS_13:
	case SQ_ALU_CONST_CACHE_PS_14:
	case SQ_ALU_CONST_CACHE_PS_15:
	case SQ_ALU_CONST_CACHE_VS_0:
	case SQ_ALU_CONST_CACHE_VS_1:
	case SQ_ALU_CONST_CACHE_VS_2:
	case SQ_ALU_CONST_CACHE_VS_3:
	case SQ_ALU_CONST_CACHE_VS_4:
	case SQ_ALU_CONST_CACHE_VS_5:
	case SQ_ALU_CONST_CACHE_VS_6:
	case SQ_ALU_CONST_CACHE_VS_7:
	case SQ_ALU_CONST_CACHE_VS_8:
	case SQ_ALU_CONST_CACHE_VS_9:
	case SQ_ALU_CONST_CACHE_VS_10:
	case SQ_ALU_CONST_CACHE_VS_11:
	case SQ_ALU_CONST_CACHE_VS_12:
	case SQ_ALU_CONST_CACHE_VS_13:
	case SQ_ALU_CONST_CACHE_VS_14:
	case SQ_ALU_CONST_CACHE_VS_15:
	case SQ_ALU_CONST_CACHE_HS_0:
	case SQ_ALU_CONST_CACHE_HS_1:
	case SQ_ALU_CONST_CACHE_HS_2:
	case SQ_ALU_CONST_CACHE_HS_3:
	case SQ_ALU_CONST_CACHE_HS_4:
	case SQ_ALU_CONST_CACHE_HS_5:
	case SQ_ALU_CONST_CACHE_HS_6:
	case SQ_ALU_CONST_CACHE_HS_7:
	case SQ_ALU_CONST_CACHE_HS_8:
	case SQ_ALU_CONST_CACHE_HS_9:
	case SQ_ALU_CONST_CACHE_HS_10:
	case SQ_ALU_CONST_CACHE_HS_11:
	case SQ_ALU_CONST_CACHE_HS_12:
	case SQ_ALU_CONST_CACHE_HS_13:
	case SQ_ALU_CONST_CACHE_HS_14:
	case SQ_ALU_CONST_CACHE_HS_15:
	case SQ_ALU_CONST_CACHE_LS_0:
	case SQ_ALU_CONST_CACHE_LS_1:
	case SQ_ALU_CONST_CACHE_LS_2:
	case SQ_ALU_CONST_CACHE_LS_3:
	case SQ_ALU_CONST_CACHE_LS_4:
	case SQ_ALU_CONST_CACHE_LS_5:
	case SQ_ALU_CONST_CACHE_LS_6:
	case SQ_ALU_CONST_CACHE_LS_7:
	case SQ_ALU_CONST_CACHE_LS_8:
	case SQ_ALU_CONST_CACHE_LS_9:
	case SQ_ALU_CONST_CACHE_LS_10:
	case SQ_ALU_CONST_CACHE_LS_11:
	case SQ_ALU_CONST_CACHE_LS_12:
	case SQ_ALU_CONST_CACHE_LS_13:
	case SQ_ALU_CONST_CACHE_LS_14:
	case SQ_ALU_CONST_CACHE_LS_15:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		break;
	case SX_MEMORY_EXPORT_BASE:
		if (p->rdev->family >= CHIP_CAYMAN) {
			dev_warn(p->dev, "bad SET_CONFIG_REG "
				 "0x%04X\n", reg);
			return -EINVAL;
		}
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONFIG_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		break;
	case CAYMAN_SX_SCATTER_EXPORT_BASE:
		if (p->rdev->family < CHIP_CAYMAN) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
				 "0x%04X\n", reg);
			return -EINVAL;
		}
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		break;
	case SX_MISC:
		track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0;
		break;
	default:
		dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
		return -EINVAL;
	}
	return 0;
}
1796
evergreen_is_safe_reg(struct radeon_cs_parser * p,u32 reg,u32 idx)1797 static bool evergreen_is_safe_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
1798 {
1799 u32 last_reg, m, i;
1800
1801 if (p->rdev->family >= CHIP_CAYMAN)
1802 last_reg = ARRAY_SIZE(cayman_reg_safe_bm);
1803 else
1804 last_reg = ARRAY_SIZE(evergreen_reg_safe_bm);
1805
1806 i = (reg >> 7);
1807 if (i >= last_reg) {
1808 dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1809 return false;
1810 }
1811 m = 1 << ((reg >> 2) & 31);
1812 if (p->rdev->family >= CHIP_CAYMAN) {
1813 if (!(cayman_reg_safe_bm[i] & m))
1814 return true;
1815 } else {
1816 if (!(evergreen_reg_safe_bm[i] & m))
1817 return true;
1818 }
1819 dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1820 return false;
1821 }
1822
evergreen_packet3_check(struct radeon_cs_parser * p,struct radeon_cs_packet * pkt)1823 static int evergreen_packet3_check(struct radeon_cs_parser *p,
1824 struct radeon_cs_packet *pkt)
1825 {
1826 struct radeon_cs_reloc *reloc;
1827 struct evergreen_cs_track *track;
1828 volatile u32 *ib;
1829 unsigned idx;
1830 unsigned i;
1831 unsigned start_reg, end_reg, reg;
1832 int r;
1833 u32 idx_value;
1834
1835 track = (struct evergreen_cs_track *)p->track;
1836 ib = p->ib.ptr;
1837 idx = pkt->idx + 1;
1838 idx_value = radeon_get_ib_value(p, idx);
1839
1840 switch (pkt->opcode) {
1841 case PACKET3_SET_PREDICATION:
1842 {
1843 int pred_op;
1844 int tmp;
1845 uint64_t offset;
1846
1847 if (pkt->count != 1) {
1848 DRM_ERROR("bad SET PREDICATION\n");
1849 return -EINVAL;
1850 }
1851
1852 tmp = radeon_get_ib_value(p, idx + 1);
1853 pred_op = (tmp >> 16) & 0x7;
1854
1855 /* for the clear predicate operation */
1856 if (pred_op == 0)
1857 return 0;
1858
1859 if (pred_op > 2) {
1860 DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op);
1861 return -EINVAL;
1862 }
1863
1864 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1865 if (r) {
1866 DRM_ERROR("bad SET PREDICATION\n");
1867 return -EINVAL;
1868 }
1869
1870 offset = reloc->gpu_offset +
1871 (idx_value & 0xfffffff0) +
1872 ((u64)(tmp & 0xff) << 32);
1873
1874 ib[idx + 0] = offset;
1875 ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff);
1876 }
1877 break;
1878 case PACKET3_CONTEXT_CONTROL:
1879 if (pkt->count != 1) {
1880 DRM_ERROR("bad CONTEXT_CONTROL\n");
1881 return -EINVAL;
1882 }
1883 break;
1884 case PACKET3_INDEX_TYPE:
1885 case PACKET3_NUM_INSTANCES:
1886 case PACKET3_CLEAR_STATE:
1887 if (pkt->count) {
1888 DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
1889 return -EINVAL;
1890 }
1891 break;
1892 case CAYMAN_PACKET3_DEALLOC_STATE:
1893 if (p->rdev->family < CHIP_CAYMAN) {
1894 DRM_ERROR("bad PACKET3_DEALLOC_STATE\n");
1895 return -EINVAL;
1896 }
1897 if (pkt->count) {
1898 DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
1899 return -EINVAL;
1900 }
1901 break;
1902 case PACKET3_INDEX_BASE:
1903 {
1904 uint64_t offset;
1905
1906 if (pkt->count != 1) {
1907 DRM_ERROR("bad INDEX_BASE\n");
1908 return -EINVAL;
1909 }
1910 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1911 if (r) {
1912 DRM_ERROR("bad INDEX_BASE\n");
1913 return -EINVAL;
1914 }
1915
1916 offset = reloc->gpu_offset +
1917 idx_value +
1918 ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
1919
1920 ib[idx+0] = offset;
1921 ib[idx+1] = upper_32_bits(offset) & 0xff;
1922
1923 r = evergreen_cs_track_check(p);
1924 if (r) {
1925 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1926 return r;
1927 }
1928 break;
1929 }
1930 case PACKET3_DRAW_INDEX:
1931 {
1932 uint64_t offset;
1933 if (pkt->count != 3) {
1934 DRM_ERROR("bad DRAW_INDEX\n");
1935 return -EINVAL;
1936 }
1937 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1938 if (r) {
1939 DRM_ERROR("bad DRAW_INDEX\n");
1940 return -EINVAL;
1941 }
1942
1943 offset = reloc->gpu_offset +
1944 idx_value +
1945 ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
1946
1947 ib[idx+0] = offset;
1948 ib[idx+1] = upper_32_bits(offset) & 0xff;
1949
1950 r = evergreen_cs_track_check(p);
1951 if (r) {
1952 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1953 return r;
1954 }
1955 break;
1956 }
1957 case PACKET3_DRAW_INDEX_2:
1958 {
1959 uint64_t offset;
1960
1961 if (pkt->count != 4) {
1962 DRM_ERROR("bad DRAW_INDEX_2\n");
1963 return -EINVAL;
1964 }
1965 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1966 if (r) {
1967 DRM_ERROR("bad DRAW_INDEX_2\n");
1968 return -EINVAL;
1969 }
1970
1971 offset = reloc->gpu_offset +
1972 radeon_get_ib_value(p, idx+1) +
1973 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
1974
1975 ib[idx+1] = offset;
1976 ib[idx+2] = upper_32_bits(offset) & 0xff;
1977
1978 r = evergreen_cs_track_check(p);
1979 if (r) {
1980 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1981 return r;
1982 }
1983 break;
1984 }
1985 case PACKET3_DRAW_INDEX_AUTO:
1986 if (pkt->count != 1) {
1987 DRM_ERROR("bad DRAW_INDEX_AUTO\n");
1988 return -EINVAL;
1989 }
1990 r = evergreen_cs_track_check(p);
1991 if (r) {
1992 dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
1993 return r;
1994 }
1995 break;
1996 case PACKET3_DRAW_INDEX_MULTI_AUTO:
1997 if (pkt->count != 2) {
1998 DRM_ERROR("bad DRAW_INDEX_MULTI_AUTO\n");
1999 return -EINVAL;
2000 }
2001 r = evergreen_cs_track_check(p);
2002 if (r) {
2003 dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
2004 return r;
2005 }
2006 break;
2007 case PACKET3_DRAW_INDEX_IMMD:
2008 if (pkt->count < 2) {
2009 DRM_ERROR("bad DRAW_INDEX_IMMD\n");
2010 return -EINVAL;
2011 }
2012 r = evergreen_cs_track_check(p);
2013 if (r) {
2014 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2015 return r;
2016 }
2017 break;
2018 case PACKET3_DRAW_INDEX_OFFSET:
2019 if (pkt->count != 2) {
2020 DRM_ERROR("bad DRAW_INDEX_OFFSET\n");
2021 return -EINVAL;
2022 }
2023 r = evergreen_cs_track_check(p);
2024 if (r) {
2025 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2026 return r;
2027 }
2028 break;
2029 case PACKET3_DRAW_INDEX_OFFSET_2:
2030 if (pkt->count != 3) {
2031 DRM_ERROR("bad DRAW_INDEX_OFFSET_2\n");
2032 return -EINVAL;
2033 }
2034 r = evergreen_cs_track_check(p);
2035 if (r) {
2036 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2037 return r;
2038 }
2039 break;
2040 case PACKET3_DISPATCH_DIRECT:
2041 if (pkt->count != 3) {
2042 DRM_ERROR("bad DISPATCH_DIRECT\n");
2043 return -EINVAL;
2044 }
2045 r = evergreen_cs_track_check(p);
2046 if (r) {
2047 dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
2048 return r;
2049 }
2050 break;
2051 case PACKET3_DISPATCH_INDIRECT:
2052 if (pkt->count != 1) {
2053 DRM_ERROR("bad DISPATCH_INDIRECT\n");
2054 return -EINVAL;
2055 }
2056 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2057 if (r) {
2058 DRM_ERROR("bad DISPATCH_INDIRECT\n");
2059 return -EINVAL;
2060 }
2061 ib[idx+0] = idx_value + (u32)(reloc->gpu_offset & 0xffffffff);
2062 r = evergreen_cs_track_check(p);
2063 if (r) {
2064 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2065 return r;
2066 }
2067 break;
2068 case PACKET3_WAIT_REG_MEM:
2069 if (pkt->count != 5) {
2070 DRM_ERROR("bad WAIT_REG_MEM\n");
2071 return -EINVAL;
2072 }
2073 /* bit 4 is reg (0) or mem (1) */
2074 if (idx_value & 0x10) {
2075 uint64_t offset;
2076
2077 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2078 if (r) {
2079 DRM_ERROR("bad WAIT_REG_MEM\n");
2080 return -EINVAL;
2081 }
2082
2083 offset = reloc->gpu_offset +
2084 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2085 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2086
2087 ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc);
2088 ib[idx+2] = upper_32_bits(offset) & 0xff;
2089 } else if (idx_value & 0x100) {
2090 DRM_ERROR("cannot use PFP on REG wait\n");
2091 return -EINVAL;
2092 }
2093 break;
2094 case PACKET3_CP_DMA:
2095 {
2096 u32 command, size, info;
2097 u64 offset, tmp;
2098 if (pkt->count != 4) {
2099 DRM_ERROR("bad CP DMA\n");
2100 return -EINVAL;
2101 }
2102 command = radeon_get_ib_value(p, idx+4);
2103 size = command & 0x1fffff;
2104 info = radeon_get_ib_value(p, idx+1);
2105 if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
2106 (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
2107 ((((info & 0x00300000) >> 20) == 0) &&
2108 (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
2109 ((((info & 0x60000000) >> 29) == 0) &&
2110 (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
2111 /* non mem to mem copies requires dw aligned count */
2112 if (size % 4) {
2113 DRM_ERROR("CP DMA command requires dw count alignment\n");
2114 return -EINVAL;
2115 }
2116 }
2117 if (command & PACKET3_CP_DMA_CMD_SAS) {
2118 /* src address space is register */
2119 /* GDS is ok */
2120 if (((info & 0x60000000) >> 29) != 1) {
2121 DRM_ERROR("CP DMA SAS not supported\n");
2122 return -EINVAL;
2123 }
2124 } else {
2125 if (command & PACKET3_CP_DMA_CMD_SAIC) {
2126 DRM_ERROR("CP DMA SAIC only supported for registers\n");
2127 return -EINVAL;
2128 }
2129 /* src address space is memory */
2130 if (((info & 0x60000000) >> 29) == 0) {
2131 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2132 if (r) {
2133 DRM_ERROR("bad CP DMA SRC\n");
2134 return -EINVAL;
2135 }
2136
2137 tmp = radeon_get_ib_value(p, idx) +
2138 ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
2139
2140 offset = reloc->gpu_offset + tmp;
2141
2142 if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2143 dev_warn(p->dev, "CP DMA src buffer too small (%"PRIu64" %lu)\n",
2144 tmp + size, radeon_bo_size(reloc->robj));
2145 return -EINVAL;
2146 }
2147
2148 ib[idx] = offset;
2149 ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2150 } else if (((info & 0x60000000) >> 29) != 2) {
2151 DRM_ERROR("bad CP DMA SRC_SEL\n");
2152 return -EINVAL;
2153 }
2154 }
2155 if (command & PACKET3_CP_DMA_CMD_DAS) {
2156 /* dst address space is register */
2157 /* GDS is ok */
2158 if (((info & 0x00300000) >> 20) != 1) {
2159 DRM_ERROR("CP DMA DAS not supported\n");
2160 return -EINVAL;
2161 }
2162 } else {
2163 /* dst address space is memory */
2164 if (command & PACKET3_CP_DMA_CMD_DAIC) {
2165 DRM_ERROR("CP DMA DAIC only supported for registers\n");
2166 return -EINVAL;
2167 }
2168 if (((info & 0x00300000) >> 20) == 0) {
2169 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2170 if (r) {
2171 DRM_ERROR("bad CP DMA DST\n");
2172 return -EINVAL;
2173 }
2174
2175 tmp = radeon_get_ib_value(p, idx+2) +
2176 ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);
2177
2178 offset = reloc->gpu_offset + tmp;
2179
2180 if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2181 dev_warn(p->dev, "CP DMA dst buffer too small (%"PRIu64" %lu)\n",
2182 tmp + size, radeon_bo_size(reloc->robj));
2183 return -EINVAL;
2184 }
2185
2186 ib[idx+2] = offset;
2187 ib[idx+3] = upper_32_bits(offset) & 0xff;
2188 } else {
2189 DRM_ERROR("bad CP DMA DST_SEL\n");
2190 return -EINVAL;
2191 }
2192 }
2193 break;
2194 }
2195 case PACKET3_SURFACE_SYNC:
2196 if (pkt->count != 3) {
2197 DRM_ERROR("bad SURFACE_SYNC\n");
2198 return -EINVAL;
2199 }
2200 /* 0xffffffff/0x0 is flush all cache flag */
2201 if (radeon_get_ib_value(p, idx + 1) != 0xffffffff ||
2202 radeon_get_ib_value(p, idx + 2) != 0) {
2203 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2204 if (r) {
2205 DRM_ERROR("bad SURFACE_SYNC\n");
2206 return -EINVAL;
2207 }
2208 ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2209 }
2210 break;
2211 case PACKET3_EVENT_WRITE:
2212 if (pkt->count != 2 && pkt->count != 0) {
2213 DRM_ERROR("bad EVENT_WRITE\n");
2214 return -EINVAL;
2215 }
2216 if (pkt->count) {
2217 uint64_t offset;
2218
2219 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2220 if (r) {
2221 DRM_ERROR("bad EVENT_WRITE\n");
2222 return -EINVAL;
2223 }
2224 offset = reloc->gpu_offset +
2225 (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
2226 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2227
2228 ib[idx+1] = offset & 0xfffffff8;
2229 ib[idx+2] = upper_32_bits(offset) & 0xff;
2230 }
2231 break;
2232 case PACKET3_EVENT_WRITE_EOP:
2233 {
2234 uint64_t offset;
2235
2236 if (pkt->count != 4) {
2237 DRM_ERROR("bad EVENT_WRITE_EOP\n");
2238 return -EINVAL;
2239 }
2240 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2241 if (r) {
2242 DRM_ERROR("bad EVENT_WRITE_EOP\n");
2243 return -EINVAL;
2244 }
2245
2246 offset = reloc->gpu_offset +
2247 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2248 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2249
2250 ib[idx+1] = offset & 0xfffffffc;
2251 ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2252 break;
2253 }
2254 case PACKET3_EVENT_WRITE_EOS:
2255 {
2256 uint64_t offset;
2257
2258 if (pkt->count != 3) {
2259 DRM_ERROR("bad EVENT_WRITE_EOS\n");
2260 return -EINVAL;
2261 }
2262 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2263 if (r) {
2264 DRM_ERROR("bad EVENT_WRITE_EOS\n");
2265 return -EINVAL;
2266 }
2267
2268 offset = reloc->gpu_offset +
2269 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2270 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2271
2272 ib[idx+1] = offset & 0xfffffffc;
2273 ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2274 break;
2275 }
2276 case PACKET3_SET_CONFIG_REG:
2277 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
2278 end_reg = 4 * pkt->count + start_reg - 4;
2279 if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
2280 (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
2281 (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
2282 DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2283 return -EINVAL;
2284 }
2285 for (i = 0; i < pkt->count; i++) {
2286 reg = start_reg + (4 * i);
2287 r = evergreen_cs_check_reg(p, reg, idx+1+i);
2288 if (r)
2289 return r;
2290 }
2291 break;
2292 case PACKET3_SET_CONTEXT_REG:
2293 start_reg = (idx_value << 2) + PACKET3_SET_CONTEXT_REG_START;
2294 end_reg = 4 * pkt->count + start_reg - 4;
2295 if ((start_reg < PACKET3_SET_CONTEXT_REG_START) ||
2296 (start_reg >= PACKET3_SET_CONTEXT_REG_END) ||
2297 (end_reg >= PACKET3_SET_CONTEXT_REG_END)) {
2298 DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n");
2299 return -EINVAL;
2300 }
2301 for (i = 0; i < pkt->count; i++) {
2302 reg = start_reg + (4 * i);
2303 r = evergreen_cs_check_reg(p, reg, idx+1+i);
2304 if (r)
2305 return r;
2306 }
2307 break;
2308 case PACKET3_SET_RESOURCE:
2309 if (pkt->count % 8) {
2310 DRM_ERROR("bad SET_RESOURCE\n");
2311 return -EINVAL;
2312 }
2313 start_reg = (idx_value << 2) + PACKET3_SET_RESOURCE_START;
2314 end_reg = 4 * pkt->count + start_reg - 4;
2315 if ((start_reg < PACKET3_SET_RESOURCE_START) ||
2316 (start_reg >= PACKET3_SET_RESOURCE_END) ||
2317 (end_reg >= PACKET3_SET_RESOURCE_END)) {
2318 DRM_ERROR("bad SET_RESOURCE\n");
2319 return -EINVAL;
2320 }
2321 for (i = 0; i < (pkt->count / 8); i++) {
2322 struct radeon_bo *texture, *mipmap;
2323 u32 toffset, moffset;
2324 u32 size, offset, mip_address, tex_dim;
2325
2326 switch (G__SQ_CONSTANT_TYPE(radeon_get_ib_value(p, idx+1+(i*8)+7))) {
2327 case SQ_TEX_VTX_VALID_TEXTURE:
2328 /* tex base */
2329 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2330 if (r) {
2331 DRM_ERROR("bad SET_RESOURCE (tex)\n");
2332 return -EINVAL;
2333 }
2334 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
2335 ib[idx+1+(i*8)+1] |=
2336 TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
2337 if (reloc->tiling_flags & RADEON_TILING_MACRO) {
2338 unsigned bankw, bankh, mtaspect, tile_split;
2339
2340 evergreen_tiling_fields(reloc->tiling_flags,
2341 &bankw, &bankh, &mtaspect,
2342 &tile_split);
2343 ib[idx+1+(i*8)+6] |= TEX_TILE_SPLIT(tile_split);
2344 ib[idx+1+(i*8)+7] |=
2345 TEX_BANK_WIDTH(bankw) |
2346 TEX_BANK_HEIGHT(bankh) |
2347 MACRO_TILE_ASPECT(mtaspect) |
2348 TEX_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
2349 }
2350 }
2351 texture = reloc->robj;
2352 toffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2353
2354 /* tex mip base */
2355 tex_dim = ib[idx+1+(i*8)+0] & 0x7;
2356 mip_address = ib[idx+1+(i*8)+3];
2357
2358 if ((tex_dim == SQ_TEX_DIM_2D_MSAA || tex_dim == SQ_TEX_DIM_2D_ARRAY_MSAA) &&
2359 !mip_address &&
2360 !radeon_cs_packet_next_is_pkt3_nop(p)) {
2361 /* MIP_ADDRESS should point to FMASK for an MSAA texture.
2362 * It should be 0 if FMASK is disabled. */
2363 moffset = 0;
2364 mipmap = NULL;
2365 } else {
2366 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2367 if (r) {
2368 DRM_ERROR("bad SET_RESOURCE (tex)\n");
2369 return -EINVAL;
2370 }
2371 moffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2372 mipmap = reloc->robj;
2373 }
2374
2375 r = evergreen_cs_track_validate_texture(p, texture, mipmap, idx+1+(i*8));
2376 if (r)
2377 return r;
2378 ib[idx+1+(i*8)+2] += toffset;
2379 ib[idx+1+(i*8)+3] += moffset;
2380 break;
2381 case SQ_TEX_VTX_VALID_BUFFER:
2382 {
2383 uint64_t offset64;
2384 /* vtx base */
2385 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2386 if (r) {
2387 DRM_ERROR("bad SET_RESOURCE (vtx)\n");
2388 return -EINVAL;
2389 }
2390 offset = radeon_get_ib_value(p, idx+1+(i*8)+0);
2391 size = radeon_get_ib_value(p, idx+1+(i*8)+1);
2392 if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) {
2393 /* force size to size of the buffer */
2394 dev_warn(p->dev, "vbo resource seems too big for the bo\n");
2395 ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset;
2396 }
2397
2398 offset64 = reloc->gpu_offset + offset;
2399 ib[idx+1+(i*8)+0] = offset64;
2400 ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) |
2401 (upper_32_bits(offset64) & 0xff);
2402 break;
2403 }
2404 case SQ_TEX_VTX_INVALID_TEXTURE:
2405 case SQ_TEX_VTX_INVALID_BUFFER:
2406 default:
2407 DRM_ERROR("bad SET_RESOURCE\n");
2408 return -EINVAL;
2409 }
2410 }
2411 break;
2412 case PACKET3_SET_ALU_CONST:
2413 /* XXX fix me ALU const buffers only */
2414 break;
2415 case PACKET3_SET_BOOL_CONST:
2416 start_reg = (idx_value << 2) + PACKET3_SET_BOOL_CONST_START;
2417 end_reg = 4 * pkt->count + start_reg - 4;
2418 if ((start_reg < PACKET3_SET_BOOL_CONST_START) ||
2419 (start_reg >= PACKET3_SET_BOOL_CONST_END) ||
2420 (end_reg >= PACKET3_SET_BOOL_CONST_END)) {
2421 DRM_ERROR("bad SET_BOOL_CONST\n");
2422 return -EINVAL;
2423 }
2424 break;
2425 case PACKET3_SET_LOOP_CONST:
2426 start_reg = (idx_value << 2) + PACKET3_SET_LOOP_CONST_START;
2427 end_reg = 4 * pkt->count + start_reg - 4;
2428 if ((start_reg < PACKET3_SET_LOOP_CONST_START) ||
2429 (start_reg >= PACKET3_SET_LOOP_CONST_END) ||
2430 (end_reg >= PACKET3_SET_LOOP_CONST_END)) {
2431 DRM_ERROR("bad SET_LOOP_CONST\n");
2432 return -EINVAL;
2433 }
2434 break;
2435 case PACKET3_SET_CTL_CONST:
2436 start_reg = (idx_value << 2) + PACKET3_SET_CTL_CONST_START;
2437 end_reg = 4 * pkt->count + start_reg - 4;
2438 if ((start_reg < PACKET3_SET_CTL_CONST_START) ||
2439 (start_reg >= PACKET3_SET_CTL_CONST_END) ||
2440 (end_reg >= PACKET3_SET_CTL_CONST_END)) {
2441 DRM_ERROR("bad SET_CTL_CONST\n");
2442 return -EINVAL;
2443 }
2444 break;
2445 case PACKET3_SET_SAMPLER:
2446 if (pkt->count % 3) {
2447 DRM_ERROR("bad SET_SAMPLER\n");
2448 return -EINVAL;
2449 }
2450 start_reg = (idx_value << 2) + PACKET3_SET_SAMPLER_START;
2451 end_reg = 4 * pkt->count + start_reg - 4;
2452 if ((start_reg < PACKET3_SET_SAMPLER_START) ||
2453 (start_reg >= PACKET3_SET_SAMPLER_END) ||
2454 (end_reg >= PACKET3_SET_SAMPLER_END)) {
2455 DRM_ERROR("bad SET_SAMPLER\n");
2456 return -EINVAL;
2457 }
2458 break;
2459 case PACKET3_STRMOUT_BUFFER_UPDATE:
2460 if (pkt->count != 4) {
2461 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n");
2462 return -EINVAL;
2463 }
2464 /* Updating memory at DST_ADDRESS. */
2465 if (idx_value & 0x1) {
2466 u64 offset;
2467 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2468 if (r) {
2469 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n");
2470 return -EINVAL;
2471 }
2472 offset = radeon_get_ib_value(p, idx+1);
2473 offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2474 if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2475 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%"PRIx64", 0x%lx\n",
2476 offset + 4, radeon_bo_size(reloc->robj));
2477 return -EINVAL;
2478 }
2479 offset += reloc->gpu_offset;
2480 ib[idx+1] = offset;
2481 ib[idx+2] = upper_32_bits(offset) & 0xff;
2482 }
2483 /* Reading data from SRC_ADDRESS. */
2484 if (((idx_value >> 1) & 0x3) == 2) {
2485 u64 offset;
2486 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2487 if (r) {
2488 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n");
2489 return -EINVAL;
2490 }
2491 offset = radeon_get_ib_value(p, idx+3);
2492 offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2493 if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2494 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%"PRIx64", 0x%lx\n",
2495 offset + 4, radeon_bo_size(reloc->robj));
2496 return -EINVAL;
2497 }
2498 offset += reloc->gpu_offset;
2499 ib[idx+3] = offset;
2500 ib[idx+4] = upper_32_bits(offset) & 0xff;
2501 }
2502 break;
2503 case PACKET3_MEM_WRITE:
2504 {
2505 u64 offset;
2506
2507 if (pkt->count != 3) {
2508 DRM_ERROR("bad MEM_WRITE (invalid count)\n");
2509 return -EINVAL;
2510 }
2511 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2512 if (r) {
2513 DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
2514 return -EINVAL;
2515 }
2516 offset = radeon_get_ib_value(p, idx+0);
2517 offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
2518 if (offset & 0x7) {
2519 DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n");
2520 return -EINVAL;
2521 }
2522 if ((offset + 8) > radeon_bo_size(reloc->robj)) {
2523 DRM_ERROR("bad MEM_WRITE bo too small: 0x%"PRIx64", 0x%lx\n",
2524 offset + 8, radeon_bo_size(reloc->robj));
2525 return -EINVAL;
2526 }
2527 offset += reloc->gpu_offset;
2528 ib[idx+0] = offset;
2529 ib[idx+1] = upper_32_bits(offset) & 0xff;
2530 break;
2531 }
2532 case PACKET3_COPY_DW:
2533 if (pkt->count != 4) {
2534 DRM_ERROR("bad COPY_DW (invalid count)\n");
2535 return -EINVAL;
2536 }
2537 if (idx_value & 0x1) {
2538 u64 offset;
2539 /* SRC is memory. */
2540 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2541 if (r) {
2542 DRM_ERROR("bad COPY_DW (missing src reloc)\n");
2543 return -EINVAL;
2544 }
2545 offset = radeon_get_ib_value(p, idx+1);
2546 offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2547 if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2548 DRM_ERROR("bad COPY_DW src bo too small: 0x%"PRIx64", 0x%lx\n",
2549 offset + 4, radeon_bo_size(reloc->robj));
2550 return -EINVAL;
2551 }
2552 offset += reloc->gpu_offset;
2553 ib[idx+1] = offset;
2554 ib[idx+2] = upper_32_bits(offset) & 0xff;
2555 } else {
2556 /* SRC is a reg. */
2557 reg = radeon_get_ib_value(p, idx+1) << 2;
2558 if (!evergreen_is_safe_reg(p, reg, idx+1))
2559 return -EINVAL;
2560 }
2561 if (idx_value & 0x2) {
2562 u64 offset;
2563 /* DST is memory. */
2564 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2565 if (r) {
2566 DRM_ERROR("bad COPY_DW (missing dst reloc)\n");
2567 return -EINVAL;
2568 }
2569 offset = radeon_get_ib_value(p, idx+3);
2570 offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2571 if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2572 DRM_ERROR("bad COPY_DW dst bo too small: 0x%"PRIx64", 0x%lx\n",
2573 offset + 4, radeon_bo_size(reloc->robj));
2574 return -EINVAL;
2575 }
2576 offset += reloc->gpu_offset;
2577 ib[idx+3] = offset;
2578 ib[idx+4] = upper_32_bits(offset) & 0xff;
2579 } else {
2580 /* DST is a reg. */
2581 reg = radeon_get_ib_value(p, idx+3) << 2;
2582 if (!evergreen_is_safe_reg(p, reg, idx+3))
2583 return -EINVAL;
2584 }
2585 break;
2586 case PACKET3_NOP:
2587 break;
2588 default:
2589 DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
2590 return -EINVAL;
2591 }
2592 return 0;
2593 }
2594
evergreen_cs_parse(struct radeon_cs_parser * p)2595 int evergreen_cs_parse(struct radeon_cs_parser *p)
2596 {
2597 struct radeon_cs_packet pkt;
2598 struct evergreen_cs_track *track;
2599 u32 tmp;
2600 int r;
2601
2602 if (p->track == NULL) {
2603 /* initialize tracker, we are in kms */
2604 track = kzalloc(sizeof(*track), GFP_KERNEL);
2605 if (track == NULL)
2606 return -ENOMEM;
2607 evergreen_cs_track_init(track);
2608 if (p->rdev->family >= CHIP_CAYMAN)
2609 tmp = p->rdev->config.cayman.tile_config;
2610 else
2611 tmp = p->rdev->config.evergreen.tile_config;
2612
2613 switch (tmp & 0xf) {
2614 case 0:
2615 track->npipes = 1;
2616 break;
2617 case 1:
2618 default:
2619 track->npipes = 2;
2620 break;
2621 case 2:
2622 track->npipes = 4;
2623 break;
2624 case 3:
2625 track->npipes = 8;
2626 break;
2627 }
2628
2629 switch ((tmp & 0xf0) >> 4) {
2630 case 0:
2631 track->nbanks = 4;
2632 break;
2633 case 1:
2634 default:
2635 track->nbanks = 8;
2636 break;
2637 case 2:
2638 track->nbanks = 16;
2639 break;
2640 }
2641
2642 switch ((tmp & 0xf00) >> 8) {
2643 case 0:
2644 track->group_size = 256;
2645 break;
2646 case 1:
2647 default:
2648 track->group_size = 512;
2649 break;
2650 }
2651
2652 switch ((tmp & 0xf000) >> 12) {
2653 case 0:
2654 track->row_size = 1;
2655 break;
2656 case 1:
2657 default:
2658 track->row_size = 2;
2659 break;
2660 case 2:
2661 track->row_size = 4;
2662 break;
2663 }
2664
2665 p->track = track;
2666 }
2667 do {
2668 r = radeon_cs_packet_parse(p, &pkt, p->idx);
2669 if (r) {
2670 kfree(p->track);
2671 p->track = NULL;
2672 return r;
2673 }
2674 p->idx += pkt.count + 2;
2675 switch (pkt.type) {
2676 case RADEON_PACKET_TYPE0:
2677 r = evergreen_cs_parse_packet0(p, &pkt);
2678 break;
2679 case RADEON_PACKET_TYPE2:
2680 break;
2681 case RADEON_PACKET_TYPE3:
2682 r = evergreen_packet3_check(p, &pkt);
2683 break;
2684 default:
2685 DRM_ERROR("Unknown packet type %d !\n", pkt.type);
2686 kfree(p->track);
2687 p->track = NULL;
2688 return -EINVAL;
2689 }
2690 if (r) {
2691 kfree(p->track);
2692 p->track = NULL;
2693 return r;
2694 }
2695 } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
2696 #if 0
2697 for (r = 0; r < p->ib.length_dw; r++) {
2698 printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
2699 mdelay(1);
2700 }
2701 #endif
2702 kfree(p->track);
2703 p->track = NULL;
2704 return 0;
2705 }
2706
2707 /**
2708 * evergreen_dma_cs_parse() - parse the DMA IB
2709 * @p: parser structure holding parsing context.
2710 *
2711 * Parses the DMA IB from the CS ioctl and updates
2712 * the GPU addresses based on the reloc information and
2713 * checks for errors. (Evergreen-Cayman)
2714 * Returns 0 for success and an error on failure.
2715 **/
evergreen_dma_cs_parse(struct radeon_cs_parser * p)2716 int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
2717 {
2718 struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
2719 struct radeon_cs_reloc *src_reloc, *dst_reloc, *dst2_reloc;
2720 u32 header, cmd, count, sub_cmd;
2721 volatile u32 *ib = p->ib.ptr;
2722 u32 idx;
2723 u64 src_offset, dst_offset, dst2_offset;
2724 int r;
2725
2726 do {
2727 if (p->idx >= ib_chunk->length_dw) {
2728 DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
2729 p->idx, ib_chunk->length_dw);
2730 return -EINVAL;
2731 }
2732 idx = p->idx;
2733 header = radeon_get_ib_value(p, idx);
2734 cmd = GET_DMA_CMD(header);
2735 count = GET_DMA_COUNT(header);
2736 sub_cmd = GET_DMA_SUB_CMD(header);
2737
2738 switch (cmd) {
2739 case DMA_PACKET_WRITE:
2740 r = r600_dma_cs_next_reloc(p, &dst_reloc);
2741 if (r) {
2742 DRM_ERROR("bad DMA_PACKET_WRITE\n");
2743 return -EINVAL;
2744 }
2745 switch (sub_cmd) {
2746 /* tiled */
2747 case 8:
2748 dst_offset = radeon_get_ib_value(p, idx+1);
2749 dst_offset <<= 8;
2750
2751 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2752 p->idx += count + 7;
2753 break;
2754 /* linear */
2755 case 0:
2756 dst_offset = radeon_get_ib_value(p, idx+1);
2757 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2758
2759 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2760 ib[idx+2] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2761 p->idx += count + 3;
2762 break;
2763 default:
2764 DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, header);
2765 return -EINVAL;
2766 }
2767 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2768 dev_warn(p->dev, "DMA write buffer too small (%"PRIu64" %lu)\n",
2769 dst_offset, radeon_bo_size(dst_reloc->robj));
2770 return -EINVAL;
2771 }
2772 break;
2773 case DMA_PACKET_COPY:
2774 r = r600_dma_cs_next_reloc(p, &src_reloc);
2775 if (r) {
2776 DRM_ERROR("bad DMA_PACKET_COPY\n");
2777 return -EINVAL;
2778 }
2779 r = r600_dma_cs_next_reloc(p, &dst_reloc);
2780 if (r) {
2781 DRM_ERROR("bad DMA_PACKET_COPY\n");
2782 return -EINVAL;
2783 }
2784 switch (sub_cmd) {
2785 /* Copy L2L, DW aligned */
2786 case 0x00:
2787 /* L2L, dw */
2788 src_offset = radeon_get_ib_value(p, idx+2);
2789 src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2790 dst_offset = radeon_get_ib_value(p, idx+1);
2791 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
2792 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2793 dev_warn(p->dev, "DMA L2L, dw src buffer too small (%"PRIu64" %lu)\n",
2794 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2795 return -EINVAL;
2796 }
2797 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2798 dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%"PRIu64" %lu)\n",
2799 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2800 return -EINVAL;
2801 }
2802 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2803 ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2804 ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2805 ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2806 p->idx += 5;
2807 break;
2808 /* Copy L2T/T2L */
2809 case 0x08:
2810 /* detile bit */
2811 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
2812 /* tiled src, linear dst */
2813 src_offset = radeon_get_ib_value(p, idx+1);
2814 src_offset <<= 8;
2815 ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
2816
2817 dst_offset = radeon_get_ib_value(p, idx + 7);
2818 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
2819 ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2820 ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2821 } else {
2822 /* linear src, tiled dst */
2823 src_offset = radeon_get_ib_value(p, idx+7);
2824 src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
2825 ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2826 ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2827
2828 dst_offset = radeon_get_ib_value(p, idx+1);
2829 dst_offset <<= 8;
2830 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2831 }
2832 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2833 dev_warn(p->dev, "DMA L2T, src buffer too small (%"PRIu64" %lu)\n",
2834 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2835 return -EINVAL;
2836 }
2837 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2838 dev_warn(p->dev, "DMA L2T, dst buffer too small (%"PRIu64" %lu)\n",
2839 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2840 return -EINVAL;
2841 }
2842 p->idx += 9;
2843 break;
2844 /* Copy L2L, byte aligned */
2845 case 0x40:
2846 /* L2L, byte */
2847 src_offset = radeon_get_ib_value(p, idx+2);
2848 src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2849 dst_offset = radeon_get_ib_value(p, idx+1);
2850 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
2851 if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
2852 dev_warn(p->dev, "DMA L2L, byte src buffer too small (%"PRIu64" %lu)\n",
2853 src_offset + count, radeon_bo_size(src_reloc->robj));
2854 return -EINVAL;
2855 }
2856 if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
2857 dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%"PRIu64" %lu)\n",
2858 dst_offset + count, radeon_bo_size(dst_reloc->robj));
2859 return -EINVAL;
2860 }
2861 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xffffffff);
2862 ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xffffffff);
2863 ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2864 ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2865 p->idx += 5;
2866 break;
2867 /* Copy L2L, partial */
2868 case 0x41:
2869 /* L2L, partial */
2870 if (p->family < CHIP_CAYMAN) {
2871 DRM_ERROR("L2L Partial is cayman only !\n");
2872 return -EINVAL;
2873 }
2874 ib[idx+1] += (u32)(src_reloc->gpu_offset & 0xffffffff);
2875 ib[idx+2] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2876 ib[idx+4] += (u32)(dst_reloc->gpu_offset & 0xffffffff);
2877 ib[idx+5] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2878
2879 p->idx += 9;
2880 break;
2881 /* Copy L2L, DW aligned, broadcast */
2882 case 0x44:
2883 /* L2L, dw, broadcast */
2884 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2885 if (r) {
2886 DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
2887 return -EINVAL;
2888 }
2889 dst_offset = radeon_get_ib_value(p, idx+1);
2890 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2891 dst2_offset = radeon_get_ib_value(p, idx+2);
2892 dst2_offset |= ((u64)(radeon_get_ib_value(p, idx+5) & 0xff)) << 32;
2893 src_offset = radeon_get_ib_value(p, idx+3);
2894 src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
2895 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2896 dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%"PRIu64" %lu)\n",
2897 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2898 return -EINVAL;
2899 }
2900 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2901 dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%"PRIu64" %lu)\n",
2902 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2903 return -EINVAL;
2904 }
2905 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
2906 dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%"PRIu64" %lu)\n",
2907 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
2908 return -EINVAL;
2909 }
2910 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2911 ib[idx+2] += (u32)(dst2_reloc->gpu_offset & 0xfffffffc);
2912 ib[idx+3] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2913 ib[idx+4] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2914 ib[idx+5] += upper_32_bits(dst2_reloc->gpu_offset) & 0xff;
2915 ib[idx+6] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2916 p->idx += 7;
2917 break;
2918 /* Copy L2T Frame to Field */
2919 case 0x48:
2920 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
2921 DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2922 return -EINVAL;
2923 }
2924 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2925 if (r) {
2926 DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2927 return -EINVAL;
2928 }
2929 dst_offset = radeon_get_ib_value(p, idx+1);
2930 dst_offset <<= 8;
2931 dst2_offset = radeon_get_ib_value(p, idx+2);
2932 dst2_offset <<= 8;
2933 src_offset = radeon_get_ib_value(p, idx+8);
2934 src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
2935 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2936 dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%"PRIu64" %lu)\n",
2937 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2938 return -EINVAL;
2939 }
2940 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2941 dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%"PRIu64" %lu)\n",
2942 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2943 return -EINVAL;
2944 }
2945 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
2946 dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%"PRIu64" %lu)\n",
2947 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
2948 return -EINVAL;
2949 }
2950 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2951 ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
2952 ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2953 ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2954 p->idx += 10;
2955 break;
2956 /* Copy L2T/T2L, partial */
2957 case 0x49:
2958 /* L2T, T2L partial */
2959 if (p->family < CHIP_CAYMAN) {
2960 DRM_ERROR("L2T, T2L Partial is cayman only !\n");
2961 return -EINVAL;
2962 }
2963 /* detile bit */
2964 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
2965 /* tiled src, linear dst */
2966 ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
2967
2968 ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2969 ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2970 } else {
2971 /* linear src, tiled dst */
2972 ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2973 ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2974
2975 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2976 }
2977 p->idx += 12;
2978 break;
2979 /* Copy L2T broadcast */
2980 case 0x4b:
2981 /* L2T, broadcast */
2982 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
2983 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
2984 return -EINVAL;
2985 }
2986 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2987 if (r) {
2988 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
2989 return -EINVAL;
2990 }
2991 dst_offset = radeon_get_ib_value(p, idx+1);
2992 dst_offset <<= 8;
2993 dst2_offset = radeon_get_ib_value(p, idx+2);
2994 dst2_offset <<= 8;
2995 src_offset = radeon_get_ib_value(p, idx+8);
2996 src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
2997 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2998 dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%"PRIu64" %lu)\n",
2999 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3000 return -EINVAL;
3001 }
3002 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3003 dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%"PRIu64" %lu)\n",
3004 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3005 return -EINVAL;
3006 }
3007 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3008 dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%"PRIu64" %lu)\n",
3009 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3010 return -EINVAL;
3011 }
3012 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3013 ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
3014 ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3015 ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3016 p->idx += 10;
3017 break;
3018 /* Copy L2T/T2L (tile units) */
3019 case 0x4c:
3020 /* L2T, T2L */
3021 /* detile bit */
3022 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3023 /* tiled src, linear dst */
3024 src_offset = radeon_get_ib_value(p, idx+1);
3025 src_offset <<= 8;
3026 ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
3027
3028 dst_offset = radeon_get_ib_value(p, idx+7);
3029 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3030 ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3031 ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
3032 } else {
3033 /* linear src, tiled dst */
3034 src_offset = radeon_get_ib_value(p, idx+7);
3035 src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3036 ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3037 ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3038
3039 dst_offset = radeon_get_ib_value(p, idx+1);
3040 dst_offset <<= 8;
3041 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3042 }
3043 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3044 dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%"PRIu64" %lu)\n",
3045 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3046 return -EINVAL;
3047 }
3048 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3049 dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%"PRIu64" %lu)\n",
3050 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3051 return -EINVAL;
3052 }
3053 p->idx += 9;
3054 break;
3055 /* Copy T2T, partial (tile units) */
3056 case 0x4d:
3057 /* T2T partial */
3058 if (p->family < CHIP_CAYMAN) {
3059 DRM_ERROR("L2T, T2L Partial is cayman only !\n");
3060 return -EINVAL;
3061 }
3062 ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
3063 ib[idx+4] += (u32)(dst_reloc->gpu_offset >> 8);
3064 p->idx += 13;
3065 break;
3066 /* Copy L2T broadcast (tile units) */
3067 case 0x4f:
3068 /* L2T, broadcast */
3069 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3070 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3071 return -EINVAL;
3072 }
3073 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3074 if (r) {
3075 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3076 return -EINVAL;
3077 }
3078 dst_offset = radeon_get_ib_value(p, idx+1);
3079 dst_offset <<= 8;
3080 dst2_offset = radeon_get_ib_value(p, idx+2);
3081 dst2_offset <<= 8;
3082 src_offset = radeon_get_ib_value(p, idx+8);
3083 src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3084 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3085 dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%"PRIu64" %lu)\n",
3086 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3087 return -EINVAL;
3088 }
3089 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3090 dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%"PRIu64" %lu)\n",
3091 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3092 return -EINVAL;
3093 }
3094 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3095 dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%"PRIu64" %lu)\n",
3096 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3097 return -EINVAL;
3098 }
3099 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3100 ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
3101 ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3102 ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3103 p->idx += 10;
3104 break;
3105 default:
3106 DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, header);
3107 return -EINVAL;
3108 }
3109 break;
3110 case DMA_PACKET_CONSTANT_FILL:
3111 r = r600_dma_cs_next_reloc(p, &dst_reloc);
3112 if (r) {
3113 DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
3114 return -EINVAL;
3115 }
3116 dst_offset = radeon_get_ib_value(p, idx+1);
3117 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16;
3118 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3119 dev_warn(p->dev, "DMA constant fill buffer too small (%"PRIu64" %lu)\n",
3120 dst_offset, radeon_bo_size(dst_reloc->robj));
3121 return -EINVAL;
3122 }
3123 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3124 ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) << 16) & 0x00ff0000;
3125 p->idx += 4;
3126 break;
3127 case DMA_PACKET_NOP:
3128 p->idx += 1;
3129 break;
3130 default:
3131 DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
3132 return -EINVAL;
3133 }
3134 } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
3135 #if 0
3136 for (r = 0; r < p->ib->length_dw; r++) {
3137 printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
3138 mdelay(1);
3139 }
3140 #endif
3141 return 0;
3142 }
3143
3144 /* vm parser */
/**
 * evergreen_vm_reg_valid - check if a register may be written by a VM client
 * @reg: register byte offset to validate
 *
 * Whitelist used when parsing IBs submitted through the VM path: context
 * registers (offset >= 0x28000) are always allowed, and only the specific
 * config registers enumerated below may be touched.  Anything else is
 * rejected and logged.
 *
 * Returns true if the register is safe for userspace to program,
 * false otherwise.
 */
static bool evergreen_vm_reg_valid(u32 reg)
{
	/* context regs are fine */
	if (reg >= 0x28000)
		return true;

	/* check config regs: only this explicit set is permitted */
	switch (reg) {
	case WAIT_UNTIL:
	case GRBM_GFX_INDEX:
	case CP_STRMOUT_CNTL:
	case CP_COHER_CNTL:
	case CP_COHER_SIZE:
	case VGT_VTX_VECT_EJECT_REG:
	case VGT_CACHE_INVALIDATION:
	case VGT_GS_VERTEX_REUSE:
	case VGT_PRIMITIVE_TYPE:
	case VGT_INDEX_TYPE:
	case VGT_NUM_INDICES:
	case VGT_NUM_INSTANCES:
	case VGT_COMPUTE_DIM_X:
	case VGT_COMPUTE_DIM_Y:
	case VGT_COMPUTE_DIM_Z:
	case VGT_COMPUTE_START_X:
	case VGT_COMPUTE_START_Y:
	case VGT_COMPUTE_START_Z:
	case VGT_COMPUTE_INDEX:
	case VGT_COMPUTE_THREAD_GROUP_SIZE:
	case VGT_HS_OFFCHIP_PARAM:
	case PA_CL_ENHANCE:
	case PA_SU_LINE_STIPPLE_VALUE:
	case PA_SC_LINE_STIPPLE_STATE:
	case PA_SC_ENHANCE:
	case SQ_DYN_GPR_CNTL_PS_FLUSH_REQ:
	case SQ_DYN_GPR_SIMD_LOCK_EN:
	case SQ_CONFIG:
	case SQ_GPR_RESOURCE_MGMT_1:
	case SQ_GLOBAL_GPR_RESOURCE_MGMT_1:
	case SQ_GLOBAL_GPR_RESOURCE_MGMT_2:
	case SQ_CONST_MEM_BASE:
	case SQ_STATIC_THREAD_MGMT_1:
	case SQ_STATIC_THREAD_MGMT_2:
	case SQ_STATIC_THREAD_MGMT_3:
	case SPI_CONFIG_CNTL:
	case SPI_CONFIG_CNTL_1:
	case TA_CNTL_AUX:
	case DB_DEBUG:
	case DB_DEBUG2:
	case DB_DEBUG3:
	case DB_DEBUG4:
	case DB_WATERMARKS:
	case TD_PS_BORDER_COLOR_INDEX:
	case TD_PS_BORDER_COLOR_RED:
	case TD_PS_BORDER_COLOR_GREEN:
	case TD_PS_BORDER_COLOR_BLUE:
	case TD_PS_BORDER_COLOR_ALPHA:
	case TD_VS_BORDER_COLOR_INDEX:
	case TD_VS_BORDER_COLOR_RED:
	case TD_VS_BORDER_COLOR_GREEN:
	case TD_VS_BORDER_COLOR_BLUE:
	case TD_VS_BORDER_COLOR_ALPHA:
	case TD_GS_BORDER_COLOR_INDEX:
	case TD_GS_BORDER_COLOR_RED:
	case TD_GS_BORDER_COLOR_GREEN:
	case TD_GS_BORDER_COLOR_BLUE:
	case TD_GS_BORDER_COLOR_ALPHA:
	case TD_HS_BORDER_COLOR_INDEX:
	case TD_HS_BORDER_COLOR_RED:
	case TD_HS_BORDER_COLOR_GREEN:
	case TD_HS_BORDER_COLOR_BLUE:
	case TD_HS_BORDER_COLOR_ALPHA:
	case TD_LS_BORDER_COLOR_INDEX:
	case TD_LS_BORDER_COLOR_RED:
	case TD_LS_BORDER_COLOR_GREEN:
	case TD_LS_BORDER_COLOR_BLUE:
	case TD_LS_BORDER_COLOR_ALPHA:
	case TD_CS_BORDER_COLOR_INDEX:
	case TD_CS_BORDER_COLOR_RED:
	case TD_CS_BORDER_COLOR_GREEN:
	case TD_CS_BORDER_COLOR_BLUE:
	case TD_CS_BORDER_COLOR_ALPHA:
	case SQ_ESGS_RING_SIZE:
	case SQ_GSVS_RING_SIZE:
	case SQ_ESTMP_RING_SIZE:
	case SQ_GSTMP_RING_SIZE:
	case SQ_HSTMP_RING_SIZE:
	case SQ_LSTMP_RING_SIZE:
	case SQ_PSTMP_RING_SIZE:
	case SQ_VSTMP_RING_SIZE:
	case SQ_ESGS_RING_ITEMSIZE:
	case SQ_ESTMP_RING_ITEMSIZE:
	case SQ_GSTMP_RING_ITEMSIZE:
	case SQ_GSVS_RING_ITEMSIZE:
	case SQ_GS_VERT_ITEMSIZE:
	case SQ_GS_VERT_ITEMSIZE_1:
	case SQ_GS_VERT_ITEMSIZE_2:
	case SQ_GS_VERT_ITEMSIZE_3:
	case SQ_GSVS_RING_OFFSET_1:
	case SQ_GSVS_RING_OFFSET_2:
	case SQ_GSVS_RING_OFFSET_3:
	case SQ_HSTMP_RING_ITEMSIZE:
	case SQ_LSTMP_RING_ITEMSIZE:
	case SQ_PSTMP_RING_ITEMSIZE:
	case SQ_VSTMP_RING_ITEMSIZE:
	case VGT_TF_RING_SIZE:
	case SQ_ESGS_RING_BASE:
	case SQ_GSVS_RING_BASE:
	case SQ_ESTMP_RING_BASE:
	case SQ_GSTMP_RING_BASE:
	case SQ_HSTMP_RING_BASE:
	case SQ_LSTMP_RING_BASE:
	case SQ_PSTMP_RING_BASE:
	case SQ_VSTMP_RING_BASE:
	case CAYMAN_VGT_OFFCHIP_LDS_BASE:
	case CAYMAN_SQ_EX_ALLOC_TABLE_SLOTS:
		return true;
	default:
		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
		return false;
	}
}
3266
/**
 * evergreen_vm_packet3_check - validate a type-3 packet in a VM IB
 * @rdev: radeon_device pointer
 * @ib: IB dword buffer
 * @pkt: decoded packet header (idx, opcode, count)
 *
 * Most packets are allowed through unchanged; packets that can write to
 * registers (COND_WRITE, COPY_DW, SET_CONFIG_REG, CP_DMA) have every target
 * register checked against evergreen_vm_reg_valid().
 *
 * Returns 0 if the packet is acceptable, -EINVAL otherwise.
 */
static int evergreen_vm_packet3_check(struct radeon_device *rdev,
				      u32 *ib, struct radeon_cs_packet *pkt)
{
	u32 idx = pkt->idx + 1;		/* first dword after the header */
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;
	u32 command, info;

	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_MODE_CONTROL:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_DRAW_INDEX_OFFSET:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDEX:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_BOOL_CONST:
	case PACKET3_SET_LOOP_CONST:
	case PACKET3_SET_RESOURCE:
	case PACKET3_SET_SAMPLER:
	case PACKET3_SET_CTL_CONST:
	case PACKET3_SET_RESOURCE_OFFSET:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_RESOURCE_INDIRECT:
	case CAYMAN_PACKET3_DEALLOC_STATE:
		/* harmless on the VM path; no register validation needed */
		break;
	case PACKET3_COND_WRITE:
		/* bit 8 set: write destination is a register (dword index
		 * idx + 5, stored as a dword offset) — validate it */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!evergreen_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* bit 1 set: copy destination is a register (dword index
		 * idx + 3, stored as a dword offset) — validate it */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!evergreen_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* range-check the register window, then whitelist-check
		 * every register written by the packet */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!evergreen_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		command = ib[idx + 4];	/* CMD dword: byte count + SAS/DAS/SAIC/DAIC flags */
		info = ib[idx + 1];	/* src/dst address-space selectors */
		if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
		    (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
		    ((((info & 0x00300000) >> 20) == 0) &&
		     (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
		    ((((info & 0x60000000) >> 29) == 0) &&
		     (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
			/* non mem to mem copies requires dw aligned count */
			if ((command & 0x1fffff) % 4) {
				DRM_ERROR("CP DMA command requires dw count alignment\n");
				return -EINVAL;
			}
		}
		if (command & PACKET3_CP_DMA_CMD_SAS) {
			/* src address space is register */
			if (((info & 0x60000000) >> 29) == 0) {
				start_reg = idx_value << 2;
				if (command & PACKET3_CP_DMA_CMD_SAIC) {
					/* SAIC: src does not increment — one register */
					reg = start_reg;
					if (!evergreen_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad SRC register\n");
						return -EINVAL;
					}
				} else {
					/* check every register in the copied range */
					for (i = 0; i < (command & 0x1fffff); i++) {
						reg = start_reg + (4 * i);
						if (!evergreen_vm_reg_valid(reg)) {
							DRM_ERROR("CP DMA Bad SRC register\n");
							return -EINVAL;
						}
					}
				}
			}
		}
		if (command & PACKET3_CP_DMA_CMD_DAS) {
			/* dst address space is register */
			if (((info & 0x00300000) >> 20) == 0) {
				start_reg = ib[idx + 2];
				if (command & PACKET3_CP_DMA_CMD_DAIC) {
					/* DAIC: dst does not increment — one register */
					reg = start_reg;
					if (!evergreen_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad DST register\n");
						return -EINVAL;
					}
				} else {
					/* check every register in the copied range */
					for (i = 0; i < (command & 0x1fffff); i++) {
						reg = start_reg + (4 * i);
						if (!evergreen_vm_reg_valid(reg)) {
							DRM_ERROR("CP DMA Bad DST register\n");
							return -EINVAL;
						}
					}
				}
			}
		}
		break;
	default:
		/* unknown opcode — reject */
		return -EINVAL;
	}
	return 0;
}
3411
evergreen_ib_parse(struct radeon_device * rdev,struct radeon_ib * ib)3412 int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3413 {
3414 int ret = 0;
3415 u32 idx = 0;
3416 struct radeon_cs_packet pkt;
3417
3418 do {
3419 pkt.idx = idx;
3420 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
3421 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
3422 pkt.one_reg_wr = 0;
3423 switch (pkt.type) {
3424 case RADEON_PACKET_TYPE0:
3425 dev_err(rdev->dev, "Packet0 not allowed!\n");
3426 ret = -EINVAL;
3427 break;
3428 case RADEON_PACKET_TYPE2:
3429 idx += 1;
3430 break;
3431 case RADEON_PACKET_TYPE3:
3432 pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
3433 ret = evergreen_vm_packet3_check(rdev, ib->ptr, &pkt);
3434 idx += pkt.count + 2;
3435 break;
3436 default:
3437 dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
3438 ret = -EINVAL;
3439 break;
3440 }
3441 if (ret)
3442 break;
3443 } while (idx < ib->length_dw);
3444
3445 return ret;
3446 }
3447
3448 /**
3449 * evergreen_dma_ib_parse() - parse the DMA IB for VM
3450 * @rdev: radeon_device pointer
3451 * @ib: radeon_ib pointer
3452 *
3453 * Parses the DMA IB from the VM CS ioctl
3454 * checks for errors. (Cayman-SI)
3455 * Returns 0 for success and an error on failure.
3456 **/
evergreen_dma_ib_parse(struct radeon_device * rdev,struct radeon_ib * ib)3457 int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3458 {
3459 u32 idx = 0;
3460 u32 header, cmd, count, sub_cmd;
3461
3462 do {
3463 header = ib->ptr[idx];
3464 cmd = GET_DMA_CMD(header);
3465 count = GET_DMA_COUNT(header);
3466 sub_cmd = GET_DMA_SUB_CMD(header);
3467
3468 switch (cmd) {
3469 case DMA_PACKET_WRITE:
3470 switch (sub_cmd) {
3471 /* tiled */
3472 case 8:
3473 idx += count + 7;
3474 break;
3475 /* linear */
3476 case 0:
3477 idx += count + 3;
3478 break;
3479 default:
3480 DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, ib->ptr[idx]);
3481 return -EINVAL;
3482 }
3483 break;
3484 case DMA_PACKET_COPY:
3485 switch (sub_cmd) {
3486 /* Copy L2L, DW aligned */
3487 case 0x00:
3488 idx += 5;
3489 break;
3490 /* Copy L2T/T2L */
3491 case 0x08:
3492 idx += 9;
3493 break;
3494 /* Copy L2L, byte aligned */
3495 case 0x40:
3496 idx += 5;
3497 break;
3498 /* Copy L2L, partial */
3499 case 0x41:
3500 idx += 9;
3501 break;
3502 /* Copy L2L, DW aligned, broadcast */
3503 case 0x44:
3504 idx += 7;
3505 break;
3506 /* Copy L2T Frame to Field */
3507 case 0x48:
3508 idx += 10;
3509 break;
3510 /* Copy L2T/T2L, partial */
3511 case 0x49:
3512 idx += 12;
3513 break;
3514 /* Copy L2T broadcast */
3515 case 0x4b:
3516 idx += 10;
3517 break;
3518 /* Copy L2T/T2L (tile units) */
3519 case 0x4c:
3520 idx += 9;
3521 break;
3522 /* Copy T2T, partial (tile units) */
3523 case 0x4d:
3524 idx += 13;
3525 break;
3526 /* Copy L2T broadcast (tile units) */
3527 case 0x4f:
3528 idx += 10;
3529 break;
3530 default:
3531 DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, ib->ptr[idx]);
3532 return -EINVAL;
3533 }
3534 break;
3535 case DMA_PACKET_CONSTANT_FILL:
3536 idx += 4;
3537 break;
3538 case DMA_PACKET_NOP:
3539 idx += 1;
3540 break;
3541 default:
3542 DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
3543 return -EINVAL;
3544 }
3545 } while (idx < ib->length_dw);
3546
3547 return 0;
3548 }
3549