1 /* 2 * Copyright 2010 Advanced Micro Devices, Inc. 3 * Copyright 2008 Red Hat Inc. 4 * Copyright 2009 Jerome Glisse. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 * OTHER DEALINGS IN THE SOFTWARE. 23 * 24 * Authors: Dave Airlie 25 * Alex Deucher 26 * Jerome Glisse 27 */ 28 #include <drm/drmP.h> 29 #include "radeon.h" 30 #include "radeon_asic.h" 31 #include "evergreend.h" 32 #include "evergreen_reg_safe.h" 33 #include "cayman_reg_safe.h" 34 35 #define MAX(a,b) (((a)>(b))?(a):(b)) 36 #define MIN(a,b) (((a)<(b))?(a):(b)) 37 38 struct evergreen_cs_track { 39 u32 group_size; 40 u32 nbanks; 41 u32 npipes; 42 u32 row_size; 43 /* value we track */ 44 u32 nsamples; /* unused */ 45 struct radeon_bo *cb_color_bo[12]; 46 u32 cb_color_bo_offset[12]; 47 struct radeon_bo *cb_color_fmask_bo[8]; /* unused */ 48 struct radeon_bo *cb_color_cmask_bo[8]; /* unused */ 49 u32 cb_color_info[12]; 50 u32 cb_color_view[12]; 51 u32 cb_color_pitch[12]; 52 u32 cb_color_slice[12]; 53 u32 cb_color_slice_idx[12]; 54 u32 cb_color_attrib[12]; 55 u32 cb_color_cmask_slice[8];/* unused */ 56 u32 cb_color_fmask_slice[8];/* unused */ 57 u32 cb_target_mask; 58 u32 cb_shader_mask; /* unused */ 59 u32 vgt_strmout_config; 60 u32 vgt_strmout_buffer_config; 61 struct radeon_bo *vgt_strmout_bo[4]; 62 u32 vgt_strmout_bo_offset[4]; 63 u32 vgt_strmout_size[4]; 64 u32 db_depth_control; 65 u32 db_depth_view; 66 u32 db_depth_slice; 67 u32 db_depth_size; 68 u32 db_z_info; 69 u32 db_z_read_offset; 70 u32 db_z_write_offset; 71 struct radeon_bo *db_z_read_bo; 72 struct radeon_bo *db_z_write_bo; 73 u32 db_s_info; 74 u32 db_s_read_offset; 75 u32 db_s_write_offset; 76 struct radeon_bo *db_s_read_bo; 77 struct radeon_bo *db_s_write_bo; 78 bool sx_misc_kill_all_prims; 79 bool cb_dirty; 80 bool db_dirty; 81 bool streamout_dirty; 82 u32 htile_offset; 83 u32 htile_surface; 84 struct radeon_bo *htile_bo; 85 }; 86 87 static u32 evergreen_cs_get_aray_mode(u32 tiling_flags) 88 { 89 if (tiling_flags & RADEON_TILING_MACRO) 90 return ARRAY_2D_TILED_THIN1; 91 else if (tiling_flags & RADEON_TILING_MICRO) 92 return ARRAY_1D_TILED_THIN1; 93 else 94 return ARRAY_LINEAR_GENERAL; 95 } 96 97 static u32 evergreen_cs_get_num_banks(u32 nbanks) 98 { 99 switch (nbanks) { 100 case 2: 101 return ADDR_SURF_2_BANK; 102 case 4: 103 return ADDR_SURF_4_BANK; 104 case 8: 105 default: 106 return ADDR_SURF_8_BANK; 107 case 16: 108 return ADDR_SURF_16_BANK; 109 } 110 } 111 112 static void evergreen_cs_track_init(struct evergreen_cs_track *track) 113 { 114 int i; 115 116 for (i = 0; i < 8; i++) { 117 track->cb_color_fmask_bo[i] = NULL; 118 track->cb_color_cmask_bo[i] = NULL; 119 track->cb_color_cmask_slice[i] = 0; 120 track->cb_color_fmask_slice[i] = 0; 121 } 122 123 for (i = 0; i < 12; i++) { 124 track->cb_color_bo[i] = NULL; 125 track->cb_color_bo_offset[i] = 0xFFFFFFFF; 126 track->cb_color_info[i] = 0; 127 track->cb_color_view[i] = 0xFFFFFFFF; 128 track->cb_color_pitch[i] = 0; 129 track->cb_color_slice[i] = 0xfffffff; 130 track->cb_color_slice_idx[i] = 0; 131 } 132 track->cb_target_mask = 0xFFFFFFFF; 133 track->cb_shader_mask = 0xFFFFFFFF; 134 track->cb_dirty = true; 135 136 track->db_depth_slice = 0xffffffff; 137 track->db_depth_view = 0xFFFFC000; 138 track->db_depth_size = 0xFFFFFFFF; 139 track->db_depth_control = 0xFFFFFFFF; 140 track->db_z_info = 0xFFFFFFFF; 141 track->db_z_read_offset = 0xFFFFFFFF; 142 track->db_z_write_offset = 0xFFFFFFFF; 143 track->db_z_read_bo = NULL; 144 track->db_z_write_bo = NULL; 145 track->db_s_info = 0xFFFFFFFF; 146 track->db_s_read_offset = 0xFFFFFFFF; 147 track->db_s_write_offset = 0xFFFFFFFF; 148 track->db_s_read_bo = NULL; 149 track->db_s_write_bo = NULL; 150 track->db_dirty = true; 151 track->htile_bo = NULL; 152 track->htile_offset = 0xFFFFFFFF; 153 track->htile_surface = 0; 154 155 for (i = 0; i < 4; i++) { 156 track->vgt_strmout_size[i] = 0; 157 track->vgt_strmout_bo[i] = NULL; 158 track->vgt_strmout_bo_offset[i] = 0xFFFFFFFF; 159 } 160 track->streamout_dirty = true; 161 track->sx_misc_kill_all_prims = false; 162 } 163 164 struct eg_surface { 165 /* value gathered from cs */ 166 unsigned nbx; 167 unsigned nby; 168 unsigned format; 169 unsigned mode; 170 unsigned nbanks; 171 unsigned bankw; 172 unsigned bankh; 173 unsigned tsplit; 174 unsigned mtilea; 175 unsigned nsamples; 176 /* output value */ 177 unsigned bpe; 178 unsigned layer_size; 179 unsigned palign; 180 unsigned halign; 181 unsigned long base_align; 182 }; 183 184 static int evergreen_surface_check_linear(struct radeon_cs_parser *p, 185 struct eg_surface *surf, 186 const char *prefix) 187 { 188 surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples; 189 surf->base_align = surf->bpe; 190 surf->palign = 1; 191 surf->halign = 1; 192 return 0; 193 } 194 195 static int evergreen_surface_check_linear_aligned(struct radeon_cs_parser *p, 196 struct eg_surface *surf, 197 const char *prefix) 198 { 199 struct evergreen_cs_track *track = p->track; 200 unsigned palign; 201 202 palign = MAX(64, track->group_size / surf->bpe); 203 surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples; 204 surf->base_align = track->group_size; 205 surf->palign = palign; 206 surf->halign = 1; 207 if (surf->nbx & (palign - 1)) { 208 if (prefix) { 209 dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n", 210 __func__, __LINE__, prefix, surf->nbx, palign); 211 } 212 return -EINVAL; 213 } 214 return 0; 215 } 216 217 static int evergreen_surface_check_1d(struct radeon_cs_parser *p, 218 struct eg_surface *surf, 219 const char *prefix) 220 { 221 struct evergreen_cs_track *track = p->track; 222 unsigned palign; 223 224 palign = track->group_size / (8 * surf->bpe * surf->nsamples); 225 palign = MAX(8, palign); 226 surf->layer_size = surf->nbx * surf->nby * surf->bpe; 227 surf->base_align = track->group_size; 228 surf->palign = palign; 229 surf->halign = 8; 230 if ((surf->nbx & (palign - 1))) { 231 if (prefix) { 232 dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d (%d %d %d)\n", 233 __func__, __LINE__, prefix, surf->nbx, palign, 234 track->group_size, surf->bpe, surf->nsamples); 235 } 236 return -EINVAL; 237 } 238 if ((surf->nby & (8 - 1))) { 239 if (prefix) { 240 dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with 8\n", 241 __func__, __LINE__, prefix, surf->nby); 242 } 243 return -EINVAL; 244 } 245 return 0; 246 } 247 248 static int evergreen_surface_check_2d(struct radeon_cs_parser *p, 249 struct eg_surface *surf, 250 const char *prefix) 251 { 252 struct evergreen_cs_track *track = p->track; 253 unsigned palign, halign, tileb, slice_pt; 254 unsigned mtile_pr, mtile_ps, mtileb; 255 256 tileb = 64 * surf->bpe * surf->nsamples; 257 slice_pt = 1; 258 if (tileb > surf->tsplit) { 259 slice_pt = tileb / surf->tsplit; 260 } 261 tileb = tileb / slice_pt; 262 /* macro tile width & height */ 263 palign = (8 * surf->bankw * track->npipes) * surf->mtilea; 264 halign = (8 * surf->bankh * surf->nbanks) / surf->mtilea; 265 mtileb = (palign / 8) * (halign / 8) * tileb; 266 mtile_pr = surf->nbx / palign; 267 mtile_ps = (mtile_pr * surf->nby) / halign; 268 surf->layer_size = mtile_ps * mtileb * slice_pt; 269 surf->base_align = (palign / 8) * (halign / 8) * tileb; 270 surf->palign = palign; 271 surf->halign = halign; 272 273 if ((surf->nbx & (palign - 1))) { 274 if (prefix) { 275 dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n", 276 __func__, __LINE__, prefix, surf->nbx, palign); 277 } 278 return -EINVAL; 279 } 280 if ((surf->nby & (halign - 1))) { 281 if (prefix) { 282 dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with %d\n", 283 __func__, __LINE__, prefix, surf->nby, halign); 284 } 285 return -EINVAL; 286 } 287 288 return 0; 289 } 290 291 static int evergreen_surface_check(struct radeon_cs_parser *p, 292 struct eg_surface *surf, 293 const char *prefix) 294 { 295 /* some common value computed here */ 296 surf->bpe = r600_fmt_get_blocksize(surf->format); 297 298 switch (surf->mode) { 299 case ARRAY_LINEAR_GENERAL: 300 return evergreen_surface_check_linear(p, surf, prefix); 301 case ARRAY_LINEAR_ALIGNED: 302 return evergreen_surface_check_linear_aligned(p, surf, prefix); 303 case ARRAY_1D_TILED_THIN1: 304 return evergreen_surface_check_1d(p, surf, prefix); 305 case ARRAY_2D_TILED_THIN1: 306 return evergreen_surface_check_2d(p, surf, prefix); 307 default: 308 dev_warn(p->dev, "%s:%d %s invalid array mode %d\n", 309 __func__, __LINE__, prefix, surf->mode); 310 return -EINVAL; 311 } 312 return -EINVAL; 313 } 314 315 static int evergreen_surface_value_conv_check(struct radeon_cs_parser *p, 316 struct eg_surface *surf, 317 const char *prefix) 318 { 319 switch (surf->mode) { 320 case ARRAY_2D_TILED_THIN1: 321 break; 322 case ARRAY_LINEAR_GENERAL: 323 case ARRAY_LINEAR_ALIGNED: 324 case ARRAY_1D_TILED_THIN1: 325 return 0; 326 default: 327 dev_warn(p->dev, "%s:%d %s invalid array mode %d\n", 328 __func__, __LINE__, prefix, surf->mode); 329 return -EINVAL; 330 } 331 332 switch (surf->nbanks) { 333 case 0: surf->nbanks = 2; break; 334 case 1: surf->nbanks = 4; break; 335 case 2: surf->nbanks = 8; break; 336 case 3: surf->nbanks = 16; break; 337 default: 338 dev_warn(p->dev, "%s:%d %s invalid number of banks %d\n", 339 __func__, __LINE__, prefix, surf->nbanks); 340 return -EINVAL; 341 } 342 switch (surf->bankw) { 343 case 0: surf->bankw = 1; break; 344 case 1: surf->bankw = 2; break; 345 case 2: surf->bankw = 4; break; 346 case 3: surf->bankw = 8; break; 347 default: 348 dev_warn(p->dev, "%s:%d %s invalid bankw %d\n", 349 __func__, __LINE__, prefix, surf->bankw); 350 return -EINVAL; 351 } 352 switch (surf->bankh) { 353 case 0: surf->bankh = 1; break; 354 case 1: surf->bankh = 2; break; 355 case 2: surf->bankh = 4; break; 356 case 3: surf->bankh = 8; break; 357 default: 358 dev_warn(p->dev, "%s:%d %s invalid bankh %d\n", 359 __func__, __LINE__, prefix, surf->bankh); 360 return -EINVAL; 361 } 362 switch (surf->mtilea) { 363 case 0: surf->mtilea = 1; break; 364 case 1: surf->mtilea = 2; break; 365 case 2: surf->mtilea = 4; break; 366 case 3: surf->mtilea = 8; break; 367 default: 368 dev_warn(p->dev, "%s:%d %s invalid macro tile aspect %d\n", 369 __func__, __LINE__, prefix, surf->mtilea); 370 return -EINVAL; 371 } 372 switch (surf->tsplit) { 373 case 0: surf->tsplit = 64; break; 374 case 1: surf->tsplit = 128; break; 375 case 2: surf->tsplit = 256; break; 376 case 3: surf->tsplit = 512; break; 377 case 4: surf->tsplit = 1024; break; 378 case 5: surf->tsplit = 2048; break; 379 case 6: surf->tsplit = 4096; break; 380 default: 381 dev_warn(p->dev, "%s:%d %s invalid tile split %d\n", 382 __func__, __LINE__, prefix, surf->tsplit); 383 return -EINVAL; 384 } 385 return 0; 386 } 387 388 static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned id) 389 { 390 struct evergreen_cs_track *track = p->track; 391 struct eg_surface surf; 392 unsigned pitch, slice, mslice; 393 unsigned long offset; 394 int r; 395 396 mslice = G_028C6C_SLICE_MAX(track->cb_color_view[id]) + 1; 397 pitch = track->cb_color_pitch[id]; 398 slice = track->cb_color_slice[id]; 399 surf.nbx = (pitch + 1) * 8; 400 surf.nby = ((slice + 1) * 64) / surf.nbx; 401 surf.mode = G_028C70_ARRAY_MODE(track->cb_color_info[id]); 402 surf.format = G_028C70_FORMAT(track->cb_color_info[id]); 403 surf.tsplit = G_028C74_TILE_SPLIT(track->cb_color_attrib[id]); 404 surf.nbanks = G_028C74_NUM_BANKS(track->cb_color_attrib[id]); 405 surf.bankw = G_028C74_BANK_WIDTH(track->cb_color_attrib[id]); 406 surf.bankh = G_028C74_BANK_HEIGHT(track->cb_color_attrib[id]); 407 surf.mtilea = G_028C74_MACRO_TILE_ASPECT(track->cb_color_attrib[id]); 408 surf.nsamples = 1; 409 410 if (!r600_fmt_is_valid_color(surf.format)) { 411 dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08x)\n", 412 __func__, __LINE__, surf.format, 413 id, track->cb_color_info[id]); 414 return -EINVAL; 415 } 416 417 r = evergreen_surface_value_conv_check(p, &surf, "cb"); 418 if (r) { 419 return r; 420 } 421 422 r = evergreen_surface_check(p, &surf, "cb"); 423 if (r) { 424 dev_warn(p->dev, "%s:%d cb[%d] invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n", 425 __func__, __LINE__, id, track->cb_color_pitch[id], 426 track->cb_color_slice[id], track->cb_color_attrib[id], 427 track->cb_color_info[id]); 428 return r; 429 } 430 431 offset = track->cb_color_bo_offset[id] << 8; 432 if (offset & (surf.base_align - 1)) { 433 dev_warn(p->dev, "%s:%d cb[%d] bo base %ld not aligned with %ld\n", 434 __func__, __LINE__, id, offset, surf.base_align); 435 return -EINVAL; 436 } 437 438 offset += surf.layer_size * mslice; 439 if (offset > radeon_bo_size(track->cb_color_bo[id])) { 440 /* old ddx are broken they allocate bo with w*h*bpp but 441 * program slice with ALIGN(h, 8), catch this and patch 442 * command stream. 443 */ 444 if (!surf.mode) { 445 volatile u32 *ib = p->ib.ptr; 446 unsigned long tmp, nby, bsize, size, min = 0; 447 448 /* find the height the ddx wants */ 449 if (surf.nby > 8) { 450 min = surf.nby - 8; 451 } 452 bsize = radeon_bo_size(track->cb_color_bo[id]); 453 tmp = track->cb_color_bo_offset[id] << 8; 454 for (nby = surf.nby; nby > min; nby--) { 455 size = nby * surf.nbx * surf.bpe * surf.nsamples; 456 if ((tmp + size * mslice) <= bsize) { 457 break; 458 } 459 } 460 if (nby > min) { 461 surf.nby = nby; 462 slice = ((nby * surf.nbx) / 64) - 1; 463 if (!evergreen_surface_check(p, &surf, "cb")) { 464 /* check if this one works */ 465 tmp += surf.layer_size * mslice; 466 if (tmp <= bsize) { 467 ib[track->cb_color_slice_idx[id]] = slice; 468 goto old_ddx_ok; 469 } 470 } 471 } 472 } 473 dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, " 474 "offset %d, max layer %d, bo size %ld, slice %d)\n", 475 __func__, __LINE__, id, surf.layer_size, 476 track->cb_color_bo_offset[id] << 8, mslice, 477 radeon_bo_size(track->cb_color_bo[id]), slice); 478 dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n", 479 __func__, __LINE__, surf.nbx, surf.nby, 480 surf.mode, surf.bpe, surf.nsamples, 481 surf.bankw, surf.bankh, 482 surf.tsplit, surf.mtilea); 483 return -EINVAL; 484 } 485 old_ddx_ok: 486 487 return 0; 488 } 489 490 static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p, 491 unsigned nbx, unsigned nby) 492 { 493 struct evergreen_cs_track *track = p->track; 494 unsigned long size; 495 496 if (track->htile_bo == NULL) { 497 dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n", 498 __func__, __LINE__, track->db_z_info); 499 return -EINVAL; 500 } 501 502 if (G_028ABC_LINEAR(track->htile_surface)) { 503 /* pitch must be 16 htiles aligned == 16 * 8 pixel aligned */ 504 nbx = round_up(nbx, 16 * 8); 505 /* height is npipes htiles aligned == npipes * 8 pixel aligned */ 506 nby = round_up(nby, track->npipes * 8); 507 } else { 508 /* always assume 8x8 htile */ 509 /* align is htile align * 8, htile align vary according to 510 * number of pipe and tile width and nby 511 */ 512 switch (track->npipes) { 513 case 8: 514 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ 515 nbx = round_up(nbx, 64 * 8); 516 nby = round_up(nby, 64 * 8); 517 break; 518 case 4: 519 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ 520 nbx = round_up(nbx, 64 * 8); 521 nby = round_up(nby, 32 * 8); 522 break; 523 case 2: 524 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ 525 nbx = round_up(nbx, 32 * 8); 526 nby = round_up(nby, 32 * 8); 527 break; 528 case 1: 529 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/ 530 nbx = round_up(nbx, 32 * 8); 531 nby = round_up(nby, 16 * 8); 532 break; 533 default: 534 dev_warn(p->dev, "%s:%d invalid num pipes %d\n", 535 __func__, __LINE__, track->npipes); 536 return -EINVAL; 537 } 538 } 539 /* compute number of htile */ 540 nbx = nbx >> 3; 541 nby = nby >> 3; 542 /* size must be aligned on npipes * 2K boundary */ 543 size = roundup(nbx * nby * 4, track->npipes * (2 << 10)); 544 size += track->htile_offset; 545 546 if (size > radeon_bo_size(track->htile_bo)) { 547 dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n", 548 __func__, __LINE__, radeon_bo_size(track->htile_bo), 549 size, nbx, nby); 550 return -EINVAL; 551 } 552 return 0; 553 } 554 555 static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p) 556 { 557 struct evergreen_cs_track *track = p->track; 558 struct eg_surface surf; 559 unsigned pitch, slice, mslice; 560 unsigned long offset; 561 int r; 562 563 mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1; 564 pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size); 565 slice = track->db_depth_slice; 566 surf.nbx = (pitch + 1) * 8; 567 surf.nby = ((slice + 1) * 64) / surf.nbx; 568 surf.mode = G_028040_ARRAY_MODE(track->db_z_info); 569 surf.format = G_028044_FORMAT(track->db_s_info); 570 surf.tsplit = G_028044_TILE_SPLIT(track->db_s_info); 571 surf.nbanks = G_028040_NUM_BANKS(track->db_z_info); 572 surf.bankw = G_028040_BANK_WIDTH(track->db_z_info); 573 surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info); 574 surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info); 575 surf.nsamples = 1; 576 577 if (surf.format != 1) { 578 dev_warn(p->dev, "%s:%d stencil invalid format %d\n", 579 __func__, __LINE__, surf.format); 580 return -EINVAL; 581 } 582 /* replace by color format so we can use same code */ 583 surf.format = V_028C70_COLOR_8; 584 585 r = evergreen_surface_value_conv_check(p, &surf, "stencil"); 586 if (r) { 587 return r; 588 } 589 590 r = evergreen_surface_check(p, &surf, NULL); 591 if (r) { 592 /* old userspace doesn't compute proper depth/stencil alignment 593 * check that alignment against a bigger byte per elements and 594 * only report if that alignment is wrong too. 595 */ 596 surf.format = V_028C70_COLOR_8_8_8_8; 597 r = evergreen_surface_check(p, &surf, "stencil"); 598 if (r) { 599 dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n", 600 __func__, __LINE__, track->db_depth_size, 601 track->db_depth_slice, track->db_s_info, track->db_z_info); 602 } 603 return r; 604 } 605 606 offset = track->db_s_read_offset << 8; 607 if (offset & (surf.base_align - 1)) { 608 dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n", 609 __func__, __LINE__, offset, surf.base_align); 610 return -EINVAL; 611 } 612 offset += surf.layer_size * mslice; 613 if (offset > radeon_bo_size(track->db_s_read_bo)) { 614 dev_warn(p->dev, "%s:%d stencil read bo too small (layer size %d, " 615 "offset %ld, max layer %d, bo size %ld)\n", 616 __func__, __LINE__, surf.layer_size, 617 (unsigned long)track->db_s_read_offset << 8, mslice, 618 radeon_bo_size(track->db_s_read_bo)); 619 dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n", 620 __func__, __LINE__, track->db_depth_size, 621 track->db_depth_slice, track->db_s_info, track->db_z_info); 622 return -EINVAL; 623 } 624 625 offset = track->db_s_write_offset << 8; 626 if (offset & (surf.base_align - 1)) { 627 dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n", 628 __func__, __LINE__, offset, surf.base_align); 629 return -EINVAL; 630 } 631 offset += surf.layer_size * mslice; 632 if (offset > radeon_bo_size(track->db_s_write_bo)) { 633 dev_warn(p->dev, "%s:%d stencil write bo too small (layer size %d, " 634 "offset %ld, max layer %d, bo size %ld)\n", 635 __func__, __LINE__, surf.layer_size, 636 (unsigned long)track->db_s_write_offset << 8, mslice, 637 radeon_bo_size(track->db_s_write_bo)); 638 return -EINVAL; 639 } 640 641 /* hyperz */ 642 if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) { 643 r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby); 644 if (r) { 645 return r; 646 } 647 } 648 649 return 0; 650 } 651 652 static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p) 653 { 654 struct evergreen_cs_track *track = p->track; 655 struct eg_surface surf; 656 unsigned pitch, slice, mslice; 657 unsigned long offset; 658 int r; 659 660 mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1; 661 pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size); 662 slice = track->db_depth_slice; 663 surf.nbx = (pitch + 1) * 8; 664 surf.nby = ((slice + 1) * 64) / surf.nbx; 665 surf.mode = G_028040_ARRAY_MODE(track->db_z_info); 666 surf.format = G_028040_FORMAT(track->db_z_info); 667 surf.tsplit = G_028040_TILE_SPLIT(track->db_z_info); 668 surf.nbanks = G_028040_NUM_BANKS(track->db_z_info); 669 surf.bankw = G_028040_BANK_WIDTH(track->db_z_info); 670 surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info); 671 surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info); 672 surf.nsamples = 1; 673 674 switch (surf.format) { 675 case V_028040_Z_16: 676 surf.format = V_028C70_COLOR_16; 677 break; 678 case V_028040_Z_24: 679 case V_028040_Z_32_FLOAT: 680 surf.format = V_028C70_COLOR_8_8_8_8; 681 break; 682 default: 683 dev_warn(p->dev, "%s:%d depth invalid format %d\n", 684 __func__, __LINE__, surf.format); 685 return -EINVAL; 686 } 687 688 r = evergreen_surface_value_conv_check(p, &surf, "depth"); 689 if (r) { 690 dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n", 691 __func__, __LINE__, track->db_depth_size, 692 track->db_depth_slice, track->db_z_info); 693 return r; 694 } 695 696 r = evergreen_surface_check(p, &surf, "depth"); 697 if (r) { 698 dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n", 699 __func__, __LINE__, track->db_depth_size, 700 track->db_depth_slice, track->db_z_info); 701 return r; 702 } 703 704 offset = track->db_z_read_offset << 8; 705 if (offset & (surf.base_align - 1)) { 706 dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n", 707 __func__, __LINE__, offset, surf.base_align); 708 return -EINVAL; 709 } 710 offset += surf.layer_size * mslice; 711 if (offset > radeon_bo_size(track->db_z_read_bo)) { 712 dev_warn(p->dev, "%s:%d depth read bo too small (layer size %d, " 713 "offset %ld, max layer %d, bo size %ld)\n", 714 __func__, __LINE__, surf.layer_size, 715 (unsigned long)track->db_z_read_offset << 8, mslice, 716 radeon_bo_size(track->db_z_read_bo)); 717 return -EINVAL; 718 } 719 720 offset = track->db_z_write_offset << 8; 721 if (offset & (surf.base_align - 1)) { 722 dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n", 723 __func__, __LINE__, offset, surf.base_align); 724 return -EINVAL; 725 } 726 offset += surf.layer_size * mslice; 727 if (offset > radeon_bo_size(track->db_z_write_bo)) { 728 dev_warn(p->dev, "%s:%d depth write bo too small (layer size %d, " 729 "offset %ld, max layer %d, bo size %ld)\n", 730 __func__, __LINE__, surf.layer_size, 731 (unsigned long)track->db_z_write_offset << 8, mslice, 732 radeon_bo_size(track->db_z_write_bo)); 733 return -EINVAL; 734 } 735 736 /* hyperz */ 737 if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) { 738 r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby); 739 if (r) { 740 return r; 741 } 742 } 743 744 return 0; 745 } 746 747 static int evergreen_cs_track_validate_texture(struct radeon_cs_parser *p, 748 struct radeon_bo *texture, 749 struct radeon_bo *mipmap, 750 unsigned idx) 751 { 752 struct eg_surface surf; 753 unsigned long toffset, moffset; 754 unsigned dim, llevel, mslice, width, height, depth, i; 755 u32 texdw[8]; 756 int r; 757 758 texdw[0] = radeon_get_ib_value(p, idx + 0); 759 texdw[1] = radeon_get_ib_value(p, idx + 1); 760 texdw[2] = radeon_get_ib_value(p, idx + 2); 761 texdw[3] = radeon_get_ib_value(p, idx + 3); 762 texdw[4] = radeon_get_ib_value(p, idx + 4); 763 texdw[5] = radeon_get_ib_value(p, idx + 5); 764 texdw[6] = radeon_get_ib_value(p, idx + 6); 765 texdw[7] = radeon_get_ib_value(p, idx + 7); 766 dim = G_030000_DIM(texdw[0]); 767 llevel = G_030014_LAST_LEVEL(texdw[5]); 768 mslice = G_030014_LAST_ARRAY(texdw[5]) + 1; 769 width = G_030000_TEX_WIDTH(texdw[0]) + 1; 770 height = G_030004_TEX_HEIGHT(texdw[1]) + 1; 771 depth = G_030004_TEX_DEPTH(texdw[1]) + 1; 772 surf.format = G_03001C_DATA_FORMAT(texdw[7]); 773 surf.nbx = (G_030000_PITCH(texdw[0]) + 1) * 8; 774 surf.nbx = r600_fmt_get_nblocksx(surf.format, surf.nbx); 775 surf.nby = r600_fmt_get_nblocksy(surf.format, height); 776 surf.mode = G_030004_ARRAY_MODE(texdw[1]); 777 surf.tsplit = G_030018_TILE_SPLIT(texdw[6]); 778 surf.nbanks = G_03001C_NUM_BANKS(texdw[7]); 779 surf.bankw = G_03001C_BANK_WIDTH(texdw[7]); 780 surf.bankh = G_03001C_BANK_HEIGHT(texdw[7]); 781 surf.mtilea = G_03001C_MACRO_TILE_ASPECT(texdw[7]); 782 surf.nsamples = 1; 783 toffset = texdw[2] << 8; 784 moffset = texdw[3] << 8; 785 786 if (!r600_fmt_is_valid_texture(surf.format, p->family)) { 787 dev_warn(p->dev, "%s:%d texture invalid format %d\n", 788 __func__, __LINE__, surf.format); 789 return -EINVAL; 790 } 791 switch (dim) { 792 case V_030000_SQ_TEX_DIM_1D: 793 case V_030000_SQ_TEX_DIM_2D: 794 case V_030000_SQ_TEX_DIM_CUBEMAP: 795 case V_030000_SQ_TEX_DIM_1D_ARRAY: 796 case V_030000_SQ_TEX_DIM_2D_ARRAY: 797 depth = 1; 798 break; 799 case V_030000_SQ_TEX_DIM_2D_MSAA: 800 case V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA: 801 surf.nsamples = 1 << llevel; 802 llevel = 0; 803 depth = 1; 804 break; 805 case V_030000_SQ_TEX_DIM_3D: 806 break; 807 default: 808 dev_warn(p->dev, "%s:%d texture invalid dimension %d\n", 809 __func__, __LINE__, dim); 810 return -EINVAL; 811 } 812 813 r = evergreen_surface_value_conv_check(p, &surf, "texture"); 814 if (r) { 815 return r; 816 } 817 818 /* align height */ 819 evergreen_surface_check(p, &surf, NULL); 820 surf.nby = ALIGN(surf.nby, surf.halign); 821 822 r = evergreen_surface_check(p, &surf, "texture"); 823 if (r) { 824 dev_warn(p->dev, "%s:%d texture invalid 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n", 825 __func__, __LINE__, texdw[0], texdw[1], texdw[4], 826 texdw[5], texdw[6], texdw[7]); 827 return r; 828 } 829 830 /* check texture size */ 831 if (toffset & (surf.base_align - 1)) { 832 dev_warn(p->dev, "%s:%d texture bo base %ld not aligned with %ld\n", 833 __func__, __LINE__, toffset, surf.base_align); 834 return -EINVAL; 835 } 836 if (surf.nsamples <= 1 && moffset & (surf.base_align - 1)) { 837 dev_warn(p->dev, "%s:%d mipmap bo base %ld not aligned with %ld\n", 838 __func__, __LINE__, moffset, surf.base_align); 839 return -EINVAL; 840 } 841 if (dim == SQ_TEX_DIM_3D) { 842 toffset += surf.layer_size * depth; 843 } else { 844 toffset += surf.layer_size * mslice; 845 } 846 if (toffset > radeon_bo_size(texture)) { 847 dev_warn(p->dev, "%s:%d texture bo too small (layer size %d, " 848 "offset %ld, max layer %d, depth %d, bo size %ld) (%d %d)\n", 849 __func__, __LINE__, surf.layer_size, 850 (unsigned long)texdw[2] << 8, mslice, 851 depth, radeon_bo_size(texture), 852 surf.nbx, surf.nby); 853 return -EINVAL; 854 } 855 856 if (!mipmap) { 857 if (llevel) { 858 dev_warn(p->dev, "%s:%i got NULL MIP_ADDRESS relocation\n", 859 __func__, __LINE__); 860 return -EINVAL; 861 } else { 862 return 0; /* everything's ok */ 863 } 864 } 865 866 /* check mipmap size */ 867 for (i = 1; i <= llevel; i++) { 868 unsigned w, h, d; 869 870 w = r600_mip_minify(width, i); 871 h = r600_mip_minify(height, i); 872 d = r600_mip_minify(depth, i); 873 surf.nbx = r600_fmt_get_nblocksx(surf.format, w); 874 surf.nby = r600_fmt_get_nblocksy(surf.format, h); 875 876 switch (surf.mode) { 877 case ARRAY_2D_TILED_THIN1: 878 if (surf.nbx < surf.palign || surf.nby < surf.halign) { 879 surf.mode = ARRAY_1D_TILED_THIN1; 880 } 881 /* recompute alignment */ 882 evergreen_surface_check(p, &surf, NULL); 883 break; 884 case ARRAY_LINEAR_GENERAL: 885 case ARRAY_LINEAR_ALIGNED: 886 case ARRAY_1D_TILED_THIN1: 887 break; 888 default: 889 dev_warn(p->dev, "%s:%d invalid array mode %d\n", 890 __func__, __LINE__, surf.mode); 891 return -EINVAL; 892 } 893 surf.nbx = ALIGN(surf.nbx, surf.palign); 894 surf.nby = ALIGN(surf.nby, surf.halign); 895 896 r = evergreen_surface_check(p, &surf, "mipmap"); 897 if (r) { 898 return r; 899 } 900 901 if (dim == SQ_TEX_DIM_3D) { 902 moffset += surf.layer_size * d; 903 } else { 904 moffset += surf.layer_size * mslice; 905 } 906 if (moffset > radeon_bo_size(mipmap)) { 907 dev_warn(p->dev, "%s:%d mipmap [%d] bo too small (layer size %d, " 908 "offset %ld, coffset %ld, max layer %d, depth %d, " 909 "bo size %ld) level0 (%d %d %d)\n", 910 __func__, __LINE__, i, surf.layer_size, 911 (unsigned long)texdw[3] << 8, moffset, mslice, 912 d, radeon_bo_size(mipmap), 913 width, height, depth); 914 dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n", 915 __func__, __LINE__, surf.nbx, surf.nby, 916 surf.mode, surf.bpe, surf.nsamples, 917 surf.bankw, surf.bankh, 918 surf.tsplit, surf.mtilea); 919 return -EINVAL; 920 } 921 } 922 923 return 0; 924 } 925 926 static int evergreen_cs_track_check(struct radeon_cs_parser *p) 927 { 928 struct evergreen_cs_track *track = p->track; 929 unsigned tmp, i; 930 int r; 931 unsigned buffer_mask = 0; 932 933 /* check streamout */ 934 if (track->streamout_dirty && track->vgt_strmout_config) { 935 for (i = 0; i < 4; i++) { 936 if (track->vgt_strmout_config & (1 << i)) { 937 buffer_mask |= (track->vgt_strmout_buffer_config >> (i * 4)) & 0xf; 938 } 939 } 940 941 for (i = 0; i < 4; i++) { 942 if (buffer_mask & (1 << i)) { 943 if (track->vgt_strmout_bo[i]) { 944 u64 offset = (u64)track->vgt_strmout_bo_offset[i] + 945 (u64)track->vgt_strmout_size[i]; 946 if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) { 947 DRM_ERROR("streamout %d bo too small: 0x%jx, 0x%lx\n", 948 i, offset, 949 radeon_bo_size(track->vgt_strmout_bo[i])); 950 return -EINVAL; 951 } 952 } else { 953 dev_warn(p->dev, "No buffer for streamout %d\n", i); 954 return -EINVAL; 955 } 956 } 957 } 958 track->streamout_dirty = false; 959 } 960 961 if (track->sx_misc_kill_all_prims) 962 return 0; 963 964 /* check that we have a cb for each enabled target 965 */ 966 if (track->cb_dirty) { 967 tmp = track->cb_target_mask; 968 for (i = 0; i < 8; i++) { 969 u32 format = G_028C70_FORMAT(track->cb_color_info[i]); 970 971 if (format != V_028C70_COLOR_INVALID && 972 (tmp >> (i * 4)) & 0xF) { 973 /* at least one component is enabled */ 974 if (track->cb_color_bo[i] == NULL) { 975 dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n", 976 __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i); 977 return -EINVAL; 978 } 979 /* check cb */ 980 r = evergreen_cs_track_validate_cb(p, i); 981 if (r) { 982 return r; 983 } 984 } 985 } 986 track->cb_dirty = false; 987 } 988 989 if (track->db_dirty) { 990 /* Check stencil buffer */ 991 if (G_028044_FORMAT(track->db_s_info) != V_028044_STENCIL_INVALID && 992 G_028800_STENCIL_ENABLE(track->db_depth_control)) { 993 r = evergreen_cs_track_validate_stencil(p); 994 if (r) 995 return r; 996 } 997 /* Check depth buffer */ 998 if (G_028040_FORMAT(track->db_z_info) != V_028040_Z_INVALID && 999 G_028800_Z_ENABLE(track->db_depth_control)) { 1000 r = evergreen_cs_track_validate_depth(p); 1001 if (r) 1002 return r; 1003 } 1004 track->db_dirty = false; 1005 } 1006 1007 return 0; 1008 } 1009 1010 /** 1011 * evergreen_cs_packet_parse_vline() - parse userspace VLINE packet 1012 * @parser: parser structure holding parsing context. 1013 * 1014 * This is an Evergreen(+)-specific function for parsing VLINE packets. 1015 * Real work is done by r600_cs_common_vline_parse function. 1016 * Here we just set up ASIC-specific register table and call 1017 * the common implementation function. 1018 */ 1019 static int evergreen_cs_packet_parse_vline(struct radeon_cs_parser *p) 1020 { 1021 1022 static uint32_t vline_start_end[6] = { 1023 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC0_REGISTER_OFFSET, 1024 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC1_REGISTER_OFFSET, 1025 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC2_REGISTER_OFFSET, 1026 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC3_REGISTER_OFFSET, 1027 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC4_REGISTER_OFFSET, 1028 EVERGREEN_VLINE_START_END + EVERGREEN_CRTC5_REGISTER_OFFSET 1029 }; 1030 static uint32_t vline_status[6] = { 1031 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, 1032 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, 1033 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, 1034 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, 1035 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, 1036 EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET 1037 }; 1038 1039 return r600_cs_common_vline_parse(p, vline_start_end, vline_status); 1040 } 1041 1042 static int evergreen_packet0_check(struct radeon_cs_parser *p, 1043 struct radeon_cs_packet *pkt, 1044 unsigned idx, unsigned reg) 1045 { 1046 int r; 1047 1048 switch (reg) { 1049 case EVERGREEN_VLINE_START_END: 1050 r = evergreen_cs_packet_parse_vline(p); 1051 if (r) { 1052 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 1053 idx, reg); 1054 return r; 1055 } 1056 break; 1057 default: 1058 printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n", 1059 reg, idx); 1060 return -EINVAL; 1061 } 1062 return 0; 1063 } 1064 1065 static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p, 1066 struct radeon_cs_packet *pkt) 1067 { 1068 unsigned reg, i; 1069 unsigned idx; 1070 int r; 1071 1072 idx = pkt->idx + 1; 1073 reg = pkt->reg; 1074 for (i = 0; i <= pkt->count; i++, idx++, reg += 4) { 1075 r = evergreen_packet0_check(p, pkt, idx, reg); 1076 if (r) { 1077 return r; 1078 } 1079 } 1080 return 0; 1081 } 1082 1083 /** 1084 * evergreen_cs_check_reg() - check if register is authorized or not 1085 * @parser: parser structure holding parsing context 1086 * @reg: register we are testing 1087 * @idx: index into the cs buffer 1088 * 1089 * This function will test against evergreen_reg_safe_bm and return 0 1090 * if register is safe. If register is not flag as safe this function 1091 * will test it against a list of register needind special handling. 1092 */ 1093 static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) 1094 { 1095 struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track; 1096 struct radeon_cs_reloc *reloc; 1097 u32 last_reg; 1098 u32 m, i, tmp, *ib; 1099 int r; 1100 1101 if (p->rdev->family >= CHIP_CAYMAN) 1102 last_reg = ARRAY_SIZE(cayman_reg_safe_bm); 1103 else 1104 last_reg = ARRAY_SIZE(evergreen_reg_safe_bm); 1105 1106 i = (reg >> 7); 1107 if (i >= last_reg) { 1108 dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); 1109 return -EINVAL; 1110 } 1111 m = 1 << ((reg >> 2) & 31); 1112 if (p->rdev->family >= CHIP_CAYMAN) { 1113 if (!(cayman_reg_safe_bm[i] & m)) 1114 return 0; 1115 } else { 1116 if (!(evergreen_reg_safe_bm[i] & m)) 1117 return 0; 1118 } 1119 ib = p->ib.ptr; 1120 switch (reg) { 1121 /* force following reg to 0 in an attempt to disable out buffer 1122 * which will need us to better understand how it works to perform 1123 * security check on it (Jerome) 1124 */ 1125 case SQ_ESGS_RING_SIZE: 1126 case SQ_GSVS_RING_SIZE: 1127 case SQ_ESTMP_RING_SIZE: 1128 case SQ_GSTMP_RING_SIZE: 1129 case SQ_HSTMP_RING_SIZE: 1130 case SQ_LSTMP_RING_SIZE: 1131 case SQ_PSTMP_RING_SIZE: 1132 case SQ_VSTMP_RING_SIZE: 1133 case SQ_ESGS_RING_ITEMSIZE: 1134 case SQ_ESTMP_RING_ITEMSIZE: 1135 case SQ_GSTMP_RING_ITEMSIZE: 1136 case SQ_GSVS_RING_ITEMSIZE: 1137 case SQ_GS_VERT_ITEMSIZE: 1138 case SQ_GS_VERT_ITEMSIZE_1: 1139 case SQ_GS_VERT_ITEMSIZE_2: 1140 case SQ_GS_VERT_ITEMSIZE_3: 1141 case SQ_GSVS_RING_OFFSET_1: 1142 case SQ_GSVS_RING_OFFSET_2: 1143 case SQ_GSVS_RING_OFFSET_3: 1144 case SQ_HSTMP_RING_ITEMSIZE: 1145 case SQ_LSTMP_RING_ITEMSIZE: 1146 case SQ_PSTMP_RING_ITEMSIZE: 1147 case SQ_VSTMP_RING_ITEMSIZE: 1148 case VGT_TF_RING_SIZE: 1149 /* get value to populate the IB don't remove */ 1150 /*tmp =radeon_get_ib_value(p, idx); 1151 ib[idx] = 0;*/ 1152 break; 1153 case SQ_ESGS_RING_BASE: 1154 case SQ_GSVS_RING_BASE: 1155 case SQ_ESTMP_RING_BASE: 1156 case SQ_GSTMP_RING_BASE: 1157 case SQ_HSTMP_RING_BASE: 1158 case SQ_LSTMP_RING_BASE: 1159 case SQ_PSTMP_RING_BASE: 1160 case SQ_VSTMP_RING_BASE: 1161 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1162 if (r) { 1163 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1164 "0x%04X\n", reg); 1165 return -EINVAL; 1166 } 1167 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1168 break; 1169 case DB_DEPTH_CONTROL: 1170 track->db_depth_control = radeon_get_ib_value(p, idx); 1171 track->db_dirty = true; 1172 break; 1173 case CAYMAN_DB_EQAA: 1174 if (p->rdev->family < CHIP_CAYMAN) { 1175 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1176 "0x%04X\n", reg); 1177 return -EINVAL; 1178 } 1179 break; 1180 case CAYMAN_DB_DEPTH_INFO: 1181 if (p->rdev->family < CHIP_CAYMAN) { 1182 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1183 "0x%04X\n", reg); 1184 return -EINVAL; 1185 } 1186 break; 1187 case DB_Z_INFO: 1188 track->db_z_info = radeon_get_ib_value(p, idx); 1189 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 1190 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1191 if (r) { 1192 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1193 "0x%04X\n", reg); 1194 return -EINVAL; 1195 } 1196 ib[idx] &= ~Z_ARRAY_MODE(0xf); 1197 track->db_z_info &= ~Z_ARRAY_MODE(0xf); 1198 ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); 1199 track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); 1200 if (reloc->tiling_flags & RADEON_TILING_MACRO) { 1201 unsigned bankw, bankh, mtaspect, tile_split; 1202 1203 evergreen_tiling_fields(reloc->tiling_flags, 1204 &bankw, &bankh, &mtaspect, 1205 &tile_split); 1206 ib[idx] |= DB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks)); 1207 ib[idx] |= DB_TILE_SPLIT(tile_split) | 1208 DB_BANK_WIDTH(bankw) | 1209 DB_BANK_HEIGHT(bankh) | 1210 DB_MACRO_TILE_ASPECT(mtaspect); 1211 } 1212 } 1213 track->db_dirty = true; 1214 break; 1215 case DB_STENCIL_INFO: 1216 track->db_s_info = radeon_get_ib_value(p, idx); 1217 track->db_dirty = true; 1218 break; 1219 case DB_DEPTH_VIEW: 1220 track->db_depth_view = radeon_get_ib_value(p, idx); 1221 track->db_dirty = true; 1222 break; 1223 case DB_DEPTH_SIZE: 1224 track->db_depth_size = radeon_get_ib_value(p, idx); 1225 track->db_dirty = true; 1226 break; 1227 case R_02805C_DB_DEPTH_SLICE: 1228 track->db_depth_slice = radeon_get_ib_value(p, idx); 1229 track->db_dirty = true; 1230 break; 1231 case DB_Z_READ_BASE: 1232 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1233 if (r) { 1234 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1235 "0x%04X\n", reg); 1236 return -EINVAL; 1237 } 1238 track->db_z_read_offset = radeon_get_ib_value(p, idx); 1239 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1240 track->db_z_read_bo = reloc->robj; 1241 track->db_dirty = true; 1242 break; 1243 case DB_Z_WRITE_BASE: 1244 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1245 if (r) { 1246 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1247 "0x%04X\n", reg); 1248 return -EINVAL; 1249 } 1250 track->db_z_write_offset = radeon_get_ib_value(p, idx); 1251 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1252 track->db_z_write_bo = reloc->robj; 1253 track->db_dirty = true; 1254 break; 1255 case DB_STENCIL_READ_BASE: 1256 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1257 if (r) { 1258 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1259 "0x%04X\n", reg); 1260 return -EINVAL; 1261 } 1262 track->db_s_read_offset = radeon_get_ib_value(p, idx); 1263 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1264 track->db_s_read_bo = reloc->robj; 1265 track->db_dirty = true; 1266 break; 1267 case DB_STENCIL_WRITE_BASE: 1268 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1269 if (r) { 1270 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1271 "0x%04X\n", reg); 1272 return -EINVAL; 1273 } 1274 track->db_s_write_offset = radeon_get_ib_value(p, idx); 1275 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1276 track->db_s_write_bo = reloc->robj; 1277 track->db_dirty = true; 1278 break; 1279 case VGT_STRMOUT_CONFIG: 1280 track->vgt_strmout_config = radeon_get_ib_value(p, idx); 1281 track->streamout_dirty = true; 1282 break; 1283 case VGT_STRMOUT_BUFFER_CONFIG: 1284 track->vgt_strmout_buffer_config = radeon_get_ib_value(p, idx); 1285 track->streamout_dirty = true; 1286 break; 1287 case VGT_STRMOUT_BUFFER_BASE_0: 1288 case VGT_STRMOUT_BUFFER_BASE_1: 1289 case VGT_STRMOUT_BUFFER_BASE_2: 1290 case VGT_STRMOUT_BUFFER_BASE_3: 1291 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1292 if (r) { 1293 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1294 "0x%04X\n", reg); 1295 return -EINVAL; 1296 } 1297 tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16; 1298 track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8; 1299 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1300 track->vgt_strmout_bo[tmp] = reloc->robj; 1301 track->streamout_dirty = true; 1302 break; 1303 case VGT_STRMOUT_BUFFER_SIZE_0: 1304 case VGT_STRMOUT_BUFFER_SIZE_1: 1305 case VGT_STRMOUT_BUFFER_SIZE_2: 1306 case VGT_STRMOUT_BUFFER_SIZE_3: 1307 tmp = (reg - VGT_STRMOUT_BUFFER_SIZE_0) / 16; 1308 /* size in register is DWs, convert to bytes */ 1309 track->vgt_strmout_size[tmp] = radeon_get_ib_value(p, idx) * 4; 1310 track->streamout_dirty = true; 1311 break; 1312 case CP_COHER_BASE: 1313 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1314 if (r) { 1315 dev_warn(p->dev, "missing reloc for CP_COHER_BASE " 1316 "0x%04X\n", reg); 1317 return -EINVAL; 1318 } 1319 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1320 case CB_TARGET_MASK: 1321 track->cb_target_mask = radeon_get_ib_value(p, idx); 1322 track->cb_dirty = true; 1323 break; 1324 case CB_SHADER_MASK: 1325 track->cb_shader_mask = radeon_get_ib_value(p, idx); 1326 track->cb_dirty = true; 1327 break; 1328 case PA_SC_AA_CONFIG: 1329 if (p->rdev->family >= CHIP_CAYMAN) { 1330 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1331 "0x%04X\n", reg); 1332 return -EINVAL; 1333 } 1334 tmp = radeon_get_ib_value(p, idx) & MSAA_NUM_SAMPLES_MASK; 1335 track->nsamples = 1 << tmp; 1336 break; 1337 case CAYMAN_PA_SC_AA_CONFIG: 1338 if (p->rdev->family < CHIP_CAYMAN) { 1339 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1340 "0x%04X\n", reg); 1341 return -EINVAL; 1342 } 1343 tmp = radeon_get_ib_value(p, idx) & CAYMAN_MSAA_NUM_SAMPLES_MASK; 1344 track->nsamples = 1 << tmp; 1345 break; 1346 case CB_COLOR0_VIEW: 1347 case CB_COLOR1_VIEW: 1348 case CB_COLOR2_VIEW: 1349 case CB_COLOR3_VIEW: 1350 case CB_COLOR4_VIEW: 1351 case CB_COLOR5_VIEW: 1352 case CB_COLOR6_VIEW: 1353 case CB_COLOR7_VIEW: 1354 tmp = (reg - CB_COLOR0_VIEW) / 0x3c; 1355 track->cb_color_view[tmp] = radeon_get_ib_value(p, idx); 1356 track->cb_dirty = true; 1357 break; 1358 case CB_COLOR8_VIEW: 1359 case CB_COLOR9_VIEW: 1360 case CB_COLOR10_VIEW: 1361 case CB_COLOR11_VIEW: 1362 tmp = ((reg - CB_COLOR8_VIEW) / 0x1c) + 8; 1363 track->cb_color_view[tmp] = radeon_get_ib_value(p, idx); 1364 track->cb_dirty = true; 1365 break; 1366 case CB_COLOR0_INFO: 1367 case CB_COLOR1_INFO: 1368 case CB_COLOR2_INFO: 1369 case CB_COLOR3_INFO: 1370 case CB_COLOR4_INFO: 1371 case CB_COLOR5_INFO: 1372 case CB_COLOR6_INFO: 1373 case CB_COLOR7_INFO: 1374 tmp = (reg - CB_COLOR0_INFO) / 0x3c; 1375 track->cb_color_info[tmp] = radeon_get_ib_value(p, idx); 1376 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 1377 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1378 if (r) { 1379 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1380 "0x%04X\n", reg); 1381 return -EINVAL; 1382 } 1383 ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); 1384 track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); 1385 } 1386 track->cb_dirty = true; 1387 break; 1388 case CB_COLOR8_INFO: 1389 case CB_COLOR9_INFO: 1390 case CB_COLOR10_INFO: 1391 case CB_COLOR11_INFO: 1392 tmp = ((reg - CB_COLOR8_INFO) / 0x1c) + 8; 1393 track->cb_color_info[tmp] = radeon_get_ib_value(p, idx); 1394 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 1395 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1396 if (r) { 1397 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1398 "0x%04X\n", reg); 1399 return -EINVAL; 1400 } 1401 ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); 1402 track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); 1403 } 1404 track->cb_dirty = true; 1405 break; 1406 case CB_COLOR0_PITCH: 1407 case CB_COLOR1_PITCH: 1408 case CB_COLOR2_PITCH: 1409 case CB_COLOR3_PITCH: 1410 case CB_COLOR4_PITCH: 1411 case CB_COLOR5_PITCH: 1412 case CB_COLOR6_PITCH: 1413 case CB_COLOR7_PITCH: 1414 tmp = (reg - CB_COLOR0_PITCH) / 0x3c; 1415 track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx); 1416 track->cb_dirty = true; 1417 break; 1418 case CB_COLOR8_PITCH: 1419 case CB_COLOR9_PITCH: 1420 case CB_COLOR10_PITCH: 1421 case CB_COLOR11_PITCH: 1422 tmp = ((reg - CB_COLOR8_PITCH) / 0x1c) + 8; 1423 track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx); 1424 track->cb_dirty = true; 1425 break; 1426 case CB_COLOR0_SLICE: 1427 case CB_COLOR1_SLICE: 1428 case CB_COLOR2_SLICE: 1429 case CB_COLOR3_SLICE: 1430 case CB_COLOR4_SLICE: 1431 case CB_COLOR5_SLICE: 1432 case CB_COLOR6_SLICE: 1433 case CB_COLOR7_SLICE: 1434 tmp = (reg - CB_COLOR0_SLICE) / 0x3c; 1435 track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx); 1436 track->cb_color_slice_idx[tmp] = idx; 1437 track->cb_dirty = true; 1438 break; 1439 case CB_COLOR8_SLICE: 1440 case CB_COLOR9_SLICE: 1441 case CB_COLOR10_SLICE: 1442 case CB_COLOR11_SLICE: 1443 tmp = ((reg - CB_COLOR8_SLICE) / 0x1c) + 8; 1444 track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx); 1445 track->cb_color_slice_idx[tmp] = idx; 1446 track->cb_dirty = true; 1447 break; 1448 case CB_COLOR0_ATTRIB: 1449 case CB_COLOR1_ATTRIB: 1450 case CB_COLOR2_ATTRIB: 1451 case CB_COLOR3_ATTRIB: 1452 case CB_COLOR4_ATTRIB: 1453 case CB_COLOR5_ATTRIB: 1454 case CB_COLOR6_ATTRIB: 1455 case CB_COLOR7_ATTRIB: 1456 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1457 if (r) { 1458 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1459 "0x%04X\n", reg); 1460 return -EINVAL; 1461 } 1462 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 1463 if (reloc->tiling_flags & RADEON_TILING_MACRO) { 1464 unsigned bankw, bankh, mtaspect, tile_split; 1465 1466 evergreen_tiling_fields(reloc->tiling_flags, 1467 &bankw, &bankh, &mtaspect, 1468 &tile_split); 1469 ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks)); 1470 ib[idx] |= CB_TILE_SPLIT(tile_split) | 1471 CB_BANK_WIDTH(bankw) | 1472 CB_BANK_HEIGHT(bankh) | 1473 CB_MACRO_TILE_ASPECT(mtaspect); 1474 } 1475 } 1476 tmp = ((reg - CB_COLOR0_ATTRIB) / 0x3c); 1477 track->cb_color_attrib[tmp] = ib[idx]; 1478 track->cb_dirty = true; 1479 break; 1480 case CB_COLOR8_ATTRIB: 1481 case CB_COLOR9_ATTRIB: 1482 case CB_COLOR10_ATTRIB: 1483 case CB_COLOR11_ATTRIB: 1484 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1485 if (r) { 1486 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1487 "0x%04X\n", reg); 1488 return -EINVAL; 1489 } 1490 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 1491 if (reloc->tiling_flags & RADEON_TILING_MACRO) { 1492 unsigned bankw, bankh, mtaspect, tile_split; 1493 1494 evergreen_tiling_fields(reloc->tiling_flags, 1495 &bankw, &bankh, &mtaspect, 1496 &tile_split); 1497 ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks)); 1498 ib[idx] |= CB_TILE_SPLIT(tile_split) | 1499 CB_BANK_WIDTH(bankw) | 1500 CB_BANK_HEIGHT(bankh) | 1501 CB_MACRO_TILE_ASPECT(mtaspect); 1502 } 1503 } 1504 tmp = ((reg - CB_COLOR8_ATTRIB) / 0x1c) + 8; 1505 track->cb_color_attrib[tmp] = ib[idx]; 1506 track->cb_dirty = true; 1507 break; 1508 case CB_COLOR0_FMASK: 1509 case CB_COLOR1_FMASK: 1510 case CB_COLOR2_FMASK: 1511 case CB_COLOR3_FMASK: 1512 case CB_COLOR4_FMASK: 1513 case CB_COLOR5_FMASK: 1514 case CB_COLOR6_FMASK: 1515 case CB_COLOR7_FMASK: 1516 tmp = (reg - CB_COLOR0_FMASK) / 0x3c; 1517 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1518 if (r) { 1519 dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg); 1520 return -EINVAL; 1521 } 1522 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1523 track->cb_color_fmask_bo[tmp] = reloc->robj; 1524 break; 1525 case CB_COLOR0_CMASK: 1526 case CB_COLOR1_CMASK: 1527 case CB_COLOR2_CMASK: 1528 case CB_COLOR3_CMASK: 1529 case CB_COLOR4_CMASK: 1530 case CB_COLOR5_CMASK: 1531 case CB_COLOR6_CMASK: 1532 case CB_COLOR7_CMASK: 1533 tmp = (reg - CB_COLOR0_CMASK) / 0x3c; 1534 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1535 if (r) { 1536 dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg); 1537 return -EINVAL; 1538 } 1539 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1540 track->cb_color_cmask_bo[tmp] = reloc->robj; 1541 break; 1542 case CB_COLOR0_FMASK_SLICE: 1543 case CB_COLOR1_FMASK_SLICE: 1544 case CB_COLOR2_FMASK_SLICE: 1545 case CB_COLOR3_FMASK_SLICE: 1546 case CB_COLOR4_FMASK_SLICE: 1547 case CB_COLOR5_FMASK_SLICE: 1548 case CB_COLOR6_FMASK_SLICE: 1549 case CB_COLOR7_FMASK_SLICE: 1550 tmp = (reg - CB_COLOR0_FMASK_SLICE) / 0x3c; 1551 track->cb_color_fmask_slice[tmp] = radeon_get_ib_value(p, idx); 1552 break; 1553 case CB_COLOR0_CMASK_SLICE: 1554 case CB_COLOR1_CMASK_SLICE: 1555 case CB_COLOR2_CMASK_SLICE: 1556 case CB_COLOR3_CMASK_SLICE: 1557 case CB_COLOR4_CMASK_SLICE: 1558 case CB_COLOR5_CMASK_SLICE: 1559 case CB_COLOR6_CMASK_SLICE: 1560 case CB_COLOR7_CMASK_SLICE: 1561 tmp = (reg - CB_COLOR0_CMASK_SLICE) / 0x3c; 1562 track->cb_color_cmask_slice[tmp] = radeon_get_ib_value(p, idx); 1563 break; 1564 case CB_COLOR0_BASE: 1565 case CB_COLOR1_BASE: 1566 case CB_COLOR2_BASE: 1567 case CB_COLOR3_BASE: 1568 case CB_COLOR4_BASE: 1569 case CB_COLOR5_BASE: 1570 case CB_COLOR6_BASE: 1571 case CB_COLOR7_BASE: 1572 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1573 if (r) { 1574 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1575 "0x%04X\n", reg); 1576 return -EINVAL; 1577 } 1578 tmp = (reg - CB_COLOR0_BASE) / 0x3c; 1579 track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx); 1580 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1581 track->cb_color_bo[tmp] = reloc->robj; 1582 track->cb_dirty = true; 1583 break; 1584 case CB_COLOR8_BASE: 1585 case CB_COLOR9_BASE: 1586 case CB_COLOR10_BASE: 1587 case CB_COLOR11_BASE: 1588 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1589 if (r) { 1590 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1591 "0x%04X\n", reg); 1592 return -EINVAL; 1593 } 1594 tmp = ((reg - CB_COLOR8_BASE) / 0x1c) + 8; 1595 track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx); 1596 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1597 track->cb_color_bo[tmp] = reloc->robj; 1598 track->cb_dirty = true; 1599 break; 1600 case DB_HTILE_DATA_BASE: 1601 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1602 if (r) { 1603 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1604 "0x%04X\n", reg); 1605 return -EINVAL; 1606 } 1607 track->htile_offset = radeon_get_ib_value(p, idx); 1608 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1609 track->htile_bo = reloc->robj; 1610 track->db_dirty = true; 1611 break; 1612 case DB_HTILE_SURFACE: 1613 /* 8x8 only */ 1614 track->htile_surface = radeon_get_ib_value(p, idx); 1615 /* force 8x8 htile width and height */ 1616 ib[idx] |= 3; 1617 track->db_dirty = true; 1618 break; 1619 case CB_IMMED0_BASE: 1620 case CB_IMMED1_BASE: 1621 case CB_IMMED2_BASE: 1622 case CB_IMMED3_BASE: 1623 case CB_IMMED4_BASE: 1624 case CB_IMMED5_BASE: 1625 case CB_IMMED6_BASE: 1626 case CB_IMMED7_BASE: 1627 case CB_IMMED8_BASE: 1628 case CB_IMMED9_BASE: 1629 case CB_IMMED10_BASE: 1630 case CB_IMMED11_BASE: 1631 case SQ_PGM_START_FS: 1632 case SQ_PGM_START_ES: 1633 case SQ_PGM_START_VS: 1634 case SQ_PGM_START_GS: 1635 case SQ_PGM_START_PS: 1636 case SQ_PGM_START_HS: 1637 case SQ_PGM_START_LS: 1638 case SQ_CONST_MEM_BASE: 1639 case SQ_ALU_CONST_CACHE_GS_0: 1640 case SQ_ALU_CONST_CACHE_GS_1: 1641 case SQ_ALU_CONST_CACHE_GS_2: 1642 case SQ_ALU_CONST_CACHE_GS_3: 1643 case SQ_ALU_CONST_CACHE_GS_4: 1644 case SQ_ALU_CONST_CACHE_GS_5: 1645 case SQ_ALU_CONST_CACHE_GS_6: 1646 case SQ_ALU_CONST_CACHE_GS_7: 1647 case SQ_ALU_CONST_CACHE_GS_8: 1648 case SQ_ALU_CONST_CACHE_GS_9: 1649 case SQ_ALU_CONST_CACHE_GS_10: 1650 case SQ_ALU_CONST_CACHE_GS_11: 1651 case SQ_ALU_CONST_CACHE_GS_12: 1652 case SQ_ALU_CONST_CACHE_GS_13: 1653 case SQ_ALU_CONST_CACHE_GS_14: 1654 case SQ_ALU_CONST_CACHE_GS_15: 1655 case SQ_ALU_CONST_CACHE_PS_0: 1656 case SQ_ALU_CONST_CACHE_PS_1: 1657 case SQ_ALU_CONST_CACHE_PS_2: 1658 case SQ_ALU_CONST_CACHE_PS_3: 1659 case SQ_ALU_CONST_CACHE_PS_4: 1660 case SQ_ALU_CONST_CACHE_PS_5: 1661 case SQ_ALU_CONST_CACHE_PS_6: 1662 case SQ_ALU_CONST_CACHE_PS_7: 1663 case SQ_ALU_CONST_CACHE_PS_8: 1664 case SQ_ALU_CONST_CACHE_PS_9: 1665 case SQ_ALU_CONST_CACHE_PS_10: 1666 case SQ_ALU_CONST_CACHE_PS_11: 1667 case SQ_ALU_CONST_CACHE_PS_12: 1668 case SQ_ALU_CONST_CACHE_PS_13: 1669 case SQ_ALU_CONST_CACHE_PS_14: 1670 case SQ_ALU_CONST_CACHE_PS_15: 1671 case SQ_ALU_CONST_CACHE_VS_0: 1672 case SQ_ALU_CONST_CACHE_VS_1: 1673 case SQ_ALU_CONST_CACHE_VS_2: 1674 case SQ_ALU_CONST_CACHE_VS_3: 1675 case SQ_ALU_CONST_CACHE_VS_4: 1676 case SQ_ALU_CONST_CACHE_VS_5: 1677 case SQ_ALU_CONST_CACHE_VS_6: 1678 case SQ_ALU_CONST_CACHE_VS_7: 1679 case SQ_ALU_CONST_CACHE_VS_8: 1680 case SQ_ALU_CONST_CACHE_VS_9: 1681 case SQ_ALU_CONST_CACHE_VS_10: 1682 case SQ_ALU_CONST_CACHE_VS_11: 1683 case SQ_ALU_CONST_CACHE_VS_12: 1684 case SQ_ALU_CONST_CACHE_VS_13: 1685 case SQ_ALU_CONST_CACHE_VS_14: 1686 case SQ_ALU_CONST_CACHE_VS_15: 1687 case SQ_ALU_CONST_CACHE_HS_0: 1688 case SQ_ALU_CONST_CACHE_HS_1: 1689 case SQ_ALU_CONST_CACHE_HS_2: 1690 case SQ_ALU_CONST_CACHE_HS_3: 1691 case SQ_ALU_CONST_CACHE_HS_4: 1692 case SQ_ALU_CONST_CACHE_HS_5: 1693 case SQ_ALU_CONST_CACHE_HS_6: 1694 case SQ_ALU_CONST_CACHE_HS_7: 1695 case SQ_ALU_CONST_CACHE_HS_8: 1696 case SQ_ALU_CONST_CACHE_HS_9: 1697 case SQ_ALU_CONST_CACHE_HS_10: 1698 case SQ_ALU_CONST_CACHE_HS_11: 1699 case SQ_ALU_CONST_CACHE_HS_12: 1700 case SQ_ALU_CONST_CACHE_HS_13: 1701 case SQ_ALU_CONST_CACHE_HS_14: 1702 case SQ_ALU_CONST_CACHE_HS_15: 1703 case SQ_ALU_CONST_CACHE_LS_0: 1704 case SQ_ALU_CONST_CACHE_LS_1: 1705 case SQ_ALU_CONST_CACHE_LS_2: 1706 case SQ_ALU_CONST_CACHE_LS_3: 1707 case SQ_ALU_CONST_CACHE_LS_4: 1708 case SQ_ALU_CONST_CACHE_LS_5: 1709 case SQ_ALU_CONST_CACHE_LS_6: 1710 case SQ_ALU_CONST_CACHE_LS_7: 1711 case SQ_ALU_CONST_CACHE_LS_8: 1712 case SQ_ALU_CONST_CACHE_LS_9: 1713 case SQ_ALU_CONST_CACHE_LS_10: 1714 case SQ_ALU_CONST_CACHE_LS_11: 1715 case SQ_ALU_CONST_CACHE_LS_12: 1716 case SQ_ALU_CONST_CACHE_LS_13: 1717 case SQ_ALU_CONST_CACHE_LS_14: 1718 case SQ_ALU_CONST_CACHE_LS_15: 1719 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1720 if (r) { 1721 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1722 "0x%04X\n", reg); 1723 return -EINVAL; 1724 } 1725 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1726 break; 1727 case SX_MEMORY_EXPORT_BASE: 1728 if (p->rdev->family >= CHIP_CAYMAN) { 1729 dev_warn(p->dev, "bad SET_CONFIG_REG " 1730 "0x%04X\n", reg); 1731 return -EINVAL; 1732 } 1733 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1734 if (r) { 1735 dev_warn(p->dev, "bad SET_CONFIG_REG " 1736 "0x%04X\n", reg); 1737 return -EINVAL; 1738 } 1739 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1740 break; 1741 case CAYMAN_SX_SCATTER_EXPORT_BASE: 1742 if (p->rdev->family < CHIP_CAYMAN) { 1743 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1744 "0x%04X\n", reg); 1745 return -EINVAL; 1746 } 1747 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1748 if (r) { 1749 dev_warn(p->dev, "bad SET_CONTEXT_REG " 1750 "0x%04X\n", reg); 1751 return -EINVAL; 1752 } 1753 ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 1754 break; 1755 case SX_MISC: 1756 track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0; 1757 break; 1758 default: 1759 dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); 1760 return -EINVAL; 1761 } 1762 return 0; 1763 } 1764 1765 static bool evergreen_is_safe_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) 1766 { 1767 u32 last_reg, m, i; 1768 1769 if (p->rdev->family >= CHIP_CAYMAN) 1770 last_reg = ARRAY_SIZE(cayman_reg_safe_bm); 1771 else 1772 last_reg = ARRAY_SIZE(evergreen_reg_safe_bm); 1773 1774 i = (reg >> 7); 1775 if (i >= last_reg) { 1776 dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); 1777 return false; 1778 } 1779 m = 1 << ((reg >> 2) & 31); 1780 if (p->rdev->family >= CHIP_CAYMAN) { 1781 if (!(cayman_reg_safe_bm[i] & m)) 1782 return true; 1783 } else { 1784 if (!(evergreen_reg_safe_bm[i] & m)) 1785 return true; 1786 } 1787 dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); 1788 return false; 1789 } 1790 1791 static int evergreen_packet3_check(struct radeon_cs_parser *p, 1792 struct radeon_cs_packet *pkt) 1793 { 1794 struct radeon_cs_reloc *reloc; 1795 struct evergreen_cs_track *track; 1796 volatile u32 *ib; 1797 unsigned idx; 1798 unsigned i; 1799 unsigned start_reg, end_reg, reg; 1800 int r; 1801 u32 idx_value; 1802 1803 track = (struct evergreen_cs_track *)p->track; 1804 ib = p->ib.ptr; 1805 idx = pkt->idx + 1; 1806 idx_value = radeon_get_ib_value(p, idx); 1807 1808 switch (pkt->opcode) { 1809 case PACKET3_SET_PREDICATION: 1810 { 1811 int pred_op; 1812 int tmp; 1813 uint64_t offset; 1814 1815 if (pkt->count != 1) { 1816 DRM_ERROR("bad SET PREDICATION\n"); 1817 return -EINVAL; 1818 } 1819 1820 tmp = radeon_get_ib_value(p, idx + 1); 1821 pred_op = (tmp >> 16) & 0x7; 1822 1823 /* for the clear predicate operation */ 1824 if (pred_op == 0) 1825 return 0; 1826 1827 if (pred_op > 2) { 1828 DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op); 1829 return -EINVAL; 1830 } 1831 1832 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1833 if (r) { 1834 DRM_ERROR("bad SET PREDICATION\n"); 1835 return -EINVAL; 1836 } 1837 1838 offset = reloc->gpu_offset + 1839 (idx_value & 0xfffffff0) + 1840 ((u64)(tmp & 0xff) << 32); 1841 1842 ib[idx + 0] = offset; 1843 ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff); 1844 } 1845 break; 1846 case PACKET3_CONTEXT_CONTROL: 1847 if (pkt->count != 1) { 1848 DRM_ERROR("bad CONTEXT_CONTROL\n"); 1849 return -EINVAL; 1850 } 1851 break; 1852 case PACKET3_INDEX_TYPE: 1853 case PACKET3_NUM_INSTANCES: 1854 case PACKET3_CLEAR_STATE: 1855 if (pkt->count) { 1856 DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n"); 1857 return -EINVAL; 1858 } 1859 break; 1860 case CAYMAN_PACKET3_DEALLOC_STATE: 1861 if (p->rdev->family < CHIP_CAYMAN) { 1862 DRM_ERROR("bad PACKET3_DEALLOC_STATE\n"); 1863 return -EINVAL; 1864 } 1865 if (pkt->count) { 1866 DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n"); 1867 return -EINVAL; 1868 } 1869 break; 1870 case PACKET3_INDEX_BASE: 1871 { 1872 uint64_t offset; 1873 1874 if (pkt->count != 1) { 1875 DRM_ERROR("bad INDEX_BASE\n"); 1876 return -EINVAL; 1877 } 1878 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1879 if (r) { 1880 DRM_ERROR("bad INDEX_BASE\n"); 1881 return -EINVAL; 1882 } 1883 1884 offset = reloc->gpu_offset + 1885 idx_value + 1886 ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); 1887 1888 ib[idx+0] = offset; 1889 ib[idx+1] = upper_32_bits(offset) & 0xff; 1890 1891 r = evergreen_cs_track_check(p); 1892 if (r) { 1893 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); 1894 return r; 1895 } 1896 break; 1897 } 1898 case PACKET3_DRAW_INDEX: 1899 { 1900 uint64_t offset; 1901 if (pkt->count != 3) { 1902 DRM_ERROR("bad DRAW_INDEX\n"); 1903 return -EINVAL; 1904 } 1905 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1906 if (r) { 1907 DRM_ERROR("bad DRAW_INDEX\n"); 1908 return -EINVAL; 1909 } 1910 1911 offset = reloc->gpu_offset + 1912 idx_value + 1913 ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); 1914 1915 ib[idx+0] = offset; 1916 ib[idx+1] = upper_32_bits(offset) & 0xff; 1917 1918 r = evergreen_cs_track_check(p); 1919 if (r) { 1920 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); 1921 return r; 1922 } 1923 break; 1924 } 1925 case PACKET3_DRAW_INDEX_2: 1926 { 1927 uint64_t offset; 1928 1929 if (pkt->count != 4) { 1930 DRM_ERROR("bad DRAW_INDEX_2\n"); 1931 return -EINVAL; 1932 } 1933 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 1934 if (r) { 1935 DRM_ERROR("bad DRAW_INDEX_2\n"); 1936 return -EINVAL; 1937 } 1938 1939 offset = reloc->gpu_offset + 1940 radeon_get_ib_value(p, idx+1) + 1941 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); 1942 1943 ib[idx+1] = offset; 1944 ib[idx+2] = upper_32_bits(offset) & 0xff; 1945 1946 r = evergreen_cs_track_check(p); 1947 if (r) { 1948 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); 1949 return r; 1950 } 1951 break; 1952 } 1953 case PACKET3_DRAW_INDEX_AUTO: 1954 if (pkt->count != 1) { 1955 DRM_ERROR("bad DRAW_INDEX_AUTO\n"); 1956 return -EINVAL; 1957 } 1958 r = evergreen_cs_track_check(p); 1959 if (r) { 1960 dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); 1961 return r; 1962 } 1963 break; 1964 case PACKET3_DRAW_INDEX_MULTI_AUTO: 1965 if (pkt->count != 2) { 1966 DRM_ERROR("bad DRAW_INDEX_MULTI_AUTO\n"); 1967 return -EINVAL; 1968 } 1969 r = evergreen_cs_track_check(p); 1970 if (r) { 1971 dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); 1972 return r; 1973 } 1974 break; 1975 case PACKET3_DRAW_INDEX_IMMD: 1976 if (pkt->count < 2) { 1977 DRM_ERROR("bad DRAW_INDEX_IMMD\n"); 1978 return -EINVAL; 1979 } 1980 r = evergreen_cs_track_check(p); 1981 if (r) { 1982 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); 1983 return r; 1984 } 1985 break; 1986 case PACKET3_DRAW_INDEX_OFFSET: 1987 if (pkt->count != 2) { 1988 DRM_ERROR("bad DRAW_INDEX_OFFSET\n"); 1989 return -EINVAL; 1990 } 1991 r = evergreen_cs_track_check(p); 1992 if (r) { 1993 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); 1994 return r; 1995 } 1996 break; 1997 case PACKET3_DRAW_INDEX_OFFSET_2: 1998 if (pkt->count != 3) { 1999 DRM_ERROR("bad DRAW_INDEX_OFFSET_2\n"); 2000 return -EINVAL; 2001 } 2002 r = evergreen_cs_track_check(p); 2003 if (r) { 2004 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); 2005 return r; 2006 } 2007 break; 2008 case PACKET3_DISPATCH_DIRECT: 2009 if (pkt->count != 3) { 2010 DRM_ERROR("bad DISPATCH_DIRECT\n"); 2011 return -EINVAL; 2012 } 2013 r = evergreen_cs_track_check(p); 2014 if (r) { 2015 dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); 2016 return r; 2017 } 2018 break; 2019 case PACKET3_DISPATCH_INDIRECT: 2020 if (pkt->count != 1) { 2021 DRM_ERROR("bad DISPATCH_INDIRECT\n"); 2022 return -EINVAL; 2023 } 2024 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2025 if (r) { 2026 DRM_ERROR("bad DISPATCH_INDIRECT\n"); 2027 return -EINVAL; 2028 } 2029 ib[idx+0] = idx_value + (u32)(reloc->gpu_offset & 0xffffffff); 2030 r = evergreen_cs_track_check(p); 2031 if (r) { 2032 dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); 2033 return r; 2034 } 2035 break; 2036 case PACKET3_WAIT_REG_MEM: 2037 if (pkt->count != 5) { 2038 DRM_ERROR("bad WAIT_REG_MEM\n"); 2039 return -EINVAL; 2040 } 2041 /* bit 4 is reg (0) or mem (1) */ 2042 if (idx_value & 0x10) { 2043 uint64_t offset; 2044 2045 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2046 if (r) { 2047 DRM_ERROR("bad WAIT_REG_MEM\n"); 2048 return -EINVAL; 2049 } 2050 2051 offset = reloc->gpu_offset + 2052 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + 2053 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); 2054 2055 ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc); 2056 ib[idx+2] = upper_32_bits(offset) & 0xff; 2057 } else if (idx_value & 0x100) { 2058 DRM_ERROR("cannot use PFP on REG wait\n"); 2059 return -EINVAL; 2060 } 2061 break; 2062 case PACKET3_CP_DMA: 2063 { 2064 u32 command, size, info; 2065 u64 offset, tmp; 2066 if (pkt->count != 4) { 2067 DRM_ERROR("bad CP DMA\n"); 2068 return -EINVAL; 2069 } 2070 command = radeon_get_ib_value(p, idx+4); 2071 size = command & 0x1fffff; 2072 info = radeon_get_ib_value(p, idx+1); 2073 if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */ 2074 (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */ 2075 ((((info & 0x00300000) >> 20) == 0) && 2076 (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */ 2077 ((((info & 0x60000000) >> 29) == 0) && 2078 (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */ 2079 /* non mem to mem copies requires dw aligned count */ 2080 if (size % 4) { 2081 DRM_ERROR("CP DMA command requires dw count alignment\n"); 2082 return -EINVAL; 2083 } 2084 } 2085 if (command & PACKET3_CP_DMA_CMD_SAS) { 2086 /* src address space is register */ 2087 /* GDS is ok */ 2088 if (((info & 0x60000000) >> 29) != 1) { 2089 DRM_ERROR("CP DMA SAS not supported\n"); 2090 return -EINVAL; 2091 } 2092 } else { 2093 if (command & PACKET3_CP_DMA_CMD_SAIC) { 2094 DRM_ERROR("CP DMA SAIC only supported for registers\n"); 2095 return -EINVAL; 2096 } 2097 /* src address space is memory */ 2098 if (((info & 0x60000000) >> 29) == 0) { 2099 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2100 if (r) { 2101 DRM_ERROR("bad CP DMA SRC\n"); 2102 return -EINVAL; 2103 } 2104 2105 tmp = radeon_get_ib_value(p, idx) + 2106 ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); 2107 2108 offset = reloc->gpu_offset + tmp; 2109 2110 if ((tmp + size) > radeon_bo_size(reloc->robj)) { 2111 dev_warn(p->dev, "CP DMA src buffer too small (%ju %lu)\n", 2112 tmp + size, radeon_bo_size(reloc->robj)); 2113 return -EINVAL; 2114 } 2115 2116 ib[idx] = offset; 2117 ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff); 2118 } else if (((info & 0x60000000) >> 29) != 2) { 2119 DRM_ERROR("bad CP DMA SRC_SEL\n"); 2120 return -EINVAL; 2121 } 2122 } 2123 if (command & PACKET3_CP_DMA_CMD_DAS) { 2124 /* dst address space is register */ 2125 /* GDS is ok */ 2126 if (((info & 0x00300000) >> 20) != 1) { 2127 DRM_ERROR("CP DMA DAS not supported\n"); 2128 return -EINVAL; 2129 } 2130 } else { 2131 /* dst address space is memory */ 2132 if (command & PACKET3_CP_DMA_CMD_DAIC) { 2133 DRM_ERROR("CP DMA DAIC only supported for registers\n"); 2134 return -EINVAL; 2135 } 2136 if (((info & 0x00300000) >> 20) == 0) { 2137 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2138 if (r) { 2139 DRM_ERROR("bad CP DMA DST\n"); 2140 return -EINVAL; 2141 } 2142 2143 tmp = radeon_get_ib_value(p, idx+2) + 2144 ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32); 2145 2146 offset = reloc->gpu_offset + tmp; 2147 2148 if ((tmp + size) > radeon_bo_size(reloc->robj)) { 2149 dev_warn(p->dev, "CP DMA dst buffer too small (%ju %lu)\n", 2150 tmp + size, radeon_bo_size(reloc->robj)); 2151 return -EINVAL; 2152 } 2153 2154 ib[idx+2] = offset; 2155 ib[idx+3] = upper_32_bits(offset) & 0xff; 2156 } else { 2157 DRM_ERROR("bad CP DMA DST_SEL\n"); 2158 return -EINVAL; 2159 } 2160 } 2161 break; 2162 } 2163 case PACKET3_SURFACE_SYNC: 2164 if (pkt->count != 3) { 2165 DRM_ERROR("bad SURFACE_SYNC\n"); 2166 return -EINVAL; 2167 } 2168 /* 0xffffffff/0x0 is flush all cache flag */ 2169 if (radeon_get_ib_value(p, idx + 1) != 0xffffffff || 2170 radeon_get_ib_value(p, idx + 2) != 0) { 2171 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2172 if (r) { 2173 DRM_ERROR("bad SURFACE_SYNC\n"); 2174 return -EINVAL; 2175 } 2176 ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 2177 } 2178 break; 2179 case PACKET3_EVENT_WRITE: 2180 if (pkt->count != 2 && pkt->count != 0) { 2181 DRM_ERROR("bad EVENT_WRITE\n"); 2182 return -EINVAL; 2183 } 2184 if (pkt->count) { 2185 uint64_t offset; 2186 2187 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2188 if (r) { 2189 DRM_ERROR("bad EVENT_WRITE\n"); 2190 return -EINVAL; 2191 } 2192 offset = reloc->gpu_offset + 2193 (radeon_get_ib_value(p, idx+1) & 0xfffffff8) + 2194 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); 2195 2196 ib[idx+1] = offset & 0xfffffff8; 2197 ib[idx+2] = upper_32_bits(offset) & 0xff; 2198 } 2199 break; 2200 case PACKET3_EVENT_WRITE_EOP: 2201 { 2202 uint64_t offset; 2203 2204 if (pkt->count != 4) { 2205 DRM_ERROR("bad EVENT_WRITE_EOP\n"); 2206 return -EINVAL; 2207 } 2208 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2209 if (r) { 2210 DRM_ERROR("bad EVENT_WRITE_EOP\n"); 2211 return -EINVAL; 2212 } 2213 2214 offset = reloc->gpu_offset + 2215 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + 2216 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); 2217 2218 ib[idx+1] = offset & 0xfffffffc; 2219 ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff); 2220 break; 2221 } 2222 case PACKET3_EVENT_WRITE_EOS: 2223 { 2224 uint64_t offset; 2225 2226 if (pkt->count != 3) { 2227 DRM_ERROR("bad EVENT_WRITE_EOS\n"); 2228 return -EINVAL; 2229 } 2230 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2231 if (r) { 2232 DRM_ERROR("bad EVENT_WRITE_EOS\n"); 2233 return -EINVAL; 2234 } 2235 2236 offset = reloc->gpu_offset + 2237 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + 2238 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); 2239 2240 ib[idx+1] = offset & 0xfffffffc; 2241 ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff); 2242 break; 2243 } 2244 case PACKET3_SET_CONFIG_REG: 2245 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START; 2246 end_reg = 4 * pkt->count + start_reg - 4; 2247 if ((start_reg < PACKET3_SET_CONFIG_REG_START) || 2248 (start_reg >= PACKET3_SET_CONFIG_REG_END) || 2249 (end_reg >= PACKET3_SET_CONFIG_REG_END)) { 2250 DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n"); 2251 return -EINVAL; 2252 } 2253 for (i = 0; i < pkt->count; i++) { 2254 reg = start_reg + (4 * i); 2255 r = evergreen_cs_check_reg(p, reg, idx+1+i); 2256 if (r) 2257 return r; 2258 } 2259 break; 2260 case PACKET3_SET_CONTEXT_REG: 2261 start_reg = (idx_value << 2) + PACKET3_SET_CONTEXT_REG_START; 2262 end_reg = 4 * pkt->count + start_reg - 4; 2263 if ((start_reg < PACKET3_SET_CONTEXT_REG_START) || 2264 (start_reg >= PACKET3_SET_CONTEXT_REG_END) || 2265 (end_reg >= PACKET3_SET_CONTEXT_REG_END)) { 2266 DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n"); 2267 return -EINVAL; 2268 } 2269 for (i = 0; i < pkt->count; i++) { 2270 reg = start_reg + (4 * i); 2271 r = evergreen_cs_check_reg(p, reg, idx+1+i); 2272 if (r) 2273 return r; 2274 } 2275 break; 2276 case PACKET3_SET_RESOURCE: 2277 if (pkt->count % 8) { 2278 DRM_ERROR("bad SET_RESOURCE\n"); 2279 return -EINVAL; 2280 } 2281 start_reg = (idx_value << 2) + PACKET3_SET_RESOURCE_START; 2282 end_reg = 4 * pkt->count + start_reg - 4; 2283 if ((start_reg < PACKET3_SET_RESOURCE_START) || 2284 (start_reg >= PACKET3_SET_RESOURCE_END) || 2285 (end_reg >= PACKET3_SET_RESOURCE_END)) { 2286 DRM_ERROR("bad SET_RESOURCE\n"); 2287 return -EINVAL; 2288 } 2289 for (i = 0; i < (pkt->count / 8); i++) { 2290 struct radeon_bo *texture, *mipmap; 2291 u32 toffset, moffset; 2292 u32 size, offset, mip_address, tex_dim; 2293 2294 switch (G__SQ_CONSTANT_TYPE(radeon_get_ib_value(p, idx+1+(i*8)+7))) { 2295 case SQ_TEX_VTX_VALID_TEXTURE: 2296 /* tex base */ 2297 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2298 if (r) { 2299 DRM_ERROR("bad SET_RESOURCE (tex)\n"); 2300 return -EINVAL; 2301 } 2302 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { 2303 ib[idx+1+(i*8)+1] |= 2304 TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); 2305 if (reloc->tiling_flags & RADEON_TILING_MACRO) { 2306 unsigned bankw, bankh, mtaspect, tile_split; 2307 2308 evergreen_tiling_fields(reloc->tiling_flags, 2309 &bankw, &bankh, &mtaspect, 2310 &tile_split); 2311 ib[idx+1+(i*8)+6] |= TEX_TILE_SPLIT(tile_split); 2312 ib[idx+1+(i*8)+7] |= 2313 TEX_BANK_WIDTH(bankw) | 2314 TEX_BANK_HEIGHT(bankh) | 2315 MACRO_TILE_ASPECT(mtaspect) | 2316 TEX_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks)); 2317 } 2318 } 2319 texture = reloc->robj; 2320 toffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 2321 2322 /* tex mip base */ 2323 tex_dim = ib[idx+1+(i*8)+0] & 0x7; 2324 mip_address = ib[idx+1+(i*8)+3]; 2325 2326 if ((tex_dim == SQ_TEX_DIM_2D_MSAA || tex_dim == SQ_TEX_DIM_2D_ARRAY_MSAA) && 2327 !mip_address && 2328 !radeon_cs_packet_next_is_pkt3_nop(p)) { 2329 /* MIP_ADDRESS should point to FMASK for an MSAA texture. 2330 * It should be 0 if FMASK is disabled. */ 2331 moffset = 0; 2332 mipmap = NULL; 2333 } else { 2334 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2335 if (r) { 2336 DRM_ERROR("bad SET_RESOURCE (tex)\n"); 2337 return -EINVAL; 2338 } 2339 moffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff); 2340 mipmap = reloc->robj; 2341 } 2342 2343 r = evergreen_cs_track_validate_texture(p, texture, mipmap, idx+1+(i*8)); 2344 if (r) 2345 return r; 2346 ib[idx+1+(i*8)+2] += toffset; 2347 ib[idx+1+(i*8)+3] += moffset; 2348 break; 2349 case SQ_TEX_VTX_VALID_BUFFER: 2350 { 2351 uint64_t offset64; 2352 /* vtx base */ 2353 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2354 if (r) { 2355 DRM_ERROR("bad SET_RESOURCE (vtx)\n"); 2356 return -EINVAL; 2357 } 2358 offset = radeon_get_ib_value(p, idx+1+(i*8)+0); 2359 size = radeon_get_ib_value(p, idx+1+(i*8)+1); 2360 if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) { 2361 /* force size to size of the buffer */ 2362 dev_warn(p->dev, "vbo resource seems too big for the bo\n"); 2363 ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset; 2364 } 2365 2366 offset64 = reloc->gpu_offset + offset; 2367 ib[idx+1+(i*8)+0] = offset64; 2368 ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) | 2369 (upper_32_bits(offset64) & 0xff); 2370 break; 2371 } 2372 case SQ_TEX_VTX_INVALID_TEXTURE: 2373 case SQ_TEX_VTX_INVALID_BUFFER: 2374 default: 2375 DRM_ERROR("bad SET_RESOURCE\n"); 2376 return -EINVAL; 2377 } 2378 } 2379 break; 2380 case PACKET3_SET_ALU_CONST: 2381 /* XXX fix me ALU const buffers only */ 2382 break; 2383 case PACKET3_SET_BOOL_CONST: 2384 start_reg = (idx_value << 2) + PACKET3_SET_BOOL_CONST_START; 2385 end_reg = 4 * pkt->count + start_reg - 4; 2386 if ((start_reg < PACKET3_SET_BOOL_CONST_START) || 2387 (start_reg >= PACKET3_SET_BOOL_CONST_END) || 2388 (end_reg >= PACKET3_SET_BOOL_CONST_END)) { 2389 DRM_ERROR("bad SET_BOOL_CONST\n"); 2390 return -EINVAL; 2391 } 2392 break; 2393 case PACKET3_SET_LOOP_CONST: 2394 start_reg = (idx_value << 2) + PACKET3_SET_LOOP_CONST_START; 2395 end_reg = 4 * pkt->count + start_reg - 4; 2396 if ((start_reg < PACKET3_SET_LOOP_CONST_START) || 2397 (start_reg >= PACKET3_SET_LOOP_CONST_END) || 2398 (end_reg >= PACKET3_SET_LOOP_CONST_END)) { 2399 DRM_ERROR("bad SET_LOOP_CONST\n"); 2400 return -EINVAL; 2401 } 2402 break; 2403 case PACKET3_SET_CTL_CONST: 2404 start_reg = (idx_value << 2) + PACKET3_SET_CTL_CONST_START; 2405 end_reg = 4 * pkt->count + start_reg - 4; 2406 if ((start_reg < PACKET3_SET_CTL_CONST_START) || 2407 (start_reg >= PACKET3_SET_CTL_CONST_END) || 2408 (end_reg >= PACKET3_SET_CTL_CONST_END)) { 2409 DRM_ERROR("bad SET_CTL_CONST\n"); 2410 return -EINVAL; 2411 } 2412 break; 2413 case PACKET3_SET_SAMPLER: 2414 if (pkt->count % 3) { 2415 DRM_ERROR("bad SET_SAMPLER\n"); 2416 return -EINVAL; 2417 } 2418 start_reg = (idx_value << 2) + PACKET3_SET_SAMPLER_START; 2419 end_reg = 4 * pkt->count + start_reg - 4; 2420 if ((start_reg < PACKET3_SET_SAMPLER_START) || 2421 (start_reg >= PACKET3_SET_SAMPLER_END) || 2422 (end_reg >= PACKET3_SET_SAMPLER_END)) { 2423 DRM_ERROR("bad SET_SAMPLER\n"); 2424 return -EINVAL; 2425 } 2426 break; 2427 case PACKET3_STRMOUT_BUFFER_UPDATE: 2428 if (pkt->count != 4) { 2429 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n"); 2430 return -EINVAL; 2431 } 2432 /* Updating memory at DST_ADDRESS. */ 2433 if (idx_value & 0x1) { 2434 u64 offset; 2435 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2436 if (r) { 2437 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n"); 2438 return -EINVAL; 2439 } 2440 offset = radeon_get_ib_value(p, idx+1); 2441 offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; 2442 if ((offset + 4) > radeon_bo_size(reloc->robj)) { 2443 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%jx, 0x%lx\n", 2444 offset + 4, radeon_bo_size(reloc->robj)); 2445 return -EINVAL; 2446 } 2447 offset += reloc->gpu_offset; 2448 ib[idx+1] = offset; 2449 ib[idx+2] = upper_32_bits(offset) & 0xff; 2450 } 2451 /* Reading data from SRC_ADDRESS. */ 2452 if (((idx_value >> 1) & 0x3) == 2) { 2453 u64 offset; 2454 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2455 if (r) { 2456 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n"); 2457 return -EINVAL; 2458 } 2459 offset = radeon_get_ib_value(p, idx+3); 2460 offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; 2461 if ((offset + 4) > radeon_bo_size(reloc->robj)) { 2462 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%jx, 0x%lx\n", 2463 offset + 4, radeon_bo_size(reloc->robj)); 2464 return -EINVAL; 2465 } 2466 offset += reloc->gpu_offset; 2467 ib[idx+3] = offset; 2468 ib[idx+4] = upper_32_bits(offset) & 0xff; 2469 } 2470 break; 2471 case PACKET3_MEM_WRITE: 2472 { 2473 u64 offset; 2474 2475 if (pkt->count != 3) { 2476 DRM_ERROR("bad MEM_WRITE (invalid count)\n"); 2477 return -EINVAL; 2478 } 2479 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2480 if (r) { 2481 DRM_ERROR("bad MEM_WRITE (missing reloc)\n"); 2482 return -EINVAL; 2483 } 2484 offset = radeon_get_ib_value(p, idx+0); 2485 offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL; 2486 if (offset & 0x7) { 2487 DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n"); 2488 return -EINVAL; 2489 } 2490 if ((offset + 8) > radeon_bo_size(reloc->robj)) { 2491 DRM_ERROR("bad MEM_WRITE bo too small: 0x%jx, 0x%lx\n", 2492 offset + 8, radeon_bo_size(reloc->robj)); 2493 return -EINVAL; 2494 } 2495 offset += reloc->gpu_offset; 2496 ib[idx+0] = offset; 2497 ib[idx+1] = upper_32_bits(offset) & 0xff; 2498 break; 2499 } 2500 case PACKET3_COPY_DW: 2501 if (pkt->count != 4) { 2502 DRM_ERROR("bad COPY_DW (invalid count)\n"); 2503 return -EINVAL; 2504 } 2505 if (idx_value & 0x1) { 2506 u64 offset; 2507 /* SRC is memory. */ 2508 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2509 if (r) { 2510 DRM_ERROR("bad COPY_DW (missing src reloc)\n"); 2511 return -EINVAL; 2512 } 2513 offset = radeon_get_ib_value(p, idx+1); 2514 offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; 2515 if ((offset + 4) > radeon_bo_size(reloc->robj)) { 2516 DRM_ERROR("bad COPY_DW src bo too small: 0x%jx, 0x%lx\n", 2517 offset + 4, radeon_bo_size(reloc->robj)); 2518 return -EINVAL; 2519 } 2520 offset += reloc->gpu_offset; 2521 ib[idx+1] = offset; 2522 ib[idx+2] = upper_32_bits(offset) & 0xff; 2523 } else { 2524 /* SRC is a reg. */ 2525 reg = radeon_get_ib_value(p, idx+1) << 2; 2526 if (!evergreen_is_safe_reg(p, reg, idx+1)) 2527 return -EINVAL; 2528 } 2529 if (idx_value & 0x2) { 2530 u64 offset; 2531 /* DST is memory. */ 2532 r = radeon_cs_packet_next_reloc(p, &reloc, 0); 2533 if (r) { 2534 DRM_ERROR("bad COPY_DW (missing dst reloc)\n"); 2535 return -EINVAL; 2536 } 2537 offset = radeon_get_ib_value(p, idx+3); 2538 offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; 2539 if ((offset + 4) > radeon_bo_size(reloc->robj)) { 2540 DRM_ERROR("bad COPY_DW dst bo too small: 0x%jx, 0x%lx\n", 2541 offset + 4, radeon_bo_size(reloc->robj)); 2542 return -EINVAL; 2543 } 2544 offset += reloc->gpu_offset; 2545 ib[idx+3] = offset; 2546 ib[idx+4] = upper_32_bits(offset) & 0xff; 2547 } else { 2548 /* DST is a reg. */ 2549 reg = radeon_get_ib_value(p, idx+3) << 2; 2550 if (!evergreen_is_safe_reg(p, reg, idx+3)) 2551 return -EINVAL; 2552 } 2553 break; 2554 case PACKET3_NOP: 2555 break; 2556 default: 2557 DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode); 2558 return -EINVAL; 2559 } 2560 return 0; 2561 } 2562 2563 int evergreen_cs_parse(struct radeon_cs_parser *p) 2564 { 2565 struct radeon_cs_packet pkt; 2566 struct evergreen_cs_track *track; 2567 u32 tmp; 2568 int r; 2569 2570 if (p->track == NULL) { 2571 /* initialize tracker, we are in kms */ 2572 track = kzalloc(sizeof(*track), GFP_KERNEL); 2573 if (track == NULL) 2574 return -ENOMEM; 2575 evergreen_cs_track_init(track); 2576 if (p->rdev->family >= CHIP_CAYMAN) 2577 tmp = p->rdev->config.cayman.tile_config; 2578 else 2579 tmp = p->rdev->config.evergreen.tile_config; 2580 2581 switch (tmp & 0xf) { 2582 case 0: 2583 track->npipes = 1; 2584 break; 2585 case 1: 2586 default: 2587 track->npipes = 2; 2588 break; 2589 case 2: 2590 track->npipes = 4; 2591 break; 2592 case 3: 2593 track->npipes = 8; 2594 break; 2595 } 2596 2597 switch ((tmp & 0xf0) >> 4) { 2598 case 0: 2599 track->nbanks = 4; 2600 break; 2601 case 1: 2602 default: 2603 track->nbanks = 8; 2604 break; 2605 case 2: 2606 track->nbanks = 16; 2607 break; 2608 } 2609 2610 switch ((tmp & 0xf00) >> 8) { 2611 case 0: 2612 track->group_size = 256; 2613 break; 2614 case 1: 2615 default: 2616 track->group_size = 512; 2617 break; 2618 } 2619 2620 switch ((tmp & 0xf000) >> 12) { 2621 case 0: 2622 track->row_size = 1; 2623 break; 2624 case 1: 2625 default: 2626 track->row_size = 2; 2627 break; 2628 case 2: 2629 track->row_size = 4; 2630 break; 2631 } 2632 2633 p->track = track; 2634 } 2635 do { 2636 r = radeon_cs_packet_parse(p, &pkt, p->idx); 2637 if (r) { 2638 kfree(p->track); 2639 p->track = NULL; 2640 return r; 2641 } 2642 p->idx += pkt.count + 2; 2643 switch (pkt.type) { 2644 case RADEON_PACKET_TYPE0: 2645 r = evergreen_cs_parse_packet0(p, &pkt); 2646 break; 2647 case RADEON_PACKET_TYPE2: 2648 break; 2649 case RADEON_PACKET_TYPE3: 2650 r = evergreen_packet3_check(p, &pkt); 2651 break; 2652 default: 2653 DRM_ERROR("Unknown packet type %d !\n", pkt.type); 2654 kfree(p->track); 2655 p->track = NULL; 2656 return -EINVAL; 2657 } 2658 if (r) { 2659 kfree(p->track); 2660 p->track = NULL; 2661 return r; 2662 } 2663 } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw); 2664 #if 0 2665 for (r = 0; r < p->ib.length_dw; r++) { 2666 printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]); 2667 mdelay(1); 2668 } 2669 #endif 2670 kfree(p->track); 2671 p->track = NULL; 2672 return 0; 2673 } 2674 2675 /** 2676 * evergreen_dma_cs_parse() - parse the DMA IB 2677 * @p: parser structure holding parsing context. 2678 * 2679 * Parses the DMA IB from the CS ioctl and updates 2680 * the GPU addresses based on the reloc information and 2681 * checks for errors. (Evergreen-Cayman) 2682 * Returns 0 for success and an error on failure. 2683 **/ 2684 int evergreen_dma_cs_parse(struct radeon_cs_parser *p) 2685 { 2686 struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx]; 2687 struct radeon_cs_reloc *src_reloc, *dst_reloc, *dst2_reloc; 2688 u32 header, cmd, count, sub_cmd; 2689 volatile u32 *ib = p->ib.ptr; 2690 u32 idx; 2691 u64 src_offset, dst_offset, dst2_offset; 2692 int r; 2693 2694 do { 2695 if (p->idx >= ib_chunk->length_dw) { 2696 DRM_ERROR("Can not parse packet at %d after CS end %d !\n", 2697 p->idx, ib_chunk->length_dw); 2698 return -EINVAL; 2699 } 2700 idx = p->idx; 2701 header = radeon_get_ib_value(p, idx); 2702 cmd = GET_DMA_CMD(header); 2703 count = GET_DMA_COUNT(header); 2704 sub_cmd = GET_DMA_SUB_CMD(header); 2705 2706 switch (cmd) { 2707 case DMA_PACKET_WRITE: 2708 r = r600_dma_cs_next_reloc(p, &dst_reloc); 2709 if (r) { 2710 DRM_ERROR("bad DMA_PACKET_WRITE\n"); 2711 return -EINVAL; 2712 } 2713 switch (sub_cmd) { 2714 /* tiled */ 2715 case 8: 2716 dst_offset = radeon_get_ib_value(p, idx+1); 2717 dst_offset <<= 8; 2718 2719 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); 2720 p->idx += count + 7; 2721 break; 2722 /* linear */ 2723 case 0: 2724 dst_offset = radeon_get_ib_value(p, idx+1); 2725 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; 2726 2727 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); 2728 ib[idx+2] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; 2729 p->idx += count + 3; 2730 break; 2731 default: 2732 DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, header); 2733 return -EINVAL; 2734 } 2735 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { 2736 dev_warn(p->dev, "DMA write buffer too small (%ju %lu)\n", 2737 dst_offset, radeon_bo_size(dst_reloc->robj)); 2738 return -EINVAL; 2739 } 2740 break; 2741 case DMA_PACKET_COPY: 2742 r = r600_dma_cs_next_reloc(p, &src_reloc); 2743 if (r) { 2744 DRM_ERROR("bad DMA_PACKET_COPY\n"); 2745 return -EINVAL; 2746 } 2747 r = r600_dma_cs_next_reloc(p, &dst_reloc); 2748 if (r) { 2749 DRM_ERROR("bad DMA_PACKET_COPY\n"); 2750 return -EINVAL; 2751 } 2752 switch (sub_cmd) { 2753 /* Copy L2L, DW aligned */ 2754 case 0x00: 2755 /* L2L, dw */ 2756 src_offset = radeon_get_ib_value(p, idx+2); 2757 src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; 2758 dst_offset = radeon_get_ib_value(p, idx+1); 2759 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32; 2760 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { 2761 dev_warn(p->dev, "DMA L2L, dw src buffer too small (%ju %lu)\n", 2762 src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); 2763 return -EINVAL; 2764 } 2765 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { 2766 dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%ju %lu)\n", 2767 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); 2768 return -EINVAL; 2769 } 2770 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); 2771 ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc); 2772 ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; 2773 ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 2774 p->idx += 5; 2775 break; 2776 /* Copy L2T/T2L */ 2777 case 0x08: 2778 /* detile bit */ 2779 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { 2780 /* tiled src, linear dst */ 2781 src_offset = radeon_get_ib_value(p, idx+1); 2782 src_offset <<= 8; 2783 ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); 2784 2785 dst_offset = radeon_get_ib_value(p, idx + 7); 2786 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; 2787 ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); 2788 ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; 2789 } else { 2790 /* linear src, tiled dst */ 2791 src_offset = radeon_get_ib_value(p, idx+7); 2792 src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; 2793 ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc); 2794 ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 2795 2796 dst_offset = radeon_get_ib_value(p, idx+1); 2797 dst_offset <<= 8; 2798 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); 2799 } 2800 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { 2801 dev_warn(p->dev, "DMA L2T, src buffer too small (%ju %lu)\n", 2802 src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); 2803 return -EINVAL; 2804 } 2805 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { 2806 dev_warn(p->dev, "DMA L2T, dst buffer too small (%ju %lu)\n", 2807 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); 2808 return -EINVAL; 2809 } 2810 p->idx += 9; 2811 break; 2812 /* Copy L2L, byte aligned */ 2813 case 0x40: 2814 /* L2L, byte */ 2815 src_offset = radeon_get_ib_value(p, idx+2); 2816 src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; 2817 dst_offset = radeon_get_ib_value(p, idx+1); 2818 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32; 2819 if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) { 2820 dev_warn(p->dev, "DMA L2L, byte src buffer too small (%ju %lu)\n", 2821 src_offset + count, radeon_bo_size(src_reloc->robj)); 2822 return -EINVAL; 2823 } 2824 if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) { 2825 dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%ju %lu)\n", 2826 dst_offset + count, radeon_bo_size(dst_reloc->robj)); 2827 return -EINVAL; 2828 } 2829 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xffffffff); 2830 ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xffffffff); 2831 ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; 2832 ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 2833 p->idx += 5; 2834 break; 2835 /* Copy L2L, partial */ 2836 case 0x41: 2837 /* L2L, partial */ 2838 if (p->family < CHIP_CAYMAN) { 2839 DRM_ERROR("L2L Partial is cayman only !\n"); 2840 return -EINVAL; 2841 } 2842 ib[idx+1] += (u32)(src_reloc->gpu_offset & 0xffffffff); 2843 ib[idx+2] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 2844 ib[idx+4] += (u32)(dst_reloc->gpu_offset & 0xffffffff); 2845 ib[idx+5] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; 2846 2847 p->idx += 9; 2848 break; 2849 /* Copy L2L, DW aligned, broadcast */ 2850 case 0x44: 2851 /* L2L, dw, broadcast */ 2852 r = r600_dma_cs_next_reloc(p, &dst2_reloc); 2853 if (r) { 2854 DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n"); 2855 return -EINVAL; 2856 } 2857 dst_offset = radeon_get_ib_value(p, idx+1); 2858 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; 2859 dst2_offset = radeon_get_ib_value(p, idx+2); 2860 dst2_offset |= ((u64)(radeon_get_ib_value(p, idx+5) & 0xff)) << 32; 2861 src_offset = radeon_get_ib_value(p, idx+3); 2862 src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32; 2863 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { 2864 dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%ju %lu)\n", 2865 src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); 2866 return -EINVAL; 2867 } 2868 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { 2869 dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%ju %lu)\n", 2870 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); 2871 return -EINVAL; 2872 } 2873 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { 2874 dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%ju %lu)\n", 2875 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); 2876 return -EINVAL; 2877 } 2878 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); 2879 ib[idx+2] += (u32)(dst2_reloc->gpu_offset & 0xfffffffc); 2880 ib[idx+3] += (u32)(src_reloc->gpu_offset & 0xfffffffc); 2881 ib[idx+4] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; 2882 ib[idx+5] += upper_32_bits(dst2_reloc->gpu_offset) & 0xff; 2883 ib[idx+6] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 2884 p->idx += 7; 2885 break; 2886 /* Copy L2T Frame to Field */ 2887 case 0x48: 2888 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { 2889 DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n"); 2890 return -EINVAL; 2891 } 2892 r = r600_dma_cs_next_reloc(p, &dst2_reloc); 2893 if (r) { 2894 DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n"); 2895 return -EINVAL; 2896 } 2897 dst_offset = radeon_get_ib_value(p, idx+1); 2898 dst_offset <<= 8; 2899 dst2_offset = radeon_get_ib_value(p, idx+2); 2900 dst2_offset <<= 8; 2901 src_offset = radeon_get_ib_value(p, idx+8); 2902 src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32; 2903 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { 2904 dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%ju %lu)\n", 2905 src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); 2906 return -EINVAL; 2907 } 2908 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { 2909 dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%ju %lu)\n", 2910 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); 2911 return -EINVAL; 2912 } 2913 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { 2914 dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%ju %lu)\n", 2915 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); 2916 return -EINVAL; 2917 } 2918 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); 2919 ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8); 2920 ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc); 2921 ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 2922 p->idx += 10; 2923 break; 2924 /* Copy L2T/T2L, partial */ 2925 case 0x49: 2926 /* L2T, T2L partial */ 2927 if (p->family < CHIP_CAYMAN) { 2928 DRM_ERROR("L2T, T2L Partial is cayman only !\n"); 2929 return -EINVAL; 2930 } 2931 /* detile bit */ 2932 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { 2933 /* tiled src, linear dst */ 2934 ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); 2935 2936 ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); 2937 ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; 2938 } else { 2939 /* linear src, tiled dst */ 2940 ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc); 2941 ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 2942 2943 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); 2944 } 2945 p->idx += 12; 2946 break; 2947 /* Copy L2T broadcast */ 2948 case 0x4b: 2949 /* L2T, broadcast */ 2950 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { 2951 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); 2952 return -EINVAL; 2953 } 2954 r = r600_dma_cs_next_reloc(p, &dst2_reloc); 2955 if (r) { 2956 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); 2957 return -EINVAL; 2958 } 2959 dst_offset = radeon_get_ib_value(p, idx+1); 2960 dst_offset <<= 8; 2961 dst2_offset = radeon_get_ib_value(p, idx+2); 2962 dst2_offset <<= 8; 2963 src_offset = radeon_get_ib_value(p, idx+8); 2964 src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32; 2965 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { 2966 dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%ju %lu)\n", 2967 src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); 2968 return -EINVAL; 2969 } 2970 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { 2971 dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%ju %lu)\n", 2972 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); 2973 return -EINVAL; 2974 } 2975 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { 2976 dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%ju %lu)\n", 2977 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); 2978 return -EINVAL; 2979 } 2980 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); 2981 ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8); 2982 ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc); 2983 ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 2984 p->idx += 10; 2985 break; 2986 /* Copy L2T/T2L (tile units) */ 2987 case 0x4c: 2988 /* L2T, T2L */ 2989 /* detile bit */ 2990 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { 2991 /* tiled src, linear dst */ 2992 src_offset = radeon_get_ib_value(p, idx+1); 2993 src_offset <<= 8; 2994 ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); 2995 2996 dst_offset = radeon_get_ib_value(p, idx+7); 2997 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; 2998 ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); 2999 ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; 3000 } else { 3001 /* linear src, tiled dst */ 3002 src_offset = radeon_get_ib_value(p, idx+7); 3003 src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; 3004 ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc); 3005 ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 3006 3007 dst_offset = radeon_get_ib_value(p, idx+1); 3008 dst_offset <<= 8; 3009 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); 3010 } 3011 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { 3012 dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%ju %lu)\n", 3013 src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); 3014 return -EINVAL; 3015 } 3016 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { 3017 dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%ju %lu)\n", 3018 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); 3019 return -EINVAL; 3020 } 3021 p->idx += 9; 3022 break; 3023 /* Copy T2T, partial (tile units) */ 3024 case 0x4d: 3025 /* T2T partial */ 3026 if (p->family < CHIP_CAYMAN) { 3027 DRM_ERROR("L2T, T2L Partial is cayman only !\n"); 3028 return -EINVAL; 3029 } 3030 ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); 3031 ib[idx+4] += (u32)(dst_reloc->gpu_offset >> 8); 3032 p->idx += 13; 3033 break; 3034 /* Copy L2T broadcast (tile units) */ 3035 case 0x4f: 3036 /* L2T, broadcast */ 3037 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { 3038 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); 3039 return -EINVAL; 3040 } 3041 r = r600_dma_cs_next_reloc(p, &dst2_reloc); 3042 if (r) { 3043 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); 3044 return -EINVAL; 3045 } 3046 dst_offset = radeon_get_ib_value(p, idx+1); 3047 dst_offset <<= 8; 3048 dst2_offset = radeon_get_ib_value(p, idx+2); 3049 dst2_offset <<= 8; 3050 src_offset = radeon_get_ib_value(p, idx+8); 3051 src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32; 3052 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { 3053 dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%ju %lu)\n", 3054 src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); 3055 return -EINVAL; 3056 } 3057 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { 3058 dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%ju %lu)\n", 3059 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); 3060 return -EINVAL; 3061 } 3062 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { 3063 dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%ju %lu)\n", 3064 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); 3065 return -EINVAL; 3066 } 3067 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); 3068 ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8); 3069 ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc); 3070 ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff; 3071 p->idx += 10; 3072 break; 3073 default: 3074 DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, header); 3075 return -EINVAL; 3076 } 3077 break; 3078 case DMA_PACKET_CONSTANT_FILL: 3079 r = r600_dma_cs_next_reloc(p, &dst_reloc); 3080 if (r) { 3081 DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n"); 3082 return -EINVAL; 3083 } 3084 dst_offset = radeon_get_ib_value(p, idx+1); 3085 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16; 3086 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { 3087 dev_warn(p->dev, "DMA constant fill buffer too small (%ju %lu)\n", 3088 dst_offset, radeon_bo_size(dst_reloc->robj)); 3089 return -EINVAL; 3090 } 3091 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); 3092 ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) << 16) & 0x00ff0000; 3093 p->idx += 4; 3094 break; 3095 case DMA_PACKET_NOP: 3096 p->idx += 1; 3097 break; 3098 default: 3099 DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx); 3100 return -EINVAL; 3101 } 3102 } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw); 3103 #if 0 3104 for (r = 0; r < p->ib->length_dw; r++) { 3105 printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]); 3106 mdelay(1); 3107 } 3108 #endif 3109 return 0; 3110 } 3111 3112 /* vm parser */ 3113 static bool evergreen_vm_reg_valid(u32 reg) 3114 { 3115 /* context regs are fine */ 3116 if (reg >= 0x28000) 3117 return true; 3118 3119 /* check config regs */ 3120 switch (reg) { 3121 case WAIT_UNTIL: 3122 case GRBM_GFX_INDEX: 3123 case CP_STRMOUT_CNTL: 3124 case CP_COHER_CNTL: 3125 case CP_COHER_SIZE: 3126 case VGT_VTX_VECT_EJECT_REG: 3127 case VGT_CACHE_INVALIDATION: 3128 case VGT_GS_VERTEX_REUSE: 3129 case VGT_PRIMITIVE_TYPE: 3130 case VGT_INDEX_TYPE: 3131 case VGT_NUM_INDICES: 3132 case VGT_NUM_INSTANCES: 3133 case VGT_COMPUTE_DIM_X: 3134 case VGT_COMPUTE_DIM_Y: 3135 case VGT_COMPUTE_DIM_Z: 3136 case VGT_COMPUTE_START_X: 3137 case VGT_COMPUTE_START_Y: 3138 case VGT_COMPUTE_START_Z: 3139 case VGT_COMPUTE_INDEX: 3140 case VGT_COMPUTE_THREAD_GROUP_SIZE: 3141 case VGT_HS_OFFCHIP_PARAM: 3142 case PA_CL_ENHANCE: 3143 case PA_SU_LINE_STIPPLE_VALUE: 3144 case PA_SC_LINE_STIPPLE_STATE: 3145 case PA_SC_ENHANCE: 3146 case SQ_DYN_GPR_CNTL_PS_FLUSH_REQ: 3147 case SQ_DYN_GPR_SIMD_LOCK_EN: 3148 case SQ_CONFIG: 3149 case SQ_GPR_RESOURCE_MGMT_1: 3150 case SQ_GLOBAL_GPR_RESOURCE_MGMT_1: 3151 case SQ_GLOBAL_GPR_RESOURCE_MGMT_2: 3152 case SQ_CONST_MEM_BASE: 3153 case SQ_STATIC_THREAD_MGMT_1: 3154 case SQ_STATIC_THREAD_MGMT_2: 3155 case SQ_STATIC_THREAD_MGMT_3: 3156 case SPI_CONFIG_CNTL: 3157 case SPI_CONFIG_CNTL_1: 3158 case TA_CNTL_AUX: 3159 case DB_DEBUG: 3160 case DB_DEBUG2: 3161 case DB_DEBUG3: 3162 case DB_DEBUG4: 3163 case DB_WATERMARKS: 3164 case TD_PS_BORDER_COLOR_INDEX: 3165 case TD_PS_BORDER_COLOR_RED: 3166 case TD_PS_BORDER_COLOR_GREEN: 3167 case TD_PS_BORDER_COLOR_BLUE: 3168 case TD_PS_BORDER_COLOR_ALPHA: 3169 case TD_VS_BORDER_COLOR_INDEX: 3170 case TD_VS_BORDER_COLOR_RED: 3171 case TD_VS_BORDER_COLOR_GREEN: 3172 case TD_VS_BORDER_COLOR_BLUE: 3173 case TD_VS_BORDER_COLOR_ALPHA: 3174 case TD_GS_BORDER_COLOR_INDEX: 3175 case TD_GS_BORDER_COLOR_RED: 3176 case TD_GS_BORDER_COLOR_GREEN: 3177 case TD_GS_BORDER_COLOR_BLUE: 3178 case TD_GS_BORDER_COLOR_ALPHA: 3179 case TD_HS_BORDER_COLOR_INDEX: 3180 case TD_HS_BORDER_COLOR_RED: 3181 case TD_HS_BORDER_COLOR_GREEN: 3182 case TD_HS_BORDER_COLOR_BLUE: 3183 case TD_HS_BORDER_COLOR_ALPHA: 3184 case TD_LS_BORDER_COLOR_INDEX: 3185 case TD_LS_BORDER_COLOR_RED: 3186 case TD_LS_BORDER_COLOR_GREEN: 3187 case TD_LS_BORDER_COLOR_BLUE: 3188 case TD_LS_BORDER_COLOR_ALPHA: 3189 case TD_CS_BORDER_COLOR_INDEX: 3190 case TD_CS_BORDER_COLOR_RED: 3191 case TD_CS_BORDER_COLOR_GREEN: 3192 case TD_CS_BORDER_COLOR_BLUE: 3193 case TD_CS_BORDER_COLOR_ALPHA: 3194 case SQ_ESGS_RING_SIZE: 3195 case SQ_GSVS_RING_SIZE: 3196 case SQ_ESTMP_RING_SIZE: 3197 case SQ_GSTMP_RING_SIZE: 3198 case SQ_HSTMP_RING_SIZE: 3199 case SQ_LSTMP_RING_SIZE: 3200 case SQ_PSTMP_RING_SIZE: 3201 case SQ_VSTMP_RING_SIZE: 3202 case SQ_ESGS_RING_ITEMSIZE: 3203 case SQ_ESTMP_RING_ITEMSIZE: 3204 case SQ_GSTMP_RING_ITEMSIZE: 3205 case SQ_GSVS_RING_ITEMSIZE: 3206 case SQ_GS_VERT_ITEMSIZE: 3207 case SQ_GS_VERT_ITEMSIZE_1: 3208 case SQ_GS_VERT_ITEMSIZE_2: 3209 case SQ_GS_VERT_ITEMSIZE_3: 3210 case SQ_GSVS_RING_OFFSET_1: 3211 case SQ_GSVS_RING_OFFSET_2: 3212 case SQ_GSVS_RING_OFFSET_3: 3213 case SQ_HSTMP_RING_ITEMSIZE: 3214 case SQ_LSTMP_RING_ITEMSIZE: 3215 case SQ_PSTMP_RING_ITEMSIZE: 3216 case SQ_VSTMP_RING_ITEMSIZE: 3217 case VGT_TF_RING_SIZE: 3218 case SQ_ESGS_RING_BASE: 3219 case SQ_GSVS_RING_BASE: 3220 case SQ_ESTMP_RING_BASE: 3221 case SQ_GSTMP_RING_BASE: 3222 case SQ_HSTMP_RING_BASE: 3223 case SQ_LSTMP_RING_BASE: 3224 case SQ_PSTMP_RING_BASE: 3225 case SQ_VSTMP_RING_BASE: 3226 case CAYMAN_VGT_OFFCHIP_LDS_BASE: 3227 case CAYMAN_SQ_EX_ALLOC_TABLE_SLOTS: 3228 return true; 3229 default: 3230 DRM_ERROR("Invalid register 0x%x in CS\n", reg); 3231 return false; 3232 } 3233 } 3234 3235 static int evergreen_vm_packet3_check(struct radeon_device *rdev, 3236 u32 *ib, struct radeon_cs_packet *pkt) 3237 { 3238 u32 idx = pkt->idx + 1; 3239 u32 idx_value = ib[idx]; 3240 u32 start_reg, end_reg, reg, i; 3241 u32 command, info; 3242 3243 switch (pkt->opcode) { 3244 case PACKET3_NOP: 3245 case PACKET3_SET_BASE: 3246 case PACKET3_CLEAR_STATE: 3247 case PACKET3_INDEX_BUFFER_SIZE: 3248 case PACKET3_DISPATCH_DIRECT: 3249 case PACKET3_DISPATCH_INDIRECT: 3250 case PACKET3_MODE_CONTROL: 3251 case PACKET3_SET_PREDICATION: 3252 case PACKET3_COND_EXEC: 3253 case PACKET3_PRED_EXEC: 3254 case PACKET3_DRAW_INDIRECT: 3255 case PACKET3_DRAW_INDEX_INDIRECT: 3256 case PACKET3_INDEX_BASE: 3257 case PACKET3_DRAW_INDEX_2: 3258 case PACKET3_CONTEXT_CONTROL: 3259 case PACKET3_DRAW_INDEX_OFFSET: 3260 case PACKET3_INDEX_TYPE: 3261 case PACKET3_DRAW_INDEX: 3262 case PACKET3_DRAW_INDEX_AUTO: 3263 case PACKET3_DRAW_INDEX_IMMD: 3264 case PACKET3_NUM_INSTANCES: 3265 case PACKET3_DRAW_INDEX_MULTI_AUTO: 3266 case PACKET3_STRMOUT_BUFFER_UPDATE: 3267 case PACKET3_DRAW_INDEX_OFFSET_2: 3268 case PACKET3_DRAW_INDEX_MULTI_ELEMENT: 3269 case PACKET3_MPEG_INDEX: 3270 case PACKET3_WAIT_REG_MEM: 3271 case PACKET3_MEM_WRITE: 3272 case PACKET3_SURFACE_SYNC: 3273 case PACKET3_EVENT_WRITE: 3274 case PACKET3_EVENT_WRITE_EOP: 3275 case PACKET3_EVENT_WRITE_EOS: 3276 case PACKET3_SET_CONTEXT_REG: 3277 case PACKET3_SET_BOOL_CONST: 3278 case PACKET3_SET_LOOP_CONST: 3279 case PACKET3_SET_RESOURCE: 3280 case PACKET3_SET_SAMPLER: 3281 case PACKET3_SET_CTL_CONST: 3282 case PACKET3_SET_RESOURCE_OFFSET: 3283 case PACKET3_SET_CONTEXT_REG_INDIRECT: 3284 case PACKET3_SET_RESOURCE_INDIRECT: 3285 case CAYMAN_PACKET3_DEALLOC_STATE: 3286 break; 3287 case PACKET3_COND_WRITE: 3288 if (idx_value & 0x100) { 3289 reg = ib[idx + 5] * 4; 3290 if (!evergreen_vm_reg_valid(reg)) 3291 return -EINVAL; 3292 } 3293 break; 3294 case PACKET3_COPY_DW: 3295 if (idx_value & 0x2) { 3296 reg = ib[idx + 3] * 4; 3297 if (!evergreen_vm_reg_valid(reg)) 3298 return -EINVAL; 3299 } 3300 break; 3301 case PACKET3_SET_CONFIG_REG: 3302 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START; 3303 end_reg = 4 * pkt->count + start_reg - 4; 3304 if ((start_reg < PACKET3_SET_CONFIG_REG_START) || 3305 (start_reg >= PACKET3_SET_CONFIG_REG_END) || 3306 (end_reg >= PACKET3_SET_CONFIG_REG_END)) { 3307 DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n"); 3308 return -EINVAL; 3309 } 3310 for (i = 0; i < pkt->count; i++) { 3311 reg = start_reg + (4 * i); 3312 if (!evergreen_vm_reg_valid(reg)) 3313 return -EINVAL; 3314 } 3315 break; 3316 case PACKET3_CP_DMA: 3317 command = ib[idx + 4]; 3318 info = ib[idx + 1]; 3319 if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */ 3320 (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */ 3321 ((((info & 0x00300000) >> 20) == 0) && 3322 (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */ 3323 ((((info & 0x60000000) >> 29) == 0) && 3324 (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */ 3325 /* non mem to mem copies requires dw aligned count */ 3326 if ((command & 0x1fffff) % 4) { 3327 DRM_ERROR("CP DMA command requires dw count alignment\n"); 3328 return -EINVAL; 3329 } 3330 } 3331 if (command & PACKET3_CP_DMA_CMD_SAS) { 3332 /* src address space is register */ 3333 if (((info & 0x60000000) >> 29) == 0) { 3334 start_reg = idx_value << 2; 3335 if (command & PACKET3_CP_DMA_CMD_SAIC) { 3336 reg = start_reg; 3337 if (!evergreen_vm_reg_valid(reg)) { 3338 DRM_ERROR("CP DMA Bad SRC register\n"); 3339 return -EINVAL; 3340 } 3341 } else { 3342 for (i = 0; i < (command & 0x1fffff); i++) { 3343 reg = start_reg + (4 * i); 3344 if (!evergreen_vm_reg_valid(reg)) { 3345 DRM_ERROR("CP DMA Bad SRC register\n"); 3346 return -EINVAL; 3347 } 3348 } 3349 } 3350 } 3351 } 3352 if (command & PACKET3_CP_DMA_CMD_DAS) { 3353 /* dst address space is register */ 3354 if (((info & 0x00300000) >> 20) == 0) { 3355 start_reg = ib[idx + 2]; 3356 if (command & PACKET3_CP_DMA_CMD_DAIC) { 3357 reg = start_reg; 3358 if (!evergreen_vm_reg_valid(reg)) { 3359 DRM_ERROR("CP DMA Bad DST register\n"); 3360 return -EINVAL; 3361 } 3362 } else { 3363 for (i = 0; i < (command & 0x1fffff); i++) { 3364 reg = start_reg + (4 * i); 3365 if (!evergreen_vm_reg_valid(reg)) { 3366 DRM_ERROR("CP DMA Bad DST register\n"); 3367 return -EINVAL; 3368 } 3369 } 3370 } 3371 } 3372 } 3373 break; 3374 default: 3375 return -EINVAL; 3376 } 3377 return 0; 3378 } 3379 3380 int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) 3381 { 3382 int ret = 0; 3383 u32 idx = 0; 3384 struct radeon_cs_packet pkt; 3385 3386 do { 3387 pkt.idx = idx; 3388 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]); 3389 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]); 3390 pkt.one_reg_wr = 0; 3391 switch (pkt.type) { 3392 case RADEON_PACKET_TYPE0: 3393 dev_err(rdev->dev, "Packet0 not allowed!\n"); 3394 ret = -EINVAL; 3395 break; 3396 case RADEON_PACKET_TYPE2: 3397 idx += 1; 3398 break; 3399 case RADEON_PACKET_TYPE3: 3400 pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]); 3401 ret = evergreen_vm_packet3_check(rdev, ib->ptr, &pkt); 3402 idx += pkt.count + 2; 3403 break; 3404 default: 3405 dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type); 3406 ret = -EINVAL; 3407 break; 3408 } 3409 if (ret) 3410 break; 3411 } while (idx < ib->length_dw); 3412 3413 return ret; 3414 } 3415 3416 /** 3417 * evergreen_dma_ib_parse() - parse the DMA IB for VM 3418 * @rdev: radeon_device pointer 3419 * @ib: radeon_ib pointer 3420 * 3421 * Parses the DMA IB from the VM CS ioctl 3422 * checks for errors. (Cayman-SI) 3423 * Returns 0 for success and an error on failure. 3424 **/ 3425 int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) 3426 { 3427 u32 idx = 0; 3428 u32 header, cmd, count, sub_cmd; 3429 3430 do { 3431 header = ib->ptr[idx]; 3432 cmd = GET_DMA_CMD(header); 3433 count = GET_DMA_COUNT(header); 3434 sub_cmd = GET_DMA_SUB_CMD(header); 3435 3436 switch (cmd) { 3437 case DMA_PACKET_WRITE: 3438 switch (sub_cmd) { 3439 /* tiled */ 3440 case 8: 3441 idx += count + 7; 3442 break; 3443 /* linear */ 3444 case 0: 3445 idx += count + 3; 3446 break; 3447 default: 3448 DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, ib->ptr[idx]); 3449 return -EINVAL; 3450 } 3451 break; 3452 case DMA_PACKET_COPY: 3453 switch (sub_cmd) { 3454 /* Copy L2L, DW aligned */ 3455 case 0x00: 3456 idx += 5; 3457 break; 3458 /* Copy L2T/T2L */ 3459 case 0x08: 3460 idx += 9; 3461 break; 3462 /* Copy L2L, byte aligned */ 3463 case 0x40: 3464 idx += 5; 3465 break; 3466 /* Copy L2L, partial */ 3467 case 0x41: 3468 idx += 9; 3469 break; 3470 /* Copy L2L, DW aligned, broadcast */ 3471 case 0x44: 3472 idx += 7; 3473 break; 3474 /* Copy L2T Frame to Field */ 3475 case 0x48: 3476 idx += 10; 3477 break; 3478 /* Copy L2T/T2L, partial */ 3479 case 0x49: 3480 idx += 12; 3481 break; 3482 /* Copy L2T broadcast */ 3483 case 0x4b: 3484 idx += 10; 3485 break; 3486 /* Copy L2T/T2L (tile units) */ 3487 case 0x4c: 3488 idx += 9; 3489 break; 3490 /* Copy T2T, partial (tile units) */ 3491 case 0x4d: 3492 idx += 13; 3493 break; 3494 /* Copy L2T broadcast (tile units) */ 3495 case 0x4f: 3496 idx += 10; 3497 break; 3498 default: 3499 DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, ib->ptr[idx]); 3500 return -EINVAL; 3501 } 3502 break; 3503 case DMA_PACKET_CONSTANT_FILL: 3504 idx += 4; 3505 break; 3506 case DMA_PACKET_NOP: 3507 idx += 1; 3508 break; 3509 default: 3510 DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx); 3511 return -EINVAL; 3512 } 3513 } while (idx < ib->length_dw); 3514 3515 return 0; 3516 } 3517