1 /* savage_state.c -- State and drawing support for Savage 2 * 3 * Copyright 2004 Felix Kuehling 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sub license, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the 14 * next paragraph) shall be included in all copies or substantial portions 15 * of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR 21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 22 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
 */
#include "drmP.h"
#include "savage_drm.h"
#include "savage_drv.h"

/* Emit a scissor-rectangle update for Savage3D-series hardware.
 *
 * Merges the clip rect into the saved new_scstart/new_scend values
 * (only the scissor bit-fields are replaced) and, if the result
 * differs from the scissor state last programmed into the hardware,
 * writes the two registers through the DMA stream, preceded by a
 * WAIT_3D so the change cannot affect draws still in flight.
 */
void savage_emit_clip_rect_s3d(drm_savage_private_t *dev_priv,
                               const struct drm_clip_rect *pbox)
{
        uint32_t scstart = dev_priv->state.s3d.new_scstart;
        uint32_t scend = dev_priv->state.s3d.new_scend;
        /* x lives in bits 0-10, y in bits 16-26.  x2/y2 are exclusive
         * while the hardware end coordinates are inclusive, hence -1. */
        scstart = (scstart & ~SAVAGE_SCISSOR_MASK_S3D) |
            ((uint32_t)pbox->x1 & 0x000007ff) |
            (((uint32_t)pbox->y1 << 16) & 0x07ff0000);
        scend = (scend & ~SAVAGE_SCISSOR_MASK_S3D) |
            (((uint32_t)pbox->x2 - 1) & 0x000007ff) |
            ((((uint32_t)pbox->y2 - 1) << 16) & 0x07ff0000);
        if (scstart != dev_priv->state.s3d.scstart ||
            scend != dev_priv->state.s3d.scend) {
                DMA_LOCALS;
                BEGIN_DMA(4);
                DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
                DMA_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2);
                DMA_WRITE(scstart);
                DMA_WRITE(scend);
                dev_priv->state.s3d.scstart = scstart;
                dev_priv->state.s3d.scend = scend;
                /* remember that a WAIT was emitted; the indexed drawing
                 * paths need this for the Savage3D lockup workaround */
                dev_priv->waiting = 1;
                DMA_COMMIT();
        }
}

/* Savage4-series counterpart of savage_emit_clip_rect_s3d: the
 * scissor lives in DRAWCTRL0/DRAWCTRL1 with y in bits 12-23. */
void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv,
                              const struct drm_clip_rect *pbox)
{
        uint32_t drawctrl0 = dev_priv->state.s4.new_drawctrl0;
        uint32_t drawctrl1 = dev_priv->state.s4.new_drawctrl1;
        drawctrl0 = (drawctrl0 & ~SAVAGE_SCISSOR_MASK_S4) |
            ((uint32_t)pbox->x1 & 0x000007ff) |
            (((uint32_t)pbox->y1 << 12) & 0x00fff000);
        drawctrl1 = (drawctrl1 & ~SAVAGE_SCISSOR_MASK_S4) |
            (((uint32_t)pbox->x2 - 1) & 0x000007ff) |
            ((((uint32_t)pbox->y2 - 1) << 12) & 0x00fff000);
        if (drawctrl0 != dev_priv->state.s4.drawctrl0 ||
            drawctrl1 != dev_priv->state.s4.drawctrl1) {
                DMA_LOCALS;
                BEGIN_DMA(4);
                DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
                DMA_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2);
                DMA_WRITE(drawctrl0);
                DMA_WRITE(drawctrl1);
                dev_priv->state.s4.drawctrl0 = drawctrl0;
                dev_priv->state.s4.drawctrl1 = drawctrl1;
                dev_priv->waiting = 1;
                DMA_COMMIT();
        }
}

/* Validate a user-supplied texture base address for texture unit
 * [unit].  Bit 0 selects AGP (1) vs. local video memory (0); bits
 * 1-2 are reserved and must encode the value 2.  Returns 0 if the
 * address lies inside the heap it selects, -EINVAL otherwise.
 */
static int savage_verify_texaddr(drm_savage_private_t *dev_priv, int unit,
                                 uint32_t
addr)
{
        if ((addr & 6) != 2) {  /* reserved bits */
                DRM_ERROR("bad texAddr%d %08x (reserved bits)\n", unit, addr);
                return -EINVAL;
        }
        if (!(addr & 1)) {      /* local */
                addr &= ~7;     /* strip flag bits before range-checking */
                if (addr < dev_priv->texture_offset ||
                    addr >= dev_priv->texture_offset + dev_priv->texture_size) {
                        DRM_ERROR
                            ("bad texAddr%d %08x (local addr out of range)\n",
                             unit, addr);
                        return -EINVAL;
                }
        } else {                /* AGP */
                if (!dev_priv->agp_textures) {
                        DRM_ERROR("bad texAddr%d %08x (AGP not available)\n",
                                  unit, addr);
                        return -EINVAL;
                }
                addr &= ~7;
                if (addr < dev_priv->agp_textures->offset ||
                    addr >= (dev_priv->agp_textures->offset +
                             dev_priv->agp_textures->size)) {
                        DRM_ERROR
                            ("bad texAddr%d %08x (AGP addr out of range)\n",
                             unit, addr);
                        return -EINVAL;
                }
        }
        return 0;
}

/* Shadow register [reg] from the user update into the software state
 * copy, if the update range [start, start+count) covers it.  regs[0]
 * corresponds to register "start". */
#define SAVE_STATE(reg,where)			\
	if(start <= reg && start + count > reg)	\
		dev_priv->state.where = regs[reg - start]
/* Like SAVE_STATE, but only the bits selected by [mask] are taken
 * from the user update; the remaining bits keep their saved value. */
#define SAVE_STATE_MASK(reg,where,mask) do {			\
	if(start <= reg && start + count > reg) {		\
		uint32_t tmp;					\
		tmp = regs[reg - start];			\
		dev_priv->state.where = (tmp & (mask)) |	\
			(dev_priv->state.where & ~(mask));	\
	}							\
} while (0)

/* Verify a Savage3D state update against the writable register
 * window, shadow the scissor and texture registers, and range-check
 * the texture address when texturing is enabled.  Returns 0 or
 * -EINVAL. */
static int savage_verify_state_s3d(drm_savage_private_t *dev_priv,
                                   unsigned int start, unsigned int count,
                                   const uint32_t *regs)
{
        if (start < SAVAGE_TEXPALADDR_S3D ||
            start + count - 1 > SAVAGE_DESTTEXRWWATERMARK_S3D) {
                DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
                          start, start + count - 1);
                return -EINVAL;
        }

        /* Scissor values are never taken from the user directly; the
         * non-scissor bits are shadowed here and merged with the clip
         * rect in savage_emit_clip_rect_s3d. */
        SAVE_STATE_MASK(SAVAGE_SCSTART_S3D, s3d.new_scstart,
                        ~SAVAGE_SCISSOR_MASK_S3D);
        SAVE_STATE_MASK(SAVAGE_SCEND_S3D, s3d.new_scend,
                        ~SAVAGE_SCISSOR_MASK_S3D);

        /* if any texture regs were changed ... */
        if (start <= SAVAGE_TEXCTRL_S3D &&
            start + count > SAVAGE_TEXPALADDR_S3D) {
                /* ... check texture state */
                SAVE_STATE(SAVAGE_TEXCTRL_S3D, s3d.texctrl);
                SAVE_STATE(SAVAGE_TEXADDR_S3D, s3d.texaddr);
                if (dev_priv->state.s3d.texctrl & SAVAGE_TEXCTRL_TEXEN_MASK)
                        return savage_verify_texaddr(dev_priv, 0,
                                        dev_priv->state.s3d.texaddr);
        }

        return 0;
}

/* Savage4 counterpart of savage_verify_state_s3d; checks both
 * texture units. */
static int savage_verify_state_s4(drm_savage_private_t *dev_priv,
                                  unsigned int start, unsigned int count,
                                  const uint32_t *regs)
{
        int ret = 0;

        if (start < SAVAGE_DRAWLOCALCTRL_S4 ||
            start + count - 1 > SAVAGE_TEXBLENDCOLOR_S4) {
                DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
                          start, start + count - 1);
                return -EINVAL;
        }

        SAVE_STATE_MASK(SAVAGE_DRAWCTRL0_S4, s4.new_drawctrl0,
                        ~SAVAGE_SCISSOR_MASK_S4);
        SAVE_STATE_MASK(SAVAGE_DRAWCTRL1_S4, s4.new_drawctrl1,
                        ~SAVAGE_SCISSOR_MASK_S4);

        /* if any texture regs were changed ... */
        if (start <= SAVAGE_TEXDESCR_S4 &&
            start + count > SAVAGE_TEXPALADDR_S4) {
                /* ...
check texture state */
                SAVE_STATE(SAVAGE_TEXDESCR_S4, s4.texdescr);
                SAVE_STATE(SAVAGE_TEXADDR0_S4, s4.texaddr0);
                SAVE_STATE(SAVAGE_TEXADDR1_S4, s4.texaddr1);
                if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX0EN_MASK)
                        ret |= savage_verify_texaddr(dev_priv, 0,
                                        dev_priv->state.s4.texaddr0);
                if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX1EN_MASK)
                        ret |= savage_verify_texaddr(dev_priv, 1,
                                        dev_priv->state.s4.texaddr1);
        }

        return ret;
}
#undef SAVE_STATE
#undef SAVE_STATE_MASK

/* Dispatch a SAVAGE_CMD_STATE command: verify the register update,
 * shadow scissor/texture state, then emit the remaining registers to
 * the hardware via DMA.  The scissor registers themselves are carved
 * out of the emitted range (possibly splitting it into two ranges,
 * count and count2); they are emitted per clip rect from
 * savage_dispatch_draw instead.  Returns 0 or a negative errno. */
static int savage_dispatch_state(drm_savage_private_t *dev_priv,
                                 const drm_savage_cmd_header_t *cmd_header,
                                 const uint32_t *regs)
{
        unsigned int count = cmd_header->state.count;
        unsigned int start = cmd_header->state.start;
        unsigned int count2 = 0;        /* length of range above the scissor regs */
        unsigned int bci_size;
        int ret;
        DMA_LOCALS;

        if (!count)
                return 0;

        if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
                ret = savage_verify_state_s3d(dev_priv, start, count, regs);
                if (ret != 0)
                        return ret;
                /* scissor regs are emitted in savage_dispatch_draw */
                if (start < SAVAGE_SCSTART_S3D) {
                        if (start + count > SAVAGE_SCEND_S3D + 1)
                                count2 = count - (SAVAGE_SCEND_S3D + 1 - start);
                        if (start + count > SAVAGE_SCSTART_S3D)
                                count = SAVAGE_SCSTART_S3D - start;
                } else if (start <= SAVAGE_SCEND_S3D) {
                        if (start + count > SAVAGE_SCEND_S3D + 1) {
                                count -= SAVAGE_SCEND_S3D + 1 - start;
                                start = SAVAGE_SCEND_S3D + 1;
                        } else
                                return 0;
                }
        } else {
                ret = savage_verify_state_s4(dev_priv, start, count, regs);
                if (ret != 0)
                        return ret;
                /* scissor regs are emitted in savage_dispatch_draw */
                if (start < SAVAGE_DRAWCTRL0_S4) {
                        if (start + count > SAVAGE_DRAWCTRL1_S4 + 1)
                                count2 = count -
                                        (SAVAGE_DRAWCTRL1_S4 + 1 - start);
                        if (start + count > SAVAGE_DRAWCTRL0_S4)
                                count = SAVAGE_DRAWCTRL0_S4 - start;
                } else if (start <= SAVAGE_DRAWCTRL1_S4) {
                        if (start + count > SAVAGE_DRAWCTRL1_S4 + 1) {
                                count -= SAVAGE_DRAWCTRL1_S4 + 1 - start;
                                start = SAVAGE_DRAWCTRL1_S4 + 1;
                        } else
                                return 0;
                }
        }

        /* one SET_REGISTERS header per run of up to 255 registers */
        bci_size = count + (count + 254) / 255 + count2 + (count2 + 254) / 255;

        if (cmd_header->state.global) {
                BEGIN_DMA(bci_size + 1);
                DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
                dev_priv->waiting = 1;
        } else {
                BEGIN_DMA(bci_size);
        }

        do {
                while (count > 0) {
                        unsigned int n = count < 255 ? count : 255;
                        DMA_SET_REGISTERS(start, n);
                        DMA_COPY(regs, n);
                        count -= n;
                        start += n;
                        regs += n;
                }
                /* skip over the two scissor registers between the ranges */
                start += 2;
                regs += 2;
                count = count2;
                count2 = 0;
        } while (count);

        DMA_COMMIT();

        return 0;
}

/* Dispatch a non-indexed drawing command whose vertices come from a
 * DMA vertex buffer.  Commands are submitted through the BCI because
 * vertex DMA and command DMA cannot be used at the same time. */
static int savage_dispatch_dma_prim(drm_savage_private_t *dev_priv,
                                    const drm_savage_cmd_header_t *cmd_header,
                                    const struct drm_buf *dmabuf)
{
        unsigned char reorder = 0;
        unsigned int prim = cmd_header->prim.prim;
        unsigned int skip = cmd_header->prim.skip;
        unsigned int n = cmd_header->prim.count;
        unsigned int start = cmd_header->prim.start;
        unsigned int i;
        BCI_LOCALS;

        if (!dmabuf) {
                DRM_ERROR("called without dma buffers!\n");
                return -EINVAL;
        }

        if (!n)
                return 0;

        switch (prim) {
        case SAVAGE_PRIM_TRILIST_201:
                reorder = 1;
                prim = SAVAGE_PRIM_TRILIST;
                /* fall through */
        case SAVAGE_PRIM_TRILIST:
                if (n % 3 != 0) {
                        DRM_ERROR("wrong number of vertices %u in TRILIST\n",
                                  n);
                        return -EINVAL;
                }
                break;
        case SAVAGE_PRIM_TRISTRIP:
        case SAVAGE_PRIM_TRIFAN:
                if (n < 3) {
                        DRM_ERROR
                            ("wrong number of vertices %u in TRIFAN/STRIP\n",
                             n);
                        return -EINVAL;
                }
                break;
        default:
                DRM_ERROR("invalid primitive type %u\n", prim);
                return -EINVAL;
        }

        if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
                /* Savage3D vertex DMA always transfers full vertices. */
                if (skip != 0) {
                        DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
                        return
-EINVAL;
                }
        } else {
                /* each set skip bit removes one dword from the vertex;
                 * Savage4 vertex DMA requires exactly 8 remaining dwords */
                unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
                    (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
                    (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
                if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
                        DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
                        return -EINVAL;
                }
                if (reorder) {
                        DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
                        return -EINVAL;
                }
        }

        /* a full vertex occupies 32 bytes of the DMA buffer */
        if (start + n > dmabuf->total / 32) {
                DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
                          start, start + n - 1, dmabuf->total / 32);
                return -EINVAL;
        }

        /* Vertex DMA doesn't work with command DMA at the same time,
         * so we use BCI_... to submit commands here. Flush buffered
         * faked DMA first. */
        DMA_FLUSH();

        if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
                BEGIN_BCI(2);
                BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
                BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
                dev_priv->state.common.vbaddr = dmabuf->bus_address;
        }
        if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
                /* Workaround for what looks like a hardware bug. If a
                 * WAIT_3D_IDLE was emitted some time before the
                 * indexed drawing command then the engine will lock
                 * up. There are two known workarounds:
                 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
                BEGIN_BCI(63);
                for (i = 0; i < 63; ++i)
                        BCI_WRITE(BCI_CMD_WAIT);
                dev_priv->waiting = 0;
        }

        prim <<= 25;
        while (n != 0) {
                /* Can emit up to 255 indices (85 triangles) at once. */
                unsigned int count = n > 255 ? 255 : n;
                if (reorder) {
                        /* Need to reorder indices for correct flat
                         * shading while preserving the clock sense
                         * for correct culling. Only on Savage3D. */
                        int reorderarr[3] = { -1, -1, -1 };
                        reorderarr[start % 3] = 2;

                        BEGIN_BCI((count + 1 + 1) / 2);
                        BCI_DRAW_INDICES_S3D(count, prim, start + 2);

                        /* indices are packed two to a dword */
                        for (i = start + 1; i + 1 < start + count; i += 2)
                                BCI_WRITE((i + reorderarr[i % 3]) |
                                          ((i + 1 +
                                            reorderarr[(i + 1) % 3]) << 16));
                        if (i < start + count)
                                BCI_WRITE(i + reorderarr[i % 3]);
                } else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
                        BEGIN_BCI((count + 1 + 1) / 2);
                        BCI_DRAW_INDICES_S3D(count, prim, start);

                        for (i = start + 1; i + 1 < start + count; i += 2)
                                BCI_WRITE(i | ((i + 1) << 16));
                        if (i < start + count)
                                BCI_WRITE(i);
                } else {
                        BEGIN_BCI((count + 2 + 1) / 2);
                        BCI_DRAW_INDICES_S4(count, prim, skip);

                        for (i = start; i + 1 < start + count; i += 2)
                                BCI_WRITE(i | ((i + 1) << 16));
                        if (i < start + count)
                                BCI_WRITE(i);
                }

                start += count;
                n -= count;

                prim |= BCI_CMD_DRAW_CONT;
        }

        return 0;
}

/* Dispatch a non-indexed drawing command whose vertices come from a
 * client-supplied vertex buffer, submitted through (faked) command
 * DMA. */
static int savage_dispatch_vb_prim(drm_savage_private_t *dev_priv,
                                   const drm_savage_cmd_header_t *cmd_header,
                                   const uint32_t *vtxbuf, unsigned int vb_size,
                                   unsigned int vb_stride)
{
        unsigned char reorder = 0;
        unsigned int prim = cmd_header->prim.prim;
        unsigned int skip = cmd_header->prim.skip;
        unsigned int n = cmd_header->prim.count;
        unsigned int start = cmd_header->prim.start;
        unsigned int vtx_size;
        unsigned int i;
        DMA_LOCALS;

        if (!n)
                return 0;

        switch (prim) {
        case SAVAGE_PRIM_TRILIST_201:
                reorder = 1;
                prim = SAVAGE_PRIM_TRILIST;
                /* fall through */
        case SAVAGE_PRIM_TRILIST:
                if (n % 3 != 0) {
                        DRM_ERROR("wrong number of vertices %u in TRILIST\n",
                                  n);
                        return -EINVAL;
                }
                break;
        case SAVAGE_PRIM_TRISTRIP:
        case SAVAGE_PRIM_TRIFAN:
                if (n < 3) {
                        DRM_ERROR
                            ("wrong number of vertices %u in TRIFAN/STRIP\n",
                             n);
                        return -EINVAL;
                }
                break;
        default:
DRM_ERROR("invalid primitive type %u\n", prim);
                return -EINVAL;
        }

        if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
                if (skip > SAVAGE_SKIP_ALL_S3D) {
                        DRM_ERROR("invalid skip flags 0x%04x\n", skip);
                        return -EINVAL;
                }
                vtx_size = 8;   /* full vertex */
        } else {
                if (skip > SAVAGE_SKIP_ALL_S4) {
                        DRM_ERROR("invalid skip flags 0x%04x\n", skip);
                        return -EINVAL;
                }
                vtx_size = 10;  /* full vertex */
        }

        /* each set skip bit removes one dword from the vertex */
        vtx_size -= (skip & 1) + (skip >> 1 & 1) +
            (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
            (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);

        if (vtx_size > vb_stride) {
                DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
                          vtx_size, vb_stride);
                return -EINVAL;
        }

        /* vb_stride is in dwords, vb_size in bytes */
        if (start + n > vb_size / (vb_stride * 4)) {
                DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
                          start, start + n - 1, vb_size / (vb_stride * 4));
                return -EINVAL;
        }

        prim <<= 25;
        while (n != 0) {
                /* Can emit up to 255 vertices (85 triangles) at once. */
                unsigned int count = n > 255 ? 255 : n;
                if (reorder) {
                        /* Need to reorder vertices for correct flat
                         * shading while preserving the clock sense
                         * for correct culling. Only on Savage3D. */
                        int reorderarr[3] = { -1, -1, -1 };
                        reorderarr[start % 3] = 2;

                        BEGIN_DMA(count * vtx_size + 1);
                        DMA_DRAW_PRIMITIVE(count, prim, skip);

                        for (i = start; i < start + count; ++i) {
                                unsigned int j = i + reorderarr[i % 3];
                                DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
                        }

                        DMA_COMMIT();
                } else {
                        BEGIN_DMA(count * vtx_size + 1);
                        DMA_DRAW_PRIMITIVE(count, prim, skip);

                        if (vb_stride == vtx_size) {
                                /* densely packed: one contiguous copy */
                                DMA_COPY(&vtxbuf[vb_stride * start],
                                         vtx_size * count);
                        } else {
                                for (i = start; i < start + count; ++i) {
                                        DMA_COPY(&vtxbuf[vb_stride * i],
                                                 vtx_size);
                                }
                        }

                        DMA_COMMIT();
                }

                start += count;
                n -= count;

                prim |= BCI_CMD_DRAW_CONT;
        }

        return 0;
}

/* Dispatch an indexed drawing command whose vertices come from a DMA
 * vertex buffer; indices follow the command header in the command
 * stream.  Uses the BCI, like savage_dispatch_dma_prim. */
static int savage_dispatch_dma_idx(drm_savage_private_t *dev_priv,
                                   const drm_savage_cmd_header_t *cmd_header,
                                   const uint16_t *idx,
                                   const struct drm_buf *dmabuf)
{
        unsigned char reorder = 0;
        unsigned int prim = cmd_header->idx.prim;
        unsigned int skip = cmd_header->idx.skip;
        unsigned int n = cmd_header->idx.count;
        unsigned int i;
        BCI_LOCALS;

        if (!dmabuf) {
                DRM_ERROR("called without dma buffers!\n");
                return -EINVAL;
        }

        if (!n)
                return 0;

        switch (prim) {
        case SAVAGE_PRIM_TRILIST_201:
                reorder = 1;
                prim = SAVAGE_PRIM_TRILIST;
                /* fall through */
        case SAVAGE_PRIM_TRILIST:
                if (n % 3 != 0) {
                        DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
                        return -EINVAL;
                }
                break;
        case SAVAGE_PRIM_TRISTRIP:
        case SAVAGE_PRIM_TRIFAN:
                if (n < 3) {
                        DRM_ERROR
                            ("wrong number of indices %u in TRIFAN/STRIP\n", n);
                        return -EINVAL;
                }
                break;
        default:
                DRM_ERROR("invalid primitive type %u\n", prim);
                return -EINVAL;
        }

        if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
                if (skip != 0) {
                        DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
                        return -EINVAL;
                }
        } else
{
                /* each set skip bit removes one dword; Savage4 vertex
                 * DMA requires exactly 8 remaining dwords */
                unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
                    (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
                    (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
                if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
                        DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
                        return -EINVAL;
                }
                if (reorder) {
                        DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
                        return -EINVAL;
                }
        }

        /* Vertex DMA doesn't work with command DMA at the same time,
         * so we use BCI_... to submit commands here. Flush buffered
         * faked DMA first. */
        DMA_FLUSH();

        if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
                BEGIN_BCI(2);
                BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
                BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
                dev_priv->state.common.vbaddr = dmabuf->bus_address;
        }
        if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
                /* Workaround for what looks like a hardware bug. If a
                 * WAIT_3D_IDLE was emitted some time before the
                 * indexed drawing command then the engine will lock
                 * up. There are two known workarounds:
                 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
                BEGIN_BCI(63);
                for (i = 0; i < 63; ++i)
                        BCI_WRITE(BCI_CMD_WAIT);
                dev_priv->waiting = 0;
        }

        prim <<= 25;
        while (n != 0) {
                /* Can emit up to 255 indices (85 triangles) at once. */
                unsigned int count = n > 255 ? 255 : n;

                /* check indices */
                for (i = 0; i < count; ++i) {
                        /* NOTE(review): '>' accepts idx[i] == total/32,
                         * while the non-indexed path rejects
                         * start + n > total/32 — looks off by one here;
                         * confirm before changing. */
                        if (idx[i] > dmabuf->total / 32) {
                                DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
                                          i, idx[i], dmabuf->total / 32);
                                return -EINVAL;
                        }
                }

                if (reorder) {
                        /* Need to reorder indices for correct flat
                         * shading while preserving the clock sense
                         * for correct culling. Only on Savage3D. */
                        int reorderarr[3] = { 2, -1, -1 };

                        BEGIN_BCI((count + 1 + 1) / 2);
                        BCI_DRAW_INDICES_S3D(count, prim, idx[2]);

                        for (i = 1; i + 1 < count; i += 2)
                                BCI_WRITE(idx[i + reorderarr[i % 3]] |
                                          (idx[i + 1 +
                                               reorderarr[(i + 1) % 3]] << 16));
                        if (i < count)
                                BCI_WRITE(idx[i + reorderarr[i % 3]]);
                } else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
                        BEGIN_BCI((count + 1 + 1) / 2);
                        BCI_DRAW_INDICES_S3D(count, prim, idx[0]);

                        for (i = 1; i + 1 < count; i += 2)
                                BCI_WRITE(idx[i] | (idx[i + 1] << 16));
                        if (i < count)
                                BCI_WRITE(idx[i]);
                } else {
                        BEGIN_BCI((count + 2 + 1) / 2);
                        BCI_DRAW_INDICES_S4(count, prim, skip);

                        for (i = 0; i + 1 < count; i += 2)
                                BCI_WRITE(idx[i] | (idx[i + 1] << 16));
                        if (i < count)
                                BCI_WRITE(idx[i]);
                }

                idx += count;
                n -= count;

                prim |= BCI_CMD_DRAW_CONT;
        }

        return 0;
}

/* Dispatch an indexed drawing command whose vertices come from a
 * client-supplied vertex buffer, submitted through (faked) command
 * DMA. */
static int savage_dispatch_vb_idx(drm_savage_private_t *dev_priv,
                                  const drm_savage_cmd_header_t *cmd_header,
                                  const uint16_t *idx,
                                  const uint32_t *vtxbuf,
                                  unsigned int vb_size, unsigned int vb_stride)
{
        unsigned char reorder = 0;
        unsigned int prim = cmd_header->idx.prim;
        unsigned int skip = cmd_header->idx.skip;
        unsigned int n = cmd_header->idx.count;
        unsigned int vtx_size;
        unsigned int i;
        DMA_LOCALS;

        if (!n)
                return 0;

        switch (prim) {
        case SAVAGE_PRIM_TRILIST_201:
                reorder = 1;
                prim = SAVAGE_PRIM_TRILIST;
                /* fall through */
        case SAVAGE_PRIM_TRILIST:
                if (n % 3 != 0) {
                        DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
                        return -EINVAL;
                }
                break;
        case SAVAGE_PRIM_TRISTRIP:
        case SAVAGE_PRIM_TRIFAN:
                if (n < 3) {
                        DRM_ERROR
                            ("wrong number of indices %u in TRIFAN/STRIP\n", n);
                        return -EINVAL;
                }
                break;
        default:
                DRM_ERROR("invalid primitive type %u\n", prim);
                return -EINVAL;
        }

        if
(S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
                if (skip > SAVAGE_SKIP_ALL_S3D) {
                        DRM_ERROR("invalid skip flags 0x%04x\n", skip);
                        return -EINVAL;
                }
                vtx_size = 8;   /* full vertex */
        } else {
                if (skip > SAVAGE_SKIP_ALL_S4) {
                        DRM_ERROR("invalid skip flags 0x%04x\n", skip);
                        return -EINVAL;
                }
                vtx_size = 10;  /* full vertex */
        }

        /* each set skip bit removes one dword from the vertex */
        vtx_size -= (skip & 1) + (skip >> 1 & 1) +
            (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
            (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);

        if (vtx_size > vb_stride) {
                DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
                          vtx_size, vb_stride);
                return -EINVAL;
        }

        prim <<= 25;
        while (n != 0) {
                /* Can emit up to 255 vertices (85 triangles) at once. */
                unsigned int count = n > 255 ? 255 : n;

                /* Check indices */
                for (i = 0; i < count; ++i) {
                        /* NOTE(review): '>' accepts idx[i] equal to the
                         * vertex count, unlike the non-indexed path's
                         * bound — looks off by one; confirm. */
                        if (idx[i] > vb_size / (vb_stride * 4)) {
                                DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
                                          i, idx[i], vb_size / (vb_stride * 4));
                                return -EINVAL;
                        }
                }

                if (reorder) {
                        /* Need to reorder vertices for correct flat
                         * shading while preserving the clock sense
                         * for correct culling. Only on Savage3D. */
                        int reorderarr[3] = { 2, -1, -1 };

                        BEGIN_DMA(count * vtx_size + 1);
                        DMA_DRAW_PRIMITIVE(count, prim, skip);

                        for (i = 0; i < count; ++i) {
                                unsigned int j = idx[i + reorderarr[i % 3]];
                                DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
                        }

                        DMA_COMMIT();
                } else {
                        BEGIN_DMA(count * vtx_size + 1);
                        DMA_DRAW_PRIMITIVE(count, prim, skip);

                        for (i = 0; i < count; ++i) {
                                unsigned int j = idx[i];
                                DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
                        }

                        DMA_COMMIT();
                }

                idx += count;
                n -= count;

                prim |= BCI_CMD_DRAW_CONT;
        }

        return 0;
}

/* Dispatch a SAVAGE_CMD_CLEAR command: fill the selected buffers
 * (front/back/depth, per cmd_header->clear0.flags) with
 * data->clear1.value inside each clip rect, honouring the bitplane
 * write mask in data->clear1.mask. */
static int savage_dispatch_clear(drm_savage_private_t *dev_priv,
                                 const drm_savage_cmd_header_t *cmd_header,
                                 const drm_savage_cmd_header_t *data,
                                 unsigned int nbox,
                                 const struct drm_clip_rect *boxes)
{
        unsigned int flags = cmd_header->clear0.flags;
        unsigned int clear_cmd;
        unsigned int i, nbufs;
        DMA_LOCALS;

        if (nbox == 0)
                return 0;

        clear_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
            BCI_CMD_SEND_COLOR | BCI_CMD_DEST_PBD_NEW;
        BCI_CMD_SET_ROP(clear_cmd,0xCC);        /* ROP 0xCC = copy source */

        /* number of buffers selected for clearing */
        nbufs = ((flags & SAVAGE_FRONT) ? 1 : 0) +
            ((flags & SAVAGE_BACK) ? 1 : 0) + ((flags & SAVAGE_DEPTH) ?
1 : 0);
        if (nbufs == 0)
                return 0;

        if (data->clear1.mask != 0xffffffff) {
                /* set mask */
                BEGIN_DMA(2);
                DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
                DMA_WRITE(data->clear1.mask);
                DMA_COMMIT();
        }
        for (i = 0; i < nbox; ++i) {
                unsigned int x, y, w, h;
                unsigned int buf;

                x = boxes[i].x1, y = boxes[i].y1;
                w = boxes[i].x2 - boxes[i].x1;
                h = boxes[i].y2 - boxes[i].y1;
                /* 6 dwords per buffer: cmd, offset, descriptor, color,
                 * position, size */
                BEGIN_DMA(nbufs * 6);
                for (buf = SAVAGE_FRONT; buf <= SAVAGE_DEPTH; buf <<= 1) {
                        if (!(flags & buf))
                                continue;
                        DMA_WRITE(clear_cmd);
                        switch (buf) {
                        case SAVAGE_FRONT:
                                DMA_WRITE(dev_priv->front_offset);
                                DMA_WRITE(dev_priv->front_bd);
                                break;
                        case SAVAGE_BACK:
                                DMA_WRITE(dev_priv->back_offset);
                                DMA_WRITE(dev_priv->back_bd);
                                break;
                        case SAVAGE_DEPTH:
                                DMA_WRITE(dev_priv->depth_offset);
                                DMA_WRITE(dev_priv->depth_bd);
                                break;
                        }
                        DMA_WRITE(data->clear1.value);
                        DMA_WRITE(BCI_X_Y(x, y));
                        DMA_WRITE(BCI_W_H(w, h));
                }
                DMA_COMMIT();
        }
        if (data->clear1.mask != 0xffffffff) {
                /* reset mask */
                BEGIN_DMA(2);
                DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
                DMA_WRITE(0xffffffff);
                DMA_COMMIT();
        }

        return 0;
}

/* Dispatch a SAVAGE_CMD_SWAP command: blit the back buffer to the
 * front buffer (the global bitmap descriptor) for each clip rect. */
static int savage_dispatch_swap(drm_savage_private_t *dev_priv,
                                unsigned int nbox, const struct drm_clip_rect *boxes)
{
        unsigned int swap_cmd;
        unsigned int i;
        DMA_LOCALS;

        if (nbox == 0)
                return 0;

        swap_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
            BCI_CMD_SRC_PBD_COLOR_NEW | BCI_CMD_DEST_GBD;
        BCI_CMD_SET_ROP(swap_cmd,0xCC); /* ROP 0xCC = copy source */

        for (i = 0; i < nbox; ++i) {
                BEGIN_DMA(6);
                DMA_WRITE(swap_cmd);
                DMA_WRITE(dev_priv->back_offset);
                DMA_WRITE(dev_priv->back_bd);
                /* destination and source coordinates (identical here) */
                DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));
                DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));
                DMA_WRITE(BCI_W_H(boxes[i].x2 - boxes[i].x1,
                                  boxes[i].y2 - boxes[i].y1));
                DMA_COMMIT();
        }

        return 0;
}

/* Replay a run of drawing commands [start, end) once per clip
 * rectangle, emitting the scissor registers for each rect first.
 * Only drawing commands may appear in the run; anything else is an
 * internal error. */
static int savage_dispatch_draw(drm_savage_private_t *dev_priv,
                                const drm_savage_cmd_header_t *start,
                                const drm_savage_cmd_header_t *end,
                                const struct drm_buf *dmabuf,
                                const unsigned int *vtxbuf,
                                unsigned int vb_size, unsigned int vb_stride,
                                unsigned int nbox,
                                const struct drm_clip_rect *boxes)
{
        unsigned int i, j;
        int ret;

        for (i = 0; i < nbox; ++i) {
                const drm_savage_cmd_header_t *cmdbuf;
                /* chip-specific hook: emit_clip_rect_s3d or _s4 */
                dev_priv->emit_clip_rect(dev_priv, &boxes[i]);

                cmdbuf = start;
                while (cmdbuf < end) {
                        drm_savage_cmd_header_t cmd_header;
                        cmd_header = *cmdbuf;
                        cmdbuf++;
                        switch (cmd_header.cmd.cmd) {
                        case SAVAGE_CMD_DMA_PRIM:
                                ret = savage_dispatch_dma_prim(
                                        dev_priv, &cmd_header, dmabuf);
                                break;
                        case SAVAGE_CMD_VB_PRIM:
                                ret = savage_dispatch_vb_prim(
                                        dev_priv, &cmd_header,
                                        vtxbuf, vb_size, vb_stride);
                                break;
                        case SAVAGE_CMD_DMA_IDX:
                                j = (cmd_header.idx.count + 3) / 4;
                                /* j was check in savage_bci_cmdbuf */
                                ret = savage_dispatch_dma_idx(dev_priv,
                                        &cmd_header, (const uint16_t *)cmdbuf,
                                        dmabuf);
                                cmdbuf += j;
                                break;
                        case SAVAGE_CMD_VB_IDX:
                                j = (cmd_header.idx.count + 3) / 4;
                                /* j was check in savage_bci_cmdbuf */
                                ret = savage_dispatch_vb_idx(dev_priv,
                                        &cmd_header, (const uint16_t *)cmdbuf,
                                        (const uint32_t *)vtxbuf, vb_size,
                                        vb_stride);
                                cmdbuf += j;
                                break;
                        default:
                                /* What's the best return code? EFAULT?
*/ 941 DRM_ERROR("IMPLEMENTATION ERROR: " 942 "non-drawing-command %d\n", 943 cmd_header.cmd.cmd); 944 return -EINVAL; 945 } 946 947 if (ret != 0) 948 return ret; 949 } 950 } 951 952 return 0; 953 } 954 955 int savage_bci_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv) 956 { 957 drm_savage_private_t *dev_priv = dev->dev_private; 958 struct drm_device_dma *dma = dev->dma; 959 struct drm_buf *dmabuf; 960 drm_savage_cmdbuf_t *cmdbuf = data; 961 drm_savage_cmd_header_t *kcmd_addr = NULL; 962 drm_savage_cmd_header_t *first_draw_cmd; 963 unsigned int *kvb_addr = NULL; 964 struct drm_clip_rect *kbox_addr = NULL; 965 unsigned int i, j; 966 int ret = 0; 967 968 DRM_DEBUG("\n"); 969 970 LOCK_TEST_WITH_RETURN(dev, file_priv); 971 972 if (dma && dma->buflist) { 973 if (cmdbuf->dma_idx > dma->buf_count) { 974 DRM_ERROR 975 ("vertex buffer index %u out of range (0-%u)\n", 976 cmdbuf->dma_idx, dma->buf_count - 1); 977 return -EINVAL; 978 } 979 dmabuf = dma->buflist[cmdbuf->dma_idx]; 980 } else { 981 dmabuf = NULL; 982 } 983 984 /* Copy the user buffers into kernel temporary areas. This hasn't been 985 * a performance loss compared to VERIFYAREA_READ/ 986 * COPY_FROM_USER_UNCHECKED when done in other drivers, and is correct 987 * for locking on FreeBSD. 
988 */ 989 if (cmdbuf->size) { 990 kcmd_addr = drm_alloc(cmdbuf->size * 8, DRM_MEM_DRIVER); 991 if (kcmd_addr == NULL) 992 return -ENOMEM; 993 994 if (DRM_COPY_FROM_USER(kcmd_addr, cmdbuf->cmd_addr, 995 cmdbuf->size * 8)) 996 { 997 drm_free(kcmd_addr, cmdbuf->size * 8, DRM_MEM_DRIVER); 998 return -EFAULT; 999 } 1000 cmdbuf->cmd_addr = kcmd_addr; 1001 } 1002 if (cmdbuf->vb_size) { 1003 kvb_addr = drm_alloc(cmdbuf->vb_size, DRM_MEM_DRIVER); 1004 if (kvb_addr == NULL) { 1005 ret = -ENOMEM; 1006 goto done; 1007 } 1008 1009 if (DRM_COPY_FROM_USER(kvb_addr, cmdbuf->vb_addr, 1010 cmdbuf->vb_size)) { 1011 ret = -EFAULT; 1012 goto done; 1013 } 1014 cmdbuf->vb_addr = kvb_addr; 1015 } 1016 if (cmdbuf->nbox) { 1017 kbox_addr = drm_alloc(cmdbuf->nbox * 1018 sizeof(struct drm_clip_rect), 1019 DRM_MEM_DRIVER); 1020 if (kbox_addr == NULL) { 1021 ret = -ENOMEM; 1022 goto done; 1023 } 1024 1025 if (DRM_COPY_FROM_USER(kbox_addr, cmdbuf->box_addr, 1026 cmdbuf->nbox * 1027 sizeof(struct drm_clip_rect))) { 1028 ret = -EFAULT; 1029 goto done; 1030 } 1031 cmdbuf->box_addr = kbox_addr; 1032 } 1033 1034 /* Make sure writes to DMA buffers are finished before sending 1035 * DMA commands to the graphics hardware. */ 1036 DRM_MEMORYBARRIER(); 1037 1038 /* Coming from user space. Don't know if the Xserver has 1039 * emitted wait commands. Assuming the worst. */ 1040 dev_priv->waiting = 1; 1041 1042 i = 0; 1043 first_draw_cmd = NULL; 1044 while (i < cmdbuf->size) { 1045 drm_savage_cmd_header_t cmd_header; 1046 cmd_header = *(drm_savage_cmd_header_t *)cmdbuf->cmd_addr; 1047 cmdbuf->cmd_addr++; 1048 i++; 1049 1050 /* Group drawing commands with same state to minimize 1051 * iterations over clip rects. 
*/ 1052 j = 0; 1053 switch (cmd_header.cmd.cmd) { 1054 case SAVAGE_CMD_DMA_IDX: 1055 case SAVAGE_CMD_VB_IDX: 1056 j = (cmd_header.idx.count + 3) / 4; 1057 if (i + j > cmdbuf->size) { 1058 DRM_ERROR("indexed drawing command extends " 1059 "beyond end of command buffer\n"); 1060 DMA_FLUSH(); 1061 return -EINVAL; 1062 } 1063 /* fall through */ 1064 case SAVAGE_CMD_DMA_PRIM: 1065 case SAVAGE_CMD_VB_PRIM: 1066 if (!first_draw_cmd) 1067 first_draw_cmd = cmdbuf->cmd_addr - 1; 1068 cmdbuf->cmd_addr += j; 1069 i += j; 1070 break; 1071 default: 1072 if (first_draw_cmd) { 1073 ret = savage_dispatch_draw( 1074 dev_priv, first_draw_cmd, 1075 cmdbuf->cmd_addr - 1, 1076 dmabuf, cmdbuf->vb_addr, 1077 cmdbuf->vb_size, 1078 cmdbuf->vb_stride, 1079 cmdbuf->nbox, cmdbuf->box_addr); 1080 if (ret != 0) 1081 return ret; 1082 first_draw_cmd = NULL; 1083 } 1084 } 1085 if (first_draw_cmd) 1086 continue; 1087 1088 switch (cmd_header.cmd.cmd) { 1089 case SAVAGE_CMD_STATE: 1090 j = (cmd_header.state.count + 1) / 2; 1091 if (i + j > cmdbuf->size) { 1092 DRM_ERROR("command SAVAGE_CMD_STATE extends " 1093 "beyond end of command buffer\n"); 1094 DMA_FLUSH(); 1095 ret = -EINVAL; 1096 goto done; 1097 } 1098 ret = savage_dispatch_state(dev_priv, &cmd_header, 1099 (const uint32_t *)cmdbuf->cmd_addr); 1100 cmdbuf->cmd_addr += j; 1101 i += j; 1102 break; 1103 case SAVAGE_CMD_CLEAR: 1104 if (i + 1 > cmdbuf->size) { 1105 DRM_ERROR("command SAVAGE_CMD_CLEAR extends " 1106 "beyond end of command buffer\n"); 1107 DMA_FLUSH(); 1108 ret = -EINVAL; 1109 goto done; 1110 } 1111 ret = savage_dispatch_clear(dev_priv, &cmd_header, 1112 cmdbuf->cmd_addr, 1113 cmdbuf->nbox, 1114 cmdbuf->box_addr); 1115 cmdbuf->cmd_addr++; 1116 i++; 1117 break; 1118 case SAVAGE_CMD_SWAP: 1119 ret = savage_dispatch_swap(dev_priv, cmdbuf->nbox, 1120 cmdbuf->box_addr); 1121 break; 1122 default: 1123 DRM_ERROR("invalid command 0x%x\n", 1124 cmd_header.cmd.cmd); 1125 DMA_FLUSH(); 1126 ret = -EINVAL; 1127 goto done; 1128 } 1129 1130 if (ret 
!= 0) { 1131 DMA_FLUSH(); 1132 goto done; 1133 } 1134 } 1135 1136 if (first_draw_cmd) { 1137 ret = savage_dispatch_draw( 1138 dev_priv, first_draw_cmd, cmdbuf->cmd_addr, dmabuf, 1139 cmdbuf->vb_addr, cmdbuf->vb_size, cmdbuf->vb_stride, 1140 cmdbuf->nbox, cmdbuf->box_addr); 1141 if (ret != 0) { 1142 DMA_FLUSH(); 1143 goto done; 1144 } 1145 } 1146 1147 DMA_FLUSH(); 1148 1149 if (dmabuf && cmdbuf->discard) { 1150 drm_savage_buf_priv_t *buf_priv = dmabuf->dev_private; 1151 uint16_t event; 1152 event = savage_bci_emit_event(dev_priv, SAVAGE_WAIT_3D); 1153 SET_AGE(&buf_priv->age, event, dev_priv->event_wrap); 1154 savage_freelist_put(dev, dmabuf); 1155 } 1156 1157 done: 1158 /* If we didn't need to allocate them, these'll be NULL */ 1159 drm_free(kcmd_addr, cmdbuf->size * 8, DRM_MEM_DRIVER); 1160 drm_free(kvb_addr, cmdbuf->vb_size, DRM_MEM_DRIVER); 1161 drm_free(kbox_addr, cmdbuf->nbox * sizeof(struct drm_clip_rect), 1162 DRM_MEM_DRIVER); 1163 1164 return ret; 1165 } 1166