/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
#include <linux/list_sort.h>
#include <drm/drmP.h>
#include <uapi_drm/radeon_drm.h>
#include "radeon_reg.h"
#include "radeon.h"
#ifdef TRACE_TODO
#include "radeon_trace.h"
#endif

#define RADEON_CS_MAX_PRIORITY		32u
#define RADEON_CS_NUM_BUCKETS		(RADEON_CS_MAX_PRIORITY + 1)

/* This is based on the bucket sort with O(n) time complexity.
 * An item with priority "i" is added to bucket[i]. The lists are then
 * concatenated in descending order.
 */
struct radeon_cs_buckets {
        struct list_head bucket[RADEON_CS_NUM_BUCKETS];
};

static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
{
        unsigned i;

        for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
                INIT_LIST_HEAD(&b->bucket[i]);
}

static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
                                  struct list_head *item, unsigned priority)
{
        /* Since buffers which appear sooner in the relocation list are
         * likely to be used more often than buffers which appear later
         * in the list, the sort mustn't change the ordering of buffers
         * with the same priority, i.e. it must be stable.
         */
        list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
}

static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
                                       struct list_head *out_list)
{
        unsigned i;

        /* Connect the sorted buckets in the output list. */
        for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
                list_splice(&b->bucket[i], out_list);
        }
}
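
/*
 * Illustrative sketch (not part of the driver; a_item, b_item and c_item
 * stand for hypothetical embedded list_head members): relocations added
 * in the order A (prio 2), B (prio 31), C (prio 2) land in bucket[2] = A, C
 * and bucket[31] = B.  Because each bucket is spliced at the head of the
 * output list, the result is B, A, C, i.e. descending priority while the
 * submission order of equal-priority entries is preserved (stable sort):
 *
 *	struct radeon_cs_buckets b;
 *	struct list_head out;
 *
 *	INIT_LIST_HEAD(&out);
 *	radeon_cs_buckets_init(&b);
 *	radeon_cs_buckets_add(&b, &a_item, 2);
 *	radeon_cs_buckets_add(&b, &b_item, 31);
 *	radeon_cs_buckets_add(&b, &c_item, 2);
 *	radeon_cs_buckets_get_list(&b, &out);	result: B, A, C
 */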

static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
{
        struct drm_device *ddev = p->rdev->ddev;
        struct radeon_cs_chunk *chunk;
        struct radeon_cs_buckets buckets;
        unsigned i, j;
        bool duplicate;

        if (p->chunk_relocs_idx == -1) {
                return 0;
        }
        chunk = &p->chunks[p->chunk_relocs_idx];
        p->dma_reloc_idx = 0;
        /* FIXME: we assume that each reloc uses 4 dwords */
        p->nrelocs = chunk->length_dw / 4;
        p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
        if (p->relocs_ptr == NULL) {
                return -ENOMEM;
        }
        p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_cs_reloc), GFP_KERNEL);
        if (p->relocs == NULL) {
                return -ENOMEM;
        }

        radeon_cs_buckets_init(&buckets);

        for (i = 0; i < p->nrelocs; i++) {
                struct drm_radeon_cs_reloc *r;
                unsigned priority;

                duplicate = false;
                r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
                for (j = 0; j < i; j++) {
                        if (r->handle == p->relocs[j].handle) {
                                p->relocs_ptr[i] = &p->relocs[j];
                                duplicate = true;
                                break;
                        }
                }
                if (duplicate) {
                        p->relocs[i].handle = 0;
                        continue;
                }

                p->relocs[i].gobj = drm_gem_object_lookup(ddev, p->filp,
                                                          r->handle);
                if (p->relocs[i].gobj == NULL) {
                        DRM_ERROR("gem object lookup failed 0x%x\n",
                                  r->handle);
                        return -ENOENT;
                }
                p->relocs_ptr[i] = &p->relocs[i];
                p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);

                /* The userspace buffer priorities are from 0 to 15. A higher
                 * number means the buffer is more important.
                 * Also, the buffers used for write have a higher priority than
                 * the buffers used for read only, which doubles the range
                 * to 0 to 31. 32 is reserved for the kernel driver.
                 */
                priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
                           + !!r->write_domain;

                /* the first reloc of a UVD job is the msg and that must be in
                   VRAM, also put everything into VRAM on AGP cards to avoid
                   image corruption */
                if (p->ring == R600_RING_TYPE_UVD_INDEX &&
                    (i == 0 || p->rdev->flags & RADEON_IS_AGP)) {
                        /* TODO: is this still needed for NI+ ? */
                        p->relocs[i].prefered_domains =
                                RADEON_GEM_DOMAIN_VRAM;

                        p->relocs[i].allowed_domains =
                                RADEON_GEM_DOMAIN_VRAM;

                        /* prioritize this over any other relocation */
                        priority = RADEON_CS_MAX_PRIORITY;
                } else {
                        uint32_t domain = r->write_domain ?
                                r->write_domain : r->read_domains;

                        if (domain & RADEON_GEM_DOMAIN_CPU) {
                                DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid "
                                          "for command submission\n");
                                return -EINVAL;
                        }

                        p->relocs[i].prefered_domains = domain;
                        if (domain == RADEON_GEM_DOMAIN_VRAM)
                                domain |= RADEON_GEM_DOMAIN_GTT;
                        p->relocs[i].allowed_domains = domain;
                }

                p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
                p->relocs[i].handle = r->handle;

                radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
                                      priority);
        }

        radeon_cs_buckets_get_list(&buckets, &p->validated);

        if (p->cs_flags & RADEON_CS_USE_VM)
                p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
                                              &p->validated);

        return radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);
}
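
/*
 * Worked example (illustrative only): for a relocation carrying the maximum
 * userspace priority of 15 (RADEON_RELOC_PRIO_MASK) and a non-zero
 * write_domain, the formula above yields
 *
 *	priority = 15 * 2 + 1 = 31
 *
 * which is the highest priority userspace can request; 32
 * (RADEON_CS_MAX_PRIORITY) stays reserved for kernel-chosen buffers such as
 * the UVD message buffer handled in the branch above.
 */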

static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
{
        p->priority = priority;

        switch (ring) {
        default:
                DRM_ERROR("unknown ring id: %d\n", ring);
                return -EINVAL;
        case RADEON_CS_RING_GFX:
                p->ring = RADEON_RING_TYPE_GFX_INDEX;
                break;
        case RADEON_CS_RING_COMPUTE:
                if (p->rdev->family >= CHIP_TAHITI) {
                        if (p->priority > 0)
                                p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
                        else
                                p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
                } else
                        p->ring = RADEON_RING_TYPE_GFX_INDEX;
                break;
        case RADEON_CS_RING_DMA:
                if (p->rdev->family >= CHIP_CAYMAN) {
                        if (p->priority > 0)
                                p->ring = R600_RING_TYPE_DMA_INDEX;
                        else
                                p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
                } else if (p->rdev->family >= CHIP_RV770) {
                        p->ring = R600_RING_TYPE_DMA_INDEX;
                } else {
                        return -EINVAL;
                }
                break;
        case RADEON_CS_RING_UVD:
                p->ring = R600_RING_TYPE_UVD_INDEX;
                break;
        case RADEON_CS_RING_VCE:
                /* TODO: only use the low priority ring for now */
                p->ring = TN_RING_TYPE_VCE1_INDEX;
                break;
        }
        return 0;
}

static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
        int i;

        for (i = 0; i < p->nrelocs; i++) {
                if (!p->relocs[i].robj)
                        continue;

                radeon_semaphore_sync_to(p->ib.semaphore,
                                         p->relocs[i].robj->tbo.sync_obj);
        }
}
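
/*
 * Illustrative mapping (values made up, behaviour follows the switch above):
 * a compute submission with priority 1 on a Tahiti (SI) part is routed to
 * the first compute ring, while the same request on an older chip falls
 * back to the GFX ring:
 *
 *	radeon_cs_get_ring(p, RADEON_CS_RING_COMPUTE, 1);
 *	  SI and newer  -> p->ring == CAYMAN_RING_TYPE_CP1_INDEX
 *	  pre-SI        -> p->ring == RADEON_RING_TYPE_GFX_INDEX
 */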

/* XXX: note that this is called from the legacy UMS CS ioctl as well */
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
{
        struct drm_radeon_cs *cs = data;
        uint64_t *chunk_array_ptr;
        unsigned size, i;
        u32 ring = RADEON_CS_RING_GFX;
        s32 priority = 0;

        if (!cs->num_chunks) {
                return 0;
        }
        /* get chunks */
        INIT_LIST_HEAD(&p->validated);
        p->idx = 0;
        p->ib.sa_bo = NULL;
        p->ib.semaphore = NULL;
        p->const_ib.sa_bo = NULL;
        p->const_ib.semaphore = NULL;
        p->chunk_ib_idx = -1;
        p->chunk_relocs_idx = -1;
        p->chunk_flags_idx = -1;
        p->chunk_const_ib_idx = -1;
        p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
        if (p->chunks_array == NULL) {
                return -ENOMEM;
        }
        chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
        if (copy_from_user(p->chunks_array, chunk_array_ptr,
                           sizeof(uint64_t)*cs->num_chunks)) {
                return -EFAULT;
        }
        p->cs_flags = 0;
        p->nchunks = cs->num_chunks;
        p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
        if (p->chunks == NULL) {
                return -ENOMEM;
        }
        for (i = 0; i < p->nchunks; i++) {
                struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
                struct drm_radeon_cs_chunk user_chunk;
                uint32_t __user *cdata;

                chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i];
                if (copy_from_user(&user_chunk, chunk_ptr,
                                   sizeof(struct drm_radeon_cs_chunk))) {
                        return -EFAULT;
                }
                p->chunks[i].length_dw = user_chunk.length_dw;
                p->chunks[i].chunk_id = user_chunk.chunk_id;
                if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) {
                        p->chunk_relocs_idx = i;
                }
                if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
                        p->chunk_ib_idx = i;
                        /* zero length IB isn't useful */
                        if (p->chunks[i].length_dw == 0)
                                return -EINVAL;
                }
                if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB) {
                        p->chunk_const_ib_idx = i;
                        /* zero length CONST IB isn't useful */
                        if (p->chunks[i].length_dw == 0)
                                return -EINVAL;
                }
                if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
                        p->chunk_flags_idx = i;
                        /* zero length flags aren't useful */
                        if (p->chunks[i].length_dw == 0)
                                return -EINVAL;
                }

                size = p->chunks[i].length_dw;
                cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
                p->chunks[i].user_ptr = cdata;
                if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB)
                        continue;

                if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
                        if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
                                continue;
                }

                p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
                size *= sizeof(uint32_t);
                if (p->chunks[i].kdata == NULL) {
                        return -ENOMEM;
                }
                if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
                        return -EFAULT;
                }
                if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
                        p->cs_flags = p->chunks[i].kdata[0];
                        if (p->chunks[i].length_dw > 1)
                                ring = p->chunks[i].kdata[1];
                        if (p->chunks[i].length_dw > 2)
                                priority = (s32)p->chunks[i].kdata[2];
                }
        }

        /* these are KMS only */
        if (p->rdev) {
                if ((p->cs_flags & RADEON_CS_USE_VM) &&
                    !p->rdev->vm_manager.enabled) {
                        DRM_ERROR("VM not active on asic!\n");
                        return -EINVAL;
                }

                if (radeon_cs_get_ring(p, ring, priority))
                        return -EINVAL;

                /* we only support VM on some SI+ rings */
                if ((p->cs_flags & RADEON_CS_USE_VM) == 0) {
                        if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) {
                                DRM_ERROR("Ring %d requires VM!\n", p->ring);
                                return -EINVAL;
                        }
                } else {
                        if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) {
                                DRM_ERROR("VM not supported on ring %d!\n",
                                          p->ring);
                                return -EINVAL;
                        }
                }
        }

        return 0;
}
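
/*
 * Sketch of the userspace-visible layout parsed above (field names from the
 * radeon UAPI; sizes and data pointers are made up for illustration):
 *
 *	struct drm_radeon_cs_chunk ib = {
 *		.chunk_id   = RADEON_CHUNK_ID_IB,
 *		.length_dw  = 256,
 *		.chunk_data = (uintptr_t)ib_dwords,
 *	};
 *	uint64_t chunk_ptrs[] = { (uintptr_t)&ib };
 *	struct drm_radeon_cs cs = {
 *		.num_chunks = 1,
 *		.chunks     = (uintptr_t)chunk_ptrs,
 *	};
 *
 * An optional RADEON_CHUNK_ID_FLAGS chunk carries up to three dwords:
 * cs_flags, the requested ring and the ring priority, exactly as they are
 * read back in the loop above.
 */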

static int cmp_size_smaller_first(void *priv, struct list_head *a,
                                  struct list_head *b)
{
        struct radeon_cs_reloc *la = list_entry(a, struct radeon_cs_reloc, tv.head);
        struct radeon_cs_reloc *lb = list_entry(b, struct radeon_cs_reloc, tv.head);

        /* Sort A before B if A is smaller. */
        return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
}

/**
 * radeon_cs_parser_fini() - clean parser states
 * @parser:	parser structure holding parsing context.
 * @error:	error number
 * @backoff:	indicator to back off the reservation
 *
 * If error is set, then unvalidate the buffers, otherwise just free the
 * memory used by the parsing context.
 **/
static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
{
        unsigned i;

        if (!error) {
                /* Sort the buffer list from the smallest to largest buffer,
                 * which affects the order of buffers in the LRU list.
                 * This assures that the smallest buffers are added first
                 * to the LRU list, so they are likely to be later evicted
                 * first, instead of large buffers whose eviction is more
                 * expensive.
                 *
                 * This slightly lowers the number of bytes moved by TTM
                 * per frame under memory pressure.
                 */
                list_sort(NULL, &parser->validated, cmp_size_smaller_first);

                ttm_eu_fence_buffer_objects(&parser->ticket,
                                            &parser->validated,
                                            parser->ib.fence);
        } else if (backoff) {
                ttm_eu_backoff_reservation(&parser->ticket,
                                           &parser->validated);
        }

        if (parser->relocs != NULL) {
                for (i = 0; i < parser->nrelocs; i++) {
                        if (parser->relocs[i].gobj)
                                drm_gem_object_unreference_unlocked(parser->relocs[i].gobj);
                }
        }
        kfree(parser->track);
        kfree(parser->relocs);
        kfree(parser->relocs_ptr);
        kfree(parser->vm_bos);
        for (i = 0; i < parser->nchunks; i++)
                drm_free_large(parser->chunks[i].kdata);
        kfree(parser->chunks);
        kfree(parser->chunks_array);
        radeon_ib_free(parser->rdev, &parser->ib);
        radeon_ib_free(parser->rdev, &parser->const_ib);
}
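
/*
 * Illustrative example of the sort above: for two relocations whose BOs span
 * 4 and 64 pages, cmp_size_smaller_first() returns 4 - 64 = -60, so
 * list_sort() keeps the 4-page buffer ahead of the 64-page one and the
 * validated list ends up ordered smallest to largest before it is fenced
 * and moved to the LRU tail.
 */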

static int radeon_cs_ib_chunk(struct radeon_device *rdev,
                              struct radeon_cs_parser *parser)
{
        int r;

        if (parser->chunk_ib_idx == -1)
                return 0;

        if (parser->cs_flags & RADEON_CS_USE_VM)
                return 0;

        r = radeon_cs_parse(rdev, parser->ring, parser);
        if (r || parser->parser_error) {
                DRM_ERROR("Invalid command stream !\n");
                return r;
        }

        if (parser->ring == R600_RING_TYPE_UVD_INDEX)
                radeon_uvd_note_usage(rdev);
        else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
                 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
                radeon_vce_note_usage(rdev);

        radeon_cs_sync_rings(parser);
        r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
        if (r) {
                DRM_ERROR("Failed to schedule IB !\n");
        }
        return r;
}

static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
                                   struct radeon_vm *vm)
{
        struct radeon_device *rdev = p->rdev;
        struct radeon_bo_va *bo_va;
        int i, r;

        r = radeon_vm_update_page_directory(rdev, vm);
        if (r)
                return r;

        r = radeon_vm_clear_freed(rdev, vm);
        if (r)
                return r;

        if (vm->ib_bo_va == NULL) {
                DRM_ERROR("Tmp BO not in VM!\n");
                return -EINVAL;
        }

        r = radeon_vm_bo_update(rdev, vm->ib_bo_va,
                                &rdev->ring_tmp_bo.bo->tbo.mem);
        if (r)
                return r;

        for (i = 0; i < p->nrelocs; i++) {
                struct radeon_bo *bo;

                /* ignore duplicates */
                if (p->relocs_ptr[i] != &p->relocs[i])
                        continue;

                bo = p->relocs[i].robj;
                bo_va = radeon_vm_bo_find(vm, bo);
                if (bo_va == NULL) {
                        dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
                        return -EINVAL;
                }

                r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
                if (r)
                        return r;
        }

        return radeon_vm_clear_invalids(rdev, vm);
}
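
/*
 * Summary of the VM update order implemented above (a restatement of the
 * call sequence, not new code):
 *
 *	radeon_vm_update_page_directory()   top level page directory first
 *	radeon_vm_clear_freed()             drop mappings of freed BOs
 *	radeon_vm_bo_update(ib_bo_va, ...)  the ring temporary BO
 *	radeon_vm_bo_update(bo_va, ...)     every unique relocation BO
 *	radeon_vm_clear_invalids()          flush the remaining stale PTEs
 */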

static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
                                 struct radeon_cs_parser *parser)
{
        struct radeon_fpriv *fpriv = parser->filp->driver_priv;
        struct radeon_vm *vm = &fpriv->vm;
        int r;

        if (parser->chunk_ib_idx == -1)
                return 0;
        if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
                return 0;

        if (parser->const_ib.length_dw) {
                r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
                if (r) {
                        return r;
                }
        }

        r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
        if (r) {
                return r;
        }

        if (parser->ring == R600_RING_TYPE_UVD_INDEX)
                radeon_uvd_note_usage(rdev);

        lockmgr(&vm->mutex, LK_EXCLUSIVE);
        r = radeon_bo_vm_update_pte(parser, vm);
        if (r) {
                goto out;
        }
        radeon_cs_sync_rings(parser);
        radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence);

        if ((rdev->family >= CHIP_TAHITI) &&
            (parser->chunk_const_ib_idx != -1)) {
                r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
        } else {
                r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
        }

out:
        lockmgr(&vm->mutex, LK_RELEASE);
        return r;
}

static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
{
        if (r == -EDEADLK) {
                r = radeon_gpu_reset(rdev);
                if (!r)
                        r = -EAGAIN;
        }
        return r;
}

static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser)
{
        struct radeon_cs_chunk *ib_chunk;
        struct radeon_vm *vm = NULL;
        int r;

        if (parser->chunk_ib_idx == -1)
                return 0;

        if (parser->cs_flags & RADEON_CS_USE_VM) {
                struct radeon_fpriv *fpriv = parser->filp->driver_priv;
                vm = &fpriv->vm;

                if ((rdev->family >= CHIP_TAHITI) &&
                    (parser->chunk_const_ib_idx != -1)) {
                        ib_chunk = &parser->chunks[parser->chunk_const_ib_idx];
                        if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
                                DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
                                return -EINVAL;
                        }
                        r = radeon_ib_get(rdev, parser->ring, &parser->const_ib,
                                          vm, ib_chunk->length_dw * 4);
                        if (r) {
                                DRM_ERROR("Failed to get const ib !\n");
                                return r;
                        }
                        parser->const_ib.is_const_ib = true;
                        parser->const_ib.length_dw = ib_chunk->length_dw;
                        if (copy_from_user(parser->const_ib.ptr,
                                           ib_chunk->user_ptr,
                                           ib_chunk->length_dw * 4))
                                return -EFAULT;
                }

                ib_chunk = &parser->chunks[parser->chunk_ib_idx];
                if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
                        DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
                        return -EINVAL;
                }
        }
        ib_chunk = &parser->chunks[parser->chunk_ib_idx];

        r = radeon_ib_get(rdev, parser->ring, &parser->ib,
                          vm, ib_chunk->length_dw * 4);
        if (r) {
                DRM_ERROR("Failed to get ib !\n");
                return r;
        }
        parser->ib.length_dw = ib_chunk->length_dw;
        if (ib_chunk->kdata)
                memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
        else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4))
                return -EFAULT;
        return 0;
}
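
/*
 * Illustrative note: on SI (CHIP_TAHITI and newer) a VM submission may carry
 * two IBs.  With, say, a 64-dword CONST IB and a 512-dword IB (sizes made
 * up), radeon_cs_ib_fill() copies both from user space, each limited to
 * RADEON_IB_VM_MAX_SIZE dwords, and radeon_cs_ib_vm_chunk() later schedules
 * them together via radeon_ib_schedule(rdev, &parser->ib,
 * &parser->const_ib, true).
 */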

int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
        struct radeon_device *rdev = dev->dev_private;
        struct radeon_cs_parser parser;
        int r;

        lockmgr(&rdev->exclusive_lock, LK_EXCLUSIVE);
        if (!rdev->accel_working) {
                lockmgr(&rdev->exclusive_lock, LK_RELEASE);
                return -EBUSY;
        }
        /* initialize parser */
        memset(&parser, 0, sizeof(struct radeon_cs_parser));
        parser.filp = filp;
        parser.rdev = rdev;
        parser.dev = rdev->dev;
        parser.family = rdev->family;
        r = radeon_cs_parser_init(&parser, data);
        if (r) {
                DRM_ERROR("Failed to initialize parser !\n");
                radeon_cs_parser_fini(&parser, r, false);
                lockmgr(&rdev->exclusive_lock, LK_RELEASE);
                r = radeon_cs_handle_lockup(rdev, r);
                return r;
        }

        r = radeon_cs_ib_fill(rdev, &parser);
        if (!r) {
                r = radeon_cs_parser_relocs(&parser);
                if (r && r != -ERESTARTSYS)
                        DRM_ERROR("Failed to parse relocation %d!\n", r);
        }

        if (r) {
                radeon_cs_parser_fini(&parser, r, false);
                lockmgr(&rdev->exclusive_lock, LK_RELEASE);
                r = radeon_cs_handle_lockup(rdev, r);
                return r;
        }

#if 0
        trace_radeon_cs(&parser);
#endif

        r = radeon_cs_ib_chunk(rdev, &parser);
        if (r) {
                goto out;
        }
        r = radeon_cs_ib_vm_chunk(rdev, &parser);
        if (r) {
                goto out;
        }
out:
        radeon_cs_parser_fini(&parser, r, true);
        lockmgr(&rdev->exclusive_lock, LK_RELEASE);
        r = radeon_cs_handle_lockup(rdev, r);
        return r;
}

/**
 * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @p:		parser structure holding parsing context.
 * @pkt:	where to store packet information
 * @idx:	packet index
 *
 * Assume that chunk_ib_index is properly set. Will return -EINVAL
 * if the packet is bigger than the remaining ib size, or if the packet
 * is unknown.
 **/
int radeon_cs_packet_parse(struct radeon_cs_parser *p,
                           struct radeon_cs_packet *pkt,
                           unsigned idx)
{
        struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
        struct radeon_device *rdev = p->rdev;
        uint32_t header;

        if (idx >= ib_chunk->length_dw) {
                DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
                          idx, ib_chunk->length_dw);
                return -EINVAL;
        }
        header = radeon_get_ib_value(p, idx);
        pkt->idx = idx;
        pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
        pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
        pkt->one_reg_wr = 0;
        switch (pkt->type) {
        case RADEON_PACKET_TYPE0:
                if (rdev->family < CHIP_R600) {
                        pkt->reg = R100_CP_PACKET0_GET_REG(header);
                        pkt->one_reg_wr =
                                RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
                } else
                        pkt->reg = R600_CP_PACKET0_GET_REG(header);
                break;
        case RADEON_PACKET_TYPE3:
                pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
                break;
        case RADEON_PACKET_TYPE2:
                pkt->count = -1;
                break;
        default:
                DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
                return -EINVAL;
        }
        if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
                DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
                          pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
                return -EINVAL;
        }
        return 0;
}

/**
 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
 * @p:		structure holding the parser context.
 *
 * Check if the next packet is a NOP relocation packet3.
 **/
bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
{
        struct radeon_cs_packet p3reloc;
        int r;

        r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
        if (r)
                return false;
        if (p3reloc.type != RADEON_PACKET_TYPE3)
                return false;
        if (p3reloc.opcode != RADEON_PACKET3_NOP)
                return false;
        return true;
}

/**
 * radeon_cs_dump_packet() - dump raw packet context
 * @p:		structure holding the parser context.
 * @pkt:	structure holding the packet.
 *
 * Used mostly for debugging and error reporting.
 **/
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
                           struct radeon_cs_packet *pkt)
{
        volatile uint32_t *ib;
        unsigned i;
        unsigned idx;

        ib = p->ib.ptr;
        idx = pkt->idx;
        for (i = 0; i <= (pkt->count + 1); i++, idx++)
                DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
}

/**
 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
 * @p:		parser structure holding parsing context.
 * @cs_reloc:	where to store the resolved relocation
 * @nomm:	no memory management for debugging
 *
 * Check if the next packet is a relocation packet3, do bo validation and
 * compute the GPU offset using the provided start.
 **/
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
                                struct radeon_cs_reloc **cs_reloc,
                                int nomm)
{
        struct radeon_cs_chunk *relocs_chunk;
        struct radeon_cs_packet p3reloc;
        unsigned idx;
        int r;

        if (p->chunk_relocs_idx == -1) {
                DRM_ERROR("No relocation chunk !\n");
                return -EINVAL;
        }
        *cs_reloc = NULL;
        relocs_chunk = &p->chunks[p->chunk_relocs_idx];
        r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
        if (r)
                return r;
        p->idx += p3reloc.count + 2;
        if (p3reloc.type != RADEON_PACKET_TYPE3 ||
            p3reloc.opcode != RADEON_PACKET3_NOP) {
                DRM_ERROR("No packet3 for relocation for packet at %d.\n",
                          p3reloc.idx);
                radeon_cs_dump_packet(p, &p3reloc);
                return -EINVAL;
        }
        idx = radeon_get_ib_value(p, p3reloc.idx + 1);
        if (idx >= relocs_chunk->length_dw) {
                DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
                          idx, relocs_chunk->length_dw);
                radeon_cs_dump_packet(p, &p3reloc);
                return -EINVAL;
        }
        /* FIXME: we assume reloc size is 4 dwords */
        if (nomm) {
                *cs_reloc = p->relocs;
                (*cs_reloc)->gpu_offset =
                        (u64)relocs_chunk->kdata[idx + 3] << 32;
                (*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
        } else
                *cs_reloc = p->relocs_ptr[(idx / 4)];
        return 0;
}
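
/*
 * Illustrative example of the relocation packets consumed above (offsets are
 * made up; PACKET3() is the header helper from the per-ASIC headers): to
 * reference the second entry of the relocation chunk, where every entry is
 * 4 dwords wide, userspace emits a type-3 NOP packet whose single payload
 * dword is the dword offset into that chunk:
 *
 *	ib[n + 0] = PACKET3(RADEON_PACKET3_NOP, 0);
 *	ib[n + 1] = 1 * 4;	resolved as p->relocs_ptr[4 / 4]
 *
 * radeon_cs_packet_next_reloc() reads that payload, returns the matching
 * radeon_cs_reloc and advances p->idx past the packet.
 */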