/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
#include <linux/list_sort.h>
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_reg.h"
#include "radeon.h"
#ifdef TRACE_TODO
#include "radeon_trace.h"
#endif

#define RADEON_CS_MAX_PRIORITY		32u
#define RADEON_CS_NUM_BUCKETS		(RADEON_CS_MAX_PRIORITY + 1)

/* This is based on the bucket sort with O(n) time complexity.
 * An item with priority "i" is added to bucket[i]. The lists are then
 * concatenated in descending order.
 */
struct radeon_cs_buckets {
        struct list_head bucket[RADEON_CS_NUM_BUCKETS];
};

static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
{
        unsigned i;

        for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
                INIT_LIST_HEAD(&b->bucket[i]);
}

static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
                                  struct list_head *item, unsigned priority)
{
        /* Since buffers which appear sooner in the relocation list are
         * likely to be used more often than buffers which appear later
         * in the list, the sort mustn't change the ordering of buffers
         * with the same priority, i.e. it must be stable.
         */
        list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
}

static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
                                       struct list_head *out_list)
{
        unsigned i;

        /* Connect the sorted buckets in the output list. */
        for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
                list_splice(&b->bucket[i], out_list);
        }
}
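
/*
 * Illustrative example (not part of the driver flow): if relocations A, B and
 * C are added with priorities 0, 2 and 2, then bucket[0] = {A} and
 * bucket[2] = {B, C}.  Splicing each bucket onto the head of the output list,
 * from bucket[0] up to bucket[32], yields B, C, A: higher priorities end up
 * first, and entries of equal priority keep their submission order, so the
 * overall sort is stable.
 */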

static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
{
        struct radeon_cs_chunk *chunk;
        struct radeon_cs_buckets buckets;
        unsigned i, j;
        bool duplicate;
        int r;

        if (p->chunk_relocs_idx == -1) {
                return 0;
        }
        chunk = &p->chunks[p->chunk_relocs_idx];
        p->dma_reloc_idx = 0;
        /* FIXME: we assume that each reloc uses 4 dwords */
        p->nrelocs = chunk->length_dw / 4;
        p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
        if (p->relocs_ptr == NULL) {
                return -ENOMEM;
        }
        p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_cs_reloc), GFP_KERNEL);
        if (p->relocs == NULL) {
                return -ENOMEM;
        }

        radeon_cs_buckets_init(&buckets);

        for (i = 0; i < p->nrelocs; i++) {
                struct drm_radeon_cs_reloc *r;
                unsigned priority;

                duplicate = false;
                r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
                for (j = 0; j < i; j++) {
                        if (r->handle == p->relocs[j].handle) {
                                p->relocs_ptr[i] = &p->relocs[j];
                                duplicate = true;
                                break;
                        }
                }
                if (duplicate) {
                        p->relocs[i].handle = 0;
                        continue;
                }

                p->relocs[i].gobj = drm_gem_object_lookup(p->filp, r->handle);
                if (p->relocs[i].gobj == NULL) {
                        DRM_ERROR("gem object lookup failed 0x%x\n",
                                  r->handle);
                        return -ENOENT;
                }
                p->relocs_ptr[i] = &p->relocs[i];
                p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);

                /* The userspace buffer priorities are from 0 to 15. A higher
                 * number means the buffer is more important.
                 * Also, the buffers used for write have a higher priority than
                 * the buffers used for read only, which doubles the range
                 * to 0 to 31. 32 is reserved for the kernel driver.
                 */
                priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
                           + !!r->write_domain;

                /* the first reloc of an UVD job is the msg and that must be in
                   VRAM; also put everything into VRAM on AGP cards and older
                   IGP chips to avoid image corruption */
                if (p->ring == R600_RING_TYPE_UVD_INDEX &&
                    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev) ||
                     p->rdev->family == CHIP_RS780 ||
                     p->rdev->family == CHIP_RS880)) {

                        /* TODO: is this still needed for NI+ ? */
                        p->relocs[i].prefered_domains =
                                RADEON_GEM_DOMAIN_VRAM;

                        p->relocs[i].allowed_domains =
                                RADEON_GEM_DOMAIN_VRAM;

                        /* prioritize this over any other relocation */
                        priority = RADEON_CS_MAX_PRIORITY;
                } else {
                        uint32_t domain = r->write_domain ?
                                r->write_domain : r->read_domains;

                        if (domain & RADEON_GEM_DOMAIN_CPU) {
                                DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid "
                                          "for command submission\n");
                                return -EINVAL;
                        }

                        p->relocs[i].prefered_domains = domain;
                        if (domain == RADEON_GEM_DOMAIN_VRAM)
                                domain |= RADEON_GEM_DOMAIN_GTT;
                        p->relocs[i].allowed_domains = domain;
                }

#if 0
                if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) {
                        uint32_t domain = p->relocs[i].prefered_domains;
                        if (!(domain & RADEON_GEM_DOMAIN_GTT)) {
                                DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is "
                                          "allowed for userptr BOs\n");
                                return -EINVAL;
                        }
                        need_mmap_lock = true;
                        domain = RADEON_GEM_DOMAIN_GTT;
                        p->relocs[i].prefered_domains = domain;
                        p->relocs[i].allowed_domains = domain;
                }
#endif

                p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
                p->relocs[i].tv.shared = !r->write_domain;
                p->relocs[i].handle = r->handle;

                radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
                                      priority);
        }

        radeon_cs_buckets_get_list(&buckets, &p->validated);

        if (p->cs_flags & RADEON_CS_USE_VM)
                p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
                                              &p->validated);

        r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);

        return r;
}
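
/*
 * Worked example for the priority computation in radeon_cs_parser_relocs()
 * above (illustrative values): a relocation submitted with the
 * RADEON_RELOC_PRIO_MASK bits equal to 15 and a non-zero write_domain gets
 * priority 15 * 2 + 1 = 31, the highest value userspace can request.
 * RADEON_CS_MAX_PRIORITY (32) stays reserved for relocations the kernel
 * promotes itself, such as the first reloc of a UVD job.
 */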

static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
{
        p->priority = priority;

        switch (ring) {
        default:
                DRM_ERROR("unknown ring id: %d\n", ring);
                return -EINVAL;
        case RADEON_CS_RING_GFX:
                p->ring = RADEON_RING_TYPE_GFX_INDEX;
                break;
        case RADEON_CS_RING_COMPUTE:
                if (p->rdev->family >= CHIP_TAHITI) {
                        if (p->priority > 0)
                                p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
                        else
                                p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
                } else
                        p->ring = RADEON_RING_TYPE_GFX_INDEX;
                break;
        case RADEON_CS_RING_DMA:
                if (p->rdev->family >= CHIP_CAYMAN) {
                        if (p->priority > 0)
                                p->ring = R600_RING_TYPE_DMA_INDEX;
                        else
                                p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
                } else if (p->rdev->family >= CHIP_RV770) {
                        p->ring = R600_RING_TYPE_DMA_INDEX;
                } else {
                        return -EINVAL;
                }
                break;
        case RADEON_CS_RING_UVD:
                p->ring = R600_RING_TYPE_UVD_INDEX;
                break;
        case RADEON_CS_RING_VCE:
                /* TODO: only use the low priority ring for now */
                p->ring = TN_RING_TYPE_VCE1_INDEX;
                break;
        }
        return 0;
}

static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
        struct radeon_cs_reloc *reloc;
        int r;

        list_for_each_entry(reloc, &p->validated, tv.head) {
                struct reservation_object *resv;

                resv = reloc->robj->tbo.resv;
                r = radeon_semaphore_sync_resv(p->rdev, p->ib.semaphore, resv,
                                               reloc->tv.shared);
                if (r)
                        return r;
        }
        return 0;
}

/* XXX: note that this is called from the legacy UMS CS ioctl as well */
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
{
        struct drm_radeon_cs *cs = data;
        uint64_t *chunk_array_ptr;
        unsigned size, i;
        u32 ring = RADEON_CS_RING_GFX;
        s32 priority = 0;

        if (!cs->num_chunks) {
                return 0;
        }
        /* get chunks */
        INIT_LIST_HEAD(&p->validated);
        p->idx = 0;
        p->ib.sa_bo = NULL;
        p->ib.semaphore = NULL;
        p->const_ib.sa_bo = NULL;
        p->const_ib.semaphore = NULL;
        p->chunk_ib_idx = -1;
        p->chunk_relocs_idx = -1;
        p->chunk_flags_idx = -1;
        p->chunk_const_ib_idx = -1;
        p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
        if (p->chunks_array == NULL) {
                return -ENOMEM;
        }
        chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
        if (copy_from_user(p->chunks_array, chunk_array_ptr,
                           sizeof(uint64_t)*cs->num_chunks)) {
                return -EFAULT;
        }
        p->cs_flags = 0;
        p->nchunks = cs->num_chunks;
        p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
        if (p->chunks == NULL) {
                return -ENOMEM;
        }
        for (i = 0; i < p->nchunks; i++) {
                struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
                struct drm_radeon_cs_chunk user_chunk;
                uint32_t __user *cdata;

                chunk_ptr = (void __user *)(unsigned long)p->chunks_array[i];
                if (copy_from_user(&user_chunk, chunk_ptr,
                                   sizeof(struct drm_radeon_cs_chunk))) {
                        return -EFAULT;
                }
                p->chunks[i].length_dw = user_chunk.length_dw;
                p->chunks[i].chunk_id = user_chunk.chunk_id;
                if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) {
                        p->chunk_relocs_idx = i;
                }
                if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
                        p->chunk_ib_idx = i;
                        /* zero length IB isn't useful */
                        if (p->chunks[i].length_dw == 0)
                                return -EINVAL;
                }
                if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB) {
                        p->chunk_const_ib_idx = i;
                        /* zero length CONST IB isn't useful */
                        if (p->chunks[i].length_dw == 0)
                                return -EINVAL;
                }
                if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
                        p->chunk_flags_idx = i;
                        /* zero length flags aren't useful */
                        if (p->chunks[i].length_dw == 0)
                                return -EINVAL;
                }

                size = p->chunks[i].length_dw;
                cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
                p->chunks[i].user_ptr = cdata;
                if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB)
                        continue;

                if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
                        if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
                                continue;
                }

                p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
                size *= sizeof(uint32_t);
                if (p->chunks[i].kdata == NULL) {
                        return -ENOMEM;
                }
                if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
                        return -EFAULT;
                }
                if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
                        p->cs_flags = p->chunks[i].kdata[0];
                        if (p->chunks[i].length_dw > 1)
                                ring = p->chunks[i].kdata[1];
                        if (p->chunks[i].length_dw > 2)
                                priority = (s32)p->chunks[i].kdata[2];
                }
        }

        /* these are KMS only */
        if (p->rdev) {
                if ((p->cs_flags & RADEON_CS_USE_VM) &&
                    !p->rdev->vm_manager.enabled) {
                        DRM_ERROR("VM not active on asic!\n");
                        return -EINVAL;
                }

                if (radeon_cs_get_ring(p, ring, priority))
                        return -EINVAL;

                /* we only support VM on some SI+ rings */
                if ((p->cs_flags & RADEON_CS_USE_VM) == 0) {
                        if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) {
                                DRM_ERROR("Ring %d requires VM!\n", p->ring);
                                return -EINVAL;
                        }
                } else {
                        if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) {
                                DRM_ERROR("VM not supported on ring %d!\n",
                                          p->ring);
                                return -EINVAL;
                        }
                }
        }

        return 0;
}

static int cmp_size_smaller_first(void *priv, struct list_head *a,
                                  struct list_head *b)
{
        struct radeon_cs_reloc *la = list_entry(a, struct radeon_cs_reloc, tv.head);
        struct radeon_cs_reloc *lb = list_entry(b, struct radeon_cs_reloc, tv.head);

        /* Sort A before B if A is smaller. */
        return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
}
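
/*
 * list_sort() comparators return a negative value when the first element
 * should come first.  Illustrative example: a 1-page BO compared against a
 * 4-page BO yields 1 - 4 = -3, so the smaller buffer sorts ahead of the
 * larger one, which is what the LRU-friendly ordering in
 * radeon_cs_parser_fini() relies on.
 */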

/**
 * radeon_cs_parser_fini() - clean parser states
 * @parser: parser structure holding parsing context.
 * @error: error number
 * @backoff: whether to back off the buffer reservations
 *
 * If error is set, then unvalidate the buffers, otherwise just free the memory
 * used by the parsing context.
 **/
static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
{
        unsigned i;

        if (!error) {
                /* Sort the buffer list from the smallest to largest buffer,
                 * which affects the order of buffers in the LRU list.
                 * This assures that the smallest buffers are added first
                 * to the LRU list, so they are likely to be later evicted
                 * first, instead of large buffers whose eviction is more
                 * expensive.
                 *
                 * This slightly lowers the number of bytes moved by TTM
                 * per frame under memory pressure.
                 */
                list_sort(NULL, &parser->validated, cmp_size_smaller_first);

                ttm_eu_fence_buffer_objects(&parser->ticket,
                                            &parser->validated,
                                            &parser->ib.fence->base);
        } else if (backoff) {
                ttm_eu_backoff_reservation(&parser->ticket,
                                           &parser->validated);
        }

        if (parser->relocs != NULL) {
                for (i = 0; i < parser->nrelocs; i++) {
                        if (parser->relocs[i].gobj)
                                drm_gem_object_unreference_unlocked(parser->relocs[i].gobj);
                }
        }
        kfree(parser->track);
        kfree(parser->relocs);
        kfree(parser->relocs_ptr);
        drm_free_large(parser->vm_bos);
        for (i = 0; i < parser->nchunks; i++)
                drm_free_large(parser->chunks[i].kdata);
        kfree(parser->chunks);
        kfree(parser->chunks_array);
        radeon_ib_free(parser->rdev, &parser->ib);
        radeon_ib_free(parser->rdev, &parser->const_ib);
}

static int radeon_cs_ib_chunk(struct radeon_device *rdev,
                              struct radeon_cs_parser *parser)
{
        int r;

        if (parser->chunk_ib_idx == -1)
                return 0;

        if (parser->cs_flags & RADEON_CS_USE_VM)
                return 0;

        r = radeon_cs_parse(rdev, parser->ring, parser);
        if (r || parser->parser_error) {
                DRM_ERROR("Invalid command stream !\n");
                return r;
        }

        r = radeon_cs_sync_rings(parser);
        if (r) {
                if (r != -ERESTARTSYS)
                        DRM_ERROR("Failed to sync rings: %i\n", r);
                return r;
        }

        if (parser->ring == R600_RING_TYPE_UVD_INDEX)
                radeon_uvd_note_usage(rdev);
        else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
                 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
                radeon_vce_note_usage(rdev);

        r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
        if (r) {
                DRM_ERROR("Failed to schedule IB !\n");
        }
        return r;
}

static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
                                   struct radeon_vm *vm)
{
        struct radeon_device *rdev = p->rdev;
        struct radeon_bo_va *bo_va;
        int i, r;

        r = radeon_vm_update_page_directory(rdev, vm);
        if (r)
                return r;

        r = radeon_vm_clear_freed(rdev, vm);
        if (r)
                return r;

        if (vm->ib_bo_va == NULL) {
                DRM_ERROR("Tmp BO not in VM!\n");
                return -EINVAL;
        }

        r = radeon_vm_bo_update(rdev, vm->ib_bo_va,
                                &rdev->ring_tmp_bo.bo->tbo.mem);
        if (r)
                return r;

        for (i = 0; i < p->nrelocs; i++) {
                struct radeon_bo *bo;

                /* ignore duplicates */
                if (p->relocs_ptr[i] != &p->relocs[i])
                        continue;

                bo = p->relocs[i].robj;
                bo_va = radeon_vm_bo_find(vm, bo);
                if (bo_va == NULL) {
                        dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
                        return -EINVAL;
                }

                r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
                if (r)
                        return r;
        }

        return radeon_vm_clear_invalids(rdev, vm);
}

static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
                                 struct radeon_cs_parser *parser)
{
        struct radeon_fpriv *fpriv = parser->filp->driver_priv;
        struct radeon_vm *vm = &fpriv->vm;
        int r;

        if (parser->chunk_ib_idx == -1)
                return 0;
        if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
                return 0;

        if (parser->const_ib.length_dw) {
                r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
                if (r) {
                        return r;
                }
        }

        r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
        if (r) {
                return r;
        }

        if (parser->ring == R600_RING_TYPE_UVD_INDEX)
                radeon_uvd_note_usage(rdev);

        mutex_lock(&vm->mutex);
        r = radeon_bo_vm_update_pte(parser, vm);
        if (r) {
                goto out;
        }

        r = radeon_cs_sync_rings(parser);
        if (r) {
                if (r != -ERESTARTSYS)
                        DRM_ERROR("Failed to sync rings: %i\n", r);
                goto out;
        }
        radeon_semaphore_sync_fence(parser->ib.semaphore, vm->fence);

        if ((rdev->family >= CHIP_TAHITI) &&
            (parser->chunk_const_ib_idx != -1)) {
                r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
        } else {
                r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
        }

out:
        mutex_unlock(&vm->mutex);
        return r;
}

static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
{
        if (r == -EDEADLK) {
                r = radeon_gpu_reset(rdev);
                if (!r)
                        r = -EAGAIN;
        }
        return r;
}

static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser)
{
        struct radeon_cs_chunk *ib_chunk;
        struct radeon_vm *vm = NULL;
        int r;

        if (parser->chunk_ib_idx == -1)
                return 0;

        if (parser->cs_flags & RADEON_CS_USE_VM) {
                struct radeon_fpriv *fpriv = parser->filp->driver_priv;
                vm = &fpriv->vm;

                if ((rdev->family >= CHIP_TAHITI) &&
                    (parser->chunk_const_ib_idx != -1)) {
                        ib_chunk = &parser->chunks[parser->chunk_const_ib_idx];
                        if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
                                DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
                                return -EINVAL;
                        }
                        r = radeon_ib_get(rdev, parser->ring, &parser->const_ib,
                                          vm, ib_chunk->length_dw * 4);
                        if (r) {
                                DRM_ERROR("Failed to get const ib !\n");
                                return r;
                        }
                        parser->const_ib.is_const_ib = true;
                        parser->const_ib.length_dw = ib_chunk->length_dw;
                        if (copy_from_user(parser->const_ib.ptr,
                                           ib_chunk->user_ptr,
                                           ib_chunk->length_dw * 4))
                                return -EFAULT;
                }

                ib_chunk = &parser->chunks[parser->chunk_ib_idx];
                if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
                        DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
                        return -EINVAL;
                }
        }
        ib_chunk = &parser->chunks[parser->chunk_ib_idx];

        r = radeon_ib_get(rdev, parser->ring, &parser->ib,
                          vm, ib_chunk->length_dw * 4);
        if (r) {
                DRM_ERROR("Failed to get ib !\n");
                return r;
        }
        parser->ib.length_dw = ib_chunk->length_dw;
        if (ib_chunk->kdata)
                memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
        else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4))
                return -EFAULT;
        return 0;
}

int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
        struct radeon_device *rdev = dev->dev_private;
        struct radeon_cs_parser parser;
        int r;

        down_read(&rdev->exclusive_lock);
        if (!rdev->accel_working) {
                up_read(&rdev->exclusive_lock);
                return -EBUSY;
        }
        if (rdev->in_reset) {
                up_read(&rdev->exclusive_lock);
                r = radeon_gpu_reset(rdev);
                if (!r)
                        r = -EAGAIN;
                return r;
        }
        /* initialize parser */
        memset(&parser, 0, sizeof(struct radeon_cs_parser));
        parser.filp = filp;
        parser.rdev = rdev;
        parser.dev = rdev->dev;
        parser.family = rdev->family;
        r = radeon_cs_parser_init(&parser, data);
        if (r) {
                DRM_ERROR("Failed to initialize parser !\n");
                radeon_cs_parser_fini(&parser, r, false);
                up_read(&rdev->exclusive_lock);
                r = radeon_cs_handle_lockup(rdev, r);
                return r;
        }

        r = radeon_cs_ib_fill(rdev, &parser);
        if (!r) {
                r = radeon_cs_parser_relocs(&parser);
                if (r && r != -ERESTARTSYS)
                        DRM_ERROR("Failed to parse relocation %d!\n", r);
        }

        if (r) {
                radeon_cs_parser_fini(&parser, r, false);
                up_read(&rdev->exclusive_lock);
                r = radeon_cs_handle_lockup(rdev, r);
                return r;
        }

#ifdef TRACE_TODO
        trace_radeon_cs(&parser);
#endif

        r = radeon_cs_ib_chunk(rdev, &parser);
        if (r) {
                goto out;
        }
        r = radeon_cs_ib_vm_chunk(rdev, &parser);
        if (r) {
                goto out;
        }
out:
        radeon_cs_parser_fini(&parser, r, true);
        up_read(&rdev->exclusive_lock);
        r = radeon_cs_handle_lockup(rdev, r);
        return r;
}

/**
 * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @p: parser structure holding parsing context.
 * @pkt: where to store packet information
 * @idx: index in the IB of the packet header
 *
 * Assume that chunk_ib_idx is properly set. Will return -EINVAL
 * if the packet is bigger than the remaining ib size, or if the packet
 * type is unknown.
 **/
int radeon_cs_packet_parse(struct radeon_cs_parser *p,
                           struct radeon_cs_packet *pkt,
                           unsigned idx)
{
        struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
        struct radeon_device *rdev = p->rdev;
        uint32_t header;

        if (idx >= ib_chunk->length_dw) {
                DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
                          idx, ib_chunk->length_dw);
                return -EINVAL;
        }
        header = radeon_get_ib_value(p, idx);
        pkt->idx = idx;
        pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
        pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
        pkt->one_reg_wr = 0;
        switch (pkt->type) {
        case RADEON_PACKET_TYPE0:
                if (rdev->family < CHIP_R600) {
                        pkt->reg = R100_CP_PACKET0_GET_REG(header);
                        pkt->one_reg_wr =
                                RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
                } else
                        pkt->reg = R600_CP_PACKET0_GET_REG(header);
                break;
        case RADEON_PACKET_TYPE3:
                pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
                break;
        case RADEON_PACKET_TYPE2:
                pkt->count = -1;
                break;
        default:
                DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
                return -EINVAL;
        }
        if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
                DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
                          pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
                return -EINVAL;
        }
        return 0;
}
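
/*
 * Example header decode (illustrative, assuming the usual CP header layout
 * with the type in bits 31:30, the count in bits 29:16 and the PKT3 opcode
 * in bits 15:8): the dword 0xC0001000 parses as type 3, count 0 and opcode
 * 0x10 (NOP), i.e. the packet3 NOP form that carries a relocation index in
 * the single dword that follows it.
 */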

/**
 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
 * @p: structure holding the parser context.
 *
 * Check if the next packet is a NOP relocation packet3.
 **/
bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
{
        struct radeon_cs_packet p3reloc;
        int r;

        r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
        if (r)
                return false;
        if (p3reloc.type != RADEON_PACKET_TYPE3)
                return false;
        if (p3reloc.opcode != RADEON_PACKET3_NOP)
                return false;
        return true;
}

/**
 * radeon_cs_dump_packet() - dump raw packet context
 * @p: structure holding the parser context.
 * @pkt: structure holding the packet.
 *
 * Used mostly for debugging and error reporting.
 **/
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
                           struct radeon_cs_packet *pkt)
{
        volatile uint32_t *ib;
        unsigned i;
        unsigned idx;

        ib = p->ib.ptr;
        idx = pkt->idx;
        for (i = 0; i <= (pkt->count + 1); i++, idx++)
                DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
}

/**
 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
 * @p: parser structure holding parsing context.
 * @cs_reloc: where to store the resulting reloc information
 * @nomm: no memory management (legacy UMS code path)
 *
 * Check if the next packet is a relocation packet3 and, if so, return the
 * corresponding relocation entry through @cs_reloc.
 **/
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
                                struct radeon_cs_reloc **cs_reloc,
                                int nomm)
{
        struct radeon_cs_chunk *relocs_chunk;
        struct radeon_cs_packet p3reloc;
        unsigned idx;
        int r;

        if (p->chunk_relocs_idx == -1) {
                DRM_ERROR("No relocation chunk !\n");
                return -EINVAL;
        }
        *cs_reloc = NULL;
        relocs_chunk = &p->chunks[p->chunk_relocs_idx];
        r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
        if (r)
                return r;
        p->idx += p3reloc.count + 2;
        if (p3reloc.type != RADEON_PACKET_TYPE3 ||
            p3reloc.opcode != RADEON_PACKET3_NOP) {
                DRM_ERROR("No packet3 for relocation for packet at %d.\n",
                          p3reloc.idx);
                radeon_cs_dump_packet(p, &p3reloc);
                return -EINVAL;
        }
        idx = radeon_get_ib_value(p, p3reloc.idx + 1);
        if (idx >= relocs_chunk->length_dw) {
                DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
                          idx, relocs_chunk->length_dw);
                radeon_cs_dump_packet(p, &p3reloc);
                return -EINVAL;
        }
        /* FIXME: we assume reloc size is 4 dwords */
        if (nomm) {
                *cs_reloc = p->relocs;
                (*cs_reloc)->gpu_offset =
                        (u64)relocs_chunk->kdata[idx + 3] << 32;
                (*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
        } else
                *cs_reloc = p->relocs_ptr[(idx / 4)];
        return 0;
}