xref: /dragonfly/sys/dev/drm/radeon/radeon_cs.c (revision 029e6489)
1 /*
2  * Copyright 2008 Jerome Glisse.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Jerome Glisse <glisse@freedesktop.org>
26  */
27 #include <linux/list_sort.h>
28 #include <drm/drmP.h>
29 #include <drm/radeon_drm.h>
30 #include "radeon_reg.h"
31 #include "radeon.h"
32 #include "radeon_trace.h"
33 
34 #define RADEON_CS_MAX_PRIORITY		32u
35 #define RADEON_CS_NUM_BUCKETS		(RADEON_CS_MAX_PRIORITY + 1)
36 
37 /* This is based on bucket sort with O(n) time complexity.
38  * An item with priority "i" is added to bucket[i]. The lists are then
39  * concatenated in descending order.
40  */
41 struct radeon_cs_buckets {
42 	struct list_head bucket[RADEON_CS_NUM_BUCKETS];
43 };
44 
45 static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
46 {
47 	unsigned i;
48 
49 	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
50 		INIT_LIST_HEAD(&b->bucket[i]);
51 }
52 
53 static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
54 				  struct list_head *item, unsigned priority)
55 {
56 	/* Since buffers which appear sooner in the relocation list are
57 	 * likely to be used more often than buffers which appear later
58 	 * in the list, the sort mustn't change the ordering of buffers
59 	 * with the same priority, i.e. it must be stable.
60 	 */
61 	list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
62 }
63 
64 static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
65 				       struct list_head *out_list)
66 {
67 	unsigned i;
68 
69 	/* Connect the sorted buckets in the output list. */
70 	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
71 		list_splice(&b->bucket[i], out_list);
72 	}
73 }
74 
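/**
 * radeon_cs_parser_relocs() - build and validate the buffer list for a CS
 * @p:	parser structure holding parsing context.
 *
 * Look up every GEM handle in the relocation chunk, pick the allowed domains
 * and a priority for each buffer, sort the buffers into the validation list
 * and validate them.
 **/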
75 static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
76 {
77 	struct radeon_cs_chunk *chunk;
78 	struct radeon_cs_buckets buckets;
79 	unsigned i;
80 	bool need_mmap_lock = false;
81 	int r;
82 
83 	if (p->chunk_relocs == NULL) {
84 		return 0;
85 	}
86 	chunk = p->chunk_relocs;
87 	p->dma_reloc_idx = 0;
88 	/* FIXME: we assume that each reloc uses 4 dwords */
89 	p->nrelocs = chunk->length_dw / 4;
90 	p->relocs = drm_calloc_large(p->nrelocs, sizeof(struct radeon_bo_list));
91 	if (p->relocs == NULL) {
92 		return -ENOMEM;
93 	}
94 
95 	radeon_cs_buckets_init(&buckets);
96 
97 	for (i = 0; i < p->nrelocs; i++) {
98 		struct drm_radeon_cs_reloc *r;
99 		struct drm_gem_object *gobj;
100 		unsigned priority;
101 
102 		r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
103 		gobj = drm_gem_object_lookup(p->filp, r->handle);
104 		if (gobj == NULL) {
105 			DRM_ERROR("gem object lookup failed 0x%x\n",
106 				  r->handle);
107 			return -ENOENT;
108 		}
109 		p->relocs[i].robj = gem_to_radeon_bo(gobj);
110 
111 		/* The userspace buffer priorities are from 0 to 15. A higher
112 		 * number means the buffer is more important.
113 		 * Also, the buffers used for write have a higher priority than
114 		 * the buffers used for read only, which doubles the range
115 		 * to 0 to 31. 32 is reserved for the kernel driver.
116 		 */
117 		priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
118 			   + !!r->write_domain;
119 
120 		/* the first reloc of a UVD job is the msg and that must be in
121 		   VRAM; also put everything into VRAM on AGP cards and older
122 		   IGP chips to avoid image corruption */
123 		if (p->ring == R600_RING_TYPE_UVD_INDEX &&
124 		    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev) ||
125 		     p->rdev->family == CHIP_RS780 ||
126 		     p->rdev->family == CHIP_RS880)) {
127 
128 			/* TODO: is this still needed for NI+ ? */
129 			p->relocs[i].prefered_domains =
130 				RADEON_GEM_DOMAIN_VRAM;
131 
132 			p->relocs[i].allowed_domains =
133 				RADEON_GEM_DOMAIN_VRAM;
134 
135 			/* prioritize this over any other relocation */
136 			priority = RADEON_CS_MAX_PRIORITY;
137 		} else {
138 			uint32_t domain = r->write_domain ?
139 				r->write_domain : r->read_domains;
140 
141 			if (domain & RADEON_GEM_DOMAIN_CPU) {
142 				DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid "
143 					  "for command submission\n");
144 				return -EINVAL;
145 			}
146 
147 			p->relocs[i].prefered_domains = domain;
148 			if (domain == RADEON_GEM_DOMAIN_VRAM)
149 				domain |= RADEON_GEM_DOMAIN_GTT;
150 			p->relocs[i].allowed_domains = domain;
151 		}
152 
153 #if 0
154 		if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) {
155 			uint32_t domain = p->relocs[i].prefered_domains;
156 			if (!(domain & RADEON_GEM_DOMAIN_GTT)) {
157 				DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is "
158 					  "allowed for userptr BOs\n");
159 				return -EINVAL;
160 			}
161 			need_mmap_lock = true;
162 			domain = RADEON_GEM_DOMAIN_GTT;
163 			p->relocs[i].prefered_domains = domain;
164 			p->relocs[i].allowed_domains = domain;
165 		}
166 #endif
167 
168 		/* Objects shared as dma-bufs cannot be moved to VRAM */
169 		if (p->relocs[i].robj->prime_shared_count) {
170 			p->relocs[i].allowed_domains &= ~RADEON_GEM_DOMAIN_VRAM;
171 			if (!p->relocs[i].allowed_domains) {
172 				DRM_ERROR("BO associated with dma-buf cannot "
173 					  "be moved to VRAM\n");
174 				return -EINVAL;
175 			}
176 		}
177 
178 		p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
179 		p->relocs[i].tv.shared = !r->write_domain;
180 
181 		radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
182 				      priority);
183 	}
184 
185 	radeon_cs_buckets_get_list(&buckets, &p->validated);
186 
187 	if (p->cs_flags & RADEON_CS_USE_VM)
188 		p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
189 					      &p->validated);
190 	if (need_mmap_lock)
191 		down_read(&current->mm->mmap_sem);
192 
193 	r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);
194 
195 	if (need_mmap_lock)
196 		up_read(&current->mm->mmap_sem);
197 
198 	return r;
199 }
200 
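/**
 * radeon_cs_get_ring() - map a userspace ring id and priority to a HW ring
 * @p:		parser structure holding parsing context.
 * @ring:	userspace ring id (RADEON_CS_RING_*)
 * @priority:	userspace priority hint
 *
 * Translate the ring id from the CS ioctl into a hardware ring index,
 * taking the chip family and the requested priority into account.
 **/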
201 static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
202 {
203 	p->priority = priority;
204 
205 	switch (ring) {
206 	default:
207 		DRM_ERROR("unknown ring id: %d\n", ring);
208 		return -EINVAL;
209 	case RADEON_CS_RING_GFX:
210 		p->ring = RADEON_RING_TYPE_GFX_INDEX;
211 		break;
212 	case RADEON_CS_RING_COMPUTE:
213 		if (p->rdev->family >= CHIP_TAHITI) {
214 			if (p->priority > 0)
215 				p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
216 			else
217 				p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
218 		} else
219 			p->ring = RADEON_RING_TYPE_GFX_INDEX;
220 		break;
221 	case RADEON_CS_RING_DMA:
222 		if (p->rdev->family >= CHIP_CAYMAN) {
223 			if (p->priority > 0)
224 				p->ring = R600_RING_TYPE_DMA_INDEX;
225 			else
226 				p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
227 		} else if (p->rdev->family >= CHIP_RV770) {
228 			p->ring = R600_RING_TYPE_DMA_INDEX;
229 		} else {
230 			return -EINVAL;
231 		}
232 		break;
233 	case RADEON_CS_RING_UVD:
234 		p->ring = R600_RING_TYPE_UVD_INDEX;
235 		break;
236 	case RADEON_CS_RING_VCE:
237 		/* TODO: only use the low priority ring for now */
238 		p->ring = TN_RING_TYPE_VCE1_INDEX;
239 		break;
240 	}
241 	return 0;
242 }
243 
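/**
 * radeon_cs_sync_rings() - make the IB wait for the validated buffers
 * @p:	parser structure holding parsing context.
 *
 * Add the fences of all validated buffers to the IB's sync object so the
 * submission waits for earlier work that still uses those buffers.
 **/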
244 static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
245 {
246 	struct radeon_bo_list *reloc;
247 	int r;
248 
249 	list_for_each_entry(reloc, &p->validated, tv.head) {
250 		struct reservation_object *resv;
251 
252 		resv = reloc->robj->tbo.resv;
253 		r = radeon_sync_resv(p->rdev, &p->ib.sync, resv,
254 				     reloc->tv.shared);
255 		if (r)
256 			return r;
257 	}
258 	return 0;
259 }
260 
261 /* XXX: note that this is called from the legacy UMS CS ioctl as well */
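/**
 * radeon_cs_parser_init() - copy the CS chunks in from userspace
 * @p:		parser structure holding parsing context.
 * @data:	pointer to the drm_radeon_cs ioctl argument
 *
 * Copy the chunk array and chunk headers from userspace, remember where the
 * IB, relocation, flags and const IB chunks are, and (on KMS) pick the
 * target ring and priority.
 **/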
262 int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
263 {
264 	struct drm_radeon_cs *cs = data;
265 	uint64_t *chunk_array_ptr;
266 	unsigned size, i;
267 	u32 ring = RADEON_CS_RING_GFX;
268 	s32 priority = 0;
269 
270 	INIT_LIST_HEAD(&p->validated);
271 
272 	if (!cs->num_chunks) {
273 		return 0;
274 	}
275 
276 	/* get chunks */
277 	p->idx = 0;
278 	p->ib.sa_bo = NULL;
279 	p->const_ib.sa_bo = NULL;
280 	p->chunk_ib = NULL;
281 	p->chunk_relocs = NULL;
282 	p->chunk_flags = NULL;
283 	p->chunk_const_ib = NULL;
284 	p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
285 	if (p->chunks_array == NULL) {
286 		return -ENOMEM;
287 	}
288 	chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
289 	if (copy_from_user(p->chunks_array, chunk_array_ptr,
290 			       sizeof(uint64_t)*cs->num_chunks)) {
291 		return -EFAULT;
292 	}
293 	p->cs_flags = 0;
294 	p->nchunks = cs->num_chunks;
295 	p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
296 	if (p->chunks == NULL) {
297 		return -ENOMEM;
298 	}
299 	for (i = 0; i < p->nchunks; i++) {
300 		struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
301 		struct drm_radeon_cs_chunk user_chunk;
302 		uint32_t __user *cdata;
303 
304 		chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i];
305 		if (copy_from_user(&user_chunk, chunk_ptr,
306 				       sizeof(struct drm_radeon_cs_chunk))) {
307 			return -EFAULT;
308 		}
309 		p->chunks[i].length_dw = user_chunk.length_dw;
310 		if (user_chunk.chunk_id == RADEON_CHUNK_ID_RELOCS) {
311 			p->chunk_relocs = &p->chunks[i];
312 		}
313 		if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
314 			p->chunk_ib = &p->chunks[i];
315 			/* zero length IB isn't useful */
316 			if (p->chunks[i].length_dw == 0)
317 				return -EINVAL;
318 		}
319 		if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB) {
320 			p->chunk_const_ib = &p->chunks[i];
321 			/* zero length CONST IB isn't useful */
322 			if (p->chunks[i].length_dw == 0)
323 				return -EINVAL;
324 		}
325 		if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
326 			p->chunk_flags = &p->chunks[i];
327 			/* zero length flags aren't useful */
328 			if (p->chunks[i].length_dw == 0)
329 				return -EINVAL;
330 		}
331 
332 		size = p->chunks[i].length_dw;
333 		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
334 		p->chunks[i].user_ptr = cdata;
335 		if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB)
336 			continue;
337 
338 		if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
339 			if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
340 				continue;
341 		}
342 
343 		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
344 		size *= sizeof(uint32_t);
345 		if (p->chunks[i].kdata == NULL) {
346 			return -ENOMEM;
347 		}
348 		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
349 			return -EFAULT;
350 		}
351 		if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
352 			p->cs_flags = p->chunks[i].kdata[0];
353 			if (p->chunks[i].length_dw > 1)
354 				ring = p->chunks[i].kdata[1];
355 			if (p->chunks[i].length_dw > 2)
356 				priority = (s32)p->chunks[i].kdata[2];
357 		}
358 	}
359 
360 	/* these are KMS only */
361 	if (p->rdev) {
362 		if ((p->cs_flags & RADEON_CS_USE_VM) &&
363 		    !p->rdev->vm_manager.enabled) {
364 			DRM_ERROR("VM not active on asic!\n");
365 			return -EINVAL;
366 		}
367 
368 		if (radeon_cs_get_ring(p, ring, priority))
369 			return -EINVAL;
370 
371 		/* we only support VM on some SI+ rings */
372 		if ((p->cs_flags & RADEON_CS_USE_VM) == 0) {
373 			if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) {
374 				DRM_ERROR("Ring %d requires VM!\n", p->ring);
375 				return -EINVAL;
376 			}
377 		} else {
378 			if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) {
379 				DRM_ERROR("VM not supported on ring %d!\n",
380 					  p->ring);
381 				return -EINVAL;
382 			}
383 		}
384 	}
385 
386 	return 0;
387 }
388 
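/* Comparator for the list_sort() call in radeon_cs_parser_fini(). */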
389 static int cmp_size_smaller_first(void *priv, struct list_head *a,
390 				  struct list_head *b)
391 {
392 	struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head);
393 	struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head);
394 
395 	/* Sort A before B if A is smaller. */
396 	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
397 }
398 
399 /**
400  * radeon_cs_parser_fini() - clean parser states
401  * @parser:	parser structure holding parsing context.
402  * @error:	error number
403  * @backoff:	indicator to backoff the reservation
404  * If error is set, back off the buffer reservations; otherwise fence them.
405  * In both cases the memory used by the parsing context is freed.
406  **/
407 static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
408 {
409 	unsigned i;
410 
411 	if (!error) {
412 		/* Sort the buffer list from the smallest to largest buffer,
413 		 * which affects the order of buffers in the LRU list.
414 		 * This assures that the smallest buffers are added first
415 		 * to the LRU list, so they are likely to be later evicted
416 		 * first, instead of large buffers whose eviction is more
417 		 * expensive.
418 		 *
419 		 * This slightly lowers the number of bytes moved by TTM
420 		 * per frame under memory pressure.
421 		 */
422 		list_sort(NULL, &parser->validated, cmp_size_smaller_first);
423 
424 		ttm_eu_fence_buffer_objects(&parser->ticket,
425 					    &parser->validated,
426 					    &parser->ib.fence->base);
427 	} else if (backoff) {
428 		ttm_eu_backoff_reservation(&parser->ticket,
429 					   &parser->validated);
430 	}
431 
432 	if (parser->relocs != NULL) {
433 		for (i = 0; i < parser->nrelocs; i++) {
434 			struct radeon_bo *bo = parser->relocs[i].robj;
435 			if (bo == NULL)
436 				continue;
437 
438 			drm_gem_object_unreference_unlocked(&bo->gem_base);
439 		}
440 	}
441 	kfree(parser->track);
442 	drm_free_large(parser->relocs);
443 	drm_free_large(parser->vm_bos);
444 	for (i = 0; i < parser->nchunks; i++)
445 		drm_free_large(parser->chunks[i].kdata);
446 	kfree(parser->chunks);
447 	kfree(parser->chunks_array);
448 	radeon_ib_free(parser->rdev, &parser->ib);
449 	radeon_ib_free(parser->rdev, &parser->const_ib);
450 }
451 
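/**
 * radeon_cs_ib_chunk() - check and submit an IB that does not use a VM
 * @rdev:	radeon device structure
 * @parser:	parser structure holding parsing context
 *
 * Run the per-ring CS checker on the IB, sync against the validated buffers
 * and schedule the IB. Does nothing when RADEON_CS_USE_VM is set.
 **/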
452 static int radeon_cs_ib_chunk(struct radeon_device *rdev,
453 			      struct radeon_cs_parser *parser)
454 {
455 	int r;
456 
457 	if (parser->chunk_ib == NULL)
458 		return 0;
459 
460 	if (parser->cs_flags & RADEON_CS_USE_VM)
461 		return 0;
462 
463 	r = radeon_cs_parse(rdev, parser->ring, parser);
464 	if (r || parser->parser_error) {
465 		DRM_ERROR("Invalid command stream !\n");
466 		return r;
467 	}
468 
469 	r = radeon_cs_sync_rings(parser);
470 	if (r) {
471 		if (r != -ERESTARTSYS)
472 			DRM_ERROR("Failed to sync rings: %i\n", r);
473 		return r;
474 	}
475 
476 	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
477 		radeon_uvd_note_usage(rdev);
478 	else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
479 		 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
480 		radeon_vce_note_usage(rdev);
481 
482 	r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
483 	if (r) {
484 		DRM_ERROR("Failed to schedule IB !\n");
485 	}
486 	return r;
487 }
488 
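/**
 * radeon_bo_vm_update_pte() - update the page tables for all CS buffers
 * @p:	parser structure holding parsing context
 * @vm:	VM the command submission runs in
 *
 * Update the page directory and the page table entries of every buffer in
 * the relocation list and make the IB wait for those updates.
 **/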
489 static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
490 				   struct radeon_vm *vm)
491 {
492 	struct radeon_device *rdev = p->rdev;
493 	struct radeon_bo_va *bo_va;
494 	int i, r;
495 
496 	r = radeon_vm_update_page_directory(rdev, vm);
497 	if (r)
498 		return r;
499 
500 	r = radeon_vm_clear_freed(rdev, vm);
501 	if (r)
502 		return r;
503 
504 	if (vm->ib_bo_va == NULL) {
505 		DRM_ERROR("Tmp BO not in VM!\n");
506 		return -EINVAL;
507 	}
508 
509 	r = radeon_vm_bo_update(rdev, vm->ib_bo_va,
510 				&rdev->ring_tmp_bo.bo->tbo.mem);
511 	if (r)
512 		return r;
513 
514 	for (i = 0; i < p->nrelocs; i++) {
515 		struct radeon_bo *bo;
516 
517 		bo = p->relocs[i].robj;
518 		bo_va = radeon_vm_bo_find(vm, bo);
519 		if (bo_va == NULL) {
520 			dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
521 			return -EINVAL;
522 		}
523 
524 		r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
525 		if (r)
526 			return r;
527 
528 		radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update);
529 	}
530 
531 	return radeon_vm_clear_invalids(rdev, vm);
532 }
533 
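/**
 * radeon_cs_ib_vm_chunk() - check and submit an IB that uses a VM
 * @rdev:	radeon device structure
 * @parser:	parser structure holding parsing context
 *
 * Validate the (const) IB contents, update the VM page tables, sync against
 * the validated buffers and schedule the IB. Does nothing unless
 * RADEON_CS_USE_VM is set.
 **/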
534 static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
535 				 struct radeon_cs_parser *parser)
536 {
537 	struct radeon_fpriv *fpriv = parser->filp->driver_priv;
538 	struct radeon_vm *vm = &fpriv->vm;
539 	int r;
540 
541 	if (parser->chunk_ib == NULL)
542 		return 0;
543 	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
544 		return 0;
545 
546 	if (parser->const_ib.length_dw) {
547 		r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
548 		if (r) {
549 			return r;
550 		}
551 	}
552 
553 	r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
554 	if (r) {
555 		return r;
556 	}
557 
558 	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
559 		radeon_uvd_note_usage(rdev);
560 
561 	mutex_lock(&vm->mutex);
562 	r = radeon_bo_vm_update_pte(parser, vm);
563 	if (r) {
564 		goto out;
565 	}
566 
567 	r = radeon_cs_sync_rings(parser);
568 	if (r) {
569 		if (r != -ERESTARTSYS)
570 			DRM_ERROR("Failed to sync rings: %i\n", r);
571 		goto out;
572 	}
573 
574 	if ((rdev->family >= CHIP_TAHITI) &&
575 	    (parser->chunk_const_ib != NULL)) {
576 		r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
577 	} else {
578 		r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
579 	}
580 
581 out:
582 	mutex_unlock(&vm->mutex);
583 	return r;
584 }
585 
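/* -EDEADLK from the CS path indicates a GPU lockup: reset the GPU and, if
 * the reset succeeds, return -EAGAIN so userspace resubmits. */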
586 static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
587 {
588 	if (r == -EDEADLK) {
589 		r = radeon_gpu_reset(rdev);
590 		if (!r)
591 			r = -EAGAIN;
592 	}
593 	return r;
594 }
595 
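/**
 * radeon_cs_ib_fill() - allocate the IB(s) and copy in the command stream
 * @rdev:	radeon device structure
 * @parser:	parser structure holding parsing context
 *
 * Allocate the IB (and, on SI+, the const IB when one was submitted), check
 * the size limits and copy the command stream from the chunk data.
 **/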
596 static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser)
597 {
598 	struct radeon_cs_chunk *ib_chunk;
599 	struct radeon_vm *vm = NULL;
600 	int r;
601 
602 	if (parser->chunk_ib == NULL)
603 		return 0;
604 
605 	if (parser->cs_flags & RADEON_CS_USE_VM) {
606 		struct radeon_fpriv *fpriv = parser->filp->driver_priv;
607 		vm = &fpriv->vm;
608 
609 		if ((rdev->family >= CHIP_TAHITI) &&
610 		    (parser->chunk_const_ib != NULL)) {
611 			ib_chunk = parser->chunk_const_ib;
612 			if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
613 				DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
614 				return -EINVAL;
615 			}
616 			r =  radeon_ib_get(rdev, parser->ring, &parser->const_ib,
617 					   vm, ib_chunk->length_dw * 4);
618 			if (r) {
619 				DRM_ERROR("Failed to get const ib !\n");
620 				return r;
621 			}
622 			parser->const_ib.is_const_ib = true;
623 			parser->const_ib.length_dw = ib_chunk->length_dw;
624 			if (copy_from_user(parser->const_ib.ptr,
625 					       ib_chunk->user_ptr,
626 					       ib_chunk->length_dw * 4))
627 				return -EFAULT;
628 		}
629 
630 		ib_chunk = parser->chunk_ib;
631 		if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
632 			DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
633 			return -EINVAL;
634 		}
635 	}
636 	ib_chunk = parser->chunk_ib;
637 
638 	r =  radeon_ib_get(rdev, parser->ring, &parser->ib,
639 			   vm, ib_chunk->length_dw * 4);
640 	if (r) {
641 		DRM_ERROR("Failed to get ib !\n");
642 		return r;
643 	}
644 	parser->ib.length_dw = ib_chunk->length_dw;
645 	if (ib_chunk->kdata)
646 		memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
647 	else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4))
648 		return -EFAULT;
649 	return 0;
650 }
651 
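/**
 * radeon_cs_ioctl() - main command submission entry point
 * @dev:	drm device
 * @data:	drm_radeon_cs ioctl argument
 * @filp:	drm file the submission comes from
 *
 * Parse the chunks, validate the buffers, check the command stream and hand
 * the IB(s) over to the hardware, resetting the GPU on a detected lockup.
 **/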
652 int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
653 {
654 	struct radeon_device *rdev = dev->dev_private;
655 	struct radeon_cs_parser parser;
656 	int r;
657 
658 	down_read(&rdev->exclusive_lock);
659 	if (!rdev->accel_working) {
660 		up_read(&rdev->exclusive_lock);
661 		return -EBUSY;
662 	}
663 	if (rdev->in_reset) {
664 		up_read(&rdev->exclusive_lock);
665 		r = radeon_gpu_reset(rdev);
666 		if (!r)
667 			r = -EAGAIN;
668 		return r;
669 	}
670 	/* initialize parser */
671 	memset(&parser, 0, sizeof(struct radeon_cs_parser));
672 	parser.filp = filp;
673 	parser.rdev = rdev;
674 	parser.dev = rdev->dev;
675 	parser.family = rdev->family;
676 	r = radeon_cs_parser_init(&parser, data);
677 	if (r) {
678 		DRM_ERROR("Failed to initialize parser !\n");
679 		radeon_cs_parser_fini(&parser, r, false);
680 		up_read(&rdev->exclusive_lock);
681 		r = radeon_cs_handle_lockup(rdev, r);
682 		return r;
683 	}
684 
685 	r = radeon_cs_ib_fill(rdev, &parser);
686 	if (!r) {
687 		r = radeon_cs_parser_relocs(&parser);
688 		if (r && r != -ERESTARTSYS)
689 			DRM_ERROR("Failed to parse relocation %d!\n", r);
690 	}
691 
692 	if (r) {
693 		radeon_cs_parser_fini(&parser, r, false);
694 		up_read(&rdev->exclusive_lock);
695 		r = radeon_cs_handle_lockup(rdev, r);
696 		return r;
697 	}
698 
699 #ifdef TRACE_TODO
700 	trace_radeon_cs(&parser);
701 #endif
702 
703 	r = radeon_cs_ib_chunk(rdev, &parser);
704 	if (r) {
705 		goto out;
706 	}
707 	r = radeon_cs_ib_vm_chunk(rdev, &parser);
708 	if (r) {
709 		goto out;
710 	}
711 out:
712 	radeon_cs_parser_fini(&parser, r, true);
713 	up_read(&rdev->exclusive_lock);
714 	r = radeon_cs_handle_lockup(rdev, r);
715 	return r;
716 }
717 
718 /**
719  * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
720  * @p:		parser structure holding parsing context.
721  * @pkt:	where to store packet information
722  * @idx:	index in the IB at which the packet starts
723  * Assumes the IB chunk is properly set. Returns -EINVAL if the packet is
724  * bigger than the remaining IB size or if the packet type is unknown.
725  **/
726 int radeon_cs_packet_parse(struct radeon_cs_parser *p,
727 			   struct radeon_cs_packet *pkt,
728 			   unsigned idx)
729 {
730 	struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
731 	struct radeon_device *rdev = p->rdev;
732 	uint32_t header;
733 	int ret = 0, i;
734 
735 	if (idx >= ib_chunk->length_dw) {
736 		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
737 			  idx, ib_chunk->length_dw);
738 		return -EINVAL;
739 	}
740 	header = radeon_get_ib_value(p, idx);
741 	pkt->idx = idx;
742 	pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
743 	pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
744 	pkt->one_reg_wr = 0;
745 	switch (pkt->type) {
746 	case RADEON_PACKET_TYPE0:
747 		if (rdev->family < CHIP_R600) {
748 			pkt->reg = R100_CP_PACKET0_GET_REG(header);
749 			pkt->one_reg_wr =
750 				RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
751 		} else
752 			pkt->reg = R600_CP_PACKET0_GET_REG(header);
753 		break;
754 	case RADEON_PACKET_TYPE3:
755 		pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
756 		break;
757 	case RADEON_PACKET_TYPE2:
758 		pkt->count = -1;
759 		break;
760 	default:
761 		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
762 		ret = -EINVAL;
763 		goto dump_ib;
764 	}
765 	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
766 		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
767 			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
768 		ret = -EINVAL;
769 		goto dump_ib;
770 	}
771 	return 0;
772 
773 dump_ib:
774 	for (i = 0; i < ib_chunk->length_dw; i++) {
775 		if (i == idx)
776 			printk("\t0x%08x <---\n", radeon_get_ib_value(p, i));
777 		else
778 			printk("\t0x%08x\n", radeon_get_ib_value(p, i));
779 	}
780 	return ret;
781 }
782 
783 /**
784  * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
785  * @p:		structure holding the parser context.
786  *
787  * Check if the next packet is NOP relocation packet3.
788  **/
789 bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
790 {
791 	struct radeon_cs_packet p3reloc;
792 	int r;
793 
794 	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
795 	if (r)
796 		return false;
797 	if (p3reloc.type != RADEON_PACKET_TYPE3)
798 		return false;
799 	if (p3reloc.opcode != RADEON_PACKET3_NOP)
800 		return false;
801 	return true;
802 }
803 
804 /**
805  * radeon_cs_dump_packet() - dump raw packet context
806  * @p:		structure holding the parser context.
807  * @pkt:	structure holding the packet.
808  *
809  * Used mostly for debugging and error reporting.
810  **/
811 void radeon_cs_dump_packet(struct radeon_cs_parser *p,
812 			   struct radeon_cs_packet *pkt)
813 {
814 	volatile uint32_t *ib;
815 	unsigned i;
816 	unsigned idx;
817 
818 	ib = p->ib.ptr;
819 	idx = pkt->idx;
820 	for (i = 0; i <= (pkt->count + 1); i++, idx++)
821 		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
822 }
823 
824 /**
825  * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
826  * @p:		parser structure holding parsing context.
827  * @cs_reloc:	where to store a pointer to the relocation entry
828  * @nomm:	no memory management; take the GPU offset directly from the
829  *		relocation chunk instead of using the BO list
830  *
831  * Check if the next packet is a relocation packet3 and, if so, look up the
832  * corresponding entry in the relocation chunk and return it through
833  * @cs_reloc.
834  **/
835 int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
836 				struct radeon_bo_list **cs_reloc,
837 				int nomm)
838 {
839 	struct radeon_cs_chunk *relocs_chunk;
840 	struct radeon_cs_packet p3reloc;
841 	unsigned idx;
842 	int r;
843 
844 	if (p->chunk_relocs == NULL) {
845 		DRM_ERROR("No relocation chunk !\n");
846 		return -EINVAL;
847 	}
848 	*cs_reloc = NULL;
849 	relocs_chunk = p->chunk_relocs;
850 	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
851 	if (r)
852 		return r;
853 	p->idx += p3reloc.count + 2;
854 	if (p3reloc.type != RADEON_PACKET_TYPE3 ||
855 	    p3reloc.opcode != RADEON_PACKET3_NOP) {
856 		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
857 			  p3reloc.idx);
858 		radeon_cs_dump_packet(p, &p3reloc);
859 		return -EINVAL;
860 	}
861 	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
862 	if (idx >= relocs_chunk->length_dw) {
863 		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
864 			  idx, relocs_chunk->length_dw);
865 		radeon_cs_dump_packet(p, &p3reloc);
866 		return -EINVAL;
867 	}
868 	/* FIXME: we assume reloc size is 4 dwords */
869 	if (nomm) {
870 		*cs_reloc = p->relocs;
871 		(*cs_reloc)->gpu_offset =
872 			(u64)relocs_chunk->kdata[idx + 3] << 32;
873 		(*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
874 	} else
875 		*cs_reloc = &p->relocs[(idx / 4)];
876 	return 0;
877 }
878