/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
#include <linux/list_sort.h>
#include <drm/drmP.h>
#include <uapi_drm/radeon_drm.h>
#include "radeon_reg.h"
#include "radeon.h"
#ifdef TRACE_TODO
#include "radeon_trace.h"
#endif

#define RADEON_CS_MAX_PRIORITY		32u
#define RADEON_CS_NUM_BUCKETS		(RADEON_CS_MAX_PRIORITY + 1)

/* This is based on the bucket sort with O(n) time complexity.
 * An item with priority "i" is added to bucket[i]. The lists are then
 * concatenated in descending order.
 */
struct radeon_cs_buckets {
	struct list_head bucket[RADEON_CS_NUM_BUCKETS];
};

static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
{
	unsigned i;

	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
		INIT_LIST_HEAD(&b->bucket[i]);
}

static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
				  struct list_head *item, unsigned priority)
{
	/* Since buffers which appear sooner in the relocation list are
	 * likely to be used more often than buffers which appear later
	 * in the list, the sort mustn't change the ordering of buffers
	 * with the same priority, i.e. it must be stable.
	 */
	list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
}

static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
				       struct list_head *out_list)
{
	unsigned i;

	/* Connect the sorted buckets in the output list. */
	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
		list_splice(&b->bucket[i], out_list);
	}
}

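/**
 * radeon_cs_parser_relocs() - build the list of buffers referenced by a CS
 * @p:		parser structure holding parsing context.
 *
 * Walks the relocation chunk, looks up the GEM object for each relocation,
 * assigns preferred/allowed domains and a bucket priority, and collects the
 * resulting buffers on p->validated for reservation and validation.
 **/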
static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
{
	struct drm_device *ddev = p->rdev->ddev;
	struct radeon_cs_chunk *chunk;
	struct radeon_cs_buckets buckets;
	unsigned i, j;
	bool duplicate;

	if (p->chunk_relocs_idx == -1) {
		return 0;
	}
	chunk = &p->chunks[p->chunk_relocs_idx];
	p->dma_reloc_idx = 0;
	/* FIXME: we assume that each reloc uses 4 dwords */
	p->nrelocs = chunk->length_dw / 4;
	p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
	if (p->relocs_ptr == NULL) {
		return -ENOMEM;
	}
	p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_cs_reloc), GFP_KERNEL);
	if (p->relocs == NULL) {
		return -ENOMEM;
	}

	radeon_cs_buckets_init(&buckets);

	for (i = 0; i < p->nrelocs; i++) {
		struct drm_radeon_cs_reloc *r;
		unsigned priority;

		duplicate = false;
		r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
		for (j = 0; j < i; j++) {
			if (r->handle == p->relocs[j].handle) {
				p->relocs_ptr[i] = &p->relocs[j];
				duplicate = true;
				break;
			}
		}
		if (duplicate) {
			p->relocs[i].handle = 0;
			continue;
		}

		p->relocs[i].gobj = drm_gem_object_lookup(ddev, p->filp,
							  r->handle);
		if (p->relocs[i].gobj == NULL) {
			DRM_ERROR("gem object lookup failed 0x%x\n",
				  r->handle);
			return -ENOENT;
		}
		p->relocs_ptr[i] = &p->relocs[i];
		p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);

		/* The userspace buffer priorities are from 0 to 15. A higher
		 * number means the buffer is more important.
		 * Also, the buffers used for write have a higher priority than
		 * the buffers used for read only, which doubles the range
		 * to 0 to 31. 32 is reserved for the kernel driver.
		 */
		priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
			   + !!r->write_domain;

		/* The first reloc of an UVD job is the msg and that must be in
		   VRAM; also put everything into VRAM on AGP cards and older
		   IGP chips to avoid image corruption */
		if (p->ring == R600_RING_TYPE_UVD_INDEX &&
		    (i == 0 || (p->rdev->flags & RADEON_IS_AGP) ||
		     p->rdev->family == CHIP_RS780 ||
		     p->rdev->family == CHIP_RS880)) {

			/* TODO: is this still needed for NI+ ? */
			p->relocs[i].prefered_domains =
				RADEON_GEM_DOMAIN_VRAM;

			p->relocs[i].allowed_domains =
				RADEON_GEM_DOMAIN_VRAM;

			/* prioritize this over any other relocation */
			priority = RADEON_CS_MAX_PRIORITY;
		} else {
			uint32_t domain = r->write_domain ?
				r->write_domain : r->read_domains;

			if (domain & RADEON_GEM_DOMAIN_CPU) {
				DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid "
					  "for command submission\n");
				return -EINVAL;
			}

			p->relocs[i].prefered_domains = domain;
			if (domain == RADEON_GEM_DOMAIN_VRAM)
				domain |= RADEON_GEM_DOMAIN_GTT;
			p->relocs[i].allowed_domains = domain;
		}

		p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
		p->relocs[i].handle = r->handle;

		radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
				      priority);
	}

	radeon_cs_buckets_get_list(&buckets, &p->validated);

	if (p->cs_flags & RADEON_CS_USE_VM)
		p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
					      &p->validated);

	return radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);
}

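/**
 * radeon_cs_get_ring() - map a userspace ring id and priority to a HW ring
 * @p:		parser structure holding parsing context.
 * @ring:	ring id requested by userspace (RADEON_CS_RING_*)
 * @priority:	requested submission priority
 *
 * Stores the priority and selects the hardware ring index in p->ring,
 * taking the chip family into account. Returns -EINVAL for unknown or
 * unsupported ring ids.
 **/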
static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
{
	p->priority = priority;

	switch (ring) {
	default:
		DRM_ERROR("unknown ring id: %d\n", ring);
		return -EINVAL;
	case RADEON_CS_RING_GFX:
		p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_COMPUTE:
		if (p->rdev->family >= CHIP_TAHITI) {
			if (p->priority > 0)
				p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
		} else
			p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_DMA:
		if (p->rdev->family >= CHIP_CAYMAN) {
			if (p->priority > 0)
				p->ring = R600_RING_TYPE_DMA_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
		} else if (p->rdev->family >= CHIP_RV770) {
			p->ring = R600_RING_TYPE_DMA_INDEX;
		} else {
			return -EINVAL;
		}
		break;
	case RADEON_CS_RING_UVD:
		p->ring = R600_RING_TYPE_UVD_INDEX;
		break;
	case RADEON_CS_RING_VCE:
		/* TODO: only use the low priority ring for now */
		p->ring = TN_RING_TYPE_VCE1_INDEX;
		break;
	}
	return 0;
}

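/**
 * radeon_cs_sync_rings() - sync the IB against the relocated buffers
 * @p:		parser structure holding parsing context.
 *
 * Makes the IB's semaphore wait for the sync objects of every buffer
 * referenced by the relocation list.
 **/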
static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
	int i;

	for (i = 0; i < p->nrelocs; i++) {
		if (!p->relocs[i].robj)
			continue;

		radeon_semaphore_sync_to(p->ib.semaphore,
					 p->relocs[i].robj->tbo.sync_obj);
	}
}

/* XXX: note that this is called from the legacy UMS CS ioctl as well */
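/**
 * radeon_cs_parser_init() - initialize the parser from a CS ioctl request
 * @p:		parser structure holding parsing context.
 * @data:	pointer to the struct drm_radeon_cs ioctl argument
 *
 * Copies the chunk array from userspace, records the indices of the IB,
 * relocation, flags and const IB chunks, pulls chunk data into kernel
 * memory where needed, and (KMS only) checks the requested ring and VM
 * flags against what the device supports.
 **/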
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
{
	struct drm_radeon_cs *cs = data;
	uint64_t *chunk_array_ptr;
	unsigned size, i;
	u32 ring = RADEON_CS_RING_GFX;
	s32 priority = 0;

	if (!cs->num_chunks) {
		return 0;
	}
	/* get chunks */
	INIT_LIST_HEAD(&p->validated);
	p->idx = 0;
	p->ib.sa_bo = NULL;
	p->ib.semaphore = NULL;
	p->const_ib.sa_bo = NULL;
	p->const_ib.semaphore = NULL;
	p->chunk_ib_idx = -1;
	p->chunk_relocs_idx = -1;
	p->chunk_flags_idx = -1;
	p->chunk_const_ib_idx = -1;
	p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (p->chunks_array == NULL) {
		return -ENOMEM;
	}
	chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
	if (copy_from_user(p->chunks_array, chunk_array_ptr,
			       sizeof(uint64_t)*cs->num_chunks)) {
		return -EFAULT;
	}
	p->cs_flags = 0;
	p->nchunks = cs->num_chunks;
	p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
	if (p->chunks == NULL) {
		return -ENOMEM;
	}
	for (i = 0; i < p->nchunks; i++) {
		struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
		struct drm_radeon_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i];
		if (copy_from_user(&user_chunk, chunk_ptr,
				       sizeof(struct drm_radeon_cs_chunk))) {
			return -EFAULT;
		}
		p->chunks[i].length_dw = user_chunk.length_dw;
		p->chunks[i].chunk_id = user_chunk.chunk_id;
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) {
			p->chunk_relocs_idx = i;
		}
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
			p->chunk_ib_idx = i;
			/* zero length IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB) {
			p->chunk_const_ib_idx = i;
			/* zero length CONST IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->chunk_flags_idx = i;
			/* zero length flags aren't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}

		size = p->chunks[i].length_dw;
		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
		p->chunks[i].user_ptr = cdata;
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB)
			continue;

		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
			if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
				continue;
		}

		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
		size *= sizeof(uint32_t);
		if (p->chunks[i].kdata == NULL) {
			return -ENOMEM;
		}
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			return -EFAULT;
		}
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->cs_flags = p->chunks[i].kdata[0];
			if (p->chunks[i].length_dw > 1)
				ring = p->chunks[i].kdata[1];
			if (p->chunks[i].length_dw > 2)
				priority = (s32)p->chunks[i].kdata[2];
		}
	}

	/* these are KMS only */
	if (p->rdev) {
		if ((p->cs_flags & RADEON_CS_USE_VM) &&
		    !p->rdev->vm_manager.enabled) {
			DRM_ERROR("VM not active on asic!\n");
			return -EINVAL;
		}

		if (radeon_cs_get_ring(p, ring, priority))
			return -EINVAL;

		/* we only support VM on some SI+ rings */
		if ((p->cs_flags & RADEON_CS_USE_VM) == 0) {
			if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) {
				DRM_ERROR("Ring %d requires VM!\n", p->ring);
				return -EINVAL;
			}
		} else {
			if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) {
				DRM_ERROR("VM not supported on ring %d!\n",
					  p->ring);
				return -EINVAL;
			}
		}
	}

	return 0;
}

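/**
 * cmp_size_smaller_first() - list_sort() callback ordering BOs by size
 * @priv:	unused
 * @a:		first validation list entry
 * @b:		second validation list entry
 *
 * Sorts buffers so that smaller ones come first; see the comment in
 * radeon_cs_parser_fini() for why the validation list is ordered this way.
 **/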
static int cmp_size_smaller_first(void *priv, struct list_head *a,
				  struct list_head *b)
{
	struct radeon_cs_reloc *la = list_entry(a, struct radeon_cs_reloc, tv.head);
	struct radeon_cs_reloc *lb = list_entry(b, struct radeon_cs_reloc, tv.head);

	/* Sort A before B if A is smaller. */
	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
}

/**
 * radeon_cs_parser_fini() - clean parser states
 * @parser:	parser structure holding parsing context.
 * @error:	error number
 * @backoff:	indicator to backoff the reservation
 *
 * If error is set, then unvalidate the buffers, otherwise just free the
 * memory used by the parsing context.
 **/
static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
{
	unsigned i;

	if (!error) {
		/* Sort the buffer list from the smallest to largest buffer,
		 * which affects the order of buffers in the LRU list.
		 * This assures that the smallest buffers are added first
		 * to the LRU list, so they are likely to be later evicted
		 * first, instead of large buffers whose eviction is more
		 * expensive.
		 *
		 * This slightly lowers the number of bytes moved by TTM
		 * per frame under memory pressure.
		 */
		list_sort(NULL, &parser->validated, cmp_size_smaller_first);

		ttm_eu_fence_buffer_objects(&parser->ticket,
					    &parser->validated,
					    parser->ib.fence);
	} else if (backoff) {
		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);
	}

	if (parser->relocs != NULL) {
		for (i = 0; i < parser->nrelocs; i++) {
			if (parser->relocs[i].gobj)
				drm_gem_object_unreference_unlocked(parser->relocs[i].gobj);
		}
	}
	kfree(parser->track);
	kfree(parser->relocs);
	kfree(parser->relocs_ptr);
	drm_free_large(parser->vm_bos);
	for (i = 0; i < parser->nchunks; i++)
		drm_free_large(parser->chunks[i].kdata);
	kfree(parser->chunks);
	kfree(parser->chunks_array);
	radeon_ib_free(parser->rdev, &parser->ib);
	radeon_ib_free(parser->rdev, &parser->const_ib);
}

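/**
 * radeon_cs_ib_chunk() - parse and schedule a non-VM IB
 * @rdev:	radeon device
 * @parser:	parser structure holding parsing context.
 *
 * For submissions that do not use a VM, run the per-ring command stream
 * checker, note UVD/VCE usage, sync against the relocated buffers and
 * schedule the IB on the selected ring.
 **/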
static int radeon_cs_ib_chunk(struct radeon_device *rdev,
			      struct radeon_cs_parser *parser)
{
	int r;

	if (parser->chunk_ib_idx == -1)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM)
		return 0;

	r = radeon_cs_parse(rdev, parser->ring, parser);
	if (r || parser->parser_error) {
		DRM_ERROR("Invalid command stream !\n");
		return r;
	}

	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);
	else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
		 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
		radeon_vce_note_usage(rdev);

	radeon_cs_sync_rings(parser);
	r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
	if (r) {
		DRM_ERROR("Failed to schedule IB !\n");
	}
	return r;
}

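/**
 * radeon_bo_vm_update_pte() - update the page tables for all CS buffers
 * @p:		parser structure holding parsing context.
 * @vm:		VM the command stream will execute in
 *
 * Updates the page directory, the temporary ring BO mapping and the page
 * table entries of every non-duplicate buffer in the relocation list, then
 * clears the invalidated mappings.
 **/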
static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
				   struct radeon_vm *vm)
{
	struct radeon_device *rdev = p->rdev;
	struct radeon_bo_va *bo_va;
	int i, r;

	r = radeon_vm_update_page_directory(rdev, vm);
	if (r)
		return r;

	r = radeon_vm_clear_freed(rdev, vm);
	if (r)
		return r;

	if (vm->ib_bo_va == NULL) {
		DRM_ERROR("Tmp BO not in VM!\n");
		return -EINVAL;
	}

	r = radeon_vm_bo_update(rdev, vm->ib_bo_va,
				&rdev->ring_tmp_bo.bo->tbo.mem);
	if (r)
		return r;

	for (i = 0; i < p->nrelocs; i++) {
		struct radeon_bo *bo;

		/* ignore duplicates */
		if (p->relocs_ptr[i] != &p->relocs[i])
			continue;

		bo = p->relocs[i].robj;
		bo_va = radeon_vm_bo_find(vm, bo);
		if (bo_va == NULL) {
			dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
			return -EINVAL;
		}

		r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
		if (r)
			return r;
	}

	return radeon_vm_clear_invalids(rdev, vm);
}

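/**
 * radeon_cs_ib_vm_chunk() - parse and schedule a VM IB
 * @rdev:	radeon device
 * @parser:	parser structure holding parsing context.
 *
 * For submissions using a VM, run the per-ring IB checker on the const IB
 * (if present) and the IB, then update the VM page tables, sync the rings
 * and schedule the IB(s) while holding the VM mutex.
 **/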
static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
				 struct radeon_cs_parser *parser)
{
	struct radeon_fpriv *fpriv = parser->filp->driver_priv;
	struct radeon_vm *vm = &fpriv->vm;
	int r;

	if (parser->chunk_ib_idx == -1)
		return 0;
	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
		return 0;

	if (parser->const_ib.length_dw) {
		r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
		if (r) {
			return r;
		}
	}

	r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
	if (r) {
		return r;
	}

	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);

	lockmgr(&vm->mutex, LK_EXCLUSIVE);
	r = radeon_bo_vm_update_pte(parser, vm);
	if (r) {
		goto out;
	}
	radeon_cs_sync_rings(parser);
	radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence);

	if ((rdev->family >= CHIP_TAHITI) &&
	    (parser->chunk_const_ib_idx != -1)) {
		r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
	} else {
		r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
	}

out:
	lockmgr(&vm->mutex, LK_RELEASE);
	return r;
}

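/**
 * radeon_cs_handle_lockup() - convert a deadlock into a GPU reset + retry
 * @rdev:	radeon device
 * @r:		error code from the previous step
 *
 * On -EDEADLK, try a GPU reset and return -EAGAIN on success so that
 * userspace resubmits; any other value is passed through unchanged.
 **/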
static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
{
	if (r == -EDEADLK) {
		r = radeon_gpu_reset(rdev);
		if (!r)
			r = -EAGAIN;
	}
	return r;
}

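/**
 * radeon_cs_ib_fill() - allocate the IB(s) and copy in the command stream
 * @rdev:	radeon device
 * @parser:	parser structure holding parsing context.
 *
 * Allocates the indirect buffer (and const IB where present) and copies the
 * command words in, either from the already copied chunk data or directly
 * from userspace. For VM submissions the chunk sizes are checked against
 * RADEON_IB_VM_MAX_SIZE first.
 **/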
static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_vm *vm = NULL;
	int r;

	if (parser->chunk_ib_idx == -1)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM) {
		struct radeon_fpriv *fpriv = parser->filp->driver_priv;
		vm = &fpriv->vm;

		if ((rdev->family >= CHIP_TAHITI) &&
		    (parser->chunk_const_ib_idx != -1)) {
			ib_chunk = &parser->chunks[parser->chunk_const_ib_idx];
			if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
				DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
				return -EINVAL;
			}
			r = radeon_ib_get(rdev, parser->ring, &parser->const_ib,
					  vm, ib_chunk->length_dw * 4);
			if (r) {
				DRM_ERROR("Failed to get const ib !\n");
				return r;
			}
			parser->const_ib.is_const_ib = true;
			parser->const_ib.length_dw = ib_chunk->length_dw;
			if (copy_from_user(parser->const_ib.ptr,
					       ib_chunk->user_ptr,
					       ib_chunk->length_dw * 4))
				return -EFAULT;
		}

		ib_chunk = &parser->chunks[parser->chunk_ib_idx];
		if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
			DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
			return -EINVAL;
		}
	}
	ib_chunk = &parser->chunks[parser->chunk_ib_idx];

	r = radeon_ib_get(rdev, parser->ring, &parser->ib,
			  vm, ib_chunk->length_dw * 4);
	if (r) {
		DRM_ERROR("Failed to get ib !\n");
		return r;
	}
	parser->ib.length_dw = ib_chunk->length_dw;
	if (ib_chunk->kdata)
		memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
	else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4))
		return -EFAULT;
	return 0;
}

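/**
 * radeon_cs_ioctl() - DRM_RADEON_CS ioctl handler (KMS)
 * @dev:	DRM device
 * @data:	ioctl argument (struct drm_radeon_cs)
 * @filp:	DRM file the request came from
 *
 * Entry point for command submission: initializes the parser, fills and
 * validates the IB and relocations, dispatches to the VM or non-VM
 * submission path and cleans up. -EDEADLK is turned into a GPU reset and
 * -EAGAIN by radeon_cs_handle_lockup().
 **/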
int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct radeon_device *rdev = dev->dev_private;
	struct radeon_cs_parser parser;
	int r;

	lockmgr(&rdev->exclusive_lock, LK_EXCLUSIVE);
	if (!rdev->accel_working) {
		lockmgr(&rdev->exclusive_lock, LK_RELEASE);
		return -EBUSY;
	}
	if (rdev->in_reset) {
		lockmgr(&rdev->exclusive_lock, LK_RELEASE);
		r = radeon_gpu_reset(rdev);
		if (!r)
			r = -EAGAIN;
		return r;
	}
	/* initialize parser */
	memset(&parser, 0, sizeof(struct radeon_cs_parser));
	parser.filp = filp;
	parser.rdev = rdev;
	parser.dev = rdev->dev;
	parser.family = rdev->family;
	r = radeon_cs_parser_init(&parser, data);
	if (r) {
		DRM_ERROR("Failed to initialize parser !\n");
		radeon_cs_parser_fini(&parser, r, false);
		lockmgr(&rdev->exclusive_lock, LK_RELEASE);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

	r = radeon_cs_ib_fill(rdev, &parser);
	if (!r) {
		r = radeon_cs_parser_relocs(&parser);
		if (r && r != -ERESTARTSYS)
			DRM_ERROR("Failed to parse relocation %d!\n", r);
	}

	if (r) {
		radeon_cs_parser_fini(&parser, r, false);
		lockmgr(&rdev->exclusive_lock, LK_RELEASE);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

#ifdef TRACE_TODO
	trace_radeon_cs(&parser);
#endif

	r = radeon_cs_ib_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
	r = radeon_cs_ib_vm_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
out:
	radeon_cs_parser_fini(&parser, r, true);
	lockmgr(&rdev->exclusive_lock, LK_RELEASE);
	r = radeon_cs_handle_lockup(rdev, r);
	return r;
}

/**
 * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @p:		parser structure holding parsing context.
 * @pkt:	where to store packet information
 * @idx:	index of the packet header in the ib chunk
 *
 * Assumes that chunk_ib_index is properly set. Returns -EINVAL if the
 * packet is bigger than the remaining ib size, or if the packet type is
 * unknown.
 **/
int radeon_cs_packet_parse(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt,
			   unsigned idx)
{
	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
	struct radeon_device *rdev = p->rdev;
	uint32_t header;

	if (idx >= ib_chunk->length_dw) {
		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
			  idx, ib_chunk->length_dw);
		return -EINVAL;
	}
	header = radeon_get_ib_value(p, idx);
	pkt->idx = idx;
	pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
	pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
	pkt->one_reg_wr = 0;
	switch (pkt->type) {
	case RADEON_PACKET_TYPE0:
		if (rdev->family < CHIP_R600) {
			pkt->reg = R100_CP_PACKET0_GET_REG(header);
			pkt->one_reg_wr =
				RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
		} else
			pkt->reg = R600_CP_PACKET0_GET_REG(header);
		break;
	case RADEON_PACKET_TYPE3:
		pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
		break;
	case RADEON_PACKET_TYPE2:
		pkt->count = -1;
		break;
	default:
		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
		return -EINVAL;
	}
	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
		return -EINVAL;
	}
	return 0;
}

/**
 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
 * @p:		structure holding the parser context.
 *
 * Check if the next packet is a relocation packet3 NOP.
 **/
bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet p3reloc;
	int r;

	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return false;
	if (p3reloc.type != RADEON_PACKET_TYPE3)
		return false;
	if (p3reloc.opcode != RADEON_PACKET3_NOP)
		return false;
	return true;
}

/**
 * radeon_cs_dump_packet() - dump raw packet context
 * @p:		structure holding the parser context.
 * @pkt:	structure holding the packet.
 *
 * Used mostly for debugging and error reporting.
 **/
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt)
{
	volatile uint32_t *ib;
	unsigned i;
	unsigned idx;

	ib = p->ib.ptr;
	idx = pkt->idx;
	for (i = 0; i <= (pkt->count + 1); i++, idx++)
		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
}

/**
 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
 * @p:		parser structure holding parsing context.
 * @cs_reloc:	where to store the resulting reloc information
 * @nomm:	no memory management for debugging
 *
 * Check if the next packet is a relocation packet3, do bo validation and
 * compute the GPU offset using the provided start.
 **/
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
				struct radeon_cs_reloc **cs_reloc,
				int nomm)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_packet p3reloc;
	unsigned idx;
	int r;

	if (p->chunk_relocs_idx == -1) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}
	*cs_reloc = NULL;
	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return r;
	p->idx += p3reloc.count + 2;
	if (p3reloc.type != RADEON_PACKET_TYPE3 ||
	    p3reloc.opcode != RADEON_PACKET3_NOP) {
		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
			  p3reloc.idx);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	/* FIXME: we assume reloc size is 4 dwords */
	if (nomm) {
		*cs_reloc = p->relocs;
		(*cs_reloc)->gpu_offset =
			(u64)relocs_chunk->kdata[idx + 3] << 32;
		(*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
	} else
		*cs_reloc = p->relocs_ptr[(idx / 4)];
	return 0;
}