xref: /dragonfly/sys/dev/drm/radeon/radeon_cs.c (revision 62dc643e)
1 /*
2  * Copyright 2008 Jerome Glisse.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Jerome Glisse <glisse@freedesktop.org>
26  */
27 #include <linux/list_sort.h>
28 #include <drm/drmP.h>
29 #include <uapi_drm/radeon_drm.h>
30 #include "radeon_reg.h"
31 #include "radeon.h"
32 #ifdef TRACE_TODO
33 #include "radeon_trace.h"
34 #endif
35 
36 #define RADEON_CS_MAX_PRIORITY		32u
37 #define RADEON_CS_NUM_BUCKETS		(RADEON_CS_MAX_PRIORITY + 1)
38 
39 /* This is based on a bucket sort, which has O(n) time complexity.
40  * An item with priority "i" is added to bucket[i]. The lists are then
41  * concatenated in descending order.
42  */
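/* Example: adding items a, b, c, d with priorities 2, 0, 2, 1 fills
 * bucket[2] = {a, c}, bucket[1] = {d}, bucket[0] = {b}; reading the
 * buckets back yields a, c, d, b, i.e. highest priority first with the
 * insertion order preserved inside each bucket.
 */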
43 struct radeon_cs_buckets {
44 	struct list_head bucket[RADEON_CS_NUM_BUCKETS];
45 };
46 
47 static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
48 {
49 	unsigned i;
50 
51 	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
52 		INIT_LIST_HEAD(&b->bucket[i]);
53 }
54 
55 static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
56 				  struct list_head *item, unsigned priority)
57 {
58 	/* Since buffers which appear sooner in the relocation list are
59 	 * likely to be used more often than buffers which appear later
60 	 * in the list, the sort mustn't change the ordering of buffers
61 	 * with the same priority, i.e. it must be stable.
62 	 */
63 	list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
64 }
65 
66 static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
67 				       struct list_head *out_list)
68 {
69 	unsigned i;
70 
71 	/* Connect the sorted buckets in the output list. */
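	/* list_splice() inserts each bucket at the head of out_list, so
	 * walking the buckets from priority 0 upward leaves the highest
	 * priority bucket in front: the final list is in descending order. */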
72 	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
73 		list_splice(&b->bucket[i], out_list);
74 	}
75 }
76 
77 static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
78 {
79 	struct radeon_cs_chunk *chunk;
80 	struct radeon_cs_buckets buckets;
81 	unsigned i, j;
82 	bool duplicate;
83 
84 	if (p->chunk_relocs_idx == -1) {
85 		return 0;
86 	}
87 	chunk = &p->chunks[p->chunk_relocs_idx];
88 	p->dma_reloc_idx = 0;
 89 	/* FIXME: we assume that each reloc uses 4 dwords */
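	/* Those 4 dwords are the fields of the uapi struct drm_radeon_cs_reloc:
	 * handle, read_domains, write_domain and flags. */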
90 	p->nrelocs = chunk->length_dw / 4;
91 	p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
92 	if (p->relocs_ptr == NULL) {
93 		return -ENOMEM;
94 	}
95 	p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_bo_list), GFP_KERNEL);
96 	if (p->relocs == NULL) {
97 		return -ENOMEM;
98 	}
99 
100 	radeon_cs_buckets_init(&buckets);
101 
102 	for (i = 0; i < p->nrelocs; i++) {
103 		struct drm_radeon_cs_reloc *r;
104 		struct drm_gem_object *gobj;
105 		unsigned priority;
106 
107 		duplicate = false;
108 		r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
109 		for (j = 0; j < i; j++) {
110 			struct drm_radeon_cs_reloc *other;
111 			other = (void *)&chunk->kdata[j*4];
112 			if (r->handle == other->handle) {
113 				p->relocs_ptr[i] = &p->relocs[j];
114 				duplicate = true;
115 				break;
116 			}
117 		}
118 		if (duplicate) {
119 			continue;
120 		}
121 
122 		gobj = drm_gem_object_lookup(p->filp, r->handle);
123 		if (gobj == NULL) {
124 			DRM_ERROR("gem object lookup failed 0x%x\n",
125 				  r->handle);
126 			return -ENOENT;
127 		}
128 		p->relocs_ptr[i] = &p->relocs[i];
129 		p->relocs[i].robj = gem_to_radeon_bo(gobj);
130 
131 		/* The userspace buffer priorities are from 0 to 15. A higher
132 		 * number means the buffer is more important.
133 		 * Also, the buffers used for write have a higher priority than
134 		 * the buffers used for read only, which doubles the range
135 		 * to 0 to 31. 32 is reserved for the kernel driver.
136 		 */
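		/* For example, a read-only buffer submitted with userspace
		 * priority 15 ends up with priority 30, and the same buffer
		 * with a write domain set ends up with 31. */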
137 		priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
138 			   + !!r->write_domain;
139 
 140 		/* The first reloc of a UVD job is the msg and that must be in
 141 		   VRAM. Also put everything into VRAM on AGP cards and older
 142 		   IGP chips to avoid image corruption. */
143 		if (p->ring == R600_RING_TYPE_UVD_INDEX &&
144 		    (i == 0 || (p->rdev->flags & RADEON_IS_AGP) ||
145 		     p->rdev->family == CHIP_RS780 ||
146 		     p->rdev->family == CHIP_RS880)) {
147 
148 			/* TODO: is this still needed for NI+ ? */
149 			p->relocs[i].prefered_domains =
150 				RADEON_GEM_DOMAIN_VRAM;
151 
152 			p->relocs[i].allowed_domains =
153 				RADEON_GEM_DOMAIN_VRAM;
154 
155 			/* prioritize this over any other relocation */
156 			priority = RADEON_CS_MAX_PRIORITY;
157 		} else {
158 			uint32_t domain = r->write_domain ?
159 				r->write_domain : r->read_domains;
160 
161 			if (domain & RADEON_GEM_DOMAIN_CPU) {
162 				DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid "
163 					  "for command submission\n");
164 				return -EINVAL;
165 			}
166 
167 			p->relocs[i].prefered_domains = domain;
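			/* If userspace asked for VRAM only, still allow GTT as
			 * a fallback so validation can succeed under VRAM
			 * pressure; the preferred placement stays VRAM. */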
168 			if (domain == RADEON_GEM_DOMAIN_VRAM)
169 				domain |= RADEON_GEM_DOMAIN_GTT;
170 			p->relocs[i].allowed_domains = domain;
171 		}
172 
173 		p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
174 
175 		radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
176 				      priority);
177 	}
178 
179 	radeon_cs_buckets_get_list(&buckets, &p->validated);
180 
181 	if (p->cs_flags & RADEON_CS_USE_VM)
182 		p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
183 					      &p->validated);
184 
185 	return radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);
186 }
187 
188 static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
189 {
190 	p->priority = priority;
191 
192 	switch (ring) {
193 	default:
194 		DRM_ERROR("unknown ring id: %d\n", ring);
195 		return -EINVAL;
196 	case RADEON_CS_RING_GFX:
197 		p->ring = RADEON_RING_TYPE_GFX_INDEX;
198 		break;
199 	case RADEON_CS_RING_COMPUTE:
200 		if (p->rdev->family >= CHIP_TAHITI) {
201 			if (p->priority > 0)
202 				p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
203 			else
204 				p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
205 		} else
206 			p->ring = RADEON_RING_TYPE_GFX_INDEX;
207 		break;
208 	case RADEON_CS_RING_DMA:
209 		if (p->rdev->family >= CHIP_CAYMAN) {
210 			if (p->priority > 0)
211 				p->ring = R600_RING_TYPE_DMA_INDEX;
212 			else
213 				p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
214 		} else if (p->rdev->family >= CHIP_RV770) {
215 			p->ring = R600_RING_TYPE_DMA_INDEX;
216 		} else {
217 			return -EINVAL;
218 		}
219 		break;
220 	case RADEON_CS_RING_UVD:
221 		p->ring = R600_RING_TYPE_UVD_INDEX;
222 		break;
223 	case RADEON_CS_RING_VCE:
224 		/* TODO: only use the low priority ring for now */
225 		p->ring = TN_RING_TYPE_VCE1_INDEX;
226 		break;
227 	}
228 	return 0;
229 }
230 
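/* Make the IB wait for the last fence of every validated BO, so the new
 * commands don't start before earlier work on those buffers, possibly
 * submitted on other rings, has finished. */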
231 static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
232 {
233 	struct radeon_bo_list *reloc;
234 
235 	list_for_each_entry(reloc, &p->validated, tv.head) {
236 		radeon_semaphore_sync_to(p->ib.semaphore,
237 					 reloc->robj->tbo.sync_obj);
238 	}
239 }
240 
241 /* XXX: note that this is called from the legacy UMS CS ioctl as well */
242 int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
243 {
244 	struct drm_radeon_cs *cs = data;
245 	uint64_t *chunk_array_ptr;
246 	unsigned size, i;
247 	u32 ring = RADEON_CS_RING_GFX;
248 	s32 priority = 0;
249 
250 	INIT_LIST_HEAD(&p->validated);
251 
252 	if (!cs->num_chunks) {
253 		return 0;
254 	}
255 
256 	/* get chunks */
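	/* cs->chunks is a userspace array of pointers (passed as u64) to
	 * struct drm_radeon_cs_chunk; each chunk in turn points to its
	 * payload: IB dwords, relocation entries or CS flags. */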
257 	p->idx = 0;
258 	p->ib.sa_bo = NULL;
259 	p->ib.semaphore = NULL;
260 	p->const_ib.sa_bo = NULL;
261 	p->const_ib.semaphore = NULL;
262 	p->chunk_ib_idx = -1;
263 	p->chunk_relocs_idx = -1;
264 	p->chunk_flags_idx = -1;
265 	p->chunk_const_ib_idx = -1;
266 	p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
267 	if (p->chunks_array == NULL) {
268 		return -ENOMEM;
269 	}
270 	chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
271 	if (copy_from_user(p->chunks_array, chunk_array_ptr,
272 			       sizeof(uint64_t)*cs->num_chunks)) {
273 		return -EFAULT;
274 	}
275 	p->cs_flags = 0;
276 	p->nchunks = cs->num_chunks;
277 	p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
278 	if (p->chunks == NULL) {
279 		return -ENOMEM;
280 	}
281 	for (i = 0; i < p->nchunks; i++) {
282 		struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
283 		struct drm_radeon_cs_chunk user_chunk;
284 		uint32_t __user *cdata;
285 
286 		chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i];
287 		if (copy_from_user(&user_chunk, chunk_ptr,
288 				       sizeof(struct drm_radeon_cs_chunk))) {
289 			return -EFAULT;
290 		}
291 		p->chunks[i].length_dw = user_chunk.length_dw;
292 		p->chunks[i].chunk_id = user_chunk.chunk_id;
293 		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) {
294 			p->chunk_relocs_idx = i;
295 		}
296 		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
297 			p->chunk_ib_idx = i;
298 			/* zero length IB isn't useful */
299 			if (p->chunks[i].length_dw == 0)
300 				return -EINVAL;
301 		}
302 		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB) {
303 			p->chunk_const_ib_idx = i;
304 			/* zero length CONST IB isn't useful */
305 			if (p->chunks[i].length_dw == 0)
306 				return -EINVAL;
307 		}
308 		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
309 			p->chunk_flags_idx = i;
310 			/* zero length flags aren't useful */
311 			if (p->chunks[i].length_dw == 0)
312 				return -EINVAL;
313 		}
314 
315 		size = p->chunks[i].length_dw;
316 		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
317 		p->chunks[i].user_ptr = cdata;
318 		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB)
319 			continue;
320 
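		/* On the KMS path the main IB only needs a kernel copy (kdata)
		 * on AGP boards; otherwise its dwords are copied straight from
		 * userspace into the IB buffer later, in radeon_cs_ib_fill(). */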
321 		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
322 			if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
323 				continue;
324 		}
325 
326 		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
327 		size *= sizeof(uint32_t);
328 		if (p->chunks[i].kdata == NULL) {
329 			return -ENOMEM;
330 		}
331 		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
332 			return -EFAULT;
333 		}
334 		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
335 			p->cs_flags = p->chunks[i].kdata[0];
336 			if (p->chunks[i].length_dw > 1)
337 				ring = p->chunks[i].kdata[1];
338 			if (p->chunks[i].length_dw > 2)
339 				priority = (s32)p->chunks[i].kdata[2];
340 		}
341 	}
342 
343 	/* these are KMS only */
344 	if (p->rdev) {
345 		if ((p->cs_flags & RADEON_CS_USE_VM) &&
346 		    !p->rdev->vm_manager.enabled) {
347 			DRM_ERROR("VM not active on asic!\n");
348 			return -EINVAL;
349 		}
350 
351 		if (radeon_cs_get_ring(p, ring, priority))
352 			return -EINVAL;
353 
354 		/* we only support VM on some SI+ rings */
355 		if ((p->cs_flags & RADEON_CS_USE_VM) == 0) {
356 			if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) {
357 				DRM_ERROR("Ring %d requires VM!\n", p->ring);
358 				return -EINVAL;
359 			}
360 		} else {
361 			if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) {
362 				DRM_ERROR("VM not supported on ring %d!\n",
363 					  p->ring);
364 				return -EINVAL;
365 			}
366 		}
367 	}
368 
369 	return 0;
370 }
371 
372 static int cmp_size_smaller_first(void *priv, struct list_head *a,
373 				  struct list_head *b)
374 {
375 	struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head);
376 	struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head);
377 
378 	/* Sort A before B if A is smaller. */
379 	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
380 }
381 
382 /**
 383  * radeon_cs_parser_fini() - clean up parser state
384  * @parser:	parser structure holding parsing context.
385  * @error:	error number
386  *
 387  * If error is set, unvalidate the buffers; otherwise just free the memory
 388  * used by the parsing context.
389  **/
390 static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
391 {
392 	unsigned i;
393 
394 	if (!error) {
395 		/* Sort the buffer list from the smallest to largest buffer,
396 		 * which affects the order of buffers in the LRU list.
 397 		 * This assures that the smallest buffers are added to the
 398 		 * LRU list first, so they are likely to be evicted first
 399 		 * later on, instead of large buffers whose eviction is
 400 		 * more expensive.
401 		 *
402 		 * This slightly lowers the number of bytes moved by TTM
403 		 * per frame under memory pressure.
404 		 */
405 		list_sort(NULL, &parser->validated, cmp_size_smaller_first);
406 
407 		ttm_eu_fence_buffer_objects(&parser->ticket,
408 					    &parser->validated,
409 					    parser->ib.fence);
410 	} else if (backoff) {
411 		ttm_eu_backoff_reservation(&parser->ticket,
412 					   &parser->validated);
413 	}
414 
415 	if (parser->relocs != NULL) {
416 		for (i = 0; i < parser->nrelocs; i++) {
417 			struct radeon_bo *bo = parser->relocs[i].robj;
418 			if (bo == NULL)
419 				continue;
420 
421 			drm_gem_object_unreference_unlocked(&bo->gem_base);
422 		}
423 	}
424 	kfree(parser->track);
425 	kfree(parser->relocs);
426 	kfree(parser->relocs_ptr);
427 	drm_free_large(parser->vm_bos);
428 	for (i = 0; i < parser->nchunks; i++)
429 		drm_free_large(parser->chunks[i].kdata);
430 	kfree(parser->chunks);
431 	kfree(parser->chunks_array);
432 	radeon_ib_free(parser->rdev, &parser->ib);
433 	radeon_ib_free(parser->rdev, &parser->const_ib);
434 }
435 
436 static int radeon_cs_ib_chunk(struct radeon_device *rdev,
437 			      struct radeon_cs_parser *parser)
438 {
439 	int r;
440 
441 	if (parser->chunk_ib_idx == -1)
442 		return 0;
443 
444 	if (parser->cs_flags & RADEON_CS_USE_VM)
445 		return 0;
446 
447 	r = radeon_cs_parse(rdev, parser->ring, parser);
448 	if (r || parser->parser_error) {
449 		DRM_ERROR("Invalid command stream !\n");
450 		return r;
451 	}
452 
453 	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
454 		radeon_uvd_note_usage(rdev);
455 	else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
456 		 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
457 		radeon_vce_note_usage(rdev);
458 
459 	radeon_cs_sync_rings(parser);
460 	r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
461 	if (r) {
462 		DRM_ERROR("Failed to schedule IB !\n");
463 	}
464 	return r;
465 }
466 
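/**
 * radeon_bo_vm_update_pte() - update the VM page tables for this CS
 * @p:		parser structure holding parsing context.
 * @vm:		VM the command stream will be executed in
 *
 * Update the page directory, flush freed mappings and refresh the page
 * table entries for the ring temporary BO and for every BO in the
 * relocation list, so all of them are mapped when the IB runs.
 **/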
467 static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
468 				   struct radeon_vm *vm)
469 {
470 	struct radeon_device *rdev = p->rdev;
471 	struct radeon_bo_va *bo_va;
472 	int i, r;
473 
474 	r = radeon_vm_update_page_directory(rdev, vm);
475 	if (r)
476 		return r;
477 
478 	r = radeon_vm_clear_freed(rdev, vm);
479 	if (r)
480 		return r;
481 
482 	if (vm->ib_bo_va == NULL) {
483 		DRM_ERROR("Tmp BO not in VM!\n");
484 		return -EINVAL;
485 	}
486 
487 	r = radeon_vm_bo_update(rdev, vm->ib_bo_va,
488 				&rdev->ring_tmp_bo.bo->tbo.mem);
489 	if (r)
490 		return r;
491 
492 	for (i = 0; i < p->nrelocs; i++) {
493 		struct radeon_bo *bo;
494 
495 		/* ignore duplicates */
496 		if (p->relocs_ptr[i] != &p->relocs[i])
497 			continue;
498 
499 		bo = p->relocs[i].robj;
500 		bo_va = radeon_vm_bo_find(vm, bo);
501 		if (bo_va == NULL) {
502 			dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
503 			return -EINVAL;
504 		}
505 
506 		r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
507 		if (r)
508 			return r;
509 	}
510 
511 	return radeon_vm_clear_invalids(rdev, vm);
512 }
513 
514 static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
515 				 struct radeon_cs_parser *parser)
516 {
517 	struct radeon_fpriv *fpriv = parser->filp->driver_priv;
518 	struct radeon_vm *vm = &fpriv->vm;
519 	int r;
520 
521 	if (parser->chunk_ib_idx == -1)
522 		return 0;
523 	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
524 		return 0;
525 
526 	if (parser->const_ib.length_dw) {
527 		r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
528 		if (r) {
529 			return r;
530 		}
531 	}
532 
533 	r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
534 	if (r) {
535 		return r;
536 	}
537 
538 	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
539 		radeon_uvd_note_usage(rdev);
540 
541 	mutex_lock(&vm->mutex);
542 	r = radeon_bo_vm_update_pte(parser, vm);
543 	if (r) {
544 		goto out;
545 	}
546 	radeon_cs_sync_rings(parser);
547 	radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence);
548 
549 	if ((rdev->family >= CHIP_TAHITI) &&
550 	    (parser->chunk_const_ib_idx != -1)) {
551 		r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
552 	} else {
553 		r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
554 	}
555 
556 out:
557 	mutex_unlock(&vm->mutex);
558 	return r;
559 }
560 
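/* -EDEADLK here means a GPU lockup was detected; try a reset and, if it
 * succeeds, return -EAGAIN so userspace resubmits the command stream. */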
561 static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
562 {
563 	if (r == -EDEADLK) {
564 		r = radeon_gpu_reset(rdev);
565 		if (!r)
566 			r = -EAGAIN;
567 	}
568 	return r;
569 }
570 
571 static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser)
572 {
573 	struct radeon_cs_chunk *ib_chunk;
574 	struct radeon_vm *vm = NULL;
575 	int r;
576 
577 	if (parser->chunk_ib_idx == -1)
578 		return 0;
579 
580 	if (parser->cs_flags & RADEON_CS_USE_VM) {
581 		struct radeon_fpriv *fpriv = parser->filp->driver_priv;
582 		vm = &fpriv->vm;
583 
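		/* On SI (TAHITI and newer) userspace may pass an additional
		 * "const" IB, which the hardware can process on the constant
		 * engine alongside the main IB. */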
584 		if ((rdev->family >= CHIP_TAHITI) &&
585 		    (parser->chunk_const_ib_idx != -1)) {
586 			ib_chunk = &parser->chunks[parser->chunk_const_ib_idx];
587 			if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
588 				DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
589 				return -EINVAL;
590 			}
591 			r =  radeon_ib_get(rdev, parser->ring, &parser->const_ib,
592 					   vm, ib_chunk->length_dw * 4);
593 			if (r) {
594 				DRM_ERROR("Failed to get const ib !\n");
595 				return r;
596 			}
597 			parser->const_ib.is_const_ib = true;
598 			parser->const_ib.length_dw = ib_chunk->length_dw;
599 			if (copy_from_user(parser->const_ib.ptr,
600 					       ib_chunk->user_ptr,
601 					       ib_chunk->length_dw * 4))
602 				return -EFAULT;
603 		}
604 
605 		ib_chunk = &parser->chunks[parser->chunk_ib_idx];
606 		if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
607 			DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
608 			return -EINVAL;
609 		}
610 	}
611 	ib_chunk = &parser->chunks[parser->chunk_ib_idx];
612 
613 	r =  radeon_ib_get(rdev, parser->ring, &parser->ib,
614 			   vm, ib_chunk->length_dw * 4);
615 	if (r) {
616 		DRM_ERROR("Failed to get ib !\n");
617 		return r;
618 	}
619 	parser->ib.length_dw = ib_chunk->length_dw;
620 	if (ib_chunk->kdata)
621 		memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
622 	else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4))
623 		return -EFAULT;
624 	return 0;
625 }
626 
627 int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
628 {
629 	struct radeon_device *rdev = dev->dev_private;
630 	struct radeon_cs_parser parser;
631 	int r;
632 
633 	lockmgr(&rdev->exclusive_lock, LK_EXCLUSIVE);
634 	if (!rdev->accel_working) {
635 		lockmgr(&rdev->exclusive_lock, LK_RELEASE);
636 		return -EBUSY;
637 	}
638 	if (rdev->in_reset) {
639 		lockmgr(&rdev->exclusive_lock, LK_RELEASE);
640 		r = radeon_gpu_reset(rdev);
641 		if (!r)
642 			r = -EAGAIN;
643 		return r;
644 	}
645 	/* initialize parser */
646 	memset(&parser, 0, sizeof(struct radeon_cs_parser));
647 	parser.filp = filp;
648 	parser.rdev = rdev;
649 	parser.dev = rdev->dev;
650 	parser.family = rdev->family;
651 	r = radeon_cs_parser_init(&parser, data);
652 	if (r) {
653 		DRM_ERROR("Failed to initialize parser !\n");
654 		radeon_cs_parser_fini(&parser, r, false);
655 		lockmgr(&rdev->exclusive_lock, LK_RELEASE);
656 		r = radeon_cs_handle_lockup(rdev, r);
657 		return r;
658 	}
659 
660 	r = radeon_cs_ib_fill(rdev, &parser);
661 	if (!r) {
662 		r = radeon_cs_parser_relocs(&parser);
663 		if (r && r != -ERESTARTSYS)
664 			DRM_ERROR("Failed to parse relocation %d!\n", r);
665 	}
666 
667 	if (r) {
668 		radeon_cs_parser_fini(&parser, r, false);
669 		lockmgr(&rdev->exclusive_lock, LK_RELEASE);
670 		r = radeon_cs_handle_lockup(rdev, r);
671 		return r;
672 	}
673 
 674 #ifdef TRACE_TODO
675 	trace_radeon_cs(&parser);
676 #endif
677 
678 	r = radeon_cs_ib_chunk(rdev, &parser);
679 	if (r) {
680 		goto out;
681 	}
682 	r = radeon_cs_ib_vm_chunk(rdev, &parser);
683 	if (r) {
684 		goto out;
685 	}
686 out:
687 	radeon_cs_parser_fini(&parser, r, true);
688 	lockmgr(&rdev->exclusive_lock, LK_RELEASE);
689 	r = radeon_cs_handle_lockup(rdev, r);
690 	return r;
691 }
692 
693 /**
694  * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
695  * @parser:	parser structure holding parsing context.
 696  * @p:		parser structure holding parsing context.
697  *
 698  * Assumes that chunk_ib_idx is properly set. Returns -EINVAL if the
 699  * packet is bigger than the remaining ib size or if the packet type is unknown.
700  **/
701 int radeon_cs_packet_parse(struct radeon_cs_parser *p,
702 			   struct radeon_cs_packet *pkt,
703 			   unsigned idx)
704 {
705 	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
706 	struct radeon_device *rdev = p->rdev;
707 	uint32_t header;
708 	int ret = 0, i;
709 
710 	if (idx >= ib_chunk->length_dw) {
711 		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
712 			  idx, ib_chunk->length_dw);
713 		return -EINVAL;
714 	}
715 	header = radeon_get_ib_value(p, idx);
716 	pkt->idx = idx;
717 	pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
718 	pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
719 	pkt->one_reg_wr = 0;
720 	switch (pkt->type) {
721 	case RADEON_PACKET_TYPE0:
722 		if (rdev->family < CHIP_R600) {
723 			pkt->reg = R100_CP_PACKET0_GET_REG(header);
724 			pkt->one_reg_wr =
725 				RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
726 		} else
727 			pkt->reg = R600_CP_PACKET0_GET_REG(header);
728 		break;
729 	case RADEON_PACKET_TYPE3:
730 		pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
731 		break;
732 	case RADEON_PACKET_TYPE2:
733 		pkt->count = -1;
734 		break;
735 	default:
736 		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
737 		ret = -EINVAL;
738 		goto dump_ib;
739 	}
740 	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
741 		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
742 			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
743 		ret = -EINVAL;
744 		goto dump_ib;
745 	}
746 	return 0;
747 
748 dump_ib:
749 	for (i = 0; i < ib_chunk->length_dw; i++) {
750 		if (i == idx)
751 			printk("\t0x%08x <---\n", radeon_get_ib_value(p, i));
752 		else
753 			printk("\t0x%08x\n", radeon_get_ib_value(p, i));
754 	}
755 	return ret;
756 }
757 
758 /**
759  * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
760  * @p:		structure holding the parser context.
761  *
762  * Check if the next packet is NOP relocation packet3.
 763  * Check if the next packet is a packet3 NOP relocation packet.
764 bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
765 {
766 	struct radeon_cs_packet p3reloc;
767 	int r;
768 
769 	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
770 	if (r)
771 		return false;
772 	if (p3reloc.type != RADEON_PACKET_TYPE3)
773 		return false;
774 	if (p3reloc.opcode != RADEON_PACKET3_NOP)
775 		return false;
776 	return true;
777 }
778 
779 /**
780  * radeon_cs_dump_packet() - dump raw packet context
781  * @p:		structure holding the parser context.
782  * @pkt:	structure holding the packet.
783  *
784  * Used mostly for debugging and error reporting.
785  **/
786 void radeon_cs_dump_packet(struct radeon_cs_parser *p,
787 			   struct radeon_cs_packet *pkt)
788 {
789 	volatile uint32_t *ib;
790 	unsigned i;
791 	unsigned idx;
792 
793 	ib = p->ib.ptr;
794 	idx = pkt->idx;
795 	for (i = 0; i <= (pkt->count + 1); i++, idx++)
796 		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
797 }
798 
799 /**
800  * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
 801  * @p:		parser structure holding parsing context.
 802  * @cs_reloc:	where to store the pointer to the relocation entry
 803  * @nomm:	legacy UMS path without memory management; the GPU offset is
 804  *		read straight from the relocation chunk instead of coming
 805  *		from the validated buffer list
 806  *
 807  * Check if the next packet is a relocation packet3 and, if so, return the
 808  * matching relocation entry through @cs_reloc.
809  **/
810 int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
811 				struct radeon_bo_list **cs_reloc,
812 				int nomm)
813 {
814 	struct radeon_cs_chunk *relocs_chunk;
815 	struct radeon_cs_packet p3reloc;
816 	unsigned idx;
817 	int r;
818 
819 	if (p->chunk_relocs_idx == -1) {
820 		DRM_ERROR("No relocation chunk !\n");
821 		return -EINVAL;
822 	}
823 	*cs_reloc = NULL;
824 	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
825 	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
826 	if (r)
827 		return r;
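	/* skip past the whole NOP packet: 1 header dword + (count + 1) data dwords */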
828 	p->idx += p3reloc.count + 2;
829 	if (p3reloc.type != RADEON_PACKET_TYPE3 ||
830 	    p3reloc.opcode != RADEON_PACKET3_NOP) {
831 		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
832 			  p3reloc.idx);
833 		radeon_cs_dump_packet(p, &p3reloc);
834 		return -EINVAL;
835 	}
836 	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
837 	if (idx >= relocs_chunk->length_dw) {
838 		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
839 			  idx, relocs_chunk->length_dw);
840 		radeon_cs_dump_packet(p, &p3reloc);
841 		return -EINVAL;
842 	}
843 	/* FIXME: we assume reloc size is 4 dwords */
844 	if (nomm) {
845 		*cs_reloc = p->relocs;
846 		(*cs_reloc)->gpu_offset =
847 			(u64)relocs_chunk->kdata[idx + 3] << 32;
848 		(*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
849 	} else
850 		*cs_reloc = p->relocs_ptr[(idx / 4)];
851 	return 0;
852 }
853