xref: /dragonfly/sys/dev/drm/radeon/radeon_cs.c (revision f2187f0a)
1 /*
2  * Copyright 2008 Jerome Glisse.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Jerome Glisse <glisse@freedesktop.org>
26  */
27 #include <linux/list_sort.h>
28 #include <drm/drmP.h>
29 #include <drm/radeon_drm.h>
30 #include "radeon_reg.h"
31 #include "radeon.h"
32 #ifdef TRACE_TODO
33 #include "radeon_trace.h"
34 #endif
35 
36 #define RADEON_CS_MAX_PRIORITY		32u
37 #define RADEON_CS_NUM_BUCKETS		(RADEON_CS_MAX_PRIORITY + 1)
38 
39 /* This is based on bucket sort with O(n) time complexity.
40  * An item with priority "i" is added to bucket[i]. The lists are then
41  * concatenated in descending order.
42  */
43 struct radeon_cs_buckets {
44 	struct list_head bucket[RADEON_CS_NUM_BUCKETS];
45 };
46 
47 static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
48 {
49 	unsigned i;
50 
51 	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
52 		INIT_LIST_HEAD(&b->bucket[i]);
53 }
54 
55 static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
56 				  struct list_head *item, unsigned priority)
57 {
58 	/* Since buffers which appear sooner in the relocation list are
59 	 * likely to be used more often than buffers which appear later
60 	 * in the list, the sort mustn't change the ordering of buffers
61 	 * with the same priority, i.e. it must be stable.
62 	 */
63 	list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
64 }
65 
66 static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
67 				       struct list_head *out_list)
68 {
69 	unsigned i;
70 
71 	/* Connect the sorted buckets in the output list.  list_splice() prepends
	 * each bucket, so the highest priority bucket ends up at the front. */
72 	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
73 		list_splice(&b->bucket[i], out_list);
74 	}
75 }
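/*
 * Illustrative sketch of how the three bucket helpers above fit together
 * (not part of the driver flow; radeon_cs_parser_relocs() below is the
 * real user):
 *
 *	struct radeon_cs_buckets b;
 *	struct list_head out, item_a, item_b, item_c;
 *
 *	INIT_LIST_HEAD(&out);
 *	radeon_cs_buckets_init(&b);
 *	radeon_cs_buckets_add(&b, &item_a, 2);
 *	radeon_cs_buckets_add(&b, &item_b, 0);
 *	radeon_cs_buckets_add(&b, &item_c, 2);
 *	radeon_cs_buckets_get_list(&b, &out);
 *
 * "out" now holds item_a, item_c, item_b: higher priorities come first and
 * entries of equal priority keep their insertion order (the sort is stable).
 */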
76 
77 static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
78 {
79 	struct radeon_cs_chunk *chunk;
80 	struct radeon_cs_buckets buckets;
81 	unsigned i, j;
82 	bool duplicate;
83 	int r;
84 
85 	if (p->chunk_relocs_idx == -1) {
86 		return 0;
87 	}
88 	chunk = &p->chunks[p->chunk_relocs_idx];
89 	p->dma_reloc_idx = 0;
90 	/* FIXME: we assume that each reloc uses 4 dwords */
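	/* Each 4-dword entry follows the layout of struct drm_radeon_cs_reloc
	 * (handle, read_domains, write_domain, flags), which is how the cast
	 * in the loop below interprets chunk->kdata. */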
91 	p->nrelocs = chunk->length_dw / 4;
92 	p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
93 	if (p->relocs_ptr == NULL) {
94 		return -ENOMEM;
95 	}
96 	p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_cs_reloc), GFP_KERNEL);
97 	if (p->relocs == NULL) {
98 		return -ENOMEM;
99 	}
100 
101 	radeon_cs_buckets_init(&buckets);
102 
103 	for (i = 0; i < p->nrelocs; i++) {
104 		struct drm_radeon_cs_reloc *r;
105 		unsigned priority;
106 
107 		duplicate = false;
108 		r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
109 		for (j = 0; j < i; j++) {
110 			if (r->handle == p->relocs[j].handle) {
111 				p->relocs_ptr[i] = &p->relocs[j];
112 				duplicate = true;
113 				break;
114 			}
115 		}
116 		if (duplicate) {
117 			p->relocs[i].handle = 0;
118 			continue;
119 		}
120 
121 		p->relocs[i].gobj = drm_gem_object_lookup(p->filp, r->handle);
122 		if (p->relocs[i].gobj == NULL) {
123 			DRM_ERROR("gem object lookup failed 0x%x\n",
124 				  r->handle);
125 			return -ENOENT;
126 		}
127 		p->relocs_ptr[i] = &p->relocs[i];
128 		p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);
129 
130 		/* The userspace buffer priorities are from 0 to 15. A higher
131 		 * number means the buffer is more important.
132 		 * Also, the buffers used for write have a higher priority than
133 		 * the buffers used for read only, which doubles the range
134 		 * to 0 to 31. 32 is reserved for the kernel driver.
135 		 */
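		/* For example, a reloc submitted with userspace priority 15 gets
		 * kernel priority 15 * 2 + 1 = 31 when it has a write domain and
		 * 30 when it is read-only; 32 (RADEON_CS_MAX_PRIORITY) is kept
		 * for kernel use such as the UVD msg buffer handled below. */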
136 		priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
137 			   + !!r->write_domain;
138 
139 		/* The first reloc of a UVD job is the msg and that must be in
140 		   VRAM; also put everything into VRAM on AGP cards and older
141 		   IGP chips to avoid image corruption. */
142 		if (p->ring == R600_RING_TYPE_UVD_INDEX &&
143 		    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev) ||
144 		     p->rdev->family == CHIP_RS780 ||
145 		     p->rdev->family == CHIP_RS880)) {
146 
147 			/* TODO: is this still needed for NI+ ? */
148 			p->relocs[i].prefered_domains =
149 				RADEON_GEM_DOMAIN_VRAM;
150 
151 			p->relocs[i].allowed_domains =
152 				RADEON_GEM_DOMAIN_VRAM;
153 
154 			/* prioritize this over any other relocation */
155 			priority = RADEON_CS_MAX_PRIORITY;
156 		} else {
157 			uint32_t domain = r->write_domain ?
158 				r->write_domain : r->read_domains;
159 
160 			if (domain & RADEON_GEM_DOMAIN_CPU) {
161 				DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid "
162 					  "for command submission\n");
163 				return -EINVAL;
164 			}
165 
166 			p->relocs[i].prefered_domains = domain;
167 			if (domain == RADEON_GEM_DOMAIN_VRAM)
168 				domain |= RADEON_GEM_DOMAIN_GTT;
169 			p->relocs[i].allowed_domains = domain;
170 		}
171 
172 #if 0
173 		if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) {
174 			uint32_t domain = p->relocs[i].prefered_domains;
175 			if (!(domain & RADEON_GEM_DOMAIN_GTT)) {
176 				DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is "
177 					  "allowed for userptr BOs\n");
178 				return -EINVAL;
179 			}
180 			need_mmap_lock = true;
181 			domain = RADEON_GEM_DOMAIN_GTT;
182 			p->relocs[i].prefered_domains = domain;
183 			p->relocs[i].allowed_domains = domain;
184 		}
185 #endif
186 
187 		p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
188 		p->relocs[i].tv.shared = !r->write_domain;
189 		p->relocs[i].handle = r->handle;
190 
191 		radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
192 				      priority);
193 	}
194 
195 	radeon_cs_buckets_get_list(&buckets, &p->validated);
196 
197 	if (p->cs_flags & RADEON_CS_USE_VM)
198 		p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
199 					      &p->validated);
200 
201 	r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);
202 
203 	return r;
204 }
205 
206 static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
207 {
208 	p->priority = priority;
209 
210 	switch (ring) {
211 	default:
212 		DRM_ERROR("unknown ring id: %d\n", ring);
213 		return -EINVAL;
214 	case RADEON_CS_RING_GFX:
215 		p->ring = RADEON_RING_TYPE_GFX_INDEX;
216 		break;
217 	case RADEON_CS_RING_COMPUTE:
218 		if (p->rdev->family >= CHIP_TAHITI) {
219 			if (p->priority > 0)
220 				p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
221 			else
222 				p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
223 		} else
224 			p->ring = RADEON_RING_TYPE_GFX_INDEX;
225 		break;
226 	case RADEON_CS_RING_DMA:
227 		if (p->rdev->family >= CHIP_CAYMAN) {
228 			if (p->priority > 0)
229 				p->ring = R600_RING_TYPE_DMA_INDEX;
230 			else
231 				p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
232 		} else if (p->rdev->family >= CHIP_RV770) {
233 			p->ring = R600_RING_TYPE_DMA_INDEX;
234 		} else {
235 			return -EINVAL;
236 		}
237 		break;
238 	case RADEON_CS_RING_UVD:
239 		p->ring = R600_RING_TYPE_UVD_INDEX;
240 		break;
241 	case RADEON_CS_RING_VCE:
242 		/* TODO: only use the low priority ring for now */
243 		p->ring = TN_RING_TYPE_VCE1_INDEX;
244 		break;
245 	}
246 	return 0;
247 }
248 
249 static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
250 {
251 	struct radeon_cs_reloc *reloc;
252 	int r;
253 
254 	list_for_each_entry(reloc, &p->validated, tv.head) {
255 		struct reservation_object *resv;
256 
257 		resv = reloc->robj->tbo.resv;
258 		r = radeon_semaphore_sync_resv(p->rdev, p->ib.semaphore, resv,
259 					       reloc->tv.shared);
260 		if (r)
261 			return r;
262 	}
263 	return 0;
264 }
265 
266 /* XXX: note that this is called from the legacy UMS CS ioctl as well */
267 int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
268 {
269 	struct drm_radeon_cs *cs = data;
270 	uint64_t *chunk_array_ptr;
271 	unsigned size, i;
272 	u32 ring = RADEON_CS_RING_GFX;
273 	s32 priority = 0;
274 
275 	if (!cs->num_chunks) {
276 		return 0;
277 	}
278 	/* get chunks */
279 	INIT_LIST_HEAD(&p->validated);
280 	p->idx = 0;
281 	p->ib.sa_bo = NULL;
282 	p->ib.semaphore = NULL;
283 	p->const_ib.sa_bo = NULL;
284 	p->const_ib.semaphore = NULL;
285 	p->chunk_ib_idx = -1;
286 	p->chunk_relocs_idx = -1;
287 	p->chunk_flags_idx = -1;
288 	p->chunk_const_ib_idx = -1;
289 	p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
290 	if (p->chunks_array == NULL) {
291 		return -ENOMEM;
292 	}
293 	chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
294 	if (copy_from_user(p->chunks_array, chunk_array_ptr,
295 			       sizeof(uint64_t)*cs->num_chunks)) {
296 		return -EFAULT;
297 	}
298 	p->cs_flags = 0;
299 	p->nchunks = cs->num_chunks;
300 	p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
301 	if (p->chunks == NULL) {
302 		return -ENOMEM;
303 	}
304 	for (i = 0; i < p->nchunks; i++) {
305 		struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
306 		struct drm_radeon_cs_chunk user_chunk;
307 		uint32_t __user *cdata;
308 
309 		chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i];
310 		if (copy_from_user(&user_chunk, chunk_ptr,
311 				       sizeof(struct drm_radeon_cs_chunk))) {
312 			return -EFAULT;
313 		}
314 		p->chunks[i].length_dw = user_chunk.length_dw;
315 		p->chunks[i].chunk_id = user_chunk.chunk_id;
316 		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) {
317 			p->chunk_relocs_idx = i;
318 		}
319 		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
320 			p->chunk_ib_idx = i;
321 			/* zero length IB isn't useful */
322 			if (p->chunks[i].length_dw == 0)
323 				return -EINVAL;
324 		}
325 		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB) {
326 			p->chunk_const_ib_idx = i;
327 			/* zero length CONST IB isn't useful */
328 			if (p->chunks[i].length_dw == 0)
329 				return -EINVAL;
330 		}
331 		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
332 			p->chunk_flags_idx = i;
333 			/* zero length flags aren't useful */
334 			if (p->chunks[i].length_dw == 0)
335 				return -EINVAL;
336 		}
337 
338 		size = p->chunks[i].length_dw;
339 		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
340 		p->chunks[i].user_ptr = cdata;
341 		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB)
342 			continue;
343 
344 		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
345 			if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
346 				continue;
347 		}
348 
349 		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
350 		size *= sizeof(uint32_t);
351 		if (p->chunks[i].kdata == NULL) {
352 			return -ENOMEM;
353 		}
354 		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
355 			return -EFAULT;
356 		}
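		/* Layout of the FLAGS chunk as consumed below: dword 0 holds the
		 * RADEON_CS_* flags, dword 1 (if present) the requested ring and
		 * dword 2 (if present) the ring priority. */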
357 		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
358 			p->cs_flags = p->chunks[i].kdata[0];
359 			if (p->chunks[i].length_dw > 1)
360 				ring = p->chunks[i].kdata[1];
361 			if (p->chunks[i].length_dw > 2)
362 				priority = (s32)p->chunks[i].kdata[2];
363 		}
364 	}
365 
366 	/* these are KMS only */
367 	if (p->rdev) {
368 		if ((p->cs_flags & RADEON_CS_USE_VM) &&
369 		    !p->rdev->vm_manager.enabled) {
370 			DRM_ERROR("VM not active on asic!\n");
371 			return -EINVAL;
372 		}
373 
374 		if (radeon_cs_get_ring(p, ring, priority))
375 			return -EINVAL;
376 
377 		/* we only support VM on some SI+ rings */
378 		if ((p->cs_flags & RADEON_CS_USE_VM) == 0) {
379 			if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) {
380 				DRM_ERROR("Ring %d requires VM!\n", p->ring);
381 				return -EINVAL;
382 			}
383 		} else {
384 			if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) {
385 				DRM_ERROR("VM not supported on ring %d!\n",
386 					  p->ring);
387 				return -EINVAL;
388 			}
389 		}
390 	}
391 
392 	return 0;
393 }
394 
395 static int cmp_size_smaller_first(void *priv, struct list_head *a,
396 				  struct list_head *b)
397 {
398 	struct radeon_cs_reloc *la = list_entry(a, struct radeon_cs_reloc, tv.head);
399 	struct radeon_cs_reloc *lb = list_entry(b, struct radeon_cs_reloc, tv.head);
400 
401 	/* Sort A before B if A is smaller. */
402 	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
403 }
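/* A negative return sorts A before B; e.g. a 1-page BO compared against a
 * 64-page BO yields -63, so the smaller BO ends up earlier in the list. */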
404 
405 /**
406  * radeon_cs_parser_fini() - clean parser states
407  * @parser:	parser structure holding parsing context.
408  * @error:	error number
409  * @backoff:	indicator to backoff the reservation
410  *
411  * If error is set, unvalidate the buffers, otherwise just free the memory used by the parsing context.
412  **/
413 static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
414 {
415 	unsigned i;
416 
417 	if (!error) {
418 		/* Sort the buffer list from the smallest to largest buffer,
419 		 * which affects the order of buffers in the LRU list.
420 		 * This assures that the smallest buffers are added first
421 		 * to the LRU list, so they are likely to be later evicted
422 		 * first, instead of large buffers whose eviction is more
423 		 * expensive.
424 		 *
425 		 * This slightly lowers the number of bytes moved by TTM
426 		 * per frame under memory pressure.
427 		 */
428 		list_sort(NULL, &parser->validated, cmp_size_smaller_first);
429 
430 		ttm_eu_fence_buffer_objects(&parser->ticket,
431 					    &parser->validated,
432 					    &parser->ib.fence->base);
433 	} else if (backoff) {
434 		ttm_eu_backoff_reservation(&parser->ticket,
435 					   &parser->validated);
436 	}
437 
438 	if (parser->relocs != NULL) {
439 		for (i = 0; i < parser->nrelocs; i++) {
440 			if (parser->relocs[i].gobj)
441 				drm_gem_object_unreference_unlocked(parser->relocs[i].gobj);
442 		}
443 	}
444 	kfree(parser->track);
445 	kfree(parser->relocs);
446 	kfree(parser->relocs_ptr);
447 	drm_free_large(parser->vm_bos);
448 	for (i = 0; i < parser->nchunks; i++)
449 		drm_free_large(parser->chunks[i].kdata);
450 	kfree(parser->chunks);
451 	kfree(parser->chunks_array);
452 	radeon_ib_free(parser->rdev, &parser->ib);
453 	radeon_ib_free(parser->rdev, &parser->const_ib);
454 }
455 
456 static int radeon_cs_ib_chunk(struct radeon_device *rdev,
457 			      struct radeon_cs_parser *parser)
458 {
459 	int r;
460 
461 	if (parser->chunk_ib_idx == -1)
462 		return 0;
463 
464 	if (parser->cs_flags & RADEON_CS_USE_VM)
465 		return 0;
466 
467 	r = radeon_cs_parse(rdev, parser->ring, parser);
468 	if (r || parser->parser_error) {
469 		DRM_ERROR("Invalid command stream !\n");
470 		return r;
471 	}
472 
473 	r = radeon_cs_sync_rings(parser);
474 	if (r) {
475 		if (r != -ERESTARTSYS)
476 			DRM_ERROR("Failed to sync rings: %i\n", r);
477 		return r;
478 	}
479 
480 	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
481 		radeon_uvd_note_usage(rdev);
482 	else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
483 		 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
484 		radeon_vce_note_usage(rdev);
485 
486 	r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
487 	if (r) {
488 		DRM_ERROR("Failed to schedule IB !\n");
489 	}
490 	return r;
491 }
492 
493 static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
494 				   struct radeon_vm *vm)
495 {
496 	struct radeon_device *rdev = p->rdev;
497 	struct radeon_bo_va *bo_va;
498 	int i, r;
499 
500 	r = radeon_vm_update_page_directory(rdev, vm);
501 	if (r)
502 		return r;
503 
504 	r = radeon_vm_clear_freed(rdev, vm);
505 	if (r)
506 		return r;
507 
508 	if (vm->ib_bo_va == NULL) {
509 		DRM_ERROR("Tmp BO not in VM!\n");
510 		return -EINVAL;
511 	}
512 
513 	r = radeon_vm_bo_update(rdev, vm->ib_bo_va,
514 				&rdev->ring_tmp_bo.bo->tbo.mem);
515 	if (r)
516 		return r;
517 
518 	for (i = 0; i < p->nrelocs; i++) {
519 		struct radeon_bo *bo;
520 
521 		/* ignore duplicates */
522 		if (p->relocs_ptr[i] != &p->relocs[i])
523 			continue;
524 
525 		bo = p->relocs[i].robj;
526 		bo_va = radeon_vm_bo_find(vm, bo);
527 		if (bo_va == NULL) {
528 			dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
529 			return -EINVAL;
530 		}
531 
532 		r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
533 		if (r)
534 			return r;
535 	}
536 
537 	return radeon_vm_clear_invalids(rdev, vm);
538 }
539 
540 static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
541 				 struct radeon_cs_parser *parser)
542 {
543 	struct radeon_fpriv *fpriv = parser->filp->driver_priv;
544 	struct radeon_vm *vm = &fpriv->vm;
545 	int r;
546 
547 	if (parser->chunk_ib_idx == -1)
548 		return 0;
549 	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
550 		return 0;
551 
552 	if (parser->const_ib.length_dw) {
553 		r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
554 		if (r) {
555 			return r;
556 		}
557 	}
558 
559 	r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
560 	if (r) {
561 		return r;
562 	}
563 
564 	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
565 		radeon_uvd_note_usage(rdev);
566 
567 	mutex_lock(&vm->mutex);
568 	r = radeon_bo_vm_update_pte(parser, vm);
569 	if (r) {
570 		goto out;
571 	}
572 
573 	r = radeon_cs_sync_rings(parser);
574 	if (r) {
575 		if (r != -ERESTARTSYS)
576 			DRM_ERROR("Failed to sync rings: %i\n", r);
577 		goto out;
578 	}
579 	radeon_semaphore_sync_fence(parser->ib.semaphore, vm->fence);
580 
581 	if ((rdev->family >= CHIP_TAHITI) &&
582 	    (parser->chunk_const_ib_idx != -1)) {
583 		r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
584 	} else {
585 		r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
586 	}
587 
588 out:
589 	mutex_unlock(&vm->mutex);
590 	return r;
591 }
592 
593 static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
594 {
595 	if (r == -EDEADLK) {
596 		r = radeon_gpu_reset(rdev);
597 		if (!r)
598 			r = -EAGAIN;
599 	}
600 	return r;
601 }
602 
603 static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser)
604 {
605 	struct radeon_cs_chunk *ib_chunk;
606 	struct radeon_vm *vm = NULL;
607 	int r;
608 
609 	if (parser->chunk_ib_idx == -1)
610 		return 0;
611 
612 	if (parser->cs_flags & RADEON_CS_USE_VM) {
613 		struct radeon_fpriv *fpriv = parser->filp->driver_priv;
614 		vm = &fpriv->vm;
615 
616 		if ((rdev->family >= CHIP_TAHITI) &&
617 		    (parser->chunk_const_ib_idx != -1)) {
618 			ib_chunk = &parser->chunks[parser->chunk_const_ib_idx];
619 			if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
620 				DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
621 				return -EINVAL;
622 			}
623 			r =  radeon_ib_get(rdev, parser->ring, &parser->const_ib,
624 					   vm, ib_chunk->length_dw * 4);
625 			if (r) {
626 				DRM_ERROR("Failed to get const ib !\n");
627 				return r;
628 			}
629 			parser->const_ib.is_const_ib = true;
630 			parser->const_ib.length_dw = ib_chunk->length_dw;
631 			if (copy_from_user(parser->const_ib.ptr,
632 					       ib_chunk->user_ptr,
633 					       ib_chunk->length_dw * 4))
634 				return -EFAULT;
635 		}
636 
637 		ib_chunk = &parser->chunks[parser->chunk_ib_idx];
638 		if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
639 			DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
640 			return -EINVAL;
641 		}
642 	}
643 	ib_chunk = &parser->chunks[parser->chunk_ib_idx];
644 
645 	r =  radeon_ib_get(rdev, parser->ring, &parser->ib,
646 			   vm, ib_chunk->length_dw * 4);
647 	if (r) {
648 		DRM_ERROR("Failed to get ib !\n");
649 		return r;
650 	}
651 	parser->ib.length_dw = ib_chunk->length_dw;
652 	if (ib_chunk->kdata)
653 		memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
654 	else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4))
655 		return -EFAULT;
656 	return 0;
657 }
658 
659 int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
660 {
661 	struct radeon_device *rdev = dev->dev_private;
662 	struct radeon_cs_parser parser;
663 	int r;
664 
665 	down_read(&rdev->exclusive_lock);
666 	if (!rdev->accel_working) {
667 		up_read(&rdev->exclusive_lock);
668 		return -EBUSY;
669 	}
670 	if (rdev->in_reset) {
671 		up_read(&rdev->exclusive_lock);
672 		r = radeon_gpu_reset(rdev);
673 		if (!r)
674 			r = -EAGAIN;
675 		return r;
676 	}
677 	/* initialize parser */
678 	memset(&parser, 0, sizeof(struct radeon_cs_parser));
679 	parser.filp = filp;
680 	parser.rdev = rdev;
681 	parser.dev = rdev->dev;
682 	parser.family = rdev->family;
683 	r = radeon_cs_parser_init(&parser, data);
684 	if (r) {
685 		DRM_ERROR("Failed to initialize parser !\n");
686 		radeon_cs_parser_fini(&parser, r, false);
687 		up_read(&rdev->exclusive_lock);
688 		r = radeon_cs_handle_lockup(rdev, r);
689 		return r;
690 	}
691 
692 	r = radeon_cs_ib_fill(rdev, &parser);
693 	if (!r) {
694 		r = radeon_cs_parser_relocs(&parser);
695 		if (r && r != -ERESTARTSYS)
696 			DRM_ERROR("Failed to parse relocation %d!\n", r);
697 	}
698 
699 	if (r) {
700 		radeon_cs_parser_fini(&parser, r, false);
701 		up_read(&rdev->exclusive_lock);
702 		r = radeon_cs_handle_lockup(rdev, r);
703 		return r;
704 	}
705 
706 #ifdef TRACE_TODO
707 	trace_radeon_cs(&parser);
708 #endif
709 
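	/* Only one of the two submit helpers below does real work for a given
	 * CS: radeon_cs_ib_chunk() returns early when RADEON_CS_USE_VM is set
	 * and radeon_cs_ib_vm_chunk() returns early when it is not. */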
710 	r = radeon_cs_ib_chunk(rdev, &parser);
711 	if (r) {
712 		goto out;
713 	}
714 	r = radeon_cs_ib_vm_chunk(rdev, &parser);
715 	if (r) {
716 		goto out;
717 	}
718 out:
719 	radeon_cs_parser_fini(&parser, r, true);
720 	up_read(&rdev->exclusive_lock);
721 	r = radeon_cs_handle_lockup(rdev, r);
722 	return r;
723 }
724 
725 /**
726  * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
727  * @parser:	parser structure holding parsing context.
728  * @pkt:	where to store packet information
729  *
730  * Assumes that chunk_ib_index is properly set. Returns -EINVAL if the
731  * packet is bigger than the remaining ib size, or if the packet is unknown.
732  **/
733 int radeon_cs_packet_parse(struct radeon_cs_parser *p,
734 			   struct radeon_cs_packet *pkt,
735 			   unsigned idx)
736 {
737 	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
738 	struct radeon_device *rdev = p->rdev;
739 	uint32_t header;
740 
741 	if (idx >= ib_chunk->length_dw) {
742 		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
743 			  idx, ib_chunk->length_dw);
744 		return -EINVAL;
745 	}
746 	header = radeon_get_ib_value(p, idx);
747 	pkt->idx = idx;
748 	pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
749 	pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
750 	pkt->one_reg_wr = 0;
751 	switch (pkt->type) {
752 	case RADEON_PACKET_TYPE0:
753 		if (rdev->family < CHIP_R600) {
754 			pkt->reg = R100_CP_PACKET0_GET_REG(header);
755 			pkt->one_reg_wr =
756 				RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
757 		} else
758 			pkt->reg = R600_CP_PACKET0_GET_REG(header);
759 		break;
760 	case RADEON_PACKET_TYPE3:
761 		pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
762 		break;
763 	case RADEON_PACKET_TYPE2:
764 		pkt->count = -1;
765 		break;
766 	default:
767 		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
768 		return -EINVAL;
769 	}
770 	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
771 		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
772 			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
773 		return -EINVAL;
774 	}
775 	return 0;
776 }
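/* A parsed packet thus spans pkt->count + 2 dwords in the IB: the header at
 * pkt->idx plus pkt->count + 1 payload dwords. This is what the bounds check
 * above and the "p->idx += p3reloc.count + 2" advance further below rely on. */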
777 
778 /**
779  * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
780  * @p:		structure holding the parser context.
781  *
782  * Check if the next packet is NOP relocation packet3.
783  **/
784 bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
785 {
786 	struct radeon_cs_packet p3reloc;
787 	int r;
788 
789 	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
790 	if (r)
791 		return false;
792 	if (p3reloc.type != RADEON_PACKET_TYPE3)
793 		return false;
794 	if (p3reloc.opcode != RADEON_PACKET3_NOP)
795 		return false;
796 	return true;
797 }
798 
799 /**
800  * radeon_cs_dump_packet() - dump raw packet context
801  * @p:		structure holding the parser context.
802  * @pkt:	structure holding the packet.
803  *
804  * Used mostly for debugging and error reporting.
805  **/
806 void radeon_cs_dump_packet(struct radeon_cs_parser *p,
807 			   struct radeon_cs_packet *pkt)
808 {
809 	volatile uint32_t *ib;
810 	unsigned i;
811 	unsigned idx;
812 
813 	ib = p->ib.ptr;
814 	idx = pkt->idx;
815 	for (i = 0; i <= (pkt->count + 1); i++, idx++)
816 		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
817 }
818 
819 /**
820  * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
821  * @p:			parser structure holding parsing context.
822  * @cs_reloc:		reloc information
823  * @nomm:		no memory management for debugging
824  *
825  * Check if the next packet is a relocation packet3, do bo validation and
826  * compute the GPU offset using the provided start.
827  *
828  * Returns 0 on success or a negative error code on failure.
829  **/
830 int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
831 				struct radeon_cs_reloc **cs_reloc,
832 				int nomm)
833 {
834 	struct radeon_cs_chunk *relocs_chunk;
835 	struct radeon_cs_packet p3reloc;
836 	unsigned idx;
837 	int r;
838 
839 	if (p->chunk_relocs_idx == -1) {
840 		DRM_ERROR("No relocation chunk !\n");
841 		return -EINVAL;
842 	}
843 	*cs_reloc = NULL;
844 	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
845 	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
846 	if (r)
847 		return r;
848 	p->idx += p3reloc.count + 2;
849 	if (p3reloc.type != RADEON_PACKET_TYPE3 ||
850 	    p3reloc.opcode != RADEON_PACKET3_NOP) {
851 		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
852 			  p3reloc.idx);
853 		radeon_cs_dump_packet(p, &p3reloc);
854 		return -EINVAL;
855 	}
856 	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
857 	if (idx >= relocs_chunk->length_dw) {
858 		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
859 			  idx, relocs_chunk->length_dw);
860 		radeon_cs_dump_packet(p, &p3reloc);
861 		return -EINVAL;
862 	}
863 	/* FIXME: we assume reloc size is 4 dwords */
864 	if (nomm) {
865 		*cs_reloc = p->relocs;
866 		(*cs_reloc)->gpu_offset =
867 			(u64)relocs_chunk->kdata[idx + 3] << 32;
868 		(*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
869 	} else
870 		*cs_reloc = p->relocs_ptr[(idx / 4)];
871 	return 0;
872 }
873