/*	$NetBSD: nouveau_subdev_vm_base.c,v 1.3 2015/10/22 22:39:46 jmcneill Exp $	*/

/*
 * Copyright 2010 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Ben Skeggs
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nouveau_subdev_vm_base.c,v 1.3 2015/10/22 22:39:46 jmcneill Exp $");

#include <core/gpuobj.h>
#include <core/mm.h>

#include <subdev/fb.h>
#include <subdev/vm.h>

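/*
 * Map the memory regions backing `node' into the address space at `vma',
 * starting `delta' bytes into the mapping.  Walks each region's pages,
 * crossing page-table (PDE) boundaries as needed, then flushes the VM.
 */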
void
nouveau_vm_map_at(struct nouveau_vma *vma, u64 delta, struct nouveau_mem *node)
{
	struct nouveau_vm *vm = vma->vm;
	struct nouveau_vmmgr *vmm = vm->vmm;
	struct nouveau_mm_node *r;
	int big = vma->node->type != vmm->spg_shift;
	u32 offset = vma->node->offset + (delta >> 12);
	u32 bits = vma->node->type - 12;
	u32 pde  = (offset >> vmm->pgt_bits) - vm->fpde;
	u32 pte  = (offset & ((1 << vmm->pgt_bits) - 1)) >> bits;
	u32 max  = 1 << (vmm->pgt_bits - bits);
	u32 end, len;

	delta = 0;
	list_for_each_entry(r, &node->regions, rl_entry) {
		u64 phys = (u64)r->offset << 12;
		u32 num  = r->length >> bits;

		while (num) {
			struct nouveau_gpuobj *pgt = vm->pgt[pde].obj[big];

			end = (pte + num);
			if (unlikely(end >= max))
				end = max;
			len = end - pte;

			vmm->map(vma, pgt, node, pte, len, phys, delta);

			num -= len;
			pte += len;
			if (unlikely(end >= max)) {
				phys += len << (bits + 12);
				pde++;
				pte = 0;
			}

			delta += (u64)len << vma->node->type;
		}
	}

	vmm->flush(vm);
}

#ifdef __NetBSD__

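/*
 * NetBSD: map the bus_dma segments of `mem' into the address space at
 * `vma', one GPU page at a time, for `length' bytes starting `delta'
 * bytes into the mapping.
 */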
static void
nouveau_vm_map_dma(struct nouveau_vma *vma, u64 delta, u64 length,
    struct nouveau_mem *mem)
{
	struct nouveau_vm *vm = vma->vm;
	struct nouveau_vmmgr *vmm = vm->vmm;
	int big = vma->node->type != vmm->spg_shift;
	u32 offset = vma->node->offset + (delta >> 12);
	u32 bits = vma->node->type - 12;
	u32 num  = length >> vma->node->type;
	u32 pde  = (offset >> vmm->pgt_bits) - vm->fpde;
	u32 pte  = (offset & ((1 << vmm->pgt_bits) - 1)) >> bits;
	u32 max  = 1 << (vmm->pgt_bits - bits);
	unsigned seg, pgoff;

	for (seg = 0; seg < mem->pages->dm_nsegs; seg++) {
		struct nouveau_gpuobj *pgt = vm->pgt[pde].obj[big];
		dma_addr_t addr = mem->pages->dm_segs[seg].ds_addr;

		KASSERT((mem->pages->dm_segs[seg].ds_len & NOUVEAU_GPU_PAGE_MASK) == 0);
		for (pgoff = 0; pgoff < mem->pages->dm_segs[seg].ds_len;
		     pgoff += NOUVEAU_GPU_PAGE_SIZE, addr += NOUVEAU_GPU_PAGE_SIZE) {

			vmm->map_sg(vma, pgt, mem, pte, 1, &addr);
			num--;
			pte++;

			if (num == 0)
				goto finish;

			if (__predict_false(pte >= max)) {
				pde++;
				pte = 0;
			}
		}
	}

finish:
	vmm->flush(vm);
}

#else

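/*
 * Map a scatter-gather table into the address space at `vma', one page
 * at a time, for `length' bytes starting `delta' bytes into the mapping.
 */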
static void
nouveau_vm_map_sg_table(struct nouveau_vma *vma, u64 delta, u64 length,
			struct nouveau_mem *mem)
{
	struct nouveau_vm *vm = vma->vm;
	struct nouveau_vmmgr *vmm = vm->vmm;
	int big = vma->node->type != vmm->spg_shift;
	u32 offset = vma->node->offset + (delta >> 12);
	u32 bits = vma->node->type - 12;
	u32 num  = length >> vma->node->type;
	u32 pde  = (offset >> vmm->pgt_bits) - vm->fpde;
	u32 pte  = (offset & ((1 << vmm->pgt_bits) - 1)) >> bits;
	u32 max  = 1 << (vmm->pgt_bits - bits);
	unsigned m, sglen;
	u32 end, len;
	int i;
	struct scatterlist *sg;

	for_each_sg(mem->sg->sgl, sg, mem->sg->nents, i) {
		struct nouveau_gpuobj *pgt = vm->pgt[pde].obj[big];
		sglen = sg_dma_len(sg) >> PAGE_SHIFT;

		end = pte + sglen;
		if (unlikely(end >= max))
			end = max;
		len = end - pte;

		for (m = 0; m < len; m++) {
			dma_addr_t addr = sg_dma_address(sg) + (m << PAGE_SHIFT);

			vmm->map_sg(vma, pgt, mem, pte, 1, &addr);
			num--;
			pte++;

			if (num == 0)
				goto finish;
		}
		if (unlikely(end >= max)) {
			pde++;
			pte = 0;
		}
		if (m < sglen) {
			for (; m < sglen; m++) {
				dma_addr_t addr = sg_dma_address(sg) + (m << PAGE_SHIFT);

				vmm->map_sg(vma, pgt, mem, pte, 1, &addr);
				num--;
				pte++;
				if (num == 0)
					goto finish;
			}
		}

	}
finish:
	vmm->flush(vm);
}

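/*
 * Map a flat array of DMA page addresses (mem->pages) into the address
 * space at `vma', batching as many PTEs per map_sg() call as fit within
 * the current page table.
 */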
static void
nouveau_vm_map_sg(struct nouveau_vma *vma, u64 delta, u64 length,
		  struct nouveau_mem *mem)
{
	struct nouveau_vm *vm = vma->vm;
	struct nouveau_vmmgr *vmm = vm->vmm;
	dma_addr_t *list = mem->pages;
	int big = vma->node->type != vmm->spg_shift;
	u32 offset = vma->node->offset + (delta >> 12);
	u32 bits = vma->node->type - 12;
	u32 num  = length >> vma->node->type;
	u32 pde  = (offset >> vmm->pgt_bits) - vm->fpde;
	u32 pte  = (offset & ((1 << vmm->pgt_bits) - 1)) >> bits;
	u32 max  = 1 << (vmm->pgt_bits - bits);
	u32 end, len;

	while (num) {
		struct nouveau_gpuobj *pgt = vm->pgt[pde].obj[big];

		end = (pte + num);
		if (unlikely(end >= max))
			end = max;
		len = end - pte;

		vmm->map_sg(vma, pgt, mem, pte, len, list);

		num  -= len;
		pte  += len;
		list += len;
		if (unlikely(end >= max)) {
			pde++;
			pte = 0;
		}
	}

	vmm->flush(vm);
}

#endif

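/*
 * Map an entire memory object at `vma'.  On NetBSD, DMA-mapped memory
 * (node->pages) goes through nouveau_vm_map_dma(); otherwise the
 * region-list path nouveau_vm_map_at() is used.  The non-NetBSD build
 * additionally handles scatter-gather tables and page lists.
 */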
void
nouveau_vm_map(struct nouveau_vma *vma, struct nouveau_mem *node)
{
#ifdef __NetBSD__
	if (node->pages)
		nouveau_vm_map_dma(vma, 0, node->size << 12, node);
	else
		nouveau_vm_map_at(vma, 0, node);
#else
	if (node->sg)
		nouveau_vm_map_sg_table(vma, 0, node->size << 12, node);
	else
	if (node->pages)
		nouveau_vm_map_sg(vma, 0, node->size << 12, node);
	else
		nouveau_vm_map_at(vma, 0, node);
#endif
}

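/*
 * Unmap `length' bytes starting `delta' bytes into the mapping at `vma',
 * then flush the VM.
 */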
void
nouveau_vm_unmap_at(struct nouveau_vma *vma, u64 delta, u64 length)
{
	struct nouveau_vm *vm = vma->vm;
	struct nouveau_vmmgr *vmm = vm->vmm;
	int big = vma->node->type != vmm->spg_shift;
	u32 offset = vma->node->offset + (delta >> 12);
	u32 bits = vma->node->type - 12;
	u32 num  = length >> vma->node->type;
	u32 pde  = (offset >> vmm->pgt_bits) - vm->fpde;
	u32 pte  = (offset & ((1 << vmm->pgt_bits) - 1)) >> bits;
	u32 max  = 1 << (vmm->pgt_bits - bits);
	u32 end, len;

	while (num) {
		struct nouveau_gpuobj *pgt = vm->pgt[pde].obj[big];

		end = (pte + num);
		if (unlikely(end >= max))
			end = max;
		len = end - pte;

		vmm->unmap(pgt, pte, len);

		num -= len;
		pte += len;
		if (unlikely(end >= max)) {
			pde++;
			pte = 0;
		}
	}

	vmm->flush(vm);
}

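/* Unmap the entire range covered by `vma'. */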
void
nouveau_vm_unmap(struct nouveau_vma *vma)
{
	nouveau_vm_unmap_at(vma, 0, (u64)vma->node->length << 12);
}

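/*
 * Drop a reference on the page tables of the given size (`big') for PDEs
 * fpde..lpde; free any that reach zero and clear their entries in every
 * page directory.  Called with the vmmgr mutex held; the mutex is dropped
 * around the gpuobj release.
 */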
static void
nouveau_vm_unmap_pgt(struct nouveau_vm *vm, int big, u32 fpde, u32 lpde)
{
	struct nouveau_vmmgr *vmm = vm->vmm;
	struct nouveau_vm_pgd *vpgd;
	struct nouveau_vm_pgt *vpgt;
	struct nouveau_gpuobj *pgt;
	u32 pde;

	for (pde = fpde; pde <= lpde; pde++) {
		vpgt = &vm->pgt[pde - vm->fpde];
		if (--vpgt->refcount[big])
			continue;

		pgt = vpgt->obj[big];
		vpgt->obj[big] = NULL;

		list_for_each_entry(vpgd, &vm->pgd_list, head) {
			vmm->map_pgt(vpgd->obj, pde, vpgt->obj);
		}

		mutex_unlock(&nv_subdev(vmm)->mutex);
		nouveau_gpuobj_ref(NULL, &pgt);
		mutex_lock(&nv_subdev(vmm)->mutex);
	}
}

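/*
 * Allocate a page table for `pde' with page size `type' and point every
 * page directory at it.  Called with the vmmgr mutex held; the mutex is
 * dropped around the allocation, so a racing caller may fill the PDE
 * first, in which case the new object is released and the existing one
 * is reused.
 */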
static int
nouveau_vm_map_pgt(struct nouveau_vm *vm, u32 pde, u32 type)
{
	struct nouveau_vmmgr *vmm = vm->vmm;
	struct nouveau_vm_pgt *vpgt = &vm->pgt[pde - vm->fpde];
	struct nouveau_vm_pgd *vpgd;
	struct nouveau_gpuobj *pgt;
	int big = (type != vmm->spg_shift);
	u32 pgt_size;
	int ret;

	pgt_size  = (1 << (vmm->pgt_bits + 12)) >> type;
	pgt_size *= 8;

	mutex_unlock(&nv_subdev(vmm)->mutex);
	ret = nouveau_gpuobj_new(nv_object(vm->vmm), NULL, pgt_size, 0x1000,
				 NVOBJ_FLAG_ZERO_ALLOC, &pgt);
	mutex_lock(&nv_subdev(vmm)->mutex);
	if (unlikely(ret))
		return ret;

	/* someone beat us to filling the PDE while we didn't have the lock */
	if (unlikely(vpgt->refcount[big]++)) {
		mutex_unlock(&nv_subdev(vmm)->mutex);
		nouveau_gpuobj_ref(NULL, &pgt);
		mutex_lock(&nv_subdev(vmm)->mutex);
		return 0;
	}

	vpgt->obj[big] = pgt;
	list_for_each_entry(vpgd, &vm->pgd_list, head) {
		vmm->map_pgt(vpgd->obj, pde, vpgt->obj);
	}

	return 0;
}

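/*
 * Allocate `size' bytes of address space from `vm' with the given page
 * shift and access flags, taking references on the page tables backing
 * the range (allocating them if necessary), and fill in `vma'.
 */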
int
nouveau_vm_get(struct nouveau_vm *vm, u64 size, u32 page_shift,
	       u32 access, struct nouveau_vma *vma)
{
	struct nouveau_vmmgr *vmm = vm->vmm;
	u32 align = (1 << page_shift) >> 12;
	u32 msize = size >> 12;
	u32 fpde, lpde, pde;
	int ret;

	mutex_lock(&nv_subdev(vmm)->mutex);
	ret = nouveau_mm_head(&vm->mm, page_shift, msize, msize, align,
			     &vma->node);
	if (unlikely(ret != 0)) {
		mutex_unlock(&nv_subdev(vmm)->mutex);
		return ret;
	}

	fpde = (vma->node->offset >> vmm->pgt_bits);
	lpde = (vma->node->offset + vma->node->length - 1) >> vmm->pgt_bits;

	for (pde = fpde; pde <= lpde; pde++) {
		struct nouveau_vm_pgt *vpgt = &vm->pgt[pde - vm->fpde];
		int big = (vma->node->type != vmm->spg_shift);

		if (likely(vpgt->refcount[big])) {
			vpgt->refcount[big]++;
			continue;
		}

		ret = nouveau_vm_map_pgt(vm, pde, vma->node->type);
		if (ret) {
			if (pde != fpde)
				nouveau_vm_unmap_pgt(vm, big, fpde, pde - 1);
			nouveau_mm_free(&vm->mm, &vma->node);
			mutex_unlock(&nv_subdev(vmm)->mutex);
			return ret;
		}
	}
	mutex_unlock(&nv_subdev(vmm)->mutex);

	vma->vm = NULL;
	nouveau_vm_ref(vm, &vma->vm, NULL);
	vma->offset = (u64)vma->node->offset << 12;
	vma->access = access;
	return 0;
}

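/*
 * Release the address space held by `vma': drop the page-table references
 * for its range, free the allocation, and drop the reference on the VM.
 */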
void
nouveau_vm_put(struct nouveau_vma *vma)
{
	struct nouveau_vm *vm = vma->vm;
	struct nouveau_vmmgr *vmm = vm->vmm;
	u32 fpde, lpde;

	if (unlikely(vma->node == NULL))
		return;
	fpde = (vma->node->offset >> vmm->pgt_bits);
	lpde = (vma->node->offset + vma->node->length - 1) >> vmm->pgt_bits;

	mutex_lock(&nv_subdev(vmm)->mutex);
	nouveau_vm_unmap_pgt(vm, vma->node->type != vmm->spg_shift, fpde, lpde);
	nouveau_mm_free(&vm->mm, &vma->node);
	mutex_unlock(&nv_subdev(vmm)->mutex);

	nouveau_vm_ref(NULL, &vma->vm, NULL);
}

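/*
 * Create an address space covering [offset, offset + length), with
 * allocations handed out from `mm_offset' upwards in `block'-byte
 * granularity.
 */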
int
nouveau_vm_create(struct nouveau_vmmgr *vmm, u64 offset, u64 length,
		  u64 mm_offset, u32 block, struct nouveau_vm **pvm)
{
	struct nouveau_vm *vm;
	u64 mm_length = (offset + length) - mm_offset;
	int ret;

	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
	if (!vm)
		return -ENOMEM;

	INIT_LIST_HEAD(&vm->pgd_list);
	vm->vmm = vmm;
	kref_init(&vm->refcount);
	vm->fpde = offset >> (vmm->pgt_bits + 12);
	vm->lpde = (offset + length - 1) >> (vmm->pgt_bits + 12);

	vm->pgt  = vzalloc((vm->lpde - vm->fpde + 1) * sizeof(*vm->pgt));
	if (!vm->pgt) {
		kfree(vm);
		return -ENOMEM;
	}

	ret = nouveau_mm_init(&vm->mm, mm_offset >> 12, mm_length >> 12,
			      block >> 12);
	if (ret) {
		vfree(vm->pgt);
		kfree(vm);
		return ret;
	}

	*pvm = vm;

	return 0;
}

int
nouveau_vm_new(struct nouveau_device *device, u64 offset, u64 length,
	       u64 mm_offset, struct nouveau_vm **pvm)
{
	struct nouveau_vmmgr *vmm = nouveau_vmmgr(device);
	return vmm->create(vmm, offset, length, mm_offset, pvm);
}

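/*
 * Link a page directory into the VM: take a reference on `pgd', write the
 * VM's current page tables into it, and add it to the page-directory list.
 */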
static int
nouveau_vm_link(struct nouveau_vm *vm, struct nouveau_gpuobj *pgd)
{
	struct nouveau_vmmgr *vmm = vm->vmm;
	struct nouveau_vm_pgd *vpgd;
	int i;

	if (!pgd)
		return 0;

	vpgd = kzalloc(sizeof(*vpgd), GFP_KERNEL);
	if (!vpgd)
		return -ENOMEM;

	nouveau_gpuobj_ref(pgd, &vpgd->obj);

	mutex_lock(&nv_subdev(vmm)->mutex);
	for (i = vm->fpde; i <= vm->lpde; i++)
		vmm->map_pgt(pgd, i, vm->pgt[i - vm->fpde].obj);
	list_add(&vpgd->head, &vm->pgd_list);
	mutex_unlock(&nv_subdev(vmm)->mutex);
	return 0;
}

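/*
 * Unlink a page directory from the VM and drop the reference taken in
 * nouveau_vm_link().
 */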
static void
nouveau_vm_unlink(struct nouveau_vm *vm, struct nouveau_gpuobj *mpgd)
{
	struct nouveau_vmmgr *vmm = vm->vmm;
	struct nouveau_vm_pgd *vpgd, *tmp;
	struct nouveau_gpuobj *pgd = NULL;

	if (!mpgd)
		return;

	mutex_lock(&nv_subdev(vmm)->mutex);
	list_for_each_entry_safe(vpgd, tmp, &vm->pgd_list, head) {
		if (vpgd->obj == mpgd) {
			pgd = vpgd->obj;
			list_del(&vpgd->head);
			kfree(vpgd);
			break;
		}
	}
	mutex_unlock(&nv_subdev(vmm)->mutex);

	nouveau_gpuobj_ref(NULL, &pgd);
}

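/* Final teardown, run once the last reference on the VM is dropped. */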
static void
nouveau_vm_del(struct kref *kref)
{
	struct nouveau_vm *vm = container_of(kref, typeof(*vm), refcount);
	struct nouveau_vm_pgd *vpgd, *tmp;

	list_for_each_entry_safe(vpgd, tmp, &vm->pgd_list, head) {
		nouveau_vm_unlink(vm, vpgd->obj);
	}

	nouveau_mm_fini(&vm->mm);
	vfree(vm->pgt);
	kfree(vm);
}

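/*
 * Update `*ptr' to reference `ref' (either may be NULL), linking or
 * unlinking `pgd' as appropriate and adjusting VM reference counts.
 */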
int
nouveau_vm_ref(struct nouveau_vm *ref, struct nouveau_vm **ptr,
	       struct nouveau_gpuobj *pgd)
{
	if (ref) {
		int ret = nouveau_vm_link(ref, pgd);
		if (ret)
			return ret;

		kref_get(&ref->refcount);
	}

	if (*ptr) {
		nouveau_vm_unlink(*ptr, pgd);
		kref_put(&(*ptr)->refcount, nouveau_vm_del);
	}

	*ptr = ref;
	return 0;
}