/*-
 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
 *
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
 * Copyright 2019 Marvell. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <linux/xarray.h>
#include "uverbs.h"
#include "core_priv.h"

/**
 * rdma_umap_priv_init() - Initialize the private data of a vma
 *
 * @priv: The already allocated private data
 * @vma: The vm area struct that needs private data
 * @entry: entry into the mmap_xa that needs to be linked with
 *       this vma
 *
 * Each time we map IO memory into user space this keeps track of the
 * mapping. When the device is hot-unplugged we 'zap' the mmaps in user space
 * to point to the zero page and allow the hot unplug to proceed.
 *
 * This is necessary for cases like PCI physical hot unplug as the actual BAR
 * memory may vanish after this and access to it from userspace could trigger
 * a machine check exception (MCE).
 *
 * RDMA drivers supporting disassociation must have their user space designed
 * to cope in some way with their IO pages going to the zero page.
 */
void rdma_umap_priv_init(struct rdma_umap_priv *priv,
			 struct vm_area_struct *vma,
			 struct rdma_user_mmap_entry *entry)
{
	struct ib_uverbs_file *ufile = vma->vm_file->private_data;

	priv->vma = vma;
	if (entry) {
		kref_get(&entry->ref);
		priv->entry = entry;
	}
	vma->vm_private_data = priv;
	/* vm_ops is set up in ib_uverbs_mmap() to avoid module dependencies */

	mutex_lock(&ufile->umap_lock);
	list_add(&priv->list, &ufile->umaps);
	mutex_unlock(&ufile->umap_lock);
}
EXPORT_SYMBOL(rdma_umap_priv_init);

/**
 * rdma_user_mmap_io() - Map IO memory into a process
 *
 * @ucontext: associated user context
 * @vma: the vma related to the current mmap call
 * @pfn: pfn to map
 * @size: size to map
 * @prot: pgprot to use in remap call
 * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL
 *         if mmap_entry is not used by the driver
 *
 * This is to be called by drivers as part of their mmap() functions if they
 * wish to send something like PCI-E BAR memory to userspace.
 *
 * Return: -EINVAL on wrong flags or size, -EAGAIN on failure to map, and 0
 * on success.
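 *
 * Example: a minimal sketch of a driver mmap() handler built on this
 * helper; my_entry_to_pfn() is a hypothetical driver function, not part
 * of this API:
 *
 *	static int my_mmap(struct ib_ucontext *ucontext,
 *			   struct vm_area_struct *vma)
 *	{
 *		struct rdma_user_mmap_entry *entry;
 *		int ret;
 *
 *		entry = rdma_user_mmap_entry_get(ucontext, vma);
 *		if (!entry)
 *			return -EINVAL;
 *		ret = rdma_user_mmap_io(ucontext, vma, my_entry_to_pfn(entry),
 *					vma->vm_end - vma->vm_start,
 *					pgprot_noncached(vma->vm_page_prot),
 *					entry);
 *		rdma_user_mmap_entry_put(entry);
 *		return ret;
 *	}
 *
 * On success rdma_user_mmap_io() takes its own reference on the entry, so
 * the reference from rdma_user_mmap_entry_get() can be dropped
 * unconditionally.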
 */
int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
		      unsigned long pfn, unsigned long size, pgprot_t prot,
		      struct rdma_user_mmap_entry *entry)
{
	struct ib_uverbs_file *ufile = ucontext->ufile;
	struct rdma_umap_priv *priv;

	if (!(vma->vm_flags & VM_SHARED))
		return -EINVAL;

	if (vma->vm_end - vma->vm_start != size)
		return -EINVAL;

	/* Driver is using this wrong, must be called by ib_uverbs_mmap */
	if (WARN_ON(!vma->vm_file ||
		    vma->vm_file->private_data != ufile))
		return -EINVAL;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	vma->vm_page_prot = prot;
	if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
		kfree(priv);
		return -EAGAIN;
	}

	rdma_umap_priv_init(priv, vma, entry);
	return 0;
}
EXPORT_SYMBOL(rdma_user_mmap_io);

/**
 * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa
 *
 * @ucontext: associated user context
 * @pgoff: The mmap offset >> PAGE_SHIFT
 *
 * This function is called when a user tries to mmap with an offset (returned
 * by rdma_user_mmap_get_offset()) that it initially received from the
 * driver. The rdma_user_mmap_entry was created by
 * rdma_user_mmap_entry_insert(). This function increases the refcnt of the
 * entry so that it won't be deleted from the xarray in the meantime.
 *
 * Return: a reference to the entry if it exists, or NULL if there is no
 * match. rdma_user_mmap_entry_put() must be called to drop the reference.
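 *
 * Example: a hedged sketch of the usual lookup pattern. Drivers embed the
 * rdma_user_mmap_entry in their own structure and recover it with
 * container_of(); struct my_mmap_entry is illustrative, not part of this
 * API:
 *
 *	struct my_mmap_entry {
 *		struct rdma_user_mmap_entry rdma_entry;
 *		u64 address;
 *	};
 *
 *	entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff);
 *	if (!entry)
 *		return -EINVAL;
 *	me = container_of(entry, struct my_mmap_entry, rdma_entry);
 *
 * followed by rdma_user_mmap_entry_put() once the driver is done with
 * me->address.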
 */
struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
			       unsigned long pgoff)
{
	struct rdma_user_mmap_entry *entry;

	if (pgoff > U32_MAX)
		return NULL;

	xa_lock(&ucontext->mmap_xa);

	entry = xa_load(&ucontext->mmap_xa, pgoff);

	/*
	 * If the refcount is zero, the entry is already being deleted;
	 * driver_removed indicates that no further mmaps are possible and
	 * we are waiting for the active VMAs to be closed.
	 */
	if (!entry || entry->start_pgoff != pgoff || entry->driver_removed ||
	    !kref_get_unless_zero(&entry->ref))
		goto err;

	xa_unlock(&ucontext->mmap_xa);

	return entry;

err:
	xa_unlock(&ucontext->mmap_xa);
	return NULL;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff);

/**
 * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa
 *
 * @ucontext: associated user context
 * @vma: the vma being mmap'd into
 *
 * This function is like rdma_user_mmap_entry_get_pgoff() except that it also
 * checks that the VMA is correct.
 */
struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
			 struct vm_area_struct *vma)
{
	struct rdma_user_mmap_entry *entry;

	if (!(vma->vm_flags & VM_SHARED))
		return NULL;
	entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff);
	if (!entry)
		return NULL;
	if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) {
		rdma_user_mmap_entry_put(entry);
		return NULL;
	}
	return entry;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_get);

static void rdma_user_mmap_entry_free(struct kref *kref)
{
	struct rdma_user_mmap_entry *entry =
		container_of(kref, struct rdma_user_mmap_entry, ref);
	struct ib_ucontext *ucontext = entry->ucontext;
	unsigned long i;

	/*
	 * Erase all xarray index slots occupied by this single entry. This
	 * is deferred until all VMAs are closed so that the mmap offsets
	 * remain unique.
	 */
	xa_lock(&ucontext->mmap_xa);
	for (i = 0; i < entry->npages; i++)
		__xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i);
	xa_unlock(&ucontext->mmap_xa);

	if (ucontext->device->mmap_free)
		ucontext->device->mmap_free(entry);
}

/**
 * rdma_user_mmap_entry_put() - Drop reference to the mmap entry
 *
 * @entry: an entry in the mmap_xa
 *
 * This function is called when the mapping is closed if it was
 * an io mapping, or when the driver is done with the entry for
 * some other reason.
 * It should be called after rdma_user_mmap_entry_get() was called
 * and the entry is no longer needed. Once the refcnt reaches zero,
 * the entry is erased from the xarray and freed.
 */
void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry)
{
	kref_put(&entry->ref, rdma_user_mmap_entry_free);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_put);

/**
 * rdma_user_mmap_entry_remove() - Drop reference to entry and
 *				   mark it as unmmapable
 *
 * @entry: the entry to remove from the mmap_xa (may be NULL)
 *
 * Drivers can call this to prevent userspace from creating more mappings for
 * entry, however existing mmaps continue to exist and ops->mmap_free() will
 * not be called until all user mmaps are destroyed.
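 *
 * Example: a sketch of typical driver teardown; the db_mmap_entry field is
 * illustrative, not part of this API:
 *
 *	rdma_user_mmap_entry_remove(qp->db_mmap_entry);
 *	qp->db_mmap_entry = NULL;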
 */
void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
{
	if (!entry)
		return;

	xa_lock(&entry->ucontext->mmap_xa);
	entry->driver_removed = true;
	xa_unlock(&entry->ucontext->mmap_xa);
	kref_put(&entry->ref, rdma_user_mmap_entry_free);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_remove);

/**
 * rdma_user_mmap_entry_insert_range() - Insert an entry to the mmap_xa
 *					 in a given range.
 *
 * @ucontext: associated user context.
 * @entry: the entry to insert into the mmap_xa
 * @length: length of the address that will be mmapped
 * @min_pgoff: minimum pgoff to be returned
 * @max_pgoff: maximum pgoff to be returned
 *
 * This function should be called by drivers that use the rdma_user_mmap
 * interface for implementing their mmap syscall. A database of mmap offsets
 * is handled in the core and helper functions are provided to insert entries
 * into the database and extract entries when the user calls mmap with the
 * given offset. The function allocates a unique page offset in the given
 * range that should be provided to the user; the user will later pass this
 * offset to mmap() so the driver can look up what to map and how.
 *
 * Return: 0 on success and -ENOMEM on failure
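 *
 * Example: a minimal sketch of inserting an entry and reporting the byte
 * offset back to userspace; struct my_mmap_entry (from the example above),
 * bar_addr, MY_MIN_PGOFF/MY_MAX_PGOFF and resp are illustrative:
 *
 *	struct my_mmap_entry *me = kzalloc(sizeof(*me), GFP_KERNEL);
 *
 *	if (!me)
 *		return -ENOMEM;
 *	me->address = bar_addr;
 *	err = rdma_user_mmap_entry_insert_range(ucontext, &me->rdma_entry,
 *						PAGE_SIZE, MY_MIN_PGOFF,
 *						MY_MAX_PGOFF);
 *	if (err) {
 *		kfree(me);
 *		return err;
 *	}
 *	resp.mmap_offset = rdma_user_mmap_get_offset(&me->rdma_entry);
 *
 * Once inserted, the containing structure must not be freed directly;
 * drop the reference with rdma_user_mmap_entry_put() or
 * rdma_user_mmap_entry_remove() and free it from the driver's mmap_free()
 * callback.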
 */
int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
				      struct rdma_user_mmap_entry *entry,
				      size_t length, u32 min_pgoff,
				      u32 max_pgoff)
{
	struct ib_uverbs_file *ufile = ucontext->ufile;
	u32 xa_first, xa_last, npages;
	int err;
	u32 i;
	u32 j;

	if (!entry)
		return -EINVAL;

	kref_init(&entry->ref);
	entry->ucontext = ucontext;

	/*
	 * We want the whole allocation to be done without interruption from a
	 * different thread. The allocation requires finding a free range and
	 * storing into it. During __xa_insert() the lock may be dropped to
	 * allocate memory, possibly allowing another thread to choose the
	 * same range.
	 */
	mutex_lock(&ufile->umap_lock);

	xa_lock(&ucontext->mmap_xa);

	npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE);
	entry->npages = npages;

	/* Find an empty range of npages consecutive free slots */
	for (i = min_pgoff, j = 0; (i + j) <= max_pgoff && j != npages; ) {
		if (xa_load(&ucontext->mmap_xa, i + j) != NULL) {
			/* Slot is occupied, restart the search after it */
			if (unlikely(i + j == max_pgoff))
				break;
			i = i + j + 1;
			j = 0;
		} else {
			/*
			 * Slot i + j is free, extend the candidate run.
			 * Checking after the increment keeps a range that
			 * ends exactly at max_pgoff usable while still
			 * avoiding u32 overflow of i + j when
			 * max_pgoff == U32_MAX.
			 */
			j++;
			if (j != npages && unlikely(i + j - 1 == max_pgoff))
				break;
		}
	}

	if (j != npages)
		goto err_unlock;

	xa_first = i;
	xa_last = i + j;

	for (i = xa_first; i < xa_last; i++) {
		err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL);
		if (err)
			goto err_undo;
	}

	/*
	 * Internally the kernel uses a page offset, while in libc this is a
	 * byte offset. Drivers should not return the raw pgoff to userspace;
	 * rdma_user_mmap_get_offset() converts it to a byte offset.
	 */
	entry->start_pgoff = xa_first;
	xa_unlock(&ucontext->mmap_xa);
	mutex_unlock(&ufile->umap_lock);

	return 0;

err_undo:
	for (; i > xa_first; i--)
		__xa_erase(&ucontext->mmap_xa, i - 1);

err_unlock:
	xa_unlock(&ucontext->mmap_xa);
	mutex_unlock(&ufile->umap_lock);
	return -ENOMEM;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range);

/**
 * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa.
 *
 * @ucontext: associated user context.
 * @entry: the entry to insert into the mmap_xa
 * @length: length of the address that will be mmapped
 *
 * This function should be called by drivers that use the rdma_user_mmap
 * interface for handling user mmapped addresses. The database is handled in
 * the core and helper functions are provided to insert entries into the
 * database and extract entries when the user calls mmap with the given offset.
 * The function allocates a unique page offset that should be provided to the
 * user; the user will later pass this offset to mmap() so the driver can look
 * up what to map and how.
 *
 * Return: 0 on success and -ENOMEM on failure
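 *
 * Example: this is simply rdma_user_mmap_entry_insert_range() over the full
 * [0, U32_MAX] range; a sketch using the illustrative struct my_mmap_entry
 * from above:
 *
 *	err = rdma_user_mmap_entry_insert(ucontext, &me->rdma_entry, length);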
 */
int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
				struct rdma_user_mmap_entry *entry,
				size_t length)
{
	return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0,
						 U32_MAX);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert);