/*-
 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
 *
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
 * Copyright 2019 Marvell. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <sys/cdefs.h>
#include <linux/xarray.h>
#include "uverbs.h"
#include "core_priv.h"

/**
 * rdma_umap_priv_init() - Initialize the private data of a vma
 *
 * @priv: The already allocated private data
 * @vma: The vm area struct that needs private data
 * @entry: entry into the mmap_xa that needs to be linked with
 *       this vma
 *
 * Each time we map IO memory into user space, this keeps track of the
 * mapping. When the device is hot-unplugged we 'zap' the mmaps in user space
 * to point to the zero page and allow the hot unplug to proceed.
 *
 * This is necessary for cases like PCI physical hot unplug as the actual BAR
 * memory may vanish after this point and accessing it from userspace could
 * trigger a machine check.
 *
 * RDMA drivers supporting disassociation must have their user space designed
 * to cope in some way with their IO pages going to the zero page.
 */
void rdma_umap_priv_init(struct rdma_umap_priv *priv,
			 struct vm_area_struct *vma,
			 struct rdma_user_mmap_entry *entry)
{
	struct ib_uverbs_file *ufile = vma->vm_file->private_data;

	priv->vma = vma;
	if (entry) {
		kref_get(&entry->ref);
		priv->entry = entry;
	}
	vma->vm_private_data = priv;
	/* vm_ops is set up in ib_uverbs_mmap() to avoid module dependencies */

	mutex_lock(&ufile->umap_lock);
	list_add(&priv->list, &ufile->umaps);
	mutex_unlock(&ufile->umap_lock);
}
EXPORT_SYMBOL(rdma_umap_priv_init);

/**
 * rdma_user_mmap_io() - Map IO memory into a process
 *
 * @ucontext: associated user context
 * @vma: the vma related to the current mmap call
 * @pfn: pfn to map
 * @size: size to map
 * @prot: pgprot to use in remap call
 * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL
 *         if mmap_entry is not used by the driver
 *
 * This is to be called by drivers as part of their mmap() functions if they
 * wish to send something like PCI-E BAR memory to userspace.
 *
 * Return: 0 on success, -EINVAL on wrong flags or size, and -EAGAIN if the
 * remap fails.
 */
int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
		      unsigned long pfn, unsigned long size, pgprot_t prot,
		      struct rdma_user_mmap_entry *entry)
{
	struct ib_uverbs_file *ufile = ucontext->ufile;
	struct rdma_umap_priv *priv;

	if (!(vma->vm_flags & VM_SHARED))
		return -EINVAL;

	if (vma->vm_end - vma->vm_start != size)
		return -EINVAL;

	/* Driver is using this wrong, must be called by ib_uverbs_mmap */
	if (WARN_ON(!vma->vm_file ||
		    vma->vm_file->private_data != ufile))
		return -EINVAL;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	vma->vm_page_prot = prot;
	if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
		kfree(priv);
		return -EAGAIN;
	}

	rdma_umap_priv_init(priv, vma, entry);
	return 0;
}
EXPORT_SYMBOL(rdma_user_mmap_io);
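
/*
 * Usage sketch (not part of this file): a driver that encodes the mmap
 * target directly in vm_pgoff, rather than through the mmap_xa, may pass a
 * NULL entry. my_mmap() and my_uar_pfn() are hypothetical names.
 *
 *	static int my_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
 *	{
 *		if (vma->vm_end - vma->vm_start != PAGE_SIZE)
 *			return -EINVAL;
 *		return rdma_user_mmap_io(uctx, vma, my_uar_pfn(uctx), PAGE_SIZE,
 *					 pgprot_noncached(vma->vm_page_prot),
 *					 NULL);
 *	}
 */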

/**
 * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa
 *
 * @ucontext: associated user context
 * @pgoff: The mmap offset >> PAGE_SHIFT
 *
 * This function is called when a user tries to mmap with an offset (returned
 * by rdma_user_mmap_get_offset()) it initially received from the driver. The
 * rdma_user_mmap_entry was created by the function
 * rdma_user_mmap_entry_insert().  This function increases the refcnt of the
 * entry so that it won't be deleted from the xarray in the meantime.
 *
 * Return: a reference to the entry if it exists, or NULL if there is no
 * match. rdma_user_mmap_entry_put() must be called to release the reference.
 */
struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
			       unsigned long pgoff)
{
	struct rdma_user_mmap_entry *entry;

	if (pgoff > U32_MAX)
		return NULL;

	xa_lock(&ucontext->mmap_xa);

	entry = xa_load(&ucontext->mmap_xa, pgoff);

	/*
	 * If the refcount is zero, the entry is already being deleted;
	 * driver_removed indicates that no further mmaps are possible and
	 * that we are waiting for the active VMAs to be closed.
	 */
	if (!entry || entry->start_pgoff != pgoff || entry->driver_removed ||
	    !kref_get_unless_zero(&entry->ref))
		goto err;

	xa_unlock(&ucontext->mmap_xa);

	return entry;

err:
	xa_unlock(&ucontext->mmap_xa);
	return NULL;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff);

/**
 * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa
 *
 * @ucontext: associated user context
 * @vma: the vma being mmap'd into
 *
 * This function is like rdma_user_mmap_entry_get_pgoff() except that it also
 * checks that the VMA is correct.
 */
struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
			 struct vm_area_struct *vma)
{
	struct rdma_user_mmap_entry *entry;

	if (!(vma->vm_flags & VM_SHARED))
		return NULL;
	entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff);
	if (!entry)
		return NULL;
	if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) {
		rdma_user_mmap_entry_put(entry);
		return NULL;
	}
	return entry;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_get);
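
/*
 * Usage sketch (hypothetical driver code): a typical mmap verb looks the
 * entry up by VMA, maps it, and drops the lookup reference; the reference
 * taken by rdma_umap_priv_init() keeps the entry alive while it is mapped.
 * my_mmap() and my_entry_pfn() are assumed driver helpers.
 *
 *	static int my_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
 *	{
 *		struct rdma_user_mmap_entry *entry;
 *		int ret;
 *
 *		entry = rdma_user_mmap_entry_get(uctx, vma);
 *		if (!entry)
 *			return -EINVAL;
 *		ret = rdma_user_mmap_io(uctx, vma, my_entry_pfn(entry),
 *					entry->npages * PAGE_SIZE,
 *					pgprot_noncached(vma->vm_page_prot),
 *					entry);
 *		rdma_user_mmap_entry_put(entry);
 *		return ret;
 *	}
 */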

static void rdma_user_mmap_entry_free(struct kref *kref)
{
	struct rdma_user_mmap_entry *entry =
		container_of(kref, struct rdma_user_mmap_entry, ref);
	struct ib_ucontext *ucontext = entry->ucontext;
	unsigned long i;

	/*
	 * Erase all xarray slots occupied by this entry. This is deferred
	 * until all VMAs are closed so that the mmap offsets remain unique.
	 */
	xa_lock(&ucontext->mmap_xa);
	for (i = 0; i < entry->npages; i++)
		__xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i);
	xa_unlock(&ucontext->mmap_xa);

	if (ucontext->device->mmap_free)
		ucontext->device->mmap_free(entry);
}

/**
 * rdma_user_mmap_entry_put() - Drop reference to the mmap entry
 *
 * @entry: an entry in the mmap_xa
 *
 * This function is called when the mapping is closed if it was
 * an io mapping, or when the driver is done with the entry for
 * some other reason.
 * Should be called after rdma_user_mmap_entry_get() was called
 * and the entry is no longer needed. This function will erase the
 * entry and free it if its refcnt reaches zero.
 */
void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry)
{
	kref_put(&entry->ref, rdma_user_mmap_entry_free);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_put);

/**
 * rdma_user_mmap_entry_remove() - Drop reference to entry and
 *				   mark it as unmappable
 *
 * @entry: the entry to remove from the mmap_xa
 *
 * Drivers can call this to prevent userspace from creating more mappings for
 * entry, however existing mmaps continue to exist and ops->mmap_free() will
 * not be called until all user mmaps are destroyed.
 */
void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
{
	if (!entry)
		return;

	xa_lock(&entry->ucontext->mmap_xa);
	entry->driver_removed = true;
	xa_unlock(&entry->ucontext->mmap_xa);
	kref_put(&entry->ref, rdma_user_mmap_entry_free);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_remove);
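
/*
 * Usage sketch (hypothetical driver code): when destroying the object that
 * backs a mapping, the driver drops its insertion reference; mmap_free (if
 * set) runs only after all user VMAs are gone. struct my_db and
 * my_destroy_db() are assumed names.
 *
 *	static void my_destroy_db(struct my_db *db)
 *	{
 *		rdma_user_mmap_entry_remove(&db->rdma_entry);
 *	}
 *
 * Freeing of "db" itself belongs in ops->mmap_free(), not here.
 */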

/**
 * rdma_user_mmap_entry_insert_range() - Insert an entry to the mmap_xa
 *					 in a given range.
 *
 * @ucontext: associated user context.
 * @entry: the entry to insert into the mmap_xa
 * @length: length of the address that will be mmapped
 * @min_pgoff: minimum pgoff to be returned
 * @max_pgoff: maximum pgoff to be returned
 *
 * This function should be called by drivers that use the rdma_user_mmap
 * interface for implementing their mmap syscall. A database of mmap offsets
 * is handled in the core and helper functions are provided to insert entries
 * into the database and extract entries when the user calls mmap with the
 * given offset. The function allocates a unique page offset in the given
 * range that should be provided to the user; the user will use the offset to
 * retrieve information such as the address to be mapped and how.
 *
 * Return: 0 on success and -ENOMEM on failure
 */
int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
				      struct rdma_user_mmap_entry *entry,
				      size_t length, u32 min_pgoff,
				      u32 max_pgoff)
{
	struct ib_uverbs_file *ufile = ucontext->ufile;
	u32 xa_first, xa_last, npages;
	u32 i, j;
	int err;

	if (!entry)
		return -EINVAL;

	kref_init(&entry->ref);
	entry->ucontext = ucontext;

	/*
	 * We want the whole allocation to be done without interruption from a
	 * different thread. The allocation requires finding a free range and
	 * storing. During the xa_insert the lock could be released, possibly
	 * allowing another thread to choose the same range.
	 */
	mutex_lock(&ufile->umap_lock);

	xa_lock(&ucontext->mmap_xa);

	npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE);
	entry->npages = npages;

	/*
	 * Scan for a run of npages free slots starting at min_pgoff. "i" is
	 * the candidate start and "j" the number of free slots found so far.
	 * The breaks at max_pgoff also guard against u32 wraparound when
	 * max_pgoff is U32_MAX.
	 */
	for (i = min_pgoff, j = 0; (i + j) <= max_pgoff && j != npages; ) {
		if (xa_load(&ucontext->mmap_xa, i + j) != NULL) {
			if (unlikely(i + j == max_pgoff))
				break;
			/* Occupied; restart the search just past this slot */
			i = i + j + 1;
			j = 0;
		} else {
			if (unlikely(i + j == max_pgoff))
				break;
			j++;
		}
	}

	if (j != npages)
		goto err_unlock;

	xa_first = i;
	xa_last = i + j;

	for (i = xa_first; i < xa_last; i++) {
		err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL);
		if (err)
			goto err_undo;
	}

	/*
	 * Internally the kernel uses a page offset, in libc this is a byte
	 * offset. Drivers should not return pgoff to userspace.
	 */
	entry->start_pgoff = xa_first;
	xa_unlock(&ucontext->mmap_xa);
	mutex_unlock(&ufile->umap_lock);

	return 0;

err_undo:
	for (; i > xa_first; i--)
		__xa_erase(&ucontext->mmap_xa, i - 1);

err_unlock:
	xa_unlock(&ucontext->mmap_xa);
	mutex_unlock(&ufile->umap_lock);
	return -ENOMEM;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range);
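
/*
 * Usage sketch (hypothetical driver code): reserving a doorbell entry inside
 * a fixed pgoff window so offsets in that window can be recognized later.
 * MY_DB_PGOFF_START/MY_DB_PGOFF_END and "db"/"resp" are assumed names.
 *
 *	err = rdma_user_mmap_entry_insert_range(uctx, &db->rdma_entry,
 *						PAGE_SIZE, MY_DB_PGOFF_START,
 *						MY_DB_PGOFF_END);
 *	if (err)
 *		return err;
 *	resp.db_offset = rdma_user_mmap_get_offset(&db->rdma_entry);
 */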

/**
 * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa.
 *
 * @ucontext: associated user context.
 * @entry: the entry to insert into the mmap_xa
 * @length: length of the address that will be mmapped
 *
 * This function should be called by drivers that use the rdma_user_mmap
 * interface for handling user mmapped addresses. The database is handled in
 * the core and helper functions are provided to insert entries into the
 * database and extract entries when the user calls mmap with the given
 * offset. The function allocates a unique page offset that should be
 * provided to the user; the user will use the offset to retrieve information
 * such as the address to be mapped and how.
 *
 * Return: 0 on success and -ENOMEM on failure
 */
int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
				struct rdma_user_mmap_entry *entry,
				size_t length)
{
	return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0,
						 U32_MAX);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert);
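
/*
 * Usage sketch (hypothetical driver code): drivers typically embed
 * struct rdma_user_mmap_entry in their own state and recover it in
 * ops->mmap_free() once the last reference is dropped. struct my_user_db
 * and my_mmap_free() are assumed names.
 *
 *	struct my_user_db {
 *		struct rdma_user_mmap_entry rdma_entry;
 *		u64 db_addr;
 *	};
 *
 *	static void my_mmap_free(struct rdma_user_mmap_entry *entry)
 *	{
 *		struct my_user_db *db =
 *			container_of(entry, struct my_user_db, rdma_entry);
 *
 *		kfree(db);
 *	}
 */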