1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
4  * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
5  * Copyright 2019 Marvell. All rights reserved.
6  */
7 #include <linux/xarray.h>
8 #include "uverbs.h"
9 #include "core_priv.h"
10 
11 /**
12  * rdma_umap_priv_init() - Initialize the private data of a vma
13  *
14  * @priv: The already allocated private data
15  * @vma: The vm area struct that needs private data
16  * @entry: entry into the mmap_xa that needs to be linked with
17  *       this vma
18  *
19  * Each time we map IO memory into user space this keeps track of the
20  * mapping. When the device is hot-unplugged we 'zap' the mmaps in user space
21  * to point to the zero page and allow the hot unplug to proceed.
22  *
23  * This is necessary for cases like PCI physical hot unplug as the actual BAR
24  * memory may vanish after this and access to it from userspace could MCE.
25  *
26  * RDMA drivers supporting disassociation must have their user space designed
27  * to cope in some way with their IO pages going to the zero page.
28  *
29  */
30 void rdma_umap_priv_init(struct rdma_umap_priv *priv,
31 			 struct vm_area_struct *vma,
32 			 struct rdma_user_mmap_entry *entry)
33 {
34 	struct ib_uverbs_file *ufile = vma->vm_file->private_data;
35 
36 	priv->vma = vma;
37 	if (entry) {
38 		kref_get(&entry->ref);
39 		priv->entry = entry;
40 	}
41 	vma->vm_private_data = priv;
42 	/* vm_ops is setup in ib_uverbs_mmap() to avoid module dependencies */
43 
44 	mutex_lock(&ufile->umap_lock);
45 	list_add(&priv->list, &ufile->umaps);
46 	mutex_unlock(&ufile->umap_lock);
47 }
48 EXPORT_SYMBOL(rdma_umap_priv_init);
49 
50 /**
51  * rdma_user_mmap_io() - Map IO memory into a process
52  *
53  * @ucontext: associated user context
54  * @vma: the vma related to the current mmap call
55  * @pfn: pfn to map
56  * @size: size to map
57  * @prot: pgprot to use in remap call
58  * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL
59  *         if mmap_entry is not used by the driver
60  *
61  * This is to be called by drivers as part of their mmap() functions if they
62  * wish to send something like PCI-E BAR memory to userspace.
63  *
 * Return -EINVAL on wrong flags or size, -ENOMEM if the tracking data cannot
 * be allocated, -EAGAIN on failure to map. 0 on success.
66  */
67 int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
68 		      unsigned long pfn, unsigned long size, pgprot_t prot,
69 		      struct rdma_user_mmap_entry *entry)
70 {
71 	struct ib_uverbs_file *ufile = ucontext->ufile;
72 	struct rdma_umap_priv *priv;
73 
74 	if (!(vma->vm_flags & VM_SHARED))
75 		return -EINVAL;
76 
77 	if (vma->vm_end - vma->vm_start != size)
78 		return -EINVAL;
79 
80 	/* Driver is using this wrong, must be called by ib_uverbs_mmap */
81 	if (WARN_ON(!vma->vm_file ||
82 		    vma->vm_file->private_data != ufile))
83 		return -EINVAL;
84 	lockdep_assert_held(&ufile->device->disassociate_srcu);
85 
86 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
87 	if (!priv)
88 		return -ENOMEM;
89 
90 	vma->vm_page_prot = prot;
91 	if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
92 		kfree(priv);
93 		return -EAGAIN;
94 	}
95 
96 	rdma_umap_priv_init(priv, vma, entry);
97 	return 0;
98 }
99 EXPORT_SYMBOL(rdma_user_mmap_io);
100 
101 /**
102  * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa
103  *
104  * @ucontext: associated user context
105  * @pgoff: The mmap offset >> PAGE_SHIFT
106  *
107  * This function is called when a user tries to mmap with an offset (returned
108  * by rdma_user_mmap_get_offset()) it initially received from the driver. The
109  * rdma_user_mmap_entry was created by the function
110  * rdma_user_mmap_entry_insert().  This function increases the refcnt of the
111  * entry so that it won't be deleted from the xarray in the meantime.
112  *
 * Return a reference to an entry if it exists or NULL if there is no
 * match. rdma_user_mmap_entry_put() must be called to put the reference.
115  */
116 struct rdma_user_mmap_entry *
117 rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
118 			       unsigned long pgoff)
119 {
120 	struct rdma_user_mmap_entry *entry;
121 
122 	if (pgoff > U32_MAX)
123 		return NULL;
124 
125 	xa_lock(&ucontext->mmap_xa);
126 
127 	entry = xa_load(&ucontext->mmap_xa, pgoff);
128 
129 	/*
130 	 * If refcount is zero, entry is already being deleted, driver_removed
131 	 * indicates that the no further mmaps are possible and we waiting for
132 	 * the active VMAs to be closed.
133 	 */
134 	if (!entry || entry->start_pgoff != pgoff || entry->driver_removed ||
135 	    !kref_get_unless_zero(&entry->ref))
136 		goto err;
137 
138 	xa_unlock(&ucontext->mmap_xa);
139 
140 	ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] returned\n",
141 		  pgoff, entry->npages);
142 
143 	return entry;
144 
145 err:
146 	xa_unlock(&ucontext->mmap_xa);
147 	return NULL;
148 }
149 EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff);
150 
151 /**
152  * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa
153  *
154  * @ucontext: associated user context
155  * @vma: the vma being mmap'd into
156  *
157  * This function is like rdma_user_mmap_entry_get_pgoff() except that it also
158  * checks that the VMA is correct.
159  */
160 struct rdma_user_mmap_entry *
161 rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
162 			 struct vm_area_struct *vma)
163 {
164 	struct rdma_user_mmap_entry *entry;
165 
166 	if (!(vma->vm_flags & VM_SHARED))
167 		return NULL;
168 	entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff);
169 	if (!entry)
170 		return NULL;
171 	if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) {
172 		rdma_user_mmap_entry_put(entry);
173 		return NULL;
174 	}
175 	return entry;
176 }
177 EXPORT_SYMBOL(rdma_user_mmap_entry_get);
178 
179 static void rdma_user_mmap_entry_free(struct kref *kref)
180 {
181 	struct rdma_user_mmap_entry *entry =
182 		container_of(kref, struct rdma_user_mmap_entry, ref);
183 	struct ib_ucontext *ucontext = entry->ucontext;
184 	unsigned long i;
185 
186 	/*
187 	 * Erase all entries occupied by this single entry, this is deferred
188 	 * until all VMA are closed so that the mmap offsets remain unique.
189 	 */
190 	xa_lock(&ucontext->mmap_xa);
191 	for (i = 0; i < entry->npages; i++)
192 		__xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i);
193 	xa_unlock(&ucontext->mmap_xa);
194 
195 	ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] removed\n",
196 		  entry->start_pgoff, entry->npages);
197 
198 	if (ucontext->device->ops.mmap_free)
199 		ucontext->device->ops.mmap_free(entry);
200 }
201 
202 /**
203  * rdma_user_mmap_entry_put() - Drop reference to the mmap entry
204  *
205  * @entry: an entry in the mmap_xa
206  *
207  * This function is called when the mapping is closed if it was
208  * an io mapping or when the driver is done with the entry for
209  * some other reason.
210  * Should be called after rdma_user_mmap_entry_get was called
211  * and entry is no longer needed. This function will erase the
212  * entry and free it if its refcnt reaches zero.
213  */
void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry)
{
	/*
	 * Dropping the last reference runs rdma_user_mmap_entry_free(), which
	 * erases the entry from the mmap_xa and calls the driver's mmap_free
	 * op when one is set.
	 */
	kref_put(&entry->ref, rdma_user_mmap_entry_free);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_put);
219 
220 /**
221  * rdma_user_mmap_entry_remove() - Drop reference to entry and
222  *				   mark it as unmmapable
223  *
 * @entry: the entry in the mmap_xa to mark as removed, NULL is a no-op
225  *
226  * Drivers can call this to prevent userspace from creating more mappings for
227  * entry, however existing mmaps continue to exist and ops->mmap_free() will
228  * not be called until all user mmaps are destroyed.
229  */
230 void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
231 {
232 	if (!entry)
233 		return;
234 
235 	xa_lock(&entry->ucontext->mmap_xa);
236 	entry->driver_removed = true;
237 	xa_unlock(&entry->ucontext->mmap_xa);
238 	kref_put(&entry->ref, rdma_user_mmap_entry_free);
239 }
240 EXPORT_SYMBOL(rdma_user_mmap_entry_remove);
241 
242 /**
243  * rdma_user_mmap_entry_insert_range() - Insert an entry to the mmap_xa
244  *					 in a given range.
245  *
246  * @ucontext: associated user context.
247  * @entry: the entry to insert into the mmap_xa
248  * @length: length of the address that will be mmapped
249  * @min_pgoff: minimum pgoff to be returned
250  * @max_pgoff: maximum pgoff to be returned
251  *
 * This function should be called by drivers that use the rdma_user_mmap
 * interface for implementing their mmap syscall. A database of mmap offsets is
254  * handled in the core and helper functions are provided to insert entries
255  * into the database and extract entries when the user calls mmap with the
256  * given offset. The function allocates a unique page offset in a given range
257  * that should be provided to user, the user will use the offset to retrieve
258  * information such as address to be mapped and how.
259  *
 * Return: 0 on success, -EINVAL if @entry is NULL and -ENOMEM on failure
261  */
262 int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
263 				      struct rdma_user_mmap_entry *entry,
264 				      size_t length, u32 min_pgoff,
265 				      u32 max_pgoff)
266 {
267 	struct ib_uverbs_file *ufile = ucontext->ufile;
268 	XA_STATE(xas, &ucontext->mmap_xa, min_pgoff);
269 	u32 xa_first, xa_last, npages;
270 	int err;
271 	u32 i;
272 
273 	if (!entry)
274 		return -EINVAL;
275 
276 	kref_init(&entry->ref);
277 	entry->ucontext = ucontext;
278 
279 	/*
280 	 * We want the whole allocation to be done without interruption from a
281 	 * different thread. The allocation requires finding a free range and
282 	 * storing. During the xa_insert the lock could be released, possibly
283 	 * allowing another thread to choose the same range.
284 	 */
285 	mutex_lock(&ufile->umap_lock);
286 
287 	xa_lock(&ucontext->mmap_xa);
288 
289 	/* We want to find an empty range */
290 	npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE);
291 	entry->npages = npages;
292 	while (true) {
293 		/* First find an empty index */
294 		xas_find_marked(&xas, max_pgoff, XA_FREE_MARK);
295 		if (xas.xa_node == XAS_RESTART)
296 			goto err_unlock;
297 
298 		xa_first = xas.xa_index;
299 
300 		/* Is there enough room to have the range? */
301 		if (check_add_overflow(xa_first, npages, &xa_last))
302 			goto err_unlock;
303 
304 		/*
305 		 * Now look for the next present entry. If an entry doesn't
306 		 * exist, we found an empty range and can proceed.
307 		 */
308 		xas_next_entry(&xas, xa_last - 1);
309 		if (xas.xa_node == XAS_BOUNDS || xas.xa_index >= xa_last)
310 			break;
311 	}
312 
313 	for (i = xa_first; i < xa_last; i++) {
314 		err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL);
315 		if (err)
316 			goto err_undo;
317 	}
318 
319 	/*
320 	 * Internally the kernel uses a page offset, in libc this is a byte
321 	 * offset. Drivers should not return pgoff to userspace.
322 	 */
323 	entry->start_pgoff = xa_first;
324 	xa_unlock(&ucontext->mmap_xa);
325 	mutex_unlock(&ufile->umap_lock);
326 
327 	ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#x] inserted\n",
328 		  entry->start_pgoff, npages);
329 
330 	return 0;
331 
332 err_undo:
333 	for (; i > xa_first; i--)
334 		__xa_erase(&ucontext->mmap_xa, i - 1);
335 
336 err_unlock:
337 	xa_unlock(&ucontext->mmap_xa);
338 	mutex_unlock(&ufile->umap_lock);
339 	return -ENOMEM;
340 }
341 EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range);
342 
343 /**
344  * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa.
345  *
346  * @ucontext: associated user context.
347  * @entry: the entry to insert into the mmap_xa
348  * @length: length of the address that will be mmapped
349  *
350  * This function should be called by drivers that use the rdma_user_mmap
351  * interface for handling user mmapped addresses. The database is handled in
352  * the core and helper functions are provided to insert entries into the
353  * database and extract entries when the user calls mmap with the given offset.
354  * The function allocates a unique page offset that should be provided to user,
355  * the user will use the offset to retrieve information such as address to
356  * be mapped and how.
357  *
 * Return: 0 on success, -EINVAL if @entry is NULL and -ENOMEM on failure
359  */
int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
				struct rdma_user_mmap_entry *entry,
				size_t length)
{
	/* No range restriction: allow any page offset from 0 to U32_MAX */
	return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0,
						 U32_MAX);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert);
368