1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 3 * 4 * Copyright (c) 2005 Mellanox Technologies. All rights reserved. 5 * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. 6 * Copyright 2019 Marvell. All rights reserved. 7 * 8 * This software is available to you under a choice of one of two 9 * licenses. You may choose to be licensed under the terms of the GNU 10 * General Public License (GPL) Version 2, available from the file 11 * COPYING in the main directory of this source tree, or the 12 * OpenIB.org BSD license below: 13 * 14 * Redistribution and use in source and binary forms, with or 15 * without modification, are permitted provided that the following 16 * conditions are met: 17 * 18 * - Redistributions of source code must retain the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer. 21 * 22 * - Redistributions in binary form must reproduce the above 23 * copyright notice, this list of conditions and the following 24 * disclaimer in the documentation and/or other materials 25 * provided with the distribution. 26 * 27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 34 * SOFTWARE. 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include <linux/xarray.h> 41 #include "uverbs.h" 42 #include "core_priv.h" 43 44 /** 45 * rdma_umap_priv_init() - Initialize the private data of a vma 46 * 47 * @priv: The already allocated private data 48 * @vma: The vm area struct that needs private data 49 * @entry: entry into the mmap_xa that needs to be linked with 50 * this vma 51 * 52 * Each time we map IO memory into user space this keeps track of the 53 * mapping. When the device is hot-unplugged we 'zap' the mmaps in user space 54 * to point to the zero page and allow the hot unplug to proceed. 55 * 56 * This is necessary for cases like PCI physical hot unplug as the actual BAR 57 * memory may vanish after this and access to it from userspace could MCE. 58 * 59 * RDMA drivers supporting disassociation must have their user space designed 60 * to cope in some way with their IO pages going to the zero page. 61 * 62 */ 63 void rdma_umap_priv_init(struct rdma_umap_priv *priv, 64 struct vm_area_struct *vma, 65 struct rdma_user_mmap_entry *entry) 66 { 67 struct ib_uverbs_file *ufile = vma->vm_file->private_data; 68 69 priv->vma = vma; 70 if (entry) { 71 kref_get(&entry->ref); 72 priv->entry = entry; 73 } 74 vma->vm_private_data = priv; 75 /* vm_ops is setup in ib_uverbs_mmap() to avoid module dependencies */ 76 77 mutex_lock(&ufile->umap_lock); 78 list_add(&priv->list, &ufile->umaps); 79 mutex_unlock(&ufile->umap_lock); 80 } 81 EXPORT_SYMBOL(rdma_umap_priv_init); 82 83 /** 84 * rdma_user_mmap_io() - Map IO memory into a process 85 * 86 * @ucontext: associated user context 87 * @vma: the vma related to the current mmap call 88 * @pfn: pfn to map 89 * @size: size to map 90 * @prot: pgprot to use in remap call 91 * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL 92 * if mmap_entry is not used by the driver 93 * 94 * This is to be called by drivers as part of their mmap() functions if they 95 * wish to send something like PCI-E BAR memory to userspace. 96 * 97 * Return -EINVAL on wrong flags or size, -EAGAIN on failure to map. 0 on 98 * success. 99 */ 100 int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma, 101 unsigned long pfn, unsigned long size, pgprot_t prot, 102 struct rdma_user_mmap_entry *entry) 103 { 104 struct ib_uverbs_file *ufile = ucontext->ufile; 105 struct rdma_umap_priv *priv; 106 107 if (!(vma->vm_flags & VM_SHARED)) 108 return -EINVAL; 109 110 if (vma->vm_end - vma->vm_start != size) 111 return -EINVAL; 112 113 /* Driver is using this wrong, must be called by ib_uverbs_mmap */ 114 if (WARN_ON(!vma->vm_file || 115 vma->vm_file->private_data != ufile)) 116 return -EINVAL; 117 118 priv = kzalloc(sizeof(*priv), GFP_KERNEL); 119 if (!priv) 120 return -ENOMEM; 121 122 vma->vm_page_prot = prot; 123 if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) { 124 kfree(priv); 125 return -EAGAIN; 126 } 127 128 rdma_umap_priv_init(priv, vma, entry); 129 return 0; 130 } 131 EXPORT_SYMBOL(rdma_user_mmap_io); 132 133 /** 134 * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa 135 * 136 * @ucontext: associated user context 137 * @pgoff: The mmap offset >> PAGE_SHIFT 138 * 139 * This function is called when a user tries to mmap with an offset (returned 140 * by rdma_user_mmap_get_offset()) it initially received from the driver. The 141 * rdma_user_mmap_entry was created by the function 142 * rdma_user_mmap_entry_insert(). This function increases the refcnt of the 143 * entry so that it won't be deleted from the xarray in the meantime. 144 * 145 * Return an reference to an entry if exists or NULL if there is no 146 * match. rdma_user_mmap_entry_put() must be called to put the reference. 147 */ 148 struct rdma_user_mmap_entry * 149 rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext, 150 unsigned long pgoff) 151 { 152 struct rdma_user_mmap_entry *entry; 153 154 if (pgoff > U32_MAX) 155 return NULL; 156 157 xa_lock(&ucontext->mmap_xa); 158 159 entry = xa_load(&ucontext->mmap_xa, pgoff); 160 161 /* 162 * If refcount is zero, entry is already being deleted, driver_removed 163 * indicates that the no further mmaps are possible and we waiting for 164 * the active VMAs to be closed. 165 */ 166 if (!entry || entry->start_pgoff != pgoff || entry->driver_removed || 167 !kref_get_unless_zero(&entry->ref)) 168 goto err; 169 170 xa_unlock(&ucontext->mmap_xa); 171 172 return entry; 173 174 err: 175 xa_unlock(&ucontext->mmap_xa); 176 return NULL; 177 } 178 EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff); 179 180 /** 181 * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa 182 * 183 * @ucontext: associated user context 184 * @vma: the vma being mmap'd into 185 * 186 * This function is like rdma_user_mmap_entry_get_pgoff() except that it also 187 * checks that the VMA is correct. 188 */ 189 struct rdma_user_mmap_entry * 190 rdma_user_mmap_entry_get(struct ib_ucontext *ucontext, 191 struct vm_area_struct *vma) 192 { 193 struct rdma_user_mmap_entry *entry; 194 195 if (!(vma->vm_flags & VM_SHARED)) 196 return NULL; 197 entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff); 198 if (!entry) 199 return NULL; 200 if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) { 201 rdma_user_mmap_entry_put(entry); 202 return NULL; 203 } 204 return entry; 205 } 206 EXPORT_SYMBOL(rdma_user_mmap_entry_get); 207 208 static void rdma_user_mmap_entry_free(struct kref *kref) 209 { 210 struct rdma_user_mmap_entry *entry = 211 container_of(kref, struct rdma_user_mmap_entry, ref); 212 struct ib_ucontext *ucontext = entry->ucontext; 213 unsigned long i; 214 215 /* 216 * Erase all entries occupied by this single entry, this is deferred 217 * until all VMA are closed so that the mmap offsets remain unique. 218 */ 219 xa_lock(&ucontext->mmap_xa); 220 for (i = 0; i < entry->npages; i++) 221 __xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i); 222 xa_unlock(&ucontext->mmap_xa); 223 224 if (ucontext->device->mmap_free) 225 ucontext->device->mmap_free(entry); 226 } 227 228 /** 229 * rdma_user_mmap_entry_put() - Drop reference to the mmap entry 230 * 231 * @entry: an entry in the mmap_xa 232 * 233 * This function is called when the mapping is closed if it was 234 * an io mapping or when the driver is done with the entry for 235 * some other reason. 236 * Should be called after rdma_user_mmap_entry_get was called 237 * and entry is no longer needed. This function will erase the 238 * entry and free it if its refcnt reaches zero. 239 */ 240 void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry) 241 { 242 kref_put(&entry->ref, rdma_user_mmap_entry_free); 243 } 244 EXPORT_SYMBOL(rdma_user_mmap_entry_put); 245 246 /** 247 * rdma_user_mmap_entry_remove() - Drop reference to entry and 248 * mark it as unmmapable 249 * 250 * @entry: the entry to insert into the mmap_xa 251 * 252 * Drivers can call this to prevent userspace from creating more mappings for 253 * entry, however existing mmaps continue to exist and ops->mmap_free() will 254 * not be called until all user mmaps are destroyed. 255 */ 256 void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry) 257 { 258 if (!entry) 259 return; 260 261 xa_lock(&entry->ucontext->mmap_xa); 262 entry->driver_removed = true; 263 xa_unlock(&entry->ucontext->mmap_xa); 264 kref_put(&entry->ref, rdma_user_mmap_entry_free); 265 } 266 EXPORT_SYMBOL(rdma_user_mmap_entry_remove); 267 268 /** 269 * rdma_user_mmap_entry_insert_range() - Insert an entry to the mmap_xa 270 * in a given range. 271 * 272 * @ucontext: associated user context. 273 * @entry: the entry to insert into the mmap_xa 274 * @length: length of the address that will be mmapped 275 * @min_pgoff: minimum pgoff to be returned 276 * @max_pgoff: maximum pgoff to be returned 277 * 278 * This function should be called by drivers that use the rdma_user_mmap 279 * interface for implementing their mmap syscall A database of mmap offsets is 280 * handled in the core and helper functions are provided to insert entries 281 * into the database and extract entries when the user calls mmap with the 282 * given offset. The function allocates a unique page offset in a given range 283 * that should be provided to user, the user will use the offset to retrieve 284 * information such as address to be mapped and how. 285 * 286 * Return: 0 on success and -ENOMEM on failure 287 */ 288 int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext, 289 struct rdma_user_mmap_entry *entry, 290 size_t length, u32 min_pgoff, 291 u32 max_pgoff) 292 { 293 struct ib_uverbs_file *ufile = ucontext->ufile; 294 u32 xa_first, xa_last, npages; 295 int err; 296 u32 i; 297 u32 j; 298 299 if (!entry) 300 return -EINVAL; 301 302 kref_init(&entry->ref); 303 entry->ucontext = ucontext; 304 305 /* 306 * We want the whole allocation to be done without interruption from a 307 * different thread. The allocation requires finding a free range and 308 * storing. During the xa_insert the lock could be released, possibly 309 * allowing another thread to choose the same range. 310 */ 311 mutex_lock(&ufile->umap_lock); 312 313 xa_lock(&ucontext->mmap_xa); 314 315 /* We want to find an empty range */ 316 npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE); 317 entry->npages = npages; 318 319 /* Find an empty range */ 320 for (i = min_pgoff, j = 0; (i + j) <= max_pgoff && j != npages; ) { 321 if (xa_load(&ucontext->mmap_xa, i + j) != NULL) { 322 if (unlikely(i + j == max_pgoff)) 323 break; 324 i = i + j + 1; 325 j = 0; 326 } else { 327 if (unlikely(i + j == max_pgoff)) 328 break; 329 j++; 330 } 331 } 332 333 if (j != npages) 334 goto err_unlock; 335 336 xa_first = i; 337 xa_last = i + j; 338 339 for (i = xa_first; i < xa_last; i++) { 340 err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL); 341 if (err) 342 goto err_undo; 343 } 344 345 /* 346 * Internally the kernel uses a page offset, in libc this is a byte 347 * offset. Drivers should not return pgoff to userspace. 348 */ 349 entry->start_pgoff = xa_first; 350 xa_unlock(&ucontext->mmap_xa); 351 mutex_unlock(&ufile->umap_lock); 352 353 return 0; 354 355 err_undo: 356 for (; i > xa_first; i--) 357 __xa_erase(&ucontext->mmap_xa, i - 1); 358 359 err_unlock: 360 xa_unlock(&ucontext->mmap_xa); 361 mutex_unlock(&ufile->umap_lock); 362 return -ENOMEM; 363 } 364 EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range); 365 366 /** 367 * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa. 368 * 369 * @ucontext: associated user context. 370 * @entry: the entry to insert into the mmap_xa 371 * @length: length of the address that will be mmapped 372 * 373 * This function should be called by drivers that use the rdma_user_mmap 374 * interface for handling user mmapped addresses. The database is handled in 375 * the core and helper functions are provided to insert entries into the 376 * database and extract entries when the user calls mmap with the given offset. 377 * The function allocates a unique page offset that should be provided to user, 378 * the user will use the offset to retrieve information such as address to 379 * be mapped and how. 380 * 381 * Return: 0 on success and -ENOMEM on failure 382 */ 383 int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext, 384 struct rdma_user_mmap_entry *entry, 385 size_t length) 386 { 387 return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0, 388 U32_MAX); 389 } 390 EXPORT_SYMBOL(rdma_user_mmap_entry_insert); 391