/*-
 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
 *
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <linux/kthread.h>
#include <linux/wait.h>

#include <rdma/ib_fmr_pool.h>

#include "core_priv.h"

#define PFX "fmr_pool: "

enum {
        IB_FMR_MAX_REMAPS = 32,

        IB_FMR_HASH_BITS  = 8,
        IB_FMR_HASH_SIZE  = 1 << IB_FMR_HASH_BITS,
        IB_FMR_HASH_MASK  = IB_FMR_HASH_SIZE - 1
};

/*
 * If an FMR is not in use, then the list member will point to either
 * its pool's free_list (if the FMR can be mapped again; that is,
 * remap_count < pool->max_remaps) or its pool's dirty_list (if the
 * FMR needs to be unmapped before being remapped).  In either of
 * these cases it is a bug if the ref_count is not 0.  In other words,
 * if ref_count is > 0, then the list member must not be linked into
 * either free_list or dirty_list.
 *
 * The cache_node member is used to link the FMR into a cache bucket
 * (if caching is enabled).  This is independent of the reference
 * count of the FMR.  When a valid FMR is released, its ref_count is
 * decremented, and if ref_count reaches 0, the FMR is placed in
 * either free_list or dirty_list as appropriate.  However, it is not
 * removed from the cache and may be "revived" if a call to
 * ib_fmr_pool_map_phys() occurs before the FMR is remapped.  In
 * this case we just increment the ref_count and remove the FMR from
 * free_list/dirty_list.
 *
 * Before we remap an FMR from free_list, we remove it from the cache
 * (to prevent another user from obtaining a stale FMR).  When an FMR
 * is released, we add it to the tail of the free list, so that our
 * cache eviction policy is "least recently used."
 *
 * All manipulation of ref_count, list and cache_node is protected by
 * pool_lock to maintain consistency.
 */
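/*
 * To make the lifecycle above concrete, here is a minimal
 * consumer-side sketch of the pool API.  It is illustrative only:
 * the protection domain "pd", the dma_pages[] array, "npages",
 * "iova" and every parameter value below are assumptions chosen for
 * the example, not requirements of this API.
 *
 *	struct ib_fmr_pool_param params = {
 *		.max_pages_per_fmr = 64,
 *		.page_shift        = PAGE_SHIFT,
 *		.access            = IB_ACCESS_LOCAL_WRITE,
 *		.pool_size         = 32,
 *		.dirty_watermark   = 8,
 *		.cache             = 1,
 *	};
 *	struct ib_fmr_pool *pool;
 *	struct ib_pool_fmr *fmr;
 *
 *	pool = ib_create_fmr_pool(pd, &params);
 *	if (IS_ERR(pool))
 *		return PTR_ERR(pool);
 *
 *	fmr = ib_fmr_pool_map_phys(pool, dma_pages, npages, iova);
 *	if (!IS_ERR(fmr)) {
 *		(post work requests using fmr->fmr->lkey / rkey)
 *		ib_fmr_pool_unmap(fmr);
 *	}
 *
 *	ib_destroy_fmr_pool(pool);
 */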
struct ib_fmr_pool {
        spinlock_t                pool_lock;

        int                       pool_size;
        int                       max_pages;
        int                       max_remaps;
        int                       dirty_watermark;
        int                       dirty_len;
        struct list_head          free_list;
        struct list_head          dirty_list;
        struct hlist_head        *cache_bucket;

        void                     (*flush_function)(struct ib_fmr_pool *pool,
                                                   void *arg);
        void                     *flush_arg;

        struct task_struct       *thread;

        atomic_t                  req_ser;
        atomic_t                  flush_ser;

        wait_queue_head_t         force_wait;
};

static inline u32 ib_fmr_hash(u64 first_page)
{
        return jhash_2words((u32) first_page, (u32) (first_page >> 32), 0) &
                IB_FMR_HASH_MASK;
}

/* Caller must hold pool_lock */
static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool,
                                                      u64 *page_list,
                                                      int  page_list_len,
                                                      u64  io_virtual_address)
{
        struct hlist_head *bucket;
        struct ib_pool_fmr *fmr;

        if (!pool->cache_bucket)
                return NULL;

        bucket = pool->cache_bucket + ib_fmr_hash(*page_list);

        hlist_for_each_entry(fmr, bucket, cache_node)
                if (io_virtual_address == fmr->io_virtual_address &&
                    page_list_len == fmr->page_list_len &&
                    !memcmp(page_list, fmr->page_list,
                            page_list_len * sizeof *page_list))
                        return fmr;

        return NULL;
}

static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
{
        int                 ret;
        struct ib_pool_fmr *fmr;
        LIST_HEAD(unmap_list);
        LIST_HEAD(fmr_list);

        spin_lock_irq(&pool->pool_lock);

        list_for_each_entry(fmr, &pool->dirty_list, list) {
                hlist_del_init(&fmr->cache_node);
                fmr->remap_count = 0;
                list_add_tail(&fmr->fmr->list, &fmr_list);

#ifdef DEBUG
                if (fmr->ref_count != 0) {
                        pr_warn(PFX "Unmapping FMR %p with ref count %d\n",
                                fmr, fmr->ref_count);
                }
#endif
        }

        list_splice_init(&pool->dirty_list, &unmap_list);
        pool->dirty_len = 0;

        spin_unlock_irq(&pool->pool_lock);

        if (list_empty(&unmap_list)) {
                return;
        }

        ret = ib_unmap_fmr(&fmr_list);
        if (ret)
                pr_warn(PFX "ib_unmap_fmr returned %d\n", ret);

        spin_lock_irq(&pool->pool_lock);
        list_splice(&unmap_list, &pool->free_list);
        spin_unlock_irq(&pool->pool_lock);
}

static int ib_fmr_cleanup_thread(void *pool_ptr)
{
        struct ib_fmr_pool *pool = pool_ptr;

        do {
                if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) {
                        ib_fmr_batch_release(pool);

                        atomic_inc(&pool->flush_ser);
                        wake_up_interruptible(&pool->force_wait);

                        if (pool->flush_function)
                                pool->flush_function(pool, pool->flush_arg);
                }

                set_current_state(TASK_INTERRUPTIBLE);
                if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 &&
                    !kthread_should_stop())
                        schedule();
                __set_current_state(TASK_RUNNING);
        } while (!kthread_should_stop());

        return 0;
}
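/*
 * The cleanup thread above coordinates with flush requesters through
 * the req_ser/flush_ser serial numbers: every flush request bumps
 * req_ser, and the thread bumps flush_ser after each completed batch
 * release.  For example, with req_ser == 3 and flush_ser == 2 the
 * difference flush_ser - req_ser == -1, so one requested flush is
 * still outstanding and the thread runs ib_fmr_batch_release() again
 * instead of sleeping; a waiter that requested serial 3 is woken from
 * force_wait once flush_ser - 3 >= 0.
 *
 * A consumer can also observe completed flushes via the optional
 * flush_function callback.  A hedged sketch follows (my_flush_done
 * and struct my_ctx are hypothetical names, not part of this API):
 *
 *	static void my_flush_done(struct ib_fmr_pool *pool, void *arg)
 *	{
 *		struct my_ctx *ctx = arg;
 *
 *		complete(&ctx->flush_done);
 *	}
 *
 * The callback runs in the cleanup thread's context, after flush_ser
 * has been advanced and force_wait has been woken.
 */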
/**
 * ib_create_fmr_pool - Create an FMR pool
 * @pd: Protection domain for FMRs
 * @params: FMR pool parameters
 *
 * Create a pool of FMRs.  Return value is pointer to new pool or
 * error code if creation failed.
 */
struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
                                       struct ib_fmr_pool_param *params)
{
        struct ib_device   *device;
        struct ib_fmr_pool *pool;
        int i;
        int ret;
        int max_remaps;

        if (!params)
                return ERR_PTR(-EINVAL);

        device = pd->device;
        if (!device->alloc_fmr    || !device->dealloc_fmr  ||
            !device->map_phys_fmr || !device->unmap_fmr) {
                pr_info(PFX "Device %s does not support FMRs\n", device->name);
                return ERR_PTR(-ENOSYS);
        }

        if (!device->attrs.max_map_per_fmr)
                max_remaps = IB_FMR_MAX_REMAPS;
        else
                max_remaps = device->attrs.max_map_per_fmr;

        pool = kmalloc(sizeof *pool, GFP_KERNEL);
        if (!pool)
                return ERR_PTR(-ENOMEM);

        pool->cache_bucket   = NULL;
        pool->flush_function = params->flush_function;
        pool->flush_arg      = params->flush_arg;

        INIT_LIST_HEAD(&pool->free_list);
        INIT_LIST_HEAD(&pool->dirty_list);

        if (params->cache) {
                pool->cache_bucket =
                        kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket,
                                GFP_KERNEL);
                if (!pool->cache_bucket) {
                        pr_warn(PFX "Failed to allocate cache in pool\n");
                        ret = -ENOMEM;
                        goto out_free_pool;
                }

                for (i = 0; i < IB_FMR_HASH_SIZE; ++i)
                        INIT_HLIST_HEAD(pool->cache_bucket + i);
        }

        pool->pool_size       = 0;
        pool->max_pages       = params->max_pages_per_fmr;
        pool->max_remaps      = max_remaps;
        pool->dirty_watermark = params->dirty_watermark;
        pool->dirty_len       = 0;
        spin_lock_init(&pool->pool_lock);
        atomic_set(&pool->req_ser,   0);
        atomic_set(&pool->flush_ser, 0);
        init_waitqueue_head(&pool->force_wait);

        pool->thread = kthread_run(ib_fmr_cleanup_thread,
                                   pool,
                                   "ib_fmr(%s)",
                                   device->name);
        if (IS_ERR(pool->thread)) {
                pr_warn(PFX "couldn't start cleanup thread\n");
                ret = PTR_ERR(pool->thread);
                goto out_free_pool;
        }

        {
                struct ib_pool_fmr *fmr;
                struct ib_fmr_attr fmr_attr = {
                        .max_pages  = params->max_pages_per_fmr,
                        .max_maps   = pool->max_remaps,
                        .page_shift = params->page_shift
                };
                int bytes_per_fmr = sizeof *fmr;

                if (pool->cache_bucket)
                        bytes_per_fmr += params->max_pages_per_fmr * sizeof (u64);

                for (i = 0; i < params->pool_size; ++i) {
                        fmr = kmalloc(bytes_per_fmr, GFP_KERNEL);
                        if (!fmr)
                                goto out_fail;

                        fmr->pool        = pool;
                        fmr->remap_count = 0;
                        fmr->ref_count   = 0;
                        INIT_HLIST_NODE(&fmr->cache_node);

                        fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr);
                        if (IS_ERR(fmr->fmr)) {
                                pr_warn(PFX "fmr_create failed for FMR %d\n",
                                        i);
                                kfree(fmr);
                                goto out_fail;
                        }

                        list_add_tail(&fmr->list, &pool->free_list);
                        ++pool->pool_size;
                }
        }

        return pool;

out_free_pool:
        kfree(pool->cache_bucket);
        kfree(pool);

        return ERR_PTR(ret);

out_fail:
        ib_destroy_fmr_pool(pool);

        return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(ib_create_fmr_pool);
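/*
 * A note on the per-FMR allocation above: struct ib_pool_fmr ends in
 * a flexible page_list[] array, so when caching is enabled each pool
 * entry is allocated with room for max_pages_per_fmr page addresses
 * so that later mappings can be compared against the cache.  With
 * illustrative numbers (max_pages_per_fmr == 64, caching on):
 *
 *	bytes_per_fmr = sizeof(struct ib_pool_fmr) + 64 * sizeof(u64)
 *	              = sizeof(struct ib_pool_fmr) + 512 bytes
 *
 * With caching off, the trailing array is never read, so only the
 * bare structure is allocated.
 */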
/**
 * ib_destroy_fmr_pool - Free FMR pool
 * @pool: FMR pool to free
 *
 * Destroy an FMR pool and free all associated resources.
 */
void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
{
        struct ib_pool_fmr *fmr;
        struct ib_pool_fmr *tmp;
        LIST_HEAD(fmr_list);
        int i;

        kthread_stop(pool->thread);
        ib_fmr_batch_release(pool);

        i = 0;
        list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) {
                if (fmr->remap_count) {
                        INIT_LIST_HEAD(&fmr_list);
                        list_add_tail(&fmr->fmr->list, &fmr_list);
                        ib_unmap_fmr(&fmr_list);
                }
                ib_dealloc_fmr(fmr->fmr);
                list_del(&fmr->list);
                kfree(fmr);
                ++i;
        }

        if (i < pool->pool_size)
                pr_warn(PFX "pool still has %d regions registered\n",
                        pool->pool_size - i);

        kfree(pool->cache_bucket);
        kfree(pool);
}
EXPORT_SYMBOL(ib_destroy_fmr_pool);

/**
 * ib_flush_fmr_pool - Invalidate all unmapped FMRs
 * @pool: FMR pool to flush
 *
 * Ensure that all unmapped FMRs are fully invalidated.
 */
int ib_flush_fmr_pool(struct ib_fmr_pool *pool)
{
        int serial;
        struct ib_pool_fmr *fmr, *next;

        /*
         * The free_list holds FMRs that may have been used
         * but have not been remapped enough times to be dirty.
         * Put them on the dirty list now so that the cleanup
         * thread will reap them too.
         */
        spin_lock_irq(&pool->pool_lock);
        list_for_each_entry_safe(fmr, next, &pool->free_list, list) {
                if (fmr->remap_count > 0)
                        list_move(&fmr->list, &pool->dirty_list);
        }
        spin_unlock_irq(&pool->pool_lock);

        serial = atomic_inc_return(&pool->req_ser);
        wake_up_process(pool->thread);

        if (wait_event_interruptible(pool->force_wait,
                                     atomic_read(&pool->flush_ser) - serial >= 0))
                return -EINTR;

        return 0;
}
EXPORT_SYMBOL(ib_flush_fmr_pool);
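/*
 * A hedged usage sketch for ib_flush_fmr_pool(): a consumer that
 * wants all previously unmapped FMRs invalidated before proceeding
 * (for example before tearing down a connection) can force a
 * synchronous flush.  Note that -EINTR only means the wait was
 * interrupted by a signal; the flush itself still completes
 * asynchronously in the cleanup thread:
 *
 *	ret = ib_flush_fmr_pool(pool);
 *	if (ret == -EINTR)
 *		(wait again or propagate the error, caller's choice)
 */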
/**
 * ib_fmr_pool_map_phys - Map an FMR from an FMR pool
 * @pool_handle: FMR pool to allocate FMR from
 * @page_list: List of pages to map
 * @list_len: Number of pages in @page_list
 * @io_virtual_address: I/O virtual address for new FMR
 *
 * Map an FMR from an FMR pool.
 */
struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
                                         u64                *page_list,
                                         int                 list_len,
                                         u64                 io_virtual_address)
{
        struct ib_fmr_pool *pool = pool_handle;
        struct ib_pool_fmr *fmr;
        unsigned long       flags;
        int                 result;

        if (list_len < 1 || list_len > pool->max_pages)
                return ERR_PTR(-EINVAL);

        spin_lock_irqsave(&pool->pool_lock, flags);
        fmr = ib_fmr_cache_lookup(pool,
                                  page_list,
                                  list_len,
                                  io_virtual_address);
        if (fmr) {
                /* found in cache */
                ++fmr->ref_count;
                if (fmr->ref_count == 1) {
                        list_del(&fmr->list);
                }

                spin_unlock_irqrestore(&pool->pool_lock, flags);

                return fmr;
        }

        if (list_empty(&pool->free_list)) {
                spin_unlock_irqrestore(&pool->pool_lock, flags);
                return ERR_PTR(-EAGAIN);
        }

        fmr = list_entry(pool->free_list.next, struct ib_pool_fmr, list);
        list_del(&fmr->list);
        hlist_del_init(&fmr->cache_node);
        spin_unlock_irqrestore(&pool->pool_lock, flags);

        result = ib_map_phys_fmr(fmr->fmr, page_list, list_len,
                                 io_virtual_address);

        if (result) {
                spin_lock_irqsave(&pool->pool_lock, flags);
                list_add(&fmr->list, &pool->free_list);
                spin_unlock_irqrestore(&pool->pool_lock, flags);

                pr_warn(PFX "fmr_map returns %d\n", result);

                return ERR_PTR(result);
        }

        ++fmr->remap_count;
        fmr->ref_count = 1;

        if (pool->cache_bucket) {
                fmr->io_virtual_address = io_virtual_address;
                fmr->page_list_len      = list_len;
                memcpy(fmr->page_list, page_list, list_len * sizeof(*page_list));

                spin_lock_irqsave(&pool->pool_lock, flags);
                hlist_add_head(&fmr->cache_node,
                               pool->cache_bucket + ib_fmr_hash(fmr->page_list[0]));
                spin_unlock_irqrestore(&pool->pool_lock, flags);
        }

        return fmr;
}
EXPORT_SYMBOL(ib_fmr_pool_map_phys);

/**
 * ib_fmr_pool_unmap - Unmap FMR
 * @fmr: FMR to unmap
 *
 * Unmap an FMR.  The FMR mapping may remain valid until the FMR is
 * reused (or until ib_flush_fmr_pool() is called).
 */
int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
{
        struct ib_fmr_pool *pool;
        unsigned long flags;

        pool = fmr->pool;

        spin_lock_irqsave(&pool->pool_lock, flags);

        --fmr->ref_count;
        if (!fmr->ref_count) {
                if (fmr->remap_count < pool->max_remaps) {
                        list_add_tail(&fmr->list, &pool->free_list);
                } else {
                        list_add_tail(&fmr->list, &pool->dirty_list);
                        if (++pool->dirty_len >= pool->dirty_watermark) {
                                atomic_inc(&pool->req_ser);
                                wake_up_process(pool->thread);
                        }
                }
        }

#ifdef DEBUG
        if (fmr->ref_count < 0)
                pr_warn(PFX "FMR %p has ref count %d < 0\n",
                        fmr, fmr->ref_count);
#endif

        spin_unlock_irqrestore(&pool->pool_lock, flags);

        return 0;
}
EXPORT_SYMBOL(ib_fmr_pool_unmap);
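/*
 * ib_fmr_pool_map_phys() returns ERR_PTR(-EAGAIN) when free_list is
 * empty, i.e. every FMR is either still referenced or sitting on
 * dirty_list waiting to be unmapped.  One possible (not prescribed)
 * recovery strategy is to flush the pool and retry once, since the
 * flush returns reaped FMRs to free_list.  A hedged sketch, where
 * pool, pages, npages and iova are the caller's own values:
 *
 *	fmr = ib_fmr_pool_map_phys(pool, pages, npages, iova);
 *	if (IS_ERR(fmr) && PTR_ERR(fmr) == -EAGAIN) {
 *		ib_flush_fmr_pool(pool);
 *		fmr = ib_fmr_pool_map_phys(pool, pages, npages, iova);
 *	}
 */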