/*-
 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
 *
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <sys/cdefs.h>

#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <linux/kthread.h>
#include <linux/wait.h>

#include <rdma/ib_fmr_pool.h>

#include "core_priv.h"

#define PFX "fmr_pool: "

enum {
	IB_FMR_MAX_REMAPS = 32,

	IB_FMR_HASH_BITS  = 8,
	IB_FMR_HASH_SIZE  = 1 << IB_FMR_HASH_BITS,
	IB_FMR_HASH_MASK  = IB_FMR_HASH_SIZE - 1
};

/*
 * If an FMR is not in use, then the list member will point to either
 * its pool's free_list (if the FMR can be mapped again; that is,
 * remap_count < pool->max_remaps) or its pool's dirty_list (if the
 * FMR needs to be unmapped before being remapped).  In either of
 * these cases it is a bug if the ref_count is not 0.  In other words,
 * if ref_count is > 0, then the list member must not be linked into
 * either free_list or dirty_list.
 *
 * The cache_node member is used to link the FMR into a cache bucket
 * (if caching is enabled).  This is independent of the reference
 * count of the FMR.  When a valid FMR is released, its ref_count is
 * decremented, and if ref_count reaches 0, the FMR is placed in
 * either free_list or dirty_list as appropriate.  However, it is not
 * removed from the cache and may be "revived" if a call to
 * ib_fmr_pool_map_phys() occurs before the FMR is remapped.  In
 * this case we just increment the ref_count and remove the FMR from
 * free_list/dirty_list.
 *
 * Before we remap an FMR from free_list, we remove it from the cache
 * (to prevent another user from obtaining a stale FMR).  When an FMR
 * is released, we add it to the tail of the free list, so that our
 * cache eviction policy is "least recently used."
 *
 * All manipulation of ref_count, list and cache_node is protected by
 * pool_lock to maintain consistency.
 */
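
/*
 * As a quick reference, the transitions described above map onto the
 * entry points below roughly as follows:
 *
 *	ib_fmr_pool_map_phys()  cache hit:  ++ref_count; if it was 0,
 *	                                    unlink from free/dirty list
 *	                        cache miss: take head of free_list, drop
 *	                                    it from the cache, remap,
 *	                                    ref_count = 1
 *	ib_fmr_pool_unmap()     --ref_count; on reaching 0:
 *	                                    remap_count < max_remaps
 *	                                        -> tail of free_list
 *	                                    otherwise -> dirty_list
 *	ib_fmr_batch_release()  dirty_list -> ib_unmap_fmr(),
 *	                                    remap_count = 0 -> free_list
 */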

struct ib_fmr_pool {
	spinlock_t                pool_lock;

	int                       pool_size;
	int                       max_pages;
	int                       max_remaps;
	int                       dirty_watermark;
	int                       dirty_len;
	struct list_head          free_list;
	struct list_head          dirty_list;
	struct hlist_head        *cache_bucket;

	void                     (*flush_function)(struct ib_fmr_pool *pool,
						   void *arg);
	void                     *flush_arg;

	struct task_struct       *thread;

	atomic_t                  req_ser;
	atomic_t                  flush_ser;

	wait_queue_head_t         force_wait;
};

static inline u32 ib_fmr_hash(u64 first_page)
{
	return jhash_2words((u32) first_page, (u32) (first_page >> 32), 0) &
		(IB_FMR_HASH_SIZE - 1);
}

/* Caller must hold pool_lock */
static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool,
						      u64 *page_list,
						      int  page_list_len,
						      u64  io_virtual_address)
{
	struct hlist_head *bucket;
	struct ib_pool_fmr *fmr;

	if (!pool->cache_bucket)
		return NULL;

	bucket = pool->cache_bucket + ib_fmr_hash(*page_list);

	hlist_for_each_entry(fmr, bucket, cache_node)
		if (io_virtual_address == fmr->io_virtual_address &&
		    page_list_len == fmr->page_list_len &&
		    !memcmp(page_list, fmr->page_list,
			    page_list_len * sizeof *page_list))
			return fmr;

	return NULL;
}

static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
{
	int ret;
	struct ib_pool_fmr *fmr;
	LIST_HEAD(unmap_list);
	LIST_HEAD(fmr_list);

	spin_lock_irq(&pool->pool_lock);

	list_for_each_entry(fmr, &pool->dirty_list, list) {
		hlist_del_init(&fmr->cache_node);
		fmr->remap_count = 0;
		list_add_tail(&fmr->fmr->list, &fmr_list);

#ifdef DEBUG
		if (fmr->ref_count != 0) {
			pr_warn(PFX "Unmapping FMR %p with ref count %d\n",
				fmr, fmr->ref_count);
		}
#endif
	}

	list_splice_init(&pool->dirty_list, &unmap_list);
	pool->dirty_len = 0;

	spin_unlock_irq(&pool->pool_lock);

	if (list_empty(&unmap_list))
		return;

	ret = ib_unmap_fmr(&fmr_list);
	if (ret)
		pr_warn(PFX "ib_unmap_fmr returned %d\n", ret);

	spin_lock_irq(&pool->pool_lock);
	list_splice(&unmap_list, &pool->free_list);
	spin_unlock_irq(&pool->pool_lock);
}

static int ib_fmr_cleanup_thread(void *pool_ptr)
{
	struct ib_fmr_pool *pool = pool_ptr;

	do {
		if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) {
			ib_fmr_batch_release(pool);

			atomic_inc(&pool->flush_ser);
			wake_up_interruptible(&pool->force_wait);

			if (pool->flush_function)
				pool->flush_function(pool, pool->flush_arg);
		}

		set_current_state(TASK_INTERRUPTIBLE);
		if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 &&
		    !kthread_should_stop())
			schedule();
		__set_current_state(TASK_RUNNING);
	} while (!kthread_should_stop());

	return 0;
}
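
/*
 * A note on the req_ser/flush_ser pairing (a restatement of the logic
 * above, for reference): flush requesters advance req_ser and wake the
 * thread; the thread calls ib_fmr_batch_release() while flush_ser
 * trails req_ser, advancing flush_ser once per batch.
 * ib_flush_fmr_pool() records serial = atomic_inc_return(&req_ser)
 * and sleeps on force_wait until flush_ser - serial >= 0, i.e. until
 * its request has been serviced.  The signed difference, rather than a
 * direct comparison, keeps the test correct across counter wraparound.
 */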

/**
 * ib_create_fmr_pool - Create an FMR pool
 * @pd: Protection domain for FMRs
 * @params: FMR pool parameters
 *
 * Create a pool of FMRs.  Return value is pointer to new pool or
 * error code if creation failed.
 */
struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
				       struct ib_fmr_pool_param *params)
{
	struct ib_device   *device;
	struct ib_fmr_pool *pool;
	int i;
	int ret;
	int max_remaps;

	if (!params)
		return ERR_PTR(-EINVAL);

	device = pd->device;
	if (!device->alloc_fmr    || !device->dealloc_fmr  ||
	    !device->map_phys_fmr || !device->unmap_fmr) {
		pr_info(PFX "Device %s does not support FMRs\n", device->name);
		return ERR_PTR(-ENOSYS);
	}

	if (!device->attrs.max_map_per_fmr)
		max_remaps = IB_FMR_MAX_REMAPS;
	else
		max_remaps = device->attrs.max_map_per_fmr;

	pool = kmalloc(sizeof *pool, GFP_KERNEL);
	if (!pool)
		return ERR_PTR(-ENOMEM);

	pool->cache_bucket   = NULL;
	pool->flush_function = params->flush_function;
	pool->flush_arg      = params->flush_arg;

	INIT_LIST_HEAD(&pool->free_list);
	INIT_LIST_HEAD(&pool->dirty_list);

	if (params->cache) {
		pool->cache_bucket =
			kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket,
				GFP_KERNEL);
		if (!pool->cache_bucket) {
			pr_warn(PFX "Failed to allocate cache in pool\n");
			ret = -ENOMEM;
			goto out_free_pool;
		}

		for (i = 0; i < IB_FMR_HASH_SIZE; ++i)
			INIT_HLIST_HEAD(pool->cache_bucket + i);
	}

	pool->pool_size       = 0;
	pool->max_pages       = params->max_pages_per_fmr;
	pool->max_remaps      = max_remaps;
	pool->dirty_watermark = params->dirty_watermark;
	pool->dirty_len       = 0;
	spin_lock_init(&pool->pool_lock);
	atomic_set(&pool->req_ser,   0);
	atomic_set(&pool->flush_ser, 0);
	init_waitqueue_head(&pool->force_wait);

	pool->thread = kthread_run(ib_fmr_cleanup_thread,
				   pool,
				   "ib_fmr(%s)",
				   device->name);
	if (IS_ERR(pool->thread)) {
		pr_warn(PFX "couldn't start cleanup thread\n");
		ret = PTR_ERR(pool->thread);
		goto out_free_pool;
	}

	{
		struct ib_pool_fmr *fmr;
		struct ib_fmr_attr fmr_attr = {
			.max_pages  = params->max_pages_per_fmr,
			.max_maps   = pool->max_remaps,
			.page_shift = params->page_shift
		};
		int bytes_per_fmr = sizeof *fmr;

		if (pool->cache_bucket)
			bytes_per_fmr += params->max_pages_per_fmr * sizeof (u64);

		for (i = 0; i < params->pool_size; ++i) {
			fmr = kmalloc(bytes_per_fmr, GFP_KERNEL);
			if (!fmr)
				goto out_fail;

			fmr->pool        = pool;
			fmr->remap_count = 0;
			fmr->ref_count   = 0;
			INIT_HLIST_NODE(&fmr->cache_node);

			fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr);
			if (IS_ERR(fmr->fmr)) {
				pr_warn(PFX "fmr_create failed for FMR %d\n",
					i);
				kfree(fmr);
				goto out_fail;
			}

			list_add_tail(&fmr->list, &pool->free_list);
			++pool->pool_size;
		}
	}

	return pool;

out_free_pool:
	kfree(pool->cache_bucket);
	kfree(pool);

	return ERR_PTR(ret);

out_fail:
	ib_destroy_fmr_pool(pool);

	return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(ib_create_fmr_pool);
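
/*
 * Example (illustrative only; the field values are placeholders chosen
 * for this sketch, not recommendations):
 *
 *	struct ib_fmr_pool_param params = {
 *		.max_pages_per_fmr = 64,
 *		.page_shift        = PAGE_SHIFT,
 *		.access            = IB_ACCESS_LOCAL_WRITE |
 *				     IB_ACCESS_REMOTE_WRITE,
 *		.pool_size         = 32,
 *		.dirty_watermark   = 8,
 *		.cache             = 1,
 *	};
 *	struct ib_fmr_pool *pool;
 *
 *	pool = ib_create_fmr_pool(pd, &params);
 *	if (IS_ERR(pool))
 *		return PTR_ERR(pool);
 */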

/**
 * ib_destroy_fmr_pool - Free FMR pool
 * @pool: FMR pool to free
 *
 * Destroy an FMR pool and free all associated resources.
 */
void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
{
	struct ib_pool_fmr *fmr;
	struct ib_pool_fmr *tmp;
	LIST_HEAD(fmr_list);
	int i;

	kthread_stop(pool->thread);
	ib_fmr_batch_release(pool);

	i = 0;
	list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) {
		if (fmr->remap_count) {
			INIT_LIST_HEAD(&fmr_list);
			list_add_tail(&fmr->fmr->list, &fmr_list);
			ib_unmap_fmr(&fmr_list);
		}
		ib_dealloc_fmr(fmr->fmr);
		list_del(&fmr->list);
		kfree(fmr);
		++i;
	}

	if (i < pool->pool_size)
		pr_warn(PFX "pool still has %d regions registered\n",
			pool->pool_size - i);

	kfree(pool->cache_bucket);
	kfree(pool);
}
EXPORT_SYMBOL(ib_destroy_fmr_pool);

/**
 * ib_flush_fmr_pool - Invalidate all unmapped FMRs
 * @pool: FMR pool to flush
 *
 * Ensure that all unmapped FMRs are fully invalidated.
 */
int ib_flush_fmr_pool(struct ib_fmr_pool *pool)
{
	int serial;
	struct ib_pool_fmr *fmr, *next;

	/*
	 * The free_list holds FMRs that may have been used
	 * but have not been remapped enough times to be dirty.
	 * Put them on the dirty list now so that the cleanup
	 * thread will reap them too.
	 */
	spin_lock_irq(&pool->pool_lock);
	list_for_each_entry_safe(fmr, next, &pool->free_list, list) {
		if (fmr->remap_count > 0)
			list_move(&fmr->list, &pool->dirty_list);
	}
	spin_unlock_irq(&pool->pool_lock);

	serial = atomic_inc_return(&pool->req_ser);
	wake_up_process(pool->thread);

	if (wait_event_interruptible(pool->force_wait,
				     atomic_read(&pool->flush_ser) - serial >= 0))
		return -EINTR;

	return 0;
}
EXPORT_SYMBOL(ib_flush_fmr_pool);
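
/*
 * Example (illustrative): a consumer that needs its unmapped buffers
 * to be inaccessible through the fabric before reusing them can pair
 * the unmap with a synchronous flush:
 *
 *	ib_fmr_pool_unmap(fmr);
 *	ret = ib_flush_fmr_pool(pool);
 *	(ret is -EINTR if the wait was interrupted, 0 otherwise)
 */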

/**
 * ib_fmr_pool_map_phys - Map an FMR from an FMR pool
 * @pool_handle: FMR pool to allocate FMR from
 * @page_list: List of pages to map
 * @list_len: Number of pages in @page_list
 * @io_virtual_address: I/O virtual address for new FMR
 *
 * Map an FMR from an FMR pool.
 */
struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
					 u64                *page_list,
					 int                 list_len,
					 u64                 io_virtual_address)
{
	struct ib_fmr_pool *pool = pool_handle;
	struct ib_pool_fmr *fmr;
	unsigned long       flags;
	int                 result;

	if (list_len < 1 || list_len > pool->max_pages)
		return ERR_PTR(-EINVAL);

	spin_lock_irqsave(&pool->pool_lock, flags);
	fmr = ib_fmr_cache_lookup(pool,
				  page_list,
				  list_len,
				  io_virtual_address);
	if (fmr) {
		/* found in cache */
		++fmr->ref_count;
		if (fmr->ref_count == 1)
			list_del(&fmr->list);

		spin_unlock_irqrestore(&pool->pool_lock, flags);

		return fmr;
	}

	if (list_empty(&pool->free_list)) {
		spin_unlock_irqrestore(&pool->pool_lock, flags);
		return ERR_PTR(-EAGAIN);
	}

	fmr = list_entry(pool->free_list.next, struct ib_pool_fmr, list);
	list_del(&fmr->list);
	hlist_del_init(&fmr->cache_node);
	spin_unlock_irqrestore(&pool->pool_lock, flags);

	result = ib_map_phys_fmr(fmr->fmr, page_list, list_len,
				 io_virtual_address);

	if (result) {
		spin_lock_irqsave(&pool->pool_lock, flags);
		list_add(&fmr->list, &pool->free_list);
		spin_unlock_irqrestore(&pool->pool_lock, flags);

		pr_warn(PFX "fmr_map returns %d\n", result);

		return ERR_PTR(result);
	}

	++fmr->remap_count;
	fmr->ref_count = 1;

	if (pool->cache_bucket) {
		fmr->io_virtual_address = io_virtual_address;
		fmr->page_list_len      = list_len;
		memcpy(fmr->page_list, page_list, list_len * sizeof(*page_list));

		spin_lock_irqsave(&pool->pool_lock, flags);
		hlist_add_head(&fmr->cache_node,
			       pool->cache_bucket + ib_fmr_hash(fmr->page_list[0]));
		spin_unlock_irqrestore(&pool->pool_lock, flags);
	}

	return fmr;
}
EXPORT_SYMBOL(ib_fmr_pool_map_phys);

/**
 * ib_fmr_pool_unmap - Unmap FMR
 * @fmr: FMR to unmap
 *
 * Unmap an FMR.  The FMR mapping may remain valid until the FMR is
 * reused (or until ib_flush_fmr_pool() is called).
 */
int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
{
	struct ib_fmr_pool *pool;
	unsigned long flags;

	pool = fmr->pool;

	spin_lock_irqsave(&pool->pool_lock, flags);

	--fmr->ref_count;
	if (!fmr->ref_count) {
		if (fmr->remap_count < pool->max_remaps) {
			list_add_tail(&fmr->list, &pool->free_list);
		} else {
			list_add_tail(&fmr->list, &pool->dirty_list);
			if (++pool->dirty_len >= pool->dirty_watermark) {
				atomic_inc(&pool->req_ser);
				wake_up_process(pool->thread);
			}
		}
	}

#ifdef DEBUG
	if (fmr->ref_count < 0)
		pr_warn(PFX "FMR %p has ref count %d < 0\n",
			fmr, fmr->ref_count);
#endif

	spin_unlock_irqrestore(&pool->pool_lock, flags);

	return 0;
}
EXPORT_SYMBOL(ib_fmr_pool_unmap);
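
/*
 * Example (illustrative only; page_list, npages and iova are
 * placeholders for values the caller already has): map a page list,
 * post work using the keys of the underlying FMR, then release the
 * mapping:
 *
 *	struct ib_pool_fmr *fmr;
 *
 *	fmr = ib_fmr_pool_map_phys(pool, page_list, npages, iova);
 *	if (IS_ERR(fmr))
 *		return PTR_ERR(fmr);
 *
 *	... post work requests using fmr->fmr->lkey / fmr->fmr->rkey ...
 *
 *	ib_fmr_pool_unmap(fmr);
 */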