/*-
 * Copyright (c) 2013-2017, Mellanox Technologies, Ltd. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "opt_rss.h"
#include "opt_ratelimit.h"

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/delay.h>
#include <dev/mlx5/driver.h>
#include <dev/mlx5/mlx5_core/mlx5_core.h>

/* Make sure the Linux-style PAGE_MASK, ~(PAGE_SIZE - 1), is in effect. */
CTASSERT((uintptr_t)PAGE_MASK > (uintptr_t)PAGE_SIZE);

struct mlx5_pages_req {
	struct mlx5_core_dev *dev;
	u16 func_id;
	s32 npages;
	struct work_struct work;
};

enum {
	MAX_RECLAIM_TIME_MSECS = 5000,
};

static void
mlx5_fwp_load_mem_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct mlx5_fw_page *fwp;
	uint8_t owned;

	fwp = (struct mlx5_fw_page *)arg;
	owned = MLX5_DMA_OWNED(fwp->dev);

	if (!owned)
		MLX5_DMA_LOCK(fwp->dev);

	if (error == 0) {
		KASSERT(nseg == 1, ("Number of segments is different from 1"));
		fwp->dma_addr = segs->ds_addr;
		fwp->load_done = MLX5_LOAD_ST_SUCCESS;
	} else {
		fwp->load_done = MLX5_LOAD_ST_FAILURE;
	}
	MLX5_DMA_DONE(fwp->dev);

	if (!owned)
		MLX5_DMA_UNLOCK(fwp->dev);
}

void
mlx5_fwp_flush(struct mlx5_fw_page *fwp)
{
	unsigned num = fwp->numpages;

	while (num--)
		bus_dmamap_sync(fwp[num].dev->cmd.dma_tag, fwp[num].dma_map,
		    BUS_DMASYNC_PREWRITE);
}

void
mlx5_fwp_invalidate(struct mlx5_fw_page *fwp)
{
	unsigned num = fwp->numpages;

	while (num--) {
		bus_dmamap_sync(fwp[num].dev->cmd.dma_tag, fwp[num].dma_map,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_sync(fwp[num].dev->cmd.dma_tag, fwp[num].dma_map,
		    BUS_DMASYNC_PREREAD);
	}
}
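
/*
 * Allocate an array of "num" firmware page trackers and DMA map one
 * 4K adapter page for each of them. When "num" is zero only the
 * tracking structure itself is allocated. Because the DMA maps are
 * loaded synchronously, a sleeping context is required whenever "num"
 * is non-zero. Returns NULL on failure.
 */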
struct mlx5_fw_page *
mlx5_fwp_alloc(struct mlx5_core_dev *dev, gfp_t flags, unsigned num)
{
	struct mlx5_fw_page *fwp;
	unsigned x;
	int err;

	/* check for special case */
	if (num == 0) {
		fwp = kzalloc(sizeof(*fwp), flags);
		if (fwp != NULL)
			fwp->dev = dev;
		return (fwp);
	}

	/* we need a sleeping context for this function */
	if (flags & M_NOWAIT)
		return (NULL);

	fwp = kzalloc(sizeof(*fwp) * num, flags);

	/* serialize loading the DMA map(s) */
	sx_xlock(&dev->cmd.dma_sx);

	for (x = 0; x != num; x++) {
		/* store pointer to MLX5 core device */
		fwp[x].dev = dev;
		/* store number of pages remaining in the array */
		fwp[x].numpages = num - x;

		/* allocate memory */
		err = bus_dmamem_alloc(dev->cmd.dma_tag, &fwp[x].virt_addr,
		    BUS_DMA_WAITOK | BUS_DMA_COHERENT, &fwp[x].dma_map);
		if (err != 0)
			goto failure;

		/* load memory into DMA */
		MLX5_DMA_LOCK(dev);
		(void) bus_dmamap_load(
		    dev->cmd.dma_tag, fwp[x].dma_map, fwp[x].virt_addr,
		    MLX5_ADAPTER_PAGE_SIZE, &mlx5_fwp_load_mem_cb,
		    fwp + x, BUS_DMA_WAITOK | BUS_DMA_COHERENT);

		while (fwp[x].load_done == MLX5_LOAD_ST_NONE)
			MLX5_DMA_WAIT(dev);
		MLX5_DMA_UNLOCK(dev);

		/* check for error */
		if (fwp[x].load_done != MLX5_LOAD_ST_SUCCESS) {
			bus_dmamem_free(dev->cmd.dma_tag, fwp[x].virt_addr,
			    fwp[x].dma_map);
			goto failure;
		}
	}
	sx_xunlock(&dev->cmd.dma_sx);
	return (fwp);

failure:
	while (x--) {
		bus_dmamap_unload(dev->cmd.dma_tag, fwp[x].dma_map);
		bus_dmamem_free(dev->cmd.dma_tag, fwp[x].virt_addr,
		    fwp[x].dma_map);
	}
	sx_xunlock(&dev->cmd.dma_sx);
	kfree(fwp);
	return (NULL);
}

void
mlx5_fwp_free(struct mlx5_fw_page *fwp)
{
	struct mlx5_core_dev *dev;
	unsigned num;

	/* be NULL safe */
	if (fwp == NULL)
		return;

	/* check for special case */
	if (fwp->numpages == 0) {
		kfree(fwp);
		return;
	}

	num = fwp->numpages;
	dev = fwp->dev;

	while (num--) {
		bus_dmamap_unload(dev->cmd.dma_tag, fwp[num].dma_map);
		bus_dmamem_free(dev->cmd.dma_tag, fwp[num].virt_addr,
		    fwp[num].dma_map);
	}

	kfree(fwp);
}

u64
mlx5_fwp_get_dma(struct mlx5_fw_page *fwp, size_t offset)
{
	size_t index = (offset / MLX5_ADAPTER_PAGE_SIZE);
	KASSERT(index < fwp->numpages, ("Invalid offset: %lld", (long long)offset));

	return ((fwp + index)->dma_addr + (offset % MLX5_ADAPTER_PAGE_SIZE));
}

void *
mlx5_fwp_get_virt(struct mlx5_fw_page *fwp, size_t offset)
{
	size_t index = (offset / MLX5_ADAPTER_PAGE_SIZE);
	KASSERT(index < fwp->numpages, ("Invalid offset: %lld", (long long)offset));

	return ((char *)(fwp + index)->virt_addr + (offset % MLX5_ADAPTER_PAGE_SIZE));
}

static int
mlx5_insert_fw_page_locked(struct mlx5_core_dev *dev, struct mlx5_fw_page *nfp)
{
	struct rb_root *root = &dev->priv.page_root;
	struct rb_node **new = &root->rb_node;
	struct rb_node *parent = NULL;
	struct mlx5_fw_page *tfp;

	while (*new) {
		parent = *new;
		tfp = rb_entry(parent, struct mlx5_fw_page, rb_node);
		if (tfp->dma_addr < nfp->dma_addr)
			new = &parent->rb_left;
		else if (tfp->dma_addr > nfp->dma_addr)
			new = &parent->rb_right;
		else
			return (-EEXIST);
	}

	rb_link_node(&nfp->rb_node, parent, new);
	rb_insert_color(&nfp->rb_node, root);
	return (0);
}

static struct mlx5_fw_page *
mlx5_remove_fw_page_locked(struct mlx5_core_dev *dev, bus_addr_t addr)
{
	struct rb_root *root = &dev->priv.page_root;
	struct rb_node *tmp = root->rb_node;
	struct mlx5_fw_page *result = NULL;
	struct mlx5_fw_page *tfp;

	while (tmp) {
		tfp = rb_entry(tmp, struct mlx5_fw_page, rb_node);
		if (tfp->dma_addr < addr) {
			tmp = tmp->rb_left;
		} else if (tfp->dma_addr > addr) {
			tmp = tmp->rb_right;
		} else {
			rb_erase(&tfp->rb_node, &dev->priv.page_root);
			result = tfp;
			break;
		}
	}
	return (result);
}
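
/*
 * Allocate a single firmware page on behalf of "func_id" and insert
 * it into the RB-tree of firmware pages, keyed by DMA address. On
 * success the bus address of the page is returned through "addr".
 */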
static int
alloc_4k(struct mlx5_core_dev *dev, u64 *addr, u16 func_id)
{
	struct mlx5_fw_page *fwp;
	int err;

	fwp = mlx5_fwp_alloc(dev, GFP_KERNEL, 1);
	if (fwp == NULL)
		return (-ENOMEM);

	fwp->func_id = func_id;

	MLX5_DMA_LOCK(dev);
	err = mlx5_insert_fw_page_locked(dev, fwp);
	MLX5_DMA_UNLOCK(dev);

	if (err != 0) {
		mlx5_fwp_free(fwp);
	} else {
		/* make sure cached data is cleaned */
		mlx5_fwp_invalidate(fwp);

		/* store DMA address */
		*addr = fwp->dma_addr;
	}
	return (err);
}

static void
free_4k(struct mlx5_core_dev *dev, u64 addr)
{
	struct mlx5_fw_page *fwp;

	MLX5_DMA_LOCK(dev);
	fwp = mlx5_remove_fw_page_locked(dev, addr);
	MLX5_DMA_UNLOCK(dev);

	if (fwp == NULL) {
		mlx5_core_warn(dev, "Cannot free 4K page at 0x%llx\n",
		    (long long)addr);
		return;
	}
	mlx5_fwp_free(fwp);
}

static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
				s32 *npages, int boot)
{
	u32 in[MLX5_ST_SZ_DW(query_pages_in)] = {0};
	u32 out[MLX5_ST_SZ_DW(query_pages_out)] = {0};
	int err;

	MLX5_SET(query_pages_in, in, opcode, MLX5_CMD_OP_QUERY_PAGES);
	MLX5_SET(query_pages_in, in, op_mod, boot ?
	    MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES :
	    MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES);

	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
	if (err)
		return err;

	*npages = MLX5_GET(query_pages_out, out, num_pages);
	*func_id = MLX5_GET(query_pages_out, out, function_id);

	return 0;
}

static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
		      int notify_fail)
{
	u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
	int inlen = MLX5_ST_SZ_BYTES(manage_pages_in);
	u64 addr;
	int err;
	u32 *in, *nin;
	int i = 0;

	inlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_in, pas[0]);
	in = mlx5_vzalloc(inlen);
	if (!in) {
		mlx5_core_warn(dev, "vzalloc of %d bytes failed\n", inlen);
		err = -ENOMEM;
		goto out_alloc;
	}

	for (i = 0; i < npages; i++) {
		err = alloc_4k(dev, &addr, func_id);
		if (err)
			goto out_alloc;
		MLX5_ARRAY_SET64(manage_pages_in, in, pas, i, addr);
	}

	MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
	MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE);
	MLX5_SET(manage_pages_in, in, function_id, func_id);
	MLX5_SET(manage_pages_in, in, input_num_entries, npages);

	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
	if (err) {
		mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n",
			       func_id, npages, err);
		goto out_alloc;
	}
	dev->priv.fw_pages += npages;
	dev->priv.pages_per_func[func_id] += npages;

	mlx5_core_dbg(dev, "err %d\n", err);

	goto out_free;

out_alloc:
	if (notify_fail) {
		nin = mlx5_vzalloc(inlen);
		if (!nin)
			goto out_4k;

		memset(&out, 0, sizeof(out));
		MLX5_SET(manage_pages_in, nin, opcode, MLX5_CMD_OP_MANAGE_PAGES);
		MLX5_SET(manage_pages_in, nin, op_mod, MLX5_PAGES_CANT_GIVE);
		MLX5_SET(manage_pages_in, nin, function_id, func_id);
		if (mlx5_cmd_exec(dev, nin, inlen, out, sizeof(out)))
			mlx5_core_warn(dev, "page notify failed\n");
		kvfree(nin);
	}

out_4k:
	for (i--; i >= 0; i--)
		free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]));
out_free:
	kvfree(in);
	return err;
}
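
/*
 * Execute a MANAGE_PAGES "take" command. When the device is in the
 * internal error state the firmware cannot answer, so instead collect
 * up to "input_num_entries" pages owned by the requested function
 * directly from the RB-tree and synthesize a successful reply.
 */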
static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
			     u32 *in, int in_size, u32 *out, int out_size)
{
	struct mlx5_fw_page *fwp;
	struct rb_node *p;
	u32 func_id;
	u32 npages;
	u32 i = 0;

	if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
		return mlx5_cmd_exec(dev, in, in_size, out, out_size);

	/* No hard feelings, we want our pages back! */
	npages = MLX5_GET(manage_pages_in, in, input_num_entries);
	func_id = MLX5_GET(manage_pages_in, in, function_id);

	p = rb_first(&dev->priv.page_root);
	while (p && i < npages) {
		fwp = rb_entry(p, struct mlx5_fw_page, rb_node);
		p = rb_next(p);
		if (fwp->func_id != func_id)
			continue;

		MLX5_ARRAY_SET64(manage_pages_out, out, pas, i, fwp->dma_addr);
		i++;
	}

	MLX5_SET(manage_pages_out, out, output_num_entries, i);
	return 0;
}

static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
			 int *nclaimed)
{
	int outlen = MLX5_ST_SZ_BYTES(manage_pages_out);
	u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0};
	int num_claimed;
	u32 *out;
	int err;
	int i;

	if (nclaimed)
		*nclaimed = 0;

	outlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]);
	out = mlx5_vzalloc(outlen);
	if (!out)
		return -ENOMEM;

	MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
	MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_TAKE);
	MLX5_SET(manage_pages_in, in, function_id, func_id);
	MLX5_SET(manage_pages_in, in, input_num_entries, npages);

	mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
	err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen);
	if (err) {
		mlx5_core_err(dev, "failed reclaiming pages\n");
		goto out_free;
	}

	num_claimed = MLX5_GET(manage_pages_out, out, output_num_entries);
	if (nclaimed)
		*nclaimed = num_claimed;

	dev->priv.fw_pages -= num_claimed;
	dev->priv.pages_per_func[func_id] -= num_claimed;
	for (i = 0; i < num_claimed; i++)
		free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i]));

out_free:
	kvfree(out);
	return err;
}

static void pages_work_handler(struct work_struct *work)
{
	struct mlx5_pages_req *req = container_of(work, struct mlx5_pages_req,
						  work);
	struct mlx5_core_dev *dev = req->dev;
	int err = 0;

	if (req->npages < 0)
		err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL);
	else if (req->npages > 0)
		err = give_pages(dev, req->func_id, req->npages, 1);

	if (err)
		mlx5_core_warn(dev, "%s fail %d\n",
			       req->npages < 0 ? "reclaim" : "give", err);

	kfree(req);
}

void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
				 s32 npages)
{
	struct mlx5_pages_req *req;

	req = kzalloc(sizeof(*req), GFP_ATOMIC);
	if (!req) {
		mlx5_core_warn(dev, "failed to allocate pages request\n");
		return;
	}

	req->dev = dev;
	req->func_id = func_id;
	req->npages = npages;
	INIT_WORK(&req->work, pages_work_handler);
	if (!queue_work(dev->priv.pg_wq, &req->work))
		mlx5_core_warn(dev, "failed to queue pages handler work\n");
}
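
/*
 * Query how many pages the firmware needs for the given startup stage
 * (boot or init) and, when the count is positive, hand them over.
 */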
"boot" : "init", func_id); 509 510 if (npages > 0) 511 return give_pages(dev, func_id, npages, 0); 512 else 513 return 0; 514 } 515 516 enum { 517 MLX5_BLKS_FOR_RECLAIM_PAGES = 12 518 }; 519 520 s64 mlx5_wait_for_reclaim_vfs_pages(struct mlx5_core_dev *dev) 521 { 522 int end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS); 523 s64 prevpages = 0; 524 s64 npages = 0; 525 526 while (!time_after(jiffies, end)) { 527 /* exclude own function, VFs only */ 528 npages = dev->priv.fw_pages - dev->priv.pages_per_func[0]; 529 if (!npages) 530 break; 531 532 if (npages != prevpages) 533 end = end + msecs_to_jiffies(100); 534 535 prevpages = npages; 536 msleep(1); 537 } 538 539 if (npages) 540 mlx5_core_warn(dev, "FW did not return all VFs pages, will cause to memory leak\n"); 541 542 return -npages; 543 } 544 545 static int optimal_reclaimed_pages(void) 546 { 547 struct mlx5_cmd_prot_block *block; 548 struct mlx5_cmd_layout *lay; 549 int ret; 550 551 ret = (sizeof(lay->out) + MLX5_BLKS_FOR_RECLAIM_PAGES * sizeof(block->data) - 552 MLX5_ST_SZ_BYTES(manage_pages_out)) / 553 MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]); 554 555 return ret; 556 } 557 558 int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) 559 { 560 int end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS); 561 struct mlx5_fw_page *fwp; 562 struct rb_node *p; 563 int nclaimed = 0; 564 int err; 565 566 do { 567 p = rb_first(&dev->priv.page_root); 568 if (p) { 569 fwp = rb_entry(p, struct mlx5_fw_page, rb_node); 570 err = reclaim_pages(dev, fwp->func_id, 571 optimal_reclaimed_pages(), 572 &nclaimed); 573 if (err) { 574 mlx5_core_warn(dev, "failed reclaiming pages (%d)\n", 575 err); 576 return err; 577 } 578 579 if (nclaimed) 580 end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS); 581 } 582 if (time_after(jiffies, end)) { 583 mlx5_core_warn(dev, "FW did not return all pages. giving up...\n"); 584 break; 585 } 586 } while (p); 587 588 return 0; 589 } 590 591 void mlx5_pagealloc_init(struct mlx5_core_dev *dev) 592 { 593 594 dev->priv.page_root = RB_ROOT; 595 } 596 597 void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev) 598 { 599 /* nothing */ 600 } 601 602 int mlx5_pagealloc_start(struct mlx5_core_dev *dev) 603 { 604 dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator"); 605 if (!dev->priv.pg_wq) 606 return -ENOMEM; 607 608 return 0; 609 } 610 611 void mlx5_pagealloc_stop(struct mlx5_core_dev *dev) 612 { 613 destroy_workqueue(dev->priv.pg_wq); 614 } 615