/*-
 * Copyright (c) 2013-2017, Mellanox Technologies, Ltd. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_rss.h"
#include "opt_ratelimit.h"

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/delay.h>
#include <dev/mlx5/driver.h>
#include <dev/mlx5/mlx5_core/mlx5_core.h>

/*
 * This assertion only holds for the Linux-style PAGE_MASK,
 * ~(PAGE_SIZE - 1), so it catches builds where the native FreeBSD
 * definition, (PAGE_SIZE - 1), is in scope instead.
 */
CTASSERT((uintptr_t)PAGE_MASK > (uintptr_t)PAGE_SIZE);

struct mlx5_pages_req {
	struct mlx5_core_dev *dev;
	u16	func_id;
	s32	npages;
	struct work_struct work;
};

enum {
	MAX_RECLAIM_TIME_MSECS = 5000,
};

static void
mlx5_fwp_load_mem_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct mlx5_fw_page *fwp;
	uint8_t owned;

	fwp = (struct mlx5_fw_page *)arg;
	owned = MLX5_DMA_OWNED(fwp->dev);

	if (!owned)
		MLX5_DMA_LOCK(fwp->dev);

	if (error == 0) {
		KASSERT(nseg == 1, ("Number of segments is different from 1"));
		fwp->dma_addr = segs->ds_addr;
		fwp->load_done = MLX5_LOAD_ST_SUCCESS;
	} else {
		fwp->load_done = MLX5_LOAD_ST_FAILURE;
	}
	MLX5_DMA_DONE(fwp->dev);

	if (!owned)
		MLX5_DMA_UNLOCK(fwp->dev);
}

void
mlx5_fwp_flush(struct mlx5_fw_page *fwp)
{
	unsigned num = fwp->numpages;

	while (num--)
		bus_dmamap_sync(fwp[num].dev->cmd.dma_tag, fwp[num].dma_map,
		    BUS_DMASYNC_PREWRITE);
}

void
mlx5_fwp_invalidate(struct mlx5_fw_page *fwp)
{
	unsigned num = fwp->numpages;

	while (num--) {
		bus_dmamap_sync(fwp[num].dev->cmd.dma_tag, fwp[num].dma_map,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_sync(fwp[num].dev->cmd.dma_tag, fwp[num].dma_map,
		    BUS_DMASYNC_PREREAD);
	}
}

struct mlx5_fw_page *
mlx5_fwp_alloc(struct mlx5_core_dev *dev, gfp_t flags, unsigned num)
{
	struct mlx5_fw_page *fwp;
	unsigned x;
	int err;

	/* check for special case */
	if (num == 0) {
		fwp = kzalloc(sizeof(*fwp), flags);
		if (fwp != NULL)
			fwp->dev = dev;
		return (fwp);
	}

	/* we need sleeping context for this function */
	if (flags & M_NOWAIT)
		return (NULL);

	fwp = kzalloc(sizeof(*fwp) * num, flags);

	/* serialize loading the DMA map(s) */
	sx_xlock(&dev->cmd.dma_sx);

	for (x = 0; x != num; x++) {
		/* store pointer to MLX5 core device */
		fwp[x].dev = dev;
		/* store number of pages left from the array */
		fwp[x].numpages = num - x;

		/* allocate memory */
		err = bus_dmamem_alloc(dev->cmd.dma_tag, &fwp[x].virt_addr,
		    BUS_DMA_WAITOK | BUS_DMA_COHERENT, &fwp[x].dma_map);
		if (err != 0)
			goto failure;

		/* load memory into DMA */
		MLX5_DMA_LOCK(dev);
		(void) bus_dmamap_load(
		    dev->cmd.dma_tag, fwp[x].dma_map, fwp[x].virt_addr,
		    MLX5_ADAPTER_PAGE_SIZE, &mlx5_fwp_load_mem_cb,
		    fwp + x, BUS_DMA_WAITOK | BUS_DMA_COHERENT);

		while (fwp[x].load_done == MLX5_LOAD_ST_NONE)
			MLX5_DMA_WAIT(dev);
		MLX5_DMA_UNLOCK(dev);

		/* check for error */
		if (fwp[x].load_done != MLX5_LOAD_ST_SUCCESS) {
			bus_dmamem_free(dev->cmd.dma_tag, fwp[x].virt_addr,
			    fwp[x].dma_map);
			goto failure;
		}
	}
	sx_xunlock(&dev->cmd.dma_sx);
	return (fwp);

failure:
	while (x--) {
		bus_dmamap_unload(dev->cmd.dma_tag, fwp[x].dma_map);
		bus_dmamem_free(dev->cmd.dma_tag, fwp[x].virt_addr,
		    fwp[x].dma_map);
	}
	sx_xunlock(&dev->cmd.dma_sx);
	kfree(fwp);
	return (NULL);
}

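/*
 * Illustrative sketch of how the firmware page helpers above combine;
 * the caller shown is an assumption for documentation purposes, not a
 * verbatim copy of code elsewhere in this driver:
 *
 *	struct mlx5_fw_page *fwp;
 *	u64 paddr;
 *
 *	fwp = mlx5_fwp_alloc(dev, GFP_KERNEL, 1);
 *	if (fwp == NULL)
 *		return (-ENOMEM);
 *	paddr = mlx5_fwp_get_dma(fwp, 0);	(DMA address to hand to FW)
 *	...
 *	mlx5_fwp_free(fwp);			(unloads the map, frees memory)
 */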
void
mlx5_fwp_free(struct mlx5_fw_page *fwp)
{
	struct mlx5_core_dev *dev;
	unsigned num;

	/* be NULL safe */
	if (fwp == NULL)
		return;

	/* check for special case */
	if (fwp->numpages == 0) {
		kfree(fwp);
		return;
	}

	num = fwp->numpages;
	dev = fwp->dev;

	while (num--) {
		bus_dmamap_unload(dev->cmd.dma_tag, fwp[num].dma_map);
		bus_dmamem_free(dev->cmd.dma_tag, fwp[num].virt_addr,
		    fwp[num].dma_map);
	}

	kfree(fwp);
}

u64
mlx5_fwp_get_dma(struct mlx5_fw_page *fwp, size_t offset)
{
	size_t index = (offset / MLX5_ADAPTER_PAGE_SIZE);
	KASSERT(index < fwp->numpages, ("Invalid offset: %lld", (long long)offset));

	return ((fwp + index)->dma_addr + (offset % MLX5_ADAPTER_PAGE_SIZE));
}

void *
mlx5_fwp_get_virt(struct mlx5_fw_page *fwp, size_t offset)
{
	size_t index = (offset / MLX5_ADAPTER_PAGE_SIZE);
	KASSERT(index < fwp->numpages, ("Invalid offset: %lld", (long long)offset));

	return ((char *)(fwp + index)->virt_addr + (offset % MLX5_ADAPTER_PAGE_SIZE));
}

static int
mlx5_insert_fw_page_locked(struct mlx5_core_dev *dev, struct mlx5_fw_page *nfp)
{
	struct rb_root *root = &dev->priv.page_root;
	struct rb_node **new = &root->rb_node;
	struct rb_node *parent = NULL;
	struct mlx5_fw_page *tfp;

	while (*new) {
		parent = *new;
		tfp = rb_entry(parent, struct mlx5_fw_page, rb_node);
		if (tfp->dma_addr < nfp->dma_addr)
			new = &parent->rb_left;
		else if (tfp->dma_addr > nfp->dma_addr)
			new = &parent->rb_right;
		else
			return (-EEXIST);
	}

	rb_link_node(&nfp->rb_node, parent, new);
	rb_insert_color(&nfp->rb_node, root);
	return (0);
}

static struct mlx5_fw_page *
mlx5_remove_fw_page_locked(struct mlx5_core_dev *dev, bus_addr_t addr)
{
	struct rb_root *root = &dev->priv.page_root;
	struct rb_node *tmp = root->rb_node;
	struct mlx5_fw_page *result = NULL;
	struct mlx5_fw_page *tfp;

	while (tmp) {
		tfp = rb_entry(tmp, struct mlx5_fw_page, rb_node);
		if (tfp->dma_addr < addr) {
			tmp = tmp->rb_left;
		} else if (tfp->dma_addr > addr) {
			tmp = tmp->rb_right;
		} else {
			rb_erase(&tfp->rb_node, &dev->priv.page_root);
			result = tfp;
			break;
		}
	}
	return (result);
}

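/*
 * alloc_4k() and free_4k() below pair the firmware page allocator with
 * the red-black tree rooted at dev->priv.page_root. The tree is keyed
 * by DMA address, so pages given to the firmware can later be looked
 * up, and released, by the addresses the firmware hands back.
 */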
static int
alloc_4k(struct mlx5_core_dev *dev, u64 *addr, u16 func_id)
{
	struct mlx5_fw_page *fwp;
	int err;

	fwp = mlx5_fwp_alloc(dev, GFP_KERNEL, 1);
	if (fwp == NULL)
		return (-ENOMEM);

	fwp->func_id = func_id;

	MLX5_DMA_LOCK(dev);
	err = mlx5_insert_fw_page_locked(dev, fwp);
	MLX5_DMA_UNLOCK(dev);

	if (err != 0) {
		mlx5_fwp_free(fwp);
	} else {
		/* make sure cached data is cleaned */
		mlx5_fwp_invalidate(fwp);

		/* store DMA address */
		*addr = fwp->dma_addr;
	}
	return (err);
}

static void
free_4k(struct mlx5_core_dev *dev, u64 addr)
{
	struct mlx5_fw_page *fwp;

	MLX5_DMA_LOCK(dev);
	fwp = mlx5_remove_fw_page_locked(dev, addr);
	MLX5_DMA_UNLOCK(dev);

	if (fwp == NULL) {
		mlx5_core_warn(dev, "Cannot free 4K page at 0x%llx\n",
		    (long long)addr);
		return;
	}
	mlx5_fwp_free(fwp);
}

static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
				s32 *npages, int boot)
{
	u32 in[MLX5_ST_SZ_DW(query_pages_in)] = {0};
	u32 out[MLX5_ST_SZ_DW(query_pages_out)] = {0};
	int err;

	MLX5_SET(query_pages_in, in, opcode, MLX5_CMD_OP_QUERY_PAGES);
	MLX5_SET(query_pages_in, in, op_mod, boot ?
	    MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES :
	    MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES);

	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
	if (err)
		return err;

	*npages = MLX5_GET(query_pages_out, out, num_pages);
	*func_id = MLX5_GET(query_pages_out, out, function_id);

	return 0;
}

static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
		      int notify_fail)
{
	u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
	int inlen = MLX5_ST_SZ_BYTES(manage_pages_in);
	u64 addr;
	int err;
	u32 *in, *nin;
	int i = 0;

	inlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_in, pas[0]);
	in = mlx5_vzalloc(inlen);
	if (!in) {
		mlx5_core_warn(dev, "vzalloc failed %d\n", inlen);
		err = -ENOMEM;
		goto out_alloc;
	}

	for (i = 0; i < npages; i++) {
		err = alloc_4k(dev, &addr, func_id);
		if (err)
			goto out_alloc;
		MLX5_ARRAY_SET64(manage_pages_in, in, pas, i, addr);
	}

	MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
	MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE);
	MLX5_SET(manage_pages_in, in, function_id, func_id);
	MLX5_SET(manage_pages_in, in, input_num_entries, npages);

	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
	if (err) {
		mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n",
		    func_id, npages, err);
		goto out_alloc;
	}
	dev->priv.fw_pages += npages;
	dev->priv.pages_per_func[func_id] += npages;

	mlx5_core_dbg(dev, "err %d\n", err);

	goto out_free;

out_alloc:
	if (notify_fail) {
		nin = mlx5_vzalloc(inlen);
		if (!nin)
			goto out_4k;

		memset(&out, 0, sizeof(out));
		MLX5_SET(manage_pages_in, nin, opcode, MLX5_CMD_OP_MANAGE_PAGES);
		MLX5_SET(manage_pages_in, nin, op_mod, MLX5_PAGES_CANT_GIVE);
		MLX5_SET(manage_pages_in, nin, function_id, func_id);
		if (mlx5_cmd_exec(dev, nin, inlen, out, sizeof(out)))
			mlx5_core_warn(dev, "page notify failed\n");
		kvfree(nin);
	}

out_4k:
	for (i--; i >= 0; i--)
		free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]));
out_free:
	kvfree(in);
	return err;
}

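/*
 * When the device is in the internal-error state the firmware can no
 * longer answer MANAGE_PAGES commands, so reclaim_pages_cmd() below
 * synthesizes the reply itself: it walks the page tree and reports the
 * pages belonging to "function_id" as if the firmware had returned them.
 */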
static int
reclaim_pages_cmd(struct mlx5_core_dev *dev,
    u32 *in, int in_size, u32 *out, int out_size)
{
	struct mlx5_fw_page *fwp;
	struct rb_node *p;
	u32 func_id;
	u32 npages;
	u32 i = 0;

	if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
		return mlx5_cmd_exec(dev, in, in_size, out, out_size);

	/* No hard feelings, we want our pages back! */
	npages = MLX5_GET(manage_pages_in, in, input_num_entries);
	func_id = MLX5_GET(manage_pages_in, in, function_id);

	p = rb_first(&dev->priv.page_root);
	while (p && i < npages) {
		fwp = rb_entry(p, struct mlx5_fw_page, rb_node);
		p = rb_next(p);
		if (fwp->func_id != func_id)
			continue;

		MLX5_ARRAY_SET64(manage_pages_out, out, pas, i, fwp->dma_addr);
		i++;
	}

	MLX5_SET(manage_pages_out, out, output_num_entries, i);
	return 0;
}

static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
			 int *nclaimed)
{
	int outlen = MLX5_ST_SZ_BYTES(manage_pages_out);
	u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0};
	int num_claimed;
	u32 *out;
	int err;
	int i;

	if (nclaimed)
		*nclaimed = 0;

	outlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]);
	out = mlx5_vzalloc(outlen);
	if (!out)
		return -ENOMEM;

	MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
	MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_TAKE);
	MLX5_SET(manage_pages_in, in, function_id, func_id);
	MLX5_SET(manage_pages_in, in, input_num_entries, npages);

	mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
	err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen);
	if (err) {
		mlx5_core_err(dev, "failed reclaiming pages\n");
		goto out_free;
	}

	num_claimed = MLX5_GET(manage_pages_out, out, output_num_entries);
	if (nclaimed)
		*nclaimed = num_claimed;

	dev->priv.fw_pages -= num_claimed;
	dev->priv.pages_per_func[func_id] -= num_claimed;
	for (i = 0; i < num_claimed; i++)
		free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i]));

out_free:
	kvfree(out);
	return err;
}

static void pages_work_handler(struct work_struct *work)
{
	struct mlx5_pages_req *req = container_of(work, struct mlx5_pages_req,
	    work);
	struct mlx5_core_dev *dev = req->dev;
	int err = 0;

	if (req->npages < 0)
		err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL);
	else if (req->npages > 0)
		err = give_pages(dev, req->func_id, req->npages, 1);

	if (err)
		mlx5_core_warn(dev, "%s fail %d\n",
		    req->npages < 0 ? "reclaim" : "give", err);

	kfree(req);
}

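/*
 * Entry point for firmware page-request events: a positive "npages"
 * asks the driver to give pages to the firmware, a negative value asks
 * it to reclaim them. The work is deferred to the "pg_wq" workqueue
 * because giving and reclaiming pages may sleep.
 */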
"reclaim" : "give", err); 476 477 kfree(req); 478 } 479 480 void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, 481 s32 npages) 482 { 483 struct mlx5_pages_req *req; 484 485 req = kzalloc(sizeof(*req), GFP_ATOMIC); 486 if (!req) { 487 mlx5_core_warn(dev, "failed to allocate pages request\n"); 488 return; 489 } 490 491 req->dev = dev; 492 req->func_id = func_id; 493 req->npages = npages; 494 INIT_WORK(&req->work, pages_work_handler); 495 if (!queue_work(dev->priv.pg_wq, &req->work)) 496 mlx5_core_warn(dev, "failed to queue pages handler work\n"); 497 } 498 499 int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot) 500 { 501 u16 uninitialized_var(func_id); 502 s32 uninitialized_var(npages); 503 int err; 504 505 err = mlx5_cmd_query_pages(dev, &func_id, &npages, boot); 506 if (err) 507 return err; 508 509 mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n", 510 npages, boot ? "boot" : "init", func_id); 511 512 return give_pages(dev, func_id, npages, 0); 513 } 514 515 enum { 516 MLX5_BLKS_FOR_RECLAIM_PAGES = 12 517 }; 518 519 s64 mlx5_wait_for_reclaim_vfs_pages(struct mlx5_core_dev *dev) 520 { 521 int end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS); 522 s64 prevpages = 0; 523 s64 npages = 0; 524 525 while (!time_after(jiffies, end)) { 526 /* exclude own function, VFs only */ 527 npages = dev->priv.fw_pages - dev->priv.pages_per_func[0]; 528 if (!npages) 529 break; 530 531 if (npages != prevpages) 532 end = end + msecs_to_jiffies(100); 533 534 prevpages = npages; 535 msleep(1); 536 } 537 538 if (npages) 539 mlx5_core_warn(dev, "FW did not return all VFs pages, will cause to memory leak\n"); 540 541 return -npages; 542 } 543 544 static int optimal_reclaimed_pages(void) 545 { 546 struct mlx5_cmd_prot_block *block; 547 struct mlx5_cmd_layout *lay; 548 int ret; 549 550 ret = (sizeof(lay->out) + MLX5_BLKS_FOR_RECLAIM_PAGES * sizeof(block->data) - 551 MLX5_ST_SZ_BYTES(manage_pages_out)) / 552 MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]); 553 554 return ret; 555 } 556 557 int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) 558 { 559 int end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS); 560 struct mlx5_fw_page *fwp; 561 struct rb_node *p; 562 int nclaimed = 0; 563 int err; 564 565 do { 566 p = rb_first(&dev->priv.page_root); 567 if (p) { 568 fwp = rb_entry(p, struct mlx5_fw_page, rb_node); 569 err = reclaim_pages(dev, fwp->func_id, 570 optimal_reclaimed_pages(), 571 &nclaimed); 572 if (err) { 573 mlx5_core_warn(dev, "failed reclaiming pages (%d)\n", 574 err); 575 return err; 576 } 577 578 if (nclaimed) 579 end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS); 580 } 581 if (time_after(jiffies, end)) { 582 mlx5_core_warn(dev, "FW did not return all pages. giving up...\n"); 583 break; 584 } 585 } while (p); 586 587 return 0; 588 } 589 590 void mlx5_pagealloc_init(struct mlx5_core_dev *dev) 591 { 592 593 dev->priv.page_root = RB_ROOT; 594 } 595 596 void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev) 597 { 598 /* nothing */ 599 } 600 601 int mlx5_pagealloc_start(struct mlx5_core_dev *dev) 602 { 603 dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator"); 604 if (!dev->priv.pg_wq) 605 return -ENOMEM; 606 607 return 0; 608 } 609 610 void mlx5_pagealloc_stop(struct mlx5_core_dev *dev) 611 { 612 destroy_workqueue(dev->priv.pg_wq); 613 } 614