/*-
 * Copyright (c) 2013-2017, Mellanox Technologies, Ltd.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/delay.h>
#include <dev/mlx5/driver.h>
#include "mlx5_core.h"

/* Make sure the Linux-style PAGE_MASK, ~(PAGE_SIZE - 1), is in effect. */
CTASSERT((uintptr_t)PAGE_MASK > (uintptr_t)PAGE_SIZE);

struct mlx5_pages_req {
	struct mlx5_core_dev *dev;
	u16 func_id;
	s32 npages;
	struct work_struct work;
};

struct mlx5_manage_pages_inbox {
	struct mlx5_inbox_hdr hdr;
	__be16 rsvd;
	__be16 func_id;
	__be32 num_entries;
	__be64 pas[0];
};

struct mlx5_manage_pages_outbox {
	struct mlx5_outbox_hdr hdr;
	__be32 num_entries;
	u8 rsvd[4];
	__be64 pas[0];
};

enum {
	MAX_RECLAIM_TIME_MSECS = 5000,
};

static void
mlx5_fwp_load_mem_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct mlx5_fw_page *fwp;
	uint8_t owned;

	fwp = (struct mlx5_fw_page *)arg;
	owned = MLX5_DMA_OWNED(fwp->dev);

	if (!owned)
		MLX5_DMA_LOCK(fwp->dev);

	if (error == 0) {
		KASSERT(nseg == 1,
		    ("Number of segments is %d, expected 1", nseg));
		fwp->dma_addr = segs->ds_addr;
		fwp->load_done = MLX5_LOAD_ST_SUCCESS;
	} else {
		fwp->load_done = MLX5_LOAD_ST_FAILURE;
	}
	MLX5_DMA_DONE(fwp->dev);

	if (!owned)
		MLX5_DMA_UNLOCK(fwp->dev);
}

void
mlx5_fwp_flush(struct mlx5_fw_page *fwp)
{
	unsigned num = fwp->numpages;

	while (num--)
		bus_dmamap_sync(fwp[num].dev->cmd.dma_tag, fwp[num].dma_map,
		    BUS_DMASYNC_PREWRITE);
}

void
mlx5_fwp_invalidate(struct mlx5_fw_page *fwp)
{
	unsigned num = fwp->numpages;

	while (num--) {
		bus_dmamap_sync(fwp[num].dev->cmd.dma_tag, fwp[num].dma_map,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_sync(fwp[num].dev->cmd.dma_tag, fwp[num].dma_map,
		    BUS_DMASYNC_PREREAD);
	}
}
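
/*
 * Illustrative usage sketch (not part of the driver): a caller sharing a
 * firmware page array with the device would typically flush CPU writes
 * before handing the pages to hardware and invalidate before reading
 * data the hardware wrote back.  "fwp" is assumed to have been returned
 * by mlx5_fwp_alloc() below; error handling is elided:
 *
 *	mlx5_fwp_flush(fwp);		// make CPU writes visible to HW
 *	// ... firmware consumes and/or updates the page(s) ...
 *	mlx5_fwp_invalidate(fwp);	// drop stale CPU view of the page(s)
 *	// ... now safe to read HW-written data via mlx5_fwp_get_virt() ...
 */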

struct mlx5_fw_page *
mlx5_fwp_alloc(struct mlx5_core_dev *dev, gfp_t flags, unsigned num)
{
	struct mlx5_fw_page *fwp;
	unsigned x;
	int err;

	/* check for special case */
	if (num == 0) {
		fwp = kzalloc(sizeof(*fwp), flags);
		if (fwp != NULL)
			fwp->dev = dev;
		return (fwp);
	}

	/* we need sleeping context for this function */
	if (flags & M_NOWAIT)
		return (NULL);

	/* the check above guarantees a sleeping allocation, which cannot fail */
	fwp = kzalloc(sizeof(*fwp) * num, flags);

	/* serialize loading the DMA map(s) */
	sx_xlock(&dev->cmd.dma_sx);

	for (x = 0; x != num; x++) {
		/* store pointer to MLX5 core device */
		fwp[x].dev = dev;
		/* store number of pages left from the array */
		fwp[x].numpages = num - x;

		/* allocate memory */
		err = bus_dmamem_alloc(dev->cmd.dma_tag, &fwp[x].virt_addr,
		    BUS_DMA_WAITOK | BUS_DMA_COHERENT, &fwp[x].dma_map);
		if (err != 0)
			goto failure;

		/* load memory into DMA */
		MLX5_DMA_LOCK(dev);
		err = bus_dmamap_load(
		    dev->cmd.dma_tag, fwp[x].dma_map, fwp[x].virt_addr,
		    MLX5_ADAPTER_PAGE_SIZE, &mlx5_fwp_load_mem_cb,
		    fwp + x, BUS_DMA_WAITOK | BUS_DMA_COHERENT);

		while (fwp[x].load_done == MLX5_LOAD_ST_NONE)
			MLX5_DMA_WAIT(dev);
		MLX5_DMA_UNLOCK(dev);

		/* check for error */
		if (fwp[x].load_done != MLX5_LOAD_ST_SUCCESS) {
			bus_dmamem_free(dev->cmd.dma_tag, fwp[x].virt_addr,
			    fwp[x].dma_map);
			goto failure;
		}
	}
	sx_xunlock(&dev->cmd.dma_sx);
	return (fwp);

failure:
	while (x--) {
		bus_dmamap_unload(dev->cmd.dma_tag, fwp[x].dma_map);
		bus_dmamem_free(dev->cmd.dma_tag, fwp[x].virt_addr,
		    fwp[x].dma_map);
	}
	sx_xunlock(&dev->cmd.dma_sx);
	return (NULL);
}

void
mlx5_fwp_free(struct mlx5_fw_page *fwp)
{
	struct mlx5_core_dev *dev;
	unsigned num;

	/* be NULL safe */
	if (fwp == NULL)
		return;

	/* check for special case */
	if (fwp->numpages == 0) {
		kfree(fwp);
		return;
	}

	num = fwp->numpages;
	dev = fwp->dev;

	/* serialize unloading the DMA maps */
	sx_xlock(&dev->cmd.dma_sx);
	while (num--) {
		bus_dmamap_unload(dev->cmd.dma_tag, fwp[num].dma_map);
		bus_dmamem_free(dev->cmd.dma_tag, fwp[num].virt_addr,
		    fwp[num].dma_map);
	}
	sx_xunlock(&dev->cmd.dma_sx);

	kfree(fwp);
}

u64
mlx5_fwp_get_dma(struct mlx5_fw_page *fwp, size_t offset)
{
	size_t index = (offset / MLX5_ADAPTER_PAGE_SIZE);
	KASSERT(index < fwp->numpages, ("Invalid offset: %lld",
	    (long long)offset));

	return ((fwp + index)->dma_addr + (offset % MLX5_ADAPTER_PAGE_SIZE));
}

void *
mlx5_fwp_get_virt(struct mlx5_fw_page *fwp, size_t offset)
{
	size_t index = (offset / MLX5_ADAPTER_PAGE_SIZE);
	KASSERT(index < fwp->numpages, ("Invalid offset: %lld",
	    (long long)offset));

	return ((char *)(fwp + index)->virt_addr +
	    (offset % MLX5_ADAPTER_PAGE_SIZE));
}
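
/*
 * Worked example (illustrative): the accessors above split a byte offset
 * into a 4K page index and an intra-page remainder.  With a four-page
 * chunk from mlx5_fwp_alloc(dev, GFP_KERNEL, 4), offset 0x2010 resolves
 * to page index 2 (0x2010 / MLX5_ADAPTER_PAGE_SIZE) plus remainder 0x10:
 *
 *	u64 dma = mlx5_fwp_get_dma(fwp, 0x2010);
 *	void *va = mlx5_fwp_get_virt(fwp, 0x2010);
 *
 * Note that only the mlx5_fw_page array entries are contiguous; each 4K
 * page carries its own DMA address, so bus addresses at offsets in
 * different pages need not be adjacent.
 */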

static int
mlx5_insert_fw_page_locked(struct mlx5_core_dev *dev, struct mlx5_fw_page *nfp)
{
	struct rb_root *root = &dev->priv.page_root;
	struct rb_node **new = &root->rb_node;
	struct rb_node *parent = NULL;
	struct mlx5_fw_page *tfp;

	while (*new) {
		parent = *new;
		tfp = rb_entry(parent, struct mlx5_fw_page, rb_node);
		if (tfp->dma_addr < nfp->dma_addr)
			new = &parent->rb_left;
		else if (tfp->dma_addr > nfp->dma_addr)
			new = &parent->rb_right;
		else
			return (-EEXIST);
	}

	rb_link_node(&nfp->rb_node, parent, new);
	rb_insert_color(&nfp->rb_node, root);
	return (0);
}

static struct mlx5_fw_page *
mlx5_remove_fw_page_locked(struct mlx5_core_dev *dev, bus_addr_t addr)
{
	struct rb_root *root = &dev->priv.page_root;
	struct rb_node *tmp = root->rb_node;
	struct mlx5_fw_page *result = NULL;
	struct mlx5_fw_page *tfp;

	while (tmp) {
		tfp = rb_entry(tmp, struct mlx5_fw_page, rb_node);
		if (tfp->dma_addr < addr) {
			tmp = tmp->rb_left;
		} else if (tfp->dma_addr > addr) {
			tmp = tmp->rb_right;
		} else {
			rb_erase(&tfp->rb_node, &dev->priv.page_root);
			result = tfp;
			break;
		}
	}
	return (result);
}

static int
alloc_4k(struct mlx5_core_dev *dev, u64 *addr, u16 func_id)
{
	struct mlx5_fw_page *fwp;
	int err;

	fwp = mlx5_fwp_alloc(dev, GFP_KERNEL, 1);
	if (fwp == NULL)
		return (-ENOMEM);

	fwp->func_id = func_id;

	MLX5_DMA_LOCK(dev);
	err = mlx5_insert_fw_page_locked(dev, fwp);
	MLX5_DMA_UNLOCK(dev);

	if (err != 0) {
		mlx5_fwp_free(fwp);
	} else {
		/* make sure cached data is cleaned */
		mlx5_fwp_invalidate(fwp);

		/* store DMA address */
		*addr = fwp->dma_addr;
	}
	return (err);
}

static void
free_4k(struct mlx5_core_dev *dev, u64 addr)
{
	struct mlx5_fw_page *fwp;

	MLX5_DMA_LOCK(dev);
	fwp = mlx5_remove_fw_page_locked(dev, addr);
	MLX5_DMA_UNLOCK(dev);

	if (fwp == NULL) {
		mlx5_core_warn(dev, "Cannot free 4K page at 0x%llx\n",
		    (unsigned long long)addr);
		return;
	}
	mlx5_fwp_free(fwp);
}

static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
				s32 *npages, int boot)
{
	u32 in[MLX5_ST_SZ_DW(query_pages_in)];
	u32 out[MLX5_ST_SZ_DW(query_pages_out)];
	int err;

	memset(in, 0, sizeof(in));

	MLX5_SET(query_pages_in, in, opcode, MLX5_CMD_OP_QUERY_PAGES);
	MLX5_SET(query_pages_in, in, op_mod,
		 boot ? MLX5_BOOT_PAGES : MLX5_INIT_PAGES);

	memset(out, 0, sizeof(out));
	err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
	if (err)
		return err;

	*npages = MLX5_GET(query_pages_out, out, num_pages);
	*func_id = MLX5_GET(query_pages_out, out, function_id);

	return 0;
}

static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
		      int notify_fail)
{
	struct mlx5_manage_pages_inbox *in;
	struct mlx5_manage_pages_outbox out;
	struct mlx5_manage_pages_inbox *nin;
	int inlen;
	u64 addr;
	int err;
	int i = 0;

	inlen = sizeof(*in) + npages * sizeof(in->pas[0]);
	in = mlx5_vzalloc(inlen);
	if (!in) {
		mlx5_core_warn(dev, "vzalloc failed %d\n", inlen);
		err = -ENOMEM;
		goto out_alloc;
	}
	memset(&out, 0, sizeof(out));

	for (i = 0; i < npages; i++) {
		err = alloc_4k(dev, &addr, func_id);
		if (err)
			goto out_alloc;
		in->pas[i] = cpu_to_be64(addr);
	}

	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
	in->hdr.opmod = cpu_to_be16(MLX5_PAGES_GIVE);
	in->func_id = cpu_to_be16(func_id);
	in->num_entries = cpu_to_be32(npages);
	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
	if (err) {
		mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n",
			       func_id, npages, err);
		goto out_alloc;
	}
	dev->priv.fw_pages += npages;
	dev->priv.pages_per_func[func_id] += npages;

	if (out.hdr.status) {
		err = mlx5_cmd_status_to_err(&out.hdr);
		if (err) {
			mlx5_core_warn(dev, "func_id 0x%x, npages %d, status %d\n",
				       func_id, npages, out.hdr.status);
			goto out_alloc;
		}
	}

	mlx5_core_dbg(dev, "err %d\n", err);

	goto out_free;

out_alloc:
	if (notify_fail) {
		nin = kzalloc(sizeof(*nin), GFP_KERNEL);
		if (!nin)
			goto out_4k;

		memset(&out, 0, sizeof(out));
		nin->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
		nin->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE);
		nin->func_id = cpu_to_be16(func_id);
		if (mlx5_cmd_exec(dev, nin, sizeof(*nin), &out, sizeof(out)))
			mlx5_core_warn(dev, "page notify failed\n");
		kfree(nin);
	}

out_4k:
	for (i--; i >= 0; i--)
		free_4k(dev, be64_to_cpu(in->pas[i]));
out_free:
	kvfree(in);
	return err;
}
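
/*
 * Flow sketch (illustrative, not executed code): when firmware requests
 * pages, give_pages() allocates one 4K DMA page per entry, tracks each
 * page in the page_root tree keyed by its DMA address, and posts the
 * addresses through a MANAGE_PAGES(GIVE) command.  A hypothetical
 * two-page request is satisfied roughly as follows:
 *
 *	u64 addr;
 *
 *	alloc_4k(dev, &addr, func_id);		// page 0
 *	in->pas[0] = cpu_to_be64(addr);
 *	alloc_4k(dev, &addr, func_id);		// page 1
 *	in->pas[1] = cpu_to_be64(addr);
 *	in->num_entries = cpu_to_be32(2);
 *	mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
 *
 * On failure, the already-allocated pages are freed and, if requested,
 * the firmware is told via MANAGE_PAGES(CANT_GIVE).
 */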

static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
			 int *nclaimed)
{
	struct mlx5_manage_pages_inbox in;
	struct mlx5_manage_pages_outbox *out;
	int num_claimed;
	int outlen;
	u64 addr;
	int err;
	int i;

	if (nclaimed)
		*nclaimed = 0;

	memset(&in, 0, sizeof(in));
	outlen = sizeof(*out) + npages * sizeof(out->pas[0]);
	out = mlx5_vzalloc(outlen);
	if (!out)
		return -ENOMEM;

	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
	in.hdr.opmod = cpu_to_be16(MLX5_PAGES_TAKE);
	in.func_id = cpu_to_be16(func_id);
	in.num_entries = cpu_to_be32(npages);
	mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
	if (err) {
		mlx5_core_err(dev, "failed reclaiming pages\n");
		goto out_free;
	}

	if (out->hdr.status) {
		err = mlx5_cmd_status_to_err(&out->hdr);
		goto out_free;
	}

	num_claimed = be32_to_cpu(out->num_entries);
	if (nclaimed)
		*nclaimed = num_claimed;

	dev->priv.fw_pages -= num_claimed;
	dev->priv.pages_per_func[func_id] -= num_claimed;
	for (i = 0; i < num_claimed; i++) {
		addr = be64_to_cpu(out->pas[i]);
		free_4k(dev, addr);
	}

out_free:
	kvfree(out);
	return err;
}

static void pages_work_handler(struct work_struct *work)
{
	struct mlx5_pages_req *req = container_of(work, struct mlx5_pages_req, work);
	struct mlx5_core_dev *dev = req->dev;
	int err = 0;

	if (req->npages < 0)
		err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL);
	else if (req->npages > 0)
		err = give_pages(dev, req->func_id, req->npages, 1);

	if (err)
		mlx5_core_warn(dev, "%s fail %d\n",
			       req->npages < 0 ? "reclaim" : "give", err);

	kfree(req);
}

/*
 * Page-request events carry a signed page count: positive means the
 * firmware wants more pages, negative means it is returning pages.
 * This handler may run in a context that cannot sleep (hence the
 * GFP_ATOMIC allocation), so the actual give/reclaim commands are
 * deferred to the page allocator workqueue.
 */
void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
				 s32 npages)
{
	struct mlx5_pages_req *req;

	req = kzalloc(sizeof(*req), GFP_ATOMIC);
	if (!req) {
		mlx5_core_warn(dev, "failed to allocate pages request\n");
		return;
	}

	req->dev = dev;
	req->func_id = func_id;
	req->npages = npages;
	INIT_WORK(&req->work, pages_work_handler);
	if (!queue_work(dev->priv.pg_wq, &req->work))
		mlx5_core_warn(dev, "failed to queue pages handler work\n");
}

int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
{
	u16 uninitialized_var(func_id);
	s32 uninitialized_var(npages);
	int err;

	err = mlx5_cmd_query_pages(dev, &func_id, &npages, boot);
	if (err)
		return err;

	mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n",
		      npages, boot ? "boot" : "init", func_id);

	return give_pages(dev, func_id, npages, 0);
}

enum {
	MLX5_BLKS_FOR_RECLAIM_PAGES = 12
};

s64 mlx5_wait_for_reclaim_vfs_pages(struct mlx5_core_dev *dev)
{
	int end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
	s64 prevpages = 0;
	s64 npages = 0;

	while (!time_after(jiffies, end)) {
		/* exclude own function, VFs only */
		npages = dev->priv.fw_pages - dev->priv.pages_per_func[0];
		if (!npages)
			break;

		/* as long as pages keep coming back, extend the deadline */
		if (npages != prevpages)
			end = end + msecs_to_jiffies(100);

		prevpages = npages;
		msleep(1);
	}

	if (npages)
		mlx5_core_warn(dev, "FW did not return all VF pages; this will cause a memory leak\n");

	return -npages;
}

static int optimal_reclaimed_pages(void)
{
	struct mlx5_cmd_prot_block *block;
	struct mlx5_cmd_layout *lay;
	int ret;

	ret = (sizeof(lay->out) + MLX5_BLKS_FOR_RECLAIM_PAGES * sizeof(block->data) -
	       sizeof(struct mlx5_manage_pages_outbox)) /
	       FIELD_SIZEOF(struct mlx5_manage_pages_outbox, pas[0]);

	return ret;
}

int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
{
	int end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
	struct mlx5_fw_page *fwp;
	struct rb_node *p;
	int nclaimed = 0;
	int err;

	do {
		p = rb_first(&dev->priv.page_root);
		if (p) {
			fwp = rb_entry(p, struct mlx5_fw_page, rb_node);
			if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
				--dev->priv.fw_pages;
				free_4k(dev, fwp->dma_addr);
				nclaimed = 1;
			} else {
				err = reclaim_pages(dev, fwp->func_id,
						    optimal_reclaimed_pages(),
						    &nclaimed);
				if (err) {
					mlx5_core_warn(dev,
					    "failed reclaiming pages (%d)\n",
					    err);
					return err;
				}
			}

			if (nclaimed)
				end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
		}
		if (time_after(jiffies, end)) {
			mlx5_core_warn(dev, "FW did not return all pages; giving up...\n");
			break;
		}
	} while (p);

	return 0;
}

/*
 * Lifecycle note: mlx5_pagealloc_init() must run before any pages are
 * given to firmware, and mlx5_pagealloc_start() must create the
 * workqueue before page-request events can be processed; teardown
 * reverses the order.
 */
void mlx5_pagealloc_init(struct mlx5_core_dev *dev)
{

	dev->priv.page_root = RB_ROOT;
}

void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
{
	/* nothing */
}

int mlx5_pagealloc_start(struct mlx5_core_dev *dev)
{
	dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
	if (!dev->priv.pg_wq)
		return -ENOMEM;

	return 0;
}

void mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
{
	destroy_workqueue(dev->priv.pg_wq);
}