/*-
 * Copyright (c) 2013-2017, Mellanox Technologies, Ltd.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_rss.h"
#include "opt_ratelimit.h"

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/delay.h>
#include <dev/mlx5/driver.h>
#include <dev/mlx5/mlx5_core/mlx5_core.h>

/*
 * This file depends on the Linux-style PAGE_MASK, ~(PAGE_SIZE - 1);
 * the assertion below fails at compile time if the native FreeBSD
 * definition (PAGE_SIZE - 1) is picked up instead.
 */
CTASSERT((uintptr_t)PAGE_MASK > (uintptr_t)PAGE_SIZE);

struct mlx5_pages_req {
	struct mlx5_core_dev *dev;
	u16	func_id;
	s32	npages;
	struct work_struct work;
};

enum {
	MAX_RECLAIM_TIME_MSECS	= 5000,
};

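/*
 * Callback invoked by bus_dmamap_load() once the DMA mapping of a
 * firmware page has been resolved.  On success the single segment's
 * bus address is stored in the page descriptor; in either case the
 * load state is recorded and MLX5_DMA_DONE() signals completion so
 * that mlx5_fwp_alloc() can stop waiting.
 */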
static void
mlx5_fwp_load_mem_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct mlx5_fw_page *fwp;
	uint8_t owned;

	fwp = (struct mlx5_fw_page *)arg;
	owned = MLX5_DMA_OWNED(fwp->dev);

	if (!owned)
		MLX5_DMA_LOCK(fwp->dev);

	if (error == 0) {
		KASSERT(nseg == 1, ("Number of segments is different from 1"));
		fwp->dma_addr = segs->ds_addr;
		fwp->load_done = MLX5_LOAD_ST_SUCCESS;
	} else {
		fwp->load_done = MLX5_LOAD_ST_FAILURE;
	}
	MLX5_DMA_DONE(fwp->dev);

	if (!owned)
		MLX5_DMA_UNLOCK(fwp->dev);
}

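/*
 * Flush CPU writes to an array of firmware pages before the device
 * reads them (BUS_DMASYNC_PREWRITE on every page in the array).
 */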
void
mlx5_fwp_flush(struct mlx5_fw_page *fwp)
{
	unsigned num = fwp->numpages;

	while (num--)
		bus_dmamap_sync(fwp[num].dev->cmd.dma_tag, fwp[num].dma_map, BUS_DMASYNC_PREWRITE);
}

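/*
 * Invalidate stale cached copies of an array of firmware pages so that
 * subsequent CPU reads observe what the device wrote
 * (BUS_DMASYNC_POSTREAD followed by BUS_DMASYNC_PREREAD on every page).
 */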
void
mlx5_fwp_invalidate(struct mlx5_fw_page *fwp)
{
	unsigned num = fwp->numpages;

	while (num--) {
		bus_dmamap_sync(fwp[num].dev->cmd.dma_tag, fwp[num].dma_map, BUS_DMASYNC_POSTREAD);
		bus_dmamap_sync(fwp[num].dev->cmd.dma_tag, fwp[num].dma_map, BUS_DMASYNC_PREREAD);
	}
}

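/*
 * Allocate and DMA-map "num" firmware pages of MLX5_ADAPTER_PAGE_SIZE
 * bytes each.  A "num" of zero returns an empty descriptor that only
 * carries the device pointer.  A sleeping context is required, so
 * callers passing M_NOWAIT get NULL back.  On failure all previously
 * mapped pages are unwound.
 */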
struct mlx5_fw_page *
mlx5_fwp_alloc(struct mlx5_core_dev *dev, gfp_t flags, unsigned num)
{
	struct mlx5_fw_page *fwp;
	unsigned x;
	int err;

	/* check for special case */
	if (num == 0) {
		fwp = kzalloc(sizeof(*fwp), flags);
		if (fwp != NULL)
			fwp->dev = dev;
		return (fwp);
	}

	/* we need sleeping context for this function */
	if (flags & M_NOWAIT)
		return (NULL);

	fwp = kzalloc(sizeof(*fwp) * num, flags);

	/* serialize loading the DMA map(s) */
	sx_xlock(&dev->cmd.dma_sx);

	for (x = 0; x != num; x++) {
		/* store pointer to MLX5 core device */
		fwp[x].dev = dev;
		/* store number of pages left from the array */
		fwp[x].numpages = num - x;

		/* allocate memory */
		err = bus_dmamem_alloc(dev->cmd.dma_tag, &fwp[x].virt_addr,
		    BUS_DMA_WAITOK | BUS_DMA_COHERENT, &fwp[x].dma_map);
		if (err != 0)
			goto failure;

		/* load memory into DMA */
		MLX5_DMA_LOCK(dev);
		(void) bus_dmamap_load(
		    dev->cmd.dma_tag, fwp[x].dma_map, fwp[x].virt_addr,
		    MLX5_ADAPTER_PAGE_SIZE, &mlx5_fwp_load_mem_cb,
		    fwp + x, BUS_DMA_WAITOK | BUS_DMA_COHERENT);

		while (fwp[x].load_done == MLX5_LOAD_ST_NONE)
			MLX5_DMA_WAIT(dev);
		MLX5_DMA_UNLOCK(dev);

		/* check for error */
		if (fwp[x].load_done != MLX5_LOAD_ST_SUCCESS) {
			bus_dmamem_free(dev->cmd.dma_tag, fwp[x].virt_addr,
			    fwp[x].dma_map);
			goto failure;
		}
	}
	sx_xunlock(&dev->cmd.dma_sx);
	return (fwp);

failure:
	while (x--) {
		bus_dmamap_unload(dev->cmd.dma_tag, fwp[x].dma_map);
		bus_dmamem_free(dev->cmd.dma_tag, fwp[x].virt_addr, fwp[x].dma_map);
	}
	sx_xunlock(&dev->cmd.dma_sx);
	kfree(fwp);
	return (NULL);
}

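/*
 * Unmap and free an array of firmware pages previously obtained from
 * mlx5_fwp_alloc().  NULL pointers and empty descriptors are handled
 * gracefully.
 */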
void
mlx5_fwp_free(struct mlx5_fw_page *fwp)
{
	struct mlx5_core_dev *dev;
	unsigned num;

	/* be NULL safe */
	if (fwp == NULL)
		return;

	/* check for special case */
	if (fwp->numpages == 0) {
		kfree(fwp);
		return;
	}

	num = fwp->numpages;
	dev = fwp->dev;

	while (num--) {
		bus_dmamap_unload(dev->cmd.dma_tag, fwp[num].dma_map);
		bus_dmamem_free(dev->cmd.dma_tag, fwp[num].virt_addr, fwp[num].dma_map);
	}

	kfree(fwp);
}

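/*
 * Translate a byte offset into a firmware page array into the
 * corresponding DMA (bus) address or kernel virtual address.
 */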
u64
mlx5_fwp_get_dma(struct mlx5_fw_page *fwp, size_t offset)
{
	size_t index = (offset / MLX5_ADAPTER_PAGE_SIZE);
	KASSERT(index < fwp->numpages, ("Invalid offset: %lld", (long long)offset));

	return ((fwp + index)->dma_addr + (offset % MLX5_ADAPTER_PAGE_SIZE));
}

void *
mlx5_fwp_get_virt(struct mlx5_fw_page *fwp, size_t offset)
{
	size_t index = (offset / MLX5_ADAPTER_PAGE_SIZE);
	KASSERT(index < fwp->numpages, ("Invalid offset: %lld", (long long)offset));

	return ((char *)(fwp + index)->virt_addr + (offset % MLX5_ADAPTER_PAGE_SIZE));
}

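/*
 * Insert a firmware page into the per-device red-black tree, keyed by
 * its DMA address.  Returns -EEXIST if a page with the same address is
 * already present.  The caller must hold the command DMA lock.
 */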
static int
mlx5_insert_fw_page_locked(struct mlx5_core_dev *dev, struct mlx5_fw_page *nfp)
{
	struct rb_root *root = &dev->priv.page_root;
	struct rb_node **new = &root->rb_node;
	struct rb_node *parent = NULL;
	struct mlx5_fw_page *tfp;

	while (*new) {
		parent = *new;
		tfp = rb_entry(parent, struct mlx5_fw_page, rb_node);
		if (tfp->dma_addr < nfp->dma_addr)
			new = &parent->rb_left;
		else if (tfp->dma_addr > nfp->dma_addr)
			new = &parent->rb_right;
		else
			return (-EEXIST);
	}

	rb_link_node(&nfp->rb_node, parent, new);
	rb_insert_color(&nfp->rb_node, root);
	return (0);
}

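/*
 * Look up a firmware page by DMA address, remove it from the
 * red-black tree and return it, or return NULL if no such page is
 * tracked.  The caller must hold the command DMA lock.
 */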
static struct mlx5_fw_page *
mlx5_remove_fw_page_locked(struct mlx5_core_dev *dev, bus_addr_t addr)
{
	struct rb_root *root = &dev->priv.page_root;
	struct rb_node *tmp = root->rb_node;
	struct mlx5_fw_page *result = NULL;
	struct mlx5_fw_page *tfp;

	while (tmp) {
		tfp = rb_entry(tmp, struct mlx5_fw_page, rb_node);
		if (tfp->dma_addr < addr) {
			tmp = tmp->rb_left;
		} else if (tfp->dma_addr > addr) {
			tmp = tmp->rb_right;
		} else {
			rb_erase(&tfp->rb_node, &dev->priv.page_root);
			result = tfp;
			break;
		}
	}
	return (result);
}

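/*
 * Allocate a single 4K firmware page on behalf of "func_id", register
 * it in the page tree and return its DMA address through "addr".
 */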
static int
alloc_4k(struct mlx5_core_dev *dev, u64 *addr, u16 func_id)
{
	struct mlx5_fw_page *fwp;
	int err;

	fwp = mlx5_fwp_alloc(dev, GFP_KERNEL, 1);
	if (fwp == NULL)
		return (-ENOMEM);

	fwp->func_id = func_id;

	MLX5_DMA_LOCK(dev);
	err = mlx5_insert_fw_page_locked(dev, fwp);
	MLX5_DMA_UNLOCK(dev);

	if (err != 0) {
		mlx5_fwp_free(fwp);
	} else {
		/* make sure cached data is cleaned */
		mlx5_fwp_invalidate(fwp);

		/* store DMA address */
		*addr = fwp->dma_addr;
	}
	return (err);
}

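/*
 * Return a single 4K firmware page, identified by its DMA address, to
 * the system, warning if the address is not tracked.
 */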
static void
free_4k(struct mlx5_core_dev *dev, u64 addr)
{
	struct mlx5_fw_page *fwp;

	MLX5_DMA_LOCK(dev);
	fwp = mlx5_remove_fw_page_locked(dev, addr);
	MLX5_DMA_UNLOCK(dev);

	if (fwp == NULL) {
		mlx5_core_warn(dev, "Cannot free 4K page at 0x%llx\n", (long long)addr);
		return;
	}
	mlx5_fwp_free(fwp);
}

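/*
 * Query the firmware for the number of pages it needs (boot or init
 * stage, selected by "boot") and for the function ID the request
 * applies to.
 */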
static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
				s32 *npages, int boot)
{
	u32 in[MLX5_ST_SZ_DW(query_pages_in)] = {0};
	u32 out[MLX5_ST_SZ_DW(query_pages_out)] = {0};
	int err;

	MLX5_SET(query_pages_in, in, opcode, MLX5_CMD_OP_QUERY_PAGES);
	MLX5_SET(query_pages_in, in, op_mod, boot ?
		 MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES :
		 MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES);

	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
	if (err)
		return err;

	*npages = MLX5_GET(query_pages_out, out, num_pages);
	*func_id = MLX5_GET(query_pages_out, out, function_id);

	return 0;
}

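/*
 * Allocate "npages" 4K pages and hand them to the firmware with the
 * MANAGE_PAGES/GIVE command.  If anything fails and "notify_fail" is
 * set, the firmware is informed via MLX5_PAGES_CANT_GIVE and all pages
 * allocated so far are released.
 */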
static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
		      int notify_fail)
{
	u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
	int inlen = MLX5_ST_SZ_BYTES(manage_pages_in);
	u64 addr;
	int err;
	u32 *in, *nin;
	int i = 0;

	inlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_in, pas[0]);
	in = mlx5_vzalloc(inlen);
	if (!in) {
		mlx5_core_warn(dev, "vzalloc failed %d\n", inlen);
		err = -ENOMEM;
		goto out_alloc;
	}

	for (i = 0; i < npages; i++) {
		err = alloc_4k(dev, &addr, func_id);
		if (err)
			goto out_alloc;
		MLX5_ARRAY_SET64(manage_pages_in, in, pas, i, addr);
	}

	MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
	MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE);
	MLX5_SET(manage_pages_in, in, function_id, func_id);
	MLX5_SET(manage_pages_in, in, input_num_entries, npages);

	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
	if (err) {
		mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n",
			       func_id, npages, err);
		goto out_alloc;
	}
	dev->priv.fw_pages += npages;
	dev->priv.pages_per_func[func_id] += npages;

	mlx5_core_dbg(dev, "err %d\n", err);

	goto out_free;

out_alloc:
	if (notify_fail) {
		nin = mlx5_vzalloc(inlen);
		if (!nin)
			goto out_4k;

		memset(&out, 0, sizeof(out));
		MLX5_SET(manage_pages_in, nin, opcode, MLX5_CMD_OP_MANAGE_PAGES);
		MLX5_SET(manage_pages_in, nin, op_mod, MLX5_PAGES_CANT_GIVE);
		MLX5_SET(manage_pages_in, nin, function_id, func_id);
		if (mlx5_cmd_exec(dev, nin, inlen, out, sizeof(out)))
			mlx5_core_warn(dev, "page notify failed\n");
		kvfree(nin);
	}

out_4k:
	for (i--; i >= 0; i--)
		free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]));
out_free:
	kvfree(in);
	return err;
}

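/*
 * Execute a MANAGE_PAGES/TAKE command.  When the device is in internal
 * error state the firmware cannot answer, so the output mailbox is
 * filled directly from the locally tracked pages belonging to the
 * requested function.
 */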
static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
			     u32 *in, int in_size, u32 *out, int out_size)
{
	struct mlx5_fw_page *fwp;
	struct rb_node *p;
	u32 func_id;
	u32 npages;
	u32 i = 0;

	if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
		return mlx5_cmd_exec(dev, in, in_size, out, out_size);

	/* No hard feelings, we want our pages back! */
	npages = MLX5_GET(manage_pages_in, in, input_num_entries);
	func_id = MLX5_GET(manage_pages_in, in, function_id);

	p = rb_first(&dev->priv.page_root);
	while (p && i < npages) {
		fwp = rb_entry(p, struct mlx5_fw_page, rb_node);
		p = rb_next(p);
		if (fwp->func_id != func_id)
			continue;

		MLX5_ARRAY_SET64(manage_pages_out, out, pas, i, fwp->dma_addr);
		i++;
	}

	MLX5_SET(manage_pages_out, out, output_num_entries, i);
	return 0;
}

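/*
 * Reclaim up to "npages" pages from the firmware for "func_id", free
 * every page the firmware actually returned and report the count
 * through "nclaimed" when requested.
 */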
static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
			 int *nclaimed)
{
	int outlen = MLX5_ST_SZ_BYTES(manage_pages_out);
	u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0};
	int num_claimed;
	u32 *out;
	int err;
	int i;

	if (nclaimed)
		*nclaimed = 0;

	outlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]);
	out = mlx5_vzalloc(outlen);
	if (!out)
		return -ENOMEM;

	MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
	MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_TAKE);
	MLX5_SET(manage_pages_in, in, function_id, func_id);
	MLX5_SET(manage_pages_in, in, input_num_entries, npages);

	mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
	err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen);
	if (err) {
		mlx5_core_err(dev, "failed reclaiming pages\n");
		goto out_free;
	}

	num_claimed = MLX5_GET(manage_pages_out, out, output_num_entries);
	if (nclaimed)
		*nclaimed = num_claimed;

	dev->priv.fw_pages -= num_claimed;
	dev->priv.pages_per_func[func_id] -= num_claimed;
	for (i = 0; i < num_claimed; i++)
		free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i]));

out_free:
	kvfree(out);
	return err;
}

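/*
 * Deferred handler for firmware page requests: a negative page count
 * means the firmware wants pages reclaimed, a positive one means it
 * wants additional pages.
 */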
static void pages_work_handler(struct work_struct *work)
{
	struct mlx5_pages_req *req = container_of(work, struct mlx5_pages_req, work);
	struct mlx5_core_dev *dev = req->dev;
	int err = 0;

	if (req->npages < 0)
		err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL);
	else if (req->npages > 0)
		err = give_pages(dev, req->func_id, req->npages, 1);

	if (err)
		mlx5_core_warn(dev, "%s fail %d\n",
			       req->npages < 0 ? "reclaim" : "give", err);

	kfree(req);
}

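/*
 * Called when the firmware requests a change in its page count
 * (typically from the page-request event path); the actual work is
 * queued on the page allocator workqueue because it may sleep.
 */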
void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
				 s32 npages)
{
	struct mlx5_pages_req *req;

	req = kzalloc(sizeof(*req), GFP_ATOMIC);
	if (!req) {
		mlx5_core_warn(dev, "failed to allocate pages request\n");
		return;
	}

	req->dev = dev;
	req->func_id = func_id;
	req->npages = npages;
	INIT_WORK(&req->work, pages_work_handler);
	if (!queue_work(dev->priv.pg_wq, &req->work))
		mlx5_core_warn(dev, "failed to queue pages handler work\n");
}

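/*
 * Give the firmware the boot or init pages it asked for during device
 * startup.
 */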
int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
{
	u16 uninitialized_var(func_id);
	s32 uninitialized_var(npages);
	int err;

	err = mlx5_cmd_query_pages(dev, &func_id, &npages, boot);
	if (err)
		return err;

	mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n",
		      npages, boot ? "boot" : "init", func_id);

	return give_pages(dev, func_id, npages, 0);
}

enum {
	MLX5_BLKS_FOR_RECLAIM_PAGES = 12
};

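/*
 * Wait up to MAX_RECLAIM_TIME_MSECS for the firmware to return all
 * pages given out on behalf of VFs (everything except the pages of
 * function 0).  The deadline is extended while progress is being made.
 * Returns zero when all VF pages were returned, otherwise the negated
 * number of pages still outstanding.
 */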
s64 mlx5_wait_for_reclaim_vfs_pages(struct mlx5_core_dev *dev)
{
	unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
	s64 prevpages = 0;
	s64 npages = 0;

	while (!time_after(jiffies, end)) {
		/* exclude own function, VFs only */
		npages = dev->priv.fw_pages - dev->priv.pages_per_func[0];
		if (!npages)
			break;

		if (npages != prevpages)
			end = end + msecs_to_jiffies(100);

		prevpages = npages;
		msleep(1);
	}

	if (npages)
		mlx5_core_warn(dev, "FW did not return all VF pages; this will cause a memory leak\n");

	return -npages;
}

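/*
 * Compute how many page addresses fit into a single reclaim command
 * mailbox: the inline output area plus MLX5_BLKS_FOR_RECLAIM_PAGES
 * mailbox blocks, minus the fixed MANAGE_PAGES output header.
 */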
static int optimal_reclaimed_pages(void)
{
	struct mlx5_cmd_prot_block *block;
	struct mlx5_cmd_layout *lay;
	int ret;

	ret = (sizeof(lay->out) + MLX5_BLKS_FOR_RECLAIM_PAGES * sizeof(block->data) -
	       MLX5_ST_SZ_BYTES(manage_pages_out)) /
	       MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]);

	return ret;
}

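/*
 * Reclaim every page the firmware still holds, batch by batch, during
 * teardown.  The deadline is refreshed whenever a batch makes
 * progress; if the firmware stops returning pages the loop gives up
 * with a warning.
 */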
int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
{
	unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
	struct mlx5_fw_page *fwp;
	struct rb_node *p;
	int nclaimed = 0;
	int err;

	do {
		p = rb_first(&dev->priv.page_root);
		if (p) {
			fwp = rb_entry(p, struct mlx5_fw_page, rb_node);
			err = reclaim_pages(dev, fwp->func_id,
					    optimal_reclaimed_pages(),
					    &nclaimed);
			if (err) {
				mlx5_core_warn(dev, "failed reclaiming pages (%d)\n",
					       err);
				return err;
			}

			if (nclaimed)
				end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
		}
		if (time_after(jiffies, end)) {
			mlx5_core_warn(dev, "FW did not return all pages, giving up\n");
			break;
		}
	} while (p);

	return 0;
}

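/*
 * Page allocator life cycle: init/cleanup manage the page tree, while
 * start/stop manage the single-threaded workqueue that services
 * firmware page requests.
 */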
void mlx5_pagealloc_init(struct mlx5_core_dev *dev)
{

	dev->priv.page_root = RB_ROOT;
}

void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
{
	/* nothing */
}

int mlx5_pagealloc_start(struct mlx5_core_dev *dev)
{
	dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
	if (!dev->priv.pg_wq)
		return -ENOMEM;

	return 0;
}

void mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
{
	destroy_workqueue(dev->priv.pg_wq);
}