xref: /linux/drivers/infiniband/hw/mlx5/mem.c (revision 9a6b55ac)
/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include "mlx5_ib.h"
#include <linux/jiffies.h>

/* @umem: umem object to scan
 * @addr: ib virtual address requested by the user
 * @max_page_shift: high limit for page_shift - 0 means no limit
 * @count: number of PAGE_SIZE pages covered by umem
 * @shift: page shift for the compound pages found in the region
 * @ncont: number of compound pages
 * @order: log2 of the number of compound pages
 */
void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
			unsigned long max_page_shift,
			int *count, int *shift,
			int *ncont, int *order)
{
	unsigned long tmp;
	unsigned long m;
	u64 base = ~0, p = 0;
	u64 len, pfn;
	int i = 0;
	struct scatterlist *sg;
	int entry;

	addr = addr >> PAGE_SHIFT;
	tmp = (unsigned long)addr;
	m = find_first_bit(&tmp, BITS_PER_LONG);
	if (max_page_shift)
		m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m);

	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
		len = sg_dma_len(sg) >> PAGE_SHIFT;
		pfn = sg_dma_address(sg) >> PAGE_SHIFT;
		if (base + p != pfn) {
			/* If either the offset or the new
			 * base are unaligned update m
			 */
			tmp = (unsigned long)(pfn | p);
			if (!IS_ALIGNED(tmp, 1 << m))
				m = find_first_bit(&tmp, BITS_PER_LONG);

			base = pfn;
			p = 0;
		}

		p += len;
		i += len;
	}

	if (i) {
		m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m);

		if (order)
			*order = ilog2(roundup_pow_of_two(i) >> m);

		*ncont = DIV_ROUND_UP(i, (1 << m));
	} else {
		m = 0;

		if (order)
			*order = 0;

		*ncont = 0;
	}
	*shift = PAGE_SHIFT + m;
	*count = i;
}
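
/*
 * Illustrative examples of what the scan above reports (these numbers are
 * not from the original sources; they assume PAGE_SHIFT == 12 and
 * max_page_shift == 0):
 *
 *  - A 2 MiB umem that is physically contiguous, with its DMA address and
 *    the requested IOVA both 2 MiB aligned: m ends up as 9, so
 *    *shift = 21, *count = 512, *ncont = 1 and *order = 0.
 *
 *  - The same 2 MiB umem built from two discontiguous 1 MiB chunks that
 *    are only 1 MiB aligned: (pfn | p) has just 8 low zero bits, so m
 *    drops to 8 and *shift = 20, *count = 512, *ncont = 2, *order = 1.
 */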

static u64 umem_dma_to_mtt(dma_addr_t umem_dma)
{
	u64 mtt_entry = umem_dma & ODP_DMA_ADDR_MASK;

	if (umem_dma & ODP_READ_ALLOWED_BIT)
		mtt_entry |= MLX5_IB_MTT_READ;
	if (umem_dma & ODP_WRITE_ALLOWED_BIT)
		mtt_entry |= MLX5_IB_MTT_WRITE;

	return mtt_entry;
}
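
/*
 * For illustration: an ODP dma_list entry that carries a DMA address with
 * both ODP_READ_ALLOWED_BIT and ODP_WRITE_ALLOWED_BIT set is translated
 * above into (addr & ODP_DMA_ADDR_MASK) | MLX5_IB_MTT_READ |
 * MLX5_IB_MTT_WRITE, i.e. a present read/write MTT entry for the HCA.
 */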

/*
 * Populate the given array with bus addresses from the umem.
 *
 * dev - mlx5_ib device
 * umem - umem to use to fill the pages
 * page_shift - determines the page size used in the resulting array
 * offset - offset into the umem to start from,
 *          only implemented for ODP umems
 * num_pages - total number of pages to fill
 * pas - bus addresses array to fill
 * access_flags - access flags to set on all present pages.
 *                use enum mlx5_ib_mtt_access_flags for this.
 */
void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
			    int page_shift, size_t offset, size_t num_pages,
			    __be64 *pas, int access_flags)
{
	int shift = page_shift - PAGE_SHIFT;
	int mask = (1 << shift) - 1;
	int i, k, idx;
	u64 cur = 0;
	u64 base;
	int len;
	struct scatterlist *sg;
	int entry;

	if (umem->is_odp) {
		WARN_ON(shift != 0);
		WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE));

		for (i = 0; i < num_pages; ++i) {
			dma_addr_t pa =
				to_ib_umem_odp(umem)->dma_list[offset + i];

			pas[i] = cpu_to_be64(umem_dma_to_mtt(pa));
		}
		return;
	}

	i = 0;
	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
		len = sg_dma_len(sg) >> PAGE_SHIFT;
		base = sg_dma_address(sg);

		/* Skip elements below offset */
		if (i + len < offset << shift) {
			i += len;
			continue;
		}

		/* Skip pages below offset */
		if (i < offset << shift) {
			k = (offset << shift) - i;
			i = offset << shift;
		} else {
			k = 0;
		}

		for (; k < len; k++) {
			if (!(i & mask)) {
				cur = base + (k << PAGE_SHIFT);
				cur |= access_flags;
				idx = (i >> shift) - offset;

				pas[idx] = cpu_to_be64(cur);
				mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
					    i >> shift, be64_to_cpu(pas[idx]));
			}
			i++;

			/* Stop after num_pages reached */
			if (i >> shift >= offset + num_pages)
				return;
		}
	}
}
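
/*
 * Note on units, derived from the loop above: "offset" and "num_pages"
 * are counted in pages of (1 << page_shift) bytes, while the scatterlist
 * walk advances in PAGE_SIZE pages; one MTT entry is written per
 * (1 << page_shift) sized region. For ODP umems page_shift must equal
 * PAGE_SHIFT (shift == 0), so the two units coincide.
 */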

void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
			  int page_shift, __be64 *pas, int access_flags)
{
	return __mlx5_ib_populate_pas(dev, umem, page_shift, 0,
				      ib_umem_num_pages(umem), pas,
				      access_flags);
}

int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
{
	u64 page_size;
	u64 page_mask;
	u64 off_size;
	u64 off_mask;
	u64 buf_off;

	page_size = (u64)1 << page_shift;
	page_mask = page_size - 1;
	buf_off = addr & page_mask;
	off_size = page_size >> 6;
	off_mask = off_size - 1;

	if (buf_off & off_mask)
		return -EINVAL;

	*offset = buf_off >> ilog2(off_size);
	return 0;
}
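
/*
 * Worked example (illustrative values only): with page_shift == 12,
 * page_size is 4096 and off_size is 4096 >> 6 == 64. For addr == 0x10140
 * the in-page offset is 0x140, which is a multiple of 64, so the function
 * returns 0 with *offset = 0x140 / 64 == 5. For addr == 0x10141 it would
 * return -EINVAL, since the buffer must start on a page_size/64 granule.
 */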

#define WR_ID_BF 0xBF
#define WR_ID_END 0xBAD
#define TEST_WC_NUM_WQES 255
#define TEST_WC_POLLING_MAX_TIME_JIFFIES msecs_to_jiffies(100)
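
/*
 * Outline of the write-combining test implemented below (the description
 * of the hardware behaviour is inferred from this code, not quoted from
 * the original sources): post_send_nop() builds each NOP WQE twice. The
 * copy in the regular send queue buffer asks for a completion only when
 * "signaled" is set, while the copy pushed through the blue-flame
 * register (mmio_wqe) always has MLX5_WQE_CTRL_CQ_UPDATE set.
 * test_wc_do_send() posts TEST_WC_NUM_WQES unsignaled NOPs tagged
 * WR_ID_BF followed by one signaled NOP tagged WR_ID_END. If the first
 * completion polled carries WR_ID_BF, the device consumed the blue-flame
 * copy and write combining is considered working; if only WR_ID_END
 * completes, it is not.
 */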
static int post_send_nop(struct mlx5_ib_dev *dev, struct ib_qp *ibqp, u64 wr_id,
			 bool signaled)
{
	struct mlx5_ib_qp *qp = to_mqp(ibqp);
	struct mlx5_wqe_ctrl_seg *ctrl;
	struct mlx5_bf *bf = &qp->bf;
	__be32 mmio_wqe[16] = {};
	unsigned long flags;
	unsigned int idx;
	int i;

	if (unlikely(dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR))
		return -EIO;

	spin_lock_irqsave(&qp->sq.lock, flags);

	idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
	ctrl = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx);

	memset(ctrl, 0, sizeof(struct mlx5_wqe_ctrl_seg));
	ctrl->fm_ce_se = signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0;
	ctrl->opmod_idx_opcode =
		cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | MLX5_OPCODE_NOP);
	ctrl->qpn_ds = cpu_to_be32((sizeof(struct mlx5_wqe_ctrl_seg) / 16) |
				   (qp->trans_qp.base.mqp.qpn << 8));

	qp->sq.wrid[idx] = wr_id;
	qp->sq.w_list[idx].opcode = MLX5_OPCODE_NOP;
	qp->sq.wqe_head[idx] = qp->sq.head + 1;
	qp->sq.cur_post += DIV_ROUND_UP(sizeof(struct mlx5_wqe_ctrl_seg),
					MLX5_SEND_WQE_BB);
	qp->sq.w_list[idx].next = qp->sq.cur_post;
	qp->sq.head++;

	memcpy(mmio_wqe, ctrl, sizeof(*ctrl));
	((struct mlx5_wqe_ctrl_seg *)&mmio_wqe)->fm_ce_se |=
		MLX5_WQE_CTRL_CQ_UPDATE;

	/* Make sure that descriptors are written before
	 * updating doorbell record and ringing the doorbell
	 */
	wmb();

	qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);

	/* Make sure doorbell record is visible to the HCA before
	 * we hit doorbell
	 */
	wmb();
	for (i = 0; i < 8; i++)
		mlx5_write64(&mmio_wqe[i * 2],
			     bf->bfreg->map + bf->offset + i * 8);

	bf->offset ^= bf->buf_size;

	spin_unlock_irqrestore(&qp->sq.lock, flags);

	return 0;
}

static int test_wc_poll_cq_result(struct mlx5_ib_dev *dev, struct ib_cq *cq)
{
	int ret;
	struct ib_wc wc = {};
	unsigned long end = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES;

	do {
		ret = ib_poll_cq(cq, 1, &wc);
		if (ret < 0 || wc.status)
			return ret < 0 ? ret : -EINVAL;
		if (ret)
			break;
	} while (!time_after(jiffies, end));

	if (!ret)
		return -ETIMEDOUT;

	if (wc.wr_id != WR_ID_BF)
		ret = 0;

	return ret;
}

static int test_wc_do_send(struct mlx5_ib_dev *dev, struct ib_qp *qp)
{
	int err, i;

	for (i = 0; i < TEST_WC_NUM_WQES; i++) {
		err = post_send_nop(dev, qp, WR_ID_BF, false);
		if (err)
			return err;
	}

	return post_send_nop(dev, qp, WR_ID_END, true);
}

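/*
 * mlx5_ib_test_wc() drives the test end to end: it allocates a
 * write-combining blue-flame register, creates a throwaway PD, CQ and UD
 * QP (flagged with MLX5_IB_QP_CREATE_WC_TEST), moves the QP to RTS, posts
 * the NOP pattern above, polls for the result and tears everything down.
 * On Ethernet ports with RoCE disabled the test is skipped and write
 * combining is simply assumed to work on the PF.
 */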
int mlx5_ib_test_wc(struct mlx5_ib_dev *dev)
{
	struct ib_cq_init_attr cq_attr = { .cqe = TEST_WC_NUM_WQES + 1 };
	int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);
	struct ib_qp_init_attr qp_init_attr = {
		.cap = { .max_send_wr = TEST_WC_NUM_WQES },
		.qp_type = IB_QPT_UD,
		.sq_sig_type = IB_SIGNAL_REQ_WR,
		.create_flags = MLX5_IB_QP_CREATE_WC_TEST,
	};
	struct ib_qp_attr qp_attr = { .port_num = 1 };
	struct ib_device *ibdev = &dev->ib_dev;
	struct ib_qp *qp;
	struct ib_cq *cq;
	struct ib_pd *pd;
	int ret;

	if (!MLX5_CAP_GEN(dev->mdev, bf))
		return 0;

	if (!dev->mdev->roce.roce_en &&
	    port_type_cap == MLX5_CAP_PORT_TYPE_ETH) {
		if (mlx5_core_is_pf(dev->mdev))
			dev->wc_support = true;
		return 0;
	}

	ret = mlx5_alloc_bfreg(dev->mdev, &dev->wc_bfreg, true, false);
	if (ret)
		goto print_err;

	if (!dev->wc_bfreg.wc)
		goto out1;

	pd = ib_alloc_pd(ibdev, 0);
	if (IS_ERR(pd)) {
		ret = PTR_ERR(pd);
		goto out1;
	}

	cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr);
	if (IS_ERR(cq)) {
		ret = PTR_ERR(cq);
		goto out2;
	}

	qp_init_attr.recv_cq = cq;
	qp_init_attr.send_cq = cq;
	qp = ib_create_qp(pd, &qp_init_attr);
	if (IS_ERR(qp)) {
		ret = PTR_ERR(qp);
		goto out3;
	}

	qp_attr.qp_state = IB_QPS_INIT;
	ret = ib_modify_qp(qp, &qp_attr,
			   IB_QP_STATE | IB_QP_PORT | IB_QP_PKEY_INDEX |
				   IB_QP_QKEY);
	if (ret)
		goto out4;

	qp_attr.qp_state = IB_QPS_RTR;
	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
	if (ret)
		goto out4;

	qp_attr.qp_state = IB_QPS_RTS;
	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
	if (ret)
		goto out4;

	ret = test_wc_do_send(dev, qp);
	if (ret < 0)
		goto out4;

	ret = test_wc_poll_cq_result(dev, cq);
	if (ret > 0) {
		dev->wc_support = true;
		ret = 0;
	}

out4:
	ib_destroy_qp(qp);
out3:
	ib_destroy_cq(cq);
out2:
	ib_dealloc_pd(pd);
out1:
	mlx5_free_bfreg(dev->mdev, &dev->wc_bfreg);
print_err:
	if (ret)
		mlx5_ib_err(
			dev,
			"Error %d while trying to test write-combining support\n",
			ret);
	return ret;
}