xref: /freebsd/sys/dev/iser/iser_memory.c (revision 53b70c86)
1 /* $FreeBSD$ */
2 /*-
3  * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include "icl_iser.h"
28 
29 static struct fast_reg_descriptor *
30 iser_reg_desc_get(struct ib_conn *ib_conn)
31 {
32 	struct fast_reg_descriptor *desc;
33 
34 	mtx_lock(&ib_conn->lock);
35 	desc = list_first_entry(&ib_conn->fastreg.pool,
36 				struct fast_reg_descriptor, list);
37 	list_del(&desc->list);
38 	mtx_unlock(&ib_conn->lock);
39 
40 	return (desc);
41 }
42 
43 static void
44 iser_reg_desc_put(struct ib_conn *ib_conn,
45 		  struct fast_reg_descriptor *desc)
46 {
47 	mtx_lock(&ib_conn->lock);
48 	list_add(&desc->list, &ib_conn->fastreg.pool);
49 	mtx_unlock(&ib_conn->lock);
50 }
51 
/*
 * Evaluates to non-zero when none of the bits in ~MASK_4K are set in addr.
 * The argument is parenthesized so that the cast applies to the complete
 * expression passed in (e.g. "ptr + n") rather than to its first operand.
 */
#define IS_4K_ALIGNED(addr)	((((unsigned long)(addr)) & ~MASK_4K) == 0)
53 
54 /**
55  * iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned
56  * for RDMA sub-list of a scatter-gather list of memory buffers, and  returns
57  * the number of entries which are aligned correctly. Supports the case where
58  * consecutive SG elements are actually fragments of the same physcial page.
59  */
60 static int
61 iser_data_buf_aligned_len(struct iser_data_buf *data, struct ib_device *ibdev)
62 {
63 	struct scatterlist *sg, *sgl, *next_sg = NULL;
64 	u64 start_addr, end_addr;
65 	int i, ret_len, start_check = 0;
66 
67 	if (data->dma_nents == 1)
68 		return (1);
69 
70 	sgl = data->sgl;
71 	start_addr  = ib_sg_dma_address(ibdev, sgl);
72 
73 	for_each_sg(sgl, sg, data->dma_nents, i) {
74 		if (start_check && !IS_4K_ALIGNED(start_addr))
75 			break;
76 
77 		next_sg = sg_next(sg);
78 		if (!next_sg)
79 			break;
80 
81 		end_addr    = start_addr + ib_sg_dma_len(ibdev, sg);
82 		start_addr  = ib_sg_dma_address(ibdev, next_sg);
83 
84 		if (end_addr == start_addr) {
85 			start_check = 0;
86 			continue;
87 		} else
88 			start_check = 1;
89 
90 		if (!IS_4K_ALIGNED(end_addr))
91 			break;
92 	}
93 	ret_len = (next_sg) ? i : i+1;
94 
95 	return (ret_len);
96 }
97 
98 void
99 iser_dma_unmap_task_data(struct icl_iser_pdu *iser_pdu,
100 			 struct iser_data_buf *data,
101 			 enum dma_data_direction dir)
102 {
103 	struct ib_device *dev;
104 
105 	dev = iser_pdu->iser_conn->ib_conn.device->ib_device;
106 	ib_dma_unmap_sg(dev, data->sgl, data->size, dir);
107 }
108 
109 static int
110 iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
111 	     struct iser_mem_reg *reg)
112 {
113 	struct scatterlist *sg = mem->sgl;
114 
115 	reg->sge.lkey = device->mr->lkey;
116 	reg->rkey = device->mr->rkey;
117 	reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);
118 	reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);
119 
120 	return (0);
121 }
122 
/**
 * TODO: This should be a verb
 * iser_ib_inc_rkey - advance the key portion of an rkey by one.
 * @rkey - the rkey to increment.
 *
 * The low 8 bits are incremented and wrap from 0xff to 0x00; the upper
 * 24 bits are returned unchanged.  Can be used for calculating a new rkey
 * for type 2 memory windows.
 */
static inline u32
iser_ib_inc_rkey(u32 rkey)
{
	const u32 key_bits = 0x000000ff;
	u32 next_key, upper_part;

	next_key = (rkey + 1) & key_bits;
	upper_part = rkey & ~key_bits;

	return (upper_part | next_key);
}
136 
137 static void
138 iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
139 {
140 	u32 rkey;
141 
142 	memset(inv_wr, 0, sizeof(*inv_wr));
143 	inv_wr->opcode = IB_WR_LOCAL_INV;
144 	inv_wr->wr_id = ISER_FASTREG_LI_WRID;
145 	inv_wr->ex.invalidate_rkey = mr->rkey;
146 
147 	rkey = iser_ib_inc_rkey(mr->rkey);
148 	ib_update_fast_reg_key(mr, rkey);
149 }
150 
151 static int
152 iser_fast_reg_mr(struct icl_iser_pdu *iser_pdu,
153 		 struct iser_data_buf *mem,
154 		 struct iser_reg_resources *rsc,
155 		 struct iser_mem_reg *reg)
156 {
157 	struct ib_conn *ib_conn = &iser_pdu->iser_conn->ib_conn;
158 	struct iser_device *device = ib_conn->device;
159 	struct ib_mr *mr = rsc->mr;
160 	struct ib_reg_wr fastreg_wr;
161 	struct ib_send_wr inv_wr;
162 	const struct ib_send_wr *bad_wr;
163 	struct ib_send_wr *wr = NULL;
164 	int ret, n;
165 
166 	/* if there a single dma entry, dma mr suffices */
167 	if (mem->dma_nents == 1)
168 		return iser_reg_dma(device, mem, reg);
169 
170 	if (!rsc->mr_valid) {
171 		iser_inv_rkey(&inv_wr, mr);
172 		wr = &inv_wr;
173 	}
174 
175 	n = ib_map_mr_sg(mr, mem->sg, mem->size, NULL, SIZE_4K);
176 	if (unlikely(n != mem->size)) {
177 		ISER_ERR("failed to map sg (%d/%d)\n", n, mem->size);
178 		return n < 0 ? n : -EINVAL;
179 	}
180 	/* Prepare FASTREG WR */
181 	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
182 	fastreg_wr.wr.opcode = IB_WR_REG_MR;
183 	fastreg_wr.wr.wr_id = ISER_FASTREG_LI_WRID;
184 	fastreg_wr.wr.num_sge = 0;
185 	fastreg_wr.mr = mr;
186 	fastreg_wr.key = mr->rkey;
187 	fastreg_wr.access = IB_ACCESS_LOCAL_WRITE  |
188 			    IB_ACCESS_REMOTE_WRITE |
189 			    IB_ACCESS_REMOTE_READ;
190 
191 	if (!wr)
192 		wr = &fastreg_wr.wr;
193 	else
194 		wr->next = &fastreg_wr.wr;
195 
196 	ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
197 	if (ret) {
198 		ISER_ERR("fast registration failed, ret:%d", ret);
199 		return (ret);
200 	}
201 	rsc->mr_valid = 0;
202 
203 	reg->sge.lkey = mr->lkey;
204 	reg->rkey = mr->rkey;
205 	reg->sge.addr = mr->iova;
206 	reg->sge.length = mr->length;
207 
208 	return (ret);
209 }
210 
211 /**
212  * iser_reg_rdma_mem - Registers memory intended for RDMA,
213  * using Fast Registration WR (if possible) obtaining rkey and va
214  *
215  * returns 0 on success, errno code on failure
216  */
217 int
218 iser_reg_rdma_mem(struct icl_iser_pdu *iser_pdu,
219 		  enum iser_data_dir cmd_dir)
220 {
221 	struct ib_conn *ib_conn = &iser_pdu->iser_conn->ib_conn;
222 	struct iser_device   *device = ib_conn->device;
223 	struct ib_device     *ibdev = device->ib_device;
224 	struct iser_data_buf *mem = &iser_pdu->data[cmd_dir];
225 	struct iser_mem_reg *mem_reg = &iser_pdu->rdma_reg[cmd_dir];
226 	struct fast_reg_descriptor *desc = NULL;
227 	int err, aligned_len;
228 
229 	aligned_len = iser_data_buf_aligned_len(mem, ibdev);
230 	if (aligned_len != mem->dma_nents) {
231 		ISER_ERR("bounce buffer is not supported");
232 		return 1;
233 	}
234 
235 	if (mem->dma_nents != 1) {
236 		desc = iser_reg_desc_get(ib_conn);
237 		mem_reg->mem_h = desc;
238 	}
239 
240 	err = iser_fast_reg_mr(iser_pdu, mem, desc ? &desc->rsc : NULL,
241 				       mem_reg);
242 	if (err)
243 		goto err_reg;
244 
245 	return (0);
246 
247 err_reg:
248 	if (desc)
249 		iser_reg_desc_put(ib_conn, desc);
250 
251 	return (err);
252 }
253 
254 void
255 iser_unreg_rdma_mem(struct icl_iser_pdu *iser_pdu,
256 		    enum iser_data_dir cmd_dir)
257 {
258 	struct iser_mem_reg *reg = &iser_pdu->rdma_reg[cmd_dir];
259 
260 	if (!reg->mem_h)
261 		return;
262 
263 	iser_reg_desc_put(&iser_pdu->iser_conn->ib_conn,
264 			  reg->mem_h);
265 	reg->mem_h = NULL;
266 }
267 
268 int
269 iser_dma_map_task_data(struct icl_iser_pdu *iser_pdu,
270 		       struct iser_data_buf *data,
271 		       enum iser_data_dir iser_dir,
272 		       enum dma_data_direction dma_dir)
273 {
274 	struct ib_device *dev;
275 
276 	iser_pdu->dir[iser_dir] = 1;
277 	dev = iser_pdu->iser_conn->ib_conn.device->ib_device;
278 
279 	data->dma_nents = ib_dma_map_sg(dev, data->sgl, data->size, dma_dir);
280 	if (data->dma_nents == 0) {
281 		ISER_ERR("dma_map_sg failed");
282 		return (EINVAL);
283 	}
284 
285 	return (0);
286 }
287