xref: /freebsd/sys/dev/iser/iser_initiator.c (revision 38a52bd3)
1 /* $FreeBSD$ */
2 /*-
3  * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include "icl_iser.h"
28 
29 static MALLOC_DEFINE(M_ISER_INITIATOR, "iser_initiator", "iser initiator backend");
30 
31 /* Register user buffer memory and initialize passive rdma
32  *  dto descriptor. Data size is stored in
33  *  task->data[ISER_DIR_IN].data_len, Protection size
34  *  os stored in task->prot[ISER_DIR_IN].data_len
35  */
36 static int
37 iser_prepare_read_cmd(struct icl_iser_pdu *iser_pdu)
38 {
39 	struct iser_hdr *hdr = &iser_pdu->desc.iser_header;
40 	struct iser_data_buf *buf_in = &iser_pdu->data[ISER_DIR_IN];
41 	struct iser_mem_reg *mem_reg;
42 	int err;
43 
44 	err = iser_dma_map_task_data(iser_pdu,
45 				     buf_in,
46 				     ISER_DIR_IN,
47 				     DMA_FROM_DEVICE);
48 	if (err)
49 		return (err);
50 
51 	err = iser_reg_rdma_mem(iser_pdu, ISER_DIR_IN);
52 	if (err) {
53 		ISER_ERR("Failed to set up Data-IN RDMA");
54 		return (err);
55 	}
56 
57 	mem_reg = &iser_pdu->rdma_reg[ISER_DIR_IN];
58 
59 	hdr->flags    |= ISER_RSV;
60 	hdr->read_stag = cpu_to_be32(mem_reg->rkey);
61 	hdr->read_va   = cpu_to_be64(mem_reg->sge.addr);
62 
63 	return (0);
64 }
65 
66 /* Register user buffer memory and initialize passive rdma
67  *  dto descriptor. Data size is stored in
68  *  task->data[ISER_DIR_OUT].data_len, Protection size
69  *  is stored at task->prot[ISER_DIR_OUT].data_len
70  */
71 static int
72 iser_prepare_write_cmd(struct icl_iser_pdu *iser_pdu)
73 {
74 	struct iser_hdr *hdr = &iser_pdu->desc.iser_header;
75 	struct iser_data_buf *buf_out = &iser_pdu->data[ISER_DIR_OUT];
76 	struct iser_mem_reg *mem_reg;
77 	int err;
78 
79 	err = iser_dma_map_task_data(iser_pdu,
80 				     buf_out,
81 				     ISER_DIR_OUT,
82 				     DMA_TO_DEVICE);
83 	if (err)
84 		return (err);
85 
86 	err = iser_reg_rdma_mem(iser_pdu, ISER_DIR_OUT);
87 	if (err) {
88 		ISER_ERR("Failed to set up Data-out RDMA");
89 		return (err);
90 	}
91 
92 	mem_reg = &iser_pdu->rdma_reg[ISER_DIR_OUT];
93 
94 	hdr->flags     |= ISER_WSV;
95 	hdr->write_stag = cpu_to_be32(mem_reg->rkey);
96 	hdr->write_va   = cpu_to_be64(mem_reg->sge.addr);
97 
98 	return (0);
99 }
100 
101 /* creates a new tx descriptor and adds header regd buffer */
102 void
103 iser_create_send_desc(struct iser_conn *iser_conn,
104 		      struct iser_tx_desc *tx_desc)
105 {
106 	struct iser_device *device = iser_conn->ib_conn.device;
107 
108 	ib_dma_sync_single_for_cpu(device->ib_device,
109 		tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
110 
111 	memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
112 	tx_desc->iser_header.flags = ISER_VER;
113 
114 	tx_desc->num_sge = 1;
115 
116 	if (tx_desc->tx_sg[0].lkey != device->mr->lkey) {
117 		tx_desc->tx_sg[0].lkey = device->mr->lkey;
118 		ISER_DBG("sdesc %p lkey mismatch, fixing", tx_desc);
119 	}
120 }
121 
122 void
123 iser_free_login_buf(struct iser_conn *iser_conn)
124 {
125 	struct iser_device *device = iser_conn->ib_conn.device;
126 
127 	if (!iser_conn->login_buf)
128 		return;
129 
130 	if (iser_conn->login_req_dma)
131 		ib_dma_unmap_single(device->ib_device,
132 				    iser_conn->login_req_dma,
133 				    ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
134 
135 	if (iser_conn->login_resp_dma)
136 		ib_dma_unmap_single(device->ib_device,
137 				    iser_conn->login_resp_dma,
138 				    ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
139 
140 	free(iser_conn->login_buf, M_ISER_INITIATOR);
141 
142 	/* make sure we never redo any unmapping */
143 	iser_conn->login_req_dma = 0;
144 	iser_conn->login_resp_dma = 0;
145 	iser_conn->login_buf = NULL;
146 }
147 
148 int
149 iser_alloc_login_buf(struct iser_conn *iser_conn)
150 {
151 	struct iser_device *device = iser_conn->ib_conn.device;
152 	int req_err, resp_err;
153 
154 	BUG_ON(device == NULL);
155 
156 	iser_conn->login_buf = malloc(ISCSI_DEF_MAX_RECV_SEG_LEN + ISER_RX_LOGIN_SIZE,
157 				      M_ISER_INITIATOR, M_WAITOK | M_ZERO);
158 
159 	if (!iser_conn->login_buf)
160 		goto out_err;
161 
162 	iser_conn->login_req_buf  = iser_conn->login_buf;
163 	iser_conn->login_resp_buf = iser_conn->login_buf +
164 				    ISCSI_DEF_MAX_RECV_SEG_LEN;
165 
166 	iser_conn->login_req_dma = ib_dma_map_single(device->ib_device,
167 						     iser_conn->login_req_buf,
168 						     ISCSI_DEF_MAX_RECV_SEG_LEN,
169 						     DMA_TO_DEVICE);
170 
171 	iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device,
172 						      iser_conn->login_resp_buf,
173 						      ISER_RX_LOGIN_SIZE,
174 						      DMA_FROM_DEVICE);
175 
176 	req_err  = ib_dma_mapping_error(device->ib_device,
177 					iser_conn->login_req_dma);
178 	resp_err = ib_dma_mapping_error(device->ib_device,
179 					iser_conn->login_resp_dma);
180 
181 	if (req_err || resp_err) {
182 		if (req_err)
183 			iser_conn->login_req_dma = 0;
184 		if (resp_err)
185 			iser_conn->login_resp_dma = 0;
186 		goto free_login_buf;
187 	}
188 
189 	return (0);
190 
191 free_login_buf:
192 	iser_free_login_buf(iser_conn);
193 
194 out_err:
195 	ISER_DBG("unable to alloc or map login buf");
196 	return (ENOMEM);
197 }
198 
199 int iser_alloc_rx_descriptors(struct iser_conn *iser_conn, int cmds_max)
200 {
201 	int i, j;
202 	u64 dma_addr;
203 	struct iser_rx_desc *rx_desc;
204 	struct ib_sge       *rx_sg;
205 	struct ib_conn *ib_conn = &iser_conn->ib_conn;
206 	struct iser_device *device = ib_conn->device;
207 
208 	iser_conn->qp_max_recv_dtos = cmds_max;
209 	iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2;
210 
211 	if (iser_create_fastreg_pool(ib_conn, cmds_max))
212 		goto create_rdma_reg_res_failed;
213 
214 
215 	iser_conn->num_rx_descs = cmds_max;
216 	iser_conn->rx_descs = malloc(iser_conn->num_rx_descs *
217 				sizeof(struct iser_rx_desc), M_ISER_INITIATOR,
218 				M_WAITOK | M_ZERO);
219 	if (!iser_conn->rx_descs)
220 		goto rx_desc_alloc_fail;
221 
222 	rx_desc = iser_conn->rx_descs;
223 
224 	for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++)  {
225 		dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
226 					ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
227 		if (ib_dma_mapping_error(device->ib_device, dma_addr))
228 			goto rx_desc_dma_map_failed;
229 
230 		rx_desc->dma_addr = dma_addr;
231 
232 		rx_sg = &rx_desc->rx_sg;
233 		rx_sg->addr   = rx_desc->dma_addr;
234 		rx_sg->length = ISER_RX_PAYLOAD_SIZE;
235 		rx_sg->lkey   = device->mr->lkey;
236 	}
237 
238 	iser_conn->rx_desc_head = 0;
239 
240 	return (0);
241 
242 rx_desc_dma_map_failed:
243 	rx_desc = iser_conn->rx_descs;
244 	for (j = 0; j < i; j++, rx_desc++)
245 		ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
246 				    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
247 	free(iser_conn->rx_descs, M_ISER_INITIATOR);
248 	iser_conn->rx_descs = NULL;
249 rx_desc_alloc_fail:
250 	iser_free_fastreg_pool(ib_conn);
251 create_rdma_reg_res_failed:
252 	ISER_ERR("failed allocating rx descriptors / data buffers");
253 
254 	return (ENOMEM);
255 }
256 
257 void
258 iser_free_rx_descriptors(struct iser_conn *iser_conn)
259 {
260 	int i;
261 	struct iser_rx_desc *rx_desc;
262 	struct ib_conn *ib_conn = &iser_conn->ib_conn;
263 	struct iser_device *device = ib_conn->device;
264 
265 	iser_free_fastreg_pool(ib_conn);
266 
267 	rx_desc = iser_conn->rx_descs;
268 	for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++)
269 		ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
270 				    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
271 
272 	free(iser_conn->rx_descs, M_ISER_INITIATOR);
273 
274 	/* make sure we never redo any unmapping */
275 	iser_conn->rx_descs = NULL;
276 }
277 
278 static void
279 iser_buf_to_sg(void *buf, struct iser_data_buf *data_buf)
280 {
281 	struct scatterlist *sg;
282 	int i;
283 	size_t len, tlen;
284 	int offset;
285 
286 	tlen = data_buf->data_len;
287 
288 	for (i = 0; 0 < tlen; i++, tlen -= len)  {
289 		sg = &data_buf->sgl[i];
290 		offset = ((uintptr_t)buf) & ~PAGE_MASK;
291 		len = min(PAGE_SIZE - offset, tlen);
292 		sg_set_buf(sg, buf, len);
293 		buf = (void *)((uintptr_t)buf + len);
294 	}
295 
296 	data_buf->size = i;
297 	sg_mark_end(sg);
298 }
299 
300 
301 static void
302 iser_bio_to_sg(struct bio *bp, struct iser_data_buf *data_buf)
303 {
304 	struct scatterlist *sg;
305 	int i;
306 	size_t len, tlen;
307 	int offset;
308 
309 	tlen = bp->bio_bcount;
310 	offset = bp->bio_ma_offset;
311 
312 	for (i = 0; 0 < tlen; i++, tlen -= len) {
313 		sg = &data_buf->sgl[i];
314 		len = min(PAGE_SIZE - offset, tlen);
315 		sg_set_page(sg, bp->bio_ma[i], len, offset);
316 		offset = 0;
317 	}
318 
319 	data_buf->size = i;
320 	sg_mark_end(sg);
321 }
322 
323 static int
324 iser_csio_to_sg(struct ccb_scsiio *csio, struct iser_data_buf *data_buf)
325 {
326 	struct ccb_hdr *ccbh;
327 	int err = 0;
328 
329 	ccbh = &csio->ccb_h;
330 	switch ((ccbh->flags & CAM_DATA_MASK)) {
331 		case CAM_DATA_BIO:
332 			iser_bio_to_sg((struct bio *) csio->data_ptr, data_buf);
333 			break;
334 		case CAM_DATA_VADDR:
335 			/*
336 			 * Support KVA buffers for various scsi commands such as:
337 			 *  - REPORT_LUNS
338 			 *  - MODE_SENSE_6
339 			 *  - INQUIRY
340 			 *  - SERVICE_ACTION_IN.
341 			 * The data of these commands always mapped into KVA.
342 			 */
343 			iser_buf_to_sg(csio->data_ptr, data_buf);
344 			break;
345 		default:
346 			ISER_ERR("flags 0x%X unimplemented", ccbh->flags);
347 			err = EINVAL;
348 	}
349 	return (err);
350 }
351 
352 static inline bool
353 iser_signal_comp(u8 sig_count)
354 {
355 	return ((sig_count % ISER_SIGNAL_CMD_COUNT) == 0);
356 }
357 
358 int
359 iser_send_command(struct iser_conn *iser_conn,
360 		  struct icl_iser_pdu *iser_pdu)
361 {
362 	struct iser_data_buf *data_buf;
363 	struct iser_tx_desc *tx_desc = &iser_pdu->desc;
364 	struct iscsi_bhs_scsi_command *hdr = (struct iscsi_bhs_scsi_command *) &(iser_pdu->desc.iscsi_header);
365 	struct ccb_scsiio *csio = iser_pdu->csio;
366 	int err = 0;
367 	u8 sig_count = ++iser_conn->ib_conn.sig_count;
368 
369 	/* build the tx desc regd header and add it to the tx desc dto */
370 	tx_desc->type = ISCSI_TX_SCSI_COMMAND;
371 	iser_create_send_desc(iser_conn, tx_desc);
372 
373 	if (hdr->bhssc_flags & BHSSC_FLAGS_R) {
374 		data_buf = &iser_pdu->data[ISER_DIR_IN];
375 	} else {
376 		data_buf = &iser_pdu->data[ISER_DIR_OUT];
377 	}
378 
379 	data_buf->sg = csio->data_ptr;
380 	data_buf->data_len = csio->dxfer_len;
381 
382 	if (likely(csio->dxfer_len)) {
383 		err = iser_csio_to_sg(csio, data_buf);
384 		if (unlikely(err))
385 			goto send_command_error;
386 	}
387 
388 	if (hdr->bhssc_flags & BHSSC_FLAGS_R) {
389 		err = iser_prepare_read_cmd(iser_pdu);
390 		if (err)
391 			goto send_command_error;
392 	} else if (hdr->bhssc_flags & BHSSC_FLAGS_W) {
393 		err = iser_prepare_write_cmd(iser_pdu);
394 		if (err)
395 			goto send_command_error;
396 	}
397 
398 	err = iser_post_send(&iser_conn->ib_conn, tx_desc,
399 			     iser_signal_comp(sig_count));
400 	if (!err)
401 		return (0);
402 
403 send_command_error:
404 	ISER_ERR("iser_conn %p itt %u len %u err %d", iser_conn,
405 			hdr->bhssc_initiator_task_tag,
406 			hdr->bhssc_expected_data_transfer_length,
407 			err);
408 	return (err);
409 }
410 
411 int
412 iser_send_control(struct iser_conn *iser_conn,
413 		  struct icl_iser_pdu *iser_pdu)
414 {
415 	struct iser_tx_desc *mdesc;
416 	struct iser_device *device;
417 	size_t datalen = iser_pdu->icl_pdu.ip_data_len;
418 	int err;
419 
420 	mdesc = &iser_pdu->desc;
421 
422 	/* build the tx desc regd header and add it to the tx desc dto */
423 	mdesc->type = ISCSI_TX_CONTROL;
424 	iser_create_send_desc(iser_conn, mdesc);
425 
426 	device = iser_conn->ib_conn.device;
427 
428 	if (datalen > 0) {
429 		struct ib_sge *tx_dsg = &mdesc->tx_sg[1];
430 		ib_dma_sync_single_for_cpu(device->ib_device,
431 				iser_conn->login_req_dma, datalen,
432 				DMA_TO_DEVICE);
433 
434 		ib_dma_sync_single_for_device(device->ib_device,
435 			iser_conn->login_req_dma, datalen,
436 			DMA_TO_DEVICE);
437 
438 		tx_dsg->addr    = iser_conn->login_req_dma;
439 		tx_dsg->length  = datalen;
440 		tx_dsg->lkey    = device->mr->lkey;
441 		mdesc->num_sge = 2;
442 	}
443 
444 	/* For login phase and discovery session we re-use the login buffer */
445 	if (!iser_conn->handoff_done) {
446 		err = iser_post_recvl(iser_conn);
447 		if (err)
448 			goto send_control_error;
449 	}
450 
451 	err = iser_post_send(&iser_conn->ib_conn, mdesc, true);
452 	if (!err)
453 		return (0);
454 
455 send_control_error:
456 	ISER_ERR("conn %p failed err %d", iser_conn, err);
457 
458 	return (err);
459 
460 }
461 
462 /**
463  * iser_rcv_dto_completion - recv DTO completion
464  */
465 void
466 iser_rcv_completion(struct iser_rx_desc *rx_desc,
467 		    unsigned long rx_xfer_len,
468 		    struct ib_conn *ib_conn)
469 {
470 	struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
471 						   ib_conn);
472 	struct icl_conn *ic = &iser_conn->icl_conn;
473 	struct icl_pdu *response;
474 	struct iscsi_bhs *hdr;
475 	u64 rx_dma;
476 	int rx_buflen;
477 	int outstanding, count, err;
478 
479 	/* differentiate between login to all other PDUs */
480 	if ((char *)rx_desc == iser_conn->login_resp_buf) {
481 		rx_dma = iser_conn->login_resp_dma;
482 		rx_buflen = ISER_RX_LOGIN_SIZE;
483 	} else {
484 		rx_dma = rx_desc->dma_addr;
485 		rx_buflen = ISER_RX_PAYLOAD_SIZE;
486 	}
487 
488 	ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
489 				   rx_buflen, DMA_FROM_DEVICE);
490 
491 	hdr = &rx_desc->iscsi_header;
492 
493 	response = iser_new_pdu(ic, M_NOWAIT);
494 	response->ip_bhs = hdr;
495 	response->ip_data_len = rx_xfer_len - ISER_HEADERS_LEN;
496 
497 	/*
498 	 * In case we got data in the receive buffer, assign the ip_data_mbuf
499 	 * to the rx_buffer - later we'll copy it to upper layer buffers
500 	 */
501 	if (response->ip_data_len)
502 		response->ip_data_mbuf = (struct mbuf *)(rx_desc->data);
503 
504 	ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
505 				      rx_buflen, DMA_FROM_DEVICE);
506 
507 	/* decrementing conn->post_recv_buf_count only --after-- freeing the   *
508 	 * task eliminates the need to worry on tasks which are completed in   *
509 	 * parallel to the execution of iser_conn_term. So the code that waits *
510 	 * for the posted rx bufs refcount to become zero handles everything   */
511 	ib_conn->post_recv_buf_count--;
512 
513 	if (rx_dma == iser_conn->login_resp_dma)
514 		goto receive;
515 
516 	outstanding = ib_conn->post_recv_buf_count;
517 	if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) {
518 		count = min(iser_conn->qp_max_recv_dtos - outstanding,
519 			    iser_conn->min_posted_rx);
520 		err = iser_post_recvm(iser_conn, count);
521 		if (err)
522 			ISER_ERR("posting %d rx bufs err %d", count, err);
523 	}
524 
525 receive:
526 	(ic->ic_receive)(response);
527 }
528 
529 void
530 iser_snd_completion(struct iser_tx_desc *tx_desc,
531 		    struct ib_conn *ib_conn)
532 {
533 	struct icl_iser_pdu *iser_pdu = container_of(tx_desc, struct icl_iser_pdu, desc);
534 	struct iser_conn *iser_conn = iser_pdu->iser_conn;
535 
536 	if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL)
537 		iser_pdu_free(&iser_conn->icl_conn, &iser_pdu->icl_pdu);
538 }
539