xref: /freebsd/contrib/ofed/libcxgb4/qp.c (revision d6b92ffa)
/*
 * Copyright (c) 2006-2016 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <config.h>

#include <assert.h>
#include <stdlib.h>
#include <pthread.h>
#include <string.h>
#include <stdio.h>
#include "libcxgb4.h"

#ifdef STATS
struct c4iw_stats c4iw_stats;
#endif

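/*
 * Copy a send WQE, built in a local buffer, into the SQ ring at the
 * current producer index, wrapping at the end of the queue.  For on-chip
 * queues the destination is write-combining memory, so the copy is
 * bracketed with mmio_wc_start()/mmio_flush_writes().
 */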
static void copy_wr_to_sq(struct t4_wq *wq, union t4_wr *wqe, u8 len16)
{
	u64 *src, *dst;

	src = (u64 *)wqe;
	dst = (u64 *)((u8 *)wq->sq.queue + wq->sq.wq_pidx * T4_EQ_ENTRY_SIZE);
	if (t4_sq_onchip(wq)) {
		len16 = align(len16, 4);

		/* In onchip mode the copy below will be made to WC memory
		 * and could trigger DMA.  In offchip mode the copy below
		 * only queues the WQE; DMA cannot start until
		 * t4_ring_sq_db() happens.
		 */
		mmio_wc_start();
	}
	while (len16) {
		*dst++ = *src++;
		if (dst == (u64 *)&wq->sq.queue[wq->sq.size])
			dst = (u64 *)wq->sq.queue;
		*dst++ = *src++;
		if (dst == (u64 *)&wq->sq.queue[wq->sq.size])
			dst = (u64 *)wq->sq.queue;
		len16--;

		/* NOTE: len16 cannot be large enough to write to the
		 * same sq.queue memory twice in this loop.
		 */
	}

	if (t4_sq_onchip(wq))
		mmio_flush_writes();
}

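/*
 * Copy a receive WQE, built in a local buffer, into the RQ ring at the
 * current producer index, wrapping at the end of the queue.
 */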
static void copy_wr_to_rq(struct t4_wq *wq, union t4_recv_wr *wqe, u8 len16)
{
	u64 *src, *dst;

	src = (u64 *)wqe;
	dst = (u64 *)((u8 *)wq->rq.queue + wq->rq.wq_pidx * T4_EQ_ENTRY_SIZE);
	while (len16) {
		*dst++ = *src++;
		if (dst >= (u64 *)&wq->rq.queue[wq->rq.size])
			dst = (u64 *)wq->rq.queue;
		*dst++ = *src++;
		if (dst >= (u64 *)&wq->rq.queue[wq->rq.size])
			dst = (u64 *)wq->rq.queue;
		len16--;
	}
}

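/*
 * Gather the caller's SGEs into an immediate-data (FW_RI_DATA_IMMD)
 * chunk inside the WQE, padding the tail out to a 16-byte boundary.
 * Fails with -EMSGSIZE if the total payload exceeds 'max'.  The total
 * byte count is returned through *plenp.
 */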
static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp,
		      struct ibv_send_wr *wr, int max, u32 *plenp)
{
	u8 *dstp, *srcp;
	u32 plen = 0;
	int i;
	int len;

	dstp = (u8 *)immdp->data;
	for (i = 0; i < wr->num_sge; i++) {
		if ((plen + wr->sg_list[i].length) > max)
			return -EMSGSIZE;
		srcp = (u8 *)(unsigned long)wr->sg_list[i].addr;
		plen += wr->sg_list[i].length;
		len = wr->sg_list[i].length;
		memcpy(dstp, srcp, len);
		dstp += len;
		srcp += len;
	}
	len = ROUND_UP(plen + 8, 16) - (plen + 8);
	if (len)
		memset(dstp, 0, len);
	immdp->op = FW_RI_DATA_IMMD;
	immdp->r1 = 0;
	immdp->r2 = 0;
	immdp->immdlen = htobe32(plen);
	*plenp = plen;
	return 0;
}

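/*
 * Build an immediate SGL (FW_RI_DATA_ISGL) in the WQE from the caller's
 * SGEs.  Fails with -EMSGSIZE if the total length overflows a u32.  The
 * total byte count is returned through *plenp when it is non-NULL.
 */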
static int build_isgl(struct fw_ri_isgl *isglp, struct ibv_sge *sg_list,
		      int num_sge, u32 *plenp)
{
	int i;
	u32 plen = 0;
	__be64 *flitp = (__be64 *)isglp->sge;

	for (i = 0; i < num_sge; i++) {
		if ((plen + sg_list[i].length) < plen)
			return -EMSGSIZE;
		plen += sg_list[i].length;
		*flitp++ = htobe64(((u64)sg_list[i].lkey << 32) |
				   sg_list[i].length);
		*flitp++ = htobe64(sg_list[i].addr);
	}
	*flitp = 0;
	isglp->op = FW_RI_DATA_ISGL;
	isglp->r1 = 0;
	isglp->nsge = htobe16(num_sge);
	isglp->r2 = 0;
	if (plenp)
		*plenp = plen;
	return 0;
}

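/*
 * Fill in a FW_RI_SEND_WR work request from an ibv_send_wr, carrying the
 * payload either as inline immediate data or as an SGL, and return the
 * WQE length in 16-byte units through *len16.
 */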
static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
			   struct ibv_send_wr *wr, u8 *len16)
{
	u32 plen;
	int size;
	int ret;

	if (wr->num_sge > T4_MAX_SEND_SGE)
		return -EINVAL;
	if (wr->send_flags & IBV_SEND_SOLICITED)
		wqe->send.sendop_pkd = htobe32(
			FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND_WITH_SE));
	else
		wqe->send.sendop_pkd = htobe32(
			FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND));
	wqe->send.stag_inv = 0;
	wqe->send.r3 = 0;
	wqe->send.r4 = 0;

	plen = 0;
	if (wr->num_sge) {
		if (wr->send_flags & IBV_SEND_INLINE) {
			ret = build_immd(sq, wqe->send.u.immd_src, wr,
					 T4_MAX_SEND_INLINE, &plen);
			if (ret)
				return ret;
			size = sizeof wqe->send + sizeof(struct fw_ri_immd) +
			       plen;
		} else {
			ret = build_isgl(wqe->send.u.isgl_src,
					 wr->sg_list, wr->num_sge, &plen);
			if (ret)
				return ret;
			size = sizeof wqe->send + sizeof(struct fw_ri_isgl) +
			       wr->num_sge * sizeof(struct fw_ri_sge);
		}
	} else {
		wqe->send.u.immd_src[0].op = FW_RI_DATA_IMMD;
		wqe->send.u.immd_src[0].r1 = 0;
		wqe->send.u.immd_src[0].r2 = 0;
		wqe->send.u.immd_src[0].immdlen = 0;
		size = sizeof wqe->send + sizeof(struct fw_ri_immd);
		plen = 0;
	}
	*len16 = DIV_ROUND_UP(size, 16);
	wqe->send.plen = htobe32(plen);
	return 0;
}

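/*
 * Fill in a FW_RI_RDMA_WRITE_WR work request: sink rkey and address from
 * the WR, plus either inline immediate data or an SGL for the source.
 * The WQE length in 16-byte units is returned through *len16.
 */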
static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
			    struct ibv_send_wr *wr, u8 *len16)
{
	u32 plen;
	int size;
	int ret;

	if (wr->num_sge > T4_MAX_SEND_SGE)
		return -EINVAL;
	wqe->write.r2 = 0;
	wqe->write.stag_sink = htobe32(wr->wr.rdma.rkey);
	wqe->write.to_sink = htobe64(wr->wr.rdma.remote_addr);
	if (wr->num_sge) {
		if (wr->send_flags & IBV_SEND_INLINE) {
			ret = build_immd(sq, wqe->write.u.immd_src, wr,
					 T4_MAX_WRITE_INLINE, &plen);
			if (ret)
				return ret;
			size = sizeof wqe->write + sizeof(struct fw_ri_immd) +
			       plen;
		} else {
			ret = build_isgl(wqe->write.u.isgl_src,
					 wr->sg_list, wr->num_sge, &plen);
			if (ret)
				return ret;
			size = sizeof wqe->write + sizeof(struct fw_ri_isgl) +
			       wr->num_sge * sizeof(struct fw_ri_sge);
		}
	} else {
		wqe->write.u.immd_src[0].op = FW_RI_DATA_IMMD;
		wqe->write.u.immd_src[0].r1 = 0;
		wqe->write.u.immd_src[0].r2 = 0;
		wqe->write.u.immd_src[0].immdlen = 0;
		size = sizeof wqe->write + sizeof(struct fw_ri_immd);
		plen = 0;
	}
	*len16 = DIV_ROUND_UP(size, 16);
	wqe->write.plen = htobe32(plen);
	return 0;
}

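/*
 * Fill in a FW_RI_RDMA_READ_WR work request.  Only a single sink SGE is
 * supported; with no SGE a zero-length read request is built.
 */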
static int build_rdma_read(union t4_wr *wqe, struct ibv_send_wr *wr, u8 *len16)
{
	if (wr->num_sge > 1)
		return -EINVAL;
	if (wr->num_sge) {
		wqe->read.stag_src = htobe32(wr->wr.rdma.rkey);
		wqe->read.to_src_hi = htobe32((u32)(wr->wr.rdma.remote_addr >> 32));
		wqe->read.to_src_lo = htobe32((u32)wr->wr.rdma.remote_addr);
		wqe->read.stag_sink = htobe32(wr->sg_list[0].lkey);
		wqe->read.plen = htobe32(wr->sg_list[0].length);
		wqe->read.to_sink_hi = htobe32((u32)(wr->sg_list[0].addr >> 32));
		wqe->read.to_sink_lo = htobe32((u32)(wr->sg_list[0].addr));
	} else {
		wqe->read.stag_src = htobe32(2);
		wqe->read.to_src_hi = 0;
		wqe->read.to_src_lo = 0;
		wqe->read.stag_sink = htobe32(2);
		wqe->read.plen = 0;
		wqe->read.to_sink_hi = 0;
		wqe->read.to_sink_lo = 0;
	}
	wqe->read.r2 = 0;
	wqe->read.r5 = 0;
	*len16 = DIV_ROUND_UP(sizeof wqe->read, 16);
	return 0;
}

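/*
 * Fill in the SGL of a receive work request and return its length in
 * 16-byte units through *len16.
 */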
static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
			   struct ibv_recv_wr *wr, u8 *len16)
{
	int ret;

	ret = build_isgl(&wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL);
	if (ret)
		return ret;
	*len16 = DIV_ROUND_UP(sizeof wqe->recv +
			      wr->num_sge * sizeof(struct fw_ri_sge), 16);
	return 0;
}

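/*
 * When user-space doorbells are disabled, ask the kernel to ring the
 * doorbell on our behalf: ibv_cmd_modify_qp() is issued with
 * IBV_QP_SQ_PSN or IBV_QP_RQ_PSN carrying the producer-index increment,
 * which the cxgb4 kernel driver is expected to translate into a doorbell
 * ring for the corresponding queue.
 */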
static void ring_kernel_db(struct c4iw_qp *qhp, u32 qid, u16 idx)
{
	struct ibv_modify_qp cmd = {};
	struct ibv_qp_attr attr;
	int mask;
	int __attribute__((unused)) ret;

	/* FIXME: Why do we need this barrier if the kernel is going to
	 * trigger the DMA?
	 */
	udma_to_device_barrier();
	if (qid == qhp->wq.sq.qid) {
		attr.sq_psn = idx;
		mask = IBV_QP_SQ_PSN;
	} else {
		attr.rq_psn = idx;
		mask = IBV_QP_RQ_PSN;
	}
	ret = ibv_cmd_modify_qp(&qhp->ibv_qp, &attr, mask, &cmd, sizeof cmd);
	assert(!ret);
}

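/*
 * Post a chain of send work requests.  Each WR is built in a local WQE
 * buffer, copied into the SQ ring, and tracked in the software SQ.  When
 * user-space doorbells are enabled, the SQ doorbell is rung once for the
 * whole chain (the last WQE and its length are passed so the doorbell
 * path can write it directly where the hardware supports that);
 * otherwise the kernel is asked to ring the doorbell.  On error, *bad_wr
 * points at the offending WR and the WRs already copied are still
 * submitted.
 */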
int c4iw_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
		   struct ibv_send_wr **bad_wr)
{
	int err = 0;
	u8 len16 = 0;
	enum fw_wr_opcodes fw_opcode;
	enum fw_ri_wr_flags fw_flags;
	struct c4iw_qp *qhp;
	union t4_wr *wqe, lwqe;
	u32 num_wrs;
	struct t4_swsqe *swsqe;
	u16 idx = 0;

	qhp = to_c4iw_qp(ibqp);
	pthread_spin_lock(&qhp->lock);
	if (t4_wq_in_error(&qhp->wq)) {
		pthread_spin_unlock(&qhp->lock);
		*bad_wr = wr;
		return -EINVAL;
	}
	num_wrs = t4_sq_avail(&qhp->wq);
	if (num_wrs == 0) {
		pthread_spin_unlock(&qhp->lock);
		*bad_wr = wr;
		return -ENOMEM;
	}
	while (wr) {
		if (num_wrs == 0) {
			err = -ENOMEM;
			*bad_wr = wr;
			break;
		}

		wqe = &lwqe;
		fw_flags = 0;
		if (wr->send_flags & IBV_SEND_SOLICITED)
			fw_flags |= FW_RI_SOLICITED_EVENT_FLAG;
		if (wr->send_flags & IBV_SEND_SIGNALED || qhp->sq_sig_all)
			fw_flags |= FW_RI_COMPLETION_FLAG;
		swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
		switch (wr->opcode) {
		case IBV_WR_SEND:
			INC_STAT(send);
			if (wr->send_flags & IBV_SEND_FENCE)
				fw_flags |= FW_RI_READ_FENCE_FLAG;
			fw_opcode = FW_RI_SEND_WR;
			swsqe->opcode = FW_RI_SEND;
			err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16);
			break;
		case IBV_WR_RDMA_WRITE:
			INC_STAT(write);
			fw_opcode = FW_RI_RDMA_WRITE_WR;
			swsqe->opcode = FW_RI_RDMA_WRITE;
			err = build_rdma_write(&qhp->wq.sq, wqe, wr, &len16);
			break;
		case IBV_WR_RDMA_READ:
			INC_STAT(read);
			fw_opcode = FW_RI_RDMA_READ_WR;
			swsqe->opcode = FW_RI_READ_REQ;
			fw_flags = 0;
			err = build_rdma_read(wqe, wr, &len16);
			if (err)
				break;
			swsqe->read_len = wr->sg_list ? wr->sg_list[0].length :
					  0;
			if (!qhp->wq.sq.oldest_read)
				qhp->wq.sq.oldest_read = swsqe;
			break;
		default:
			PDBG("%s post of type=%d TBD!\n", __func__,
			     wr->opcode);
			err = -EINVAL;
		}
		if (err) {
			*bad_wr = wr;
			break;
		}
		swsqe->idx = qhp->wq.sq.pidx;
		swsqe->complete = 0;
		swsqe->signaled = (wr->send_flags & IBV_SEND_SIGNALED) ||
				  qhp->sq_sig_all;
		swsqe->flushed = 0;
		swsqe->wr_id = wr->wr_id;

		init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16);
		PDBG("%s cookie 0x%llx pidx 0x%x opcode 0x%x\n",
		     __func__, (unsigned long long)wr->wr_id, qhp->wq.sq.pidx,
		     swsqe->opcode);
		wr = wr->next;
		num_wrs--;
		copy_wr_to_sq(&qhp->wq, wqe, len16);
		t4_sq_produce(&qhp->wq, len16);
		idx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);
	}
	if (t4_wq_db_enabled(&qhp->wq)) {
		t4_ring_sq_db(&qhp->wq, idx, dev_is_t4(qhp->rhp),
			      len16, wqe);
	} else
		ring_kernel_db(qhp, qhp->wq.sq.qid, idx);
	/* This write is only for debugging; the value does not matter for
	 * DMA.
	 */
	qhp->wq.sq.queue[qhp->wq.sq.size].status.host_wq_pidx =
			(qhp->wq.sq.wq_pidx);

	pthread_spin_unlock(&qhp->lock);
	return err;
}

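/*
 * Post a chain of receive work requests.  Each WR is built in a local
 * WQE buffer, copied into the RQ ring, and its wr_id is recorded in the
 * software RQ.  The RQ doorbell is rung once for the whole chain, either
 * directly or via the kernel.  On error, *bad_wr points at the offending
 * WR.
 */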
int c4iw_post_receive(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
		      struct ibv_recv_wr **bad_wr)
{
	int err = 0;
	struct c4iw_qp *qhp;
	union t4_recv_wr *wqe, lwqe;
	u32 num_wrs;
	u8 len16 = 0;
	u16 idx = 0;

	qhp = to_c4iw_qp(ibqp);
	pthread_spin_lock(&qhp->lock);
	if (t4_wq_in_error(&qhp->wq)) {
		pthread_spin_unlock(&qhp->lock);
		*bad_wr = wr;
		return -EINVAL;
	}
	INC_STAT(recv);
	num_wrs = t4_rq_avail(&qhp->wq);
	if (num_wrs == 0) {
		pthread_spin_unlock(&qhp->lock);
		*bad_wr = wr;
		return -ENOMEM;
	}
	while (wr) {
		if (wr->num_sge > T4_MAX_RECV_SGE) {
			err = -EINVAL;
			*bad_wr = wr;
			break;
		}
		wqe = &lwqe;
		if (num_wrs)
			err = build_rdma_recv(qhp, wqe, wr, &len16);
		else
			err = -ENOMEM;
		if (err) {
			*bad_wr = wr;
			break;
		}

		qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].wr_id = wr->wr_id;

		wqe->recv.opcode = FW_RI_RECV_WR;
		wqe->recv.r1 = 0;
		wqe->recv.wrid = qhp->wq.rq.pidx;
		wqe->recv.r2[0] = 0;
		wqe->recv.r2[1] = 0;
		wqe->recv.r2[2] = 0;
		wqe->recv.len16 = len16;
		PDBG("%s cookie 0x%llx pidx %u\n", __func__,
		     (unsigned long long) wr->wr_id, qhp->wq.rq.pidx);
		copy_wr_to_rq(&qhp->wq, wqe, len16);
		t4_rq_produce(&qhp->wq, len16);
		idx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);
		wr = wr->next;
		num_wrs--;
	}
	if (t4_wq_db_enabled(&qhp->wq))
		t4_ring_rq_db(&qhp->wq, idx, dev_is_t4(qhp->rhp),
			      len16, wqe);
	else
		ring_kernel_db(qhp, qhp->wq.rq.qid, idx);
	qhp->wq.rq.queue[qhp->wq.rq.size].status.host_wq_pidx =
			(qhp->wq.rq.wq_pidx);
	pthread_spin_unlock(&qhp->lock);
	return err;
}

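/*
 * Query the kernel for the current QP state and refresh the cached
 * ibv_qp state accordingly.
 */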
static void update_qp_state(struct c4iw_qp *qhp)
{
	struct ibv_query_qp cmd;
	struct ibv_qp_attr attr;
	struct ibv_qp_init_attr iattr;
	int ret;

	ret = ibv_cmd_query_qp(&qhp->ibv_qp, &attr, IBV_QP_STATE, &iattr,
			       &cmd, sizeof cmd);
	assert(!ret);
	if (!ret)
		qhp->ibv_qp.state = attr.qp_state;
}

/*
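 * Flush the QP: once the QP has entered an error state, move any pending
 * SQ and RQ work requests to their CQs as flushed completions.
 *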
 * Assumes qhp lock is held.
 */
void c4iw_flush_qp(struct c4iw_qp *qhp)
{
	struct c4iw_cq *rchp, *schp;
	int count;

	if (qhp->wq.flushed)
		return;

	update_qp_state(qhp);

	rchp = to_c4iw_cq(qhp->ibv_qp.recv_cq);
	schp = to_c4iw_cq(qhp->ibv_qp.send_cq);

	PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
	qhp->wq.flushed = 1;
	pthread_spin_unlock(&qhp->lock);

	/* locking hierarchy: cq lock first, then qp lock. */
	pthread_spin_lock(&rchp->lock);
	pthread_spin_lock(&qhp->lock);
	c4iw_flush_hw_cq(rchp);
	c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
	c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
	pthread_spin_unlock(&qhp->lock);
	pthread_spin_unlock(&rchp->lock);

	/* locking hierarchy: cq lock first, then qp lock. */
	pthread_spin_lock(&schp->lock);
	pthread_spin_lock(&qhp->lock);
	if (schp != rchp)
		c4iw_flush_hw_cq(schp);
	c4iw_flush_sq(qhp);
	pthread_spin_unlock(&qhp->lock);
	pthread_spin_unlock(&schp->lock);
	pthread_spin_lock(&qhp->lock);
}

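/*
 * Walk every QP on the device and flush those that have entered an error
 * state but have not been flushed yet.
 */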
void c4iw_flush_qps(struct c4iw_dev *dev)
{
	int i;

	pthread_spin_lock(&dev->lock);
	for (i = 0; i < dev->max_qp; i++) {
		struct c4iw_qp *qhp = dev->qpid2ptr[i];

		if (qhp) {
			if (!qhp->wq.flushed && t4_wq_in_error(&qhp->wq)) {
				pthread_spin_lock(&qhp->lock);
				c4iw_flush_qp(qhp);
				pthread_spin_unlock(&qhp->lock);
			}
		}
	}
	pthread_spin_unlock(&dev->lock);
}
547