/* xref: /freebsd/contrib/ofed/libmlx4/qp.c (revision d6b92ffa) */
/*
 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
 * Copyright (c) 2007 Cisco, Inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <config.h>

#include <stdlib.h>
#include <pthread.h>
#include <string.h>
#include <errno.h>

#include "mlx4.h"
#include "doorbell.h"
#include "wqe.h"

static const uint32_t mlx4_ib_opcode[] = {
	[IBV_WR_SEND]			= MLX4_OPCODE_SEND,
	[IBV_WR_SEND_WITH_IMM]		= MLX4_OPCODE_SEND_IMM,
	[IBV_WR_RDMA_WRITE]		= MLX4_OPCODE_RDMA_WRITE,
	[IBV_WR_RDMA_WRITE_WITH_IMM]	= MLX4_OPCODE_RDMA_WRITE_IMM,
	[IBV_WR_RDMA_READ]		= MLX4_OPCODE_RDMA_READ,
	[IBV_WR_ATOMIC_CMP_AND_SWP]	= MLX4_OPCODE_ATOMIC_CS,
	[IBV_WR_ATOMIC_FETCH_AND_ADD]	= MLX4_OPCODE_ATOMIC_FA,
	[IBV_WR_LOCAL_INV]		= MLX4_OPCODE_LOCAL_INVAL,
	[IBV_WR_BIND_MW]		= MLX4_OPCODE_BIND_MW,
	[IBV_WR_SEND_WITH_INV]		= MLX4_OPCODE_SEND_INVAL,
};

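/*
 * Locate the nth WQE in the receive or send queue: WQEs are laid out
 * contiguously in the QP buffer, each 1 << wqe_shift bytes in size.
 */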
static void *get_recv_wqe(struct mlx4_qp *qp, int n)
{
	return qp->buf.buf + qp->rq.offset + (n << qp->rq.wqe_shift);
}

static void *get_send_wqe(struct mlx4_qp *qp, int n)
{
	return qp->buf.buf + qp->sq.offset + (n << qp->sq.wqe_shift);
}

/*
 * Stamp a SQ WQE so that it is invalid if prefetched by marking the
 * first four bytes of every 64 byte chunk with 0xffffffff, except for
 * the very first chunk of the WQE.
 */
static void stamp_send_wqe(struct mlx4_qp *qp, int n)
{
	uint32_t *wqe = get_send_wqe(qp, n);
	int i;
	int ds = (((struct mlx4_wqe_ctrl_seg *)wqe)->fence_size & 0x3f) << 2;

	for (i = 16; i < ds; i += 16)
		wqe[i] = 0xffffffff;
}

void mlx4_init_qp_indices(struct mlx4_qp *qp)
{
	qp->sq.head	 = 0;
	qp->sq.tail	 = 0;
	qp->rq.head	 = 0;
	qp->rq.tail	 = 0;
}

void mlx4_qp_init_sq_ownership(struct mlx4_qp *qp)
{
	struct mlx4_wqe_ctrl_seg *ctrl;
	int i;

	for (i = 0; i < qp->sq.wqe_cnt; ++i) {
		ctrl = get_send_wqe(qp, i);
		ctrl->owner_opcode = htobe32(1 << 31);
		ctrl->fence_size = 1 << (qp->sq.wqe_shift - 4);

		stamp_send_wqe(qp, i);
	}
}

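/*
 * Check whether posting nreq more WRs would overflow the work queue.
 * The cheap head/tail comparison is tried first without any CQ lock;
 * only on apparent overflow is it repeated under the CQ lock, which
 * gives a consistent view of the tail advanced by completion
 * processing.
 */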
static int wq_overflow(struct mlx4_wq *wq, int nreq, struct mlx4_cq *cq)
{
	unsigned cur;

	cur = wq->head - wq->tail;
	if (cur + nreq < wq->max_post)
		return 0;

	pthread_spin_lock(&cq->lock);
	cur = wq->head - wq->tail;
	pthread_spin_unlock(&cq->lock);

	return cur + nreq >= wq->max_post;
}

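/* Build the memory-window bind segment from an IBV_WR_BIND_MW work request. */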
static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg, struct ibv_send_wr *wr)
{
	int acc = wr->bind_mw.bind_info.mw_access_flags;
	bseg->flags1 = 0;
	if (acc & IBV_ACCESS_REMOTE_ATOMIC)
		bseg->flags1 |= htobe32(MLX4_WQE_MW_ATOMIC);
	if (acc & IBV_ACCESS_REMOTE_WRITE)
		bseg->flags1 |= htobe32(MLX4_WQE_MW_REMOTE_WRITE);
	if (acc & IBV_ACCESS_REMOTE_READ)
		bseg->flags1 |= htobe32(MLX4_WQE_MW_REMOTE_READ);

	bseg->flags2 = 0;
	if (((struct ibv_mw *)(wr->bind_mw.mw))->type == IBV_MW_TYPE_2)
		bseg->flags2 |= htobe32(MLX4_WQE_BIND_TYPE_2);
	if (acc & IBV_ACCESS_ZERO_BASED)
		bseg->flags2 |= htobe32(MLX4_WQE_BIND_ZERO_BASED);

	bseg->new_rkey = htobe32(wr->bind_mw.rkey);
	bseg->lkey = htobe32(wr->bind_mw.bind_info.mr->lkey);
	bseg->addr = htobe64((uint64_t) wr->bind_mw.bind_info.addr);
	bseg->length = htobe64(wr->bind_mw.bind_info.length);
}

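/* Fill a local-invalidate segment: only the rkey to invalidate is meaningful. */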
static inline void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg,
		uint32_t rkey)
{
	iseg->mem_key	= htobe32(rkey);

	iseg->reserved1    = 0;
	iseg->reserved2    = 0;
	iseg->reserved3[0] = 0;
	iseg->reserved3[1] = 0;
}

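/* Fill the remote-address segment used by RDMA and atomic operations. */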
static inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
				 uint64_t remote_addr, uint32_t rkey)
{
	rseg->raddr    = htobe64(remote_addr);
	rseg->rkey     = htobe32(rkey);
	rseg->reserved = 0;
}

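/*
 * Fill the atomic segment: compare-and-swap carries both operands,
 * while fetch-and-add only uses the swap_add field.
 */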
static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ibv_send_wr *wr)
{
	if (wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP) {
		aseg->swap_add = htobe64(wr->wr.atomic.swap);
		aseg->compare  = htobe64(wr->wr.atomic.compare_add);
	} else {
		aseg->swap_add = htobe64(wr->wr.atomic.compare_add);
		aseg->compare  = 0;
	}
}

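/*
 * Fill the UD datagram segment from the work request's address handle:
 * the address vector, destination QPN and Q_Key, plus the VLAN and MAC
 * fields the handle carries (used for Ethernet transports).
 */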
static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
			     struct ibv_send_wr *wr)
{
	memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
	dseg->dqpn = htobe32(wr->wr.ud.remote_qpn);
	dseg->qkey = htobe32(wr->wr.ud.remote_qkey);
	dseg->vlan = htobe16(to_mah(wr->wr.ud.ah)->vlan);
	memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->mac, 6);
}

static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ibv_sge *sg)
{
	dseg->byte_count = htobe32(sg->length);
	dseg->lkey       = htobe32(sg->lkey);
	dseg->addr       = htobe64(sg->addr);
}

static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ibv_sge *sg)
{
	dseg->lkey       = htobe32(sg->lkey);
	dseg->addr       = htobe64(sg->addr);

	/*
	 * Need a barrier here before writing the byte_count field to
	 * make sure that all the data is visible before the
	 * byte_count field is set.  Otherwise, if the segment begins
	 * a new cacheline, the HCA prefetcher could grab the 64-byte
	 * chunk and get a valid (!= 0xffffffff) byte count but
	 * stale data, and end up sending the wrong data.
	 */
	udma_to_device_barrier();

	if (likely(sg->length))
		dseg->byte_count = htobe32(sg->length);
	else
		dseg->byte_count = htobe32(0x80000000);
}

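/*
 * Post a chain of send work requests.  For each WR, build a WQE in the
 * send queue (control segment, then opcode-specific segments, then data
 * or inline segments), set the ownership bit last, and finally ring the
 * doorbell -- via the BlueFlame page when a single small inline WQE
 * allows it, or the regular UAR doorbell otherwise.
 */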
int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
			  struct ibv_send_wr **bad_wr)
{
	struct mlx4_context *ctx;
	struct mlx4_qp *qp = to_mqp(ibqp);
	void *wqe;
	struct mlx4_wqe_ctrl_seg *ctrl = NULL;
	int ind;
	int nreq;
	int inl = 0;
	int ret = 0;
	int size = 0;
	int i;

	pthread_spin_lock(&qp->sq.lock);

	/* XXX check that state is OK to post send */

	ind = qp->sq.head;

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (wq_overflow(&qp->sq, nreq, to_mcq(ibqp->send_cq))) {
			ret = ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		if (wr->num_sge > qp->sq.max_gs) {
			ret = ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		if (wr->opcode >= sizeof mlx4_ib_opcode / sizeof mlx4_ib_opcode[0]) {
			ret = EINVAL;
			*bad_wr = wr;
			goto out;
		}

		ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
		qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id;

		ctrl->srcrb_flags =
			(wr->send_flags & IBV_SEND_SIGNALED ?
			 htobe32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
			(wr->send_flags & IBV_SEND_SOLICITED ?
			 htobe32(MLX4_WQE_CTRL_SOLICIT) : 0)   |
			qp->sq_signal_bits;

		if (wr->opcode == IBV_WR_SEND_WITH_IMM ||
		    wr->opcode == IBV_WR_RDMA_WRITE_WITH_IMM)
			ctrl->imm = wr->imm_data;
		else
			ctrl->imm = 0;

		wqe += sizeof *ctrl;
		size = sizeof *ctrl / 16;

		switch (ibqp->qp_type) {
		case IBV_QPT_XRC_SEND:
			ctrl->srcrb_flags |= MLX4_REMOTE_SRQN_FLAGS(wr);
			/* fall through */
		case IBV_QPT_RC:
		case IBV_QPT_UC:
			switch (wr->opcode) {
			case IBV_WR_ATOMIC_CMP_AND_SWP:
			case IBV_WR_ATOMIC_FETCH_AND_ADD:
				set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
					      wr->wr.atomic.rkey);
				wqe  += sizeof (struct mlx4_wqe_raddr_seg);

				set_atomic_seg(wqe, wr);
				wqe  += sizeof (struct mlx4_wqe_atomic_seg);
				size += (sizeof (struct mlx4_wqe_raddr_seg) +
					 sizeof (struct mlx4_wqe_atomic_seg)) / 16;

				break;

			case IBV_WR_RDMA_READ:
				inl = 1;
				/* fall through */
			case IBV_WR_RDMA_WRITE:
			case IBV_WR_RDMA_WRITE_WITH_IMM:
				if (!wr->num_sge)
					inl = 1;
				set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
					      wr->wr.rdma.rkey);
				wqe  += sizeof (struct mlx4_wqe_raddr_seg);
				size += sizeof (struct mlx4_wqe_raddr_seg) / 16;

				break;
			case IBV_WR_LOCAL_INV:
				ctrl->srcrb_flags |=
					htobe32(MLX4_WQE_CTRL_STRONG_ORDER);
				set_local_inv_seg(wqe, wr->imm_data);
				wqe  += sizeof
					(struct mlx4_wqe_local_inval_seg);
				size += sizeof
					(struct mlx4_wqe_local_inval_seg) / 16;
				break;
			case IBV_WR_BIND_MW:
				ctrl->srcrb_flags |=
					htobe32(MLX4_WQE_CTRL_STRONG_ORDER);
				set_bind_seg(wqe, wr);
				wqe  += sizeof
					(struct mlx4_wqe_bind_seg);
				size += sizeof
					(struct mlx4_wqe_bind_seg) / 16;
				break;
			case IBV_WR_SEND_WITH_INV:
				ctrl->imm = htobe32(wr->imm_data);
				break;

			default:
				/* No extra segments required for sends */
				break;
			}
			break;

		case IBV_QPT_UD:
			set_datagram_seg(wqe, wr);
			wqe  += sizeof (struct mlx4_wqe_datagram_seg);
			size += sizeof (struct mlx4_wqe_datagram_seg) / 16;

			if (wr->send_flags & IBV_SEND_IP_CSUM) {
				if (!(qp->qp_cap_cache & MLX4_CSUM_SUPPORT_UD_OVER_IB)) {
					ret = EINVAL;
					*bad_wr = wr;
					goto out;
				}
				ctrl->srcrb_flags |= htobe32(MLX4_WQE_CTRL_IP_HDR_CSUM |
							   MLX4_WQE_CTRL_TCP_UDP_CSUM);
			}
			break;

		case IBV_QPT_RAW_PACKET:
			/* For raw eth, the MLX4_WQE_CTRL_SOLICIT flag is used
			 * to indicate that no icrc should be calculated */
			ctrl->srcrb_flags |= htobe32(MLX4_WQE_CTRL_SOLICIT);
			if (wr->send_flags & IBV_SEND_IP_CSUM) {
				if (!(qp->qp_cap_cache & MLX4_CSUM_SUPPORT_RAW_OVER_ETH)) {
					ret = EINVAL;
					*bad_wr = wr;
					goto out;
				}
				ctrl->srcrb_flags |= htobe32(MLX4_WQE_CTRL_IP_HDR_CSUM |
							   MLX4_WQE_CTRL_TCP_UDP_CSUM);
			}
			break;

		default:
			break;
		}

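		/*
		 * Inline sends copy the payload directly into the WQE as
		 * one or more inline segments, none of which may cross a
		 * 64-byte boundary; each segment's byte_count is written
		 * only after its data, with a barrier in between.
		 */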
		if (wr->send_flags & IBV_SEND_INLINE && wr->num_sge) {
			struct mlx4_wqe_inline_seg *seg;
			void *addr;
			int len, seg_len;
			int num_seg;
			int off, to_copy;

			inl = 0;

			seg = wqe;
			wqe += sizeof *seg;
			off = ((uintptr_t) wqe) & (MLX4_INLINE_ALIGN - 1);
			num_seg = 0;
			seg_len = 0;

			for (i = 0; i < wr->num_sge; ++i) {
				addr = (void *) (uintptr_t) wr->sg_list[i].addr;
				len  = wr->sg_list[i].length;
				inl += len;

				if (inl > qp->max_inline_data) {
					inl = 0;
					ret = ENOMEM;
					*bad_wr = wr;
					goto out;
				}

				while (len >= MLX4_INLINE_ALIGN - off) {
					to_copy = MLX4_INLINE_ALIGN - off;
					memcpy(wqe, addr, to_copy);
					len -= to_copy;
					wqe += to_copy;
					addr += to_copy;
					seg_len += to_copy;
					udma_to_device_barrier(); /* see comment below */
					seg->byte_count = htobe32(MLX4_INLINE_SEG | seg_len);
					seg_len = 0;
					seg = wqe;
					wqe += sizeof *seg;
					off = sizeof *seg;
					++num_seg;
				}

				memcpy(wqe, addr, len);
				wqe += len;
				seg_len += len;
				off += len;
			}

			if (seg_len) {
				++num_seg;
				/*
				 * Need a barrier here to make sure
				 * all the data is visible before the
				 * byte_count field is set.  Otherwise
				 * the HCA prefetcher could grab the
				 * 64-byte chunk with this inline
				 * segment and get a valid (!=
				 * 0xffffffff) byte count but stale
				 * data, and end up sending the wrong
				 * data.
				 */
				udma_to_device_barrier();
				seg->byte_count = htobe32(MLX4_INLINE_SEG | seg_len);
			}

			size += (inl + num_seg * sizeof *seg + 15) / 16;
		} else {
			struct mlx4_wqe_data_seg *seg = wqe;

			for (i = wr->num_sge - 1; i >= 0 ; --i)
				set_data_seg(seg + i, wr->sg_list + i);

			size += wr->num_sge * (sizeof *seg / 16);
		}

		ctrl->fence_size = (wr->send_flags & IBV_SEND_FENCE ?
				    MLX4_WQE_CTRL_FENCE : 0) | size;

		/*
		 * Make sure descriptor is fully written before
		 * setting ownership bit (because HW can start
		 * executing as soon as we do).
		 */
		udma_to_device_barrier();

		ctrl->owner_opcode = htobe32(mlx4_ib_opcode[wr->opcode]) |
			(ind & qp->sq.wqe_cnt ? htobe32(1 << 31) : 0);

		/*
		 * We can improve latency by not stamping the last
		 * send queue WQE until after ringing the doorbell, so
		 * only stamp here if there are still more WQEs to post.
		 */
		if (wr->next)
			stamp_send_wqe(qp, (ind + qp->sq_spare_wqes) &
				       (qp->sq.wqe_cnt - 1));

		++ind;
	}

out:
	ctx = to_mctx(ibqp->context);

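	/*
	 * A single WQE that is fully inline and small enough to fit in
	 * the BlueFlame buffer is copied straight to the BlueFlame page
	 * (a write-combining mapping), which avoids a separate doorbell
	 * write; otherwise ring the regular UAR doorbell.
	 */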
	if (nreq == 1 && inl && size > 1 && size <= ctx->bf_buf_size / 16) {
		ctrl->owner_opcode |= htobe32((qp->sq.head & 0xffff) << 8);

		ctrl->bf_qpn |= qp->doorbell_qpn;
		++qp->sq.head;
		/*
		 * Make sure that descriptor is written to memory
		 * before writing to BlueFlame page.
		 */
		mmio_wc_spinlock(&ctx->bf_lock);

		mlx4_bf_copy(ctx->bf_page + ctx->bf_offset, (unsigned long *) ctrl,
			     align(size * 16, 64));
		/* Flush before toggling bf_offset to be latency oriented */
		mmio_flush_writes();

		ctx->bf_offset ^= ctx->bf_buf_size;

		pthread_spin_unlock(&ctx->bf_lock);
	} else if (nreq) {
		qp->sq.head += nreq;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		udma_to_device_barrier();

		mmio_writel((unsigned long)(ctx->uar + MLX4_SEND_DOORBELL),
			    qp->doorbell_qpn);
	}

	if (nreq)
		stamp_send_wqe(qp, (ind + qp->sq_spare_wqes - 1) &
			       (qp->sq.wqe_cnt - 1));

	pthread_spin_unlock(&qp->sq.lock);

	return ret;
}

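/*
 * Post a chain of receive work requests: write the scatter list into
 * each receive WQE, terminate short lists with an invalid-lkey sentinel
 * entry, and update the doorbell record with the new head counter.
 */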
int mlx4_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
		   struct ibv_recv_wr **bad_wr)
{
	struct mlx4_qp *qp = to_mqp(ibqp);
	struct mlx4_wqe_data_seg *scat;
	int ret = 0;
	int nreq;
	int ind;
	int i;

	pthread_spin_lock(&qp->rq.lock);

	/* XXX check that state is OK to post receive */

	ind = qp->rq.head & (qp->rq.wqe_cnt - 1);

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (wq_overflow(&qp->rq, nreq, to_mcq(ibqp->recv_cq))) {
			ret = ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		if (wr->num_sge > qp->rq.max_gs) {
			ret = ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		scat = get_recv_wqe(qp, ind);

		for (i = 0; i < wr->num_sge; ++i)
			__set_data_seg(scat + i, wr->sg_list + i);

		if (i < qp->rq.max_gs) {
			scat[i].byte_count = 0;
			scat[i].lkey       = htobe32(MLX4_INVALID_LKEY);
			scat[i].addr       = 0;
		}

		qp->rq.wrid[ind] = wr->wr_id;

		ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
	}

out:
	if (nreq) {
		qp->rq.head += nreq;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		udma_to_device_barrier();

		*qp->db = htobe32(qp->rq.head & 0xffff);
	}

	pthread_spin_unlock(&qp->rq.lock);

	return ret;
}

static int num_inline_segs(int data, enum ibv_qp_type type)
{
	/*
	 * Inline data segments are not allowed to cross 64 byte
	 * boundaries.  For UD QPs, the data segments always start
	 * aligned to 64 bytes (16 byte control segment + 48 byte
	 * datagram segment); for other QPs, there will be a 16 byte
	 * control segment and possibly a 16 byte remote address
	 * segment, so in the worst case there will be only 32 bytes
	 * available for the first data segment.
	 */
	if (type == IBV_QPT_UD)
		data += (sizeof (struct mlx4_wqe_ctrl_seg) +
			 sizeof (struct mlx4_wqe_datagram_seg)) %
			MLX4_INLINE_ALIGN;
	else
		data += (sizeof (struct mlx4_wqe_ctrl_seg) +
			 sizeof (struct mlx4_wqe_raddr_seg)) %
			MLX4_INLINE_ALIGN;

	return (data + MLX4_INLINE_ALIGN - sizeof (struct mlx4_wqe_inline_seg) - 1) /
		(MLX4_INLINE_ALIGN - sizeof (struct mlx4_wqe_inline_seg));
}

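/*
 * Compute sq.wqe_shift: the send WQE size is the worst case over the
 * requested SGE list, the requested inline data (including per-chunk
 * inline segment headers), the opcode-specific segments for this QP
 * type, and a bind request, rounded up to a power of two (minimum 64
 * bytes).
 */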
void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
			   struct mlx4_qp *qp)
{
	int size;
	int max_sq_sge;

	max_sq_sge	 = align(cap->max_inline_data +
				 num_inline_segs(cap->max_inline_data, type) *
				 sizeof (struct mlx4_wqe_inline_seg),
				 sizeof (struct mlx4_wqe_data_seg)) /
		sizeof (struct mlx4_wqe_data_seg);
	if (max_sq_sge < cap->max_send_sge)
		max_sq_sge = cap->max_send_sge;

	size = max_sq_sge * sizeof (struct mlx4_wqe_data_seg);
	switch (type) {
	case IBV_QPT_UD:
		size += sizeof (struct mlx4_wqe_datagram_seg);
		break;

	case IBV_QPT_UC:
		size += sizeof (struct mlx4_wqe_raddr_seg);
		break;

	case IBV_QPT_XRC_SEND:
	case IBV_QPT_RC:
		size += sizeof (struct mlx4_wqe_raddr_seg);
		/*
		 * An atomic op will require an atomic segment, a
		 * remote address segment and one scatter entry.
		 */
		if (size < (sizeof (struct mlx4_wqe_atomic_seg) +
			    sizeof (struct mlx4_wqe_raddr_seg) +
			    sizeof (struct mlx4_wqe_data_seg)))
			size = (sizeof (struct mlx4_wqe_atomic_seg) +
				sizeof (struct mlx4_wqe_raddr_seg) +
				sizeof (struct mlx4_wqe_data_seg));
		break;

	default:
		break;
	}

	/* Make sure that we have enough space for a bind request */
	if (size < sizeof (struct mlx4_wqe_bind_seg))
		size = sizeof (struct mlx4_wqe_bind_seg);

	size += sizeof (struct mlx4_wqe_ctrl_seg);

	for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
	     qp->sq.wqe_shift++)
		; /* nothing */
}

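/*
 * Allocate the wrid arrays and the page-aligned buffer shared by the
 * send and receive queues; the queue with the larger WQE stride is
 * placed first so both queues start on a multiple of their stride.
 */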
int mlx4_alloc_qp_buf(struct ibv_context *context, struct ibv_qp_cap *cap,
		       enum ibv_qp_type type, struct mlx4_qp *qp)
{
	qp->rq.max_gs	 = cap->max_recv_sge;

	if (qp->sq.wqe_cnt) {
		qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof (uint64_t));
		if (!qp->sq.wrid)
			return -1;
	}

	if (qp->rq.wqe_cnt) {
		qp->rq.wrid = malloc(qp->rq.wqe_cnt * sizeof (uint64_t));
		if (!qp->rq.wrid) {
			free(qp->sq.wrid);
			return -1;
		}
	}

	for (qp->rq.wqe_shift = 4;
	     1 << qp->rq.wqe_shift < qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg);
	     qp->rq.wqe_shift++)
		; /* nothing */

	qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
		(qp->sq.wqe_cnt << qp->sq.wqe_shift);
	if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
		qp->rq.offset = 0;
		qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
	} else {
		qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift;
		qp->sq.offset = 0;
	}

	if (qp->buf_size) {
		if (mlx4_alloc_buf(&qp->buf,
				   align(qp->buf_size, to_mdev(context->device)->page_size),
				   to_mdev(context->device)->page_size)) {
			free(qp->sq.wrid);
			free(qp->rq.wrid);
			return -1;
		}

		memset(qp->buf.buf, 0, qp->buf_size);
	} else {
		qp->buf.buf = NULL;
	}

	return 0;
}

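/*
 * Derive the per-QP send limits actually reported to the caller from
 * the WQE size chosen by mlx4_calc_sq_wqe_size().
 */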
void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap,
		       enum ibv_qp_type type)
{
	int wqe_size;

	wqe_size = (1 << qp->sq.wqe_shift) - sizeof (struct mlx4_wqe_ctrl_seg);
	switch (type) {
	case IBV_QPT_UD:
		wqe_size -= sizeof (struct mlx4_wqe_datagram_seg);
		break;

	case IBV_QPT_XRC_SEND:
	case IBV_QPT_UC:
	case IBV_QPT_RC:
		wqe_size -= sizeof (struct mlx4_wqe_raddr_seg);
		break;

	default:
		break;
	}

	qp->sq.max_gs	     = wqe_size / sizeof (struct mlx4_wqe_data_seg);
	cap->max_send_sge    = qp->sq.max_gs;
	qp->sq.max_post	     = qp->sq.wqe_cnt - qp->sq_spare_wqes;
	cap->max_send_wr     = qp->sq.max_post;

	/*
	 * Inline data segments can't cross a 64 byte boundary.  So
	 * subtract off one segment header for each 64-byte chunk,
	 * taking into account the fact that wqe_size will be 32 mod
	 * 64 for non-UD QPs.
	 */
	qp->max_inline_data  = wqe_size -
		sizeof (struct mlx4_wqe_inline_seg) *
		(align(wqe_size, MLX4_INLINE_ALIGN) / MLX4_INLINE_ALIGN);
	cap->max_inline_data = qp->max_inline_data;
}

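/*
 * The QPN-to-QP lookup table is two-level: the upper bits of the
 * masked QPN select a second-level table (allocated on demand in
 * mlx4_store_qp), and the bits under qp_table_mask index into it.
 */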
struct mlx4_qp *mlx4_find_qp(struct mlx4_context *ctx, uint32_t qpn)
{
	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;

	if (ctx->qp_table[tind].refcnt)
		return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];
	else
		return NULL;
}

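/*
 * Insert a QP into the lookup table, allocating the second-level table
 * on first use and reference-counting it per first-level slot.
 */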
int mlx4_store_qp(struct mlx4_context *ctx, uint32_t qpn, struct mlx4_qp *qp)
{
	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;

	if (!ctx->qp_table[tind].refcnt) {
		ctx->qp_table[tind].table = calloc(ctx->qp_table_mask + 1,
						   sizeof (struct mlx4_qp *));
		if (!ctx->qp_table[tind].table)
			return -1;
	}

	++ctx->qp_table[tind].refcnt;
	ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = qp;
	return 0;
}

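/*
 * Remove a QP from the lookup table, freeing the second-level table
 * when its reference count drops to zero.
 */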
void mlx4_clear_qp(struct mlx4_context *ctx, uint32_t qpn)
{
	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;

	if (!--ctx->qp_table[tind].refcnt)
		free(ctx->qp_table[tind].table);
	else
		ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = NULL;
}