xref: /linux/drivers/infiniband/sw/rxe/rxe_resp.c (revision d6fd48ef)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
4  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
5  */
6 
7 #include <linux/skbuff.h>
8 
9 #include "rxe.h"
10 #include "rxe_loc.h"
11 #include "rxe_queue.h"
12 
13 static char *resp_state_name[] = {
14 	[RESPST_NONE]				= "NONE",
15 	[RESPST_GET_REQ]			= "GET_REQ",
16 	[RESPST_CHK_PSN]			= "CHK_PSN",
17 	[RESPST_CHK_OP_SEQ]			= "CHK_OP_SEQ",
18 	[RESPST_CHK_OP_VALID]			= "CHK_OP_VALID",
19 	[RESPST_CHK_RESOURCE]			= "CHK_RESOURCE",
20 	[RESPST_CHK_LENGTH]			= "CHK_LENGTH",
21 	[RESPST_CHK_RKEY]			= "CHK_RKEY",
22 	[RESPST_EXECUTE]			= "EXECUTE",
23 	[RESPST_READ_REPLY]			= "READ_REPLY",
24 	[RESPST_ATOMIC_REPLY]			= "ATOMIC_REPLY",
25 	[RESPST_ATOMIC_WRITE_REPLY]		= "ATOMIC_WRITE_REPLY",
26 	[RESPST_PROCESS_FLUSH]			= "PROCESS_FLUSH",
27 	[RESPST_COMPLETE]			= "COMPLETE",
28 	[RESPST_ACKNOWLEDGE]			= "ACKNOWLEDGE",
29 	[RESPST_CLEANUP]			= "CLEANUP",
30 	[RESPST_DUPLICATE_REQUEST]		= "DUPLICATE_REQUEST",
31 	[RESPST_ERR_MALFORMED_WQE]		= "ERR_MALFORMED_WQE",
32 	[RESPST_ERR_UNSUPPORTED_OPCODE]		= "ERR_UNSUPPORTED_OPCODE",
33 	[RESPST_ERR_MISALIGNED_ATOMIC]		= "ERR_MISALIGNED_ATOMIC",
34 	[RESPST_ERR_PSN_OUT_OF_SEQ]		= "ERR_PSN_OUT_OF_SEQ",
35 	[RESPST_ERR_MISSING_OPCODE_FIRST]	= "ERR_MISSING_OPCODE_FIRST",
36 	[RESPST_ERR_MISSING_OPCODE_LAST_C]	= "ERR_MISSING_OPCODE_LAST_C",
37 	[RESPST_ERR_MISSING_OPCODE_LAST_D1E]	= "ERR_MISSING_OPCODE_LAST_D1E",
38 	[RESPST_ERR_TOO_MANY_RDMA_ATM_REQ]	= "ERR_TOO_MANY_RDMA_ATM_REQ",
39 	[RESPST_ERR_RNR]			= "ERR_RNR",
40 	[RESPST_ERR_RKEY_VIOLATION]		= "ERR_RKEY_VIOLATION",
41 	[RESPST_ERR_INVALIDATE_RKEY]		= "ERR_INVALIDATE_RKEY_VIOLATION",
42 	[RESPST_ERR_LENGTH]			= "ERR_LENGTH",
43 	[RESPST_ERR_CQ_OVERFLOW]		= "ERR_CQ_OVERFLOW",
44 	[RESPST_ERROR]				= "ERROR",
45 	[RESPST_RESET]				= "RESET",
46 	[RESPST_DONE]				= "DONE",
47 	[RESPST_EXIT]				= "EXIT",
48 };
49 
50 /* rxe_recv calls here to add a request packet to the input queue */
51 void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
52 {
53 	int must_sched;
54 	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
55 
56 	skb_queue_tail(&qp->req_pkts, skb);
57 
58 	must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) ||
59 			(skb_queue_len(&qp->req_pkts) > 1);
60 
61 	if (must_sched)
62 		rxe_sched_task(&qp->resp.task);
63 	else
64 		rxe_run_task(&qp->resp.task);
65 }
66 
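/* Peek at the next request packet on the input queue and choose the next
 * state to run. In the error state all queued packets are dropped and
 * their qp and device references released before going on to drain the
 * recv wr queue. A non-NULL qp->resp.res means a multi-packet read reply
 * is still in progress and takes priority over starting a new request.
 */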
67 static inline enum resp_states get_req(struct rxe_qp *qp,
68 				       struct rxe_pkt_info **pkt_p)
69 {
70 	struct sk_buff *skb;
71 
72 	if (qp->resp.state == QP_STATE_ERROR) {
73 		while ((skb = skb_dequeue(&qp->req_pkts))) {
74 			rxe_put(qp);
75 			kfree_skb(skb);
76 			ib_device_put(qp->ibqp.device);
77 		}
78 
79 		/* go drain recv wr queue */
80 		return RESPST_CHK_RESOURCE;
81 	}
82 
83 	skb = skb_peek(&qp->req_pkts);
84 	if (!skb)
85 		return RESPST_EXIT;
86 
87 	*pkt_p = SKB_TO_PKT(skb);
88 
89 	return (qp->resp.res) ? RESPST_READ_REPLY : RESPST_CHK_PSN;
90 }
91 
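/* Check the packet PSN against the PSN the responder expects next. For RC
 * a packet ahead of the expected PSN triggers a single out-of-sequence NAK
 * (repeats are suppressed until the gap is filled) and a packet behind it
 * is handled as a duplicate request. For UC a PSN gap just drops the rest
 * of the current message until the next first packet arrives.
 */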
92 static enum resp_states check_psn(struct rxe_qp *qp,
93 				  struct rxe_pkt_info *pkt)
94 {
95 	int diff = psn_compare(pkt->psn, qp->resp.psn);
96 	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
97 
98 	switch (qp_type(qp)) {
99 	case IB_QPT_RC:
100 		if (diff > 0) {
101 			if (qp->resp.sent_psn_nak)
102 				return RESPST_CLEANUP;
103 
104 			qp->resp.sent_psn_nak = 1;
105 			rxe_counter_inc(rxe, RXE_CNT_OUT_OF_SEQ_REQ);
106 			return RESPST_ERR_PSN_OUT_OF_SEQ;
107 
108 		} else if (diff < 0) {
109 			rxe_counter_inc(rxe, RXE_CNT_DUP_REQ);
110 			return RESPST_DUPLICATE_REQUEST;
111 		}
112 
113 		if (qp->resp.sent_psn_nak)
114 			qp->resp.sent_psn_nak = 0;
115 
116 		break;
117 
118 	case IB_QPT_UC:
119 		if (qp->resp.drop_msg || diff != 0) {
120 			if (pkt->mask & RXE_START_MASK) {
121 				qp->resp.drop_msg = 0;
122 				return RESPST_CHK_OP_SEQ;
123 			}
124 
125 			qp->resp.drop_msg = 1;
126 			return RESPST_CLEANUP;
127 		}
128 		break;
129 	default:
130 		break;
131 	}
132 
133 	return RESPST_CHK_OP_SEQ;
134 }
135 
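/* Check that this packet's opcode is a legal successor of the last opcode
 * completed on this QP, e.g. a SEND_MIDDLE may only follow a SEND_FIRST or
 * another SEND_MIDDLE. For RC a violation maps to a missing-first or
 * missing-last error; for UC a missing last packet is a class D1/E error
 * and a missing first packet causes the message to be dropped.
 */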
136 static enum resp_states check_op_seq(struct rxe_qp *qp,
137 				     struct rxe_pkt_info *pkt)
138 {
139 	switch (qp_type(qp)) {
140 	case IB_QPT_RC:
141 		switch (qp->resp.opcode) {
142 		case IB_OPCODE_RC_SEND_FIRST:
143 		case IB_OPCODE_RC_SEND_MIDDLE:
144 			switch (pkt->opcode) {
145 			case IB_OPCODE_RC_SEND_MIDDLE:
146 			case IB_OPCODE_RC_SEND_LAST:
147 			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
148 			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
149 				return RESPST_CHK_OP_VALID;
150 			default:
151 				return RESPST_ERR_MISSING_OPCODE_LAST_C;
152 			}
153 
154 		case IB_OPCODE_RC_RDMA_WRITE_FIRST:
155 		case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
156 			switch (pkt->opcode) {
157 			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
158 			case IB_OPCODE_RC_RDMA_WRITE_LAST:
159 			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
160 				return RESPST_CHK_OP_VALID;
161 			default:
162 				return RESPST_ERR_MISSING_OPCODE_LAST_C;
163 			}
164 
165 		default:
166 			switch (pkt->opcode) {
167 			case IB_OPCODE_RC_SEND_MIDDLE:
168 			case IB_OPCODE_RC_SEND_LAST:
169 			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
170 			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
171 			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
172 			case IB_OPCODE_RC_RDMA_WRITE_LAST:
173 			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
174 				return RESPST_ERR_MISSING_OPCODE_FIRST;
175 			default:
176 				return RESPST_CHK_OP_VALID;
177 			}
178 		}
179 		break;
180 
181 	case IB_QPT_UC:
182 		switch (qp->resp.opcode) {
183 		case IB_OPCODE_UC_SEND_FIRST:
184 		case IB_OPCODE_UC_SEND_MIDDLE:
185 			switch (pkt->opcode) {
186 			case IB_OPCODE_UC_SEND_MIDDLE:
187 			case IB_OPCODE_UC_SEND_LAST:
188 			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
189 				return RESPST_CHK_OP_VALID;
190 			default:
191 				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
192 			}
193 
194 		case IB_OPCODE_UC_RDMA_WRITE_FIRST:
195 		case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
196 			switch (pkt->opcode) {
197 			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
198 			case IB_OPCODE_UC_RDMA_WRITE_LAST:
199 			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
200 				return RESPST_CHK_OP_VALID;
201 			default:
202 				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
203 			}
204 
205 		default:
206 			switch (pkt->opcode) {
207 			case IB_OPCODE_UC_SEND_MIDDLE:
208 			case IB_OPCODE_UC_SEND_LAST:
209 			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
210 			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
211 			case IB_OPCODE_UC_RDMA_WRITE_LAST:
212 			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
213 				qp->resp.drop_msg = 1;
214 				return RESPST_CLEANUP;
215 			default:
216 				return RESPST_CHK_OP_VALID;
217 			}
218 		}
219 		break;
220 
221 	default:
222 		return RESPST_CHK_OP_VALID;
223 	}
224 }
225 
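/* Check that the QP access flags permit the requested remote operation:
 * reads need IB_ACCESS_REMOTE_READ, writes and atomic writes need
 * IB_ACCESS_REMOTE_WRITE, atomics need IB_ACCESS_REMOTE_ATOMIC and
 * flushes need the IB_ACCESS_FLUSH_* bit matching their placement type.
 */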
226 static bool check_qp_attr_access(struct rxe_qp *qp,
227 				 struct rxe_pkt_info *pkt)
228 {
229 	if (((pkt->mask & RXE_READ_MASK) &&
230 	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
231 	    ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) &&
232 	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
233 	    ((pkt->mask & RXE_ATOMIC_MASK) &&
234 	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
235 		return false;
236 
237 	if (pkt->mask & RXE_FLUSH_MASK) {
238 		u32 flush_type = feth_plt(pkt);
239 
240 		if ((flush_type & IB_FLUSH_GLOBAL &&
241 		     !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_GLOBAL)) ||
242 		    (flush_type & IB_FLUSH_PERSISTENT &&
243 		     !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_PERSISTENT)))
244 			return false;
245 	}
246 
247 	return true;
248 }
249 
250 static enum resp_states check_op_valid(struct rxe_qp *qp,
251 				       struct rxe_pkt_info *pkt)
252 {
253 	switch (qp_type(qp)) {
254 	case IB_QPT_RC:
255 		if (!check_qp_attr_access(qp, pkt))
256 			return RESPST_ERR_UNSUPPORTED_OPCODE;
257 
258 		break;
259 
260 	case IB_QPT_UC:
261 		if ((pkt->mask & RXE_WRITE_MASK) &&
262 		    !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) {
263 			qp->resp.drop_msg = 1;
264 			return RESPST_CLEANUP;
265 		}
266 
267 		break;
268 
269 	case IB_QPT_UD:
270 	case IB_QPT_GSI:
271 		break;
272 
273 	default:
274 		WARN_ON_ONCE(1);
275 		break;
276 	}
277 
278 	return RESPST_CHK_RESOURCE;
279 }
280 
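/* Take the next receive wqe from the shared receive queue. The wqe is
 * copied under the consumer lock so the responder keeps working on a
 * private copy after the queue slot is released, and an SRQ limit event
 * is generated if consuming it drops the queue below the armed limit.
 */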
281 static enum resp_states get_srq_wqe(struct rxe_qp *qp)
282 {
283 	struct rxe_srq *srq = qp->srq;
284 	struct rxe_queue *q = srq->rq.queue;
285 	struct rxe_recv_wqe *wqe;
286 	struct ib_event ev;
287 	unsigned int count;
288 	size_t size;
289 	unsigned long flags;
290 
291 	if (srq->error)
292 		return RESPST_ERR_RNR;
293 
294 	spin_lock_irqsave(&srq->rq.consumer_lock, flags);
295 
296 	wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT);
297 	if (!wqe) {
298 		spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
299 		return RESPST_ERR_RNR;
300 	}
301 
302 	/* don't trust user space data */
303 	if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) {
304 		spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
305 		rxe_dbg_qp(qp, "invalid num_sge in SRQ entry\n");
306 		return RESPST_ERR_MALFORMED_WQE;
307 	}
308 	size = sizeof(*wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge);
309 	memcpy(&qp->resp.srq_wqe, wqe, size);
310 
311 	qp->resp.wqe = &qp->resp.srq_wqe.wqe;
312 	queue_advance_consumer(q, QUEUE_TYPE_FROM_CLIENT);
313 	count = queue_count(q, QUEUE_TYPE_FROM_CLIENT);
314 
315 	if (srq->limit && srq->ibsrq.event_handler && (count < srq->limit)) {
316 		srq->limit = 0;
317 		goto event;
318 	}
319 
320 	spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
321 	return RESPST_CHK_LENGTH;
322 
323 event:
324 	spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
325 	ev.device = qp->ibqp.device;
326 	ev.element.srq = qp->ibqp.srq;
327 	ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
328 	srq->ibsrq.event_handler(&ev, srq->ibsrq.srq_context);
329 	return RESPST_CHK_LENGTH;
330 }
331 
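/* Make sure the resources needed by this packet are available: read and
 * atomic operations (including atomic write) require a non-zero
 * max_dest_rd_atomic (the responder resources are simply recycled), while
 * operations that consume a receive wqe need one from either the RQ or
 * the SRQ. In the error state any remaining receive wqes are flushed
 * with IB_WC_WR_FLUSH_ERR instead.
 */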
332 static enum resp_states check_resource(struct rxe_qp *qp,
333 				       struct rxe_pkt_info *pkt)
334 {
335 	struct rxe_srq *srq = qp->srq;
336 
337 	if (qp->resp.state == QP_STATE_ERROR) {
338 		if (qp->resp.wqe) {
339 			qp->resp.status = IB_WC_WR_FLUSH_ERR;
340 			return RESPST_COMPLETE;
341 		} else if (!srq) {
342 			qp->resp.wqe = queue_head(qp->rq.queue,
343 					QUEUE_TYPE_FROM_CLIENT);
344 			if (qp->resp.wqe) {
345 				qp->resp.status = IB_WC_WR_FLUSH_ERR;
346 				return RESPST_COMPLETE;
347 			} else {
348 				return RESPST_EXIT;
349 			}
350 		} else {
351 			return RESPST_EXIT;
352 		}
353 	}
354 
355 	if (pkt->mask & (RXE_READ_OR_ATOMIC_MASK | RXE_ATOMIC_WRITE_MASK)) {
356 	/* it is the requester's job not to send
357 	 * too many read/atomic ops; we just
358 	 * recycle the responder resource queue
359 	 */
360 		if (likely(qp->attr.max_dest_rd_atomic > 0))
361 			return RESPST_CHK_LENGTH;
362 		else
363 			return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ;
364 	}
365 
366 	if (pkt->mask & RXE_RWR_MASK) {
367 		if (srq)
368 			return get_srq_wqe(qp);
369 
370 		qp->resp.wqe = queue_head(qp->rq.queue,
371 				QUEUE_TYPE_FROM_CLIENT);
372 		return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR;
373 	}
374 
375 	return RESPST_CHK_LENGTH;
376 }
377 
378 static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
379 					      struct rxe_pkt_info *pkt)
380 {
381 	/*
382 	 * See IBA C9-92
383 	 * For UD QPs we only check if the packet will fit in the
384 	 * receive buffer later. For RDMA operations additional
385 	 * length checks are performed in check_rkey.
386 	 */
387 	if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) ||
388 					     (qp_type(qp) == IB_QPT_UC))) {
389 		unsigned int mtu = qp->mtu;
390 		unsigned int payload = payload_size(pkt);
391 
392 		if ((pkt->mask & RXE_START_MASK) &&
393 		    (pkt->mask & RXE_END_MASK)) {
394 			if (unlikely(payload > mtu)) {
395 				rxe_dbg_qp(qp, "only packet too long");
396 				return RESPST_ERR_LENGTH;
397 			}
398 		} else if ((pkt->mask & RXE_START_MASK) ||
399 			   (pkt->mask & RXE_MIDDLE_MASK)) {
400 			if (unlikely(payload != mtu)) {
401 				rxe_dbg_qp(qp, "first or middle packet not mtu");
402 				return RESPST_ERR_LENGTH;
403 			}
404 		} else if (pkt->mask & RXE_END_MASK) {
405 			if (unlikely((payload == 0) || (payload > mtu))) {
406 				rxe_dbg_qp(qp, "last packet zero or too long");
407 				return RESPST_ERR_LENGTH;
408 			}
409 		}
410 	}
411 
412 	/* See IBA C9-94 */
413 	if (pkt->mask & RXE_RETH_MASK) {
414 		if (reth_len(pkt) > (1U << 31)) {
415 			rxe_dbg_qp(qp, "dma length too long");
416 			return RESPST_ERR_LENGTH;
417 		}
418 	}
419 
420 	return RESPST_CHK_RKEY;
421 }
422 
423 /* if the reth length field is zero we can assume nothing
424  * about the rkey value and should not validate or use it.
425  * Instead set qp->resp.rkey to 0 which is an invalid rkey
426  * value since the minimum index part is 1.
427  */
428 static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
429 {
430 	unsigned int length = reth_len(pkt);
431 
432 	qp->resp.va = reth_va(pkt);
433 	qp->resp.offset = 0;
434 	qp->resp.resid = length;
435 	qp->resp.length = length;
436 	if (pkt->mask & RXE_READ_OR_WRITE_MASK && length == 0)
437 		qp->resp.rkey = 0;
438 	else
439 		qp->resp.rkey = reth_rkey(pkt);
440 }
441 
442 static void qp_resp_from_atmeth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
443 {
444 	qp->resp.va = atmeth_va(pkt);
445 	qp->resp.offset = 0;
446 	qp->resp.rkey = atmeth_rkey(pkt);
447 	qp->resp.resid = sizeof(u64);
448 }
449 
450 /* resolve the packet rkey to qp->resp.mr or set qp->resp.mr to NULL
451  * if an invalid rkey is received or the RDMA length is zero. For middle
452  * or last packets use the stored value of mr.
453  */
454 static enum resp_states check_rkey(struct rxe_qp *qp,
455 				   struct rxe_pkt_info *pkt)
456 {
457 	struct rxe_mr *mr = NULL;
458 	struct rxe_mw *mw = NULL;
459 	u64 va;
460 	u32 rkey;
461 	u32 resid;
462 	u32 pktlen;
463 	int mtu = qp->mtu;
464 	enum resp_states state;
465 	int access = 0;
466 
467 	if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
468 		if (pkt->mask & RXE_RETH_MASK)
469 			qp_resp_from_reth(qp, pkt);
470 
471 		access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ
472 						     : IB_ACCESS_REMOTE_WRITE;
473 	} else if (pkt->mask & RXE_FLUSH_MASK) {
474 		u32 flush_type = feth_plt(pkt);
475 
476 		if (pkt->mask & RXE_RETH_MASK)
477 			qp_resp_from_reth(qp, pkt);
478 
479 		if (flush_type & IB_FLUSH_GLOBAL)
480 			access |= IB_ACCESS_FLUSH_GLOBAL;
481 		if (flush_type & IB_FLUSH_PERSISTENT)
482 			access |= IB_ACCESS_FLUSH_PERSISTENT;
483 	} else if (pkt->mask & RXE_ATOMIC_MASK) {
484 		qp_resp_from_atmeth(qp, pkt);
485 		access = IB_ACCESS_REMOTE_ATOMIC;
486 	} else {
487 		return RESPST_EXECUTE;
488 	}
489 
490 	/* A zero-byte read or write op is not required to
491 	 * set an addr or rkey. See C9-88
492 	 */
493 	if ((pkt->mask & RXE_READ_OR_WRITE_MASK) &&
494 	    (pkt->mask & RXE_RETH_MASK) && reth_len(pkt) == 0) {
495 		qp->resp.mr = NULL;
496 		return RESPST_EXECUTE;
497 	}
498 
499 	va	= qp->resp.va;
500 	rkey	= qp->resp.rkey;
501 	resid	= qp->resp.resid;
502 	pktlen	= payload_size(pkt);
503 
504 	if (rkey_is_mw(rkey)) {
505 		mw = rxe_lookup_mw(qp, access, rkey);
506 		if (!mw) {
507 			rxe_dbg_qp(qp, "no MW matches rkey %#x\n", rkey);
508 			state = RESPST_ERR_RKEY_VIOLATION;
509 			goto err;
510 		}
511 
512 		mr = mw->mr;
513 		if (!mr) {
514 			rxe_dbg_qp(qp, "MW doesn't have an MR\n");
515 			state = RESPST_ERR_RKEY_VIOLATION;
516 			goto err;
517 		}
518 
519 		if (mw->access & IB_ZERO_BASED)
520 			qp->resp.offset = mw->addr;
521 
522 		rxe_put(mw);
523 		rxe_get(mr);
524 	} else {
525 		mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
526 		if (!mr) {
527 			rxe_dbg_qp(qp, "no MR matches rkey %#x\n", rkey);
528 			state = RESPST_ERR_RKEY_VIOLATION;
529 			goto err;
530 		}
531 	}
532 
533 	if (pkt->mask & RXE_FLUSH_MASK) {
534 		/* FLUSH MR may not set va or resid
535 		 * no need to check range since we will flush whole mr
536 		 */
537 		if (feth_sel(pkt) == IB_FLUSH_MR)
538 			goto skip_check_range;
539 	}
540 
541 	if (mr_check_range(mr, va + qp->resp.offset, resid)) {
542 		state = RESPST_ERR_RKEY_VIOLATION;
543 		goto err;
544 	}
545 
546 skip_check_range:
547 	if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
548 		if (resid > mtu) {
549 			if (pktlen != mtu || bth_pad(pkt)) {
550 				state = RESPST_ERR_LENGTH;
551 				goto err;
552 			}
553 		} else {
554 			if (pktlen != resid) {
555 				state = RESPST_ERR_LENGTH;
556 				goto err;
557 			}
558 			if ((bth_pad(pkt) != (0x3 & (-resid)))) {
559 				/* This case may not be exactly that
560 				 * but nothing else fits.
561 				 */
562 				state = RESPST_ERR_LENGTH;
563 				goto err;
564 			}
565 		}
566 	}
567 
568 	WARN_ON_ONCE(qp->resp.mr);
569 
570 	qp->resp.mr = mr;
571 	return RESPST_EXECUTE;
572 
573 err:
574 	qp->resp.mr = NULL;
575 	if (mr)
576 		rxe_put(mr);
577 	if (mw)
578 		rxe_put(mw);
579 
580 	return state;
581 }
582 
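/* Copy send payload into the scatter/gather list of the current receive
 * wqe. copy_data() advances the dma state inside the wqe so each packet
 * of a multi-packet send continues where the previous one stopped.
 */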
583 static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
584 				     int data_len)
585 {
586 	int err;
587 
588 	err = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma,
589 			data_addr, data_len, RXE_TO_MR_OBJ);
590 	if (unlikely(err))
591 		return (err == -ENOSPC) ? RESPST_ERR_LENGTH
592 					: RESPST_ERR_MALFORMED_WQE;
593 
594 	return RESPST_NONE;
595 }
596 
597 static enum resp_states write_data_in(struct rxe_qp *qp,
598 				      struct rxe_pkt_info *pkt)
599 {
600 	enum resp_states rc = RESPST_NONE;
601 	int	err;
602 	int data_len = payload_size(pkt);
603 
604 	err = rxe_mr_copy(qp->resp.mr, qp->resp.va + qp->resp.offset,
605 			  payload_addr(pkt), data_len, RXE_TO_MR_OBJ);
606 	if (err) {
607 		rc = RESPST_ERR_RKEY_VIOLATION;
608 		goto out;
609 	}
610 
611 	qp->resp.va += data_len;
612 	qp->resp.resid -= data_len;
613 
614 out:
615 	return rc;
616 }
617 
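/* Recycle a responder resource and initialize it for a read, atomic,
 * atomic write or flush operation so the reply can be regenerated if the
 * requester retries it. For reads the covered PSN range is computed from
 * the RETH length so a duplicate read request can be matched against it
 * later by find_resource().
 */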
618 static struct resp_res *rxe_prepare_res(struct rxe_qp *qp,
619 					struct rxe_pkt_info *pkt,
620 					int type)
621 {
622 	struct resp_res *res;
623 	u32 pkts;
624 
625 	res = &qp->resp.resources[qp->resp.res_head];
626 	rxe_advance_resp_resource(qp);
627 	free_rd_atomic_resource(res);
628 
629 	res->type = type;
630 	res->replay = 0;
631 
632 	switch (type) {
633 	case RXE_READ_MASK:
634 		res->read.va = qp->resp.va + qp->resp.offset;
635 		res->read.va_org = qp->resp.va + qp->resp.offset;
636 		res->read.resid = qp->resp.resid;
637 		res->read.length = qp->resp.resid;
638 		res->read.rkey = qp->resp.rkey;
639 
640 		pkts = max_t(u32, (reth_len(pkt) + qp->mtu - 1)/qp->mtu, 1);
641 		res->first_psn = pkt->psn;
642 		res->cur_psn = pkt->psn;
643 		res->last_psn = (pkt->psn + pkts - 1) & BTH_PSN_MASK;
644 
645 		res->state = rdatm_res_state_new;
646 		break;
647 	case RXE_ATOMIC_MASK:
648 	case RXE_ATOMIC_WRITE_MASK:
649 		res->first_psn = pkt->psn;
650 		res->last_psn = pkt->psn;
651 		res->cur_psn = pkt->psn;
652 		break;
653 	case RXE_FLUSH_MASK:
654 		res->flush.va = qp->resp.va + qp->resp.offset;
655 		res->flush.length = qp->resp.length;
656 		res->flush.type = feth_plt(pkt);
657 		res->flush.level = feth_sel(pkt);
658 	}
659 
660 	return res;
661 }
662 
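/* Handle a FLUSH request. Depending on the selectivity level either the
 * requested va range or the whole MR is flushed, and a write memory
 * barrier makes the data persistent or globally visible according to the
 * placement type bits in the FETH.
 */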
663 static enum resp_states process_flush(struct rxe_qp *qp,
664 				       struct rxe_pkt_info *pkt)
665 {
666 	u64 length, start;
667 	struct rxe_mr *mr = qp->resp.mr;
668 	struct resp_res *res = qp->resp.res;
669 
670 	/* oA19-14, oA19-15 */
671 	if (res && res->replay)
672 		return RESPST_ACKNOWLEDGE;
673 	else if (!res) {
674 		res = rxe_prepare_res(qp, pkt, RXE_FLUSH_MASK);
675 		qp->resp.res = res;
676 	}
677 
678 	if (res->flush.level == IB_FLUSH_RANGE) {
679 		start = res->flush.va;
680 		length = res->flush.length;
681 	} else { /* level == IB_FLUSH_MR */
682 		start = mr->ibmr.iova;
683 		length = mr->ibmr.length;
684 	}
685 
686 	if (res->flush.type & IB_FLUSH_PERSISTENT) {
687 		if (rxe_flush_pmem_iova(mr, start, length))
688 			return RESPST_ERR_RKEY_VIOLATION;
689 		/* Make data persistent. */
690 		wmb();
691 	} else if (res->flush.type & IB_FLUSH_GLOBAL) {
692 		/* Make data globally visible. */
693 		wmb();
694 	}
695 
696 	qp->resp.msn++;
697 
698 	/* next expected psn, read handles this separately */
699 	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
700 	qp->resp.ack_psn = qp->resp.psn;
701 
702 	qp->resp.opcode = pkt->opcode;
703 	qp->resp.status = IB_WC_SUCCESS;
704 
705 	return RESPST_ACKNOWLEDGE;
706 }
707 
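/* Carry out an 8 byte compare & swap or fetch & add on the target MR and
 * save the original value in the responder resource so that a retried
 * atomic returns the same result without executing the operation again.
 */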
708 static enum resp_states atomic_reply(struct rxe_qp *qp,
709 				     struct rxe_pkt_info *pkt)
710 {
711 	struct rxe_mr *mr = qp->resp.mr;
712 	struct resp_res *res = qp->resp.res;
713 	int err;
714 
715 	if (!res) {
716 		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_MASK);
717 		qp->resp.res = res;
718 	}
719 
720 	if (!res->replay) {
721 		u64 iova = qp->resp.va + qp->resp.offset;
722 
723 		err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode,
724 					  atmeth_comp(pkt),
725 					  atmeth_swap_add(pkt),
726 					  &res->atomic.orig_val);
727 		if (err)
728 			return err;
729 
730 		qp->resp.msn++;
731 
732 		/* next expected psn, read handles this separately */
733 		qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
734 		qp->resp.ack_psn = qp->resp.psn;
735 
736 		qp->resp.opcode = pkt->opcode;
737 		qp->resp.status = IB_WC_SUCCESS;
738 	}
739 
740 	return RESPST_ACKNOWLEDGE;
741 }
742 
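/* Carry out an 8 byte atomic write of the packet payload to the target
 * MR. A replayed atomic write is acknowledged again without touching
 * memory.
 */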
743 static enum resp_states atomic_write_reply(struct rxe_qp *qp,
744 					   struct rxe_pkt_info *pkt)
745 {
746 	struct resp_res *res = qp->resp.res;
747 	struct rxe_mr *mr;
748 	u64 value;
749 	u64 iova;
750 	int err;
751 
752 	if (!res) {
753 		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
754 		qp->resp.res = res;
755 	}
756 
757 	if (res->replay)
758 		return RESPST_ACKNOWLEDGE;
759 
760 	mr = qp->resp.mr;
761 	value = *(u64 *)payload_addr(pkt);
762 	iova = qp->resp.va + qp->resp.offset;
763 
764 	err = rxe_mr_do_atomic_write(mr, iova, value);
765 	if (err)
766 		return err;
767 
768 	qp->resp.resid = 0;
769 	qp->resp.msn++;
770 
771 	/* next expected psn, read handles this separately */
772 	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
773 	qp->resp.ack_psn = qp->resp.psn;
774 
775 	qp->resp.opcode = pkt->opcode;
776 	qp->resp.status = IB_WC_SUCCESS;
777 
778 	return RESPST_ACKNOWLEDGE;
779 }
780 
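/* Build an ACK, atomic ACK or read response packet. The payload is padded
 * out to a 4 byte boundary and the AETH syndrome/MSN and, for atomic
 * ACKs, the original value are filled in when the opcode carries those
 * headers.
 */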
781 static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
782 					  struct rxe_pkt_info *ack,
783 					  int opcode,
784 					  int payload,
785 					  u32 psn,
786 					  u8 syndrome)
787 {
788 	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
789 	struct sk_buff *skb;
790 	int paylen;
791 	int pad;
792 	int err;
793 
794 	/*
795 	 * allocate packet
796 	 */
797 	pad = (-payload) & 0x3;
798 	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
799 
800 	skb = rxe_init_packet(rxe, &qp->pri_av, paylen, ack);
801 	if (!skb)
802 		return NULL;
803 
804 	ack->qp = qp;
805 	ack->opcode = opcode;
806 	ack->mask = rxe_opcode[opcode].mask;
807 	ack->paylen = paylen;
808 	ack->psn = psn;
809 
810 	bth_init(ack, opcode, 0, 0, pad, IB_DEFAULT_PKEY_FULL,
811 		 qp->attr.dest_qp_num, 0, psn);
812 
813 	if (ack->mask & RXE_AETH_MASK) {
814 		aeth_set_syn(ack, syndrome);
815 		aeth_set_msn(ack, qp->resp.msn);
816 	}
817 
818 	if (ack->mask & RXE_ATMACK_MASK)
819 		atmack_set_orig(ack, qp->resp.res->atomic.orig_val);
820 
821 	err = rxe_prepare(&qp->pri_av, ack, skb);
822 	if (err) {
823 		kfree_skb(skb);
824 		return NULL;
825 	}
826 
827 	return skb;
828 }
829 
830 /**
831  * rxe_recheck_mr - revalidate MR from rkey and get a reference
832  * @qp: the qp
833  * @rkey: the rkey
834  *
835  * This code allows for the MR to have been invalidated or deregistered,
836  * or for the MW, if one was used, to have been invalidated or deallocated.
837  * It is assumed that the access permissions, if originally good, are
838  * still OK and that the mappings are unchanged.
839  *
840  * TODO: If someone reregisters an MR to change its size or
841  * access permissions during the processing of an RDMA read
842  * we should kill the responder resource and complete the
843  * operation with an error.
844  *
845  * Return: mr on success else NULL
846  */
847 static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey)
848 {
849 	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
850 	struct rxe_mr *mr;
851 	struct rxe_mw *mw;
852 
853 	if (rkey_is_mw(rkey)) {
854 		mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8);
855 		if (!mw)
856 			return NULL;
857 
858 		mr = mw->mr;
859 		if (mw->rkey != rkey || mw->state != RXE_MW_STATE_VALID ||
860 		    !mr || mr->state != RXE_MR_STATE_VALID) {
861 			rxe_put(mw);
862 			return NULL;
863 		}
864 
865 		rxe_get(mr);
866 		rxe_put(mw);
867 
868 		return mr;
869 	}
870 
871 	mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
872 	if (!mr)
873 		return NULL;
874 
875 	if (mr->rkey != rkey || mr->state != RXE_MR_STATE_VALID) {
876 		rxe_put(mr);
877 		return NULL;
878 	}
879 
880 	return mr;
881 }
882 
883 /* RDMA read response. If res is not NULL, then we have a current RDMA request
884  * being processed or replayed.
885  */
886 static enum resp_states read_reply(struct rxe_qp *qp,
887 				   struct rxe_pkt_info *req_pkt)
888 {
889 	struct rxe_pkt_info ack_pkt;
890 	struct sk_buff *skb;
891 	int mtu = qp->mtu;
892 	enum resp_states state;
893 	int payload;
894 	int opcode;
895 	int err;
896 	struct resp_res *res = qp->resp.res;
897 	struct rxe_mr *mr;
898 
899 	if (!res) {
900 		res = rxe_prepare_res(qp, req_pkt, RXE_READ_MASK);
901 		qp->resp.res = res;
902 	}
903 
904 	if (res->state == rdatm_res_state_new) {
905 		if (!res->replay || qp->resp.length == 0) {
906 			/* if length == 0 mr will be NULL (which is OK),
907 			 * otherwise qp->resp.mr holds a ref on the mr
908 			 * which we transfer to the local mr and drop below.
909 			 */
910 			mr = qp->resp.mr;
911 			qp->resp.mr = NULL;
912 		} else {
913 			mr = rxe_recheck_mr(qp, res->read.rkey);
914 			if (!mr)
915 				return RESPST_ERR_RKEY_VIOLATION;
916 		}
917 
918 		if (res->read.resid <= mtu)
919 			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;
920 		else
921 			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST;
922 	} else {
923 		/* re-lookup mr from rkey on all later packets.
924 		 * length will be non-zero. This can fail if someone
925 		 * has modified or destroyed the mr since the first packet.
926 		 */
927 		mr = rxe_recheck_mr(qp, res->read.rkey);
928 		if (!mr)
929 			return RESPST_ERR_RKEY_VIOLATION;
930 
931 		if (res->read.resid > mtu)
932 			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE;
933 		else
934 			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST;
935 	}
936 
937 	res->state = rdatm_res_state_next;
938 
939 	payload = min_t(int, res->read.resid, mtu);
940 
941 	skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload,
942 				 res->cur_psn, AETH_ACK_UNLIMITED);
943 	if (!skb) {
944 		state = RESPST_ERR_RNR;
945 		goto err_out;
946 	}
947 
948 	err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
949 			  payload, RXE_FROM_MR_OBJ);
950 	if (err) {
951 		kfree_skb(skb);
952 		state = RESPST_ERR_RKEY_VIOLATION;
953 		goto err_out;
954 	}
955 
956 	if (bth_pad(&ack_pkt)) {
957 		u8 *pad = payload_addr(&ack_pkt) + payload;
958 
959 		memset(pad, 0, bth_pad(&ack_pkt));
960 	}
961 
962 	/* rxe_xmit_packet always consumes the skb */
963 	err = rxe_xmit_packet(qp, &ack_pkt, skb);
964 	if (err) {
965 		state = RESPST_ERR_RNR;
966 		goto err_out;
967 	}
968 
969 	res->read.va += payload;
970 	res->read.resid -= payload;
971 	res->cur_psn = (res->cur_psn + 1) & BTH_PSN_MASK;
972 
973 	if (res->read.resid > 0) {
974 		state = RESPST_DONE;
975 	} else {
976 		qp->resp.res = NULL;
977 		if (!res->replay)
978 			qp->resp.opcode = -1;
979 		if (psn_compare(res->cur_psn, qp->resp.psn) >= 0)
980 			qp->resp.psn = res->cur_psn;
981 		state = RESPST_CLEANUP;
982 	}
983 
984 err_out:
985 	if (mr)
986 		rxe_put(mr);
987 	return state;
988 }
989 
990 static int invalidate_rkey(struct rxe_qp *qp, u32 rkey)
991 {
992 	if (rkey_is_mw(rkey))
993 		return rxe_invalidate_mw(qp, rkey);
994 	else
995 		return rxe_invalidate_mr(qp, rkey);
996 }
997 
998 /* Execute a new request. A retried request never reaches this function (sends
999  * and writes are discarded, and reads and atomics are retried elsewhere).
1000  */
1001 static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
1002 {
1003 	enum resp_states err;
1004 	struct sk_buff *skb = PKT_TO_SKB(pkt);
1005 	union rdma_network_hdr hdr;
1006 
1007 	if (pkt->mask & RXE_SEND_MASK) {
1008 		if (qp_type(qp) == IB_QPT_UD ||
1009 		    qp_type(qp) == IB_QPT_GSI) {
1010 			if (skb->protocol == htons(ETH_P_IP)) {
1011 				memset(&hdr.reserved, 0,
1012 						sizeof(hdr.reserved));
1013 				memcpy(&hdr.roce4grh, ip_hdr(skb),
1014 						sizeof(hdr.roce4grh));
1015 				err = send_data_in(qp, &hdr, sizeof(hdr));
1016 			} else {
1017 				err = send_data_in(qp, ipv6_hdr(skb),
1018 						sizeof(hdr));
1019 			}
1020 			if (err)
1021 				return err;
1022 		}
1023 		err = send_data_in(qp, payload_addr(pkt), payload_size(pkt));
1024 		if (err)
1025 			return err;
1026 	} else if (pkt->mask & RXE_WRITE_MASK) {
1027 		err = write_data_in(qp, pkt);
1028 		if (err)
1029 			return err;
1030 	} else if (pkt->mask & RXE_READ_MASK) {
1031 		/* For RDMA Read we can increment the msn now. See C9-148. */
1032 		qp->resp.msn++;
1033 		return RESPST_READ_REPLY;
1034 	} else if (pkt->mask & RXE_ATOMIC_MASK) {
1035 		return RESPST_ATOMIC_REPLY;
1036 	} else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
1037 		return RESPST_ATOMIC_WRITE_REPLY;
1038 	} else if (pkt->mask & RXE_FLUSH_MASK) {
1039 		return RESPST_PROCESS_FLUSH;
1040 	} else {
1041 		/* Unreachable */
1042 		WARN_ON_ONCE(1);
1043 	}
1044 
1045 	if (pkt->mask & RXE_IETH_MASK) {
1046 		u32 rkey = ieth_rkey(pkt);
1047 
1048 		err = invalidate_rkey(qp, rkey);
1049 		if (err)
1050 			return RESPST_ERR_INVALIDATE_RKEY;
1051 	}
1052 
1053 	if (pkt->mask & RXE_END_MASK)
1054 		/* We successfully processed this new request. */
1055 		qp->resp.msn++;
1056 
1057 	/* next expected psn, read handles this separately */
1058 	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
1059 	qp->resp.ack_psn = qp->resp.psn;
1060 
1061 	qp->resp.opcode = pkt->opcode;
1062 	qp->resp.status = IB_WC_SUCCESS;
1063 
1064 	if (pkt->mask & RXE_COMP_MASK)
1065 		return RESPST_COMPLETE;
1066 	else if (qp_type(qp) == IB_QPT_RC)
1067 		return RESPST_ACKNOWLEDGE;
1068 	else
1069 		return RESPST_CLEANUP;
1070 }
1071 
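/* Generate a receive work completion for the current wqe, using the user
 * space or kernel layout of the cqe to match the owner of the receive CQ,
 * and retire the wqe from the receive queue for non-SRQ QPs.
 */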
1072 static enum resp_states do_complete(struct rxe_qp *qp,
1073 				    struct rxe_pkt_info *pkt)
1074 {
1075 	struct rxe_cqe cqe;
1076 	struct ib_wc *wc = &cqe.ibwc;
1077 	struct ib_uverbs_wc *uwc = &cqe.uibwc;
1078 	struct rxe_recv_wqe *wqe = qp->resp.wqe;
1079 	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
1080 
1081 	if (!wqe)
1082 		goto finish;
1083 
1084 	memset(&cqe, 0, sizeof(cqe));
1085 
1086 	if (qp->rcq->is_user) {
1087 		uwc->status		= qp->resp.status;
1088 		uwc->qp_num		= qp->ibqp.qp_num;
1089 		uwc->wr_id		= wqe->wr_id;
1090 	} else {
1091 		wc->status		= qp->resp.status;
1092 		wc->qp			= &qp->ibqp;
1093 		wc->wr_id		= wqe->wr_id;
1094 	}
1095 
1096 	if (wc->status == IB_WC_SUCCESS) {
1097 		rxe_counter_inc(rxe, RXE_CNT_RDMA_RECV);
1098 		wc->opcode = (pkt->mask & RXE_IMMDT_MASK &&
1099 				pkt->mask & RXE_WRITE_MASK) ?
1100 					IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;
1101 		wc->byte_len = (pkt->mask & RXE_IMMDT_MASK &&
1102 				pkt->mask & RXE_WRITE_MASK) ?
1103 					qp->resp.length : wqe->dma.length - wqe->dma.resid;
1104 
1105 		/* fields after byte_len are different between kernel and user
1106 		 * space
1107 		 */
1108 		if (qp->rcq->is_user) {
1109 			uwc->wc_flags = IB_WC_GRH;
1110 
1111 			if (pkt->mask & RXE_IMMDT_MASK) {
1112 				uwc->wc_flags |= IB_WC_WITH_IMM;
1113 				uwc->ex.imm_data = immdt_imm(pkt);
1114 			}
1115 
1116 			if (pkt->mask & RXE_IETH_MASK) {
1117 				uwc->wc_flags |= IB_WC_WITH_INVALIDATE;
1118 				uwc->ex.invalidate_rkey = ieth_rkey(pkt);
1119 			}
1120 
1121 			if (pkt->mask & RXE_DETH_MASK)
1122 				uwc->src_qp = deth_sqp(pkt);
1123 
1124 			uwc->port_num		= qp->attr.port_num;
1125 		} else {
1126 			struct sk_buff *skb = PKT_TO_SKB(pkt);
1127 
1128 			wc->wc_flags = IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE;
1129 			if (skb->protocol == htons(ETH_P_IP))
1130 				wc->network_hdr_type = RDMA_NETWORK_IPV4;
1131 			else
1132 				wc->network_hdr_type = RDMA_NETWORK_IPV6;
1133 
1134 			if (is_vlan_dev(skb->dev)) {
1135 				wc->wc_flags |= IB_WC_WITH_VLAN;
1136 				wc->vlan_id = vlan_dev_vlan_id(skb->dev);
1137 			}
1138 
1139 			if (pkt->mask & RXE_IMMDT_MASK) {
1140 				wc->wc_flags |= IB_WC_WITH_IMM;
1141 				wc->ex.imm_data = immdt_imm(pkt);
1142 			}
1143 
1144 			if (pkt->mask & RXE_IETH_MASK) {
1145 				wc->wc_flags |= IB_WC_WITH_INVALIDATE;
1146 				wc->ex.invalidate_rkey = ieth_rkey(pkt);
1147 			}
1148 
1149 			if (pkt->mask & RXE_DETH_MASK)
1150 				wc->src_qp = deth_sqp(pkt);
1151 
1152 			wc->port_num		= qp->attr.port_num;
1153 		}
1154 	}
1155 
1156 	/* have copy for srq and reference for !srq */
1157 	if (!qp->srq)
1158 		queue_advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_CLIENT);
1159 
1160 	qp->resp.wqe = NULL;
1161 
1162 	if (rxe_cq_post(qp->rcq, &cqe, pkt ? bth_se(pkt) : 1))
1163 		return RESPST_ERR_CQ_OVERFLOW;
1164 
1165 finish:
1166 	if (unlikely(qp->resp.state == QP_STATE_ERROR))
1167 		return RESPST_CHK_RESOURCE;
1168 	if (unlikely(!pkt))
1169 		return RESPST_DONE;
1170 	if (qp_type(qp) == IB_QPT_RC)
1171 		return RESPST_ACKNOWLEDGE;
1172 	else
1173 		return RESPST_CLEANUP;
1174 }
1175 
1176 
1177 static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn,
1178 				  int opcode, const char *msg)
1179 {
1180 	int err;
1181 	struct rxe_pkt_info ack_pkt;
1182 	struct sk_buff *skb;
1183 
1184 	skb = prepare_ack_packet(qp, &ack_pkt, opcode, 0, psn, syndrome);
1185 	if (!skb)
1186 		return -ENOMEM;
1187 
1188 	err = rxe_xmit_packet(qp, &ack_pkt, skb);
1189 	if (err)
1190 		rxe_dbg_qp(qp, "Failed sending %s\n", msg);
1191 
1192 	return err;
1193 }
1194 
1195 static int send_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
1196 {
1197 	return send_common_ack(qp, syndrome, psn,
1198 			IB_OPCODE_RC_ACKNOWLEDGE, "ACK");
1199 }
1200 
1201 static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
1202 {
1203 	int ret = send_common_ack(qp, syndrome, psn,
1204 			IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, "ATOMIC ACK");
1205 
1206 	/* have to clear this since it is used to trigger
1207 	 * long read replies
1208 	 */
1209 	qp->resp.res = NULL;
1210 	return ret;
1211 }
1212 
1213 static int send_read_response_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
1214 {
1215 	int ret = send_common_ack(qp, syndrome, psn,
1216 			IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY,
1217 			"RDMA READ response of length zero ACK");
1218 
1219 	/* have to clear this since it is used to trigger
1220 	 * long read replies
1221 	 */
1222 	qp->resp.res = NULL;
1223 	return ret;
1224 }
1225 
1226 static enum resp_states acknowledge(struct rxe_qp *qp,
1227 				    struct rxe_pkt_info *pkt)
1228 {
1229 	if (qp_type(qp) != IB_QPT_RC)
1230 		return RESPST_CLEANUP;
1231 
1232 	if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED)
1233 		send_ack(qp, qp->resp.aeth_syndrome, pkt->psn);
1234 	else if (pkt->mask & RXE_ATOMIC_MASK)
1235 		send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
1236 	else if (pkt->mask & (RXE_FLUSH_MASK | RXE_ATOMIC_WRITE_MASK))
1237 		send_read_response_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
1238 	else if (bth_ack(pkt))
1239 		send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
1240 
1241 	return RESPST_CLEANUP;
1242 }
1243 
1244 static enum resp_states cleanup(struct rxe_qp *qp,
1245 				struct rxe_pkt_info *pkt)
1246 {
1247 	struct sk_buff *skb;
1248 
1249 	if (pkt) {
1250 		skb = skb_dequeue(&qp->req_pkts);
1251 		rxe_put(qp);
1252 		kfree_skb(skb);
1253 		ib_device_put(qp->ibqp.device);
1254 	}
1255 
1256 	if (qp->resp.mr) {
1257 		rxe_put(qp->resp.mr);
1258 		qp->resp.mr = NULL;
1259 	}
1260 
1261 	return RESPST_DONE;
1262 }
1263 
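/* Search the responder resources for one whose PSN range contains the
 * given PSN. Used to match a duplicate read/atomic/flush request to the
 * operation that originally consumed the resource.
 */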
1264 static struct resp_res *find_resource(struct rxe_qp *qp, u32 psn)
1265 {
1266 	int i;
1267 
1268 	for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) {
1269 		struct resp_res *res = &qp->resp.resources[i];
1270 
1271 		if (res->type == 0)
1272 			continue;
1273 
1274 		if (psn_compare(psn, res->first_psn) >= 0 &&
1275 		    psn_compare(psn, res->last_psn) <= 0) {
1276 			return res;
1277 		}
1278 	}
1279 
1280 	return NULL;
1281 }
1282 
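/* Handle a request whose PSN is earlier than the one expected next.
 * Duplicate sends and writes are simply acknowledged again, while
 * duplicate reads, atomics and flushes are replayed from the matching
 * responder resource if one is still available.
 */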
1283 static enum resp_states duplicate_request(struct rxe_qp *qp,
1284 					  struct rxe_pkt_info *pkt)
1285 {
1286 	enum resp_states rc;
1287 	u32 prev_psn = (qp->resp.ack_psn - 1) & BTH_PSN_MASK;
1288 
1289 	if (pkt->mask & RXE_SEND_MASK ||
1290 	    pkt->mask & RXE_WRITE_MASK) {
1291 		/* Duplicate SEND or WRITE. Ack again and cleanup. C9-105. */
1292 		send_ack(qp, AETH_ACK_UNLIMITED, prev_psn);
1293 		return RESPST_CLEANUP;
1294 	} else if (pkt->mask & RXE_FLUSH_MASK) {
1295 		struct resp_res *res;
1296 
1297 		/* Find the operation in our list of responder resources. */
1298 		res = find_resource(qp, pkt->psn);
1299 		if (res) {
1300 			res->replay = 1;
1301 			res->cur_psn = pkt->psn;
1302 			qp->resp.res = res;
1303 			rc = RESPST_PROCESS_FLUSH;
1304 			goto out;
1305 		}
1306 
1307 		/* Resource not found. Class D error. Drop the request. */
1308 		rc = RESPST_CLEANUP;
1309 		goto out;
1310 	} else if (pkt->mask & RXE_READ_MASK) {
1311 		struct resp_res *res;
1312 
1313 		res = find_resource(qp, pkt->psn);
1314 		if (!res) {
1315 			/* Resource not found. Class D error.  Drop the
1316 			 * request.
1317 			 */
1318 			rc = RESPST_CLEANUP;
1319 			goto out;
1320 		} else {
1321 			/* Ensure this new request is the same as the previous
1322 			 * one or a subset of it.
1323 			 */
1324 			u64 iova = reth_va(pkt);
1325 			u32 resid = reth_len(pkt);
1326 
1327 			if (iova < res->read.va_org ||
1328 			    resid > res->read.length ||
1329 			    (iova + resid) > (res->read.va_org +
1330 					      res->read.length)) {
1331 				rc = RESPST_CLEANUP;
1332 				goto out;
1333 			}
1334 
1335 			if (reth_rkey(pkt) != res->read.rkey) {
1336 				rc = RESPST_CLEANUP;
1337 				goto out;
1338 			}
1339 
1340 			res->cur_psn = pkt->psn;
1341 			res->state = (pkt->psn == res->first_psn) ?
1342 					rdatm_res_state_new :
1343 					rdatm_res_state_replay;
1344 			res->replay = 1;
1345 
1346 			/* Reset the resource, except length. */
1347 			res->read.va_org = iova;
1348 			res->read.va = iova;
1349 			res->read.resid = resid;
1350 
1351 			/* Replay the RDMA read reply. */
1352 			qp->resp.res = res;
1353 			rc = RESPST_READ_REPLY;
1354 			goto out;
1355 		}
1356 	} else {
1357 		struct resp_res *res;
1358 
1359 		/* Find the operation in our list of responder resources. */
1360 		res = find_resource(qp, pkt->psn);
1361 		if (res) {
1362 			res->replay = 1;
1363 			res->cur_psn = pkt->psn;
1364 			qp->resp.res = res;
1365 			rc = pkt->mask & RXE_ATOMIC_MASK ?
1366 					RESPST_ATOMIC_REPLY :
1367 					RESPST_ATOMIC_WRITE_REPLY;
1368 			goto out;
1369 		}
1370 
1371 		/* Resource not found. Class D error. Drop the request. */
1372 		rc = RESPST_CLEANUP;
1373 		goto out;
1374 	}
1375 out:
1376 	return rc;
1377 }
1378 
1379 /* Process a class A or C error. Both are treated the same in this implementation. */
1380 static void do_class_ac_error(struct rxe_qp *qp, u8 syndrome,
1381 			      enum ib_wc_status status)
1382 {
1383 	qp->resp.aeth_syndrome	= syndrome;
1384 	qp->resp.status		= status;
1385 
1386 	/* indicate that we should go through the ERROR state */
1387 	qp->resp.goto_error	= 1;
1388 }
1389 
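/* Handle a UC missing-last-opcode error: with an SRQ (class E) the
 * current wqe (if any) is completed in error, without one (class D1) the
 * receive wqe is quietly reset so the next message can reuse it.
 */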
1390 static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
1391 {
1392 	/* UC */
1393 	if (qp->srq) {
1394 		/* Class E */
1395 		qp->resp.drop_msg = 1;
1396 		if (qp->resp.wqe) {
1397 			qp->resp.status = IB_WC_REM_INV_REQ_ERR;
1398 			return RESPST_COMPLETE;
1399 		} else {
1400 			return RESPST_CLEANUP;
1401 		}
1402 	} else {
1403 		/* Class D1. This packet may be the start of a
1404 		 * new message and could be valid. The previous
1405 		 * message is invalid and ignored. Reset the
1406 		 * recv wr to its original state.
1407 		 */
1408 		if (qp->resp.wqe) {
1409 			qp->resp.wqe->dma.resid = qp->resp.wqe->dma.length;
1410 			qp->resp.wqe->dma.cur_sge = 0;
1411 			qp->resp.wqe->dma.sge_offset = 0;
1412 			qp->resp.opcode = -1;
1413 		}
1414 
1415 		if (qp->resp.mr) {
1416 			rxe_put(qp->resp.mr);
1417 			qp->resp.mr = NULL;
1418 		}
1419 
1420 		return RESPST_CLEANUP;
1421 	}
1422 }
1423 
1424 static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
1425 {
1426 	struct sk_buff *skb;
1427 	struct rxe_queue *q = qp->rq.queue;
1428 
1429 	while ((skb = skb_dequeue(&qp->req_pkts))) {
1430 		rxe_put(qp);
1431 		kfree_skb(skb);
1432 		ib_device_put(qp->ibqp.device);
1433 	}
1434 
1435 	if (notify)
1436 		return;
1437 
1438 	while (!qp->srq && q && queue_head(q, q->type))
1439 		queue_advance_consumer(q, q->type);
1440 }
1441 
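/* Responder task. Runs the receive side state machine for one QP,
 * starting from RESPST_GET_REQ (or RESPST_RESET after a QP reset) and
 * stepping from state to state until a request has been fully handled
 * (RESPST_DONE) or there is nothing more to do (RESPST_EXIT).
 */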
1442 int rxe_responder(void *arg)
1443 {
1444 	struct rxe_qp *qp = (struct rxe_qp *)arg;
1445 	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
1446 	enum resp_states state;
1447 	struct rxe_pkt_info *pkt = NULL;
1448 	int ret;
1449 
1450 	if (!rxe_get(qp))
1451 		return -EAGAIN;
1452 
1453 	qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;
1454 
1455 	if (!qp->valid)
1456 		goto exit;
1457 
1458 	switch (qp->resp.state) {
1459 	case QP_STATE_RESET:
1460 		state = RESPST_RESET;
1461 		break;
1462 
1463 	default:
1464 		state = RESPST_GET_REQ;
1465 		break;
1466 	}
1467 
1468 	while (1) {
1469 		rxe_dbg_qp(qp, "state = %s\n", resp_state_name[state]);
1470 		switch (state) {
1471 		case RESPST_GET_REQ:
1472 			state = get_req(qp, &pkt);
1473 			break;
1474 		case RESPST_CHK_PSN:
1475 			state = check_psn(qp, pkt);
1476 			break;
1477 		case RESPST_CHK_OP_SEQ:
1478 			state = check_op_seq(qp, pkt);
1479 			break;
1480 		case RESPST_CHK_OP_VALID:
1481 			state = check_op_valid(qp, pkt);
1482 			break;
1483 		case RESPST_CHK_RESOURCE:
1484 			state = check_resource(qp, pkt);
1485 			break;
1486 		case RESPST_CHK_LENGTH:
1487 			state = rxe_resp_check_length(qp, pkt);
1488 			break;
1489 		case RESPST_CHK_RKEY:
1490 			state = check_rkey(qp, pkt);
1491 			break;
1492 		case RESPST_EXECUTE:
1493 			state = execute(qp, pkt);
1494 			break;
1495 		case RESPST_COMPLETE:
1496 			state = do_complete(qp, pkt);
1497 			break;
1498 		case RESPST_READ_REPLY:
1499 			state = read_reply(qp, pkt);
1500 			break;
1501 		case RESPST_ATOMIC_REPLY:
1502 			state = atomic_reply(qp, pkt);
1503 			break;
1504 		case RESPST_ATOMIC_WRITE_REPLY:
1505 			state = atomic_write_reply(qp, pkt);
1506 			break;
1507 		case RESPST_PROCESS_FLUSH:
1508 			state = process_flush(qp, pkt);
1509 			break;
1510 		case RESPST_ACKNOWLEDGE:
1511 			state = acknowledge(qp, pkt);
1512 			break;
1513 		case RESPST_CLEANUP:
1514 			state = cleanup(qp, pkt);
1515 			break;
1516 		case RESPST_DUPLICATE_REQUEST:
1517 			state = duplicate_request(qp, pkt);
1518 			break;
1519 		case RESPST_ERR_PSN_OUT_OF_SEQ:
1520 			/* RC only - Class B. Drop packet. */
1521 			send_ack(qp, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn);
1522 			state = RESPST_CLEANUP;
1523 			break;
1524 
1525 		case RESPST_ERR_TOO_MANY_RDMA_ATM_REQ:
1526 		case RESPST_ERR_MISSING_OPCODE_FIRST:
1527 		case RESPST_ERR_MISSING_OPCODE_LAST_C:
1528 		case RESPST_ERR_UNSUPPORTED_OPCODE:
1529 		case RESPST_ERR_MISALIGNED_ATOMIC:
1530 			/* RC Only - Class C. */
1531 			do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
1532 					  IB_WC_REM_INV_REQ_ERR);
1533 			state = RESPST_COMPLETE;
1534 			break;
1535 
1536 		case RESPST_ERR_MISSING_OPCODE_LAST_D1E:
1537 			state = do_class_d1e_error(qp);
1538 			break;
1539 		case RESPST_ERR_RNR:
1540 			if (qp_type(qp) == IB_QPT_RC) {
1541 				rxe_counter_inc(rxe, RXE_CNT_SND_RNR);
1542 				/* RC - class B */
1543 				send_ack(qp, AETH_RNR_NAK |
1544 					 (~AETH_TYPE_MASK &
1545 					 qp->attr.min_rnr_timer),
1546 					 pkt->psn);
1547 			} else {
1548 				/* UD/UC - class D */
1549 				qp->resp.drop_msg = 1;
1550 			}
1551 			state = RESPST_CLEANUP;
1552 			break;
1553 
1554 		case RESPST_ERR_RKEY_VIOLATION:
1555 			if (qp_type(qp) == IB_QPT_RC) {
1556 				/* Class C */
1557 				do_class_ac_error(qp, AETH_NAK_REM_ACC_ERR,
1558 						  IB_WC_REM_ACCESS_ERR);
1559 				state = RESPST_COMPLETE;
1560 			} else {
1561 				qp->resp.drop_msg = 1;
1562 				if (qp->srq) {
1563 					/* UC/SRQ Class D */
1564 					qp->resp.status = IB_WC_REM_ACCESS_ERR;
1565 					state = RESPST_COMPLETE;
1566 				} else {
1567 					/* UC/non-SRQ Class E. */
1568 					state = RESPST_CLEANUP;
1569 				}
1570 			}
1571 			break;
1572 
1573 		case RESPST_ERR_INVALIDATE_RKEY:
1574 			/* RC - Class J. */
1575 			qp->resp.goto_error = 1;
1576 			qp->resp.status = IB_WC_REM_INV_REQ_ERR;
1577 			state = RESPST_COMPLETE;
1578 			break;
1579 
1580 		case RESPST_ERR_LENGTH:
1581 			if (qp_type(qp) == IB_QPT_RC) {
1582 				/* Class C */
1583 				do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
1584 						  IB_WC_REM_INV_REQ_ERR);
1585 				state = RESPST_COMPLETE;
1586 			} else if (qp->srq) {
1587 				/* UC/UD - class E */
1588 				qp->resp.status = IB_WC_REM_INV_REQ_ERR;
1589 				state = RESPST_COMPLETE;
1590 			} else {
1591 				/* UC/UD - class D */
1592 				qp->resp.drop_msg = 1;
1593 				state = RESPST_CLEANUP;
1594 			}
1595 			break;
1596 
1597 		case RESPST_ERR_MALFORMED_WQE:
1598 			/* All, Class A. */
1599 			do_class_ac_error(qp, AETH_NAK_REM_OP_ERR,
1600 					  IB_WC_LOC_QP_OP_ERR);
1601 			state = RESPST_COMPLETE;
1602 			break;
1603 
1604 		case RESPST_ERR_CQ_OVERFLOW:
1605 			/* All - Class G */
1606 			state = RESPST_ERROR;
1607 			break;
1608 
1609 		case RESPST_DONE:
1610 			if (qp->resp.goto_error) {
1611 				state = RESPST_ERROR;
1612 				break;
1613 			}
1614 
1615 			goto done;
1616 
1617 		case RESPST_EXIT:
1618 			if (qp->resp.goto_error) {
1619 				state = RESPST_ERROR;
1620 				break;
1621 			}
1622 
1623 			goto exit;
1624 
1625 		case RESPST_RESET:
1626 			rxe_drain_req_pkts(qp, false);
1627 			qp->resp.wqe = NULL;
1628 			goto exit;
1629 
1630 		case RESPST_ERROR:
1631 			qp->resp.goto_error = 0;
1632 			rxe_dbg_qp(qp, "moved to error state\n");
1633 			rxe_qp_error(qp);
1634 			goto exit;
1635 
1636 		default:
1637 			WARN_ON_ONCE(1);
1638 		}
1639 	}
1640 
1641 	/* A non-zero return value will cause rxe_do_task to
1642 	 * exit its loop and end the tasklet. A zero return
1643 	 * will continue looping and return to rxe_responder.
1644 	 */
1645 done:
1646 	ret = 0;
1647 	goto out;
1648 exit:
1649 	ret = -EAGAIN;
1650 out:
1651 	rxe_put(qp);
1652 	return ret;
1653 }
1654