1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * hermon_cq.c
28 * Hermon Completion Queue Processing Routines
29 *
30 * Implements all the routines necessary for allocating, freeing, resizing,
31 * and handling the completion type events that the Hermon hardware can
32 * generate.
33 */
34
35 #include <sys/types.h>
36 #include <sys/conf.h>
37 #include <sys/ddi.h>
38 #include <sys/sunddi.h>
39 #include <sys/modctl.h>
40 #include <sys/bitmap.h>
41 #include <sys/sysmacros.h>
42
43 #include <sys/ib/adapters/hermon/hermon.h>
44
45 int hermon_should_panic = 0; /* debugging aid; initialized to 0 (off) and not referenced in the routines shown before hermon_cq_poll() */
46
/*
 * hermon_cq_update_ci_doorbell()
 *    Writes the low 24 bits of the CQ's software consumer index
 *    (cq_consindx) into the CQ's doorbell record (cq_arm_ci_vdbr) using
 *    HERMON_UAR_DB_RECORD_WRITE().
 *
 *    "cq" may be any expression that yields a pointer to the CQ handle
 *    structure; it is parenthesized in the expansion, but note that it is
 *    evaluated twice, so it must not have side effects.
 */
#define	hermon_cq_update_ci_doorbell(cq)				\
	HERMON_UAR_DB_RECORD_WRITE((cq)->cq_arm_ci_vdbr,		\
	    (cq)->cq_consindx & 0x00FFFFFF)
51
52 static int hermon_cq_arm_doorbell(hermon_state_t *state, hermon_cqhdl_t cq,
53 uint_t cmd);
54 #pragma inline(hermon_cq_arm_doorbell)
55 static void hermon_arm_cq_dbr_init(hermon_dbr_t *cq_arm_dbr);
56 #pragma inline(hermon_arm_cq_dbr_init)
57 static void hermon_cq_cqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
58 hermon_hw_cqe_t *cqe, ibt_wc_t *wc);
59 static void hermon_cq_errcqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
60 hermon_hw_cqe_t *cqe, ibt_wc_t *wc);
61
62
63 /*
64 * hermon_cq_alloc()
65 * Context: Can be called only from user or kernel context.
66 */
67 int
hermon_cq_alloc(hermon_state_t * state,ibt_cq_hdl_t ibt_cqhdl,ibt_cq_attr_t * cq_attr,uint_t * actual_size,hermon_cqhdl_t * cqhdl,uint_t sleepflag)68 hermon_cq_alloc(hermon_state_t *state, ibt_cq_hdl_t ibt_cqhdl,
69 ibt_cq_attr_t *cq_attr, uint_t *actual_size, hermon_cqhdl_t *cqhdl,
70 uint_t sleepflag)
71 {
72 hermon_rsrc_t *cqc, *rsrc;
73 hermon_umap_db_entry_t *umapdb;
74 hermon_hw_cqc_t cqc_entry;
75 hermon_cqhdl_t cq;
76 ibt_mr_attr_t mr_attr;
77 hermon_mr_options_t op;
78 hermon_pdhdl_t pd;
79 hermon_mrhdl_t mr;
80 hermon_hw_cqe_t *buf;
81 uint64_t value;
82 uint32_t log_cq_size, uarpg;
83 uint_t cq_is_umap;
84 uint32_t status, flag;
85 hermon_cq_sched_t *cq_schedp;
86
87 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq_attr))
88
89 /*
90 * Determine whether CQ is being allocated for userland access or
91 * whether it is being allocated for kernel access. If the CQ is
92 * being allocated for userland access, then lookup the UAR
93 * page number for the current process. Note: If this is not found
94 * (e.g. if the process has not previously open()'d the Hermon driver),
95 * then an error is returned.
96 */
97 cq_is_umap = (cq_attr->cq_flags & IBT_CQ_USER_MAP) ? 1 : 0;
98 if (cq_is_umap) {
99 status = hermon_umap_db_find(state->hs_instance, ddi_get_pid(),
100 MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
101 if (status != DDI_SUCCESS) {
102 status = IBT_INVALID_PARAM;
103 goto cqalloc_fail;
104 }
105 uarpg = ((hermon_rsrc_t *)(uintptr_t)value)->hr_indx;
106 } else {
107 uarpg = state->hs_kernel_uar_index;
108 }
109
110 /* Use the internal protection domain (PD) for setting up CQs */
111 pd = state->hs_pdhdl_internal;
112
113 /* Increment the reference count on the protection domain (PD) */
114 hermon_pd_refcnt_inc(pd);
115
116 /*
117 * Allocate an CQ context entry. This will be filled in with all
118 * the necessary parameters to define the Completion Queue. And then
119 * ownership will be passed to the hardware in the final step
120 * below. If we fail here, we must undo the protection domain
121 * reference count.
122 */
123 status = hermon_rsrc_alloc(state, HERMON_CQC, 1, sleepflag, &cqc);
124 if (status != DDI_SUCCESS) {
125 status = IBT_INSUFF_RESOURCE;
126 goto cqalloc_fail1;
127 }
128
129 /*
130 * Allocate the software structure for tracking the completion queue
131 * (i.e. the Hermon Completion Queue handle). If we fail here, we must
132 * undo the protection domain reference count and the previous
133 * resource allocation.
134 */
135 status = hermon_rsrc_alloc(state, HERMON_CQHDL, 1, sleepflag, &rsrc);
136 if (status != DDI_SUCCESS) {
137 status = IBT_INSUFF_RESOURCE;
138 goto cqalloc_fail2;
139 }
140 cq = (hermon_cqhdl_t)rsrc->hr_addr;
141 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq))
142 cq->cq_is_umap = cq_is_umap;
143 cq->cq_cqnum = cqc->hr_indx; /* just use index, implicit in Hermon */
144 cq->cq_intmod_count = 0;
145 cq->cq_intmod_usec = 0;
146
147 /*
148 * If this will be a user-mappable CQ, then allocate an entry for
149 * the "userland resources database". This will later be added to
150 * the database (after all further CQ operations are successful).
151 * If we fail here, we must undo the reference counts and the
152 * previous resource allocation.
153 */
154 if (cq->cq_is_umap) {
155 umapdb = hermon_umap_db_alloc(state->hs_instance, cq->cq_cqnum,
156 MLNX_UMAP_CQMEM_RSRC, (uint64_t)(uintptr_t)rsrc);
157 if (umapdb == NULL) {
158 status = IBT_INSUFF_RESOURCE;
159 goto cqalloc_fail3;
160 }
161 }
162
163
164 /*
165 * Allocate the doorbell record. We'll need one for the CQ, handling
166 * both consumer index (SET CI) and the CQ state (CQ ARM).
167 */
168
169 status = hermon_dbr_alloc(state, uarpg, &cq->cq_arm_ci_dbr_acchdl,
170 &cq->cq_arm_ci_vdbr, &cq->cq_arm_ci_pdbr, &cq->cq_dbr_mapoffset);
171 if (status != DDI_SUCCESS) {
172 status = IBT_INSUFF_RESOURCE;
173 goto cqalloc_fail4;
174 }
175
176 /*
177 * Calculate the appropriate size for the completion queue.
178 * Note: All Hermon CQs must be a power-of-2 minus 1 in size. Also
179 * they may not be any smaller than HERMON_CQ_MIN_SIZE. This step is
180 * to round the requested size up to the next highest power-of-2
181 */
182 cq_attr->cq_size = max(cq_attr->cq_size, HERMON_CQ_MIN_SIZE);
183 log_cq_size = highbit(cq_attr->cq_size);
184
185 /*
186 * Next we verify that the rounded-up size is valid (i.e. consistent
187 * with the device limits and/or software-configured limits)
188 */
189 if (log_cq_size > state->hs_cfg_profile->cp_log_max_cq_sz) {
190 status = IBT_HCA_CQ_EXCEEDED;
191 goto cqalloc_fail4a;
192 }
193
194 /*
195 * Allocate the memory for Completion Queue.
196 *
197 * Note: Although we use the common queue allocation routine, we
198 * always specify HERMON_QUEUE_LOCATION_NORMAL (i.e. CQ located in
199 * kernel system memory) for kernel CQs because it would be
200 * inefficient to have CQs located in DDR memory. This is primarily
201 * because CQs are read from (by software) more than they are written
202 * to. (We always specify HERMON_QUEUE_LOCATION_USERLAND for all
203 * user-mappable CQs for a similar reason.)
204 * It is also worth noting that, unlike Hermon QP work queues,
205 * completion queues do not have the same strict alignment
206 * requirements. It is sufficient for the CQ memory to be both
207 * aligned to and bound to addresses which are a multiple of CQE size.
208 */
209 cq->cq_cqinfo.qa_size = (1 << log_cq_size) * sizeof (hermon_hw_cqe_t);
210
211 cq->cq_cqinfo.qa_alloc_align = PAGESIZE;
212 cq->cq_cqinfo.qa_bind_align = PAGESIZE;
213 if (cq->cq_is_umap) {
214 cq->cq_cqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
215 } else {
216 cq->cq_cqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
217 hermon_arm_cq_dbr_init(cq->cq_arm_ci_vdbr);
218 }
219 status = hermon_queue_alloc(state, &cq->cq_cqinfo, sleepflag);
220 if (status != DDI_SUCCESS) {
221 status = IBT_INSUFF_RESOURCE;
222 goto cqalloc_fail4;
223 }
224 buf = (hermon_hw_cqe_t *)cq->cq_cqinfo.qa_buf_aligned;
225 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))
226
227 /*
228 * The ownership bit of the CQE's is set by the HW during the process
229 * of transferrring ownership of the CQ (PRM 09.35c, 14.2.1, note D1
230 *
231 */
232
233 /*
234 * Register the memory for the CQ. The memory for the CQ must
235 * be registered in the Hermon TPT tables. This gives us the LKey
236 * to specify in the CQ context below. Note: If this is a user-
237 * mappable CQ, then we will force DDI_DMA_CONSISTENT mapping.
238 */
239 flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP;
240 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
241 mr_attr.mr_len = cq->cq_cqinfo.qa_size;
242 mr_attr.mr_as = NULL;
243 mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
244 op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
245 op.mro_bind_dmahdl = cq->cq_cqinfo.qa_dmahdl;
246 op.mro_bind_override_addr = 0;
247 status = hermon_mr_register(state, pd, &mr_attr, &mr, &op,
248 HERMON_CQ_CMPT);
249 if (status != DDI_SUCCESS) {
250 status = IBT_INSUFF_RESOURCE;
251 goto cqalloc_fail5;
252 }
253 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
254
255 cq->cq_erreqnum = HERMON_CQ_ERREQNUM_GET(state);
256 if (cq_attr->cq_flags & IBT_CQ_HID) {
257 if (!HERMON_HID_VALID(state, cq_attr->cq_hid)) {
258 IBTF_DPRINTF_L2("CQalloc", "bad handler id 0x%x",
259 cq_attr->cq_hid);
260 status = IBT_INVALID_PARAM;
261 goto cqalloc_fail5;
262 }
263 cq->cq_eqnum = HERMON_HID_TO_EQNUM(state, cq_attr->cq_hid);
264 IBTF_DPRINTF_L2("cqalloc", "hid: eqn %d", cq->cq_eqnum);
265 } else {
266 cq_schedp = (hermon_cq_sched_t *)cq_attr->cq_sched;
267 if (cq_schedp == NULL) {
268 cq_schedp = &state->hs_cq_sched_default;
269 } else if (cq_schedp != &state->hs_cq_sched_default) {
270 int i;
271 hermon_cq_sched_t *tmp;
272
273 tmp = state->hs_cq_sched_array;
274 for (i = 0; i < state->hs_cq_sched_array_size; i++)
275 if (cq_schedp == &tmp[i])
276 break; /* found it */
277 if (i >= state->hs_cq_sched_array_size) {
278 cmn_err(CE_CONT, "!Invalid cq_sched argument: "
279 "ignored\n");
280 cq_schedp = &state->hs_cq_sched_default;
281 }
282 }
283 cq->cq_eqnum = HERMON_HID_TO_EQNUM(state,
284 HERMON_CQSCHED_NEXT_HID(cq_schedp));
285 IBTF_DPRINTF_L2("cqalloc", "sched: first-1 %d, len %d, "
286 "eqn %d", cq_schedp->cqs_start_hid - 1,
287 cq_schedp->cqs_len, cq->cq_eqnum);
288 }
289
290 /*
291 * Fill in the CQC entry. This is the final step before passing
292 * ownership of the CQC entry to the Hermon hardware. We use all of
293 * the information collected/calculated above to fill in the
294 * requisite portions of the CQC. Note: If this CQ is going to be
295 * used for userland access, then we need to set the UAR page number
296 * appropriately (otherwise it's a "don't care")
297 */
298 bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
299
300 cqc_entry.state = HERMON_CQ_DISARMED;
301 cqc_entry.pg_offs = cq->cq_cqinfo.qa_pgoffs >> 5;
302 cqc_entry.log_cq_sz = log_cq_size;
303 cqc_entry.usr_page = uarpg;
304 cqc_entry.c_eqn = cq->cq_eqnum;
305 cqc_entry.log2_pgsz = mr->mr_log2_pgsz;
306 cqc_entry.mtt_base_addh = (uint32_t)((mr->mr_mttaddr >> 32) & 0xFF);
307 cqc_entry.mtt_base_addl = mr->mr_mttaddr >> 3;
308 cqc_entry.dbr_addrh = (uint32_t)((uint64_t)cq->cq_arm_ci_pdbr >> 32);
309 cqc_entry.dbr_addrl = (uint32_t)((uint64_t)cq->cq_arm_ci_pdbr >> 3);
310
311 /*
312 * Write the CQC entry to hardware - we pass ownership of
313 * the entry to the hardware (using the Hermon SW2HW_CQ firmware
314 * command). Note: In general, this operation shouldn't fail. But
315 * if it does, we have to undo everything we've done above before
316 * returning error.
317 */
318 status = hermon_cmn_ownership_cmd_post(state, SW2HW_CQ, &cqc_entry,
319 sizeof (hermon_hw_cqc_t), cq->cq_cqnum, sleepflag);
320 if (status != HERMON_CMD_SUCCESS) {
321 cmn_err(CE_CONT, "Hermon: SW2HW_CQ command failed: %08x\n",
322 status);
323 if (status == HERMON_CMD_INVALID_STATUS) {
324 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
325 }
326 status = ibc_get_ci_failure(0);
327 goto cqalloc_fail6;
328 }
329
330 /*
331 * Fill in the rest of the Hermon Completion Queue handle. Having
332 * successfully transferred ownership of the CQC, we can update the
333 * following fields for use in further operations on the CQ.
334 */
335 cq->cq_resize_hdl = 0;
336 cq->cq_cqcrsrcp = cqc;
337 cq->cq_rsrcp = rsrc;
338 cq->cq_consindx = 0;
339 /* least restrictive */
340 cq->cq_buf = buf;
341 cq->cq_bufsz = (1 << log_cq_size);
342 cq->cq_log_cqsz = log_cq_size;
343 cq->cq_mrhdl = mr;
344 cq->cq_refcnt = 0;
345 cq->cq_is_special = 0;
346 cq->cq_uarpg = uarpg;
347 cq->cq_umap_dhp = (devmap_cookie_t)NULL;
348 avl_create(&cq->cq_wrid_wqhdr_avl_tree, hermon_wrid_workq_compare,
349 sizeof (struct hermon_workq_avl_s),
350 offsetof(struct hermon_workq_avl_s, wqa_link));
351
352 cq->cq_hdlrarg = (void *)ibt_cqhdl;
353
354 /*
355 * Put CQ handle in Hermon CQNum-to-CQHdl list. Then fill in the
356 * "actual_size" and "cqhdl" and return success
357 */
358 hermon_icm_set_num_to_hdl(state, HERMON_CQC, cqc->hr_indx, cq);
359
360 /*
361 * If this is a user-mappable CQ, then we need to insert the previously
362 * allocated entry into the "userland resources database". This will
363 * allow for later lookup during devmap() (i.e. mmap()) calls.
364 */
365 if (cq->cq_is_umap) {
366 hermon_umap_db_add(umapdb);
367 }
368
369 /*
370 * Fill in the return arguments (if necessary). This includes the
371 * real completion queue size.
372 */
373 if (actual_size != NULL) {
374 *actual_size = (1 << log_cq_size) - 1;
375 }
376 *cqhdl = cq;
377
378 return (DDI_SUCCESS);
379
380 /*
381 * The following is cleanup for all possible failure cases in this routine
382 */
383 cqalloc_fail6:
384 if (hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
385 sleepflag) != DDI_SUCCESS) {
386 HERMON_WARNING(state, "failed to deregister CQ memory");
387 }
388 cqalloc_fail5:
389 hermon_queue_free(&cq->cq_cqinfo);
390 cqalloc_fail4a:
391 hermon_dbr_free(state, uarpg, cq->cq_arm_ci_vdbr);
392 cqalloc_fail4:
393 if (cq_is_umap) {
394 hermon_umap_db_free(umapdb);
395 }
396 cqalloc_fail3:
397 hermon_rsrc_free(state, &rsrc);
398 cqalloc_fail2:
399 hermon_rsrc_free(state, &cqc);
400 cqalloc_fail1:
401 hermon_pd_refcnt_dec(pd);
402 cqalloc_fail:
403 return (status);
404 }
405
406
407 /*
408 * hermon_cq_free()
409 * Context: Can be called only from user or kernel context.
410 */
411 /* ARGSUSED */
412 int
hermon_cq_free(hermon_state_t * state,hermon_cqhdl_t * cqhdl,uint_t sleepflag)413 hermon_cq_free(hermon_state_t *state, hermon_cqhdl_t *cqhdl, uint_t sleepflag)
414 {
415 hermon_rsrc_t *cqc, *rsrc;
416 hermon_umap_db_entry_t *umapdb;
417 hermon_hw_cqc_t cqc_entry;
418 hermon_pdhdl_t pd;
419 hermon_mrhdl_t mr;
420 hermon_cqhdl_t cq, resize;
421 uint32_t cqnum;
422 uint64_t value;
423 uint_t maxprot;
424 int status;
425
426 /*
427 * Pull all the necessary information from the Hermon Completion Queue
428 * handle. This is necessary here because the resource for the
429 * CQ handle is going to be freed up as part of this operation.
430 */
431 cq = *cqhdl;
432 mutex_enter(&cq->cq_lock);
433 cqc = cq->cq_cqcrsrcp;
434 rsrc = cq->cq_rsrcp;
435 pd = state->hs_pdhdl_internal;
436 mr = cq->cq_mrhdl;
437 cqnum = cq->cq_cqnum;
438
439 resize = cq->cq_resize_hdl; /* save the handle for later */
440
441 /*
442 * If there are work queues still associated with the CQ, then return
443 * an error. Otherwise, we will be holding the CQ lock.
444 */
445 if (cq->cq_refcnt != 0) {
446 mutex_exit(&cq->cq_lock);
447 return (IBT_CQ_BUSY);
448 }
449
450 /*
451 * If this was a user-mappable CQ, then we need to remove its entry
452 * from the "userland resources database". If it is also currently
453 * mmap()'d out to a user process, then we need to call
454 * devmap_devmem_remap() to remap the CQ memory to an invalid mapping.
455 * We also need to invalidate the CQ tracking information for the
456 * user mapping.
457 */
458 if (cq->cq_is_umap) {
459 status = hermon_umap_db_find(state->hs_instance, cqnum,
460 MLNX_UMAP_CQMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE,
461 &umapdb);
462 if (status != DDI_SUCCESS) {
463 mutex_exit(&cq->cq_lock);
464 HERMON_WARNING(state, "failed to find in database");
465 return (ibc_get_ci_failure(0));
466 }
467 hermon_umap_db_free(umapdb);
468 if (cq->cq_umap_dhp != NULL) {
469 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
470 status = devmap_devmem_remap(cq->cq_umap_dhp,
471 state->hs_dip, 0, 0, cq->cq_cqinfo.qa_size,
472 maxprot, DEVMAP_MAPPING_INVALID, NULL);
473 if (status != DDI_SUCCESS) {
474 mutex_exit(&cq->cq_lock);
475 HERMON_WARNING(state, "failed in CQ memory "
476 "devmap_devmem_remap()");
477 return (ibc_get_ci_failure(0));
478 }
479 cq->cq_umap_dhp = (devmap_cookie_t)NULL;
480 }
481 }
482
483 /*
484 * Put NULL into the Arbel CQNum-to-CQHdl list. This will allow any
485 * in-progress events to detect that the CQ corresponding to this
486 * number has been freed.
487 */
488 hermon_icm_set_num_to_hdl(state, HERMON_CQC, cqc->hr_indx, NULL);
489
490 mutex_exit(&cq->cq_lock);
491 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq))
492
493 /*
494 * Reclaim CQC entry from hardware (using the Hermon HW2SW_CQ
495 * firmware command). If the ownership transfer fails for any reason,
496 * then it is an indication that something (either in HW or SW) has
497 * gone seriously wrong.
498 */
499 status = hermon_cmn_ownership_cmd_post(state, HW2SW_CQ, &cqc_entry,
500 sizeof (hermon_hw_cqc_t), cqnum, sleepflag);
501 if (status != HERMON_CMD_SUCCESS) {
502 HERMON_WARNING(state, "failed to reclaim CQC ownership");
503 cmn_err(CE_CONT, "Hermon: HW2SW_CQ command failed: %08x\n",
504 status);
505 if (status == HERMON_CMD_INVALID_STATUS) {
506 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
507 }
508 return (ibc_get_ci_failure(0));
509 }
510
511 /*
512 * From here on, we start reliquishing resources - but check to see
513 * if a resize was in progress - if so, we need to relinquish those
514 * resources as well
515 */
516
517
518 /*
519 * Deregister the memory for the Completion Queue. If this fails
520 * for any reason, then it is an indication that something (either
521 * in HW or SW) has gone seriously wrong. So we print a warning
522 * message and return.
523 */
524 status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
525 sleepflag);
526 if (status != DDI_SUCCESS) {
527 HERMON_WARNING(state, "failed to deregister CQ memory");
528 return (ibc_get_ci_failure(0));
529 }
530
531 if (resize) { /* there was a pointer to a handle */
532 mr = resize->cq_mrhdl; /* reuse the pointer to the region */
533 status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
534 sleepflag);
535 if (status != DDI_SUCCESS) {
536 HERMON_WARNING(state, "failed to deregister resize CQ "
537 "memory");
538 return (ibc_get_ci_failure(0));
539 }
540 }
541
542 /* Free the memory for the CQ */
543 hermon_queue_free(&cq->cq_cqinfo);
544 if (resize) {
545 hermon_queue_free(&resize->cq_cqinfo);
546 /* and the temporary handle */
547 kmem_free(resize, sizeof (struct hermon_sw_cq_s));
548 }
549
550 /* everything else does not matter for the resize in progress */
551
552 /* Free the dbr */
553 hermon_dbr_free(state, cq->cq_uarpg, cq->cq_arm_ci_vdbr);
554
555 /* Free the Hermon Completion Queue handle */
556 hermon_rsrc_free(state, &rsrc);
557
558 /* Free up the CQC entry resource */
559 hermon_rsrc_free(state, &cqc);
560
561 /* Decrement the reference count on the protection domain (PD) */
562 hermon_pd_refcnt_dec(pd);
563
564 /* Set the cqhdl pointer to NULL and return success */
565 *cqhdl = NULL;
566
567 return (DDI_SUCCESS);
568 }
569
570
571 /*
572 * hermon_cq_resize()
573 * Context: Can be called only from user or kernel context.
574 */
575 int
hermon_cq_resize(hermon_state_t * state,hermon_cqhdl_t cq,uint_t req_size,uint_t * actual_size,uint_t sleepflag)576 hermon_cq_resize(hermon_state_t *state, hermon_cqhdl_t cq, uint_t req_size,
577 uint_t *actual_size, uint_t sleepflag)
578 {
579 hermon_hw_cqc_t cqc_entry;
580 hermon_cqhdl_t resize_hdl;
581 hermon_qalloc_info_t new_cqinfo;
582 ibt_mr_attr_t mr_attr;
583 hermon_mr_options_t op;
584 hermon_pdhdl_t pd;
585 hermon_mrhdl_t mr;
586 hermon_hw_cqe_t *buf;
587 uint32_t new_prod_indx;
588 uint_t log_cq_size;
589 int status, flag;
590
591 if (cq->cq_resize_hdl != 0) { /* already in process */
592 status = IBT_CQ_BUSY;
593 goto cqresize_fail;
594 }
595
596
597 /* Use the internal protection domain (PD) for CQs */
598 pd = state->hs_pdhdl_internal;
599
600 /*
601 * Calculate the appropriate size for the new resized completion queue.
602 * Note: All Hermon CQs must be a power-of-2 minus 1 in size. Also
603 * they may not be any smaller than HERMON_CQ_MIN_SIZE. This step is
604 * to round the requested size up to the next highest power-of-2
605 */
606 req_size = max(req_size, HERMON_CQ_MIN_SIZE);
607 log_cq_size = highbit(req_size);
608
609 /*
610 * Next we verify that the rounded-up size is valid (i.e. consistent
611 * with the device limits and/or software-configured limits)
612 */
613 if (log_cq_size > state->hs_cfg_profile->cp_log_max_cq_sz) {
614 status = IBT_HCA_CQ_EXCEEDED;
615 goto cqresize_fail;
616 }
617
618 /*
619 * Allocate the memory for newly resized Completion Queue.
620 *
621 * Note: Although we use the common queue allocation routine, we
622 * always specify HERMON_QUEUE_LOCATION_NORMAL (i.e. CQ located in
623 * kernel system memory) for kernel CQs because it would be
624 * inefficient to have CQs located in DDR memory. This is the same
625 * as we do when we first allocate completion queues primarily
626 * because CQs are read from (by software) more than they are written
627 * to. (We always specify HERMON_QUEUE_LOCATION_USERLAND for all
628 * user-mappable CQs for a similar reason.)
629 * It is also worth noting that, unlike Hermon QP work queues,
630 * completion queues do not have the same strict alignment
631 * requirements. It is sufficient for the CQ memory to be both
632 * aligned to and bound to addresses which are a multiple of CQE size.
633 */
634
635 /* first, alloc the resize_handle */
636 resize_hdl = kmem_zalloc(sizeof (struct hermon_sw_cq_s), KM_SLEEP);
637
638 new_cqinfo.qa_size = (1 << log_cq_size) * sizeof (hermon_hw_cqe_t);
639 new_cqinfo.qa_alloc_align = PAGESIZE;
640 new_cqinfo.qa_bind_align = PAGESIZE;
641 if (cq->cq_is_umap) {
642 new_cqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
643 } else {
644 new_cqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
645 }
646 status = hermon_queue_alloc(state, &new_cqinfo, sleepflag);
647 if (status != DDI_SUCCESS) {
648 /* free the resize handle */
649 kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
650 status = IBT_INSUFF_RESOURCE;
651 goto cqresize_fail;
652 }
653 buf = (hermon_hw_cqe_t *)new_cqinfo.qa_buf_aligned;
654 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))
655
656 /*
657 * No initialization of the cq is needed - the command will do it
658 */
659
660 /*
661 * Register the memory for the CQ. The memory for the CQ must
662 * be registered in the Hermon TPT tables. This gives us the LKey
663 * to specify in the CQ context below.
664 */
665 flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP;
666 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
667 mr_attr.mr_len = new_cqinfo.qa_size;
668 mr_attr.mr_as = NULL;
669 mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
670 op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
671 op.mro_bind_dmahdl = new_cqinfo.qa_dmahdl;
672 op.mro_bind_override_addr = 0;
673 status = hermon_mr_register(state, pd, &mr_attr, &mr, &op,
674 HERMON_CQ_CMPT);
675 if (status != DDI_SUCCESS) {
676 hermon_queue_free(&new_cqinfo);
677 /* free the resize handle */
678 kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
679 status = IBT_INSUFF_RESOURCE;
680 goto cqresize_fail;
681 }
682 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
683
684 /*
685 * Now we grab the CQ lock. Since we will be updating the actual
686 * CQ location and the producer/consumer indexes, we should hold
687 * the lock.
688 *
689 * We do a ARBEL_NOSLEEP here (and below), though, because we are
690 * holding the "cq_lock" and if we got raised to interrupt level
691 * by priority inversion, we would not want to block in this routine
692 * waiting for success.
693 */
694 mutex_enter(&cq->cq_lock);
695
696 /*
697 * Fill in the CQC entry. For the resize operation this is the
698 * final step before attempting the resize operation on the CQC entry.
699 * We use all of the information collected/calculated above to fill
700 * in the requisite portions of the CQC.
701 */
702 bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
703 cqc_entry.log_cq_sz = log_cq_size;
704 cqc_entry.pg_offs = new_cqinfo.qa_pgoffs >> 5;
705 cqc_entry.log2_pgsz = mr->mr_log2_pgsz;
706 cqc_entry.mtt_base_addh = (uint32_t)((mr->mr_mttaddr >> 32) & 0xFF);
707 cqc_entry.mtt_base_addl = mr->mr_mttaddr >> 3;
708
709 /*
710 * Write the CQC entry to hardware. Lastly, we pass ownership of
711 * the entry to the hardware (using the Hermon RESIZE_CQ firmware
712 * command). Note: In general, this operation shouldn't fail. But
713 * if it does, we have to undo everything we've done above before
714 * returning error. Also note that the status returned may indicate
715 * the code to return to the IBTF.
716 */
717 status = hermon_resize_cq_cmd_post(state, &cqc_entry, cq->cq_cqnum,
718 &new_prod_indx, HERMON_CMD_NOSLEEP_SPIN);
719 if (status != HERMON_CMD_SUCCESS) {
720 /* Resize attempt has failed, drop CQ lock and cleanup */
721 mutex_exit(&cq->cq_lock);
722 if (hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
723 sleepflag) != DDI_SUCCESS) {
724 HERMON_WARNING(state, "failed to deregister CQ memory");
725 }
726 kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
727 hermon_queue_free(&new_cqinfo);
728 if (status == HERMON_CMD_BAD_SIZE) {
729 return (IBT_CQ_SZ_INSUFFICIENT);
730 } else {
731 cmn_err(CE_CONT, "Hermon: RESIZE_CQ command failed: "
732 "%08x\n", status);
733 if (status == HERMON_CMD_INVALID_STATUS) {
734 hermon_fm_ereport(state, HCA_SYS_ERR,
735 HCA_ERR_SRV_LOST);
736 }
737 return (ibc_get_ci_failure(0));
738 }
739 }
740
741 /*
742 * For Hermon, we've alloc'd another handle structure and save off the
743 * important things in it. Then, in polling we check to see if there's
744 * a "resizing handle" and if so we look for the "special CQE", opcode
745 * 0x16, that indicates the transition to the new buffer.
746 *
747 * At that point, we'll adjust everything - including dereg and
748 * freeing of the original buffer, updating all the necessary fields
749 * in the cq_hdl, and setting up for the next cqe polling
750 */
751
752 resize_hdl->cq_buf = buf;
753 resize_hdl->cq_bufsz = (1 << log_cq_size);
754 resize_hdl->cq_mrhdl = mr;
755 resize_hdl->cq_log_cqsz = log_cq_size;
756
757 bcopy(&new_cqinfo, &(resize_hdl->cq_cqinfo),
758 sizeof (struct hermon_qalloc_info_s));
759
760 /* now, save the address in the cq_handle */
761 cq->cq_resize_hdl = resize_hdl;
762
763 /*
764 * Drop the CQ lock now.
765 */
766
767 mutex_exit(&cq->cq_lock);
768 /*
769 * Fill in the return arguments (if necessary). This includes the
770 * real new completion queue size.
771 */
772 if (actual_size != NULL) {
773 *actual_size = (1 << log_cq_size) - 1;
774 }
775
776 return (DDI_SUCCESS);
777
778 cqresize_fail:
779 return (status);
780 }
781
782
783 /*
784 * hermon_cq_modify()
785 * Context: Can be called base context.
786 */
787 /* ARGSUSED */
788 int
hermon_cq_modify(hermon_state_t * state,hermon_cqhdl_t cq,uint_t count,uint_t usec,ibt_cq_handler_id_t hid,uint_t sleepflag)789 hermon_cq_modify(hermon_state_t *state, hermon_cqhdl_t cq,
790 uint_t count, uint_t usec, ibt_cq_handler_id_t hid, uint_t sleepflag)
791 {
792 int status;
793 hermon_hw_cqc_t cqc_entry;
794
795 mutex_enter(&cq->cq_lock);
796 if (count != cq->cq_intmod_count ||
797 usec != cq->cq_intmod_usec) {
798 bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
799 cqc_entry.cq_max_cnt = count;
800 cqc_entry.cq_period = usec;
801 status = hermon_modify_cq_cmd_post(state, &cqc_entry,
802 cq->cq_cqnum, MODIFY_MODERATION_CQ, sleepflag);
803 if (status != HERMON_CMD_SUCCESS) {
804 mutex_exit(&cq->cq_lock);
805 cmn_err(CE_CONT, "Hermon: MODIFY_MODERATION_CQ "
806 "command failed: %08x\n", status);
807 if (status == HERMON_CMD_INVALID_STATUS) {
808 hermon_fm_ereport(state, HCA_SYS_ERR,
809 HCA_ERR_SRV_LOST);
810 }
811 return (ibc_get_ci_failure(0));
812 }
813 cq->cq_intmod_count = count;
814 cq->cq_intmod_usec = usec;
815 }
816 if (hid && (hid - 1 != cq->cq_eqnum)) {
817 bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
818 cqc_entry.c_eqn = HERMON_HID_TO_EQNUM(state, hid);
819 status = hermon_modify_cq_cmd_post(state, &cqc_entry,
820 cq->cq_cqnum, MODIFY_EQN, sleepflag);
821 if (status != HERMON_CMD_SUCCESS) {
822 mutex_exit(&cq->cq_lock);
823 cmn_err(CE_CONT, "Hermon: MODIFY_EQN command failed: "
824 "%08x\n", status);
825 if (status == HERMON_CMD_INVALID_STATUS) {
826 hermon_fm_ereport(state, HCA_SYS_ERR,
827 HCA_ERR_SRV_LOST);
828 }
829 return (ibc_get_ci_failure(0));
830 }
831 cq->cq_eqnum = hid - 1;
832 }
833 mutex_exit(&cq->cq_lock);
834 return (DDI_SUCCESS);
835 }
836
837 /*
838 * hermon_cq_notify()
839 * Context: Can be called from interrupt or base context.
840 */
841 int
hermon_cq_notify(hermon_state_t * state,hermon_cqhdl_t cq,ibt_cq_notify_flags_t flags)842 hermon_cq_notify(hermon_state_t *state, hermon_cqhdl_t cq,
843 ibt_cq_notify_flags_t flags)
844 {
845 uint_t cmd;
846 ibt_status_t status;
847
848 /* Validate IBT flags and call doorbell routine. */
849 if (flags == IBT_NEXT_COMPLETION) {
850 cmd = HERMON_CQDB_NOTIFY_CQ;
851 } else if (flags == IBT_NEXT_SOLICITED) {
852 cmd = HERMON_CQDB_NOTIFY_CQ_SOLICIT;
853 } else {
854 return (IBT_CQ_NOTIFY_TYPE_INVALID);
855 }
856
857 status = hermon_cq_arm_doorbell(state, cq, cmd);
858 return (status);
859 }
860
861
862 /*
863 * hermon_cq_poll()
864 * Context: Can be called from interrupt or base context.
865 */
866 int
hermon_cq_poll(hermon_state_t * state,hermon_cqhdl_t cq,ibt_wc_t * wc_p,uint_t num_wc,uint_t * num_polled)867 hermon_cq_poll(hermon_state_t *state, hermon_cqhdl_t cq, ibt_wc_t *wc_p,
868 uint_t num_wc, uint_t *num_polled)
869 {
870 hermon_hw_cqe_t *cqe;
871 uint_t opcode;
872 uint32_t cons_indx, wrap_around_mask, shift, mask;
873 uint32_t polled_cnt, spec_op = 0;
874 int status;
875
876 /*
877 * Check for user-mappable CQ memory. Note: We do not allow kernel
878 * clients to poll CQ memory that is accessible directly by the user.
879 * If the CQ memory is user accessible, then return an error.
880 */
881 if (cq->cq_is_umap) {
882 return (IBT_CQ_HDL_INVALID);
883 }
884
885 mutex_enter(&cq->cq_lock);
886
887 /* Get the consumer index */
888 cons_indx = cq->cq_consindx;
889 shift = cq->cq_log_cqsz;
890 mask = cq->cq_bufsz;
891
892 /*
893 * Calculate the wrap around mask. Note: This operation only works
894 * because all Hermon completion queues have power-of-2 sizes
895 */
896 wrap_around_mask = (cq->cq_bufsz - 1);
897
898 /* Calculate the pointer to the first CQ entry */
899 cqe = &cq->cq_buf[cons_indx & wrap_around_mask];
900
901 /*
902 * Keep pulling entries from the CQ until we find an entry owned by
903 * the hardware. As long as there are CQEs owned by SW, process
904 * each entry by calling hermon_cq_cqe_consume() and updating the CQ
905 * consumer index. Note: We only update the consumer index if
906 * hermon_cq_cqe_consume() returns HERMON_CQ_SYNC_AND_DB. Otherwise,
907 * it indicates that we are going to "recycle" the CQE (probably
908 * because it is an error CQE and corresponds to more than one
909 * completion).
910 */
911 polled_cnt = 0;
912 while (HERMON_CQE_OWNER_IS_SW(cq, cqe, cons_indx, shift, mask)) {
913 if (cq->cq_resize_hdl != 0) { /* in midst of resize */
914 /* peek at the opcode */
915 opcode = HERMON_CQE_OPCODE_GET(cq, cqe);
916 if (opcode == HERMON_CQE_RCV_RESIZE_CODE) {
917 hermon_cq_resize_helper(state, cq);
918
919 /* Increment the consumer index */
920 cons_indx = (cons_indx + 1);
921 spec_op = 1; /* plus one for the limiting CQE */
922
923 wrap_around_mask = (cq->cq_bufsz - 1);
924
925 /* Update the pointer to the next CQ entry */
926 cqe = &cq->cq_buf[cons_indx & wrap_around_mask];
927
928 continue;
929 }
930 } /* in resizing CQ */
931
932 /*
933 * either resizing and not the special opcode, or
934 * not resizing at all
935 */
936 hermon_cq_cqe_consume(state, cq, cqe, &wc_p[polled_cnt++]);
937
938 /* Increment the consumer index */
939 cons_indx = (cons_indx + 1);
940
941 /* Update the pointer to the next CQ entry */
942 cqe = &cq->cq_buf[cons_indx & wrap_around_mask];
943
944 /*
945 * If we have run out of space to store work completions,
946 * then stop and return the ones we have pulled of the CQ.
947 */
948 if (polled_cnt >= num_wc) {
949 break;
950 }
951 }
952
953 /*
954 * Now we only ring the doorbell (to update the consumer index) if
955 * we've actually consumed a CQ entry.
956 */
957 if ((polled_cnt != 0) && (cq->cq_consindx != cons_indx)) {
958 /*
959 * Update the consumer index in both the CQ handle and the
960 * doorbell record.
961 */
962 cq->cq_consindx = cons_indx;
963 hermon_cq_update_ci_doorbell(cq);
964
965 } else if (polled_cnt == 0) {
966 if (spec_op != 0) {
967 /* if we got the special opcode, update the consindx */
968 cq->cq_consindx = cons_indx;
969 hermon_cq_update_ci_doorbell(cq);
970 }
971 }
972
973 mutex_exit(&cq->cq_lock);
974
975 /* Set "num_polled" (if necessary) */
976 if (num_polled != NULL) {
977 *num_polled = polled_cnt;
978 }
979
980 /* Set CQ_EMPTY condition if needed, otherwise return success */
981 if (polled_cnt == 0) {
982 status = IBT_CQ_EMPTY;
983 } else {
984 status = DDI_SUCCESS;
985 }
986
987 /*
988 * Check if the system is currently panicking. If it is, then call
989 * the Hermon interrupt service routine. This step is necessary here
990 * because we might be in a polled I/O mode and without the call to
991 * hermon_isr() - and its subsequent calls to poll and rearm each
992 * event queue - we might overflow our EQs and render the system
993 * unable to sync/dump.
994 */
995 if (ddi_in_panic() != 0) {
996 (void) hermon_isr((caddr_t)state, (caddr_t)NULL);
997 }
998 return (status);
999 }
1000
1001 /*
1002 * cmd_sn must be initialized to 1 to enable proper reenabling
1003 * by hermon_arm_cq_dbr_update().
1004 */
1005 static void
hermon_arm_cq_dbr_init(hermon_dbr_t * cq_arm_dbr)1006 hermon_arm_cq_dbr_init(hermon_dbr_t *cq_arm_dbr)
1007 {
1008 uint32_t *target;
1009
1010 target = (uint32_t *)cq_arm_dbr + 1;
1011 *target = htonl(1 << HERMON_CQDB_CMDSN_SHIFT);
1012 }
1013
1014
1015 /*
1016 * User cmd_sn needs help from this kernel function to know
1017 * when it should be incremented (modulo 4). We do an atomic
1018 * update of the arm_cq dbr to communicate this fact. We retry
1019 * in the case that user library is racing with us. We zero
1020 * out the cmd field so that the user library can use the cmd
1021 * field to track the last command it issued (solicited verses any).
1022 */
1023 static void
hermon_arm_cq_dbr_update(hermon_dbr_t * cq_arm_dbr)1024 hermon_arm_cq_dbr_update(hermon_dbr_t *cq_arm_dbr)
1025 {
1026 uint32_t tmp, cmp, new;
1027 uint32_t old_cmd_sn, new_cmd_sn;
1028 uint32_t *target;
1029 int retries = 0;
1030
1031 target = (uint32_t *)cq_arm_dbr + 1;
1032 retry:
1033 cmp = *target;
1034 tmp = htonl(cmp);
1035 old_cmd_sn = tmp & (0x3 << HERMON_CQDB_CMDSN_SHIFT);
1036 new_cmd_sn = (old_cmd_sn + (0x1 << HERMON_CQDB_CMDSN_SHIFT)) &
1037 (0x3 << HERMON_CQDB_CMDSN_SHIFT);
1038 new = htonl((tmp & ~(0x37 << HERMON_CQDB_CMD_SHIFT)) | new_cmd_sn);
1039 tmp = atomic_cas_32(target, cmp, new);
1040 if (tmp != cmp) { /* cas failed, so need to retry */
1041 drv_usecwait(retries & 0xff); /* avoid race */
1042 if (++retries > 100000) {
1043 cmn_err(CE_CONT, "cas failed in hermon\n");
1044 retries = 0;
1045 }
1046 goto retry;
1047 }
1048 }
1049
1050
1051 /*
1052 * hermon_cq_handler()
1053 * Context: Only called from interrupt context
1054 */
1055 /* ARGSUSED */
1056 int
hermon_cq_handler(hermon_state_t * state,hermon_eqhdl_t eq,hermon_hw_eqe_t * eqe)1057 hermon_cq_handler(hermon_state_t *state, hermon_eqhdl_t eq,
1058 hermon_hw_eqe_t *eqe)
1059 {
1060 hermon_cqhdl_t cq;
1061 uint_t cqnum;
1062
1063 /* Get the CQ handle from CQ number in event descriptor */
1064 cqnum = HERMON_EQE_CQNUM_GET(eq, eqe);
1065 cq = hermon_cqhdl_from_cqnum(state, cqnum);
1066
1067 /*
1068 * If the CQ handle is NULL, this is probably an indication
1069 * that the CQ has been freed already. In which case, we
1070 * should not deliver this event.
1071 *
1072 * We also check that the CQ number in the handle is the
1073 * same as the CQ number in the event queue entry. This
1074 * extra check allows us to handle the case where a CQ was
1075 * freed and then allocated again in the time it took to
1076 * handle the event queue processing. By constantly incrementing
1077 * the non-constrained portion of the CQ number every time
1078 * a new CQ is allocated, we mitigate (somewhat) the chance
1079 * that a stale event could be passed to the client's CQ
1080 * handler.
1081 *
1082 * Lastly, we check if "hs_ibtfpriv" is NULL. If it is then it
1083 * means that we've have either received this event before we
1084 * finished attaching to the IBTF or we've received it while we
1085 * are in the process of detaching.
1086 */
1087 if ((cq != NULL) && (cq->cq_cqnum == cqnum) &&
1088 (state->hs_ibtfpriv != NULL)) {
1089 hermon_arm_cq_dbr_update(cq->cq_arm_ci_vdbr);
1090 HERMON_DO_IBTF_CQ_CALLB(state, cq);
1091 }
1092
1093 return (DDI_SUCCESS);
1094 }
1095
1096
1097 /*
1098 * hermon_cq_err_handler()
1099 * Context: Only called from interrupt context
1100 */
1101 /* ARGSUSED */
1102 int
hermon_cq_err_handler(hermon_state_t * state,hermon_eqhdl_t eq,hermon_hw_eqe_t * eqe)1103 hermon_cq_err_handler(hermon_state_t *state, hermon_eqhdl_t eq,
1104 hermon_hw_eqe_t *eqe)
1105 {
1106 hermon_cqhdl_t cq;
1107 uint_t cqnum;
1108 ibc_async_event_t event;
1109 ibt_async_code_t type;
1110
1111 HERMON_FMANOTE(state, HERMON_FMA_OVERRUN);
1112 /* Get the CQ handle from CQ number in event descriptor */
1113 cqnum = HERMON_EQE_CQNUM_GET(eq, eqe);
1114 cq = hermon_cqhdl_from_cqnum(state, cqnum);
1115
1116 /*
1117 * If the CQ handle is NULL, this is probably an indication
1118 * that the CQ has been freed already. In which case, we
1119 * should not deliver this event.
1120 *
1121 * We also check that the CQ number in the handle is the
1122 * same as the CQ number in the event queue entry. This
1123 * extra check allows us to handle the case where a CQ was
1124 * freed and then allocated again in the time it took to
1125 * handle the event queue processing. By constantly incrementing
1126 * the non-constrained portion of the CQ number every time
1127 * a new CQ is allocated, we mitigate (somewhat) the chance
1128 * that a stale event could be passed to the client's CQ
1129 * handler.
1130 *
1131 * And then we check if "hs_ibtfpriv" is NULL. If it is then it
1132 * means that we've have either received this event before we
1133 * finished attaching to the IBTF or we've received it while we
1134 * are in the process of detaching.
1135 */
1136 if ((cq != NULL) && (cq->cq_cqnum == cqnum) &&
1137 (state->hs_ibtfpriv != NULL)) {
1138 event.ev_cq_hdl = (ibt_cq_hdl_t)cq->cq_hdlrarg;
1139 type = IBT_ERROR_CQ;
1140 HERMON_DO_IBTF_ASYNC_CALLB(state, type, &event);
1141 }
1142
1143 return (DDI_SUCCESS);
1144 }
1145
1146
1147 /*
1148 * hermon_cq_refcnt_inc()
1149 * Context: Can be called from interrupt or base context.
1150 */
1151 int
hermon_cq_refcnt_inc(hermon_cqhdl_t cq,uint_t is_special)1152 hermon_cq_refcnt_inc(hermon_cqhdl_t cq, uint_t is_special)
1153 {
1154 /*
1155 * Increment the completion queue's reference count. Note: In order
1156 * to ensure compliance with IBA C11-15, we must ensure that a given
1157 * CQ is not used for both special (SMI/GSI) QP and non-special QP.
1158 * This is accomplished here by keeping track of how the referenced
1159 * CQ is being used.
1160 */
1161 mutex_enter(&cq->cq_lock);
1162 if (cq->cq_refcnt == 0) {
1163 cq->cq_is_special = is_special;
1164 } else {
1165 if (cq->cq_is_special != is_special) {
1166 mutex_exit(&cq->cq_lock);
1167 return (DDI_FAILURE);
1168 }
1169 }
1170 cq->cq_refcnt++;
1171 mutex_exit(&cq->cq_lock);
1172 return (DDI_SUCCESS);
1173 }
1174
1175
1176 /*
1177 * hermon_cq_refcnt_dec()
1178 * Context: Can be called from interrupt or base context.
1179 */
1180 void
hermon_cq_refcnt_dec(hermon_cqhdl_t cq)1181 hermon_cq_refcnt_dec(hermon_cqhdl_t cq)
1182 {
1183 /* Decrement the completion queue's reference count */
1184 mutex_enter(&cq->cq_lock);
1185 cq->cq_refcnt--;
1186 mutex_exit(&cq->cq_lock);
1187 }
1188
1189
1190 /*
1191 * hermon_cq_arm_doorbell()
1192 * Context: Can be called from interrupt or base context.
1193 */
1194 static int
hermon_cq_arm_doorbell(hermon_state_t * state,hermon_cqhdl_t cq,uint_t cq_cmd)1195 hermon_cq_arm_doorbell(hermon_state_t *state, hermon_cqhdl_t cq, uint_t cq_cmd)
1196 {
1197 uint32_t cq_num;
1198 uint32_t *target;
1199 uint32_t old_cmd, cmp, new, tmp, cmd_sn;
1200 ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);
1201
1202 /* initialize the FMA retry loop */
1203 hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);
1204
1205 cq_num = cq->cq_cqnum;
1206 target = (uint32_t *)cq->cq_arm_ci_vdbr + 1;
1207
1208 /* the FMA retry loop starts for Hermon doorbell register. */
1209 hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt, fm_status,
1210 fm_test_num);
1211 retry:
1212 cmp = *target;
1213 tmp = htonl(cmp);
1214 old_cmd = tmp & (0x7 << HERMON_CQDB_CMD_SHIFT);
1215 cmd_sn = tmp & (0x3 << HERMON_CQDB_CMDSN_SHIFT);
1216 if (cq_cmd == HERMON_CQDB_NOTIFY_CQ) {
1217 if (old_cmd != HERMON_CQDB_NOTIFY_CQ) {
1218 cmd_sn |= (HERMON_CQDB_NOTIFY_CQ <<
1219 HERMON_CQDB_CMD_SHIFT);
1220 new = htonl(cmd_sn | (cq->cq_consindx & 0xFFFFFF));
1221 tmp = atomic_cas_32(target, cmp, new);
1222 if (tmp != cmp)
1223 goto retry;
1224 HERMON_UAR_DOORBELL(state, uarhdl, (uint64_t *)(void *)
1225 &state->hs_uar->cq, (((uint64_t)cmd_sn | cq_num) <<
1226 32) | (cq->cq_consindx & 0xFFFFFF));
1227 } /* else it's already armed */
1228 } else {
1229 ASSERT(cq_cmd == HERMON_CQDB_NOTIFY_CQ_SOLICIT);
1230 if (old_cmd != HERMON_CQDB_NOTIFY_CQ &&
1231 old_cmd != HERMON_CQDB_NOTIFY_CQ_SOLICIT) {
1232 cmd_sn |= (HERMON_CQDB_NOTIFY_CQ_SOLICIT <<
1233 HERMON_CQDB_CMD_SHIFT);
1234 new = htonl(cmd_sn | (cq->cq_consindx & 0xFFFFFF));
1235 tmp = atomic_cas_32(target, cmp, new);
1236 if (tmp != cmp)
1237 goto retry;
1238 HERMON_UAR_DOORBELL(state, uarhdl, (uint64_t *)(void *)
1239 &state->hs_uar->cq, (((uint64_t)cmd_sn | cq_num) <<
1240 32) | (cq->cq_consindx & 0xFFFFFF));
1241 } /* else it's already armed */
1242 }
1243
1244 /* the FMA retry loop ends. */
1245 hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt, fm_status,
1246 fm_test_num);
1247
1248 return (IBT_SUCCESS);
1249
1250 pio_error:
1251 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1252 return (ibc_get_ci_failure(0));
1253 }
1254
1255
1256 /*
1257 * hermon_cqhdl_from_cqnum()
1258 * Context: Can be called from interrupt or base context.
1259 *
1260 * This routine is important because changing the unconstrained
1261 * portion of the CQ number is critical to the detection of a
1262 * potential race condition in the CQ handler code (i.e. the case
1263 * where a CQ is freed and alloc'd again before an event for the
1264 * "old" CQ can be handled).
1265 *
1266 * While this is not a perfect solution (not sure that one exists)
1267 * it does help to mitigate the chance that this race condition will
1268 * cause us to deliver a "stale" event to the new CQ owner. Note:
1269 * this solution does not scale well because the number of constrained
1270 * bits increases (and, hence, the number of unconstrained bits
1271 * decreases) as the number of supported CQs grows. For small and
1272 * intermediate values, it should hopefully provide sufficient
1273 * protection.
1274 */
1275 hermon_cqhdl_t
hermon_cqhdl_from_cqnum(hermon_state_t * state,uint_t cqnum)1276 hermon_cqhdl_from_cqnum(hermon_state_t *state, uint_t cqnum)
1277 {
1278 uint_t cqindx, cqmask;
1279
1280 /* Calculate the CQ table index from the cqnum */
1281 cqmask = (1 << state->hs_cfg_profile->cp_log_num_cq) - 1;
1282 cqindx = cqnum & cqmask;
1283 return (hermon_icm_num_to_hdl(state, HERMON_CQC, cqindx));
1284 }
1285
1286 /*
1287 * hermon_cq_cqe_consume()
1288 * Context: Can be called from interrupt or base context.
1289 */
1290 static void
hermon_cq_cqe_consume(hermon_state_t * state,hermon_cqhdl_t cq,hermon_hw_cqe_t * cqe,ibt_wc_t * wc)1291 hermon_cq_cqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
1292 hermon_hw_cqe_t *cqe, ibt_wc_t *wc)
1293 {
1294 uint_t opcode, qpnum, qp1_indx;
1295 ibt_wc_flags_t flags;
1296 ibt_wrc_opcode_t type;
1297
1298 /*
1299 * Determine if this is an "error" CQE by examining "opcode". If it
1300 * is an error CQE, then call hermon_cq_errcqe_consume() and return
1301 * whatever status it returns. Otherwise, this is a successful
1302 * completion.
1303 */
1304 opcode = HERMON_CQE_OPCODE_GET(cq, cqe);
1305 if ((opcode == HERMON_CQE_SEND_ERR_OPCODE) ||
1306 (opcode == HERMON_CQE_RECV_ERR_OPCODE)) {
1307 hermon_cq_errcqe_consume(state, cq, cqe, wc);
1308 return;
1309 }
1310
1311 /*
1312 * Fetch the Work Request ID using the information in the CQE.
1313 * See hermon_wr.c for more details.
1314 */
1315 wc->wc_id = hermon_wrid_get_entry(cq, cqe);
1316
1317 /*
1318 * Parse the CQE opcode to determine completion type. This will set
1319 * not only the type of the completion, but also any flags that might
1320 * be associated with it (e.g. whether immediate data is present).
1321 */
1322 flags = IBT_WC_NO_FLAGS;
1323 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->hs_fcoib_may_be_running))
1324 if (HERMON_CQE_SENDRECV_GET(cq, cqe) != HERMON_COMPLETION_RECV) {
1325
1326 /* Send CQE */
1327 switch (opcode) {
1328 case HERMON_CQE_SND_RDMAWR_IMM:
1329 case HERMON_CQE_SND_RDMAWR:
1330 type = IBT_WRC_RDMAW;
1331 break;
1332
1333 case HERMON_CQE_SND_SEND_INV:
1334 case HERMON_CQE_SND_SEND_IMM:
1335 case HERMON_CQE_SND_SEND:
1336 type = IBT_WRC_SEND;
1337 break;
1338
1339 case HERMON_CQE_SND_LSO:
1340 type = IBT_WRC_SEND_LSO;
1341 break;
1342
1343 case HERMON_CQE_SND_RDMARD:
1344 type = IBT_WRC_RDMAR;
1345 break;
1346
1347 case HERMON_CQE_SND_ATOMIC_CS:
1348 type = IBT_WRC_CSWAP;
1349 break;
1350
1351 case HERMON_CQE_SND_ATOMIC_FA:
1352 type = IBT_WRC_FADD;
1353 break;
1354
1355 case HERMON_CQE_SND_BIND_MW:
1356 type = IBT_WRC_BIND;
1357 break;
1358
1359 case HERMON_CQE_SND_FRWR:
1360 type = IBT_WRC_FAST_REG_PMR;
1361 break;
1362
1363 case HERMON_CQE_SND_LCL_INV:
1364 type = IBT_WRC_LOCAL_INVALIDATE;
1365 break;
1366
1367 default:
1368 HERMON_WARNING(state, "unknown send CQE type");
1369 wc->wc_status = IBT_WC_LOCAL_QP_OP_ERR;
1370 return;
1371 }
1372 } else if ((state->hs_fcoib_may_be_running == B_TRUE) &&
1373 hermon_fcoib_is_fexch_qpn(state, HERMON_CQE_QPNUM_GET(cq, cqe))) {
1374 type = IBT_WRC_RECV;
1375 if (HERMON_CQE_FEXCH_DIFE(cq, cqe))
1376 flags |= IBT_WC_DIF_ERROR;
1377 wc->wc_bytes_xfer = HERMON_CQE_BYTECNT_GET(cq, cqe);
1378 wc->wc_fexch_seq_cnt = HERMON_CQE_FEXCH_SEQ_CNT(cq, cqe);
1379 wc->wc_fexch_tx_bytes_xfer = HERMON_CQE_FEXCH_TX_BYTES(cq, cqe);
1380 wc->wc_fexch_rx_bytes_xfer = HERMON_CQE_FEXCH_RX_BYTES(cq, cqe);
1381 wc->wc_fexch_seq_id = HERMON_CQE_FEXCH_SEQ_ID(cq, cqe);
1382 wc->wc_detail = HERMON_CQE_FEXCH_DETAIL(cq, cqe) &
1383 IBT_WC_DETAIL_FC_MATCH_MASK;
1384 wc->wc_rkey = HERMON_CQE_IMM_ETH_PKEY_CRED_GET(cq, cqe);
1385 flags |= IBT_WC_FEXCH_FMT | IBT_WC_RKEY_INVALIDATED;
1386 } else {
1387 /*
1388 * Parse the remaining contents of the CQE into the work
1389 * completion. This means filling in SL, QP number, SLID,
1390 * immediate data, etc.
1391 *
1392 * Note: Not all of these fields are valid in a given
1393 * completion. Many of them depend on the actual type of
1394 * completion. So we fill in all of the fields and leave
1395 * it up to the IBTF and consumer to sort out which are
1396 * valid based on their context.
1397 */
1398 wc->wc_sl = HERMON_CQE_SL_GET(cq, cqe);
1399 wc->wc_qpn = HERMON_CQE_DQPN_GET(cq, cqe);
1400 wc->wc_slid = HERMON_CQE_DLID_GET(cq, cqe);
1401 wc->wc_immed_data =
1402 HERMON_CQE_IMM_ETH_PKEY_CRED_GET(cq, cqe);
1403 wc->wc_ethertype = (wc->wc_immed_data & 0xFFFF);
1404 wc->wc_pkey_ix = (wc->wc_immed_data &
1405 ((1 << state->hs_queryport.log_max_pkey) - 1));
1406 /*
1407 * Fill in "bytes transferred" as appropriate. Also,
1408 * if necessary, fill in the "path bits" field.
1409 */
1410 wc->wc_path_bits = HERMON_CQE_PATHBITS_GET(cq, cqe);
1411 wc->wc_bytes_xfer = HERMON_CQE_BYTECNT_GET(cq, cqe);
1412
1413 /*
1414 * Check for GRH, update the flags, then fill in "wc_flags"
1415 * field in the work completion
1416 */
1417 if (HERMON_CQE_GRH_GET(cq, cqe) != 0) {
1418 flags |= IBT_WC_GRH_PRESENT;
1419 }
1420
1421 /* Receive CQE */
1422 switch (opcode) {
1423 case HERMON_CQE_RCV_SEND_IMM:
1424 /*
1425 * Note: According to the PRM, all QP1 recv
1426 * completions look like the result of a Send with
1427 * Immediate. They are not, however, (MADs are Send
1428 * Only) so we need to check the QP number and set
1429 * the flag only if it is non-QP1.
1430 */
1431 qpnum = HERMON_CQE_QPNUM_GET(cq, cqe);
1432 qp1_indx = state->hs_spec_qp1->hr_indx;
1433 if ((qpnum < qp1_indx) || (qpnum > qp1_indx + 1)) {
1434 flags |= IBT_WC_IMMED_DATA_PRESENT;
1435 }
1436 /* FALLTHROUGH */
1437
1438 case HERMON_CQE_RCV_SEND:
1439 type = IBT_WRC_RECV;
1440 if (HERMON_CQE_IS_IPOK(cq, cqe)) {
1441 wc->wc_cksum = HERMON_CQE_CKSUM(cq, cqe);
1442 flags |= IBT_WC_CKSUM_OK;
1443 wc->wc_detail = IBT_WC_DETAIL_ALL_FLAGS_MASK &
1444 HERMON_CQE_IPOIB_STATUS(cq, cqe);
1445 }
1446 break;
1447
1448 case HERMON_CQE_RCV_SEND_INV:
1449 type = IBT_WRC_RECV;
1450 flags |= IBT_WC_RKEY_INVALIDATED;
1451 wc->wc_rkey = wc->wc_immed_data; /* same field in cqe */
1452 break;
1453
1454 case HERMON_CQE_RCV_RDMAWR_IMM:
1455 flags |= IBT_WC_IMMED_DATA_PRESENT;
1456 type = IBT_WRC_RECV_RDMAWI;
1457 break;
1458
1459 default:
1460
1461 HERMON_WARNING(state, "unknown recv CQE type");
1462 wc->wc_status = IBT_WC_LOCAL_QP_OP_ERR;
1463 return;
1464 }
1465 }
1466 wc->wc_type = type;
1467 wc->wc_flags = flags;
1468 wc->wc_status = IBT_WC_SUCCESS;
1469 }
1470
1471 /*
1472 * hermon_cq_errcqe_consume()
1473 * Context: Can be called from interrupt or base context.
1474 */
1475 static void
hermon_cq_errcqe_consume(hermon_state_t * state,hermon_cqhdl_t cq,hermon_hw_cqe_t * cqe,ibt_wc_t * wc)1476 hermon_cq_errcqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
1477 hermon_hw_cqe_t *cqe, ibt_wc_t *wc)
1478 {
1479 uint32_t imm_eth_pkey_cred;
1480 uint_t status;
1481 ibt_wc_status_t ibt_status;
1482
1483 /*
1484 * Fetch the Work Request ID using the information in the CQE.
1485 * See hermon_wr.c for more details.
1486 */
1487 wc->wc_id = hermon_wrid_get_entry(cq, cqe);
1488
1489 /*
1490 * Parse the CQE opcode to determine completion type. We know that
1491 * the CQE is an error completion, so we extract only the completion
1492 * status/syndrome here.
1493 */
1494 imm_eth_pkey_cred = HERMON_CQE_ERROR_SYNDROME_GET(cq, cqe);
1495 status = imm_eth_pkey_cred;
1496 if (status != HERMON_CQE_WR_FLUSHED_ERR)
1497 IBTF_DPRINTF_L2("CQE ERR", "cqe %p QPN %x indx %x status 0x%x "
1498 "vendor syndrome %x", cqe, HERMON_CQE_QPNUM_GET(cq, cqe),
1499 HERMON_CQE_WQECNTR_GET(cq, cqe), status,
1500 HERMON_CQE_ERROR_VENDOR_SYNDROME_GET(cq, cqe));
1501 switch (status) {
1502 case HERMON_CQE_LOC_LEN_ERR:
1503 HERMON_WARNING(state, HERMON_FMA_LOCLEN);
1504 ibt_status = IBT_WC_LOCAL_LEN_ERR;
1505 break;
1506
1507 case HERMON_CQE_LOC_OP_ERR:
1508 HERMON_WARNING(state, HERMON_FMA_LOCQPOP);
1509 ibt_status = IBT_WC_LOCAL_QP_OP_ERR;
1510 break;
1511
1512 case HERMON_CQE_LOC_PROT_ERR:
1513 HERMON_WARNING(state, HERMON_FMA_LOCPROT);
1514 ibt_status = IBT_WC_LOCAL_PROTECT_ERR;
1515 IBTF_DPRINTF_L2("ERRCQE", "is at %p", cqe);
1516 if (hermon_should_panic) {
1517 cmn_err(CE_PANIC, "Hermon intentional PANIC - "
1518 "Local Protection Error\n");
1519 }
1520 break;
1521
1522 case HERMON_CQE_WR_FLUSHED_ERR:
1523 ibt_status = IBT_WC_WR_FLUSHED_ERR;
1524 break;
1525
1526 case HERMON_CQE_MW_BIND_ERR:
1527 HERMON_WARNING(state, HERMON_FMA_MWBIND);
1528 ibt_status = IBT_WC_MEM_WIN_BIND_ERR;
1529 break;
1530
1531 case HERMON_CQE_BAD_RESPONSE_ERR:
1532 HERMON_WARNING(state, HERMON_FMA_RESP);
1533 ibt_status = IBT_WC_BAD_RESPONSE_ERR;
1534 break;
1535
1536 case HERMON_CQE_LOCAL_ACCESS_ERR:
1537 HERMON_WARNING(state, HERMON_FMA_LOCACC);
1538 ibt_status = IBT_WC_LOCAL_ACCESS_ERR;
1539 break;
1540
1541 case HERMON_CQE_REM_INV_REQ_ERR:
1542 HERMON_WARNING(state, HERMON_FMA_REMREQ);
1543 ibt_status = IBT_WC_REMOTE_INVALID_REQ_ERR;
1544 break;
1545
1546 case HERMON_CQE_REM_ACC_ERR:
1547 HERMON_WARNING(state, HERMON_FMA_REMACC);
1548 ibt_status = IBT_WC_REMOTE_ACCESS_ERR;
1549 break;
1550
1551 case HERMON_CQE_REM_OP_ERR:
1552 HERMON_WARNING(state, HERMON_FMA_REMOP);
1553 ibt_status = IBT_WC_REMOTE_OP_ERR;
1554 break;
1555
1556 case HERMON_CQE_TRANS_TO_ERR:
1557 HERMON_WARNING(state, HERMON_FMA_XPORTCNT);
1558 ibt_status = IBT_WC_TRANS_TIMEOUT_ERR;
1559 break;
1560
1561 case HERMON_CQE_RNRNAK_TO_ERR:
1562 HERMON_WARNING(state, HERMON_FMA_RNRCNT);
1563 ibt_status = IBT_WC_RNR_NAK_TIMEOUT_ERR;
1564 break;
1565
1566 /*
1567 * The following error codes are not supported in the Hermon driver
1568 * as they relate only to Reliable Datagram completion statuses:
1569 * case HERMON_CQE_LOCAL_RDD_VIO_ERR:
1570 * case HERMON_CQE_REM_INV_RD_REQ_ERR:
1571 * case HERMON_CQE_EEC_REM_ABORTED_ERR:
1572 * case HERMON_CQE_INV_EEC_NUM_ERR:
1573 * case HERMON_CQE_INV_EEC_STATE_ERR:
1574 * case HERMON_CQE_LOC_EEC_ERR:
1575 */
1576
1577 default:
1578 HERMON_WARNING(state, "unknown error CQE status");
1579 HERMON_FMANOTE(state, HERMON_FMA_UNKN);
1580 ibt_status = IBT_WC_LOCAL_QP_OP_ERR;
1581 break;
1582 }
1583
1584 wc->wc_status = ibt_status;
1585 }
1586
1587
1588 /*
1589 * hermon_cq_resize_helper()
1590 * Context: Can be called only from user or kernel context.
1591 */
1592 void
hermon_cq_resize_helper(hermon_state_t * state,hermon_cqhdl_t cq)1593 hermon_cq_resize_helper(hermon_state_t *state, hermon_cqhdl_t cq)
1594 {
1595 hermon_cqhdl_t resize_hdl;
1596 int status;
1597
1598 /*
1599 * we're here because we found the special cqe opcode, so we have
1600 * to update the cq_handle, release the old resources, clear the
1601 * flag in the cq_hdl, and release the resize_hdl. When we return
1602 * above, it will take care of the rest
1603 */
1604 ASSERT(MUTEX_HELD(&cq->cq_lock));
1605
1606 resize_hdl = cq->cq_resize_hdl;
1607
1608 /*
1609 * Deregister the memory for the old Completion Queue. Note: We
1610 * really can't return error here because we have no good way to
1611 * cleanup. Plus, the deregistration really shouldn't ever happen.
1612 * So, if it does, it is an indication that something has gone
1613 * seriously wrong. So we print a warning message and return error
1614 * (knowing, of course, that the "old" CQ memory will be leaked)
1615 */
1616 status = hermon_mr_deregister(state, &cq->cq_mrhdl, HERMON_MR_DEREG_ALL,
1617 HERMON_SLEEP);
1618 if (status != DDI_SUCCESS) {
1619 HERMON_WARNING(state, "failed to deregister old CQ memory");
1620 }
1621
1622 /* Next, free the memory from the old CQ buffer */
1623 hermon_queue_free(&cq->cq_cqinfo);
1624
1625 /* now we can update the cq_hdl with the new things saved */
1626
1627 cq->cq_buf = resize_hdl->cq_buf;
1628 cq->cq_mrhdl = resize_hdl->cq_mrhdl;
1629 cq->cq_bufsz = resize_hdl->cq_bufsz;
1630 cq->cq_log_cqsz = resize_hdl->cq_log_cqsz;
1631 cq->cq_umap_dhp = cq->cq_resize_hdl->cq_umap_dhp;
1632 cq->cq_resize_hdl = 0;
1633 bcopy(&resize_hdl->cq_cqinfo, &cq->cq_cqinfo,
1634 sizeof (struct hermon_qalloc_info_s));
1635
1636 /* finally, release the resizing handle */
1637 kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
1638 }
1639
1640
1641 /*
1642 * hermon_cq_entries_flush()
1643 * Context: Can be called from interrupt or base context.
1644 */
1645 /* ARGSUSED */
1646 void
hermon_cq_entries_flush(hermon_state_t * state,hermon_qphdl_t qp)1647 hermon_cq_entries_flush(hermon_state_t *state, hermon_qphdl_t qp)
1648 {
1649 hermon_cqhdl_t cq;
1650 hermon_hw_cqe_t *cqe, *next_cqe;
1651 hermon_srqhdl_t srq;
1652 hermon_workq_hdr_t *wq;
1653 uint32_t cons_indx, tail_cons_indx, wrap_around_mask;
1654 uint32_t new_indx, check_indx, qpnum;
1655 uint32_t shift, mask;
1656 int outstanding_cqes;
1657
1658 qpnum = qp->qp_qpnum;
1659 if ((srq = qp->qp_srqhdl) != NULL)
1660 wq = qp->qp_srqhdl->srq_wq_wqhdr;
1661 else
1662 wq = NULL;
1663 cq = qp->qp_rq_cqhdl;
1664
1665 if (cq == NULL) {
1666 cq = qp->qp_sq_cqhdl;
1667 }
1668
1669 do_send_cq: /* loop back to here if send_cq is not the same as recv_cq */
1670 if (cq == NULL)
1671 return;
1672
1673 cons_indx = cq->cq_consindx;
1674 shift = cq->cq_log_cqsz;
1675 mask = cq->cq_bufsz;
1676 wrap_around_mask = mask - 1;
1677
1678 /* Calculate the pointer to the first CQ entry */
1679 cqe = &cq->cq_buf[cons_indx & wrap_around_mask];
1680
1681 /*
1682 * Loop through the CQ looking for entries owned by software. If an
1683 * entry is owned by software then we increment an 'outstanding_cqes'
1684 * count to know how many entries total we have on our CQ. We use this
1685 * value further down to know how many entries to loop through looking
1686 * for our same QP number.
1687 */
1688 outstanding_cqes = 0;
1689 tail_cons_indx = cons_indx;
1690 while (HERMON_CQE_OWNER_IS_SW(cq, cqe, tail_cons_indx, shift, mask)) {
1691 /* increment total cqes count */
1692 outstanding_cqes++;
1693
1694 /* increment the consumer index */
1695 tail_cons_indx++;
1696
1697 /* update the pointer to the next cq entry */
1698 cqe = &cq->cq_buf[tail_cons_indx & wrap_around_mask];
1699 }
1700
1701 /*
1702 * Using the 'tail_cons_indx' that was just set, we now know how many
1703 * total CQEs possible there are. Set the 'check_indx' and the
1704 * 'new_indx' to the last entry identified by 'tail_cons_indx'
1705 */
1706 check_indx = new_indx = (tail_cons_indx - 1);
1707
1708 while (--outstanding_cqes >= 0) {
1709 cqe = &cq->cq_buf[check_indx & wrap_around_mask];
1710
1711 /*
1712 * If the QP number is the same in the CQE as the QP, then
1713 * we must "consume" it. If it is for an SRQ wqe, then we
1714 * also must free the wqe back onto the free list of the SRQ.
1715 */
1716 if (qpnum == HERMON_CQE_QPNUM_GET(cq, cqe)) {
1717 if (srq && (HERMON_CQE_SENDRECV_GET(cq, cqe) ==
1718 HERMON_COMPLETION_RECV)) {
1719 uint64_t *desc;
1720 int indx;
1721
1722 /* Add wqe back to SRQ free list */
1723 indx = HERMON_CQE_WQEADDRSZ_GET(cq, cqe) &
1724 wq->wq_mask;
1725 desc = HERMON_SRQ_WQE_ADDR(srq, wq->wq_tail);
1726 ((uint16_t *)desc)[1] = htons(indx);
1727 wq->wq_tail = indx;
1728 }
1729 } else { /* CQEs for other QPNs need to remain */
1730 if (check_indx != new_indx) {
1731 next_cqe =
1732 &cq->cq_buf[new_indx & wrap_around_mask];
1733 /* Copy the CQE into the "next_cqe" pointer. */
1734 bcopy(cqe, next_cqe, sizeof (hermon_hw_cqe_t));
1735 }
1736 new_indx--; /* move index to next CQE to fill */
1737 }
1738 check_indx--; /* move index to next CQE to check */
1739 }
1740
1741 /*
1742 * Update consumer index to be the 'new_indx'. This moves it past all
1743 * removed entries. Because 'new_indx' is pointing to the last
1744 * previously valid SW owned entry, we add 1 to point the cons_indx to
1745 * the first HW owned entry.
1746 */
1747 cons_indx = (new_indx + 1);
1748
1749 /*
1750 * Now we only ring the doorbell (to update the consumer index) if
1751 * we've actually consumed a CQ entry. If we found no QP number
1752 * matches above, then we would not have removed anything. So only if
1753 * something was removed do we ring the doorbell.
1754 */
1755 if (cq->cq_consindx != cons_indx) {
1756 /*
1757 * Update the consumer index in both the CQ handle and the
1758 * doorbell record.
1759 */
1760 cq->cq_consindx = cons_indx;
1761
1762 hermon_cq_update_ci_doorbell(cq);
1763
1764 }
1765 if (cq != qp->qp_sq_cqhdl) {
1766 cq = qp->qp_sq_cqhdl;
1767 goto do_send_cq;
1768 }
1769 }
1770
1771 /*
1772 * hermon_get_cq_sched_list()
1773 * Context: Only called from attach() path context
1774 *
1775 * Read properties, creating entries in hs_cq_sched_list with
1776 * information about the requested "expected" and "minimum"
1777 * number of MSI-X interrupt vectors per list entry.
1778 */
1779 static int
hermon_get_cq_sched_list(hermon_state_t * state)1780 hermon_get_cq_sched_list(hermon_state_t *state)
1781 {
1782 char **listp, ulp_prop[HERMON_CQH_MAX + 4];
1783 uint_t nlist, i, j, ndata;
1784 int *data;
1785 size_t len;
1786 hermon_cq_sched_t *cq_schedp;
1787
1788 if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, state->hs_dip,
1789 DDI_PROP_DONTPASS, "cqh-group-list", &listp, &nlist) !=
1790 DDI_PROP_SUCCESS)
1791 return (0);
1792
1793 state->hs_cq_sched_array_size = nlist;
1794 state->hs_cq_sched_array = cq_schedp = kmem_zalloc(nlist *
1795 sizeof (hermon_cq_sched_t), KM_SLEEP);
1796 for (i = 0; i < nlist; i++) {
1797 if ((len = strlen(listp[i])) >= HERMON_CQH_MAX) {
1798 cmn_err(CE_CONT, "'cqh' property name too long\n");
1799 goto game_over;
1800 }
1801 for (j = 0; j < i; j++) {
1802 if (strcmp(listp[j], listp[i]) == 0) {
1803 cmn_err(CE_CONT, "Duplicate 'cqh' property\n");
1804 goto game_over;
1805 }
1806 }
1807 (void) strncpy(cq_schedp[i].cqs_name, listp[i], HERMON_CQH_MAX);
1808 ulp_prop[0] = 'c';
1809 ulp_prop[1] = 'q';
1810 ulp_prop[2] = 'h';
1811 ulp_prop[3] = '-';
1812 (void) strncpy(ulp_prop + 4, listp[i], len + 1);
1813 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->hs_dip,
1814 DDI_PROP_DONTPASS, ulp_prop, &data, &ndata) !=
1815 DDI_PROP_SUCCESS) {
1816 cmn_err(CE_CONT, "property '%s' not found\n", ulp_prop);
1817 goto game_over;
1818 }
1819 if (ndata != 2) {
1820 cmn_err(CE_CONT, "property '%s' does not "
1821 "have 2 integers\n", ulp_prop);
1822 goto game_over_free_data;
1823 }
1824 cq_schedp[i].cqs_desired = data[0];
1825 cq_schedp[i].cqs_minimum = data[1];
1826 cq_schedp[i].cqs_refcnt = 0;
1827 ddi_prop_free(data);
1828 }
1829 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->hs_dip,
1830 DDI_PROP_DONTPASS, "cqh-default", &data, &ndata) !=
1831 DDI_PROP_SUCCESS) {
1832 cmn_err(CE_CONT, "property 'cqh-default' not found\n");
1833 goto game_over;
1834 }
1835 if (ndata != 2) {
1836 cmn_err(CE_CONT, "property 'cqh-default' does not "
1837 "have 2 integers\n");
1838 goto game_over_free_data;
1839 }
1840 cq_schedp = &state->hs_cq_sched_default;
1841 cq_schedp->cqs_desired = data[0];
1842 cq_schedp->cqs_minimum = data[1];
1843 cq_schedp->cqs_refcnt = 0;
1844 ddi_prop_free(data);
1845 ddi_prop_free(listp);
1846 return (1); /* game on */
1847
1848 game_over_free_data:
1849 ddi_prop_free(data);
1850 game_over:
1851 cmn_err(CE_CONT, "Error in 'cqh' properties in hermon.conf\n");
1852 cmn_err(CE_CONT, "completion handler groups not being used\n");
1853 kmem_free(cq_schedp, nlist * sizeof (hermon_cq_sched_t));
1854 state->hs_cq_sched_array_size = 0;
1855 ddi_prop_free(listp);
1856 return (0);
1857 }
1858
1859 /*
1860 * hermon_cq_sched_init()
1861 * Context: Only called from attach() path context
1862 *
1863 * Read the hermon.conf properties looking for cq_sched info,
1864 * creating reserved pools of MSI-X interrupt ranges for the
1865 * specified ULPs.
1866 */
1867 int
hermon_cq_sched_init(hermon_state_t * state)1868 hermon_cq_sched_init(hermon_state_t *state)
1869 {
1870 hermon_cq_sched_t *cq_schedp, *defp;
1871 int i, desired, array_size;
1872
1873 mutex_init(&state->hs_cq_sched_lock, NULL, MUTEX_DRIVER,
1874 DDI_INTR_PRI(state->hs_intrmsi_pri));
1875
1876 mutex_enter(&state->hs_cq_sched_lock);
1877 state->hs_cq_sched_array = NULL;
1878
1879 /* initialize cq_sched_default */
1880 defp = &state->hs_cq_sched_default;
1881 defp->cqs_start_hid = 1;
1882 defp->cqs_len = state->hs_intrmsi_allocd;
1883 defp->cqs_next_alloc = defp->cqs_len - 1;
1884 (void) strncpy(defp->cqs_name, "default", 8);
1885
1886 /* Read properties to determine which ULPs use cq_sched */
1887 if (hermon_get_cq_sched_list(state) == 0)
1888 goto done;
1889
1890 /* Determine if we have enough vectors, or if we have to scale down */
1891 desired = defp->cqs_desired; /* default desired (from hermon.conf) */
1892 if (desired <= 0)
1893 goto done; /* all interrupts in the default pool */
1894 cq_schedp = state->hs_cq_sched_array;
1895 array_size = state->hs_cq_sched_array_size;
1896 for (i = 0; i < array_size; i++)
1897 desired += cq_schedp[i].cqs_desired;
1898 if (desired > state->hs_intrmsi_allocd) {
1899 cmn_err(CE_CONT, "#interrupts allocated (%d) is less than "
1900 "the #interrupts desired (%d)\n",
1901 state->hs_intrmsi_allocd, desired);
1902 cmn_err(CE_CONT, "completion handler groups not being used\n");
1903 goto done; /* all interrupts in the default pool */
1904 }
1905 /* Game on. For each cq_sched group, reserve the MSI-X range */
1906 for (i = 0; i < array_size; i++) {
1907 desired = cq_schedp[i].cqs_desired;
1908 cq_schedp[i].cqs_start_hid = defp->cqs_start_hid;
1909 cq_schedp[i].cqs_len = desired;
1910 cq_schedp[i].cqs_next_alloc = desired - 1;
1911 defp->cqs_len -= desired;
1912 defp->cqs_start_hid += desired;
1913 }
1914 /* reset default's start allocation seed */
1915 state->hs_cq_sched_default.cqs_next_alloc =
1916 state->hs_cq_sched_default.cqs_len - 1;
1917
1918 done:
1919 mutex_exit(&state->hs_cq_sched_lock);
1920 return (IBT_SUCCESS);
1921 }
1922
1923 void
hermon_cq_sched_fini(hermon_state_t * state)1924 hermon_cq_sched_fini(hermon_state_t *state)
1925 {
1926 mutex_enter(&state->hs_cq_sched_lock);
1927 if (state->hs_cq_sched_array_size) {
1928 kmem_free(state->hs_cq_sched_array, sizeof (hermon_cq_sched_t) *
1929 state->hs_cq_sched_array_size);
1930 state->hs_cq_sched_array_size = 0;
1931 state->hs_cq_sched_array = NULL;
1932 }
1933 mutex_exit(&state->hs_cq_sched_lock);
1934 mutex_destroy(&state->hs_cq_sched_lock);
1935 }
1936
1937 int
hermon_cq_sched_alloc(hermon_state_t * state,ibt_cq_sched_attr_t * attr,hermon_cq_sched_t ** cq_sched_pp)1938 hermon_cq_sched_alloc(hermon_state_t *state, ibt_cq_sched_attr_t *attr,
1939 hermon_cq_sched_t **cq_sched_pp)
1940 {
1941 hermon_cq_sched_t *cq_schedp;
1942 int i;
1943 char *name;
1944 ibt_cq_sched_flags_t flags;
1945
1946 flags = attr->cqs_flags;
1947 if ((flags & (IBT_CQS_SCHED_GROUP | IBT_CQS_EXACT_SCHED_GROUP)) == 0) {
1948 *cq_sched_pp = NULL;
1949 return (IBT_SUCCESS);
1950 }
1951 name = attr->cqs_pool_name;
1952
1953 mutex_enter(&state->hs_cq_sched_lock);
1954 cq_schedp = state->hs_cq_sched_array;
1955 for (i = 0; i < state->hs_cq_sched_array_size; i++, cq_schedp++) {
1956 if (strcmp(name, cq_schedp->cqs_name) == 0) {
1957 if (cq_schedp->cqs_len != 0)
1958 cq_schedp->cqs_refcnt++;
1959 break; /* found it */
1960 }
1961 }
1962 if ((i == state->hs_cq_sched_array_size) || /* not found, or */
1963 (cq_schedp->cqs_len == 0)) /* defined, but no dedicated intr's */
1964 cq_schedp = NULL;
1965 mutex_exit(&state->hs_cq_sched_lock);
1966
1967 *cq_sched_pp = cq_schedp; /* set to valid hdl, or to NULL */
1968 if ((cq_schedp == NULL) &&
1969 (attr->cqs_flags & IBT_CQS_EXACT_SCHED_GROUP))
1970 return (IBT_CQ_NO_SCHED_GROUP);
1971 else
1972 return (IBT_SUCCESS);
1973 }
1974
1975 int
hermon_cq_sched_free(hermon_state_t * state,hermon_cq_sched_t * cq_schedp)1976 hermon_cq_sched_free(hermon_state_t *state, hermon_cq_sched_t *cq_schedp)
1977 {
1978 if (cq_schedp != NULL) {
1979 /* Just decrement refcnt */
1980 mutex_enter(&state->hs_cq_sched_lock);
1981 if (cq_schedp->cqs_refcnt == 0)
1982 HERMON_WARNING(state, "cq_sched free underflow\n");
1983 else
1984 cq_schedp->cqs_refcnt--;
1985 mutex_exit(&state->hs_cq_sched_lock);
1986 }
1987 return (IBT_SUCCESS);
1988 }
1989