xref: /freebsd/contrib/ofed/libcxgb4/dev.c (revision d6b92ffa)
/*
 * Copyright (c) 2006-2016 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <config.h>

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <sys/mman.h>
#include <pthread.h>
#include <string.h>
#include <signal.h>

#include "libcxgb4.h"
#include "cxgb4-abi.h"

#define PCI_VENDOR_ID_CHELSIO		0x1425

/*
 * Macros needed to support the PCI Device ID Table ...
 */
#define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \
	static struct { \
		unsigned vendor; \
		unsigned device; \
	} hca_table[] = {

#define CH_PCI_DEVICE_ID_FUNCTION \
		0x4

#define CH_PCI_ID_TABLE_ENTRY(__DeviceID) \
		{ \
			.vendor = PCI_VENDOR_ID_CHELSIO, \
			.device = (__DeviceID), \
		}

#define CH_PCI_DEVICE_ID_TABLE_DEFINE_END \
	}

#include "t4_chip_type.h"
#include "t4_pci_id_tbl.h"
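
/*
 * For orientation: t4_pci_id_tbl.h invokes the macros above, so after
 * preprocessing hca_table ends up roughly like the sketch below.  The device
 * IDs shown are illustrative placeholders only, not the authoritative list
 * generated from t4_pci_id_tbl.h.
 *
 *	static struct {
 *		unsigned vendor;
 *		unsigned device;
 *	} hca_table[] = {
 *		{ .vendor = PCI_VENDOR_ID_CHELSIO, .device = 0x4401 },
 *		{ .vendor = PCI_VENDOR_ID_CHELSIO, .device = 0x5401 },
 *		...
 *	};
 */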

unsigned long c4iw_page_size;
unsigned long c4iw_page_shift;
unsigned long c4iw_page_mask;
int ma_wr;
int t5_en_wc = 1;

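/*
 * All devices claimed by this driver; walked by dump_state() in
 * STALL_DETECTION builds.
 */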
static TAILQ_HEAD(,c4iw_dev) devices = TAILQ_HEAD_INITIALIZER(devices);

static struct ibv_context_ops c4iw_ctx_ops = {
	.query_device = c4iw_query_device,
	.query_port = c4iw_query_port,
	.alloc_pd = c4iw_alloc_pd,
	.dealloc_pd = c4iw_free_pd,
	.reg_mr = c4iw_reg_mr,
	.dereg_mr = c4iw_dereg_mr,
	.create_cq = c4iw_create_cq,
	.resize_cq = c4iw_resize_cq,
	.destroy_cq = c4iw_destroy_cq,
	.create_srq = c4iw_create_srq,
	.modify_srq = c4iw_modify_srq,
	.destroy_srq = c4iw_destroy_srq,
	.create_qp = c4iw_create_qp,
	.modify_qp = c4iw_modify_qp,
	.destroy_qp = c4iw_destroy_qp,
	.query_qp = c4iw_query_qp,
	.create_ah = c4iw_create_ah,
	.destroy_ah = c4iw_destroy_ah,
	.attach_mcast = c4iw_attach_mcast,
	.detach_mcast = c4iw_detach_mcast,
	.post_srq_recv = c4iw_post_srq_recv,
	.req_notify_cq = c4iw_arm_cq,
};

static struct ibv_context *c4iw_alloc_context(struct ibv_device *ibdev,
					      int cmd_fd)
{
	struct c4iw_context *context;
	struct ibv_get_context cmd;
	struct c4iw_alloc_ucontext_resp resp;
	struct c4iw_dev *rhp = to_c4iw_dev(ibdev);
	struct ibv_query_device qcmd;
	uint64_t raw_fw_ver;
	struct ibv_device_attr attr;

	context = malloc(sizeof *context);
	if (!context)
		return NULL;

	memset(context, 0, sizeof *context);
	context->ibv_ctx.cmd_fd = cmd_fd;

	resp.status_page_size = 0;
	resp.reserved = 0;
	if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd,
				&resp.ibv_resp, sizeof resp))
		goto err_free;

	if (resp.reserved)
		PDBG("%s c4iw_alloc_ucontext_resp reserved field modified by kernel\n",
		     __FUNCTION__);

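	/*
	 * If the kernel exported a status page, map it read-only.  Judging by
	 * the fields used below (qp_start/qp_size, cq_start/cq_size), it
	 * advertises the QP and CQ id ranges for this device.
	 */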
	context->status_page_size = resp.status_page_size;
	if (resp.status_page_size) {
		context->status_page = mmap(NULL, resp.status_page_size,
					    PROT_READ, MAP_SHARED, cmd_fd,
					    resp.status_page_key);
		if (context->status_page == MAP_FAILED)
			goto err_free;
	}

	context->ibv_ctx.device = ibdev;
	context->ibv_ctx.ops = c4iw_ctx_ops;

	switch (rhp->chip_version) {
	case CHELSIO_T6:
		PDBG("%s T6/T5/T4 device\n", __FUNCTION__);
		/* FALLTHROUGH */
	case CHELSIO_T5:
		PDBG("%s T5/T4 device\n", __FUNCTION__);
		/* FALLTHROUGH */
	case CHELSIO_T4:
		PDBG("%s T4 device\n", __FUNCTION__);
		context->ibv_ctx.ops.async_event = c4iw_async_event;
		context->ibv_ctx.ops.post_send = c4iw_post_send;
		context->ibv_ctx.ops.post_recv = c4iw_post_receive;
		context->ibv_ctx.ops.poll_cq = c4iw_poll_cq;
		context->ibv_ctx.ops.req_notify_cq = c4iw_arm_cq;
		break;
	default:
		PDBG("%s unknown hca type %d\n", __FUNCTION__,
		     rhp->chip_version);
		goto err_unmap;
		break;
	}

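	/*
	 * First context allocated on this device: size the id-to-pointer
	 * lookup tables (MR, QP and CQ ids) that the rest of the library and
	 * the STALL_DETECTION dump code use to find the user-space objects.
	 */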
	if (!rhp->mmid2ptr) {
		int ret;

		ret = ibv_cmd_query_device(&context->ibv_ctx, &attr, &raw_fw_ver, &qcmd,
					   sizeof qcmd);
		if (ret)
			goto err_unmap;
		rhp->max_mr = attr.max_mr;
		rhp->mmid2ptr = calloc(attr.max_mr, sizeof(void *));
		if (!rhp->mmid2ptr) {
			goto err_unmap;
		}
		if (rhp->abi_version < 3) {
			fprintf(stderr, "Warning: iw_cxgb4 driver is older than"
					" libcxgb4 (ABI version %d)\n",
					rhp->abi_version);
			rhp->max_qp = T4_QID_BASE + attr.max_qp;
		} else {
			rhp->max_qp = context->status_page->qp_start +
					context->status_page->qp_size;
		}
		rhp->qpid2ptr = calloc(rhp->max_qp, sizeof(void *));
		if (!rhp->qpid2ptr) {
			goto err_unmap;
		}
		if (rhp->abi_version < 3)
			rhp->max_cq = T4_QID_BASE + attr.max_cq;
		else
			rhp->max_cq = context->status_page->cq_start +
					context->status_page->cq_size;
		rhp->cqid2ptr = calloc(rhp->max_cq, sizeof(void *));
		if (!rhp->cqid2ptr)
			goto err_unmap;
	}

	return &context->ibv_ctx;

err_unmap:
	if (context->status_page_size)
		munmap(context->status_page, context->status_page_size);
err_free:
	/*
	 * rhp outlives this context, so clear the table pointers after
	 * freeing them; otherwise a later c4iw_alloc_context() would skip
	 * reallocation and use freed memory.
	 */
	if (rhp->cqid2ptr) {
		free(rhp->cqid2ptr);
		rhp->cqid2ptr = NULL;
	}
	if (rhp->qpid2ptr) {
		free(rhp->qpid2ptr);
		rhp->qpid2ptr = NULL;
	}
	if (rhp->mmid2ptr) {
		free(rhp->mmid2ptr);
		rhp->mmid2ptr = NULL;
	}
	free(context);
	return NULL;
}

static void c4iw_free_context(struct ibv_context *ibctx)
{
	struct c4iw_context *context = to_c4iw_context(ibctx);

	if (context->status_page_size)
		munmap(context->status_page, context->status_page_size);
	free(context);
}

static struct verbs_device_ops c4iw_dev_ops = {
	.alloc_context = c4iw_alloc_context,
	.free_context = c4iw_free_context
};

#ifdef STALL_DETECTION

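/*
 * Stall timeout, set in cxgb4_driver_init() from the CXGB4_STALL_TIMEOUT
 * environment variable.
 */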
int stall_to;

static void dump_cq(struct c4iw_cq *chp)
{
	int i;

	fprintf(stderr,
		"CQ: %p id %u queue %p cidx 0x%08x sw_queue %p sw_cidx %d sw_pidx %d sw_in_use %d depth %u error %u gen %d "
		"cidx_inc %d bits_type_ts %016" PRIx64 " notempty %d\n", chp,
		chp->cq.cqid, chp->cq.queue, chp->cq.cidx,
		chp->cq.sw_queue, chp->cq.sw_cidx, chp->cq.sw_pidx, chp->cq.sw_in_use,
		chp->cq.size, chp->cq.error, chp->cq.gen, chp->cq.cidx_inc, be64toh(chp->cq.bits_type_ts),
		t4_cq_notempty(&chp->cq));

	for (i = 0; i < chp->cq.size; i++) {
		u64 *p = (u64 *)(chp->cq.queue + i);

		fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64, i, be64toh(p[0]), be64toh(p[1]));
		if (i == chp->cq.cidx)
			fprintf(stderr, " <-- cidx\n");
		else
			fprintf(stderr, "\n");
		p += 2;
		fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64toh(p[0]), be64toh(p[1]));
		p += 2;
		fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64toh(p[0]), be64toh(p[1]));
		p += 2;
		fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64toh(p[0]), be64toh(p[1]));
		p += 2;
	}
}

static void dump_qp(struct c4iw_qp *qhp)
{
	int i;
	int j;
	struct t4_swsqe *swsqe;
	struct t4_swrqe *swrqe;
	u16 cidx, pidx;
	u64 *p;

	fprintf(stderr,
		"QP: %p id %u error %d flushed %d qid_mask 0x%x\n"
		"    SQ: id %u queue %p sw_queue %p cidx %u pidx %u in_use %u wq_pidx %u depth %u flags 0x%x flush_cidx %d\n"
		"    RQ: id %u queue %p sw_queue %p cidx %u pidx %u in_use %u depth %u\n",
		qhp,
		qhp->wq.sq.qid,
		qhp->wq.error,
		qhp->wq.flushed,
		qhp->wq.qid_mask,
		qhp->wq.sq.qid,
		qhp->wq.sq.queue,
		qhp->wq.sq.sw_sq,
		qhp->wq.sq.cidx,
		qhp->wq.sq.pidx,
		qhp->wq.sq.in_use,
		qhp->wq.sq.wq_pidx,
		qhp->wq.sq.size,
		qhp->wq.sq.flags,
		qhp->wq.sq.flush_cidx,
		qhp->wq.rq.qid,
		qhp->wq.rq.queue,
		qhp->wq.rq.sw_rq,
		qhp->wq.rq.cidx,
		qhp->wq.rq.pidx,
		qhp->wq.rq.in_use,
		qhp->wq.rq.size);
	cidx = qhp->wq.sq.cidx;
	pidx = qhp->wq.sq.pidx;
	if (cidx != pidx)
		fprintf(stderr, "SQ: \n");
	while (cidx != pidx) {
		swsqe = &qhp->wq.sq.sw_sq[cidx];
		fprintf(stderr, "%04u: wr_id %016" PRIx64
			" sq_wptr %08x read_len %u opcode 0x%x "
			"complete %u signaled %u cqe %016" PRIx64 " %016" PRIx64 " %016" PRIx64 " %016" PRIx64 "\n",
			cidx,
			swsqe->wr_id,
			swsqe->idx,
			swsqe->read_len,
			swsqe->opcode,
			swsqe->complete,
			swsqe->signaled,
			htobe64(((uint64_t *)&swsqe->cqe)[0]),
			htobe64(((uint64_t *)&swsqe->cqe)[1]),
			htobe64(((uint64_t *)&swsqe->cqe)[2]),
			htobe64(((uint64_t *)&swsqe->cqe)[3]));
		if (++cidx == qhp->wq.sq.size)
			cidx = 0;
	}

	fprintf(stderr, "SQ WQ: \n");
	p = (u64 *)qhp->wq.sq.queue;
	for (i = 0; i < qhp->wq.sq.size * T4_SQ_NUM_SLOTS; i++) {
		for (j = 0; j < T4_EQ_ENTRY_SIZE / 16; j++) {
			fprintf(stderr, "%04u %016" PRIx64 " %016" PRIx64 " ",
				i, be64toh(p[0]), be64toh(p[1]));
			if (j == 0 && i == qhp->wq.sq.wq_pidx)
				fprintf(stderr, " <-- pidx");
			fprintf(stderr, "\n");
			p += 2;
		}
	}
	cidx = qhp->wq.rq.cidx;
	pidx = qhp->wq.rq.pidx;
	if (cidx != pidx)
		fprintf(stderr, "RQ: \n");
	while (cidx != pidx) {
		swrqe = &qhp->wq.rq.sw_rq[cidx];
		fprintf(stderr, "%04u: wr_id %016" PRIx64 "\n",
			cidx,
			swrqe->wr_id);
		if (++cidx == qhp->wq.rq.size)
			cidx = 0;
	}

	fprintf(stderr, "RQ WQ: \n");
	p = (u64 *)qhp->wq.rq.queue;
	for (i = 0; i < qhp->wq.rq.size * T4_RQ_NUM_SLOTS; i++) {
		for (j = 0; j < T4_EQ_ENTRY_SIZE / 16; j++) {
			fprintf(stderr, "%04u %016" PRIx64 " %016" PRIx64 " ",
				i, be64toh(p[0]), be64toh(p[1]));
			if (j == 0 && i == qhp->wq.rq.pidx)
				fprintf(stderr, " <-- pidx");
			if (j == 0 && i == qhp->wq.rq.cidx)
				fprintf(stderr, " <-- cidx");
			fprintf(stderr, "\n");
			p += 2;
		}
	}
}

void dump_state(void)
{
	struct c4iw_dev *dev;
	int i;

	fprintf(stderr, "STALL DETECTED:\n");
	TAILQ_FOREACH(dev, &devices, list) {
		//pthread_spin_lock(&dev->lock);
		fprintf(stderr, "Device %s\n", dev->ibv_dev.name);
		for (i = 0; i < dev->max_cq; i++) {
			if (dev->cqid2ptr[i]) {
				struct c4iw_cq *chp = dev->cqid2ptr[i];
				//pthread_spin_lock(&chp->lock);
				dump_cq(chp);
				//pthread_spin_unlock(&chp->lock);
			}
		}
		for (i = 0; i < dev->max_qp; i++) {
			if (dev->qpid2ptr[i]) {
				struct c4iw_qp *qhp = dev->qpid2ptr[i];
				//pthread_spin_lock(&qhp->lock);
				dump_qp(qhp);
				//pthread_spin_unlock(&qhp->lock);
			}
		}
		//pthread_spin_unlock(&dev->lock);
	}
	fprintf(stderr, "DUMP COMPLETE:\n");
	fflush(stderr);
}
#endif /* end of STALL_DETECTION */

/*
 * c4iw_abi_version stores the ABI version reported by the iw_cxgb4 kernel
 * driver, so the user-mode library can tell whether the kernel supports
 * kernel-mode doorbell ringing.
 */
int c4iw_abi_version = 1;
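
/*
 * Note: with ABI version 3 or later, c4iw_alloc_context() above sizes its
 * QP and CQ id tables from the ranges the kernel publishes in the status
 * page; older ABIs fall back to T4_QID_BASE plus the queried device limits.
 */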

static struct verbs_device *cxgb4_driver_init(const char *uverbs_sys_path,
					      int abi_version)
{
	char devstr[IBV_SYSFS_PATH_MAX], ibdev[16], value[32], *cp;
	struct c4iw_dev *dev;
	unsigned vendor, device, fw_maj, fw_min;
	int i;

	if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor",
				value, sizeof value) < 0)
		return NULL;
	sscanf(value, "%i", &vendor);

	if (ibv_read_sysfs_file(uverbs_sys_path, "device/device",
				value, sizeof value) < 0)
		return NULL;
	sscanf(value, "%i", &device);

	for (i = 0; i < sizeof hca_table / sizeof hca_table[0]; ++i)
		if (vendor == hca_table[i].vendor &&
		    device == hca_table[i].device)
			goto found;

	return NULL;

found:
	c4iw_abi_version = abi_version;

	/*
	 * Verify that the firmware major number matches.  Major number
	 * mismatches are fatal.  Minor number mismatches are tolerated.
	 */
	if (ibv_read_sysfs_file(uverbs_sys_path, "ibdev",
				ibdev, sizeof ibdev) < 0)
		return NULL;

	memset(devstr, 0, sizeof devstr);
	snprintf(devstr, sizeof devstr, "%s/class/infiniband/%s",
		 ibv_get_sysfs_path(), ibdev);
	if (ibv_read_sysfs_file(devstr, "fw_ver", value, sizeof value) < 0)
		return NULL;

	cp = strtok(value+1, ".");
	if (!cp)		/* unexpected fw_ver format */
		return NULL;
	sscanf(cp, "%i", &fw_maj);
	cp = strtok(NULL, ".");
	if (!cp)
		return NULL;
	sscanf(cp, "%i", &fw_min);

	if ((signed int)fw_maj < FW_MAJ) {
		fprintf(stderr, "libcxgb4: Fatal firmware version mismatch.  "
			"Firmware major number is %u and libcxgb4 needs %u.\n",
			fw_maj, FW_MAJ);
		fflush(stderr);
		return NULL;
	}

	DBGLOG("libcxgb4");

	if ((signed int)fw_min < FW_MIN) {
		PDBG("libcxgb4: non-fatal firmware version mismatch.  "
			"Firmware minor number is %u and libcxgb4 needs %u.\n",
			fw_min, FW_MIN);
		fflush(stderr);
	}

	PDBG("%s found vendor %d device %d type %d\n",
	     __FUNCTION__, vendor, device, CHELSIO_CHIP_VERSION(hca_table[i].device >> 8));

	dev = calloc(1, sizeof *dev);
	if (!dev) {
		return NULL;
	}

	pthread_spin_init(&dev->lock, PTHREAD_PROCESS_PRIVATE);
	dev->ibv_dev.ops = &c4iw_dev_ops;
	dev->chip_version = CHELSIO_CHIP_VERSION(hca_table[i].device >> 8);
	dev->abi_version = abi_version;

	PDBG("%s device claimed\n", __FUNCTION__);
	TAILQ_INSERT_TAIL(&devices, dev, list);
#ifdef STALL_DETECTION
{
	char *c = getenv("CXGB4_STALL_TIMEOUT");
	if (c) {
		errno = 0;	/* strtol() does not clear errno on success */
		stall_to = strtol(c, NULL, 0);
		if (errno || stall_to < 0)
			stall_to = 0;
	}
}
#endif
{
	char *c = getenv("CXGB4_MA_WR");
	if (c) {
		ma_wr = strtol(c, NULL, 0);
		if (ma_wr != 1)
			ma_wr = 0;
	}
}
{
	char *c = getenv("T5_ENABLE_WC");
	if (c) {
		t5_en_wc = strtol(c, NULL, 0);
		if (t5_en_wc != 1)
			t5_en_wc = 0;
	}
}
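
/*
 * The blocks above read tuning knobs from the environment, e.g.
 * (illustrative shell usage, not part of this file):
 *
 *	CXGB4_STALL_TIMEOUT=5 CXGB4_MA_WR=1 T5_ENABLE_WC=0 ./rdma_app
 *
 * CXGB4_STALL_TIMEOUT is only honored in STALL_DETECTION builds; CXGB4_MA_WR
 * and T5_ENABLE_WC are treated as booleans, so any value other than 1 forces
 * them to 0.
 */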

	return &dev->ibv_dev;
}

static __attribute__((constructor)) void cxgb4_register_driver(void)
{
	c4iw_page_size = sysconf(_SC_PAGESIZE);
	c4iw_page_shift = long_log2(c4iw_page_size);
	c4iw_page_mask = ~(c4iw_page_size - 1);
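	/* For example, with 4 KiB pages: size 0x1000, shift 12, mask ~0xfffUL. */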
	verbs_register_driver("cxgb4", cxgb4_driver_init);
}

#ifdef STATS
void __attribute__ ((destructor)) cs_fini(void);
void __attribute__ ((destructor)) cs_fini(void)
{
	syslog(LOG_NOTICE, "cxgb4 stats - sends %lu recv %lu read %lu "
	       "write %lu arm %lu cqe %lu mr %lu qp %lu cq %lu\n",
	       c4iw_stats.send, c4iw_stats.recv, c4iw_stats.read,
	       c4iw_stats.write, c4iw_stats.arm, c4iw_stats.cqe,
	       c4iw_stats.mr, c4iw_stats.qp, c4iw_stats.cq);
}
#endif