1 /* 2 * Copyright (c) 2006-2016 Chelsio, Inc. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 #include <config.h> 33 34 #include <stdio.h> 35 #include <stdlib.h> 36 #include <unistd.h> 37 #include <errno.h> 38 #include <sys/mman.h> 39 #include <pthread.h> 40 #include <string.h> 41 #include <signal.h> 42 43 #include "libcxgb4.h" 44 #include "cxgb4-abi.h" 45 46 #define PCI_VENDOR_ID_CHELSIO 0x1425 47 48 /* 49 * Macros needed to support the PCI Device ID Table ... 50 */ 51 #define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \ 52 static struct { \ 53 unsigned vendor; \ 54 unsigned device; \ 55 } hca_table[] = { 56 57 #define CH_PCI_DEVICE_ID_FUNCTION \ 58 0x4 59 60 #define CH_PCI_ID_TABLE_ENTRY(__DeviceID) \ 61 { \ 62 .vendor = PCI_VENDOR_ID_CHELSIO, \ 63 .device = (__DeviceID), \ 64 } 65 66 #define CH_PCI_DEVICE_ID_TABLE_DEFINE_END \ 67 } 68 69 #include "t4_chip_type.h" 70 #include "t4_pci_id_tbl.h" 71 72 unsigned long c4iw_page_size; 73 unsigned long c4iw_page_shift; 74 unsigned long c4iw_page_mask; 75 int ma_wr; 76 int t5_en_wc = 1; 77 78 static TAILQ_HEAD(,c4iw_dev) devices = TAILQ_HEAD_INITIALIZER(devices); 79 80 static struct ibv_context_ops c4iw_ctx_ops = { 81 .query_device = c4iw_query_device, 82 .query_port = c4iw_query_port, 83 .alloc_pd = c4iw_alloc_pd, 84 .dealloc_pd = c4iw_free_pd, 85 .reg_mr = c4iw_reg_mr, 86 .dereg_mr = c4iw_dereg_mr, 87 .create_cq = c4iw_create_cq, 88 .resize_cq = c4iw_resize_cq, 89 .destroy_cq = c4iw_destroy_cq, 90 .create_srq = c4iw_create_srq, 91 .modify_srq = c4iw_modify_srq, 92 .destroy_srq = c4iw_destroy_srq, 93 .create_qp = c4iw_create_qp, 94 .modify_qp = c4iw_modify_qp, 95 .destroy_qp = c4iw_destroy_qp, 96 .query_qp = c4iw_query_qp, 97 .create_ah = c4iw_create_ah, 98 .destroy_ah = c4iw_destroy_ah, 99 .attach_mcast = c4iw_attach_mcast, 100 .detach_mcast = c4iw_detach_mcast, 101 .post_srq_recv = c4iw_post_srq_recv, 102 .req_notify_cq = c4iw_arm_cq, 103 }; 104 105 static struct ibv_context *c4iw_alloc_context(struct ibv_device *ibdev, 106 int cmd_fd) 107 { 108 struct c4iw_context *context; 109 struct ibv_get_context cmd; 110 struct c4iw_alloc_ucontext_resp resp; 111 struct c4iw_dev *rhp = to_c4iw_dev(ibdev); 112 struct ibv_query_device qcmd; 113 uint64_t raw_fw_ver; 114 struct ibv_device_attr attr; 115 116 context = malloc(sizeof *context); 117 if (!context) 118 return NULL; 119 120 memset(context, 0, sizeof *context); 121 context->ibv_ctx.cmd_fd = cmd_fd; 122 123 resp.status_page_size = 0; 124 resp.reserved = 0; 125 if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd, 126 &resp.ibv_resp, sizeof resp)) 127 goto err_free; 128 129 if (resp.reserved) 130 PDBG("%s c4iw_alloc_ucontext_resp reserved field modified by kernel\n", 131 __FUNCTION__); 132 133 context->status_page_size = resp.status_page_size; 134 if (resp.status_page_size) { 135 context->status_page = mmap(NULL, resp.status_page_size, 136 PROT_READ, MAP_SHARED, cmd_fd, 137 resp.status_page_key); 138 if (context->status_page == MAP_FAILED) 139 goto err_free; 140 } 141 142 context->ibv_ctx.device = ibdev; 143 context->ibv_ctx.ops = c4iw_ctx_ops; 144 145 switch (rhp->chip_version) { 146 case CHELSIO_T6: 147 PDBG("%s T6/T5/T4 device\n", __FUNCTION__); 148 case CHELSIO_T5: 149 PDBG("%s T5/T4 device\n", __FUNCTION__); 150 case CHELSIO_T4: 151 PDBG("%s T4 device\n", __FUNCTION__); 152 context->ibv_ctx.ops.async_event = c4iw_async_event; 153 context->ibv_ctx.ops.post_send = c4iw_post_send; 154 context->ibv_ctx.ops.post_recv = c4iw_post_receive; 155 context->ibv_ctx.ops.poll_cq = c4iw_poll_cq; 156 context->ibv_ctx.ops.req_notify_cq = c4iw_arm_cq; 157 break; 158 default: 159 PDBG("%s unknown hca type %d\n", __FUNCTION__, 160 rhp->chip_version); 161 goto err_unmap; 162 break; 163 } 164 165 if (!rhp->mmid2ptr) { 166 int ret; 167 168 ret = ibv_cmd_query_device(&context->ibv_ctx, &attr, &raw_fw_ver, &qcmd, 169 sizeof qcmd); 170 if (ret) 171 goto err_unmap; 172 rhp->max_mr = attr.max_mr; 173 rhp->mmid2ptr = calloc(attr.max_mr, sizeof(void *)); 174 if (!rhp->mmid2ptr) { 175 goto err_unmap; 176 } 177 if (rhp->abi_version < 3) { 178 fprintf(stderr, "Warning: iw_cxgb4 driver is of older version" 179 " than libcxgb4:: %d\n", rhp->abi_version); 180 rhp->max_qp = T4_QID_BASE + attr.max_qp; 181 } else { 182 rhp->max_qp = context->status_page->qp_start + 183 context->status_page->qp_size; 184 } 185 rhp->qpid2ptr = calloc(rhp->max_qp, sizeof(void *)); 186 if (!rhp->qpid2ptr) { 187 goto err_unmap; 188 } 189 if (rhp->abi_version < 3) 190 rhp->max_cq = T4_QID_BASE + attr.max_cq; 191 else 192 rhp->max_cq = context->status_page->cq_start + 193 context->status_page->cq_size; 194 rhp->cqid2ptr = calloc(rhp->max_cq, sizeof(void *)); 195 if (!rhp->cqid2ptr) 196 goto err_unmap; 197 } 198 199 return &context->ibv_ctx; 200 201 err_unmap: 202 munmap(context->status_page, context->status_page_size); 203 err_free: 204 if (rhp->cqid2ptr) 205 free(rhp->cqid2ptr); 206 if (rhp->qpid2ptr) 207 free(rhp->cqid2ptr); 208 if (rhp->mmid2ptr) 209 free(rhp->cqid2ptr); 210 free(context); 211 return NULL; 212 } 213 214 static void c4iw_free_context(struct ibv_context *ibctx) 215 { 216 struct c4iw_context *context = to_c4iw_context(ibctx); 217 218 if (context->status_page_size) 219 munmap(context->status_page, context->status_page_size); 220 free(context); 221 } 222 223 static struct verbs_device_ops c4iw_dev_ops = { 224 .alloc_context = c4iw_alloc_context, 225 .free_context = c4iw_free_context 226 }; 227 228 #ifdef STALL_DETECTION 229 230 int stall_to; 231 232 static void dump_cq(struct c4iw_cq *chp) 233 { 234 int i; 235 236 fprintf(stderr, 237 "CQ: %p id %u queue %p cidx 0x%08x sw_queue %p sw_cidx %d sw_pidx %d sw_in_use %d depth %u error %u gen %d " 238 "cidx_inc %d bits_type_ts %016" PRIx64 " notempty %d\n", chp, 239 chp->cq.cqid, chp->cq.queue, chp->cq.cidx, 240 chp->cq.sw_queue, chp->cq.sw_cidx, chp->cq.sw_pidx, chp->cq.sw_in_use, 241 chp->cq.size, chp->cq.error, chp->cq.gen, chp->cq.cidx_inc, be64toh(chp->cq.bits_type_ts), 242 t4_cq_notempty(&chp->cq)); 243 244 for (i=0; i < chp->cq.size; i++) { 245 u64 *p = (u64 *)(chp->cq.queue + i); 246 247 fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64, i, be64toh(p[0]), be64toh(p[1])); 248 if (i == chp->cq.cidx) 249 fprintf(stderr, " <-- cidx\n"); 250 else 251 fprintf(stderr, "\n"); 252 p+= 2; 253 fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64toh(p[0]), be64toh(p[1])); 254 p+= 2; 255 fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64toh(p[0]), be64toh(p[1])); 256 p+= 2; 257 fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64toh(p[0]), be64toh(p[1])); 258 p+= 2; 259 } 260 } 261 262 static void dump_qp(struct c4iw_qp *qhp) 263 { 264 int i; 265 int j; 266 struct t4_swsqe *swsqe; 267 struct t4_swrqe *swrqe; 268 u16 cidx, pidx; 269 u64 *p; 270 271 fprintf(stderr, 272 "QP: %p id %u error %d flushed %d qid_mask 0x%x\n" 273 " SQ: id %u queue %p sw_queue %p cidx %u pidx %u in_use %u wq_pidx %u depth %u flags 0x%x flush_cidx %d\n" 274 " RQ: id %u queue %p sw_queue %p cidx %u pidx %u in_use %u depth %u\n", 275 qhp, 276 qhp->wq.sq.qid, 277 qhp->wq.error, 278 qhp->wq.flushed, 279 qhp->wq.qid_mask, 280 qhp->wq.sq.qid, 281 qhp->wq.sq.queue, 282 qhp->wq.sq.sw_sq, 283 qhp->wq.sq.cidx, 284 qhp->wq.sq.pidx, 285 qhp->wq.sq.in_use, 286 qhp->wq.sq.wq_pidx, 287 qhp->wq.sq.size, 288 qhp->wq.sq.flags, 289 qhp->wq.sq.flush_cidx, 290 qhp->wq.rq.qid, 291 qhp->wq.rq.queue, 292 qhp->wq.rq.sw_rq, 293 qhp->wq.rq.cidx, 294 qhp->wq.rq.pidx, 295 qhp->wq.rq.in_use, 296 qhp->wq.rq.size); 297 cidx = qhp->wq.sq.cidx; 298 pidx = qhp->wq.sq.pidx; 299 if (cidx != pidx) 300 fprintf(stderr, "SQ: \n"); 301 while (cidx != pidx) { 302 swsqe = &qhp->wq.sq.sw_sq[cidx]; 303 fprintf(stderr, "%04u: wr_id %016" PRIx64 304 " sq_wptr %08x read_len %u opcode 0x%x " 305 "complete %u signaled %u cqe %016" PRIx64 " %016" PRIx64 " %016" PRIx64 " %016" PRIx64 "\n", 306 cidx, 307 swsqe->wr_id, 308 swsqe->idx, 309 swsqe->read_len, 310 swsqe->opcode, 311 swsqe->complete, 312 swsqe->signaled, 313 htobe64(((uint64_t *)&swsqe->cqe)[0]), 314 htobe64(((uint64_t *)&swsqe->cqe)[1]), 315 htobe64(((uint64_t *)&swsqe->cqe)[2]), 316 htobe64(((uint64_t *)&swsqe->cqe)[3])); 317 if (++cidx == qhp->wq.sq.size) 318 cidx = 0; 319 } 320 321 fprintf(stderr, "SQ WQ: \n"); 322 p = (u64 *)qhp->wq.sq.queue; 323 for (i=0; i < qhp->wq.sq.size * T4_SQ_NUM_SLOTS; i++) { 324 for (j=0; j < T4_EQ_ENTRY_SIZE / 16; j++) { 325 fprintf(stderr, "%04u %016" PRIx64 " %016" PRIx64 " ", 326 i, be64toh(p[0]), be64toh(p[1])); 327 if (j == 0 && i == qhp->wq.sq.wq_pidx) 328 fprintf(stderr, " <-- pidx"); 329 fprintf(stderr, "\n"); 330 p += 2; 331 } 332 } 333 cidx = qhp->wq.rq.cidx; 334 pidx = qhp->wq.rq.pidx; 335 if (cidx != pidx) 336 fprintf(stderr, "RQ: \n"); 337 while (cidx != pidx) { 338 swrqe = &qhp->wq.rq.sw_rq[cidx]; 339 fprintf(stderr, "%04u: wr_id %016" PRIx64 "\n", 340 cidx, 341 swrqe->wr_id ); 342 if (++cidx == qhp->wq.rq.size) 343 cidx = 0; 344 } 345 346 fprintf(stderr, "RQ WQ: \n"); 347 p = (u64 *)qhp->wq.rq.queue; 348 for (i=0; i < qhp->wq.rq.size * T4_RQ_NUM_SLOTS; i++) { 349 for (j=0; j < T4_EQ_ENTRY_SIZE / 16; j++) { 350 fprintf(stderr, "%04u %016" PRIx64 " %016" PRIx64 " ", 351 i, be64toh(p[0]), be64toh(p[1])); 352 if (j == 0 && i == qhp->wq.rq.pidx) 353 fprintf(stderr, " <-- pidx"); 354 if (j == 0 && i == qhp->wq.rq.cidx) 355 fprintf(stderr, " <-- cidx"); 356 fprintf(stderr, "\n"); 357 p+=2; 358 } 359 } 360 } 361 362 void dump_state(void) 363 { 364 struct c4iw_dev *dev; 365 int i; 366 367 fprintf(stderr, "STALL DETECTED:\n"); 368 TAILQ_FOREACH(dev, &devices, list) { 369 //pthread_spin_lock(&dev->lock); 370 fprintf(stderr, "Device %s\n", dev->ibv_dev.name); 371 for (i=0; i < dev->max_cq; i++) { 372 if (dev->cqid2ptr[i]) { 373 struct c4iw_cq *chp = dev->cqid2ptr[i]; 374 //pthread_spin_lock(&chp->lock); 375 dump_cq(chp); 376 //pthread_spin_unlock(&chp->lock); 377 } 378 } 379 for (i=0; i < dev->max_qp; i++) { 380 if (dev->qpid2ptr[i]) { 381 struct c4iw_qp *qhp = dev->qpid2ptr[i]; 382 //pthread_spin_lock(&qhp->lock); 383 dump_qp(qhp); 384 //pthread_spin_unlock(&qhp->lock); 385 } 386 } 387 //pthread_spin_unlock(&dev->lock); 388 } 389 fprintf(stderr, "DUMP COMPLETE:\n"); 390 fflush(stderr); 391 } 392 #endif /* end of STALL_DETECTION */ 393 394 /* 395 * c4iw_abi_version is used to store ABI for iw_cxgb4 so the user mode library 396 * can know if the driver supports the kernel mode db ringing. 397 */ 398 int c4iw_abi_version = 1; 399 400 static struct verbs_device *cxgb4_driver_init(const char *uverbs_sys_path, 401 int abi_version) 402 { 403 char devstr[IBV_SYSFS_PATH_MAX], ibdev[16], value[32], *cp; 404 struct c4iw_dev *dev; 405 unsigned vendor, device, fw_maj, fw_min; 406 int i; 407 408 if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor", 409 value, sizeof value) < 0) 410 return NULL; 411 sscanf(value, "%i", &vendor); 412 413 if (ibv_read_sysfs_file(uverbs_sys_path, "device/device", 414 value, sizeof value) < 0) 415 return NULL; 416 sscanf(value, "%i", &device); 417 418 for (i = 0; i < sizeof hca_table / sizeof hca_table[0]; ++i) 419 if (vendor == hca_table[i].vendor && 420 device == hca_table[i].device) 421 goto found; 422 423 return NULL; 424 425 found: 426 c4iw_abi_version = abi_version; 427 428 /* 429 * Verify that the firmware major number matches. Major number 430 * mismatches are fatal. Minor number mismatches are tolerated. 431 */ 432 if (ibv_read_sysfs_file(uverbs_sys_path, "ibdev", 433 ibdev, sizeof ibdev) < 0) 434 return NULL; 435 436 memset(devstr, 0, sizeof devstr); 437 snprintf(devstr, sizeof devstr, "%s/class/infiniband/%s", 438 ibv_get_sysfs_path(), ibdev); 439 if (ibv_read_sysfs_file(devstr, "fw_ver", value, sizeof value) < 0) 440 return NULL; 441 442 cp = strtok(value+1, "."); 443 sscanf(cp, "%i", &fw_maj); 444 cp = strtok(NULL, "."); 445 sscanf(cp, "%i", &fw_min); 446 447 if ((signed int)fw_maj < FW_MAJ) { 448 fprintf(stderr, "libcxgb4: Fatal firmware version mismatch. " 449 "Firmware major number is %u and libcxgb4 needs %u.\n", 450 fw_maj, FW_MAJ); 451 fflush(stderr); 452 return NULL; 453 } 454 455 DBGLOG("libcxgb4"); 456 457 if ((signed int)fw_min < FW_MIN) { 458 PDBG("libcxgb4: non-fatal firmware version mismatch. " 459 "Firmware minor number is %u and libcxgb4 needs %u.\n", 460 fw_min, FW_MIN); 461 fflush(stderr); 462 } 463 464 PDBG("%s found vendor %d device %d type %d\n", 465 __FUNCTION__, vendor, device, CHELSIO_CHIP_VERSION(hca_table[i].device >> 8)); 466 467 dev = calloc(1, sizeof *dev); 468 if (!dev) { 469 return NULL; 470 } 471 472 pthread_spin_init(&dev->lock, PTHREAD_PROCESS_PRIVATE); 473 dev->ibv_dev.ops = &c4iw_dev_ops; 474 dev->chip_version = CHELSIO_CHIP_VERSION(hca_table[i].device >> 8); 475 dev->abi_version = abi_version; 476 477 PDBG("%s device claimed\n", __FUNCTION__); 478 TAILQ_INSERT_TAIL(&devices, dev, list); 479 #ifdef STALL_DETECTION 480 { 481 char *c = getenv("CXGB4_STALL_TIMEOUT"); 482 if (c) { 483 stall_to = strtol(c, NULL, 0); 484 if (errno || stall_to < 0) 485 stall_to = 0; 486 } 487 } 488 #endif 489 { 490 char *c = getenv("CXGB4_MA_WR"); 491 if (c) { 492 ma_wr = strtol(c, NULL, 0); 493 if (ma_wr != 1) 494 ma_wr = 0; 495 } 496 } 497 { 498 char *c = getenv("T5_ENABLE_WC"); 499 if (c) { 500 t5_en_wc = strtol(c, NULL, 0); 501 if (t5_en_wc != 1) 502 t5_en_wc = 0; 503 } 504 } 505 506 return &dev->ibv_dev; 507 } 508 509 static __attribute__((constructor)) void cxgb4_register_driver(void) 510 { 511 c4iw_page_size = sysconf(_SC_PAGESIZE); 512 c4iw_page_shift = long_log2(c4iw_page_size); 513 c4iw_page_mask = ~(c4iw_page_size - 1); 514 verbs_register_driver("cxgb4", cxgb4_driver_init); 515 } 516 517 #ifdef STATS 518 void __attribute__ ((destructor)) cs_fini(void); 519 void __attribute__ ((destructor)) cs_fini(void) 520 { 521 syslog(LOG_NOTICE, "cxgb4 stats - sends %lu recv %lu read %lu " 522 "write %lu arm %lu cqe %lu mr %lu qp %lu cq %lu\n", 523 c4iw_stats.send, c4iw_stats.recv, c4iw_stats.read, 524 c4iw_stats.write, c4iw_stats.arm, c4iw_stats.cqe, 525 c4iw_stats.mr, c4iw_stats.qp, c4iw_stats.cq); 526 } 527 #endif 528