1 /* 2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 4 * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved. 5 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. 6 * 7 * This software is available to you under a choice of one of two 8 * licenses. You may choose to be licensed under the terms of the GNU 9 * General Public License (GPL) Version 2, available from the file 10 * COPYING in the main directory of this source tree, or the 11 * OpenIB.org BSD license below: 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer. 20 * 21 * - Redistributions in binary form must reproduce the above 22 * copyright notice, this list of conditions and the following 23 * disclaimer in the documentation and/or other materials 24 * provided with the distribution. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * SOFTWARE. 
 */

#define LINUXKPI_PARAM_PREFIX mlx4_

#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/string.h>
#include <linux/fs.h>

#include <dev/mlx4/device.h>
#include <dev/mlx4/doorbell.h>

#include "mlx4.h"
#include "fw.h"
#include "icm.h"
#include <dev/mlx4/stats.h>

/* Mellanox ConnectX HCA low-level driver */

/* Driver-wide workqueue, shared by the catas/sense/port handlers. */
struct workqueue_struct *mlx4_wq;

#ifdef CONFIG_MLX4_DEBUG

int mlx4_debug_level = 0;
module_param_named(debug_level, mlx4_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");

#endif /* CONFIG_MLX4_DEBUG */

#ifdef CONFIG_PCI_MSI

static int msi_x = 1;
module_param(msi_x, int, 0444);
MODULE_PARM_DESC(msi_x, "0 - don't use MSI-X, 1 - use MSI-X, >1 - limit number of MSI-X irqs to msi_x (non-SRIOV only)");

#else /* CONFIG_PCI_MSI */

/* Without PCI MSI support the MSI-X knob is hard-wired off. */
#define msi_x (0)

#endif /* CONFIG_PCI_MSI */

static int enable_sys_tune = 0;
module_param(enable_sys_tune, int, 0444);
MODULE_PARM_DESC(enable_sys_tune, "Tune the cpu's for better performance (default 0)");

int mlx4_blck_lb = 1;
module_param_named(block_loopback, mlx4_blck_lb, int, 0644);
MODULE_PARM_DESC(block_loopback, "Block multicast loopback packets if > 0 "
		 "(default: 1)");

/* Sizes of the textual "bus:dev.fn-" / "domain:bus:dev.fn-" prefixes
 * parsed out of the per-device module parameter strings below. */
enum {
	DEFAULT_DOMAIN	= 0,
	BDF_STR_SIZE	= 8,	/* bb:dd.f- */
	DBDF_STR_SIZE	= 13	/* mmmm:bb:dd.f- */
};

/* Identifiers for the per-device (dbdf2val) parameters. */
enum {
	NUM_VFS,
	PROBE_VF,
	PORT_TYPE_ARRAY
};

/* Result codes returned by update_defaults(). */
enum {
	VALID_DATA,
	INVALID_DATA,
	INVALID_STR
};

/* Couples a parameter id with its string -> per-device value table. */
struct param_data {
	int				id;
	struct mlx4_dbdf2val_lst	dbdf2val;
};

static struct param_data num_vfs = {
	.id		= NUM_VFS,
	.dbdf2val = {
		.name		= "num_vfs param",
		.num_vals	= 1,
		.def_val	= {0},
		.range		= {0, MLX4_MAX_NUM_VF}
	}
};
module_param_string(num_vfs, num_vfs.dbdf2val.str,
		    sizeof(num_vfs.dbdf2val.str), 0444);
MODULE_PARM_DESC(num_vfs,
		 "Either single value (e.g. '5') to define uniform num_vfs value for all devices functions\n"
		 "\t\tor a string to map device function numbers to their num_vfs values (e.g. '0000:04:00.0-5,002b:1c:0b.a-15').\n"
		 "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for num_vfs value (e.g. 15).");

static struct param_data probe_vf = {
	.id		= PROBE_VF,
	.dbdf2val = {
		.name		= "probe_vf param",
		.num_vals	= 1,
		.def_val	= {0},
		.range		= {0, MLX4_MAX_NUM_VF}
	}
};
module_param_string(probe_vf, probe_vf.dbdf2val.str,
		    sizeof(probe_vf.dbdf2val.str), 0444);
MODULE_PARM_DESC(probe_vf,
		 "Either single value (e.g. '3') to define uniform number of VFs to probe by the pf driver for all devices functions\n"
		 "\t\tor a string to map device function numbers to their probe_vf values (e.g. '0000:04:00.0-3,002b:1c:0b.a-13').\n"
		 "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for probe_vf value (e.g. 13).");

int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;

module_param_named(log_num_mgm_entry_size,
		   mlx4_log_num_mgm_entry_size, int, 0444);
MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
		 " of qp per mcg, for example:"
		 " 10 gives 248.range: 7 <="
		 " log_num_mgm_entry_size <= 12."
		 " To activate device managed"
		 " flow steering when available, set to -1");

static int high_rate_steer;
module_param(high_rate_steer, int, 0444);
MODULE_PARM_DESC(high_rate_steer, "Enable steering mode for higher packet rate"
		 " (default off)");

static int fast_drop;
module_param_named(fast_drop, fast_drop, int, 0444);
MODULE_PARM_DESC(fast_drop,
		 "Enable fast packet drop when no receive WQEs are posted");

int mlx4_enable_64b_cqe_eqe = 1;
module_param_named(enable_64b_cqe_eqe, mlx4_enable_64b_cqe_eqe, int, 0644);
MODULE_PARM_DESC(enable_64b_cqe_eqe,
		 "Enable 64 byte CQEs/EQEs when the FW supports this if non-zero (default: 1)");

/* No HCA global capabilities are understood by this driver (yet). */
#define HCA_GLOBAL_CAP_MASK 0

#define PF_CONTEXT_BEHAVIOUR_MASK MLX4_FUNC_CAP_64B_EQE_CQE

static char mlx4_version[] __devinitdata =
	DRV_NAME ": Mellanox ConnectX VPI driver v"
	DRV_VERSION "\n";

static int log_num_mac = 7;
module_param_named(log_num_mac, log_num_mac, int, 0444);
MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");

/* Kept only for backward compatibility; the effective value is the
 * MLX4_LOG_NUM_VLANS constant below. */
static int log_num_vlan;
module_param_named(log_num_vlan, log_num_vlan, int, 0444);
MODULE_PARM_DESC(log_num_vlan,
		 "(Obsolete) Log2 max number of VLANs per ETH port (0-7)");
/* Log2 max number of VLANs per ETH port (0-7) */
#define MLX4_LOG_NUM_VLANS 7

int log_mtts_per_seg = ilog2(1);
module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment "
		 "(0-7) (default: 0)");

static struct param_data port_type_array = {
	.id		= PORT_TYPE_ARRAY,
	.dbdf2val = {
		.name		= "port_type_array param",
		.num_vals	= 2,
		.def_val	= {MLX4_PORT_TYPE_ETH, MLX4_PORT_TYPE_ETH},
		.range		= {MLX4_PORT_TYPE_IB, MLX4_PORT_TYPE_NA}
	}
};
module_param_string(port_type_array, port_type_array.dbdf2val.str,
		    sizeof(port_type_array.dbdf2val.str),
		    0444);
MODULE_PARM_DESC(port_type_array,
		 "Either pair of values (e.g. '1,2') to define uniform port1/port2 types configuration for all devices functions\n"
		 "\t\tor a string to map device function numbers to their pair of port types values (e.g. '0000:04:00.0-1;2,002b:1c:0b.a-1;1').\n"
		 "\t\tValid port types: 1-ib, 2-eth, 3-auto, 4-N/A\n"
		 "\t\tIn case that only one port is available use the N/A port type for port2 (e.g '1,4').");


struct mlx4_port_config {
	struct list_head list;
	enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
	struct pci_dev *pdev;
};

#define MLX4_LOG_NUM_MTT 20
/* We limit to 30 as of a bit map issue which uses int and not uint.
   see mlx4_buddy_init -> bitmap_zero which gets int.
*/
#define MLX4_MAX_LOG_NUM_MTT 30

/* Default resource profile; each field is a log2 count and is
 * overridable through the module parameters declared below. */
static struct mlx4_profile mod_param_profile = {
	.num_qp         = 19,
	.num_srq        = 16,
	.rdmarc_per_qp  = 4,
	.num_cq         = 16,
	.num_mcg        = 13,
	.num_mpt        = 19,
	.num_mtt_segs   = 0, /* max(20, 2*MTTs for host memory)) */
};

module_param_named(log_num_qp, mod_param_profile.num_qp, int, 0444);
MODULE_PARM_DESC(log_num_qp, "log maximum number of QPs per HCA (default: 19)");

module_param_named(log_num_srq, mod_param_profile.num_srq, int, 0444);
MODULE_PARM_DESC(log_num_srq, "log maximum number of SRQs per HCA "
		 "(default: 16)");

module_param_named(log_rdmarc_per_qp, mod_param_profile.rdmarc_per_qp, int,
		   0444);
MODULE_PARM_DESC(log_rdmarc_per_qp, "log number of RDMARC buffers per QP "
		 "(default: 4)");

module_param_named(log_num_cq, mod_param_profile.num_cq, int, 0444);
MODULE_PARM_DESC(log_num_cq, "log maximum number of CQs per HCA (default: 16)");

module_param_named(log_num_mcg, mod_param_profile.num_mcg, int, 0444);
MODULE_PARM_DESC(log_num_mcg, "log maximum number of multicast groups per HCA "
		 "(default: 13)");

module_param_named(log_num_mpt, mod_param_profile.num_mpt, int,
0444); 254 MODULE_PARM_DESC(log_num_mpt, 255 "log maximum number of memory protection table entries per " 256 "HCA (default: 19)"); 257 258 module_param_named(log_num_mtt, mod_param_profile.num_mtt_segs, int, 0444); 259 MODULE_PARM_DESC(log_num_mtt, 260 "log maximum number of memory translation table segments per " 261 "HCA (default: max(20, 2*MTTs for register all of the host memory limited to 30))"); 262 263 enum { 264 MLX4_IF_STATE_BASIC, 265 MLX4_IF_STATE_EXTENDED 266 }; 267 268 static inline u64 dbdf_to_u64(int domain, int bus, int dev, int fn) 269 { 270 return (domain << 20) | (bus << 12) | (dev << 4) | fn; 271 } 272 273 static inline void pr_bdf_err(const char *dbdf, const char *pname) 274 { 275 pr_warn("mlx4_core: '%s' is not valid bdf in '%s'\n", dbdf, pname); 276 } 277 278 static inline void pr_val_err(const char *dbdf, const char *pname, 279 const char *val) 280 { 281 pr_warn("mlx4_core: value '%s' of bdf '%s' in '%s' is not valid\n" 282 , val, dbdf, pname); 283 } 284 285 static inline void pr_out_of_range_bdf(const char *dbdf, int val, 286 struct mlx4_dbdf2val_lst *dbdf2val) 287 { 288 pr_warn("mlx4_core: value %d in bdf '%s' of '%s' is out of its valid range (%d,%d)\n" 289 , val, dbdf, dbdf2val->name , dbdf2val->range.min, 290 dbdf2val->range.max); 291 } 292 293 static inline void pr_out_of_range(struct mlx4_dbdf2val_lst *dbdf2val) 294 { 295 pr_warn("mlx4_core: value of '%s' is out of its valid range (%d,%d)\n" 296 , dbdf2val->name , dbdf2val->range.min, dbdf2val->range.max); 297 } 298 299 static inline int is_in_range(int val, struct mlx4_range *r) 300 { 301 return (val >= r->min && val <= r->max); 302 } 303 304 static int update_defaults(struct param_data *pdata) 305 { 306 long int val[MLX4_MAX_BDF_VALS]; 307 int ret; 308 char *t, *p = pdata->dbdf2val.str; 309 char sval[32]; 310 int val_len; 311 312 if (!strlen(p) || strchr(p, ':') || strchr(p, '.') || strchr(p, ';')) 313 return INVALID_STR; 314 315 switch (pdata->id) { 316 case PORT_TYPE_ARRAY: 317 t 
= strchr(p, ','); 318 if (!t || t == p || (t - p) > sizeof(sval)) 319 return INVALID_STR; 320 321 val_len = t - p; 322 strncpy(sval, p, val_len); 323 sval[val_len] = 0; 324 325 ret = kstrtol(sval, 0, &val[0]); 326 if (ret == -EINVAL) 327 return INVALID_STR; 328 if (ret || !is_in_range(val[0], &pdata->dbdf2val.range)) { 329 pr_out_of_range(&pdata->dbdf2val); 330 return INVALID_DATA; 331 } 332 333 ret = kstrtol(t + 1, 0, &val[1]); 334 if (ret == -EINVAL) 335 return INVALID_STR; 336 if (ret || !is_in_range(val[1], &pdata->dbdf2val.range)) { 337 pr_out_of_range(&pdata->dbdf2val); 338 return INVALID_DATA; 339 } 340 341 pdata->dbdf2val.tbl[0].val[0] = val[0]; 342 pdata->dbdf2val.tbl[0].val[1] = val[1]; 343 break; 344 345 case NUM_VFS: 346 case PROBE_VF: 347 ret = kstrtol(p, 0, &val[0]); 348 if (ret == -EINVAL) 349 return INVALID_STR; 350 if (ret || !is_in_range(val[0], &pdata->dbdf2val.range)) { 351 pr_out_of_range(&pdata->dbdf2val); 352 return INVALID_DATA; 353 } 354 pdata->dbdf2val.tbl[0].val[0] = val[0]; 355 break; 356 } 357 pdata->dbdf2val.tbl[1].dbdf = MLX4_ENDOF_TBL; 358 359 return VALID_DATA; 360 } 361 362 int mlx4_fill_dbdf2val_tbl(struct mlx4_dbdf2val_lst *dbdf2val_lst) 363 { 364 int domain, bus, dev, fn; 365 u64 dbdf; 366 char *p, *t, *v; 367 char tmp[32]; 368 char sbdf[32]; 369 char sep = ','; 370 int j, k, str_size, i = 1; 371 int prfx_size; 372 373 p = dbdf2val_lst->str; 374 375 for (j = 0; j < dbdf2val_lst->num_vals; j++) 376 dbdf2val_lst->tbl[0].val[j] = dbdf2val_lst->def_val[j]; 377 dbdf2val_lst->tbl[1].dbdf = MLX4_ENDOF_TBL; 378 379 str_size = strlen(dbdf2val_lst->str); 380 381 if (str_size == 0) 382 return 0; 383 384 while (strlen(p)) { 385 prfx_size = BDF_STR_SIZE; 386 sbdf[prfx_size] = 0; 387 strncpy(sbdf, p, prfx_size); 388 domain = DEFAULT_DOMAIN; 389 if (sscanf(sbdf, "%02x:%02x.%x-", &bus, &dev, &fn) != 3) { 390 prfx_size = DBDF_STR_SIZE; 391 sbdf[prfx_size] = 0; 392 strncpy(sbdf, p, prfx_size); 393 if (sscanf(sbdf, "%04x:%02x:%02x.%x-", &domain, 
&bus, 394 &dev, &fn) != 4) { 395 pr_bdf_err(sbdf, dbdf2val_lst->name); 396 goto err; 397 } 398 sprintf(tmp, "%04x:%02x:%02x.%x-", domain, bus, dev, 399 fn); 400 } else { 401 sprintf(tmp, "%02x:%02x.%x-", bus, dev, fn); 402 } 403 404 if (strnicmp(sbdf, tmp, sizeof(tmp))) { 405 pr_bdf_err(sbdf, dbdf2val_lst->name); 406 goto err; 407 } 408 409 dbdf = dbdf_to_u64(domain, bus, dev, fn); 410 411 for (j = 1; j < i; j++) 412 if (dbdf2val_lst->tbl[j].dbdf == dbdf) { 413 pr_warn("mlx4_core: in '%s', %s appears multiple times\n" 414 , dbdf2val_lst->name, sbdf); 415 goto err; 416 } 417 418 if (i >= MLX4_DEVS_TBL_SIZE) { 419 pr_warn("mlx4_core: Too many devices in '%s'\n" 420 , dbdf2val_lst->name); 421 goto err; 422 } 423 424 p += prfx_size; 425 t = strchr(p, sep); 426 t = t ? t : p + strlen(p); 427 if (p >= t) { 428 pr_val_err(sbdf, dbdf2val_lst->name, ""); 429 goto err; 430 } 431 432 for (k = 0; k < dbdf2val_lst->num_vals; k++) { 433 char sval[32]; 434 long int val; 435 int ret, val_len; 436 char vsep = ';'; 437 438 v = (k == dbdf2val_lst->num_vals - 1) ? 
t : strchr(p, vsep); 439 if (!v || v > t || v == p || (v - p) > sizeof(sval)) { 440 pr_val_err(sbdf, dbdf2val_lst->name, p); 441 goto err; 442 } 443 val_len = v - p; 444 strncpy(sval, p, val_len); 445 sval[val_len] = 0; 446 447 ret = kstrtol(sval, 0, &val); 448 if (ret) { 449 if (strchr(p, vsep)) 450 pr_warn("mlx4_core: too many vals in bdf '%s' of '%s'\n" 451 , sbdf, dbdf2val_lst->name); 452 else 453 pr_val_err(sbdf, dbdf2val_lst->name, 454 sval); 455 goto err; 456 } 457 if (!is_in_range(val, &dbdf2val_lst->range)) { 458 pr_out_of_range_bdf(sbdf, val, dbdf2val_lst); 459 goto err; 460 } 461 462 dbdf2val_lst->tbl[i].val[k] = val; 463 p = v; 464 if (p[0] == vsep) 465 p++; 466 } 467 468 dbdf2val_lst->tbl[i].dbdf = dbdf; 469 if (strlen(p)) { 470 if (p[0] != sep) { 471 pr_warn("mlx4_core: expect separator '%c' before '%s' in '%s'\n" 472 , sep, p, dbdf2val_lst->name); 473 goto err; 474 } 475 p++; 476 } 477 i++; 478 if (i < MLX4_DEVS_TBL_SIZE) 479 dbdf2val_lst->tbl[i].dbdf = MLX4_ENDOF_TBL; 480 } 481 482 return 0; 483 484 err: 485 dbdf2val_lst->tbl[1].dbdf = MLX4_ENDOF_TBL; 486 pr_warn("mlx4_core: The value of '%s' is incorrect. 
The value is discarded!\n" 487 , dbdf2val_lst->name); 488 489 return -EINVAL; 490 } 491 EXPORT_SYMBOL(mlx4_fill_dbdf2val_tbl); 492 493 int mlx4_get_val(struct mlx4_dbdf2val *tbl, struct pci_dev *pdev, int idx, 494 int *val) 495 { 496 u64 dbdf; 497 int i = 1; 498 499 *val = tbl[0].val[idx]; 500 if (!pdev) 501 return -EINVAL; 502 503 dbdf = dbdf_to_u64(pci_get_domain(pdev->dev.bsddev), pci_get_bus(pdev->dev.bsddev), 504 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); 505 506 while ((i < MLX4_DEVS_TBL_SIZE) && (tbl[i].dbdf != MLX4_ENDOF_TBL)) { 507 if (tbl[i].dbdf == dbdf) { 508 *val = tbl[i].val[idx]; 509 return 0; 510 } 511 i++; 512 } 513 514 return 0; 515 } 516 EXPORT_SYMBOL(mlx4_get_val); 517 518 static void process_mod_param_profile(struct mlx4_profile *profile) 519 { 520 vm_size_t hwphyssz; 521 hwphyssz = 0; 522 TUNABLE_ULONG_FETCH("hw.realmem", (u_long *) &hwphyssz); 523 524 profile->num_qp = 1 << mod_param_profile.num_qp; 525 profile->num_srq = 1 << mod_param_profile.num_srq; 526 profile->rdmarc_per_qp = 1 << mod_param_profile.rdmarc_per_qp; 527 profile->num_cq = 1 << mod_param_profile.num_cq; 528 profile->num_mcg = 1 << mod_param_profile.num_mcg; 529 profile->num_mpt = 1 << mod_param_profile.num_mpt; 530 /* 531 * We want to scale the number of MTTs with the size of the 532 * system memory, since it makes sense to register a lot of 533 * memory on a system with a lot of memory. As a heuristic, 534 * make sure we have enough MTTs to register twice the system 535 * memory (with PAGE_SIZE entries). 536 * 537 * This number has to be a power of two and fit into 32 bits 538 * due to device limitations. We cap this at 2^30 as of bit map 539 * limitation to work with int instead of uint (mlx4_buddy_init -> bitmap_zero) 540 * That limits us to 4TB of memory registration per HCA with 541 * 4KB pages, which is probably OK for the next few months. 
542 */ 543 if (mod_param_profile.num_mtt_segs) 544 profile->num_mtt_segs = 1 << mod_param_profile.num_mtt_segs; 545 else { 546 profile->num_mtt_segs = 547 roundup_pow_of_two(max_t(unsigned, 548 1 << (MLX4_LOG_NUM_MTT - log_mtts_per_seg), 549 min(1UL << 550 (MLX4_MAX_LOG_NUM_MTT - 551 log_mtts_per_seg), 552 (hwphyssz << 1) 553 >> log_mtts_per_seg))); 554 /* set the actual value, so it will be reflected to the user 555 using the sysfs */ 556 mod_param_profile.num_mtt_segs = ilog2(profile->num_mtt_segs); 557 } 558 } 559 560 int mlx4_check_port_params(struct mlx4_dev *dev, 561 enum mlx4_port_type *port_type) 562 { 563 int i; 564 565 for (i = 0; i < dev->caps.num_ports - 1; i++) { 566 if (port_type[i] != port_type[i + 1]) { 567 if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) { 568 mlx4_err(dev, "Only same port types supported " 569 "on this HCA, aborting.\n"); 570 return -EINVAL; 571 } 572 } 573 } 574 575 for (i = 0; i < dev->caps.num_ports; i++) { 576 if (!(port_type[i] & dev->caps.supported_type[i+1])) { 577 mlx4_err(dev, "Requested port type for port %d is not " 578 "supported on this HCA\n", i + 1); 579 return -EINVAL; 580 } 581 } 582 return 0; 583 } 584 585 static void mlx4_set_port_mask(struct mlx4_dev *dev) 586 { 587 int i; 588 589 for (i = 1; i <= dev->caps.num_ports; ++i) 590 dev->caps.port_mask[i] = dev->caps.port_type[i]; 591 } 592 593 enum { 594 MLX4_QUERY_FUNC_NUM_SYS_EQS = 1 << 0, 595 }; 596 597 static int mlx4_query_func(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) 598 { 599 int err = 0; 600 struct mlx4_func func; 601 602 if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) { 603 err = mlx4_QUERY_FUNC(dev, &func, 0); 604 if (err) { 605 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); 606 return err; 607 } 608 dev_cap->max_eqs = func.max_eq; 609 dev_cap->reserved_eqs = func.rsvd_eqs; 610 dev_cap->reserved_uars = func.rsvd_uars; 611 err |= MLX4_QUERY_FUNC_NUM_SYS_EQS; 612 } 613 return err; 614 } 615 616 static int mlx4_dev_cap(struct 
mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
	int err;
	int i;

	/* Fetch raw device capabilities from firmware, then translate
	 * them into the driver's dev->caps / dev->phys_caps view. */
	err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
	if (err) {
		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
		return err;
	}

	/* Sanity checks against host/kernel limits before committing. */
	if (dev_cap->min_page_sz > PAGE_SIZE) {
		mlx4_err(dev, "HCA minimum page size of %d bigger than "
			 "kernel PAGE_SIZE of %d, aborting.\n",
			 dev_cap->min_page_sz, (int)PAGE_SIZE);
		return -ENODEV;
	}
	if (dev_cap->num_ports > MLX4_MAX_PORTS) {
		mlx4_err(dev, "HCA has %d ports, but we only support %d, "
			 "aborting.\n",
			 dev_cap->num_ports, MLX4_MAX_PORTS);
		return -ENODEV;
	}

	if (dev_cap->uar_size > pci_resource_len(dev->pdev, 2)) {
		mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than "
			 "PCI resource 2 size of 0x%llx, aborting.\n",
			 dev_cap->uar_size,
			 (unsigned long long) pci_resource_len(dev->pdev, 2));
		return -ENODEV;
	}

	dev->caps.num_ports	     = dev_cap->num_ports;
	dev->caps.num_sys_eqs = dev_cap->num_sys_eqs;
	dev->phys_caps.num_phys_eqs = dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS ?
				      dev->caps.num_sys_eqs :
				      MLX4_MAX_EQ_NUM;
	/* Per-port capabilities (ports are 1-based throughout mlx4). */
	for (i = 1; i <= dev->caps.num_ports; ++i) {
		dev->caps.vl_cap[i]	    = dev_cap->max_vl[i];
		dev->caps.ib_mtu_cap[i]	    = dev_cap->ib_mtu[i];
		dev->phys_caps.gid_phys_table_len[i]  = dev_cap->max_gids[i];
		dev->phys_caps.pkey_phys_table_len[i] = dev_cap->max_pkeys[i];
		/* set gid and pkey table operating lengths by default
		 * to non-sriov values */
		dev->caps.gid_table_len[i]  = dev_cap->max_gids[i];
		dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i];
		dev->caps.port_width_cap[i] = dev_cap->max_port_width[i];
		dev->caps.eth_mtu_cap[i]    = dev_cap->eth_mtu[i];
		dev->caps.def_mac[i]        = dev_cap->def_mac[i];
		dev->caps.supported_type[i] = dev_cap->supported_port_types[i];
		dev->caps.suggested_type[i] = dev_cap->suggested_type[i];
		dev->caps.default_sense[i] = dev_cap->default_sense[i];
		dev->caps.trans_type[i]	    = dev_cap->trans_type[i];
		dev->caps.vendor_oui[i]     = dev_cap->vendor_oui[i];
		dev->caps.wavelength[i]     = dev_cap->wavelength[i];
		dev->caps.trans_code[i]     = dev_cap->trans_code[i];
	}

	dev->caps.uar_page_size	     = PAGE_SIZE;
	dev->caps.num_uars	     = dev_cap->uar_size / PAGE_SIZE;
	dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
	dev->caps.bf_reg_size	     = dev_cap->bf_reg_size;
	dev->caps.bf_regs_per_page   = dev_cap->bf_regs_per_page;
	dev->caps.max_sq_sg	     = dev_cap->max_sq_sg;
	dev->caps.max_rq_sg	     = dev_cap->max_rq_sg;
	dev->caps.max_wqes	     = dev_cap->max_qp_sz;
	dev->caps.max_qp_init_rdma   = dev_cap->max_requester_per_qp;
	dev->caps.max_srq_wqes	     = dev_cap->max_srq_sz;
	dev->caps.max_srq_sge	     = dev_cap->max_rq_sg - 1;
	dev->caps.reserved_srqs	     = dev_cap->reserved_srqs;
	dev->caps.max_sq_desc_sz     = dev_cap->max_sq_desc_sz;
	dev->caps.max_rq_desc_sz     = dev_cap->max_rq_desc_sz;
	/*
	 * Subtract 1 from the limit because we need to allocate a
	 * spare CQE to enable resizing the CQ
	 */
	dev->caps.max_cqes	     = dev_cap->max_cq_sz - 1;
	dev->caps.reserved_cqs	     = dev_cap->reserved_cqs;
	dev->caps.reserved_eqs	     = dev_cap->reserved_eqs;
	dev->caps.reserved_mtts      = dev_cap->reserved_mtts;
	dev->caps.reserved_mrws	     = dev_cap->reserved_mrws;

	/* The first 128 UARs are used for EQ doorbells */
	dev->caps.reserved_uars	     = max_t(int, 128, dev_cap->reserved_uars);
	dev->caps.reserved_pds	     = dev_cap->reserved_pds;
	/* XRC domains exist only when the XRC capability is present. */
	dev->caps.reserved_xrcds     = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
					dev_cap->reserved_xrcds : 0;
	dev->caps.max_xrcds          = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
					dev_cap->max_xrcds : 0;
	dev->caps.mtt_entry_sz       = dev_cap->mtt_entry_sz;

	dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
	dev->caps.page_size_cap	     = ~(u32) (dev_cap->min_page_sz - 1);
	dev->caps.flags		     = dev_cap->flags;
	dev->caps.flags2	     = dev_cap->flags2;
	dev->caps.bmme_flags	     = dev_cap->bmme_flags;
	dev->caps.reserved_lkey	     = dev_cap->reserved_lkey;
	dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
	dev->caps.cq_timestamp       = dev_cap->timestamp_support;
	dev->caps.max_gso_sz	     = dev_cap->max_gso_sz;
	dev->caps.max_rss_tbl_sz     = dev_cap->max_rss_tbl_sz;

	/* Sense port always allowed on supported devices for ConnectX-1 and -2 */
	if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT)
		dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
	/* Don't do sense port on multifunction devices (for now at least) */
	if (mlx4_is_mfunc(dev))
		dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;

	dev->caps.log_num_macs  = log_num_mac;
	dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;

	/* fast_drop takes effect only when firmware advertises it. */
	dev->caps.fast_drop	= fast_drop ?
				  !!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FAST_DROP) :
				  0;

	for (i = 1; i <= dev->caps.num_ports; ++i) {
		dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE;
		if (dev->caps.supported_type[i]) {
			/* if only ETH is supported - assign ETH */
			if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH)
				dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
			/* if only IB is supported, assign IB */
			else if (dev->caps.supported_type[i] ==
				 MLX4_PORT_TYPE_IB)
				dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
			else {
				/*
				 * if IB and ETH are supported, we set the port
				 * type according to user selection of port type;
				 * if there is no user selection, take the FW hint
				 */
				int pta;
				mlx4_get_val(port_type_array.dbdf2val.tbl,
					     pci_physfn(dev->pdev), i - 1,
					     &pta);
				if (pta == MLX4_PORT_TYPE_NONE) {
					dev->caps.port_type[i] = dev->caps.suggested_type[i] ?
						MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB;
				} else if (pta == MLX4_PORT_TYPE_NA) {
					mlx4_err(dev, "Port %d is valid port. "
						 "It is not allowed to configure its type to N/A(%d)\n",
						 i, MLX4_PORT_TYPE_NA);
					return -EINVAL;
				} else {
					dev->caps.port_type[i] = pta;
				}
			}
		}
		/*
		 * Link sensing is allowed on the port if 3 conditions are true:
		 * 1. Both protocols are supported on the port.
		 * 2. Different types are supported on the port
		 * 3. FW declared that it supports link sensing
		 */
		mlx4_priv(dev)->sense.sense_allowed[i] =
			((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) &&
			 (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
			 (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT));

		/* Disabling auto sense for default Eth ports support;
		 * note this deliberately overrides the computation above. */
		mlx4_priv(dev)->sense.sense_allowed[i] = 0;

		/*
		 * If "default_sense" bit is set, we move the port to "AUTO" mode
		 * and perform sense_port FW command to try and set the correct
		 * port type from beginning
		 */
		if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) {
			enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE;
			dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO;
			mlx4_SENSE_PORT(dev, i, &sensed_port);
			if (sensed_port != MLX4_PORT_TYPE_NONE)
				dev->caps.port_type[i] = sensed_port;
		} else {
			dev->caps.possible_type[i] = dev->caps.port_type[i];
		}

		/* Clamp MAC/VLAN table sizes to the per-port FW limits. */
		if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) {
			dev->caps.log_num_macs = dev_cap->log_max_macs[i];
			mlx4_warn(dev, "Requested number of MACs is too much "
				  "for port %d, reducing to %d.\n",
				  i, 1 << dev->caps.log_num_macs);
		}
		if (dev->caps.log_num_vlans > dev_cap->log_max_vlans[i]) {
			dev->caps.log_num_vlans = dev_cap->log_max_vlans[i];
			mlx4_warn(dev, "Requested number of VLANs is too much "
				  "for port %d, reducing to %d.\n",
				  i, 1 << dev->caps.log_num_vlans);
		}
	}

	dev->caps.max_basic_counters = dev_cap->max_basic_counters;
	dev->caps.max_extended_counters = dev_cap->max_extended_counters;
	/* support extended counters if available */
	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS_EXT)
		dev->caps.max_counters = dev->caps.max_extended_counters;
	else
		dev->caps.max_counters = dev->caps.max_basic_counters;

	/* Carve up the reserved-QP space per region. */
	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
		(1 << dev->caps.log_num_macs) *
		(1 << dev->caps.log_num_vlans) *
		dev->caps.num_ports;
	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;

	dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] +
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];

	dev->caps.sync_qp = dev_cap->sync_qp;
	/* NOTE(review): 0x1003 appears to be a specific ConnectX PCI device
	 * id that needs the CQ_FLAG_IO quirk - confirm against the id table. */
	if (dev->pdev->device == 0x1003)
		dev->caps.cq_flags |= MLX4_DEV_CAP_CQ_FLAG_IO;

	dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;

	/* Honour the enable_64b_cqe_eqe=0 override on the PF/native side. */
	if (!mlx4_enable_64b_cqe_eqe && !mlx4_is_slave(dev)) {
		if (dev_cap->flags &
		    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
			mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
		}
	}

	if ((dev->caps.flags &
	    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) &&
	    mlx4_is_master(dev))
		dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;

	if (!mlx4_is_slave(dev)) {
		for (i = 0; i < dev->caps.num_ports; ++i)
			dev->caps.def_counter_index[i] = i << 1;

		dev->caps.alloc_res_qp_mask =
			(dev->caps.bf_reg_size ?
			 MLX4_RESERVE_ETH_BF_QP : 0);
	} else {
		/* Slaves don't get to reserve blue-flame QPs. */
		dev->caps.alloc_res_qp_mask = 0;
	}

	return 0;
}

/* Count the VFs that are still active, i.e. whose last command was not
 * MLX4_COMM_CMD_RESET; slot 0 is the PPF and is skipped. */
static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_slave_state *s_state;
	int i;
	int ret = 0;

	for (i = 1/*the ppf is 0*/; i < dev->num_slaves; ++i) {
		s_state = &priv->mfunc.master.slave_state[i];
		if (s_state->active && s_state->last_cmd !=
		    MLX4_COMM_CMD_RESET) {
			mlx4_warn(dev, "%s: slave: %d is still active\n",
				  __func__, i);
			ret++;
		}
	}
	return ret;
}

/*
 * Map a proxy/tunnel special QP number to its paravirtualized qkey.
 * Returns -EINVAL when @qpn is outside the proxy..tunnel SQP window.
 */
int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey)
{
	u32 qk = MLX4_RESERVED_QKEY_BASE;

	if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX ||
	    qpn < dev->phys_caps.base_proxy_sqpn)
		return -EINVAL;

	if (qpn >= dev->phys_caps.base_tunnel_sqpn)
		/* tunnel qp */
		qk += qpn - dev->phys_caps.base_tunnel_sqpn;
	else
		qk += qpn - dev->phys_caps.base_proxy_sqpn;
	*qkey = qk;
	return 0;
}
EXPORT_SYMBOL(mlx4_get_parav_qkey);

/* Record a slave's virtual->physical pkey mapping (master only; no-op
 * otherwise).  @port is 1-based, hence the "port - 1" index. */
void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val)
{
	struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

	if (!mlx4_is_master(dev))
		return;

	priv->virt2phys_pkey[slave][port - 1][i] = val;
}
EXPORT_SYMBOL(mlx4_sync_pkey_table);

/* Store a slave's node GUID on the master (no-op on non-masters). */
void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid)
{
	struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

	if (!mlx4_is_master(dev))
		return;

	priv->slave_node_guids[slave] = guid;
}
EXPORT_SYMBOL(mlx4_put_slave_node_guid);

/* Fetch a slave's node GUID; returns 0 when not running as master. */
__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave)
{
	struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

	if (!mlx4_is_master(dev))
		return 0;

	return priv->slave_node_guids[slave];
}
EXPORT_SYMBOL(mlx4_get_slave_node_guid);

/* Report whether @slave is currently marked active by the master;
 * always 0 when this function is not the master. */
int mlx4_is_slave_active(struct mlx4_dev *dev, int slave)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_slave_state *s_slave;

	if (!mlx4_is_master(dev))
		return 0;

	s_slave = &priv->mfunc.master.slave_state[slave];
	return !!s_slave->active;
}
EXPORT_SYMBOL(mlx4_is_slave_active);

/* Adopt the steering mode chosen by the master (from INIT_HCA params)
 * and derive the per-MCG QP count accordingly. */
static void slave_adjust_steering_mode(struct mlx4_dev *dev,
				       struct mlx4_dev_cap *dev_cap,
				       struct mlx4_init_hca_param *hca_param)
{
	dev->caps.steering_mode = hca_param->steering_mode;
	if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED)
		dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
	else
		dev->caps.num_qp_per_mgm =
			4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2);

	mlx4_dbg(dev, "Steering mode is: %s\n",
		 mlx4_steering_mode_str(dev->caps.steering_mode));
}

/* Build the capability view for a slave (VF) function by querying the
 * configuration the master/firmware already established. */
static int mlx4_slave_cap(struct mlx4_dev *dev)
{
	int err;
	u32 page_size;
	struct mlx4_dev_cap dev_cap;
	struct mlx4_func_cap func_cap;
	struct mlx4_init_hca_param hca_param;
	int i;

	memset(&hca_param, 0, sizeof(hca_param));
	err = mlx4_QUERY_HCA(dev, &hca_param);
	if (err) {
		mlx4_err(dev, "QUERY_HCA command failed, aborting.\n");
		return err;
	}

	/*fail if the hca has an unknown capability */
	if ((hca_param.global_caps | HCA_GLOBAL_CAP_MASK) !=
	    HCA_GLOBAL_CAP_MASK) {
		mlx4_err(dev, "Unknown hca global capabilities\n");
		return -ENOSYS;
	}

	mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz;

	dev->caps.hca_core_clock = hca_param.hca_core_clock;

	memset(&dev_cap, 0, sizeof(dev_cap));
	dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp;
	err = mlx4_dev_cap(dev, &dev_cap);
	if (err) {
		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
		return err;
	}

	err =
mlx4_QUERY_FW(dev); 996 if (err) 997 mlx4_err(dev, "QUERY_FW command failed: could not get FW version.\n"); 998 999 if (!hca_param.mw_enable) { 1000 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_MEM_WINDOW; 1001 dev->caps.bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN; 1002 } 1003 1004 page_size = ~dev->caps.page_size_cap + 1; 1005 mlx4_warn(dev, "HCA minimum page size:%d\n", page_size); 1006 if (page_size > PAGE_SIZE) { 1007 mlx4_err(dev, "HCA minimum page size of %d bigger than " 1008 "kernel PAGE_SIZE of %d, aborting.\n", 1009 page_size, (int)PAGE_SIZE); 1010 return -ENODEV; 1011 } 1012 1013 /* slave gets uar page size from QUERY_HCA fw command */ 1014 dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12); 1015 1016 /* TODO: relax this assumption */ 1017 if (dev->caps.uar_page_size != PAGE_SIZE) { 1018 mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %d\n", 1019 dev->caps.uar_page_size, (int)PAGE_SIZE); 1020 return -ENODEV; 1021 } 1022 1023 memset(&func_cap, 0, sizeof(func_cap)); 1024 err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap); 1025 if (err) { 1026 mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d).\n", 1027 err); 1028 return err; 1029 } 1030 1031 if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) != 1032 PF_CONTEXT_BEHAVIOUR_MASK) { 1033 mlx4_err(dev, "Unknown pf context behaviour\n"); 1034 return -ENOSYS; 1035 } 1036 1037 dev->caps.num_ports = func_cap.num_ports; 1038 dev->quotas.qp = func_cap.qp_quota; 1039 dev->quotas.srq = func_cap.srq_quota; 1040 dev->quotas.cq = func_cap.cq_quota; 1041 dev->quotas.mpt = func_cap.mpt_quota; 1042 dev->quotas.mtt = func_cap.mtt_quota; 1043 dev->caps.num_qps = 1 << hca_param.log_num_qps; 1044 dev->caps.num_srqs = 1 << hca_param.log_num_srqs; 1045 dev->caps.num_cqs = 1 << hca_param.log_num_cqs; 1046 dev->caps.num_mpts = 1 << hca_param.log_mpt_sz; 1047 dev->caps.num_eqs = func_cap.max_eq; 1048 dev->caps.reserved_eqs = func_cap.reserved_eq; 1049 dev->caps.num_pds = MLX4_NUM_PDS; 1050 dev->caps.num_mgms = 
0; 1051 dev->caps.num_amgms = 0; 1052 1053 if (dev->caps.num_ports > MLX4_MAX_PORTS) { 1054 mlx4_err(dev, "HCA has %d ports, but we only support %d, " 1055 "aborting.\n", dev->caps.num_ports, MLX4_MAX_PORTS); 1056 return -ENODEV; 1057 } 1058 1059 dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 1060 dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 1061 dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 1062 dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 1063 1064 if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || 1065 !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) { 1066 err = -ENOMEM; 1067 goto err_mem; 1068 } 1069 1070 for (i = 1; i <= dev->caps.num_ports; ++i) { 1071 err = mlx4_QUERY_FUNC_CAP(dev, (u32) i, &func_cap); 1072 if (err) { 1073 mlx4_err(dev, "QUERY_FUNC_CAP port command failed for" 1074 " port %d, aborting (%d).\n", i, err); 1075 goto err_mem; 1076 } 1077 dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn; 1078 dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn; 1079 dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn; 1080 dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn; 1081 dev->caps.def_counter_index[i - 1] = func_cap.def_counter_index; 1082 1083 dev->caps.port_mask[i] = dev->caps.port_type[i]; 1084 err = mlx4_get_slave_pkey_gid_tbl_len(dev, i, 1085 &dev->caps.gid_table_len[i], 1086 &dev->caps.pkey_table_len[i]); 1087 if (err) 1088 goto err_mem; 1089 } 1090 1091 if (dev->caps.uar_page_size * (dev->caps.num_uars - 1092 dev->caps.reserved_uars) > 1093 pci_resource_len(dev->pdev, 2)) { 1094 mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than " 1095 "PCI resource 2 size of 0x%llx, aborting.\n", 1096 dev->caps.uar_page_size * dev->caps.num_uars, 1097 (unsigned long long) pci_resource_len(dev->pdev, 2)); 1098 err = -ENOMEM; 1099 goto err_mem; 1100 } 1101 1102 if (hca_param.dev_cap_enabled & 
MLX4_DEV_CAP_64B_EQE_ENABLED) { 1103 dev->caps.eqe_size = 64; 1104 dev->caps.eqe_factor = 1; 1105 } else { 1106 dev->caps.eqe_size = 32; 1107 dev->caps.eqe_factor = 0; 1108 } 1109 1110 if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) { 1111 dev->caps.cqe_size = 64; 1112 dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE; 1113 } else { 1114 dev->caps.cqe_size = 32; 1115 } 1116 1117 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; 1118 mlx4_warn(dev, "Timestamping is not supported in slave mode.\n"); 1119 1120 slave_adjust_steering_mode(dev, &dev_cap, &hca_param); 1121 1122 if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP && 1123 dev->caps.bf_reg_size) 1124 dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_ETH_BF_QP; 1125 1126 return 0; 1127 1128 err_mem: 1129 kfree(dev->caps.qp0_tunnel); 1130 kfree(dev->caps.qp0_proxy); 1131 kfree(dev->caps.qp1_tunnel); 1132 kfree(dev->caps.qp1_proxy); 1133 dev->caps.qp0_tunnel = dev->caps.qp0_proxy = 1134 dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL; 1135 1136 return err; 1137 } 1138 1139 static void mlx4_request_modules(struct mlx4_dev *dev) 1140 { 1141 int port; 1142 int has_ib_port = false; 1143 int has_eth_port = false; 1144 #define EN_DRV_NAME "mlx4_en" 1145 #define IB_DRV_NAME "mlx4_ib" 1146 1147 for (port = 1; port <= dev->caps.num_ports; port++) { 1148 if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) 1149 has_ib_port = true; 1150 else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) 1151 has_eth_port = true; 1152 } 1153 1154 if (has_ib_port) 1155 request_module_nowait(IB_DRV_NAME); 1156 if (has_eth_port) 1157 request_module_nowait(EN_DRV_NAME); 1158 } 1159 1160 /* 1161 * Change the port configuration of the device. 1162 * Every user of this function must hold the port mutex. 
1163 */ 1164 int mlx4_change_port_types(struct mlx4_dev *dev, 1165 enum mlx4_port_type *port_types) 1166 { 1167 int err = 0; 1168 int change = 0; 1169 int port; 1170 1171 for (port = 0; port < dev->caps.num_ports; port++) { 1172 /* Change the port type only if the new type is different 1173 * from the current, and not set to Auto */ 1174 if (port_types[port] != dev->caps.port_type[port + 1]) 1175 change = 1; 1176 } 1177 if (change) { 1178 mlx4_unregister_device(dev); 1179 for (port = 1; port <= dev->caps.num_ports; port++) { 1180 mlx4_CLOSE_PORT(dev, port); 1181 dev->caps.port_type[port] = port_types[port - 1]; 1182 err = mlx4_SET_PORT(dev, port, -1); 1183 if (err) { 1184 mlx4_err(dev, "Failed to set port %d, " 1185 "aborting\n", port); 1186 goto out; 1187 } 1188 } 1189 mlx4_set_port_mask(dev); 1190 err = mlx4_register_device(dev); 1191 if (err) { 1192 mlx4_err(dev, "Failed to register device\n"); 1193 goto out; 1194 } 1195 mlx4_request_modules(dev); 1196 } 1197 1198 out: 1199 return err; 1200 } 1201 1202 static ssize_t show_port_type(struct device *dev, 1203 struct device_attribute *attr, 1204 char *buf) 1205 { 1206 struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, 1207 port_attr); 1208 struct mlx4_dev *mdev = info->dev; 1209 char type[8]; 1210 1211 sprintf(type, "%s", 1212 (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ? 
1213 "ib" : "eth"); 1214 if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO) 1215 sprintf(buf, "auto (%s)\n", type); 1216 else 1217 sprintf(buf, "%s\n", type); 1218 1219 return strlen(buf); 1220 } 1221 1222 static ssize_t set_port_type(struct device *dev, 1223 struct device_attribute *attr, 1224 const char *buf, size_t count) 1225 { 1226 struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, 1227 port_attr); 1228 struct mlx4_dev *mdev = info->dev; 1229 struct mlx4_priv *priv = mlx4_priv(mdev); 1230 enum mlx4_port_type types[MLX4_MAX_PORTS]; 1231 enum mlx4_port_type new_types[MLX4_MAX_PORTS]; 1232 int i; 1233 int err = 0; 1234 1235 if (!strcmp(buf, "ib\n")) 1236 info->tmp_type = MLX4_PORT_TYPE_IB; 1237 else if (!strcmp(buf, "eth\n")) 1238 info->tmp_type = MLX4_PORT_TYPE_ETH; 1239 else if (!strcmp(buf, "auto\n")) 1240 info->tmp_type = MLX4_PORT_TYPE_AUTO; 1241 else { 1242 mlx4_err(mdev, "%s is not supported port type\n", buf); 1243 return -EINVAL; 1244 } 1245 1246 if ((info->tmp_type & mdev->caps.supported_type[info->port]) != 1247 info->tmp_type) { 1248 mlx4_err(mdev, "Requested port type for port %d is not supported on this HCA\n", 1249 info->port); 1250 return -EINVAL; 1251 } 1252 1253 mlx4_stop_sense(mdev); 1254 mutex_lock(&priv->port_mutex); 1255 /* Possible type is always the one that was delivered */ 1256 mdev->caps.possible_type[info->port] = info->tmp_type; 1257 1258 for (i = 0; i < mdev->caps.num_ports; i++) { 1259 types[i] = priv->port[i+1].tmp_type ? 
priv->port[i+1].tmp_type : 1260 mdev->caps.possible_type[i+1]; 1261 if (types[i] == MLX4_PORT_TYPE_AUTO) 1262 types[i] = mdev->caps.port_type[i+1]; 1263 } 1264 1265 if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) && 1266 !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) { 1267 for (i = 1; i <= mdev->caps.num_ports; i++) { 1268 if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) { 1269 mdev->caps.possible_type[i] = mdev->caps.port_type[i]; 1270 err = -EINVAL; 1271 } 1272 } 1273 } 1274 if (err) { 1275 mlx4_err(mdev, "Auto sensing is not supported on this HCA. " 1276 "Set only 'eth' or 'ib' for both ports " 1277 "(should be the same)\n"); 1278 goto out; 1279 } 1280 1281 mlx4_do_sense_ports(mdev, new_types, types); 1282 1283 err = mlx4_check_port_params(mdev, new_types); 1284 if (err) 1285 goto out; 1286 1287 /* We are about to apply the changes after the configuration 1288 * was verified, no need to remember the temporary types 1289 * any more */ 1290 for (i = 0; i < mdev->caps.num_ports; i++) 1291 priv->port[i + 1].tmp_type = 0; 1292 1293 err = mlx4_change_port_types(mdev, new_types); 1294 1295 out: 1296 mlx4_start_sense(mdev); 1297 mutex_unlock(&priv->port_mutex); 1298 return err ? 
err : count; 1299 } 1300 1301 enum ibta_mtu { 1302 IB_MTU_256 = 1, 1303 IB_MTU_512 = 2, 1304 IB_MTU_1024 = 3, 1305 IB_MTU_2048 = 4, 1306 IB_MTU_4096 = 5 1307 }; 1308 1309 static inline int int_to_ibta_mtu(int mtu) 1310 { 1311 switch (mtu) { 1312 case 256: return IB_MTU_256; 1313 case 512: return IB_MTU_512; 1314 case 1024: return IB_MTU_1024; 1315 case 2048: return IB_MTU_2048; 1316 case 4096: return IB_MTU_4096; 1317 default: return -1; 1318 } 1319 } 1320 1321 static inline int ibta_mtu_to_int(enum ibta_mtu mtu) 1322 { 1323 switch (mtu) { 1324 case IB_MTU_256: return 256; 1325 case IB_MTU_512: return 512; 1326 case IB_MTU_1024: return 1024; 1327 case IB_MTU_2048: return 2048; 1328 case IB_MTU_4096: return 4096; 1329 default: return -1; 1330 } 1331 } 1332 1333 static ssize_t 1334 show_board(struct device *device, struct device_attribute *attr, 1335 char *buf) 1336 { 1337 struct mlx4_hca_info *info = container_of(attr, struct mlx4_hca_info, 1338 board_attr); 1339 struct mlx4_dev *mdev = info->dev; 1340 1341 return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN, 1342 mdev->board_id); 1343 } 1344 1345 static ssize_t 1346 show_hca(struct device *device, struct device_attribute *attr, 1347 char *buf) 1348 { 1349 struct mlx4_hca_info *info = container_of(attr, struct mlx4_hca_info, 1350 hca_attr); 1351 struct mlx4_dev *mdev = info->dev; 1352 1353 return sprintf(buf, "MT%d\n", mdev->pdev->device); 1354 } 1355 1356 static ssize_t 1357 show_firmware_version(struct device *dev, 1358 struct device_attribute *attr, 1359 char *buf) 1360 { 1361 struct mlx4_hca_info *info = container_of(attr, struct mlx4_hca_info, 1362 firmware_attr); 1363 struct mlx4_dev *mdev = info->dev; 1364 1365 return sprintf(buf, "%d.%d.%d\n", (int)(mdev->caps.fw_ver >> 32), 1366 (int)(mdev->caps.fw_ver >> 16) & 0xffff, 1367 (int)mdev->caps.fw_ver & 0xffff); 1368 } 1369 1370 static ssize_t show_port_ib_mtu(struct device *dev, 1371 struct device_attribute *attr, 1372 char *buf) 1373 { 1374 struct mlx4_port_info 
*info = container_of(attr, struct mlx4_port_info, 1375 port_mtu_attr); 1376 struct mlx4_dev *mdev = info->dev; 1377 1378 /* When port type is eth, port mtu value isn't used. */ 1379 if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) 1380 return -EINVAL; 1381 1382 sprintf(buf, "%d\n", 1383 ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port])); 1384 return strlen(buf); 1385 } 1386 1387 static ssize_t set_port_ib_mtu(struct device *dev, 1388 struct device_attribute *attr, 1389 const char *buf, size_t count) 1390 { 1391 struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, 1392 port_mtu_attr); 1393 struct mlx4_dev *mdev = info->dev; 1394 struct mlx4_priv *priv = mlx4_priv(mdev); 1395 int err, port, mtu, ibta_mtu = -1; 1396 1397 if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) { 1398 mlx4_warn(mdev, "port level mtu is only used for IB ports\n"); 1399 return -EINVAL; 1400 } 1401 1402 mtu = (int) simple_strtol(buf, NULL, 0); 1403 ibta_mtu = int_to_ibta_mtu(mtu); 1404 1405 if (ibta_mtu < 0) { 1406 mlx4_err(mdev, "%s is invalid IBTA mtu\n", buf); 1407 return -EINVAL; 1408 } 1409 1410 mdev->caps.port_ib_mtu[info->port] = ibta_mtu; 1411 1412 mlx4_stop_sense(mdev); 1413 mutex_lock(&priv->port_mutex); 1414 mlx4_unregister_device(mdev); 1415 for (port = 1; port <= mdev->caps.num_ports; port++) { 1416 mlx4_CLOSE_PORT(mdev, port); 1417 err = mlx4_SET_PORT(mdev, port, -1); 1418 if (err) { 1419 mlx4_err(mdev, "Failed to set port %d, " 1420 "aborting\n", port); 1421 goto err_set_port; 1422 } 1423 } 1424 err = mlx4_register_device(mdev); 1425 err_set_port: 1426 mutex_unlock(&priv->port_mutex); 1427 mlx4_start_sense(mdev); 1428 return err ? 
err : count; 1429 } 1430 1431 static int mlx4_load_fw(struct mlx4_dev *dev) 1432 { 1433 struct mlx4_priv *priv = mlx4_priv(dev); 1434 int err, unmap_flag = 0; 1435 1436 priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages, 1437 GFP_HIGHUSER | __GFP_NOWARN, 0); 1438 if (!priv->fw.fw_icm) { 1439 mlx4_err(dev, "Couldn't allocate FW area, aborting.\n"); 1440 return -ENOMEM; 1441 } 1442 1443 err = mlx4_MAP_FA(dev, priv->fw.fw_icm); 1444 if (err) { 1445 mlx4_err(dev, "MAP_FA command failed, aborting.\n"); 1446 goto err_free; 1447 } 1448 1449 err = mlx4_RUN_FW(dev); 1450 if (err) { 1451 mlx4_err(dev, "RUN_FW command failed, aborting.\n"); 1452 goto err_unmap_fa; 1453 } 1454 1455 return 0; 1456 1457 err_unmap_fa: 1458 unmap_flag = mlx4_UNMAP_FA(dev); 1459 if (unmap_flag) 1460 pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n"); 1461 1462 err_free: 1463 if (!unmap_flag) 1464 mlx4_free_icm(dev, priv->fw.fw_icm, 0); 1465 return err; 1466 } 1467 1468 static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base, 1469 int cmpt_entry_sz) 1470 { 1471 struct mlx4_priv *priv = mlx4_priv(dev); 1472 int err; 1473 int num_eqs; 1474 1475 err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table, 1476 cmpt_base + 1477 ((u64) (MLX4_CMPT_TYPE_QP * 1478 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1479 cmpt_entry_sz, dev->caps.num_qps, 1480 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1481 0, 0); 1482 if (err) 1483 goto err; 1484 1485 err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table, 1486 cmpt_base + 1487 ((u64) (MLX4_CMPT_TYPE_SRQ * 1488 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1489 cmpt_entry_sz, dev->caps.num_srqs, 1490 dev->caps.reserved_srqs, 0, 0); 1491 if (err) 1492 goto err_qp; 1493 1494 err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table, 1495 cmpt_base + 1496 ((u64) (MLX4_CMPT_TYPE_CQ * 1497 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1498 cmpt_entry_sz, dev->caps.num_cqs, 1499 dev->caps.reserved_cqs, 0, 0); 1500 if (err) 1501 goto err_srq; 1502 1503 num_eqs = 
dev->phys_caps.num_phys_eqs; 1504 err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table, 1505 cmpt_base + 1506 ((u64) (MLX4_CMPT_TYPE_EQ * 1507 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1508 cmpt_entry_sz, num_eqs, num_eqs, 0, 0); 1509 if (err) 1510 goto err_cq; 1511 1512 return 0; 1513 1514 err_cq: 1515 mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); 1516 1517 err_srq: 1518 mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); 1519 1520 err_qp: 1521 mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); 1522 1523 err: 1524 return err; 1525 } 1526 1527 static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, 1528 struct mlx4_init_hca_param *init_hca, u64 icm_size) 1529 { 1530 struct mlx4_priv *priv = mlx4_priv(dev); 1531 u64 aux_pages; 1532 int num_eqs; 1533 int err, unmap_flag = 0; 1534 1535 err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages); 1536 if (err) { 1537 mlx4_err(dev, "SET_ICM_SIZE command failed, aborting.\n"); 1538 return err; 1539 } 1540 1541 mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory.\n", 1542 (unsigned long long) icm_size >> 10, 1543 (unsigned long long) aux_pages << 2); 1544 1545 priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages, 1546 GFP_HIGHUSER | __GFP_NOWARN, 0); 1547 if (!priv->fw.aux_icm) { 1548 mlx4_err(dev, "Couldn't allocate aux memory, aborting.\n"); 1549 return -ENOMEM; 1550 } 1551 1552 err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm); 1553 if (err) { 1554 mlx4_err(dev, "MAP_ICM_AUX command failed, aborting.\n"); 1555 goto err_free_aux; 1556 } 1557 1558 err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz); 1559 if (err) { 1560 mlx4_err(dev, "Failed to map cMPT context memory, aborting.\n"); 1561 goto err_unmap_aux; 1562 } 1563 1564 1565 num_eqs = dev->phys_caps.num_phys_eqs; 1566 err = mlx4_init_icm_table(dev, &priv->eq_table.table, 1567 init_hca->eqc_base, dev_cap->eqc_entry_sz, 1568 num_eqs, num_eqs, 0, 0); 1569 if (err) { 1570 mlx4_err(dev, "Failed to map EQ 
context memory, aborting.\n"); 1571 goto err_unmap_cmpt; 1572 } 1573 1574 /* 1575 * Reserved MTT entries must be aligned up to a cacheline 1576 * boundary, since the FW will write to them, while the driver 1577 * writes to all other MTT entries. (The variable 1578 * dev->caps.mtt_entry_sz below is really the MTT segment 1579 * size, not the raw entry size) 1580 */ 1581 dev->caps.reserved_mtts = 1582 ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz, 1583 dma_get_cache_alignment()) / dev->caps.mtt_entry_sz; 1584 1585 err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table, 1586 init_hca->mtt_base, 1587 dev->caps.mtt_entry_sz, 1588 dev->caps.num_mtts, 1589 dev->caps.reserved_mtts, 1, 0); 1590 if (err) { 1591 mlx4_err(dev, "Failed to map MTT context memory, aborting.\n"); 1592 goto err_unmap_eq; 1593 } 1594 1595 err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table, 1596 init_hca->dmpt_base, 1597 dev_cap->dmpt_entry_sz, 1598 dev->caps.num_mpts, 1599 dev->caps.reserved_mrws, 1, 1); 1600 if (err) { 1601 mlx4_err(dev, "Failed to map dMPT context memory, aborting.\n"); 1602 goto err_unmap_mtt; 1603 } 1604 1605 err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table, 1606 init_hca->qpc_base, 1607 dev_cap->qpc_entry_sz, 1608 dev->caps.num_qps, 1609 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1610 0, 0); 1611 if (err) { 1612 mlx4_err(dev, "Failed to map QP context memory, aborting.\n"); 1613 goto err_unmap_dmpt; 1614 } 1615 1616 err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table, 1617 init_hca->auxc_base, 1618 dev_cap->aux_entry_sz, 1619 dev->caps.num_qps, 1620 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1621 0, 0); 1622 if (err) { 1623 mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n"); 1624 goto err_unmap_qp; 1625 } 1626 1627 err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table, 1628 init_hca->altc_base, 1629 dev_cap->altc_entry_sz, 1630 dev->caps.num_qps, 1631 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1632 0, 0); 1633 if (err) 
{ 1634 mlx4_err(dev, "Failed to map ALTC context memory, aborting.\n"); 1635 goto err_unmap_auxc; 1636 } 1637 1638 err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table, 1639 init_hca->rdmarc_base, 1640 dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift, 1641 dev->caps.num_qps, 1642 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1643 0, 0); 1644 if (err) { 1645 mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n"); 1646 goto err_unmap_altc; 1647 } 1648 1649 err = mlx4_init_icm_table(dev, &priv->cq_table.table, 1650 init_hca->cqc_base, 1651 dev_cap->cqc_entry_sz, 1652 dev->caps.num_cqs, 1653 dev->caps.reserved_cqs, 0, 0); 1654 if (err) { 1655 mlx4_err(dev, "Failed to map CQ context memory, aborting.\n"); 1656 goto err_unmap_rdmarc; 1657 } 1658 1659 err = mlx4_init_icm_table(dev, &priv->srq_table.table, 1660 init_hca->srqc_base, 1661 dev_cap->srq_entry_sz, 1662 dev->caps.num_srqs, 1663 dev->caps.reserved_srqs, 0, 0); 1664 if (err) { 1665 mlx4_err(dev, "Failed to map SRQ context memory, aborting.\n"); 1666 goto err_unmap_cq; 1667 } 1668 1669 /* 1670 * For flow steering device managed mode it is required to use 1671 * mlx4_init_icm_table. For B0 steering mode it's not strictly 1672 * required, but for simplicity just map the whole multicast 1673 * group table now. The table isn't very big and it's a lot 1674 * easier than trying to track ref counts. 
1675 */ 1676 err = mlx4_init_icm_table(dev, &priv->mcg_table.table, 1677 init_hca->mc_base, 1678 mlx4_get_mgm_entry_size(dev), 1679 dev->caps.num_mgms + dev->caps.num_amgms, 1680 dev->caps.num_mgms + dev->caps.num_amgms, 1681 0, 0); 1682 if (err) { 1683 mlx4_err(dev, "Failed to map MCG context memory, aborting.\n"); 1684 goto err_unmap_srq; 1685 } 1686 1687 return 0; 1688 1689 err_unmap_srq: 1690 mlx4_cleanup_icm_table(dev, &priv->srq_table.table); 1691 1692 err_unmap_cq: 1693 mlx4_cleanup_icm_table(dev, &priv->cq_table.table); 1694 1695 err_unmap_rdmarc: 1696 mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table); 1697 1698 err_unmap_altc: 1699 mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table); 1700 1701 err_unmap_auxc: 1702 mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table); 1703 1704 err_unmap_qp: 1705 mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table); 1706 1707 err_unmap_dmpt: 1708 mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table); 1709 1710 err_unmap_mtt: 1711 mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); 1712 1713 err_unmap_eq: 1714 mlx4_cleanup_icm_table(dev, &priv->eq_table.table); 1715 1716 err_unmap_cmpt: 1717 mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); 1718 mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); 1719 mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); 1720 mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); 1721 1722 err_unmap_aux: 1723 unmap_flag = mlx4_UNMAP_ICM_AUX(dev); 1724 if (unmap_flag) 1725 pr_warn("mlx4_core: mlx4_UNMAP_ICM_AUX failed.\n"); 1726 1727 err_free_aux: 1728 if (!unmap_flag) 1729 mlx4_free_icm(dev, priv->fw.aux_icm, 0); 1730 1731 return err; 1732 } 1733 1734 static void mlx4_free_icms(struct mlx4_dev *dev) 1735 { 1736 struct mlx4_priv *priv = mlx4_priv(dev); 1737 1738 mlx4_cleanup_icm_table(dev, &priv->mcg_table.table); 1739 mlx4_cleanup_icm_table(dev, &priv->srq_table.table); 1740 mlx4_cleanup_icm_table(dev, &priv->cq_table.table); 1741 
mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table); 1742 mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table); 1743 mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table); 1744 mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table); 1745 mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table); 1746 mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); 1747 mlx4_cleanup_icm_table(dev, &priv->eq_table.table); 1748 mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); 1749 mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); 1750 mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); 1751 mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); 1752 1753 if (!mlx4_UNMAP_ICM_AUX(dev)) 1754 mlx4_free_icm(dev, priv->fw.aux_icm, 0); 1755 else 1756 pr_warn("mlx4_core: mlx4_UNMAP_ICM_AUX failed.\n"); 1757 } 1758 1759 static void mlx4_slave_exit(struct mlx4_dev *dev) 1760 { 1761 struct mlx4_priv *priv = mlx4_priv(dev); 1762 1763 mutex_lock(&priv->cmd.slave_cmd_mutex); 1764 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME)) 1765 mlx4_warn(dev, "Failed to close slave function.\n"); 1766 mutex_unlock(&priv->cmd.slave_cmd_mutex); 1767 } 1768 1769 static int map_bf_area(struct mlx4_dev *dev) 1770 { 1771 struct mlx4_priv *priv = mlx4_priv(dev); 1772 resource_size_t bf_start; 1773 resource_size_t bf_len; 1774 int err = 0; 1775 1776 if (!dev->caps.bf_reg_size) 1777 return -ENXIO; 1778 1779 bf_start = pci_resource_start(dev->pdev, 2) + 1780 (dev->caps.num_uars << PAGE_SHIFT); 1781 bf_len = pci_resource_len(dev->pdev, 2) - 1782 (dev->caps.num_uars << PAGE_SHIFT); 1783 priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len); 1784 if (!priv->bf_mapping) 1785 err = -ENOMEM; 1786 1787 return err; 1788 } 1789 1790 static void unmap_bf_area(struct mlx4_dev *dev) 1791 { 1792 if (mlx4_priv(dev)->bf_mapping) 1793 io_mapping_free(mlx4_priv(dev)->bf_mapping); 1794 } 1795 1796 s64 mlx4_read_clock(struct mlx4_dev *dev) 1797 { 1798 u32 clockhi, clocklo, 
clockhi1; 1799 s64 cycles; 1800 int i; 1801 struct mlx4_priv *priv = mlx4_priv(dev); 1802 1803 if (!priv->clock_mapping) 1804 return -ENOTSUPP; 1805 1806 for (i = 0; i < 10; i++) { 1807 clockhi = swab32(readl(priv->clock_mapping)); 1808 clocklo = swab32(readl(priv->clock_mapping + 4)); 1809 clockhi1 = swab32(readl(priv->clock_mapping)); 1810 if (clockhi == clockhi1) 1811 break; 1812 } 1813 1814 cycles = (u64) clockhi << 32 | (u64) clocklo; 1815 1816 return cycles & CORE_CLOCK_MASK; 1817 } 1818 EXPORT_SYMBOL_GPL(mlx4_read_clock); 1819 1820 1821 static int map_internal_clock(struct mlx4_dev *dev) 1822 { 1823 struct mlx4_priv *priv = mlx4_priv(dev); 1824 1825 priv->clock_mapping = ioremap(pci_resource_start(dev->pdev, 1826 priv->fw.clock_bar) + 1827 priv->fw.clock_offset, MLX4_CLOCK_SIZE); 1828 1829 if (!priv->clock_mapping) 1830 return -ENOMEM; 1831 1832 return 0; 1833 } 1834 1835 1836 int mlx4_get_internal_clock_params(struct mlx4_dev *dev, 1837 struct mlx4_clock_params *params) 1838 { 1839 struct mlx4_priv *priv = mlx4_priv(dev); 1840 1841 if (mlx4_is_slave(dev)) 1842 return -ENOTSUPP; 1843 if (!params) 1844 return -EINVAL; 1845 1846 params->bar = priv->fw.clock_bar; 1847 params->offset = priv->fw.clock_offset; 1848 params->size = MLX4_CLOCK_SIZE; 1849 1850 return 0; 1851 } 1852 EXPORT_SYMBOL_GPL(mlx4_get_internal_clock_params); 1853 1854 static void unmap_internal_clock(struct mlx4_dev *dev) 1855 { 1856 struct mlx4_priv *priv = mlx4_priv(dev); 1857 1858 if (priv->clock_mapping) 1859 iounmap(priv->clock_mapping); 1860 } 1861 1862 static void mlx4_close_hca(struct mlx4_dev *dev) 1863 { 1864 unmap_internal_clock(dev); 1865 unmap_bf_area(dev); 1866 if (mlx4_is_slave(dev)) { 1867 mlx4_slave_exit(dev); 1868 } else { 1869 mlx4_CLOSE_HCA(dev, 0); 1870 mlx4_free_icms(dev); 1871 1872 if (!mlx4_UNMAP_FA(dev)) 1873 mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0); 1874 else 1875 pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n"); 1876 } 1877 } 1878 1879 static int 
mlx4_init_slave(struct mlx4_dev *dev) 1880 { 1881 struct mlx4_priv *priv = mlx4_priv(dev); 1882 u64 dma = (u64) priv->mfunc.vhcr_dma; 1883 int num_of_reset_retries = NUM_OF_RESET_RETRIES; 1884 int ret_from_reset = 0; 1885 u32 slave_read; 1886 u32 cmd_channel_ver; 1887 1888 mutex_lock(&priv->cmd.slave_cmd_mutex); 1889 priv->cmd.max_cmds = 1; 1890 mlx4_warn(dev, "Sending reset\n"); 1891 ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 1892 MLX4_COMM_TIME); 1893 /* if we are in the middle of flr the slave will try 1894 * NUM_OF_RESET_RETRIES times before leaving.*/ 1895 if (ret_from_reset) { 1896 if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) { 1897 msleep(SLEEP_TIME_IN_RESET); 1898 while (ret_from_reset && num_of_reset_retries) { 1899 mlx4_warn(dev, "slave is currently in the" 1900 "middle of FLR. retrying..." 1901 "(try num:%d)\n", 1902 (NUM_OF_RESET_RETRIES - 1903 num_of_reset_retries + 1)); 1904 ret_from_reset = 1905 mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 1906 0, MLX4_COMM_TIME); 1907 num_of_reset_retries = num_of_reset_retries - 1; 1908 } 1909 } else 1910 goto err; 1911 } 1912 1913 /* check the driver version - the slave I/F revision 1914 * must match the master's */ 1915 slave_read = swab32(readl(&priv->mfunc.comm->slave_read)); 1916 cmd_channel_ver = mlx4_comm_get_version(); 1917 1918 if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) != 1919 MLX4_COMM_GET_IF_REV(slave_read)) { 1920 mlx4_err(dev, "slave driver version is not supported" 1921 " by the master\n"); 1922 goto err; 1923 } 1924 1925 mlx4_warn(dev, "Sending vhcr0\n"); 1926 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48, 1927 MLX4_COMM_TIME)) 1928 goto err; 1929 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32, 1930 MLX4_COMM_TIME)) 1931 goto err; 1932 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16, 1933 MLX4_COMM_TIME)) 1934 goto err; 1935 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME)) 1936 goto err; 1937 1938 mutex_unlock(&priv->cmd.slave_cmd_mutex); 1939 
return 0; 1940 1941 err: 1942 mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0); 1943 mutex_unlock(&priv->cmd.slave_cmd_mutex); 1944 return -EIO; 1945 } 1946 1947 static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev) 1948 { 1949 int i; 1950 1951 for (i = 1; i <= dev->caps.num_ports; i++) { 1952 if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) 1953 dev->caps.gid_table_len[i] = 1954 mlx4_get_slave_num_gids(dev, 0); 1955 else 1956 dev->caps.gid_table_len[i] = 1; 1957 dev->caps.pkey_table_len[i] = 1958 dev->phys_caps.pkey_phys_table_len[i] - 1; 1959 } 1960 } 1961 1962 static int choose_log_fs_mgm_entry_size(int qp_per_entry) 1963 { 1964 int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; 1965 1966 for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE; 1967 i++) { 1968 if (qp_per_entry <= 4 * ((1 << i) / 16 - 2)) 1969 break; 1970 } 1971 1972 return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1; 1973 } 1974 1975 static void choose_steering_mode(struct mlx4_dev *dev, 1976 struct mlx4_dev_cap *dev_cap) 1977 { 1978 int nvfs; 1979 1980 mlx4_get_val(num_vfs.dbdf2val.tbl, pci_physfn(dev->pdev), 0, &nvfs); 1981 if (high_rate_steer && !mlx4_is_mfunc(dev)) { 1982 dev->caps.flags &= ~(MLX4_DEV_CAP_FLAG_VEP_MC_STEER | 1983 MLX4_DEV_CAP_FLAG_VEP_UC_STEER); 1984 dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_FS_EN; 1985 } 1986 1987 if (mlx4_log_num_mgm_entry_size == -1 && 1988 dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN && 1989 (!mlx4_is_mfunc(dev) || 1990 (dev_cap->fs_max_num_qp_per_entry >= (nvfs + 1))) && 1991 choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >= 1992 MLX4_MIN_MGM_LOG_ENTRY_SIZE) { 1993 dev->oper_log_mgm_entry_size = 1994 choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry); 1995 dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; 1996 dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; 1997 } else { 1998 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER && 1999 dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) 2000 
dev->caps.steering_mode = MLX4_STEERING_MODE_B0; 2001 else { 2002 dev->caps.steering_mode = MLX4_STEERING_MODE_A0; 2003 2004 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER || 2005 dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) 2006 mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags " 2007 "set to use B0 steering. Falling back to A0 steering mode.\n"); 2008 } 2009 dev->oper_log_mgm_entry_size = 2010 mlx4_log_num_mgm_entry_size > 0 ? 2011 mlx4_log_num_mgm_entry_size : 2012 MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; 2013 dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev); 2014 } 2015 mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, " 2016 "log_num_mgm_entry_size = %d\n", 2017 mlx4_steering_mode_str(dev->caps.steering_mode), 2018 dev->oper_log_mgm_entry_size, mlx4_log_num_mgm_entry_size); 2019 } 2020 2021 static int mlx4_init_hca(struct mlx4_dev *dev) 2022 { 2023 struct mlx4_priv *priv = mlx4_priv(dev); 2024 struct mlx4_dev_cap *dev_cap = NULL; 2025 struct mlx4_adapter adapter; 2026 struct mlx4_mod_stat_cfg mlx4_cfg; 2027 struct mlx4_profile profile; 2028 struct mlx4_init_hca_param init_hca; 2029 u64 icm_size; 2030 int err; 2031 2032 if (!mlx4_is_slave(dev)) { 2033 err = mlx4_QUERY_FW(dev); 2034 if (err) { 2035 if (err == -EACCES) 2036 mlx4_info(dev, "non-primary physical function, skipping.\n"); 2037 else 2038 mlx4_err(dev, "QUERY_FW command failed, aborting.\n"); 2039 return err; 2040 } 2041 2042 err = mlx4_load_fw(dev); 2043 if (err) { 2044 mlx4_err(dev, "Failed to start FW, aborting.\n"); 2045 return err; 2046 } 2047 2048 mlx4_cfg.log_pg_sz_m = 1; 2049 mlx4_cfg.log_pg_sz = 0; 2050 err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg); 2051 if (err) 2052 mlx4_warn(dev, "Failed to override log_pg_sz parameter\n"); 2053 2054 dev_cap = kzalloc(sizeof *dev_cap, GFP_KERNEL); 2055 if (!dev_cap) { 2056 mlx4_err(dev, "Failed to allocate memory for dev_cap\n"); 2057 err = -ENOMEM; 2058 goto err_stop_fw; 2059 } 2060 2061 err = mlx4_dev_cap(dev, dev_cap); 2062 if 
(err) { 2063 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); 2064 goto err_stop_fw; 2065 } 2066 2067 choose_steering_mode(dev, dev_cap); 2068 2069 if (mlx4_is_master(dev)) 2070 mlx4_parav_master_pf_caps(dev); 2071 2072 process_mod_param_profile(&profile); 2073 if (dev->caps.steering_mode == 2074 MLX4_STEERING_MODE_DEVICE_MANAGED) 2075 profile.num_mcg = MLX4_FS_NUM_MCG; 2076 2077 icm_size = mlx4_make_profile(dev, &profile, dev_cap, 2078 &init_hca); 2079 if ((long long) icm_size < 0) { 2080 err = icm_size; 2081 goto err_stop_fw; 2082 } 2083 2084 dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1; 2085 2086 init_hca.log_uar_sz = ilog2(dev->caps.num_uars); 2087 init_hca.uar_page_sz = PAGE_SHIFT - 12; 2088 2089 err = mlx4_init_icm(dev, dev_cap, &init_hca, icm_size); 2090 if (err) 2091 goto err_stop_fw; 2092 2093 init_hca.mw_enable = 1; 2094 2095 err = mlx4_INIT_HCA(dev, &init_hca); 2096 if (err) { 2097 mlx4_err(dev, "INIT_HCA command failed, aborting.\n"); 2098 goto err_free_icm; 2099 } 2100 2101 if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) { 2102 err = mlx4_query_func(dev, dev_cap); 2103 if (err < 0) { 2104 mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n"); 2105 goto err_stop_fw; 2106 } else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) { 2107 dev->caps.num_eqs = dev_cap->max_eqs; 2108 dev->caps.reserved_eqs = dev_cap->reserved_eqs; 2109 dev->caps.reserved_uars = dev_cap->reserved_uars; 2110 } 2111 } 2112 2113 /* 2114 * Read HCA frequency by QUERY_HCA command 2115 */ 2116 if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) { 2117 memset(&init_hca, 0, sizeof(init_hca)); 2118 err = mlx4_QUERY_HCA(dev, &init_hca); 2119 if (err) { 2120 mlx4_err(dev, "QUERY_HCA command failed, disable timestamp.\n"); 2121 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; 2122 } else { 2123 dev->caps.hca_core_clock = 2124 init_hca.hca_core_clock; 2125 } 2126 2127 /* In case we got HCA frequency 0 - disable timestamping 2128 * to avoid dividing by zero 2129 */ 2130 if 
(!dev->caps.hca_core_clock) { 2131 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; 2132 mlx4_err(dev, "HCA frequency is 0. Timestamping is not supported."); 2133 } else if (map_internal_clock(dev)) { 2134 /* Map internal clock, 2135 * in case of failure disable timestamping 2136 */ 2137 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; 2138 mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported.\n"); 2139 } 2140 } 2141 } else { 2142 err = mlx4_init_slave(dev); 2143 if (err) { 2144 mlx4_err(dev, "Failed to initialize slave\n"); 2145 return err; 2146 } 2147 2148 err = mlx4_slave_cap(dev); 2149 if (err) { 2150 mlx4_err(dev, "Failed to obtain slave caps\n"); 2151 goto err_close; 2152 } 2153 } 2154 2155 if (map_bf_area(dev)) 2156 mlx4_dbg(dev, "Failed to map blue flame area\n"); 2157 2158 /* Only the master set the ports, all the rest got it from it.*/ 2159 if (!mlx4_is_slave(dev)) 2160 mlx4_set_port_mask(dev); 2161 2162 err = mlx4_QUERY_ADAPTER(dev, &adapter); 2163 if (err) { 2164 mlx4_err(dev, "QUERY_ADAPTER command failed, aborting.\n"); 2165 goto unmap_bf; 2166 } 2167 2168 priv->eq_table.inta_pin = adapter.inta_pin; 2169 memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id); 2170 memcpy(dev->vsd, adapter.vsd, sizeof(dev->vsd)); 2171 dev->vsd_vendor_id = adapter.vsd_vendor_id; 2172 2173 if (!mlx4_is_slave(dev)) 2174 kfree(dev_cap); 2175 2176 return 0; 2177 2178 unmap_bf: 2179 if (!mlx4_is_slave(dev)) 2180 unmap_internal_clock(dev); 2181 unmap_bf_area(dev); 2182 2183 if (mlx4_is_slave(dev)) { 2184 kfree(dev->caps.qp0_tunnel); 2185 kfree(dev->caps.qp0_proxy); 2186 kfree(dev->caps.qp1_tunnel); 2187 kfree(dev->caps.qp1_proxy); 2188 } 2189 2190 err_close: 2191 if (mlx4_is_slave(dev)) 2192 mlx4_slave_exit(dev); 2193 else 2194 mlx4_CLOSE_HCA(dev, 0); 2195 2196 err_free_icm: 2197 if (!mlx4_is_slave(dev)) 2198 mlx4_free_icms(dev); 2199 2200 err_stop_fw: 2201 if (!mlx4_is_slave(dev)) { 2202 if (!mlx4_UNMAP_FA(dev)) 2203 mlx4_free_icm(dev, priv->fw.fw_icm, 0); 
2204 else 2205 pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n"); 2206 kfree(dev_cap); 2207 } 2208 return err; 2209 } 2210 2211 static int mlx4_init_counters_table(struct mlx4_dev *dev) 2212 { 2213 struct mlx4_priv *priv = mlx4_priv(dev); 2214 int nent_pow2, port_indx, vf_index, num_counters; 2215 int res, index = 0; 2216 struct counter_index *new_counter_index; 2217 2218 2219 if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) 2220 return -ENOENT; 2221 2222 if (!mlx4_is_slave(dev) && 2223 dev->caps.max_counters == dev->caps.max_extended_counters) { 2224 res = mlx4_cmd(dev, MLX4_IF_STATE_EXTENDED, 0, 0, 2225 MLX4_CMD_SET_IF_STAT, 2226 MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); 2227 if (res) { 2228 mlx4_err(dev, "Failed to set extended counters (err=%d)\n", res); 2229 return res; 2230 } 2231 } 2232 2233 mutex_init(&priv->counters_table.mutex); 2234 2235 if (mlx4_is_slave(dev)) { 2236 for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) { 2237 INIT_LIST_HEAD(&priv->counters_table.global_port_list[port_indx]); 2238 if (dev->caps.def_counter_index[port_indx] != 0xFF) { 2239 new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); 2240 if (!new_counter_index) 2241 return -ENOMEM; 2242 new_counter_index->index = dev->caps.def_counter_index[port_indx]; 2243 list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port_indx]); 2244 } 2245 } 2246 mlx4_dbg(dev, "%s: slave allocated %d counters for %d ports\n", 2247 __func__, dev->caps.num_ports, dev->caps.num_ports); 2248 return 0; 2249 } 2250 2251 nent_pow2 = roundup_pow_of_two(dev->caps.max_counters); 2252 2253 for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) { 2254 INIT_LIST_HEAD(&priv->counters_table.global_port_list[port_indx]); 2255 /* allocating 2 counters per port for PFs */ 2256 /* For the PF, the ETH default counters are 0,2; */ 2257 /* and the RoCE default counters are 1,3 */ 2258 for (num_counters = 0; num_counters < 2; num_counters++, index++) { 2259 
new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); 2260 if (!new_counter_index) 2261 return -ENOMEM; 2262 new_counter_index->index = index; 2263 list_add_tail(&new_counter_index->list, 2264 &priv->counters_table.global_port_list[port_indx]); 2265 } 2266 } 2267 2268 if (mlx4_is_master(dev)) { 2269 for (vf_index = 0; vf_index < dev->num_vfs; vf_index++) { 2270 for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) { 2271 INIT_LIST_HEAD(&priv->counters_table.vf_list[vf_index][port_indx]); 2272 new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); 2273 if (!new_counter_index) 2274 return -ENOMEM; 2275 if (index < nent_pow2 - 2) { 2276 new_counter_index->index = index; 2277 index++; 2278 } else { 2279 new_counter_index->index = MLX4_SINK_COUNTER_INDEX; 2280 } 2281 2282 list_add_tail(&new_counter_index->list, 2283 &priv->counters_table.vf_list[vf_index][port_indx]); 2284 } 2285 } 2286 2287 res = mlx4_bitmap_init(&priv->counters_table.bitmap, 2288 nent_pow2, nent_pow2 - 1, 2289 index, 1); 2290 mlx4_dbg(dev, "%s: master allocated %d counters for %d VFs\n", 2291 __func__, index, dev->num_vfs); 2292 } else { 2293 res = mlx4_bitmap_init(&priv->counters_table.bitmap, 2294 nent_pow2, nent_pow2 - 1, 2295 index, 1); 2296 mlx4_dbg(dev, "%s: native allocated %d counters for %d ports\n", 2297 __func__, index, dev->caps.num_ports); 2298 } 2299 2300 return 0; 2301 2302 } 2303 2304 static void mlx4_cleanup_counters_table(struct mlx4_dev *dev) 2305 { 2306 struct mlx4_priv *priv = mlx4_priv(dev); 2307 int i, j; 2308 struct counter_index *port, *tmp_port; 2309 struct counter_index *vf, *tmp_vf; 2310 2311 mutex_lock(&priv->counters_table.mutex); 2312 2313 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS) { 2314 for (i = 0; i < dev->caps.num_ports; i++) { 2315 list_for_each_entry_safe(port, tmp_port, 2316 &priv->counters_table.global_port_list[i], 2317 list) { 2318 list_del(&port->list); 2319 kfree(port); 2320 } 2321 } 2322 if 
(!mlx4_is_slave(dev)) { 2323 for (i = 0; i < dev->num_vfs; i++) { 2324 for (j = 0; j < dev->caps.num_ports; j++) { 2325 list_for_each_entry_safe(vf, tmp_vf, 2326 &priv->counters_table.vf_list[i][j], 2327 list) { 2328 /* clear the counter statistic */ 2329 if (__mlx4_clear_if_stat(dev, vf->index)) 2330 mlx4_dbg(dev, "%s: reset counter %d failed\n", 2331 __func__, vf->index); 2332 list_del(&vf->list); 2333 kfree(vf); 2334 } 2335 } 2336 } 2337 mlx4_bitmap_cleanup(&priv->counters_table.bitmap); 2338 } 2339 } 2340 mutex_unlock(&priv->counters_table.mutex); 2341 } 2342 2343 int __mlx4_slave_counters_free(struct mlx4_dev *dev, int slave) 2344 { 2345 struct mlx4_priv *priv = mlx4_priv(dev); 2346 int i, first; 2347 struct counter_index *vf, *tmp_vf; 2348 2349 /* clean VF's counters for the next useg */ 2350 if (slave > 0 && slave <= dev->num_vfs) { 2351 mlx4_dbg(dev, "%s: free counters of slave(%d)\n" 2352 , __func__, slave); 2353 2354 mutex_lock(&priv->counters_table.mutex); 2355 for (i = 0; i < dev->caps.num_ports; i++) { 2356 first = 0; 2357 list_for_each_entry_safe(vf, tmp_vf, 2358 &priv->counters_table.vf_list[slave - 1][i], 2359 list) { 2360 /* clear the counter statistic */ 2361 if (__mlx4_clear_if_stat(dev, vf->index)) 2362 mlx4_dbg(dev, "%s: reset counter %d failed\n", 2363 __func__, vf->index); 2364 if (first++ && vf->index != MLX4_SINK_COUNTER_INDEX) { 2365 mlx4_dbg(dev, "%s: delete counter index %d for slave %d and port %d\n" 2366 , __func__, vf->index, slave, i + 1); 2367 mlx4_bitmap_free(&priv->counters_table.bitmap, vf->index, MLX4_USE_RR); 2368 list_del(&vf->list); 2369 kfree(vf); 2370 } else { 2371 mlx4_dbg(dev, "%s: can't delete default counter index %d for slave %d and port %d\n" 2372 , __func__, vf->index, slave, i + 1); 2373 } 2374 } 2375 } 2376 mutex_unlock(&priv->counters_table.mutex); 2377 } 2378 2379 return 0; 2380 } 2381 2382 int __mlx4_counter_alloc(struct mlx4_dev *dev, int slave, int port, u32 *idx) 2383 { 2384 struct mlx4_priv *priv = 
mlx4_priv(dev); 2385 struct counter_index *new_counter_index; 2386 2387 if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) 2388 return -ENOENT; 2389 2390 if ((slave > MLX4_MAX_NUM_VF) || (slave < 0) || 2391 (port < 0) || (port > MLX4_MAX_PORTS)) { 2392 mlx4_dbg(dev, "%s: invalid slave(%d) or port(%d) index\n", 2393 __func__, slave, port); 2394 return -EINVAL; 2395 } 2396 2397 /* handle old guest request does not support request by port index */ 2398 if (port == 0) { 2399 *idx = MLX4_SINK_COUNTER_INDEX; 2400 mlx4_dbg(dev, "%s: allocated default counter index %d for slave %d port %d\n" 2401 , __func__, *idx, slave, port); 2402 return 0; 2403 } 2404 2405 mutex_lock(&priv->counters_table.mutex); 2406 2407 *idx = mlx4_bitmap_alloc(&priv->counters_table.bitmap); 2408 /* if no resources return the default counter of the slave and port */ 2409 if (*idx == -1) { 2410 if (slave == 0) { /* its the ethernet counter ?????? */ 2411 new_counter_index = list_entry(priv->counters_table.global_port_list[port - 1].next, 2412 struct counter_index, 2413 list); 2414 } else { 2415 new_counter_index = list_entry(priv->counters_table.vf_list[slave - 1][port - 1].next, 2416 struct counter_index, 2417 list); 2418 } 2419 2420 *idx = new_counter_index->index; 2421 mlx4_dbg(dev, "%s: allocated defualt counter index %d for slave %d port %d\n" 2422 , __func__, *idx, slave, port); 2423 goto out; 2424 } 2425 2426 if (slave == 0) { /* native or master */ 2427 new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); 2428 if (!new_counter_index) 2429 goto no_mem; 2430 new_counter_index->index = *idx; 2431 list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port - 1]); 2432 } else { 2433 new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); 2434 if (!new_counter_index) 2435 goto no_mem; 2436 new_counter_index->index = *idx; 2437 list_add_tail(&new_counter_index->list, &priv->counters_table.vf_list[slave - 1][port - 1]); 2438 } 2439 2440 
mlx4_dbg(dev, "%s: allocated counter index %d for slave %d port %d\n" 2441 , __func__, *idx, slave, port); 2442 out: 2443 mutex_unlock(&priv->counters_table.mutex); 2444 return 0; 2445 2446 no_mem: 2447 mlx4_bitmap_free(&priv->counters_table.bitmap, *idx, MLX4_USE_RR); 2448 mutex_unlock(&priv->counters_table.mutex); 2449 *idx = MLX4_SINK_COUNTER_INDEX; 2450 mlx4_dbg(dev, "%s: failed err (%d)\n" 2451 , __func__, -ENOMEM); 2452 return -ENOMEM; 2453 } 2454 2455 int mlx4_counter_alloc(struct mlx4_dev *dev, u8 port, u32 *idx) 2456 { 2457 u64 out_param; 2458 int err; 2459 struct mlx4_priv *priv = mlx4_priv(dev); 2460 struct counter_index *new_counter_index, *c_index; 2461 2462 if (mlx4_is_mfunc(dev)) { 2463 err = mlx4_cmd_imm(dev, 0, &out_param, 2464 ((u32) port) << 8 | (u32) RES_COUNTER, 2465 RES_OP_RESERVE, MLX4_CMD_ALLOC_RES, 2466 MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); 2467 if (!err) { 2468 *idx = get_param_l(&out_param); 2469 if (*idx == MLX4_SINK_COUNTER_INDEX) 2470 return -ENOSPC; 2471 2472 mutex_lock(&priv->counters_table.mutex); 2473 c_index = list_entry(priv->counters_table.global_port_list[port - 1].next, 2474 struct counter_index, 2475 list); 2476 mutex_unlock(&priv->counters_table.mutex); 2477 if (c_index->index == *idx) 2478 return -EEXIST; 2479 2480 if (mlx4_is_slave(dev)) { 2481 new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); 2482 if (!new_counter_index) { 2483 mlx4_counter_free(dev, port, *idx); 2484 return -ENOMEM; 2485 } 2486 new_counter_index->index = *idx; 2487 mutex_lock(&priv->counters_table.mutex); 2488 list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port - 1]); 2489 mutex_unlock(&priv->counters_table.mutex); 2490 mlx4_dbg(dev, "%s: allocated counter index %d for port %d\n" 2491 , __func__, *idx, port); 2492 } 2493 } 2494 return err; 2495 } 2496 return __mlx4_counter_alloc(dev, 0, port, idx); 2497 } 2498 EXPORT_SYMBOL_GPL(mlx4_counter_alloc); 2499 2500 void __mlx4_counter_free(struct mlx4_dev 
*dev, int slave, int port, u32 idx) 2501 { 2502 /* check if native or slave and deletes accordingly */ 2503 struct mlx4_priv *priv = mlx4_priv(dev); 2504 struct counter_index *pf, *tmp_pf; 2505 struct counter_index *vf, *tmp_vf; 2506 int first; 2507 2508 2509 if (idx == MLX4_SINK_COUNTER_INDEX) { 2510 mlx4_dbg(dev, "%s: try to delete default counter index %d for port %d\n" 2511 , __func__, idx, port); 2512 return; 2513 } 2514 2515 if ((slave > MLX4_MAX_NUM_VF) || (slave < 0) || 2516 (port < 0) || (port > MLX4_MAX_PORTS)) { 2517 mlx4_warn(dev, "%s: deletion failed due to invalid slave(%d) or port(%d) index\n" 2518 , __func__, slave, idx); 2519 return; 2520 } 2521 2522 mutex_lock(&priv->counters_table.mutex); 2523 if (slave == 0) { 2524 first = 0; 2525 list_for_each_entry_safe(pf, tmp_pf, 2526 &priv->counters_table.global_port_list[port - 1], 2527 list) { 2528 /* the first 2 counters are reserved */ 2529 if (pf->index == idx) { 2530 /* clear the counter statistic */ 2531 if (__mlx4_clear_if_stat(dev, pf->index)) 2532 mlx4_dbg(dev, "%s: reset counter %d failed\n", 2533 __func__, pf->index); 2534 if (1 < first && idx != MLX4_SINK_COUNTER_INDEX) { 2535 list_del(&pf->list); 2536 kfree(pf); 2537 mlx4_dbg(dev, "%s: delete counter index %d for native device (%d) port %d\n" 2538 , __func__, idx, slave, port); 2539 mlx4_bitmap_free(&priv->counters_table.bitmap, idx, MLX4_USE_RR); 2540 goto out; 2541 } else { 2542 mlx4_dbg(dev, "%s: can't delete default counter index %d for native device (%d) port %d\n" 2543 , __func__, idx, slave, port); 2544 goto out; 2545 } 2546 } 2547 first++; 2548 } 2549 mlx4_dbg(dev, "%s: can't delete counter index %d for native device (%d) port %d\n" 2550 , __func__, idx, slave, port); 2551 } else { 2552 first = 0; 2553 list_for_each_entry_safe(vf, tmp_vf, 2554 &priv->counters_table.vf_list[slave - 1][port - 1], 2555 list) { 2556 /* the first element is reserved */ 2557 if (vf->index == idx) { 2558 /* clear the counter statistic */ 2559 if 
(__mlx4_clear_if_stat(dev, vf->index)) 2560 mlx4_dbg(dev, "%s: reset counter %d failed\n", 2561 __func__, vf->index); 2562 if (first) { 2563 list_del(&vf->list); 2564 kfree(vf); 2565 mlx4_dbg(dev, "%s: delete counter index %d for slave %d port %d\n", 2566 __func__, idx, slave, port); 2567 mlx4_bitmap_free(&priv->counters_table.bitmap, idx, MLX4_USE_RR); 2568 goto out; 2569 } else { 2570 mlx4_dbg(dev, "%s: can't delete default slave (%d) counter index %d for port %d\n" 2571 , __func__, slave, idx, port); 2572 goto out; 2573 } 2574 } 2575 first++; 2576 } 2577 mlx4_dbg(dev, "%s: can't delete slave (%d) counter index %d for port %d\n" 2578 , __func__, slave, idx, port); 2579 } 2580 2581 out: 2582 mutex_unlock(&priv->counters_table.mutex); 2583 } 2584 2585 void mlx4_counter_free(struct mlx4_dev *dev, u8 port, u32 idx) 2586 { 2587 u64 in_param = 0; 2588 struct mlx4_priv *priv = mlx4_priv(dev); 2589 struct counter_index *counter, *tmp_counter; 2590 int first = 0; 2591 2592 if (mlx4_is_mfunc(dev)) { 2593 set_param_l(&in_param, idx); 2594 mlx4_cmd(dev, in_param, 2595 ((u32) port) << 8 | (u32) RES_COUNTER, 2596 RES_OP_RESERVE, 2597 MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, 2598 MLX4_CMD_WRAPPED); 2599 2600 if (mlx4_is_slave(dev) && idx != MLX4_SINK_COUNTER_INDEX) { 2601 mutex_lock(&priv->counters_table.mutex); 2602 list_for_each_entry_safe(counter, tmp_counter, 2603 &priv->counters_table.global_port_list[port - 1], 2604 list) { 2605 if (counter->index == idx && first++) { 2606 list_del(&counter->list); 2607 kfree(counter); 2608 mlx4_dbg(dev, "%s: delete counter index %d for port %d\n" 2609 , __func__, idx, port); 2610 mutex_unlock(&priv->counters_table.mutex); 2611 return; 2612 } 2613 } 2614 mutex_unlock(&priv->counters_table.mutex); 2615 } 2616 2617 return; 2618 } 2619 __mlx4_counter_free(dev, 0, port, idx); 2620 } 2621 EXPORT_SYMBOL_GPL(mlx4_counter_free); 2622 2623 int __mlx4_clear_if_stat(struct mlx4_dev *dev, 2624 u8 counter_index) 2625 { 2626 struct mlx4_cmd_mailbox 
*if_stat_mailbox = NULL; 2627 int err = 0; 2628 u32 if_stat_in_mod = (counter_index & 0xff) | (1 << 31); 2629 2630 if (counter_index == MLX4_SINK_COUNTER_INDEX) 2631 return -EINVAL; 2632 2633 if (mlx4_is_slave(dev)) 2634 return 0; 2635 2636 if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev); 2637 if (IS_ERR(if_stat_mailbox)) { 2638 err = PTR_ERR(if_stat_mailbox); 2639 return err; 2640 } 2641 2642 err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, if_stat_in_mod, 0, 2643 MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C, 2644 MLX4_CMD_NATIVE); 2645 2646 mlx4_free_cmd_mailbox(dev, if_stat_mailbox); 2647 return err; 2648 } 2649 2650 u8 mlx4_get_default_counter_index(struct mlx4_dev *dev, int slave, int port) 2651 { 2652 struct mlx4_priv *priv = mlx4_priv(dev); 2653 struct counter_index *new_counter_index; 2654 2655 if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) { 2656 mlx4_dbg(dev, "%s: return counter index %d for slave %d port (MLX4_PORT_TYPE_IB) %d\n", 2657 __func__, MLX4_SINK_COUNTER_INDEX, slave, port); 2658 return (u8)MLX4_SINK_COUNTER_INDEX; 2659 } 2660 2661 mutex_lock(&priv->counters_table.mutex); 2662 if (slave == 0) { 2663 new_counter_index = list_entry(priv->counters_table.global_port_list[port - 1].next, 2664 struct counter_index, 2665 list); 2666 } else { 2667 new_counter_index = list_entry(priv->counters_table.vf_list[slave - 1][port - 1].next, 2668 struct counter_index, 2669 list); 2670 } 2671 mutex_unlock(&priv->counters_table.mutex); 2672 2673 mlx4_dbg(dev, "%s: return counter index %d for slave %d port %d\n", 2674 __func__, new_counter_index->index, slave, port); 2675 2676 2677 return (u8)new_counter_index->index; 2678 } 2679 2680 int mlx4_get_vport_ethtool_stats(struct mlx4_dev *dev, int port, 2681 struct mlx4_en_vport_stats *vport_stats, 2682 int reset) 2683 { 2684 struct mlx4_priv *priv = mlx4_priv(dev); 2685 struct mlx4_cmd_mailbox *if_stat_mailbox = NULL; 2686 union mlx4_counter *counter; 2687 int err = 0; 2688 u32 if_stat_in_mod; 2689 struct 
counter_index *vport, *tmp_vport; 2690 2691 if (!vport_stats) 2692 return -EINVAL; 2693 2694 if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev); 2695 if (IS_ERR(if_stat_mailbox)) { 2696 err = PTR_ERR(if_stat_mailbox); 2697 return err; 2698 } 2699 2700 mutex_lock(&priv->counters_table.mutex); 2701 list_for_each_entry_safe(vport, tmp_vport, 2702 &priv->counters_table.global_port_list[port - 1], 2703 list) { 2704 if (vport->index == MLX4_SINK_COUNTER_INDEX) 2705 continue; 2706 2707 memset(if_stat_mailbox->buf, 0, sizeof(union mlx4_counter)); 2708 if_stat_in_mod = (vport->index & 0xff) | ((reset & 1) << 31); 2709 err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, 2710 if_stat_in_mod, 0, 2711 MLX4_CMD_QUERY_IF_STAT, 2712 MLX4_CMD_TIME_CLASS_C, 2713 MLX4_CMD_NATIVE); 2714 if (err) { 2715 mlx4_dbg(dev, "%s: failed to read statistics for counter index %d\n", 2716 __func__, vport->index); 2717 goto if_stat_out; 2718 } 2719 counter = (union mlx4_counter *)if_stat_mailbox->buf; 2720 if ((counter->control.cnt_mode & 0xf) == 1) { 2721 vport_stats->rx_broadcast_packets += be64_to_cpu(counter->ext.counters[0].IfRxBroadcastFrames); 2722 vport_stats->rx_unicast_packets += be64_to_cpu(counter->ext.counters[0].IfRxUnicastFrames); 2723 vport_stats->rx_multicast_packets += be64_to_cpu(counter->ext.counters[0].IfRxMulticastFrames); 2724 vport_stats->tx_broadcast_packets += be64_to_cpu(counter->ext.counters[0].IfTxBroadcastFrames); 2725 vport_stats->tx_unicast_packets += be64_to_cpu(counter->ext.counters[0].IfTxUnicastFrames); 2726 vport_stats->tx_multicast_packets += be64_to_cpu(counter->ext.counters[0].IfTxMulticastFrames); 2727 vport_stats->rx_broadcast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxBroadcastOctets); 2728 vport_stats->rx_unicast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxUnicastOctets); 2729 vport_stats->rx_multicast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxMulticastOctets); 2730 vport_stats->tx_broadcast_bytes += 
be64_to_cpu(counter->ext.counters[0].IfTxBroadcastOctets); 2731 vport_stats->tx_unicast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxUnicastOctets); 2732 vport_stats->tx_multicast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxMulticastOctets); 2733 vport_stats->rx_errors += be64_to_cpu(counter->ext.counters[0].IfRxErrorFrames); 2734 vport_stats->rx_dropped += be64_to_cpu(counter->ext.counters[0].IfRxNoBufferFrames); 2735 vport_stats->tx_errors += be64_to_cpu(counter->ext.counters[0].IfTxDroppedFrames); 2736 } 2737 } 2738 2739 if_stat_out: 2740 mutex_unlock(&priv->counters_table.mutex); 2741 mlx4_free_cmd_mailbox(dev, if_stat_mailbox); 2742 2743 return err; 2744 } 2745 EXPORT_SYMBOL_GPL(mlx4_get_vport_ethtool_stats); 2746 2747 static int mlx4_setup_hca(struct mlx4_dev *dev) 2748 { 2749 struct mlx4_priv *priv = mlx4_priv(dev); 2750 int err; 2751 int port; 2752 __be32 ib_port_default_caps; 2753 2754 err = mlx4_init_uar_table(dev); 2755 if (err) { 2756 mlx4_err(dev, "Failed to initialize " 2757 "user access region table (err=%d), aborting.\n", 2758 err); 2759 return err; 2760 } 2761 2762 err = mlx4_uar_alloc(dev, &priv->driver_uar); 2763 if (err) { 2764 mlx4_err(dev, "Failed to allocate driver access region " 2765 "(err=%d), aborting.\n", err); 2766 goto err_uar_table_free; 2767 } 2768 2769 priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE); 2770 if (!priv->kar) { 2771 mlx4_err(dev, "Couldn't map kernel access region, " 2772 "aborting.\n"); 2773 err = -ENOMEM; 2774 goto err_uar_free; 2775 } 2776 2777 err = mlx4_init_pd_table(dev); 2778 if (err) { 2779 mlx4_err(dev, "Failed to initialize " 2780 "protection domain table (err=%d), aborting.\n", err); 2781 goto err_kar_unmap; 2782 } 2783 2784 err = mlx4_init_xrcd_table(dev); 2785 if (err) { 2786 mlx4_err(dev, "Failed to initialize " 2787 "reliable connection domain table (err=%d), " 2788 "aborting.\n", err); 2789 goto err_pd_table_free; 2790 } 2791 2792 err = mlx4_init_mr_table(dev); 
2793 if (err) { 2794 mlx4_err(dev, "Failed to initialize " 2795 "memory region table (err=%d), aborting.\n", err); 2796 goto err_xrcd_table_free; 2797 } 2798 2799 if (!mlx4_is_slave(dev)) { 2800 err = mlx4_init_mcg_table(dev); 2801 if (err) { 2802 mlx4_err(dev, "Failed to initialize " 2803 "multicast group table (err=%d), aborting.\n", 2804 err); 2805 goto err_mr_table_free; 2806 } 2807 } 2808 2809 err = mlx4_init_eq_table(dev); 2810 if (err) { 2811 mlx4_err(dev, "Failed to initialize " 2812 "event queue table (err=%d), aborting.\n", err); 2813 goto err_mcg_table_free; 2814 } 2815 2816 err = mlx4_cmd_use_events(dev); 2817 if (err) { 2818 mlx4_err(dev, "Failed to switch to event-driven " 2819 "firmware commands (err=%d), aborting.\n", err); 2820 goto err_eq_table_free; 2821 } 2822 2823 err = mlx4_NOP(dev); 2824 if (err) { 2825 if (dev->flags & MLX4_FLAG_MSI_X) { 2826 mlx4_warn(dev, "NOP command failed to generate MSI-X " 2827 "interrupt IRQ %d).\n", 2828 priv->eq_table.eq[dev->caps.num_comp_vectors].irq); 2829 mlx4_warn(dev, "Trying again without MSI-X.\n"); 2830 } else { 2831 mlx4_err(dev, "NOP command failed to generate interrupt " 2832 "(IRQ %d), aborting.\n", 2833 priv->eq_table.eq[dev->caps.num_comp_vectors].irq); 2834 mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n"); 2835 } 2836 2837 goto err_cmd_poll; 2838 } 2839 2840 mlx4_dbg(dev, "NOP command IRQ test passed\n"); 2841 2842 err = mlx4_init_cq_table(dev); 2843 if (err) { 2844 mlx4_err(dev, "Failed to initialize " 2845 "completion queue table (err=%d), aborting.\n", err); 2846 goto err_cmd_poll; 2847 } 2848 2849 err = mlx4_init_srq_table(dev); 2850 if (err) { 2851 mlx4_err(dev, "Failed to initialize " 2852 "shared receive queue table (err=%d), aborting.\n", 2853 err); 2854 goto err_cq_table_free; 2855 } 2856 2857 err = mlx4_init_qp_table(dev); 2858 if (err) { 2859 mlx4_err(dev, "Failed to initialize " 2860 "queue pair table (err=%d), aborting.\n", err); 2861 goto err_srq_table_free; 2862 } 2863 2864 
err = mlx4_init_counters_table(dev); 2865 if (err && err != -ENOENT) { 2866 mlx4_err(dev, "Failed to initialize counters table (err=%d), " 2867 "aborting.\n", err); 2868 goto err_qp_table_free; 2869 } 2870 2871 if (!mlx4_is_slave(dev)) { 2872 for (port = 1; port <= dev->caps.num_ports; port++) { 2873 ib_port_default_caps = 0; 2874 err = mlx4_get_port_ib_caps(dev, port, 2875 &ib_port_default_caps); 2876 if (err) 2877 mlx4_warn(dev, "failed to get port %d default " 2878 "ib capabilities (%d). Continuing " 2879 "with caps = 0\n", port, err); 2880 dev->caps.ib_port_def_cap[port] = ib_port_default_caps; 2881 2882 /* initialize per-slave default ib port capabilities */ 2883 if (mlx4_is_master(dev)) { 2884 int i; 2885 for (i = 0; i < dev->num_slaves; i++) { 2886 if (i == mlx4_master_func_num(dev)) 2887 continue; 2888 priv->mfunc.master.slave_state[i].ib_cap_mask[port] = 2889 ib_port_default_caps; 2890 } 2891 } 2892 2893 dev->caps.port_ib_mtu[port] = IB_MTU_4096; 2894 2895 err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ? 
2896 dev->caps.pkey_table_len[port] : -1); 2897 if (err) { 2898 mlx4_err(dev, "Failed to set port %d (err=%d), " 2899 "aborting\n", port, err); 2900 goto err_counters_table_free; 2901 } 2902 } 2903 } 2904 2905 return 0; 2906 2907 err_counters_table_free: 2908 mlx4_cleanup_counters_table(dev); 2909 2910 err_qp_table_free: 2911 mlx4_cleanup_qp_table(dev); 2912 2913 err_srq_table_free: 2914 mlx4_cleanup_srq_table(dev); 2915 2916 err_cq_table_free: 2917 mlx4_cleanup_cq_table(dev); 2918 2919 err_cmd_poll: 2920 mlx4_cmd_use_polling(dev); 2921 2922 err_eq_table_free: 2923 mlx4_cleanup_eq_table(dev); 2924 2925 err_mcg_table_free: 2926 if (!mlx4_is_slave(dev)) 2927 mlx4_cleanup_mcg_table(dev); 2928 2929 err_mr_table_free: 2930 mlx4_cleanup_mr_table(dev); 2931 2932 err_xrcd_table_free: 2933 mlx4_cleanup_xrcd_table(dev); 2934 2935 err_pd_table_free: 2936 mlx4_cleanup_pd_table(dev); 2937 2938 err_kar_unmap: 2939 iounmap(priv->kar); 2940 2941 err_uar_free: 2942 mlx4_uar_free(dev, &priv->driver_uar); 2943 2944 err_uar_table_free: 2945 mlx4_cleanup_uar_table(dev); 2946 return err; 2947 } 2948 2949 static void mlx4_enable_msi_x(struct mlx4_dev *dev) 2950 { 2951 struct mlx4_priv *priv = mlx4_priv(dev); 2952 struct msix_entry *entries; 2953 int err; 2954 int i; 2955 2956 if (msi_x) { 2957 int nreq = dev->caps.num_ports * num_online_cpus() + MSIX_LEGACY_SZ; 2958 2959 nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs, 2960 nreq); 2961 2962 if (msi_x > 1 && !mlx4_is_mfunc(dev)) 2963 nreq = min_t(int, nreq, msi_x); 2964 2965 entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL); 2966 if (!entries) 2967 goto no_msi; 2968 2969 for (i = 0; i < nreq; ++i) 2970 entries[i].entry = i; 2971 2972 retry: 2973 err = pci_enable_msix(dev->pdev, entries, nreq); 2974 if (err) { 2975 /* Try again if at least 2 vectors are available */ 2976 if (err > 1) { 2977 mlx4_info(dev, "Requested %d vectors, " 2978 "but only %d MSI-X vectors available, " 2979 "trying again\n", nreq, err); 2980 nreq = 
err; 2981 goto retry; 2982 } 2983 kfree(entries); 2984 /* if error, or can't alloc even 1 IRQ */ 2985 if (err < 0) { 2986 mlx4_err(dev, "No IRQs left, device can't " 2987 "be started.\n"); 2988 goto no_irq; 2989 } 2990 goto no_msi; 2991 } 2992 2993 if (nreq < 2994 MSIX_LEGACY_SZ + dev->caps.num_ports * MIN_MSIX_P_PORT) { 2995 /*Working in legacy mode , all EQ's shared*/ 2996 dev->caps.comp_pool = 0; 2997 dev->caps.num_comp_vectors = nreq - 1; 2998 } else { 2999 dev->caps.comp_pool = nreq - MSIX_LEGACY_SZ; 3000 dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1; 3001 } 3002 for (i = 0; i < nreq; ++i) 3003 priv->eq_table.eq[i].irq = entries[i].vector; 3004 3005 dev->flags |= MLX4_FLAG_MSI_X; 3006 3007 kfree(entries); 3008 return; 3009 } 3010 3011 no_msi: 3012 dev->caps.num_comp_vectors = 1; 3013 dev->caps.comp_pool = 0; 3014 3015 for (i = 0; i < 2; ++i) 3016 priv->eq_table.eq[i].irq = dev->pdev->irq; 3017 return; 3018 no_irq: 3019 dev->caps.num_comp_vectors = 0; 3020 dev->caps.comp_pool = 0; 3021 return; 3022 } 3023 3024 static void 3025 mlx4_init_hca_info(struct mlx4_dev *dev) 3026 { 3027 struct mlx4_hca_info *info = &mlx4_priv(dev)->hca_info; 3028 3029 info->dev = dev; 3030 3031 info->firmware_attr = (struct device_attribute)__ATTR(fw_ver, S_IRUGO, 3032 show_firmware_version, NULL); 3033 if (device_create_file(&dev->pdev->dev, &info->firmware_attr)) 3034 mlx4_err(dev, "Failed to add file firmware version"); 3035 3036 info->hca_attr = (struct device_attribute)__ATTR(hca, S_IRUGO, show_hca, 3037 NULL); 3038 if (device_create_file(&dev->pdev->dev, &info->hca_attr)) 3039 mlx4_err(dev, "Failed to add file hca type"); 3040 3041 info->board_attr = (struct device_attribute)__ATTR(board_id, S_IRUGO, 3042 show_board, NULL); 3043 if (device_create_file(&dev->pdev->dev, &info->board_attr)) 3044 mlx4_err(dev, "Failed to add file board id type"); 3045 } 3046 3047 static int mlx4_init_port_info(struct mlx4_dev *dev, int port) 3048 { 3049 struct mlx4_port_info *info = 
&mlx4_priv(dev)->port[port]; 3050 int err = 0; 3051 3052 info->dev = dev; 3053 info->port = port; 3054 if (!mlx4_is_slave(dev)) { 3055 mlx4_init_mac_table(dev, &info->mac_table); 3056 mlx4_init_vlan_table(dev, &info->vlan_table); 3057 info->base_qpn = mlx4_get_base_qpn(dev, port); 3058 } 3059 3060 sprintf(info->dev_name, "mlx4_port%d", port); 3061 info->port_attr.attr.name = info->dev_name; 3062 if (mlx4_is_mfunc(dev)) 3063 info->port_attr.attr.mode = S_IRUGO; 3064 else { 3065 info->port_attr.attr.mode = S_IRUGO | S_IWUSR; 3066 info->port_attr.store = set_port_type; 3067 } 3068 info->port_attr.show = show_port_type; 3069 sysfs_attr_init(&info->port_attr.attr); 3070 3071 err = device_create_file(&dev->pdev->dev, &info->port_attr); 3072 if (err) { 3073 mlx4_err(dev, "Failed to create file for port %d\n", port); 3074 info->port = -1; 3075 } 3076 3077 sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port); 3078 info->port_mtu_attr.attr.name = info->dev_mtu_name; 3079 if (mlx4_is_mfunc(dev)) 3080 info->port_mtu_attr.attr.mode = S_IRUGO; 3081 else { 3082 info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR; 3083 info->port_mtu_attr.store = set_port_ib_mtu; 3084 } 3085 info->port_mtu_attr.show = show_port_ib_mtu; 3086 sysfs_attr_init(&info->port_mtu_attr.attr); 3087 3088 err = device_create_file(&dev->pdev->dev, &info->port_mtu_attr); 3089 if (err) { 3090 mlx4_err(dev, "Failed to create mtu file for port %d\n", port); 3091 device_remove_file(&info->dev->pdev->dev, &info->port_attr); 3092 info->port = -1; 3093 } 3094 3095 return err; 3096 } 3097 3098 static void 3099 mlx4_cleanup_hca_info(struct mlx4_hca_info *info) 3100 { 3101 device_remove_file(&info->dev->pdev->dev, &info->firmware_attr); 3102 device_remove_file(&info->dev->pdev->dev, &info->board_attr); 3103 device_remove_file(&info->dev->pdev->dev, &info->hca_attr); 3104 } 3105 3106 static void mlx4_cleanup_port_info(struct mlx4_port_info *info) 3107 { 3108 if (info->port < 0) 3109 return; 3110 3111 
	device_remove_file(&info->dev->pdev->dev, &info->port_attr);
	device_remove_file(&info->dev->pdev->dev, &info->port_mtu_attr);
}

/*
 * Allocate the per-port steering state and initialize the promiscuous-QP
 * and steering-entry lists for every steer type.  Returns 0 or -ENOMEM.
 */
static int mlx4_init_steering(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int num_entries = dev->caps.num_ports;
	int i, j;

	priv->steer = kzalloc(sizeof(struct mlx4_steer) * num_entries, GFP_KERNEL);
	if (!priv->steer)
		return -ENOMEM;

	for (i = 0; i < num_entries; i++)
		for (j = 0; j < MLX4_NUM_STEERS; j++) {
			INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]);
			INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]);
		}
	return 0;
}

/*
 * Free all steering state: every promiscuous QP, every steering entry
 * (including each entry's duplicate-QP list), then the steer array itself.
 * Safe iteration via list_for_each_entry_safe since nodes are freed in place.
 */
static void mlx4_clear_steering(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_steer_index *entry, *tmp_entry;
	struct mlx4_promisc_qp *pqp, *tmp_pqp;
	int num_entries = dev->caps.num_ports;
	int i, j;

	for (i = 0; i < num_entries; i++) {
		for (j = 0; j < MLX4_NUM_STEERS; j++) {
			list_for_each_entry_safe(pqp, tmp_pqp,
						 &priv->steer[i].promisc_qps[j],
						 list) {
				list_del(&pqp->list);
				kfree(pqp);
			}
			list_for_each_entry_safe(entry, tmp_entry,
						 &priv->steer[i].steer_entries[j],
						 list) {
				list_del(&entry->list);
				list_for_each_entry_safe(pqp, tmp_pqp,
							 &entry->duplicates,
							 list) {
					list_del(&pqp->list);
					kfree(pqp);
				}
				kfree(entry);
			}
		}
	}
	kfree(priv->steer);
}

/*
 * Flatten (slot, function) into a single function index — 8 functions
 * per slot, per the PCI devfn encoding.
 */
static int extended_func_num(struct pci_dev *pdev)
{
	return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn);
}

/* Device-ownership word inside BAR 0: non-zero means another PF owns the HCA. */
#define MLX4_OWNER_BASE	0x8069c
#define MLX4_OWNER_SIZE	4

/*
 * Read the ownership word.  Returns 0 if we own the device, 1 if another
 * PF does, or a negative errno (offline channel / ioremap failure).
 */
static int mlx4_get_ownership(struct mlx4_dev *dev)
{
	void __iomem *owner;
	u32 ret;

	if (pci_channel_offline(dev->pdev))
		return -EIO;

	owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE,
			MLX4_OWNER_SIZE);
	if
(!owner) { 3185 mlx4_err(dev, "Failed to obtain ownership bit\n"); 3186 return -ENOMEM; 3187 } 3188 3189 ret = readl(owner); 3190 iounmap(owner); 3191 return (int) !!ret; 3192 } 3193 3194 static void mlx4_free_ownership(struct mlx4_dev *dev) 3195 { 3196 void __iomem *owner; 3197 3198 if (pci_channel_offline(dev->pdev)) 3199 return; 3200 3201 owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE, 3202 MLX4_OWNER_SIZE); 3203 if (!owner) { 3204 mlx4_err(dev, "Failed to obtain ownership bit\n"); 3205 return; 3206 } 3207 writel(0, owner); 3208 msleep(1000); 3209 iounmap(owner); 3210 } 3211 3212 static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) 3213 { 3214 struct mlx4_priv *priv; 3215 struct mlx4_dev *dev; 3216 int err; 3217 int port; 3218 int nvfs, prb_vf; 3219 3220 pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev)); 3221 3222 err = pci_enable_device(pdev); 3223 if (err) { 3224 dev_err(&pdev->dev, "Cannot enable PCI device, " 3225 "aborting.\n"); 3226 return err; 3227 } 3228 3229 mlx4_get_val(num_vfs.dbdf2val.tbl, pci_physfn(pdev), 0, &nvfs); 3230 mlx4_get_val(probe_vf.dbdf2val.tbl, pci_physfn(pdev), 0, &prb_vf); 3231 if (nvfs > MLX4_MAX_NUM_VF) { 3232 dev_err(&pdev->dev, "There are more VF's (%d) than allowed(%d)\n", 3233 nvfs, MLX4_MAX_NUM_VF); 3234 return -EINVAL; 3235 } 3236 3237 if (nvfs < 0) { 3238 dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n"); 3239 return -EINVAL; 3240 } 3241 /* 3242 * Check for BARs. 3243 */ 3244 if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) && 3245 !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { 3246 dev_err(&pdev->dev, "Missing DCS, aborting." 
		    "(driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%x)\n",
		    pci_dev_data, pci_resource_flags(pdev, 0));
		err = -ENODEV;
		goto err_disable_pdev;
	}
	/* BAR 2 holds the UARs and is required on every function. */
	if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
		dev_err(&pdev->dev, "Missing UAR, aborting.\n");
		err = -ENODEV;
		goto err_disable_pdev;
	}

	err = pci_request_regions(pdev, DRV_NAME);
	if (err) {
		dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
		goto err_disable_pdev;
	}

	pci_set_master(pdev);

	/* Prefer 64-bit DMA masks; fall back to 32-bit before giving up. */
	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n");
		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
			goto err_release_regions;
		}
	}
	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		dev_warn(&pdev->dev, "Warning: couldn't set 64-bit "
			 "consistent PCI DMA mask.\n");
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
				"aborting.\n");
			goto err_release_regions;
		}
	}

	/* Allow large DMA segments, up to the firmware limit of 1 GB */
	dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);

	priv = kzalloc(sizeof *priv, GFP_KERNEL);
	if (!priv) {
		dev_err(&pdev->dev, "Device struct alloc failed, "
			"aborting.\n");
		err = -ENOMEM;
		goto err_release_regions;
	}

	dev = &priv->dev;
	dev->pdev = pdev;
	INIT_LIST_HEAD(&priv->dev_list);
	INIT_LIST_HEAD(&priv->ctx_list);
	spin_lock_init(&priv->ctx_lock);

	mutex_init(&priv->port_mutex);

	INIT_LIST_HEAD(&priv->pgdir_list);
	mutex_init(&priv->pgdir_mutex);

	INIT_LIST_HEAD(&priv->bf_list);
	mutex_init(&priv->bf_mutex);

	dev->rev_id = pdev->revision;
	dev->numa_node = dev_to_node(&pdev->dev);
	/* Detect if this device is a virtual function */
	if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
		/* When acting as pf, we normally skip vfs unless explicitly
		 * requested to probe them. */
		if (nvfs && extended_func_num(pdev) > prb_vf) {
			mlx4_warn(dev, "Skipping virtual function:%d\n",
				  extended_func_num(pdev));
			err = -ENODEV;
			goto err_free_dev;
		}
		mlx4_warn(dev, "Detected virtual function - running in slave mode\n");
		dev->flags |= MLX4_FLAG_SLAVE;
	} else {
		/* We reset the device and enable SRIOV only for physical
		 * devices.  Try to claim ownership on the device;
		 * if already taken, skip -- do not allow multiple PFs */
		err = mlx4_get_ownership(dev);
		if (err) {
			if (err < 0)
				goto err_free_dev;
			else {
				mlx4_warn(dev, "Multiple PFs not yet supported."
					  " Skipping PF.\n");
				err = -EINVAL;
				goto err_free_dev;
			}
		}

		if (nvfs) {
			mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", nvfs);
			err = pci_enable_sriov(pdev, nvfs);
			if (err) {
				/* SR-IOV failure is non-fatal: continue as a plain PF. */
				mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n",
					 err);
				err = 0;
			} else {
				mlx4_warn(dev, "Running in master mode\n");
				dev->flags |= MLX4_FLAG_SRIOV |
					      MLX4_FLAG_MASTER;
				dev->num_vfs = nvfs;
			}
		}

		atomic_set(&priv->opreq_count, 0);
		INIT_WORK(&priv->opreq_task, mlx4_opreq_action);

		/*
		 * Now reset the HCA before we touch the PCI capabilities or
		 * attempt a firmware command, since a boot ROM may have left
		 * the HCA in an undefined state.
		 */
		err = mlx4_reset(dev);
		if (err) {
			mlx4_err(dev, "Failed to reset HCA, aborting.\n");
			goto err_sriov;
		}
	}

slave_start:
	err = mlx4_cmd_init(dev);
	if (err) {
		mlx4_err(dev, "Failed to init command interface, aborting.\n");
		goto err_sriov;
	}

	/* In slave functions, the communication channel must be initialized
	 * before posting commands. Also, init num_slaves before calling
	 * mlx4_init_hca */
	if (mlx4_is_mfunc(dev)) {
		if (mlx4_is_master(dev))
			dev->num_slaves = MLX4_MAX_NUM_SLAVES;
		else {
			dev->num_slaves = 0;
			err = mlx4_multi_func_init(dev);
			if (err) {
				mlx4_err(dev, "Failed to init slave mfunc"
					 " interface, aborting.\n");
				goto err_cmd;
			}
		}
	}

	err = mlx4_init_hca(dev);
	if (err) {
		if (err == -EACCES) {
			/* Not primary Physical function
			 * Running in slave mode: retry the whole command
			 * init with the slave flag set. */
			mlx4_cmd_cleanup(dev);
			dev->flags |= MLX4_FLAG_SLAVE;
			dev->flags &= ~MLX4_FLAG_MASTER;
			goto slave_start;
		} else
			goto err_mfunc;
	}

	/* In master functions, the communication channel must be initialized
	 * after obtaining its address from fw */
	if (mlx4_is_master(dev)) {
		err = mlx4_multi_func_init(dev);
		if (err) {
			mlx4_err(dev, "Failed to init master mfunc"
				 "interface, aborting.\n");
			goto err_close;
		}
	}

	err = mlx4_alloc_eq_table(dev);
	if (err)
		goto err_master_mfunc;

	priv->msix_ctl.pool_bm = 0;
	mutex_init(&priv->msix_ctl.pool_lock);

	mlx4_enable_msi_x(dev);

	/* no MSIX and no shared IRQ */
	if (!dev->caps.num_comp_vectors && !dev->caps.comp_pool) {
		err = -ENOSPC;
		goto err_free_eq;
	}

	if ((mlx4_is_mfunc(dev)) &&
	    !(dev->flags & MLX4_FLAG_MSI_X)) {
		err = -ENOSYS;
		mlx4_err(dev, "INTx is not supported in multi-function mode."
			 " aborting.\n");
		goto err_free_eq;
	}

	if (!mlx4_is_slave(dev)) {
		err = mlx4_init_steering(dev);
		if (err)
			goto err_free_eq;
	}

	mlx4_init_quotas(dev);

	err = mlx4_setup_hca(dev);
	/* -EBUSY with MSI-X on a single-function device: retry once in INTx mode. */
	if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) &&
	    !mlx4_is_mfunc(dev)) {
		dev->flags &= ~MLX4_FLAG_MSI_X;
		dev->caps.num_comp_vectors = 1;
		dev->caps.comp_pool = 0;
		pci_disable_msix(pdev);
		err = mlx4_setup_hca(dev);
	}

	if (err)
		goto err_steer;

	mlx4_init_hca_info(dev);

	for (port = 1; port <= dev->caps.num_ports; port++) {
		err = mlx4_init_port_info(dev, port);
		if (err)
			goto err_port;
	}

	err = mlx4_register_device(dev);
	if (err)
		goto err_port;

	mlx4_request_modules(dev);

	mlx4_sense_init(dev);
	mlx4_start_sense(dev);

	priv->pci_dev_data = pci_dev_data;
	pci_set_drvdata(pdev, dev);

	return 0;

	/* Error unwind: each label releases what was acquired after the
	 * previous label's resources, in reverse acquisition order. */
err_port:
	for (--port; port >= 1; --port)
		mlx4_cleanup_port_info(&priv->port[port]);

	mlx4_cleanup_counters_table(dev);
	mlx4_cleanup_qp_table(dev);
	mlx4_cleanup_srq_table(dev);
	mlx4_cleanup_cq_table(dev);
	mlx4_cmd_use_polling(dev);
	mlx4_cleanup_eq_table(dev);
	mlx4_cleanup_mcg_table(dev);
	mlx4_cleanup_mr_table(dev);
	mlx4_cleanup_xrcd_table(dev);
	mlx4_cleanup_pd_table(dev);
	mlx4_cleanup_uar_table(dev);

err_steer:
	if (!mlx4_is_slave(dev))
		mlx4_clear_steering(dev);

err_free_eq:
	mlx4_free_eq_table(dev);

err_master_mfunc:
	if (mlx4_is_master(dev)) {
		mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY);
		mlx4_multi_func_cleanup(dev);
	}

	if (mlx4_is_slave(dev)) {
		kfree(dev->caps.qp0_tunnel);
		kfree(dev->caps.qp0_proxy);
		kfree(dev->caps.qp1_tunnel);
		kfree(dev->caps.qp1_proxy);
	}

err_close:
	if (dev->flags & MLX4_FLAG_MSI_X)
		pci_disable_msix(pdev);

	mlx4_close_hca(dev);

err_mfunc:
	if (mlx4_is_slave(dev))
		mlx4_multi_func_cleanup(dev);

err_cmd:
	mlx4_cmd_cleanup(dev);

err_sriov:
	if (dev->flags & MLX4_FLAG_SRIOV)
		pci_disable_sriov(pdev);

	if (!mlx4_is_slave(dev))
		mlx4_free_ownership(dev);

err_free_dev:
	kfree(priv);

err_release_regions:
	pci_release_regions(pdev);

err_disable_pdev:
	pci_disable_device(pdev);
	pci_set_drvdata(pdev, NULL);
	return err;
}

/*
 * PCI probe entry point: set the FreeBSD device description, then run the
 * shared init path with the id table's driver_data flags.
 */
static int __devinit mlx4_init_one(struct pci_dev *pdev,
				   const struct pci_device_id *id)
{
	device_set_desc(pdev->dev.bsddev, mlx4_version);
	return __mlx4_init_one(pdev, id->driver_data);
}

/*
 * PCI remove entry point: tear down everything __mlx4_init_one() built,
 * in reverse order.  Continues below.
 */
static void mlx4_remove_one(struct pci_dev *pdev)
{
	struct mlx4_dev *dev = pci_get_drvdata(pdev);
	struct mlx4_priv *priv = mlx4_priv(dev);
	int p;

	if (dev) {
		/* in SRIOV it is not allowed to unload the pf's
		 * driver while there are alive vf's */
		if (mlx4_is_master(dev)) {
			if (mlx4_how_many_lives_vf(dev))
				mlx4_err(dev, "Removing PF when there are assigned VF's !!!\n");
		}
		mlx4_stop_sense(dev);
		mlx4_unregister_device(dev);

		mlx4_cleanup_hca_info(&priv->hca_info);
		for (p = 1; p <= dev->caps.num_ports; p++) {
			mlx4_cleanup_port_info(&priv->port[p]);
			mlx4_CLOSE_PORT(dev, p);
		}

		if (mlx4_is_master(dev))
			mlx4_free_resource_tracker(dev,
						   RES_TR_FREE_SLAVES_ONLY);

		mlx4_cleanup_counters_table(dev);
		mlx4_cleanup_qp_table(dev);
		mlx4_cleanup_srq_table(dev);
		mlx4_cleanup_cq_table(dev);
		mlx4_cmd_use_polling(dev);
		mlx4_cleanup_eq_table(dev);
		mlx4_cleanup_mcg_table(dev);
		mlx4_cleanup_mr_table(dev);
		mlx4_cleanup_xrcd_table(dev);
		mlx4_cleanup_pd_table(dev);

		if (mlx4_is_master(dev))
			mlx4_free_resource_tracker(dev,
						   RES_TR_FREE_STRUCTS_ONLY);

		iounmap(priv->kar);
		mlx4_uar_free(dev, &priv->driver_uar);
		mlx4_cleanup_uar_table(dev);
		if (!mlx4_is_slave(dev))
			mlx4_clear_steering(dev);
		mlx4_free_eq_table(dev);
		if (mlx4_is_master(dev))
			mlx4_multi_func_cleanup(dev);
		mlx4_close_hca(dev);
		if (mlx4_is_slave(dev))
			mlx4_multi_func_cleanup(dev);
		mlx4_cmd_cleanup(dev);

		if (dev->flags & MLX4_FLAG_MSI_X)
			pci_disable_msix(pdev);
		if (dev->flags & MLX4_FLAG_SRIOV) {
			mlx4_warn(dev, "Disabling SR-IOV\n");
			pci_disable_sriov(pdev);
		}

		if (!mlx4_is_slave(dev))
			mlx4_free_ownership(dev);

		kfree(dev->caps.qp0_tunnel);
		kfree(dev->caps.qp0_proxy);
		kfree(dev->caps.qp1_tunnel);
		kfree(dev->caps.qp1_proxy);

		/* priv embeds dev, so this frees both. */
		kfree(priv);
		pci_release_regions(pdev);
		pci_disable_device(pdev);
		pci_set_drvdata(pdev, NULL);
	}
}

/*
 * Re-apply the port types saved before a restart: write back the possible
 * types, then ask firmware to switch the actual types.  Sense is stopped
 * around the change so it cannot race with the update.
 */
static int restore_current_port_types(struct mlx4_dev *dev,
				      enum mlx4_port_type *types,
				      enum mlx4_port_type *poss_types)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int err, i;

	mlx4_stop_sense(dev);
	mutex_lock(&priv->port_mutex);
	for (i = 0; i < dev->caps.num_ports; i++)
		dev->caps.possible_type[i + 1] = poss_types[i];
	err = mlx4_change_port_types(dev, types);
	mlx4_start_sense(dev);
	mutex_unlock(&priv->port_mutex);
	return err;
}

/*
 * Full restart: save the current and possible port types, remove and
 * re-probe the device, then restore the saved types.  A failed restore is
 * logged but does not fail the restart.
 */
int mlx4_restart_one(struct pci_dev *pdev)
{
	struct mlx4_dev *dev = pci_get_drvdata(pdev);
	struct mlx4_priv *priv = mlx4_priv(dev);
	enum mlx4_port_type curr_type[MLX4_MAX_PORTS];
	enum mlx4_port_type poss_type[MLX4_MAX_PORTS];
	int pci_dev_data, err, i;

	pci_dev_data = priv->pci_dev_data;
	/* caps arrays are 1-based per port; save into 0-based locals. */
	for (i = 0; i < dev->caps.num_ports; i++) {
		curr_type[i] = dev->caps.port_type[i + 1];
		poss_type[i] = dev->caps.possible_type[i + 1];
	}

	mlx4_remove_one(pdev);
	err = __mlx4_init_one(pdev, pci_dev_data);
	if (err)
		return err;

	/* Re-probe allocated a fresh mlx4_dev; fetch it before restoring. */
	dev = pci_get_drvdata(pdev);
	err = restore_current_port_types(dev, curr_type, poss_type);
	if (err)
		mlx4_err(dev, "mlx4_restart_one: could not restore original port types (%d)\n",
			 err);
	return 0;
}

/*
 * PCI id table.  driver_data carries MLX4_PCI_DEV_* flags consumed by
 * __mlx4_init_one() (force port sensing / function is a VF).
 */
static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = {
	/* MT25408 "Hermon" SDR */
	{ PCI_VDEVICE(MELLANOX, 0x6340),
		.driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT25408 "Hermon" DDR */
	{ PCI_VDEVICE(MELLANOX, 0x634a),
		.driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT25408 "Hermon" QDR */
	{ PCI_VDEVICE(MELLANOX, 0x6354),
		.driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT25408 "Hermon" DDR PCIe gen2 */
	{ PCI_VDEVICE(MELLANOX, 0x6732),
		.driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT25408 "Hermon" QDR PCIe gen2 */
	{ PCI_VDEVICE(MELLANOX, 0x673c),
		.driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT25408 "Hermon" EN 10GigE */
	{ PCI_VDEVICE(MELLANOX, 0x6368),
		.driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT25408 "Hermon" EN 10GigE PCIe gen2 */
	{ PCI_VDEVICE(MELLANOX, 0x6750),
		.driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT25458 ConnectX EN 10GBASE-T 10GigE */
	{ PCI_VDEVICE(MELLANOX, 0x6372),
		.driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
	{ PCI_VDEVICE(MELLANOX, 0x675a),
		.driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT26468 ConnectX EN 10GigE PCIe gen2*/
	{ PCI_VDEVICE(MELLANOX, 0x6764),
		.driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
	{ PCI_VDEVICE(MELLANOX, 0x6746),
		.driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT26478 ConnectX2 40GigE PCIe gen2 */
	{ PCI_VDEVICE(MELLANOX, 0x676e),
		.driver_data =
 MLX4_PCI_DEV_FORCE_SENSE_PORT },
	/* MT25400 Family [ConnectX-2 Virtual Function] */
	{ PCI_VDEVICE(MELLANOX, 0x1002),
		.driver_data = MLX4_PCI_DEV_IS_VF },
	/* MT27500 Family [ConnectX-3] */
	{ PCI_VDEVICE(MELLANOX, 0x1003) },
	/* MT27500 Family [ConnectX-3 Virtual Function] */
	{ PCI_VDEVICE(MELLANOX, 0x1004),
		.driver_data = MLX4_PCI_DEV_IS_VF },
	{ PCI_VDEVICE(MELLANOX, 0x1005) }, /* MT27510 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1006) }, /* MT27511 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1007) }, /* MT27520 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1008) }, /* MT27521 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1009) }, /* MT27530 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100a) }, /* MT27531 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100b) }, /* MT27540 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100c) }, /* MT27541 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100d) }, /* MT27550 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100e) }, /* MT27551 Family */
	{ PCI_VDEVICE(MELLANOX, 0x100f) }, /* MT27560 Family */
	{ PCI_VDEVICE(MELLANOX, 0x1010) }, /* MT27561 Family */
	{ 0, }	/* terminator */
};

MODULE_DEVICE_TABLE(pci, mlx4_pci_table);

/*
 * AER callback: on a channel error, tear the device down; permanent
 * failure means disconnect, otherwise request a slot reset.
 */
static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
					      pci_channel_state_t state)
{
	mlx4_remove_one(pdev);

	return state == pci_channel_io_perm_failure ?
		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
}

/*
 * AER callback: after a slot reset, re-probe from scratch (no driver_data
 * flags are preserved across the reset).
 */
static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
{
	int ret = __mlx4_init_one(pdev, 0);

	return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
}

static const struct pci_error_handlers mlx4_err_handler = {
	.error_detected	= mlx4_pci_err_detected,
	.slot_reset	= mlx4_pci_slot_reset,
};

/* Suspend/resume are implemented as a full remove/re-probe cycle. */
static int suspend(struct pci_dev *pdev, pm_message_t state)
{
	mlx4_remove_one(pdev);

	return 0;
}

static int resume(struct pci_dev *pdev)
{
	return __mlx4_init_one(pdev, 0);
}

static struct pci_driver mlx4_driver = {
	.name		= DRV_NAME,
	.id_table	= mlx4_pci_table,
	.probe		= mlx4_init_one,
	.remove		= __devexit_p(mlx4_remove_one),
	.suspend	= suspend,
	.resume		= resume,
	.err_handler	= &mlx4_err_handler,
};

/*
 * Validate every module parameter at load time; returns 0 on success or
 * -1 on any out-of-range value (module load is then refused).
 */
static int __init mlx4_verify_params(void)
{
	int status;

	/* The dbdf2val parameters are parsed first; an invalid string falls
	 * back to refilling the table with defaults, invalid data is fatal. */
	status = update_defaults(&port_type_array);
	if (status == INVALID_STR) {
		if (mlx4_fill_dbdf2val_tbl(&port_type_array.dbdf2val))
			return -1;
	} else if (status == INVALID_DATA) {
		return -1;
	}

	status = update_defaults(&num_vfs);
	if (status == INVALID_STR) {
		if (mlx4_fill_dbdf2val_tbl(&num_vfs.dbdf2val))
			return -1;
	} else if (status == INVALID_DATA) {
		return -1;
	}

	status = update_defaults(&probe_vf);
	if (status == INVALID_STR) {
		if (mlx4_fill_dbdf2val_tbl(&probe_vf.dbdf2val))
			return -1;
	} else if (status == INVALID_DATA) {
		return -1;
	}

	if (msi_x < 0) {
		pr_warn("mlx4_core: bad msi_x: %d\n", msi_x);
		return -1;
	}

	if ((log_num_mac < 0) || (log_num_mac > 7)) {
		pr_warning("mlx4_core: bad num_mac: %d\n", log_num_mac);
		return -1;
	}

	if (log_num_vlan != 0)
		pr_warning("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
			   MLX4_LOG_NUM_VLANS);

	if (mlx4_set_4k_mtu != -1)
		pr_warning("mlx4_core: set_4k_mtu - obsolete module param\n");

	if ((log_mtts_per_seg < 0) ||
	    (log_mtts_per_seg > 7)) {
		pr_warning("mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg);
		return -1;
	}

	/* -1 means "use the firmware default"; otherwise the value must be
	 * inside the supported MGM entry-size range. */
	if (mlx4_log_num_mgm_entry_size != -1 &&
	    (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE ||
	     mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) {
		pr_warning("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not "
			   "in legal range (-1 or %d..%d)\n",
			   mlx4_log_num_mgm_entry_size,
			   MLX4_MIN_MGM_LOG_ENTRY_SIZE,
			   MLX4_MAX_MGM_LOG_ENTRY_SIZE);
		return -1;
	}

	/* Profile values are log2 resource counts; enforce sane lower
	 * (and where applicable upper) bounds. */
	if (mod_param_profile.num_qp < 18 || mod_param_profile.num_qp > 23) {
		pr_warning("mlx4_core: bad log_num_qp: %d\n",
			   mod_param_profile.num_qp);
		return -1;
	}

	if (mod_param_profile.num_srq < 10) {
		pr_warning("mlx4_core: too low log_num_srq: %d\n",
			   mod_param_profile.num_srq);
		return -1;
	}

	if (mod_param_profile.num_cq < 10) {
		pr_warning("mlx4_core: too low log_num_cq: %d\n",
			   mod_param_profile.num_cq);
		return -1;
	}

	if (mod_param_profile.num_mpt < 10) {
		pr_warning("mlx4_core: too low log_num_mpt: %d\n",
			   mod_param_profile.num_mpt);
		return -1;
	}

	/* num_mtt_segs == 0 means "auto"; a non-zero value must be >= 15. */
	if (mod_param_profile.num_mtt_segs &&
	    mod_param_profile.num_mtt_segs < 15) {
		pr_warning("mlx4_core: too low log_num_mtt: %d\n",
			   mod_param_profile.num_mtt_segs);
		return -1;
	}

	if (mod_param_profile.num_mtt_segs > MLX4_MAX_LOG_NUM_MTT) {
		pr_warning("mlx4_core: too high log_num_mtt: %d\n",
			   mod_param_profile.num_mtt_segs);
		return -1;
	}
	return 0;
}

/*
 * Module load: validate parameters, set up catastrophic-error handling,
 * create the driver workqueue, optionally tune the CPUs, then register
 * the PCI driver.  Unwinds tuning and the workqueue on failure.
 */
static int __init mlx4_init(void)
{
	int ret;

	if (mlx4_verify_params())
		return -EINVAL;

	mlx4_catas_init();

	mlx4_wq = create_singlethread_workqueue("mlx4");
	if (!mlx4_wq)
		return -ENOMEM;

	if (enable_sys_tune)
		sys_tune_init();

	ret = pci_register_driver(&mlx4_driver);
	if (ret < 0)
		goto err;

	return 0;

err:
	if (enable_sys_tune)
		sys_tune_fini();

	destroy_workqueue(mlx4_wq);

	return ret;
}

/*
 * Module unload: reverse of mlx4_init().
 */
static void __exit mlx4_cleanup(void)
{
	if (enable_sys_tune)
		sys_tune_fini();

	pci_unregister_driver(&mlx4_driver);
	destroy_workqueue(mlx4_wq);
}

module_init_order(mlx4_init, SI_ORDER_MIDDLE);
module_exit(mlx4_cleanup);

/* FreeBSD module event handler: no events handled, always succeeds. */
static int
mlx4_evhand(module_t mod, int event, void *arg)
{
	return (0);
}

static moduledata_t mlx4_mod = {
	.name = "mlx4",
	.evhand = mlx4_evhand,
};
MODULE_VERSION(mlx4, 1);
/* Register early (OFED pre-init) so dependent mlx4en/mlx4ib modules find us. */
DECLARE_MODULE(mlx4, mlx4_mod, SI_SUB_OFED_PREINIT, SI_ORDER_ANY);
MODULE_DEPEND(mlx4, linuxkpi, 1, 1, 1);