/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2019 Mellanox Technologies. */

#include "health.h"
#include "en/ptp.h"
#include "en/devlink.h"
#include "lib/tout.h"

/* Keep this string array consistent with the MLX5E_SQ_STATE_* enums in en.h */
static const char * const sq_sw_state_type_name[] = {
	[MLX5E_SQ_STATE_ENABLED] = "enabled",
	[MLX5E_SQ_STATE_MPWQE] = "mpwqe",
	[MLX5E_SQ_STATE_RECOVERING] = "recovering",
	[MLX5E_SQ_STATE_IPSEC] = "ipsec",
	[MLX5E_SQ_STATE_DIM] = "dim",
	[MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE] = "vlan_need_l2_inline",
	[MLX5E_SQ_STATE_PENDING_XSK_TX] = "pending_xsk_tx",
	[MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC] = "pending_tls_rx_resync",
	[MLX5E_SQ_STATE_XDP_MULTIBUF] = "xdp_multibuf",
};

/* Poll until the SQ is fully drained (consumer counter catches up with the
 * producer counter), sleeping 20ms between polls, bounded by the device's
 * FLUSH_ON_ERROR timeout. Returns 0 once drained, -ETIMEDOUT otherwise.
 */
static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
{
	struct mlx5_core_dev *dev = sq->mdev;
	unsigned long exp_time;

	exp_time = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FLUSH_ON_ERROR));

	while (time_before(jiffies, exp_time)) {
		if (sq->cc == sq->pc)
			return 0;

		msleep(20);
	}

	netdev_err(sq->netdev,
		   "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n",
		   sq->sqn, sq->cc, sq->pc);

	return -ETIMEDOUT;
}

/* Zero the SQ's software counters (cc, pc, DMA fifo cc) after a reset.
 * Callers are expected to have drained the SQ first; warn once if the
 * counters show it was not actually empty.
 */
static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
{
	WARN_ONCE(sq->cc != sq->pc,
		  "SQ 0x%x: cc (0x%x) != pc (0x%x)\n",
		  sq->sqn, sq->cc, sq->pc);
	sq->cc = 0;
	sq->dma_fifo_cc = 0;
	sq->pc = 0;
}

/* Emit every MLX5E_SQ_STATE_* software state bit of @sq as a name/value
 * pair under a "SW State" fmsg object. Returns 0 on success or the first
 * fmsg error.
 */
static int mlx5e_health_sq_put_sw_state(struct devlink_fmsg *fmsg, struct mlx5e_txqsq *sq)
{
	int err;
	int i;

	BUILD_BUG_ON_MSG(ARRAY_SIZE(sq_sw_state_type_name) != MLX5E_NUM_SQ_STATES,
			 "sq_sw_state_type_name string array must be consistent with MLX5E_SQ_STATE_* enum in en.h");
	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SW State");
	if (err)
		return err;

	for (i = 0; i < ARRAY_SIZE(sq_sw_state_type_name); ++i) {
		err = devlink_fmsg_u32_pair_put(fmsg, sq_sw_state_type_name[i],
						test_bit(i, &sq->state));
		if (err)
			return err;
	}

	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
}

/* Recover an SQ that hit an error CQE (ctx is the struct mlx5e_txqsq).
 *
 * Flow: confirm the SQ is marked RECOVERING and the HW state is ERR, stop
 * the stack queue, wait for NAPI to drain pending WQEs, move the SQ back
 * to ready via firmware, reset the SW counters and reactivate the queue.
 * The RECOVERING bit is cleared on every exit path. Returns 0 on success
 * (or nothing to do) and a negative errno otherwise.
 */
static int mlx5e_tx_reporter_err_cqe_recover(void *ctx)
{
	struct mlx5_core_dev *mdev;
	struct net_device *dev;
	struct mlx5e_txqsq *sq;
	u8 state;
	int err;

	sq = ctx;
	mdev = sq->mdev;
	dev = sq->netdev;

	if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
		return 0;

	err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
	if (err) {
		netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
			   sq->sqn, err);
		goto out;
	}

	if (state != MLX5_SQC_STATE_ERR)
		goto out;

	mlx5e_tx_disable_queue(sq->txq);

	err = mlx5e_wait_for_sq_flush(sq);
	if (err)
		goto out;

	/* At this point, no new packets will arrive from the stack as TXQ is
	 * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
	 * pending WQEs. SQ can safely reset the SQ.
	 */

	err = mlx5e_health_sq_to_ready(mdev, dev, sq->sqn);
	if (err)
		goto out;

	mlx5e_reset_txqsq_cc_pc(sq);
	sq->stats->recover++;
	clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
	mlx5e_activate_txqsq(sq);
	if (sq->channel)
		mlx5e_trigger_napi_icosq(sq->channel);
	else
		mlx5e_trigger_napi_sched(sq->cq.napi);

	return 0;
out:
	clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
	return err;
}

/* Context passed to the tx-timeout recover/dump callbacks.
 * status: 0 if only this SQ was recovered (EQ recovery succeeded),
 * 1 if all channels were reopened, or a negative errno on failure.
 */
struct mlx5e_tx_timeout_ctx {
	struct mlx5e_txqsq *sq;
	signed int status;
};

/* Recover from a TX timeout (ctx is a struct mlx5e_tx_timeout_ctx).
 * First attempt the cheap path: recover the SQ's completion EQ. If that
 * fails, fall back to reopening all channels. On total failure, clear the
 * SQ's ENABLED bit and report the error.
 */
static int mlx5e_tx_reporter_timeout_recover(void *ctx)
{
	struct mlx5e_tx_timeout_ctx *to_ctx;
	struct mlx5e_priv *priv;
	struct mlx5_eq_comp *eq;
	struct mlx5e_txqsq *sq;
	int err;

	to_ctx = ctx;
	sq = to_ctx->sq;
	eq = sq->cq.mcq.eq;
	priv = sq->priv;
	err = mlx5e_health_channel_eq_recover(sq->netdev, eq, sq->cq.ch_stats);
	if (!err) {
		to_ctx->status = 0; /* this sq recovered */
		return err;
	}

	err = mlx5e_safe_reopen_channels(priv);
	if (!err) {
		to_ctx->status = 1; /* all channels recovered */
		return err;
	}

	to_ctx->status = err;
	clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
	netdev_err(priv->netdev,
		   "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n",
		   err);

	return err;
}

/* Recover an unhealthy PTP SQ (ctx is the struct mlx5e_ptpsq) by closing
 * and reopening the whole PTP channel under the priv state lock. The
 * carrier is forced off for the duration and restored only if it was up
 * beforehand. Returns the mlx5e_ptp_open() result (channels are
 * reactivated either way).
 */
static int mlx5e_tx_reporter_ptpsq_unhealthy_recover(void *ctx)
{
	struct mlx5e_ptpsq *ptpsq = ctx;
	struct mlx5e_channels *chs;
	struct net_device *netdev;
	struct mlx5e_priv *priv;
	int carrier_ok;
	int err;

	if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &ptpsq->txqsq.state))
		return 0;

	priv = ptpsq->txqsq.priv;

	mutex_lock(&priv->state_lock);
	chs = &priv->channels;
	netdev = priv->netdev;

	carrier_ok = netif_carrier_ok(netdev);
	netif_carrier_off(netdev);

	mlx5e_deactivate_priv_channels(priv);

	mlx5e_ptp_close(chs->ptp);
	err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp);

	mlx5e_activate_priv_channels(priv);

	/* return carrier back if needed */
	if (carrier_ok)
		netif_carrier_on(netdev);

	mutex_unlock(&priv->state_lock);

	return err;
}

/* state lock cannot be grabbed within this function.
 * It can cause a dead lock or a read-after-free.
 */
static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
{
	return err_ctx->recover(err_ctx->ctx);
}

/* devlink health .recover callback: dispatch to the error-specific recover
 * handler when an error context was supplied, otherwise fall back to a
 * full channel recovery.
 */
static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter,
				     void *context,
				     struct netlink_ext_ack *extack)
{
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
	struct mlx5e_err_ctx *err_ctx = context;

	return err_ctx ? mlx5e_tx_reporter_recover_from_ctx(err_ctx) :
			 mlx5e_health_recover_channels(priv);
}

/* Emit the diagnose fields shared by regular and PTP SQs: tc, txq index,
 * sqn, queried HW state, stopped flag, cc/pc, SW state bits, plus the
 * associated CQ and EQ diagnostics.
 */
static int
mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg,
						  struct mlx5e_txqsq *sq, int tc)
{
	bool stopped = netif_xmit_stopped(sq->txq);
	struct mlx5e_priv *priv = sq->priv;
	u8 state;
	int err;

	err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state);
	if (err)
		return err;

	err = devlink_fmsg_u32_pair_put(fmsg, "tc", tc);
	if (err)
		return err;

	err = devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix);
	if (err)
		return err;

	err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn);
	if (err)
		return err;

	err = devlink_fmsg_u8_pair_put(fmsg, "HW state", state);
	if (err)
		return err;

	err = devlink_fmsg_bool_pair_put(fmsg, "stopped", stopped);
	if (err)
		return err;

	err = devlink_fmsg_u32_pair_put(fmsg, "cc", sq->cc);
	if (err)
		return err;

	err = devlink_fmsg_u32_pair_put(fmsg, "pc", sq->pc);
	if (err)
		return err;

	err = mlx5e_health_sq_put_sw_state(fmsg, sq);
	if (err)
		return err;

	err = mlx5e_health_cq_diag_fmsg(&sq->cq, fmsg);
	if (err)
		return err;

	return mlx5e_health_eq_diag_fmsg(sq->cq.mcq.eq, fmsg);
}

/* Emit one diagnose object for a regular SQ: channel index plus the common
 * SQ fields, wrapped in an fmsg object nest.
 */
static int
mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg,
					struct mlx5e_txqsq *sq, int tc)
{
	int err;

	err = devlink_fmsg_obj_nest_start(fmsg);
	if (err)
		return err;

	err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", sq->ch_ix);
	if (err)
		return err;

	err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, sq, tc);
	if (err)
		return err;

	err = devlink_fmsg_obj_nest_end(fmsg);
	if (err)
		return err;

	return 0;
}

/* Emit one diagnose object for a PTP SQ: channel tag "ptp", the common SQ
 * fields, and the port timestamping CQ diagnostics under a "Port TS" nest.
 */
static int
mlx5e_tx_reporter_build_diagnose_output_ptpsq(struct devlink_fmsg *fmsg,
					      struct mlx5e_ptpsq *ptpsq, int tc)
{
	int err;

	err = devlink_fmsg_obj_nest_start(fmsg);
	if (err)
		return err;

	err = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp");
	if (err)
		return err;

	err = mlx5e_tx_reporter_build_diagnose_output_sq_common(fmsg, &ptpsq->txqsq, tc);
	if (err)
		return err;

	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS");
	if (err)
		return err;

	err = mlx5e_health_cq_diag_fmsg(&ptpsq->ts_cq, fmsg);
	if (err)
		return err;

	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
	if (err)
		return err;

	err = devlink_fmsg_obj_nest_end(fmsg);
	if (err)
		return err;

	return 0;
}

/* Emit the configuration common to all SQs of this kind under an "SQ"
 * nest: WQE stride size, WQ size, timestamp format (real-time vs free
 * running counter) and the common CQ configuration.
 */
static int
mlx5e_tx_reporter_diagnose_generic_txqsq(struct devlink_fmsg *fmsg,
					 struct mlx5e_txqsq *txqsq)
{
	u32 sq_stride, sq_sz;
	bool real_time;
	int err;

	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ");
	if (err)
		return err;

	real_time = mlx5_is_real_time_sq(txqsq->mdev);
	sq_sz = mlx5_wq_cyc_get_size(&txqsq->wq);
	sq_stride = MLX5_SEND_WQE_BB;

	err = devlink_fmsg_u64_pair_put(fmsg, "stride size", sq_stride);
	if (err)
		return err;

	err = devlink_fmsg_u32_pair_put(fmsg, "size", sq_sz);
	if (err)
		return err;

	err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC");
	if (err)
		return err;

	err = mlx5e_health_cq_common_diag_fmsg(&txqsq->cq, fmsg);
	if (err)
		return err;

	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
}

/* Emit the common port timestamping CQ configuration under a "Port TS"
 * nest (PTP SQs only).
 */
static int
mlx5e_tx_reporter_diagnose_generic_tx_port_ts(struct devlink_fmsg *fmsg,
					      struct mlx5e_ptpsq *ptpsq)
{
	int err;

	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Port TS");
	if (err)
		return err;

	err = mlx5e_health_cq_common_diag_fmsg(&ptpsq->ts_cq, fmsg);
	if (err)
		return err;

	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
}

/* Emit the "Common Config" section of the diagnose output: the generic SQ
 * configuration taken from the first txq's SQ and, when the PTP channel is
 * open with TX enabled, a "PTP" subsection from the first PTP SQ.
 */
static int
mlx5e_tx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter,
					 struct devlink_fmsg *fmsg)
{
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
	struct mlx5e_txqsq *generic_sq = priv->txq2sq[0];
	struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
	struct mlx5e_ptpsq *generic_ptpsq;
	int err;

	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common Config");
	if (err)
		return err;

	err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, generic_sq);
	if (err)
		return err;

	if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state))
		goto out;

	generic_ptpsq = &ptp_ch->ptpsq[0];

	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP");
	if (err)
		return err;

	err = mlx5e_tx_reporter_diagnose_generic_txqsq(fmsg, &generic_ptpsq->txqsq);
	if (err)
		return err;

	err = mlx5e_tx_reporter_diagnose_generic_tx_port_ts(fmsg, generic_ptpsq);
	if (err)
		return err;

	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
	if (err)
		return err;

out:
	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
}

/* devlink health .diagnose callback: under the priv state lock (so the
 * channel structures cannot change underneath), emit the common config
 * followed by an "SQs" array with one entry per (channel, tc) SQ and, if
 * the PTP channel has TX enabled, one per PTP SQ. No-op if the netdev is
 * not opened.
 */
static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
				      struct devlink_fmsg *fmsg,
				      struct netlink_ext_ack *extack)
{
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
	struct mlx5e_ptp *ptp_ch = priv->channels.ptp;

	int i, tc, err = 0;

	mutex_lock(&priv->state_lock);

	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
		goto unlock;

	err = mlx5e_tx_reporter_diagnose_common_config(reporter, fmsg);
	if (err)
		goto unlock;

	err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
	if (err)
		goto unlock;

	for (i = 0; i < priv->channels.num; i++) {
		struct mlx5e_channel *c = priv->channels.c[i];

		for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
			struct mlx5e_txqsq *sq = &c->sq[tc];

			err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc);
			if (err)
				goto unlock;
		}
	}

	if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state))
		goto close_sqs_nest;

	for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
		err = mlx5e_tx_reporter_build_diagnose_output_ptpsq(fmsg,
								    &ptp_ch->ptpsq[tc],
								    tc);
		if (err)
			goto unlock;
	}

close_sqs_nest:
	err = devlink_fmsg_arr_pair_nest_end(fmsg);
	if (err)
		goto unlock;

unlock:
	mutex_unlock(&priv->state_lock);
	return err;
}

/* Dump one SQ (ctx is the struct mlx5e_txqsq) via the resource dump
 * interface: the full SX slice, the SQ's QPC, and its send buffer. No-op
 * if the netdev is not opened.
 */
static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
				     void *ctx)
{
	struct mlx5_rsc_key key = {};
	struct mlx5e_txqsq *sq = ctx;
	int err;

	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
		return 0;

	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice");
	if (err)
		return err;

	key.size = PAGE_SIZE;
	key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
	if (err)
		return err;

	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
	if (err)
		return err;

	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ");
	if (err)
		return err;

	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC");
	if (err)
		return err;

	key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
	key.index1 = sq->sqn;
	key.num_of_obj1 = 1;

	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
	if (err)
		return err;

	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
	if (err)
		return err;

	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff");
	if (err)
		return err;

	key.rsc = MLX5_SGMT_TYPE_SND_BUFF;
	key.num_of_obj2 = MLX5_RSC_DUMP_ALL;
	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
	if (err)
		return err;

	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
	if (err)
		return err;

	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
}

/* Dump callback for TX timeouts: delegate to the plain SQ dump for the
 * timed-out SQ carried in the mlx5e_tx_timeout_ctx.
 */
static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
					  void *ctx)
{
	struct mlx5e_tx_timeout_ctx *to_ctx = ctx;

	return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq);
}

/* Dump callback for unhealthy PTP SQs: delegate to the plain SQ dump for
 * the PTP SQ's underlying txqsq.
 */
static int mlx5e_tx_reporter_ptpsq_unhealthy_dump(struct mlx5e_priv *priv,
						  struct devlink_fmsg *fmsg,
						  void *ctx)
{
	struct mlx5e_ptpsq *ptpsq = ctx;

	return mlx5e_tx_reporter_dump_sq(priv, fmsg, &ptpsq->txqsq);
}

/* Dump all SQs when no specific error context exists: the SX slice once,
 * then an "SQs" array covering every (channel, tc) SQ and every PTP SQ
 * when the PTP channel has TX enabled. No-op if the netdev is not opened.
 */
static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv,
					  struct devlink_fmsg *fmsg)
{
	struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
	struct mlx5_rsc_key key = {};
	int i, tc, err;

	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
		return 0;

	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice");
	if (err)
		return err;

	key.size = PAGE_SIZE;
	key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
	if (err)
		return err;

	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
	if (err)
		return err;

	err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
	if (err)
		return err;

	for (i = 0; i < priv->channels.num; i++) {
		struct mlx5e_channel *c = priv->channels.c[i];

		for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
			struct mlx5e_txqsq *sq = &c->sq[tc];

			err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ");
			if (err)
				return err;
		}
	}

	if (ptp_ch && test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) {
		for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
			struct mlx5e_txqsq *sq = &ptp_ch->ptpsq[tc].txqsq;

			err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "PTP SQ");
			if (err)
				return err;
		}
	}

	return devlink_fmsg_arr_pair_nest_end(fmsg);
}

/* Invoke the error-specific dump handler stored in the error context. */
static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv,
					   struct mlx5e_err_ctx *err_ctx,
					   struct devlink_fmsg *fmsg)
{
	return err_ctx->dump(priv, fmsg, err_ctx->ctx);
}

/* devlink health .dump callback: dispatch to the error-specific dump when
 * an error context was supplied, otherwise dump all SQs.
 */
static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter,
				  struct devlink_fmsg *fmsg, void *context,
				  struct netlink_ext_ack *extack)
{
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
	struct mlx5e_err_ctx *err_ctx = context;

	return err_ctx ? mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg) :
			 mlx5e_tx_reporter_dump_all_sqs(priv, fmsg);
}

/* Report an error CQE on @sq to the devlink health infrastructure, wiring
 * up the err-CQE recover and SQ dump callbacks.
 */
void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq)
{
	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
	struct mlx5e_priv *priv = sq->priv;
	struct mlx5e_err_ctx err_ctx = {};

	err_ctx.ctx = sq;
	err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover;
	err_ctx.dump = mlx5e_tx_reporter_dump_sq;
	snprintf(err_str, sizeof(err_str), "ERR CQE on SQ: 0x%x", sq->sqn);

	mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
}

/* Report a TX timeout on @sq and run recovery synchronously. Returns the
 * recovery status from mlx5e_tx_timeout_ctx (0 = this SQ recovered,
 * 1 = all channels reopened, negative errno on failure).
 */
int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)
{
	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
	struct mlx5e_tx_timeout_ctx to_ctx = {};
	struct mlx5e_priv *priv = sq->priv;
	struct mlx5e_err_ctx err_ctx = {};

	to_ctx.sq = sq;
	err_ctx.ctx = &to_ctx;
	err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
	err_ctx.dump = mlx5e_tx_reporter_timeout_dump;
	snprintf(err_str, sizeof(err_str),
		 "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u",
		 sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
		 jiffies_to_usecs(jiffies - READ_ONCE(sq->txq->trans_start)));

	mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
	return to_ctx.status;
}

/* Report an unhealthy PTP port-timestamping SQ (undelivered timestamp
 * CQEs) and wire up the PTP recover and dump callbacks.
 */
void mlx5e_reporter_tx_ptpsq_unhealthy(struct mlx5e_ptpsq *ptpsq)
{
	struct mlx5e_ptp_metadata_map *map = &ptpsq->metadata_map;
	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
	struct mlx5e_txqsq *txqsq = &ptpsq->txqsq;
	struct mlx5e_cq *ts_cq = &ptpsq->ts_cq;
	struct mlx5e_priv *priv = txqsq->priv;
	struct mlx5e_err_ctx err_ctx = {};

	err_ctx.ctx = ptpsq;
	err_ctx.recover = mlx5e_tx_reporter_ptpsq_unhealthy_recover;
	err_ctx.dump = mlx5e_tx_reporter_ptpsq_unhealthy_dump;
	snprintf(err_str, sizeof(err_str),
		 "Unhealthy TX port TS queue: %d, SQ: 0x%x, CQ: 0x%x, Undelivered CQEs: %u Map Capacity: %u",
		 txqsq->ch_ix, txqsq->sqn, ts_cq->mcq.cqn, map->undelivered_counter, map->capacity);

	mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
}

static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
	.name = "tx",
	.recover = mlx5e_tx_reporter_recover,
	.diagnose = mlx5e_tx_reporter_diagnose,
	.dump = mlx5e_tx_reporter_dump,
};

/* Grace period (msecs) before devlink auto-recovery re-arms. */
#define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500

/* Create the per-port TX health reporter. On failure only a warning is
 * logged; priv->tx_reporter stays NULL and reporting becomes a no-op.
 */
void mlx5e_reporter_tx_create(struct mlx5e_priv *priv)
{
	struct devlink_health_reporter *reporter;

	reporter = devlink_port_health_reporter_create(priv->netdev->devlink_port,
						       &mlx5_tx_reporter_ops,
						       MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv);
	if (IS_ERR(reporter)) {
		netdev_warn(priv->netdev,
			    "Failed to create tx reporter, err = %ld\n",
			    PTR_ERR(reporter));
		return;
	}
	priv->tx_reporter = reporter;
}

/* Destroy the TX health reporter if it was successfully created. */
void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv)
{
	if (!priv->tx_reporter)
		return;

	devlink_health_reporter_destroy(priv->tx_reporter);
	priv->tx_reporter = NULL;
}