1 /* 2 * Copyright (c) 2014-2019, Cisco Systems, Inc. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 26 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 27 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 31 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 33 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 #ifndef _USDF_H_ 37 #define _USDF_H_ 38 39 #include <sys/queue.h> 40 #include <pthread.h> 41 42 #include <rdma/providers/fi_log.h> 43 #include <ofi_epoll.h> 44 45 #include "usdf_progress.h" 46 #include "usd.h" 47 48 49 #define USDF_PROV_NAME "usnic" 50 #define USDF_MAJOR_VERS 1 51 #define USDF_MINOR_VERS 0 52 #define USDF_PROV_VERSION FI_VERSION(USDF_MAJOR_VERS, USDF_MINOR_VERS) 53 54 extern struct fi_provider usdf_ops; 55 56 #define USDF_WARN_SYS(subsys, ...) \ 57 FI_WARN(&usdf_ops, FI_LOG_ ## subsys, __VA_ARGS__) 58 #define USDF_TRACE_SYS(subsys, ...) \ 59 FI_TRACE(&usdf_ops, FI_LOG_ ## subsys, __VA_ARGS__) 60 #define USDF_INFO_SYS(subsys, ...) \ 61 FI_INFO(&usdf_ops, FI_LOG_ ## subsys, __VA_ARGS__) 62 #define USDF_DBG_SYS(subsys, ...) \ 63 FI_DBG(&usdf_ops, FI_LOG_ ## subsys, __VA_ARGS__) 64 65 /* default to "FI_LOG_FABRIC" */ 66 #define USDF_WARN(...) USDF_WARN_SYS(FABRIC, __VA_ARGS__) 67 #define USDF_TRACE(...) USDF_TRACE_SYS(FABRIC, __VA_ARGS__) 68 #define USDF_INFO(...) USDF_INFO_SYS(FABRIC, __VA_ARGS__) 69 #define USDF_DBG(...) USDF_DBG_SYS(FABRIC, __VA_ARGS__) 70 71 #define USDF_HDR_BUF_ENTRY 64 72 #define USDF_EP_CAP_PIO (1ULL << 63) 73 74 #define USDF_MAX_PEERS (16 * 1024) 75 76 /* usdf event flags */ 77 #define USDF_EVENT_FLAG_ERROR (1ULL << 62) 78 #define USDF_EVENT_FLAG_FREE_BUF (1ULL << 63) 79 80 /* usdf domain capability: no loopback */ 81 #define USDF_DOM_CAPS (FI_REMOTE_COMM) 82 83 #define USDF_MR_IOV_LIMIT 1 84 #define USDF_MR_CNT (65535) 85 #define USDF_ADDR_STR_LEN (INET6_ADDRSTRLEN+8) 86 87 /* 88 * TAILQ stuff that should exist 89 */ 90 #define TAILQ_REMOVE_MARK(head, elm, link) \ 91 do { \ 92 TAILQ_REMOVE(head, elm, link); \ 93 (elm)->link.tqe_prev = NULL; \ 94 } while (0) 95 96 #define TAILQ_ON_LIST(elm, link) ((elm)->link.tqe_prev != NULL) 97 98 struct usdf_domain; 99 100 struct usdf_dev_entry { 101 struct usd_device *ue_dev; 102 struct usd_device_attrs ue_dattr; 103 int ue_dev_ok; 104 }; 105 struct usdf_usnic_info { 106 int uu_num_devs; 107 struct usd_device_entry uu_devs[USD_MAX_DEVICES]; 108 struct usdf_dev_entry uu_info[USD_MAX_DEVICES]; 109 }; 110 extern struct usdf_usnic_info *__usdf_devinfo; 111 112 struct usdf_fabric { 113 struct fid_fabric fab_fid; 114 struct fi_fabric_attr fab_attr; 115 struct usd_device_attrs *fab_dev_attrs; 116 int fab_arp_sockfd; 117 ofi_atomic32_t fab_refcnt; 118 ofi_atomic32_t num_blocked_waiting; 119 LIST_HEAD(,usdf_domain) fab_domain_list; 120 121 /* progression */ 122 pthread_t fab_thread; 123 int fab_exit; 124 ofi_epoll_t fab_epollfd; 125 int fab_eventfd; 126 struct usdf_poll_item fab_poll_item; 127 128 /* timer vars */ 129 uint32_t fab_active_timer_count; 130 LIST_HEAD(usdf_timer_bucket, usdf_timer_entry) *fab_timer_buckets; 131 uint64_t fab_cur_bucket_ms; 132 uint32_t fab_cur_bucket; 133 pthread_spinlock_t fab_timer_lock; 134 }; 135 #define fab_ftou(FAB) container_of(FAB, struct usdf_fabric, fab_fid) 136 #define fab_utof(FP) (&(FP)->fab_fid) 137 #define fab_fidtou(FID) container_of(FID, struct usdf_fabric, fab_fid.fid) 138 139 struct usdf_domain { 140 struct fid_domain dom_fid; 141 struct usdf_fabric *dom_fabric; 142 struct fi_info *dom_info; 143 ofi_atomic32_t dom_refcnt; 144 struct usdf_eq *dom_eq; 145 struct usd_device *dom_dev; 146 147 pthread_spinlock_t dom_progress_lock; 148 TAILQ_HEAD(,usdf_tx) dom_tx_ready; 149 TAILQ_HEAD(,usdf_cq_hard) dom_hcq_list; 150 151 /* used only by connected endpoints */ 152 struct usdf_ep **dom_peer_tab; 153 uint32_t dom_next_peer; 154 155 LIST_ENTRY(usdf_domain) dom_link; 156 }; 157 #define dom_ftou(FDOM) container_of(FDOM, struct usdf_domain, dom_fid) 158 #define dom_utof(DOM) (&(DOM)->dom_fid) 159 #define dom_fidtou(FID) container_of(FID, struct usdf_domain, dom_fid.fid) 160 161 enum usdf_pep_state { 162 USDF_PEP_UNBOUND, 163 USDF_PEP_BOUND, 164 USDF_PEP_LISTENING, 165 166 /* A "ROBBED" PEP has had its socket stolen. The only valid operation 167 * to call on a ROBBED PEP is fi_close(). */ 168 USDF_PEP_ROBBED 169 }; 170 171 struct usdf_pep { 172 struct fid_pep pep_fid; 173 ofi_atomic32_t pep_refcnt; 174 struct usdf_fabric *pep_fabric; 175 struct usdf_eq *pep_eq; 176 int pep_sock; 177 union { 178 struct sockaddr_in sin; 179 char addr_str[USDF_ADDR_STR_LEN]; 180 } pep_src_addr; 181 enum usdf_pep_state pep_state; 182 struct usdf_poll_item pep_pollitem; 183 struct fi_info *pep_info; 184 185 pthread_spinlock_t pep_cr_lock; 186 size_t pep_cr_max_data; 187 uint32_t pep_backlog; 188 uint32_t pep_cr_alloced; 189 TAILQ_HEAD(,usdf_connreq) pep_cr_free; 190 TAILQ_HEAD(,usdf_connreq) pep_cr_pending; 191 }; 192 #define pep_ftou(FPEP) container_of(FPEP, struct usdf_pep, pep_fid) 193 #define pep_fidtou(FID) container_of(FID, struct usdf_pep, pep_fid.fid) 194 #define pep_utof(PEP) (&(PEP)->pep_fid) 195 #define pep_utofid(PEP) (&(PEP)->pep_fid.fid) 196 197 struct usdf_tx { 198 struct fid_stx tx_fid; 199 ofi_atomic32_t tx_refcnt; 200 struct usdf_domain *tx_domain; 201 TAILQ_ENTRY(usdf_tx) tx_link; 202 203 struct fi_tx_attr tx_attr; 204 struct usd_qp *tx_qp; 205 void (*tx_progress)(struct usdf_tx *tx); 206 207 union { 208 struct { 209 struct usdf_cq_hard *tx_hcq; 210 211 uint8_t *tx_inject_bufs; 212 struct usdf_msg_qe *tx_wqe_buf; 213 TAILQ_HEAD(,usdf_msg_qe) tx_free_wqe; 214 TAILQ_HEAD(,usdf_ep) tx_ep_ready; 215 TAILQ_HEAD(,usdf_ep) tx_ep_have_acks; 216 size_t tx_num_free_wqe; 217 } msg; 218 struct { 219 struct usdf_cq_hard *tx_hcq; 220 221 ofi_atomic32_t tx_next_msg_id; 222 struct usdf_rdm_qe *tx_wqe_buf; 223 uint8_t *tx_inject_bufs; 224 TAILQ_HEAD(,usdf_rdm_qe) tx_free_wqe; 225 TAILQ_HEAD(,usdf_rdm_connection) tx_rdc_ready; 226 TAILQ_HEAD(,usdf_rdm_connection) tx_rdc_have_acks; 227 size_t tx_num_free_wqe; 228 } rdm; 229 } t; 230 }; 231 #define tx_ftou(FEP) container_of(FEP, struct usdf_tx, tx_fid) 232 #define tx_fidtou(FID) container_of(FID, struct usdf_tx, tx_fid) 233 #define tx_utof(RX) (&(RX)->tx_fid) 234 #define tx_utofid(RX) (&(RX)->tx_fid.fid) 235 236 struct usdf_rx { 237 struct fid_ep rx_fid; 238 ofi_atomic32_t rx_refcnt; 239 struct usdf_domain *rx_domain; 240 241 struct fi_rx_attr rx_attr; 242 struct usd_qp *rx_qp; 243 244 union { 245 struct { 246 struct usdf_cq_hard *rx_hcq; 247 248 uint8_t *rx_bufs; 249 struct usdf_msg_qe *rx_rqe_buf; 250 TAILQ_HEAD(,usdf_msg_qe) rx_free_rqe; 251 TAILQ_HEAD(,usdf_msg_qe) rx_posted_rqe; 252 size_t rx_num_free_rqe; 253 } msg; 254 struct { 255 int rx_sock; 256 struct usdf_cq_hard *rx_hcq; 257 struct usdf_tx *rx_tx; 258 259 uint8_t *rx_bufs; 260 struct usdf_rdm_qe *rx_rqe_buf; 261 TAILQ_HEAD(,usdf_rdm_qe) rx_free_rqe; 262 TAILQ_HEAD(,usdf_rdm_qe) rx_posted_rqe; 263 size_t rx_num_free_rqe; 264 } rdm; 265 } r; 266 }; 267 #define rx_ftou(FEP) container_of(FEP, struct usdf_rx, rx_fid) 268 #define rx_fidtou(FID) container_of(FID, struct usdf_rx, rx_fid) 269 #define rx_utof(RX) (&(RX)->rx_fid) 270 #define rx_utofid(RX) (&(RX)->rx_fid.fid) 271 272 enum { 273 USDF_EP_ENABLED = (1 << 0) 274 }; 275 276 struct usdf_ep { 277 struct fid_ep ep_fid; 278 struct usdf_domain *ep_domain; 279 ofi_atomic32_t ep_refcnt; 280 uint64_t ep_caps; 281 uint64_t ep_mode; 282 283 uint8_t ep_tx_dflt_signal_comp; 284 uint8_t ep_rx_dflt_signal_comp; 285 286 uint8_t ep_tx_completion; 287 uint8_t ep_rx_completion; 288 289 uint32_t flags; 290 291 uint32_t ep_wqe; /* requested queue sizes */ 292 uint32_t ep_rqe; 293 294 struct usd_qp_attrs ep_qp_attrs; 295 296 struct usdf_eq *ep_eq; 297 298 struct usdf_tx *ep_tx; 299 struct usdf_rx *ep_rx; 300 301 size_t max_msg_size; 302 303 union { 304 struct { 305 struct usd_qp *ep_qp; 306 struct usdf_cq *ep_wcq; 307 struct usdf_cq *ep_rcq; 308 309 int ep_sock; 310 struct usdf_av *ep_av; 311 312 /* TODO: Remove in favor of accessing op flags through 313 * ep_tx and ep_rx. Update once tx/rx context support 314 * is added to dgram */ 315 uint64_t tx_op_flags; 316 uint64_t rx_op_flags; 317 318 size_t tx_iov_limit; 319 size_t rx_iov_limit; 320 321 void *ep_hdr_buf; 322 struct usd_udp_hdr **ep_hdr_ptr; 323 } dg; 324 struct { 325 struct usdf_connreq *ep_connreq; 326 int ep_cm_sock; 327 struct sockaddr_in ep_lcl_addr; 328 struct usd_dest *ep_dest; 329 uint32_t ep_rem_peer_id; 330 uint32_t ep_lcl_peer_id; 331 332 TAILQ_HEAD(,usdf_msg_qe) ep_posted_wqe; 333 TAILQ_HEAD(usdf_msg_qe_head ,usdf_msg_qe) ep_sent_wqe; 334 uint32_t ep_fairness_credits; 335 uint32_t ep_seq_credits; 336 uint16_t ep_next_tx_seq; 337 uint16_t ep_last_rx_ack; 338 int ep_send_nak; 339 340 struct usdf_msg_qe *ep_cur_recv; 341 uint16_t ep_next_rx_seq; 342 TAILQ_ENTRY(usdf_ep) ep_ack_link; 343 344 struct usdf_timer_entry *ep_ack_timer; 345 346 TAILQ_ENTRY(usdf_ep) ep_link; 347 } msg; 348 struct { 349 int ep_sock; 350 struct usdf_av *ep_av; 351 352 } rdm; 353 } e; 354 }; 355 #define ep_ftou(FEP) container_of(FEP, struct usdf_ep, ep_fid) 356 #define ep_fidtou(FID) container_of(FID, struct usdf_ep, ep_fid.fid) 357 #define ep_utof(EP) (&(EP)->ep_fid) 358 #define ep_utofid(EP) (&(EP)->ep_fid.fid) 359 360 struct usdf_mr { 361 struct fid_mr mr_fid; 362 struct usd_mr *mr_mr; 363 }; 364 365 struct usdf_cq_hard { 366 struct usdf_cq *cqh_cq; 367 struct usd_cq *cqh_ucq; 368 ofi_atomic32_t cqh_refcnt; 369 void (*cqh_progress)(struct usdf_cq_hard *hcq); 370 void (*cqh_post)(struct usdf_cq_hard *hcq, void *context, size_t len, 371 int prov_errno, uint64_t flags); 372 TAILQ_ENTRY(usdf_cq_hard) cqh_link; 373 TAILQ_ENTRY(usdf_cq_hard) cqh_dom_link; 374 }; 375 376 struct usdf_cq_soft_entry { 377 void *cse_context; 378 uint64_t cse_flags; 379 size_t cse_len; 380 void *cse_buf; 381 uint64_t cse_data; 382 int cse_prov_errno; 383 }; 384 385 struct usdf_cq { 386 struct fid_cq cq_fid; 387 ofi_atomic32_t cq_refcnt; 388 struct usdf_domain *cq_domain; 389 struct fi_cq_attr cq_attr; 390 uint8_t cq_is_soft; 391 uint8_t cq_waiting; 392 393 union { 394 int fd; 395 struct fi_mutex_cond mutex_cond; 396 } object; 397 398 union { 399 struct { 400 struct usd_cq *cq_cq; 401 } hard; 402 struct { 403 struct usdf_cq_soft_entry *cq_comps; 404 struct usdf_cq_soft_entry *cq_end; 405 struct usdf_cq_soft_entry *cq_head; 406 struct usdf_cq_soft_entry *cq_tail; 407 /* Last operation used to distinguish full vs empty. */ 408 uint8_t cq_last_op; 409 TAILQ_HEAD(,usdf_cq_hard) cq_list; 410 } soft; 411 } c; 412 struct usd_completion cq_comp; 413 struct fi_ops_cq cq_ops; 414 }; 415 416 enum { 417 USDF_SOFT_CQ_READ, 418 USDF_SOFT_CQ_WRITE 419 }; 420 421 #define cq_ftou(FCQ) container_of(FCQ, struct usdf_cq, cq_fid) 422 #define cq_fidtou(FID) container_of(FID, struct usdf_cq, cq_fid.fid) 423 #define cq_utof(CQ) (&(CQ)->cq_fid) 424 425 struct usdf_err_data_entry { 426 struct slist_entry entry; 427 uint8_t seen; 428 uint8_t err_data[0]; 429 }; 430 431 struct usdf_event { 432 uint32_t ue_event; 433 void *ue_buf; 434 size_t ue_len; 435 uint64_t ue_flags; 436 }; 437 438 struct usdf_eq { 439 struct fid_eq eq_fid; 440 struct usdf_fabric *eq_fabric; 441 ofi_atomic32_t eq_refcnt; 442 443 pthread_spinlock_t eq_lock; 444 445 struct fi_eq_err_entry *eq_ev_buf; 446 struct usdf_event *eq_ev_ring; 447 struct usdf_event *eq_ev_head; 448 struct usdf_event *eq_ev_tail; 449 struct usdf_event *eq_ev_end; 450 int eq_ev_ring_size; 451 ofi_atomic32_t eq_num_events; 452 453 /* various ways to wait */ 454 struct fi_eq_attr eq_attr; 455 union { 456 int eq_fd; 457 }; 458 459 struct slist eq_err_data; 460 struct fi_ops_eq eq_ops_data; 461 }; 462 #define eq_ftou(FEQ) container_of(FEQ, struct usdf_eq, eq_fid) 463 #define eq_fidtou(FID) container_of(FID, struct usdf_eq, eq_fid.fid) 464 #define eq_utof(EQ) (&(EQ)->eq_fid) 465 466 /* 467 * Prototypes 468 */ 469 470 ssize_t usdf_eq_write_internal(struct usdf_eq *eq, uint32_t event, 471 const void *buf, size_t len, uint64_t flags); 472 473 /* fi_ops_fabric */ 474 int usdf_domain_open(struct fid_fabric *fabric, struct fi_info *info, 475 struct fid_domain **domain, void *context); 476 int usdf_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr, 477 struct fid_eq **eq, void *context); 478 int usdf_pep_open(struct fid_fabric *fabric, struct fi_info *info, 479 struct fid_pep **pep_p, void *context); 480 481 /* fi_ops_domain */ 482 int usdf_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, 483 struct fid_cq **cq_o, void *context); 484 int usdf_endpoint_open(struct fid_domain *domain, struct fi_info *info, 485 struct fid_ep **ep, void *context); 486 int usdf_av_open(struct fid_domain *domain, struct fi_av_attr *attr, 487 struct fid_av **av_o, void *context); 488 int usdf_query_atomic(struct fid_domain *domain, enum fi_datatype datatype, 489 enum fi_op op, struct fi_atomic_attr *attr, uint64_t flags); 490 491 /* Domain name functionality */ 492 int usdf_domain_getname(uint32_t version, struct usd_device_attrs *dap, 493 char **name); 494 bool usdf_domain_checkname(uint32_t version, struct usd_device_attrs *dap, 495 const char *hint); 496 497 /* fi_ops_mr */ 498 int usdf_reg_mr(struct fid *fid, const void *buf, size_t len, 499 uint64_t access, uint64_t offset, uint64_t requested_key, 500 uint64_t flags, struct fid_mr **mr_o, void *context); 501 int usdf_regv_mr(struct fid *fid, const struct iovec *iov, 502 size_t count, uint64_t access, 503 uint64_t offset, uint64_t requested_key, 504 uint64_t flags, struct fid_mr **mr, void *context); 505 int usdf_regattr(struct fid *fid, const struct fi_mr_attr *attr, 506 uint64_t flags, struct fid_mr **mr); 507 508 /* Fake IBV provider */ 509 void usdf_setup_fake_ibv_provider(void); 510 511 /* passive endpoint functions */ 512 int usdf_pep_steal_socket(struct usdf_pep *pep, int *is_bound, int *sock_o); 513 514 /* Utility functions */ 515 int usdf_catch_dom_attr(uint32_t version, const struct fi_info *hints, 516 struct fi_domain_attr *dom_attr); 517 int usdf_catch_tx_attr(uint32_t version, const struct fi_tx_attr *tx_attr); 518 int usdf_catch_rx_attr(uint32_t version, const struct fi_rx_attr *rx_attr); 519 struct sockaddr_in *usdf_format_to_sin(const struct fi_info *info, const void *addr); 520 void *usdf_sin_to_format(const struct fi_info *info, void *addr, size_t *len); 521 void usdf_free_sin_if_needed(const struct fi_info *info, struct sockaddr_in *sin); 522 523 #endif /* _USDF_H_ */ 524