1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2019, Joyent, Inc. 14 */ 15 16 /* 17 * xHCI DMA Management Routines 18 * 19 * Please see the big theory statement in xhci.c for more information. 20 */ 21 22 #include <sys/usb/hcd/xhci/xhci.h> 23 24 int 25 xhci_check_dma_handle(xhci_t *xhcip, xhci_dma_buffer_t *xdb) 26 { 27 ddi_fm_error_t de; 28 29 if (!DDI_FM_DMA_ERR_CAP(xhcip->xhci_fm_caps)) 30 return (0); 31 32 ddi_fm_dma_err_get(xdb->xdb_dma_handle, &de, DDI_FME_VERSION); 33 return (de.fme_status); 34 } 35 36 void 37 xhci_dma_acc_attr(xhci_t *xhcip, ddi_device_acc_attr_t *accp) 38 { 39 accp->devacc_attr_version = DDI_DEVICE_ATTR_V0; 40 accp->devacc_attr_endian_flags = DDI_NEVERSWAP_ACC; 41 accp->devacc_attr_dataorder = DDI_STRICTORDER_ACC; 42 43 if (DDI_FM_DMA_ERR_CAP(xhcip->xhci_fm_caps)) { 44 accp->devacc_attr_access = DDI_FLAGERR_ACC; 45 } else { 46 accp->devacc_attr_access = DDI_DEFAULT_ACC; 47 } 48 } 49 50 /* 51 * These are DMA attributes that we assign when making a transfer. The SGL is 52 * variable and based on the caller, which varies based on the type of transfer 53 * we're doing. 54 */ 55 void 56 xhci_dma_transfer_attr(xhci_t *xhcip, ddi_dma_attr_t *attrp, uint_t sgl) 57 { 58 VERIFY3U(sgl, >, 0); 59 VERIFY3U(sgl, <=, XHCI_TRANSFER_DMA_SGL); 60 attrp->dma_attr_version = DMA_ATTR_V0; 61 62 /* 63 * The range of data that we can use is based on what hardware supports. 64 */ 65 attrp->dma_attr_addr_lo = 0x0; 66 if (xhcip->xhci_caps.xcap_flags & XCAP_AC64) { 67 attrp->dma_attr_addr_hi = UINT64_MAX; 68 } else { 69 attrp->dma_attr_addr_hi = UINT32_MAX; 70 } 71 72 /* 73 * The count max indicates the total amount that will fit into one 74 * cookie, which is one TRB in our world. In other words 64k. 75 */ 76 attrp->dma_attr_count_max = XHCI_TRB_MAX_TRANSFER; 77 78 /* 79 * The alignment and segment are related. The alignment describes the 80 * alignment of the PA. The segment describes a boundary that the DMA 81 * allocation cannot cross. In other words, for a given chunk of 82 * allocated DMA memory, the allocated buffer cannot cross a 64k aligned 83 * boundary. However, the physical address only needs to be aligned to 84 * 64 bytes. 85 */ 86 attrp->dma_attr_align = XHCI_DMA_ALIGN; 87 attrp->dma_attr_seg = XHCI_TRB_MAX_TRANSFER - 1; 88 89 90 attrp->dma_attr_burstsizes = 0xfff; 91 92 /* 93 * This is the maximum we can send. Technically this is limited by the 94 * descriptors and not by hardware, hence why we use a large value for 95 * the max that'll be less than any memory allocation we ever throw at 96 * it. 97 */ 98 attrp->dma_attr_minxfer = 0x1; 99 attrp->dma_attr_maxxfer = UINT32_MAX; 100 101 /* 102 * This is determined by the caller. 103 */ 104 attrp->dma_attr_sgllen = sgl; 105 106 /* 107 * The granularity describes the addressing granularity. e.g. can things 108 * ask for chunks in units of this number of bytes. For PCI this should 109 * always be one. 110 */ 111 attrp->dma_attr_granular = 1; 112 113 if (DDI_FM_DMA_ERR_CAP(xhcip->xhci_fm_caps)) { 114 attrp->dma_attr_flags = DDI_DMA_FLAGERR; 115 } else { 116 attrp->dma_attr_flags = 0; 117 } 118 } 119 120 /* 121 * This routine tries to create DMA attributes for normal allocations for data 122 * structures and the like. By default we use the same values as the transfer 123 * attributes, but have explicit comments about how they're different. 124 */ 125 void 126 xhci_dma_dma_attr(xhci_t *xhcip, ddi_dma_attr_t *attrp) 127 { 128 /* 129 * Note, we always use a single SGL for these DMA allocations as these 130 * are used for small data structures. 131 */ 132 xhci_dma_transfer_attr(xhcip, attrp, XHCI_DEF_DMA_SGL); 133 134 /* 135 * The maximum size of any of these structures is 4k as opposed to the 136 * 64K max described above. Similarly the boundary requirement is 137 * reduced to 4k. 138 */ 139 attrp->dma_attr_count_max = xhcip->xhci_caps.xcap_pagesize; 140 attrp->dma_attr_maxxfer = xhcip->xhci_caps.xcap_pagesize; 141 attrp->dma_attr_seg = xhcip->xhci_caps.xcap_pagesize - 1; 142 } 143 144 /* 145 * Fill in attributes for a scratchpad entry. The scratchpad entries are 146 * somewhat different in so far as they are closest to a normal DMA attribute, 147 * except they have stricter alignments, needing to be page sized. 148 * 149 * In addition, because we never access this memory ourselves, we can just mark 150 * it all as relaxed ordering. 151 */ 152 void 153 xhci_dma_scratchpad_attr(xhci_t *xhcip, ddi_dma_attr_t *attrp) 154 { 155 xhci_dma_dma_attr(xhcip, attrp); 156 attrp->dma_attr_align = xhcip->xhci_caps.xcap_pagesize; 157 attrp->dma_attr_flags |= DDI_DMA_RELAXED_ORDERING; 158 } 159 160 /* 161 * This should be used for the simple case of a single SGL entry, which is the 162 * vast majority of the non-transfer allocations. 163 */ 164 uint64_t 165 xhci_dma_pa(xhci_dma_buffer_t *xdb) 166 { 167 ASSERT(xdb->xdb_ncookies == 1); 168 return (xdb->xdb_cookies[0].dmac_laddress); 169 } 170 171 void 172 xhci_dma_free(xhci_dma_buffer_t *xdb) 173 { 174 if (xdb->xdb_ncookies != 0) { 175 VERIFY(xdb->xdb_dma_handle != NULL); 176 (void) ddi_dma_unbind_handle(xdb->xdb_dma_handle); 177 xdb->xdb_ncookies = 0; 178 bzero(xdb->xdb_cookies, sizeof (ddi_dma_cookie_t) * 179 XHCI_TRANSFER_DMA_SGL); 180 xdb->xdb_len = 0; 181 } 182 183 if (xdb->xdb_acc_handle != NULL) { 184 ddi_dma_mem_free(&xdb->xdb_acc_handle); 185 xdb->xdb_acc_handle = NULL; 186 xdb->xdb_va = NULL; 187 } 188 189 if (xdb->xdb_dma_handle != NULL) { 190 ddi_dma_free_handle(&xdb->xdb_dma_handle); 191 xdb->xdb_dma_handle = NULL; 192 } 193 194 ASSERT(xdb->xdb_va == NULL); 195 ASSERT(xdb->xdb_ncookies == 0); 196 ASSERT(xdb->xdb_cookies[0].dmac_laddress == 0); 197 ASSERT(xdb->xdb_len == 0); 198 } 199 200 boolean_t 201 xhci_dma_alloc(xhci_t *xhcip, xhci_dma_buffer_t *xdb, 202 ddi_dma_attr_t *attrp, ddi_device_acc_attr_t *accp, boolean_t zero, 203 size_t size, boolean_t wait) 204 { 205 int ret, i; 206 uint_t flags = DDI_DMA_CONSISTENT; 207 size_t len; 208 ddi_dma_cookie_t cookie; 209 uint_t ncookies; 210 int (*memcb)(caddr_t); 211 212 if (wait == B_TRUE) { 213 memcb = DDI_DMA_SLEEP; 214 } else { 215 memcb = DDI_DMA_DONTWAIT; 216 } 217 218 ret = ddi_dma_alloc_handle(xhcip->xhci_dip, attrp, memcb, NULL, 219 &xdb->xdb_dma_handle); 220 if (ret != 0) { 221 xhci_log(xhcip, "!failed to allocate DMA handle: %d", ret); 222 xdb->xdb_dma_handle = NULL; 223 return (B_FALSE); 224 } 225 226 ret = ddi_dma_mem_alloc(xdb->xdb_dma_handle, size, accp, flags, memcb, 227 NULL, &xdb->xdb_va, &len, &xdb->xdb_acc_handle); 228 if (ret != DDI_SUCCESS) { 229 xhci_log(xhcip, "!failed to allocate DMA memory: %d", ret); 230 xdb->xdb_va = NULL; 231 xdb->xdb_acc_handle = NULL; 232 xhci_dma_free(xdb); 233 return (B_FALSE); 234 } 235 236 if (zero == B_TRUE) 237 bzero(xdb->xdb_va, len); 238 239 ret = ddi_dma_addr_bind_handle(xdb->xdb_dma_handle, NULL, 240 xdb->xdb_va, len, DDI_DMA_RDWR | flags, memcb, NULL, &cookie, 241 &ncookies); 242 if (ret != 0) { 243 xhci_log(xhcip, "!failed to bind DMA memory: %d", ret); 244 xhci_dma_free(xdb); 245 return (B_FALSE); 246 } 247 248 /* 249 * Note we explicitly store the logical length of this allocation. The 250 * physical length is available via the cookies. 251 */ 252 xdb->xdb_len = size; 253 xdb->xdb_ncookies = ncookies; 254 xdb->xdb_cookies[0] = cookie; 255 for (i = 1; i < ncookies; i++) { 256 ddi_dma_nextcookie(xdb->xdb_dma_handle, &xdb->xdb_cookies[i]); 257 } 258 259 260 return (B_TRUE); 261 } 262 263 void 264 xhci_transfer_free(xhci_t *xhcip, xhci_transfer_t *xt) 265 { 266 if (xt == NULL) 267 return; 268 269 VERIFY(xhcip != NULL); 270 xhci_dma_free(&xt->xt_buffer); 271 if (xt->xt_isoc != NULL) { 272 ASSERT3U(xt->xt_ntrbs, >, 0); 273 kmem_free(xt->xt_isoc, sizeof (usb_isoc_pkt_descr_t) * 274 xt->xt_ntrbs); 275 xt->xt_isoc = NULL; 276 } 277 if (xt->xt_trbs != NULL) { 278 ASSERT3U(xt->xt_ntrbs, >, 0); 279 kmem_free(xt->xt_trbs, sizeof (xhci_trb_t) * xt->xt_ntrbs); 280 xt->xt_trbs = NULL; 281 } 282 if (xt->xt_trbs_pa != NULL) { 283 ASSERT3U(xt->xt_ntrbs, >, 0); 284 kmem_free(xt->xt_trbs_pa, sizeof (uint64_t) * xt->xt_ntrbs); 285 xt->xt_trbs_pa = NULL; 286 } 287 kmem_free(xt, sizeof (xhci_transfer_t)); 288 } 289 290 xhci_transfer_t * 291 xhci_transfer_alloc(xhci_t *xhcip, xhci_endpoint_t *xep, size_t size, 292 uint_t trbs, int usb_flags) 293 { 294 int kmflags; 295 boolean_t dmawait; 296 xhci_transfer_t *xt; 297 ddi_device_acc_attr_t acc; 298 ddi_dma_attr_t attr; 299 300 if (usb_flags & USB_FLAGS_SLEEP) { 301 kmflags = KM_SLEEP; 302 dmawait = B_TRUE; 303 } else { 304 kmflags = KM_NOSLEEP; 305 dmawait = B_FALSE; 306 } 307 308 xt = kmem_zalloc(sizeof (xhci_transfer_t), kmflags); 309 if (xt == NULL) 310 return (NULL); 311 312 if (size != 0) { 313 int sgl = XHCI_DEF_DMA_SGL; 314 315 /* 316 * For BULK transfers, we always increase the number of SGL 317 * entries that we support to make things easier for the kernel. 318 * However, for control transfers, we currently opt to keep 319 * things a bit simpler and use our default of one SGL. There's 320 * no good technical reason for this, rather it just keeps 321 * things a bit easier. 322 * 323 * To simplify things, we don't use additional SGL entries for 324 * ISOC transfers. While this isn't the best, it isn't too far 325 * off from what ehci and co. have done before. If this becomes 326 * a technical issue, it's certainly possible to increase the 327 * SGL entry count. 328 * 329 * When we use the larger SGL count, we change our strategy for 330 * being notified. In such a case we will opt to use an event 331 * data packet. This helps deal with cases where some 332 * controllers don't properly generate events for the last entry 333 * in a TD with IOC when IOSP is set. 334 */ 335 if (xep->xep_type == USB_EP_ATTR_BULK) { 336 sgl = XHCI_TRANSFER_DMA_SGL; 337 trbs++; 338 } 339 340 xhci_dma_acc_attr(xhcip, &acc); 341 xhci_dma_transfer_attr(xhcip, &attr, sgl); 342 if (xhci_dma_alloc(xhcip, &xt->xt_buffer, &attr, &acc, B_FALSE, 343 size, dmawait) == B_FALSE) { 344 kmem_free(xt, sizeof (xhci_transfer_t)); 345 return (NULL); 346 } 347 348 /* 349 * ISOC transfers are a bit special and don't need additional 350 * TRBs for data. 351 */ 352 if (xep->xep_type != USB_EP_ATTR_ISOCH) 353 trbs += xt->xt_buffer.xdb_ncookies; 354 } 355 356 xt->xt_trbs = kmem_zalloc(sizeof (xhci_trb_t) * trbs, kmflags); 357 if (xt->xt_trbs == NULL) { 358 xhci_dma_free(&xt->xt_buffer); 359 kmem_free(xt, sizeof (xhci_transfer_t)); 360 return (NULL); 361 } 362 363 xt->xt_trbs_pa = kmem_zalloc(sizeof (uint64_t) * trbs, kmflags); 364 if (xt->xt_trbs_pa == NULL) { 365 kmem_free(xt->xt_trbs, sizeof (xhci_trb_t) * trbs); 366 xhci_dma_free(&xt->xt_buffer); 367 kmem_free(xt, sizeof (xhci_transfer_t)); 368 return (NULL); 369 } 370 371 /* 372 * For ISOCH transfers, we need to also allocate the results data. 373 */ 374 if (xep->xep_type == USB_EP_ATTR_ISOCH) { 375 xt->xt_isoc = kmem_zalloc(sizeof (usb_isoc_pkt_descr_t) * trbs, 376 kmflags); 377 if (xt->xt_isoc == NULL) { 378 kmem_free(xt->xt_trbs_pa, sizeof (uint64_t) * trbs); 379 kmem_free(xt->xt_trbs, sizeof (xhci_trb_t) * trbs); 380 xhci_dma_free(&xt->xt_buffer); 381 kmem_free(xt, sizeof (xhci_transfer_t)); 382 return (NULL); 383 } 384 } 385 386 xt->xt_ntrbs = trbs; 387 xt->xt_cr = USB_CR_OK; 388 389 return (xt); 390 } 391 392 /* 393 * Abstract the notion of copying out to handle the case of multiple DMA 394 * cookies. If tobuf is true, we are copying to the kernel provided buffer, 395 * otherwise we're copying into the DMA memory. 396 */ 397 void 398 xhci_transfer_copy(xhci_transfer_t *xt, void *buf, size_t len, 399 boolean_t tobuf) 400 { 401 void *dmabuf = xt->xt_buffer.xdb_va; 402 if (tobuf == B_TRUE) 403 bcopy(dmabuf, buf, len); 404 else 405 bcopy(buf, dmabuf, len); 406 } 407 408 int 409 xhci_transfer_sync(xhci_t *xhcip, xhci_transfer_t *xt, uint_t type) 410 { 411 XHCI_DMA_SYNC(xt->xt_buffer, type); 412 return (xhci_check_dma_handle(xhcip, &xt->xt_buffer)); 413 } 414 415 /* 416 * We're required to try and inform the xHCI controller about the number of data 417 * packets that are required. The algorithm to use is described in xHCI 1.1 / 418 * 4.11.2.4. While it might be tempting to just try and calculate the number of 419 * packets based on simple rounding of the remaining number of bytes, that 420 * misses a critical problem -- DMA boundaries may cause us to need additional 421 * packets that are missed initially. Consider a transfer made up of four 422 * different DMA buffers sized in bytes: 4096, 4096, 256, 256, with a 512 byte 423 * packet size. 424 * 425 * Remain 4608 512 256 0 426 * Bytes 4096 4096 256 256 427 * Naive TD 9 1 1 0 428 * Act TD 10 2 1 0 429 * 430 * This means that the only safe way forward here is to work backwards and see 431 * how many we need to work up to this point. 432 */ 433 static uint_t 434 xhci_transfer_get_tdsize(xhci_transfer_t *xt, uint_t off, uint_t mps) 435 { 436 int i; 437 uint_t npkt = 0; 438 439 /* 440 * There are always zero packets for the last TRB. 441 */ 442 ASSERT(xt->xt_buffer.xdb_ncookies > 0); 443 for (i = xt->xt_buffer.xdb_ncookies - 1; i > off; i--) { 444 size_t len = roundup(xt->xt_buffer.xdb_cookies[i].dmac_size, 445 mps); 446 npkt += len / mps; 447 } 448 449 /* 450 * Make sure to clamp this value otherwise we risk truncation. 451 */ 452 if (npkt >= XHCI_MAX_TDSIZE) 453 return (XHCI_MAX_TDSIZE); 454 455 return (npkt); 456 } 457 458 void 459 xhci_transfer_trb_fill_data(xhci_endpoint_t *xep, xhci_transfer_t *xt, int off, 460 boolean_t in) 461 { 462 uint_t mps, tdsize, flags; 463 int i; 464 465 VERIFY(xt->xt_buffer.xdb_ncookies > 0); 466 VERIFY(xep->xep_pipe != NULL); 467 VERIFY(off + xt->xt_buffer.xdb_ncookies <= xt->xt_ntrbs); 468 mps = xep->xep_pipe->p_ep.wMaxPacketSize; 469 470 if (in == B_TRUE) { 471 xt->xt_data_tohost = B_TRUE; 472 } 473 474 /* 475 * We assume that if we have a non-bulk endpoint, then we should only 476 * have a single cookie. This falls out from the default SGL length that 477 * we use for these other device types. 478 */ 479 if (xep->xep_type != USB_EP_ATTR_BULK) { 480 VERIFY3U(xt->xt_buffer.xdb_ncookies, ==, 1); 481 } 482 483 for (i = 0; i < xt->xt_buffer.xdb_ncookies; i++) { 484 uint64_t pa, dmasz; 485 486 pa = xt->xt_buffer.xdb_cookies[i].dmac_laddress; 487 dmasz = xt->xt_buffer.xdb_cookies[i].dmac_size; 488 489 tdsize = xhci_transfer_get_tdsize(xt, i, mps); 490 491 flags = XHCI_TRB_TYPE_NORMAL; 492 if (i == 0 && xep->xep_type == USB_EP_ATTR_CONTROL) { 493 flags = XHCI_TRB_TYPE_DATA; 494 if (in == B_TRUE) 495 flags |= XHCI_TRB_DIR_IN; 496 } 497 498 /* 499 * If we have more than one cookie, then we need to set chaining 500 * on every TRB and the last TRB will turn into an event data 501 * TRB. If we only have a single TRB, then we just set interrupt 502 * on completion (IOC). There's no need to specifically set 503 * interrupt on short packet (IOSP) in that case, as we'll 504 * always get the event notification. We still need the chain 505 * bit set on the last packet, so we can chain into the event 506 * data. Even if all the data on a bulk endpoint (the only 507 * endpoint type that uses chaining today) has only one cookie, 508 * then we'll still schedule an event data block. 509 */ 510 if (xep->xep_type == USB_EP_ATTR_BULK || 511 xt->xt_buffer.xdb_ncookies > 1) { 512 flags |= XHCI_TRB_CHAIN; 513 } 514 515 /* 516 * What we set for the last TRB depends on the type of the 517 * endpoint. If it's a bulk endpoint, then we have to set 518 * evaluate next trb (ENT) so we successfully process the event 519 * data TRB we'll set up. Otherwise, we need to make sure that 520 * we set interrupt on completion, so we get the event. However, 521 * we don't set the event on control endpoints, as the status 522 * stage TD will be the one where we get the event. But, we do 523 * still need an interrupt on short packet, because technically 524 * the status stage is in its own TD. 525 */ 526 if (i + 1 == xt->xt_buffer.xdb_ncookies) { 527 switch (xep->xep_type) { 528 case USB_EP_ATTR_BULK: 529 flags |= XHCI_TRB_ENT; 530 break; 531 case USB_EP_ATTR_CONTROL: 532 flags |= XHCI_TRB_ISP; 533 break; 534 default: 535 flags |= XHCI_TRB_IOC; 536 break; 537 } 538 } 539 540 xt->xt_trbs[off + i].trb_addr = LE_64(pa); 541 xt->xt_trbs[off + i].trb_status = LE_32(XHCI_TRB_LEN(dmasz) | 542 XHCI_TRB_TDREM(tdsize) | XHCI_TRB_INTR(0)); 543 xt->xt_trbs[off + i].trb_flags = LE_32(flags); 544 } 545 546 /* 547 * The last TRB in any bulk transfer is the Event Data TRB. 548 */ 549 if (xep->xep_type == USB_EP_ATTR_BULK) { 550 VERIFY(off + xt->xt_buffer.xdb_ncookies + 1 <= xt->xt_ntrbs); 551 xt->xt_trbs[off + i].trb_addr = LE_64((uintptr_t)xt); 552 xt->xt_trbs[off + i].trb_status = LE_32(XHCI_TRB_INTR(0)); 553 xt->xt_trbs[off + i].trb_flags = LE_32(XHCI_TRB_TYPE_EVENT | 554 XHCI_TRB_IOC); 555 } 556 } 557 558 /* 559 * These are utility functions for isochronus transfers to help calculate the 560 * transfer burst count (TBC) and transfer last burst packet count (TLPBC) 561 * entries for an isochronus entry. See xHCI 1.1 / 4.11.2.3 for how to calculate 562 * them. 563 */ 564 void 565 xhci_transfer_calculate_isoc(xhci_device_t *xd, xhci_endpoint_t *xep, 566 uint_t trb_len, uint_t *tbc, uint_t *tlbpc) 567 { 568 uint_t mps, tdpc, burst; 569 570 /* 571 * Even if we're asked to send no data, that actually requires the 572 * equivalent of sending one byte of data. 573 */ 574 if (trb_len == 0) 575 trb_len = 1; 576 577 mps = XHCI_EPCTX_GET_MPS(xd->xd_endout[xep->xep_num]->xec_info2); 578 burst = XHCI_EPCTX_GET_MAXB(xd->xd_endout[xep->xep_num]->xec_info2); 579 580 /* 581 * This is supposed to correspond to the Transfer Descriptor Packet 582 * Count from xHCI 1.1 / 4.14.1. 583 */ 584 tdpc = howmany(trb_len, mps); 585 *tbc = howmany(tdpc, burst + 1) - 1; 586 587 if ((tdpc % (burst + 1)) == 0) 588 *tlbpc = burst; 589 else 590 *tlbpc = (tdpc % (burst + 1)) - 1; 591 } 592