/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2018, Joyent, Inc.
 */

/*
 * Kernel asynchronous I/O.
 * This is only for raw devices now (as of Nov. 1993).
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/fs/snode.h>
#include <sys/unistd.h>
#include <sys/cmn_err.h>
#include <vm/as.h>
#include <vm/faultcode.h>
#include <sys/sysmacros.h>
#include <sys/procfs.h>
#include <sys/kmem.h>
#include <sys/autoconf.h>
#include <sys/ddi_impldefs.h>
#include <sys/sunddi.h>
#include <sys/aio_impl.h>
#include <sys/debug.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/vmsystm.h>
#include <sys/fs/pxfs_ki.h>
#include <sys/contract/process_impl.h>

/*
 * external entry point.
 */
#ifdef _LP64
static int64_t kaioc(long, long, long, long, long, long);
#endif
static int kaio(ulong_t *, rval_t *);


#define	AIO_64	0
#define	AIO_32	1
#define	AIO_LARGEFILE	2

/*
 * implementation specific functions (private)
 */
#ifdef _LP64
static int alio(int, aiocb_t **, int, struct sigevent *);
#endif
static int aionotify(void);
static int aioinit(void);
static int aiostart(void);
static void alio_cleanup(aio_t *, aiocb_t **, int, int);
static int (*check_vp(struct vnode *, int))(vnode_t *, struct aio_req *,
    cred_t *);
static void lio_set_error(aio_req_t *, int portused);
static aio_t *aio_aiop_alloc();
static int aio_req_alloc(aio_req_t **, aio_result_t *);
static int aio_lio_alloc(aio_lio_t **);
static aio_req_t *aio_req_done(void *);
static aio_req_t *aio_req_remove(aio_req_t *);
static int aio_req_find(aio_result_t *, aio_req_t **);
static int aio_hash_insert(struct aio_req_t *, aio_t *);
static int aio_req_setup(aio_req_t **, aio_t *, aiocb_t *,
    aio_result_t *, vnode_t *, int);
static int aio_cleanup_thread(aio_t *);
static aio_lio_t *aio_list_get(aio_result_t *);
static void lio_set_uerror(void *, int);
extern void aio_zerolen(aio_req_t *);
static int aiowait(struct timeval *, int, long *);
static int aiowaitn(void *, uint_t, uint_t *, timespec_t *);
static int aio_unlock_requests(caddr_t iocblist, int iocb_index,
    aio_req_t *reqlist, aio_t *aiop, model_t model);
static int aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max);
static int aiosuspend(void *, int, struct timespec *, int,
    long *, int);
static int aliowait(int, void *, int, void *, int);
static int aioerror(void *, int);
static int aio_cancel(int, void *, long *, int);
static int arw(int, int, char *, int, offset_t, aio_result_t *, int);
static int aiorw(int, void *, int, int);

static int alioLF(int, void *, int, void *);
static int aio_req_setupLF(aio_req_t **, aio_t *, aiocb64_32_t *,
    aio_result_t *, vnode_t *, int);
static int alio32(int, void *, int, void *);
static int driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
static int driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);

#ifdef _SYSCALL32_IMPL
static void aiocb_LFton(aiocb64_32_t *, aiocb_t *);
void	aiocb_32ton(aiocb32_t *, aiocb_t *);
#endif /* _SYSCALL32_IMPL */

/*
 * implementation specific functions (external)
 */
void aio_req_free(aio_t *, aio_req_t *);

/*
 * Event Port framework
 */

void aio_req_free_port(aio_t *, aio_req_t *);
static int aio_port_callback(void *, int *, pid_t, int, void *);

/*
 * This is the loadable module wrapper.
 */
#include <sys/modctl.h>
#include <sys/syscall.h>

#ifdef _LP64

static struct sysent kaio_sysent = {
	6,
	SE_NOUNLOAD | SE_64RVAL | SE_ARGC,
	(int (*)())(uintptr_t)kaioc
};

#ifdef _SYSCALL32_IMPL
static struct sysent kaio_sysent32 = {
	7,
	SE_NOUNLOAD | SE_64RVAL,
	kaio
};
#endif  /* _SYSCALL32_IMPL */

#else   /* _LP64 */

static struct sysent kaio_sysent = {
	7,
	SE_NOUNLOAD | SE_32RVAL1,
	kaio
};

#endif  /* _LP64 */

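/*
 * Note on the argument counts above: the native LP64 entry point takes
 * six arguments because a 64-bit file offset fits in a single register,
 * while the 32-bit entry points take seven because the offset arrives
 * as two 32-bit words (reassembled in kaio() below).
 */
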
/*
 * Module linkage information for the kernel.
 */

static struct modlsys modlsys = {
	&mod_syscallops,
	"kernel Async I/O",
	&kaio_sysent
};

#ifdef _SYSCALL32_IMPL
static struct modlsys modlsys32 = {
	&mod_syscallops32,
	"kernel Async I/O for 32 bit compatibility",
	&kaio_sysent32
};
#endif  /* _SYSCALL32_IMPL */


static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsys,
#ifdef _SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};

int
_init(void)
{
	int retval;

	if ((retval = mod_install(&modlinkage)) != 0)
		return (retval);

	return (0);
}

int
_fini(void)
{
	int retval;

	retval = mod_remove(&modlinkage);

	return (retval);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

#ifdef _LP64
static int64_t
kaioc(
	long	a0,
	long	a1,
	long	a2,
	long	a3,
	long	a4,
	long	a5)
{
	int	error;
	long	rval = 0;

	switch ((int)a0 & ~AIO_POLL_BIT) {
	case AIOREAD:
		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
		    (offset_t)a4, (aio_result_t *)a5, FREAD);
		break;
	case AIOWRITE:
		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
		    (offset_t)a4, (aio_result_t *)a5, FWRITE);
		break;
	case AIOWAIT:
		error = aiowait((struct timeval *)a1, (int)a2, &rval);
		break;
	case AIOWAITN:
		error = aiowaitn((void *)a1, (uint_t)a2, (uint_t *)a3,
		    (timespec_t *)a4);
		break;
	case AIONOTIFY:
		error = aionotify();
		break;
	case AIOINIT:
		error = aioinit();
		break;
	case AIOSTART:
		error = aiostart();
		break;
	case AIOLIO:
		error = alio((int)a1, (aiocb_t **)a2, (int)a3,
		    (struct sigevent *)a4);
		break;
	case AIOLIOWAIT:
		error = aliowait((int)a1, (void *)a2, (int)a3,
		    (struct sigevent *)a4, AIO_64);
		break;
	case AIOSUSPEND:
		error = aiosuspend((void *)a1, (int)a2, (timespec_t *)a3,
		    (int)a4, &rval, AIO_64);
		break;
	case AIOERROR:
		error = aioerror((void *)a1, AIO_64);
		break;
	case AIOAREAD:
		error = aiorw((int)a0, (void *)a1, FREAD, AIO_64);
		break;
	case AIOAWRITE:
		error = aiorw((int)a0, (void *)a1, FWRITE, AIO_64);
		break;
	case AIOCANCEL:
		error = aio_cancel((int)a1, (void *)a2, &rval, AIO_64);
		break;

	/*
	 * The large file related calls are valid only for the 32 bit
	 * kernel and not for the 64 bit kernel.  On the 64 bit kernel
	 * we convert large file calls to regular 64 bit calls.
	 */

	default:
		error = EINVAL;
	}
	if (error)
		return ((int64_t)set_errno(error));
	return (rval);
}
#endif

static int
kaio(
	ulong_t *uap,
	rval_t *rvp)
{
	long	rval = 0;
	int	error = 0;
	offset_t	off;


	rvp->r_vals = 0;
	/* reassemble the 64-bit file offset from its two 32-bit words */
#if defined(_LITTLE_ENDIAN)
	off = ((u_offset_t)uap[5] << 32) | (u_offset_t)uap[4];
#else
	off = ((u_offset_t)uap[4] << 32) | (u_offset_t)uap[5];
#endif

	switch (uap[0] & ~AIO_POLL_BIT) {
	/*
	 * It must be the 32 bit system call on 64 bit kernel
	 */
	case AIOREAD:
		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
		    (int)uap[3], off, (aio_result_t *)uap[6], FREAD));
	case AIOWRITE:
		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
		    (int)uap[3], off, (aio_result_t *)uap[6], FWRITE));
	case AIOWAIT:
		error = aiowait((struct timeval *)uap[1], (int)uap[2],
		    &rval);
		break;
	case AIOWAITN:
		error = aiowaitn((void *)uap[1], (uint_t)uap[2],
		    (uint_t *)uap[3], (timespec_t *)uap[4]);
		break;
	case AIONOTIFY:
		return (aionotify());
	case AIOINIT:
		return (aioinit());
	case AIOSTART:
		return (aiostart());
	case AIOLIO:
		return (alio32((int)uap[1], (void *)uap[2], (int)uap[3],
		    (void *)uap[4]));
	case AIOLIOWAIT:
		return (aliowait((int)uap[1], (void *)uap[2],
		    (int)uap[3], (struct sigevent *)uap[4], AIO_32));
	case AIOSUSPEND:
		error = aiosuspend((void *)uap[1], (int)uap[2],
		    (timespec_t *)uap[3], (int)uap[4],
		    &rval, AIO_32);
		break;
	case AIOERROR:
		return (aioerror((void *)uap[1], AIO_32));
	case AIOAREAD:
		return (aiorw((int)uap[0], (void *)uap[1],
		    FREAD, AIO_32));
	case AIOAWRITE:
		return (aiorw((int)uap[0], (void *)uap[1],
		    FWRITE, AIO_32));
	case AIOCANCEL:
		error = (aio_cancel((int)uap[1], (void *)uap[2], &rval,
		    AIO_32));
		break;
	case AIOLIO64:
		return (alioLF((int)uap[1], (void *)uap[2],
		    (int)uap[3], (void *)uap[4]));
	case AIOLIOWAIT64:
		return (aliowait(uap[1], (void *)uap[2],
		    (int)uap[3], (void *)uap[4], AIO_LARGEFILE));
	case AIOSUSPEND64:
		error = aiosuspend((void *)uap[1], (int)uap[2],
		    (timespec_t *)uap[3], (int)uap[4], &rval,
		    AIO_LARGEFILE);
		break;
	case AIOERROR64:
		return (aioerror((void *)uap[1], AIO_LARGEFILE));
	case AIOAREAD64:
		return (aiorw((int)uap[0], (void *)uap[1], FREAD,
		    AIO_LARGEFILE));
	case AIOAWRITE64:
		return (aiorw((int)uap[0], (void *)uap[1], FWRITE,
		    AIO_LARGEFILE));
	case AIOCANCEL64:
		error = (aio_cancel((int)uap[1], (void *)uap[2],
		    &rval, AIO_LARGEFILE));
		break;
	default:
		return (EINVAL);
	}

	rvp->r_val1 = rval;
	return (error);
}

/*
 * wake up LWPs in this process that are sleeping in
 * aiowait().
 */
static int
aionotify(void)
{
	aio_t	*aiop;

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (0);

	mutex_enter(&aiop->aio_mutex);
	aiop->aio_notifycnt++;
	cv_broadcast(&aiop->aio_waitcv);
	mutex_exit(&aiop->aio_mutex);

	return (0);
}

static int
timeval2reltime(struct timeval *timout, timestruc_t *rqtime,
    timestruc_t **rqtp, int *blocking)
{
#ifdef	_SYSCALL32_IMPL
	struct timeval32 wait_time_32;
#endif
	struct timeval wait_time;
	model_t	model = get_udatamodel();

	*rqtp = NULL;
	if (timout == NULL) {		/* wait indefinitely */
		*blocking = 1;
		return (0);
	}

	/*
	 * Need to correctly compare with the -1 passed in for a user
	 * address pointer, with both 32 bit and 64 bit apps.
	 */
	if (model == DATAMODEL_NATIVE) {
		if ((intptr_t)timout == (intptr_t)-1) {	/* don't wait */
			*blocking = 0;
			return (0);
		}

		if (copyin(timout, &wait_time, sizeof (wait_time)))
			return (EFAULT);
	}
#ifdef	_SYSCALL32_IMPL
	else {
		/*
		 * -1 from a 32bit app. It will not get sign extended.
		 * don't wait if -1.
		 */
		if ((intptr_t)timout == (intptr_t)((uint32_t)-1)) {
			*blocking = 0;
			return (0);
		}

		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
			return (EFAULT);
		TIMEVAL32_TO_TIMEVAL(&wait_time, &wait_time_32);
	}
#endif  /* _SYSCALL32_IMPL */

	if (wait_time.tv_sec == 0 && wait_time.tv_usec == 0) {	/* don't wait */
		*blocking = 0;
		return (0);
	}

	if (wait_time.tv_sec < 0 ||
	    wait_time.tv_usec < 0 || wait_time.tv_usec >= MICROSEC)
		return (EINVAL);

	rqtime->tv_sec = wait_time.tv_sec;
	rqtime->tv_nsec = wait_time.tv_usec * 1000;
	*rqtp = rqtime;
	*blocking = 1;

	return (0);
}

static int
timespec2reltime(timespec_t *timout, timestruc_t *rqtime,
    timestruc_t **rqtp, int *blocking)
{
#ifdef	_SYSCALL32_IMPL
	timespec32_t wait_time_32;
#endif
	model_t	model = get_udatamodel();

	*rqtp = NULL;
	if (timout == NULL) {
		*blocking = 1;
		return (0);
	}

	if (model == DATAMODEL_NATIVE) {
		if (copyin(timout, rqtime, sizeof (*rqtime)))
			return (EFAULT);
	}
#ifdef	_SYSCALL32_IMPL
	else {
		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
			return (EFAULT);
		TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
	}
#endif  /* _SYSCALL32_IMPL */

	if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {
		*blocking = 0;
		return (0);
	}

	if (rqtime->tv_sec < 0 ||
	    rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
		return (EINVAL);

	*rqtp = rqtime;
	*blocking = 1;

	return (0);
}

/*ARGSUSED*/
static int
aiowait(struct timeval *timout, int dontblockflg, long *rval)
{
	int		error;
	aio_t		*aiop;
	aio_req_t	*reqp;
	clock_t		status;
	int		blocking;
	int		timecheck;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	/*
	 * Establish the absolute future time for the timeout.
	 */
	error = timeval2reltime(timout, &rqtime, &rqtp, &blocking);
	if (error)
		return (error);
	if (rqtp) {
		timestruc_t now;
		timecheck = timechanged;
		gethrestime(&now);
		timespecadd(rqtp, &now);
	}

	mutex_enter(&aiop->aio_mutex);
	for (;;) {
		/* process requests on poll queue */
		if (aiop->aio_pollq) {
			mutex_exit(&aiop->aio_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_mutex);
		}
		if ((reqp = aio_req_remove(NULL)) != NULL) {
			*rval = (long)reqp->aio_req_resultp;
			break;
		}
		/* user-level done queue might not be empty */
		if (aiop->aio_notifycnt > 0) {
			aiop->aio_notifycnt--;
			*rval = 1;
			break;
		}
		/* don't block if no outstanding aio */
		if (aiop->aio_outstanding == 0 && dontblockflg) {
			error = EINVAL;
			break;
		}
		if (blocking) {
			status = cv_waituntil_sig(&aiop->aio_waitcv,
			    &aiop->aio_mutex, rqtp, timecheck);

			if (status > 0)		/* check done queue again */
				continue;
			if (status == 0) {	/* interrupted by a signal */
				error = EINTR;
				*rval = -1;
			} else {		/* timer expired */
				error = ETIME;
			}
		}
		break;
	}
	mutex_exit(&aiop->aio_mutex);
	if (reqp) {
		aphysio_unlock(reqp);
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}
	return (error);
}

/*
 * aiowaitn can be used to reap completed asynchronous requests submitted with
 * lio_listio, aio_read or aio_write.
 * This function only reaps asynchronous raw I/Os.
 */

/*ARGSUSED*/
static int
aiowaitn(void *uiocb, uint_t nent, uint_t *nwait, timespec_t *timout)
{
	int		error = 0;
	aio_t		*aiop;
	aio_req_t	*reqlist = NULL;
	caddr_t		iocblist = NULL;	/* array of iocb ptr's */
	uint_t		waitcnt, cnt = 0;	/* iocb cnt */
	size_t		iocbsz;			/* users iocb size */
	size_t		riocbsz;		/* returned iocb size */
	int		iocb_index = 0;
	model_t		model = get_udatamodel();
	int		blocking = 1;
	int		timecheck;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	aiop = curproc->p_aio;
	if (aiop == NULL || nent == 0 || nent > _AIO_LISTIO_MAX)
		return (EINVAL);

	if (aiop->aio_outstanding == 0)
		return (EAGAIN);

	if (copyin(nwait, &waitcnt, sizeof (uint_t)))
		return (EFAULT);

	/* set *nwait to zero, if we must return prematurely */
	if (copyout(&cnt, nwait, sizeof (uint_t)))
		return (EFAULT);

	if (waitcnt == 0) {
		blocking = 0;
		rqtp = NULL;
		waitcnt = nent;
	} else {
		error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
		if (error)
			return (error);
	}

	if (model == DATAMODEL_NATIVE)
		iocbsz = (sizeof (aiocb_t *) * nent);
#ifdef	_SYSCALL32_IMPL
	else
		iocbsz = (sizeof (caddr32_t) * nent);
#endif  /* _SYSCALL32_IMPL */

	/*
	 * Only one aio_waitn call is allowed at a time.
	 * The active aio_waitn will collect all requests
	 * out of the "done" list and if necessary it will wait
	 * for some/all pending requests to fulfill the nwait
	 * parameter.
	 * A second or further aio_waitn calls will sleep here
	 * until the active aio_waitn finishes and leaves the kernel.
	 * If the second call does not block (poll), then return
	 * immediately with the error code EAGAIN.
	 * If the second call should block, then sleep here, but
	 * do not touch the timeout. The timeout starts when this
	 * aio_waitn-call becomes active.
	 */

	mutex_enter(&aiop->aio_mutex);

	while (aiop->aio_flags & AIO_WAITN) {
		if (blocking == 0) {
			mutex_exit(&aiop->aio_mutex);
			return (EAGAIN);
		}

		/* block, no timeout */
		aiop->aio_flags |= AIO_WAITN_PENDING;
		if (!cv_wait_sig(&aiop->aio_waitncv, &aiop->aio_mutex)) {
			mutex_exit(&aiop->aio_mutex);
			return (EINTR);
		}
	}

	/*
	 * Establish the absolute future time for the timeout.
	 */
	if (rqtp) {
		timestruc_t now;
		timecheck = timechanged;
		gethrestime(&now);
		timespecadd(rqtp, &now);
	}

	if (iocbsz > aiop->aio_iocbsz && aiop->aio_iocb != NULL) {
		kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);
		aiop->aio_iocb = NULL;
	}

	if (aiop->aio_iocb == NULL) {
		iocblist = kmem_zalloc(iocbsz, KM_NOSLEEP);
		if (iocblist == NULL) {
			mutex_exit(&aiop->aio_mutex);
			return (ENOMEM);
		}
		aiop->aio_iocb = (aiocb_t **)iocblist;
		aiop->aio_iocbsz = iocbsz;
	} else {
		iocblist = (char *)aiop->aio_iocb;
	}

	aiop->aio_waitncnt = waitcnt;
	aiop->aio_flags |= AIO_WAITN;

	for (;;) {
		/* push requests on poll queue to done queue */
		if (aiop->aio_pollq) {
			mutex_exit(&aiop->aio_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_mutex);
		}

		/* check for requests on done queue */
		if (aiop->aio_doneq) {
			cnt += aio_reqlist_concat(aiop, &reqlist, nent - cnt);
			aiop->aio_waitncnt = waitcnt - cnt;
		}

		/* user-level done queue might not be empty */
		if (aiop->aio_notifycnt > 0) {
			aiop->aio_notifycnt--;
			error = 0;
			break;
		}

		/*
		 * if we are here a second time as a result of timer
		 * expiration, we reset error if there are enough
		 * aiocb's to satisfy the request.
		 * We also return if all requests are already done
		 * and we picked up the whole done queue.
		 */

		if ((cnt >= waitcnt) || (cnt > 0 && aiop->aio_pending == 0 &&
		    aiop->aio_doneq == NULL)) {
			error = 0;
			break;
		}

		if ((cnt < waitcnt) && blocking) {
			int rval = cv_waituntil_sig(&aiop->aio_waitcv,
			    &aiop->aio_mutex, rqtp, timecheck);
			if (rval > 0)
				continue;
			if (rval < 0) {
				error = ETIME;
				blocking = 0;
				continue;
			}
			error = EINTR;
		}
		break;
	}

	mutex_exit(&aiop->aio_mutex);

	if (cnt > 0) {

		iocb_index = aio_unlock_requests(iocblist, iocb_index, reqlist,
		    aiop, model);

		if (model == DATAMODEL_NATIVE)
			riocbsz = (sizeof (aiocb_t *) * cnt);
#ifdef	_SYSCALL32_IMPL
		else
			riocbsz = (sizeof (caddr32_t) * cnt);
#endif  /* _SYSCALL32_IMPL */

		if (copyout(iocblist, uiocb, riocbsz) ||
		    copyout(&cnt, nwait, sizeof (uint_t)))
			error = EFAULT;
	}

	/* check if there is another thread waiting for execution */
	mutex_enter(&aiop->aio_mutex);
	aiop->aio_flags &= ~AIO_WAITN;
	if (aiop->aio_flags & AIO_WAITN_PENDING) {
		aiop->aio_flags &= ~AIO_WAITN_PENDING;
		cv_signal(&aiop->aio_waitncv);
	}
	mutex_exit(&aiop->aio_mutex);

	return (error);
}

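/*
 * For context: the user-level aio_waitn() interface built on top of
 * aiowaitn() passes an array of nent aiocb pointers and a desired count
 * in *nwait; on return *nwait holds the number of completed control
 * blocks copied back into the array.  (Illustrative summary of the
 * calling convention, inferred from the copyin/copyout logic above.)
 */
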
/*
 * aio_unlock_requests
 * copies out the result of the request as well as the return value.
 * It builds the list of completed asynchronous requests,
 * unlocks the allocated memory ranges and
 * puts the aio request structure back into the free list.
 */

static int
aio_unlock_requests(
	caddr_t	iocblist,
	int	iocb_index,
	aio_req_t *reqlist,
	aio_t *aiop,
	model_t	model)
{
	aio_req_t	*reqp, *nreqp;

	if (model == DATAMODEL_NATIVE) {
		for (reqp = reqlist; reqp != NULL; reqp = nreqp) {
			(((caddr_t *)iocblist)[iocb_index++]) =
			    reqp->aio_req_iocb.iocb;
			nreqp = reqp->aio_req_next;
			aphysio_unlock(reqp);
			aio_copyout_result(reqp);
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		}
	}
#ifdef	_SYSCALL32_IMPL
	else {
		for (reqp = reqlist; reqp != NULL; reqp = nreqp) {
			((caddr32_t *)iocblist)[iocb_index++] =
			    reqp->aio_req_iocb.iocb32;
			nreqp = reqp->aio_req_next;
			aphysio_unlock(reqp);
			aio_copyout_result(reqp);
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		}
	}
#endif	/* _SYSCALL32_IMPL */
	return (iocb_index);
}

/*
 * aio_reqlist_concat
 * moves "max" elements from the done queue to the reqlist queue and removes
 * the AIO_DONEQ flag.
 * - reqlist queue is a simple linked list
 * - done queue is a double linked list
 */

static int
aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max)
{
	aio_req_t *q2, *q2work, *list;
	int count = 0;

	list = *reqlist;
	q2 = aiop->aio_doneq;
	q2work = q2;
	while (max-- > 0) {
		q2work->aio_req_flags &= ~AIO_DONEQ;
		q2work = q2work->aio_req_next;
		count++;
		if (q2work == q2)
			break;
	}

	if (q2work == q2) {
		/* all elements were moved */
		q2->aio_req_prev->aio_req_next = list;
		list = q2;
		aiop->aio_doneq = NULL;
	} else {
		/*
		 * max < elements in the doneq
		 * detach only the required amount of elements
		 * out of the doneq
		 */
		q2work->aio_req_prev->aio_req_next = list;
		list = q2;

		aiop->aio_doneq = q2work;
		q2work->aio_req_prev = q2->aio_req_prev;
		q2->aio_req_prev->aio_req_next = q2work;
	}
	*reqlist = list;
	return (count);
}

/*ARGSUSED*/
static int
aiosuspend(void *aiocb, int nent, struct timespec *timout, int flag,
    long *rval, int run_mode)
{
	int		error;
	aio_t		*aiop;
	aio_req_t	*reqp, *found, *next;
	caddr_t		cbplist = NULL;
	aiocb_t		*cbp, **ucbp;
#ifdef	_SYSCALL32_IMPL
	aiocb32_t	*cbp32;
	caddr32_t	*ucbp32;
#endif  /* _SYSCALL32_IMPL */
	aiocb64_32_t	*cbp64;
	int		rv;
	int		i;
	size_t		ssize;
	model_t		model = get_udatamodel();
	int		blocking;
	int		timecheck;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
		return (EINVAL);

	/*
	 * Establish the absolute future time for the timeout.
	 */
	error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
	if (error)
		return (error);
	if (rqtp) {
		timestruc_t now;
		timecheck = timechanged;
		gethrestime(&now);
		timespecadd(rqtp, &now);
	}

	/*
	 * If we are not blocking and there's no IO complete
	 * skip aiocb copyin.
	 */
	if (!blocking && (aiop->aio_pollq == NULL) &&
	    (aiop->aio_doneq == NULL)) {
		return (EAGAIN);
	}

	if (model == DATAMODEL_NATIVE)
		ssize = (sizeof (aiocb_t *) * nent);
#ifdef	_SYSCALL32_IMPL
	else
		ssize = (sizeof (caddr32_t) * nent);
#endif  /* _SYSCALL32_IMPL */

	cbplist = kmem_alloc(ssize, KM_NOSLEEP);
	if (cbplist == NULL)
		return (ENOMEM);

	if (copyin(aiocb, cbplist, ssize)) {
		error = EFAULT;
		goto done;
	}

	found = NULL;
	/*
	 * we need to get the aio_cleanupq_mutex since we call
	 * aio_req_done().
	 */
	mutex_enter(&aiop->aio_cleanupq_mutex);
	mutex_enter(&aiop->aio_mutex);
	for (;;) {
		/* push requests on poll queue to done queue */
		if (aiop->aio_pollq) {
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_cleanupq_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_cleanupq_mutex);
			mutex_enter(&aiop->aio_mutex);
		}
		/* check for requests on done queue */
		if (aiop->aio_doneq) {
			if (model == DATAMODEL_NATIVE)
				ucbp = (aiocb_t **)cbplist;
#ifdef	_SYSCALL32_IMPL
			else
				ucbp32 = (caddr32_t *)cbplist;
#endif  /* _SYSCALL32_IMPL */
			for (i = 0; i < nent; i++) {
				if (model == DATAMODEL_NATIVE) {
					if ((cbp = *ucbp++) == NULL)
						continue;
					if (run_mode != AIO_LARGEFILE)
						reqp = aio_req_done(
						    &cbp->aio_resultp);
					else {
						cbp64 = (aiocb64_32_t *)cbp;
						reqp = aio_req_done(
						    &cbp64->aio_resultp);
					}
				}
#ifdef	_SYSCALL32_IMPL
				else {
					if (run_mode == AIO_32) {
						if ((cbp32 =
						    (aiocb32_t *)(uintptr_t)
						    *ucbp32++) == NULL)
							continue;
						reqp = aio_req_done(
						    &cbp32->aio_resultp);
					} else if (run_mode == AIO_LARGEFILE) {
						if ((cbp64 =
						    (aiocb64_32_t *)(uintptr_t)
						    *ucbp32++) == NULL)
							continue;
						reqp = aio_req_done(
						    &cbp64->aio_resultp);
					}

				}
#endif  /* _SYSCALL32_IMPL */
				if (reqp) {
					reqp->aio_req_next = found;
					found = reqp;
				}
				if (aiop->aio_doneq == NULL)
					break;
			}
			if (found)
				break;
		}
		if (aiop->aio_notifycnt > 0) {
			/*
			 * nothing on the kernel's queue. the user
			 * has notified the kernel that it has items
			 * on a user-level queue.
			 */
			aiop->aio_notifycnt--;
			*rval = 1;
			error = 0;
			break;
		}
		/* don't block if nothing is outstanding */
		if (aiop->aio_outstanding == 0) {
			error = EAGAIN;
			break;
		}
		if (blocking) {
			/*
			 * drop the aio_cleanupq_mutex as we are
			 * going to block.
			 */
			mutex_exit(&aiop->aio_cleanupq_mutex);
			rv = cv_waituntil_sig(&aiop->aio_waitcv,
			    &aiop->aio_mutex, rqtp, timecheck);
			/*
			 * we have to drop aio_mutex and
			 * grab it in the right order.
			 */
			mutex_exit(&aiop->aio_mutex);
			mutex_enter(&aiop->aio_cleanupq_mutex);
			mutex_enter(&aiop->aio_mutex);
			if (rv > 0)	/* check done queue again */
				continue;
			if (rv == 0)	/* interrupted by a signal */
				error = EINTR;
			else		/* timer expired */
				error = ETIME;
		} else {
			error = EAGAIN;
		}
		break;
	}
	mutex_exit(&aiop->aio_mutex);
	mutex_exit(&aiop->aio_cleanupq_mutex);
	for (reqp = found; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}
done:
	kmem_free(cbplist, ssize);
	return (error);
}

/*
 * initialize aio by allocating an aio_t struct for this
 * process.
 */
static int
aioinit(void)
{
	proc_t *p = curproc;
	aio_t *aiop;
	mutex_enter(&p->p_lock);
	if ((aiop = p->p_aio) == NULL) {
		aiop = aio_aiop_alloc();
		p->p_aio = aiop;
	}
	mutex_exit(&p->p_lock);
	if (aiop == NULL)
		return (ENOMEM);
	return (0);
}

/*
 * start a special thread that will cleanup after aio requests
 * that are preventing a segment from being unmapped. as_unmap()
 * blocks until all physio to this segment is completed. this
 * doesn't happen until all the pages in this segment are not
 * SOFTLOCKed. Some pages will be SOFTLOCKed when there are aio
 * requests still outstanding. this special thread will make sure
 * that these SOFTLOCKed pages will eventually be SOFTUNLOCKed.
 *
 * this function will return an error if the process has only
 * one LWP. the assumption is that the caller is a separate LWP
 * that remains blocked in the kernel for the life of this process.
 */
static int
aiostart(void)
{
	proc_t *p = curproc;
	aio_t *aiop;
	int first, error = 0;

	if (p->p_lwpcnt == 1)
		return (EDEADLK);
	mutex_enter(&p->p_lock);
	if ((aiop = p->p_aio) == NULL)
		error = EINVAL;
	else {
		first = aiop->aio_ok;
		if (aiop->aio_ok == 0)
			aiop->aio_ok = 1;
	}
	mutex_exit(&p->p_lock);
	if (error == 0 && first == 0) {
		return (aio_cleanup_thread(aiop));
		/* should return only to exit */
	}
	return (error);
}

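/*
 * Note on the event port path used below: when an aiocb asks for
 * SIGEV_PORT (or SIGEV_THREAD) notification, a port_kevent_t is allocated
 * up front and bound to the request with aio_port_callback(), and the
 * application's portnfy_user pointer is carried along unchanged so it can
 * be returned with the completion event.  (Summary of the association
 * logic; the callback itself is defined later in this file.)
 */
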
/*
 * Associate an aiocb with a port.
 * This function is used by aiorw() to associate a transaction with a port.
 * Allocate an event port structure (port_alloc_event()) and store the
 * delivered user pointer (portnfy_user) in the portkev_user field of the
 * port_kevent_t structure.
 * The aio_req_portkev pointer in the aio_req_t structure was added to identify
 * the port association.
 */

static int
aio_req_assoc_port_rw(port_notify_t *pntfy, aiocb_t *cbp,
    aio_req_t *reqp, int event)
{
	port_kevent_t	*pkevp = NULL;
	int		error;

	error = port_alloc_event(pntfy->portnfy_port, PORT_ALLOC_DEFAULT,
	    PORT_SOURCE_AIO, &pkevp);
	if (error) {
		if ((error == ENOMEM) || (error == EAGAIN))
			error = EAGAIN;
		else
			error = EINVAL;
	} else {
		port_init_event(pkevp, (uintptr_t)cbp, pntfy->portnfy_user,
		    aio_port_callback, reqp);
		pkevp->portkev_events = event;
		reqp->aio_req_portkev = pkevp;
		reqp->aio_req_port = pntfy->portnfy_port;
	}
	return (error);
}

#ifdef _LP64

/*
 * Asynchronous list IO. A chain of aiocb's are copied in
 * one at a time. If the aiocb is invalid, it is skipped.
 * For each aiocb, the appropriate driver entry point is
 * called. Optimize for the common case where the list
 * of requests is to the same file descriptor.
 *
 * One possible optimization is to define a new driver entry
 * point that supports a list of IO requests. Whether this
 * improves performance depends somewhat on the driver's
 * locking strategy. Processing a list could adversely impact
 * the driver's interrupt latency.
 */
static int
alio(
	int		mode_arg,
	aiocb_t		**aiocb_arg,
	int		nent,
	struct sigevent	*sigev)
{
	file_t		*fp;
	file_t		*prev_fp = NULL;
	int		prev_mode = -1;
	struct vnode	*vp;
	aio_lio_t	*head;
	aio_req_t	*reqp;
	aio_t		*aiop;
	caddr_t		cbplist;
	aiocb_t		cb;
	aiocb_t		*aiocb = &cb;
	aiocb_t		*cbp;
	aiocb_t		**ucbp;
	struct sigevent	sigevk;
	sigqueue_t	*sqp;
	int		(*aio_func)();
	int		mode;
	int		error = 0;
	int		aio_errors = 0;
	int		i;
	size_t		ssize;
	int		deadhead = 0;
	int		aio_notsupported = 0;
	int		lio_head_port;
	int		aio_port;
	int		aio_thread;
	port_kevent_t	*pkevtp = NULL;
	int		portused = 0;
	port_notify_t	pnotify;
	int		event;

	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
		return (EINVAL);

	ssize = (sizeof (aiocb_t *) * nent);
	cbplist = kmem_alloc(ssize, KM_SLEEP);
	ucbp = (aiocb_t **)cbplist;

	if (copyin(aiocb_arg, cbplist, ssize) ||
	    (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent)))) {
		kmem_free(cbplist, ssize);
		return (EFAULT);
	}

	/* Event Ports */
	if (sigev &&
	    (sigevk.sigev_notify == SIGEV_THREAD ||
	    sigevk.sigev_notify == SIGEV_PORT)) {
		if (sigevk.sigev_notify == SIGEV_THREAD) {
			pnotify.portnfy_port = sigevk.sigev_signo;
			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
		} else if (copyin(sigevk.sigev_value.sival_ptr,
		    &pnotify, sizeof (pnotify))) {
			kmem_free(cbplist, ssize);
			return (EFAULT);
		}
		error = port_alloc_event(pnotify.portnfy_port,
		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
		if (error) {
			if (error == ENOMEM || error == EAGAIN)
				error = EAGAIN;
			else
				error = EINVAL;
			kmem_free(cbplist, ssize);
			return (error);
		}
		lio_head_port = pnotify.portnfy_port;
		portused = 1;
	}

	/*
	 * a list head should be allocated if notification is
	 * enabled for this list.
	 */
	head = NULL;

	if (mode_arg == LIO_WAIT || sigev) {
		mutex_enter(&aiop->aio_mutex);
		error = aio_lio_alloc(&head);
		mutex_exit(&aiop->aio_mutex);
		if (error)
			goto done;
		deadhead = 1;
		head->lio_nent = nent;
		head->lio_refcnt = nent;
		head->lio_port = -1;
		head->lio_portkev = NULL;
		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
			if (sqp == NULL) {
				error = EAGAIN;
				goto done;
			}
			sqp->sq_func = NULL;
			sqp->sq_next = NULL;
			sqp->sq_info.si_code = SI_ASYNCIO;
			sqp->sq_info.si_pid = curproc->p_pid;
			sqp->sq_info.si_ctid = PRCTID(curproc);
			sqp->sq_info.si_zoneid = getzoneid();
			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
			sqp->sq_info.si_signo = sigevk.sigev_signo;
			sqp->sq_info.si_value = sigevk.sigev_value;
			head->lio_sigqp = sqp;
		} else {
			head->lio_sigqp = NULL;
		}
		if (pkevtp) {
			/*
			 * Prepare data to send when list of aiocb's
			 * has completed.
			 */
			port_init_event(pkevtp, (uintptr_t)sigev,
			    (void *)(uintptr_t)pnotify.portnfy_user,
			    NULL, head);
			pkevtp->portkev_events = AIOLIO;
			head->lio_portkev = pkevtp;
			head->lio_port = pnotify.portnfy_port;
		}
	}

	for (i = 0; i < nent; i++, ucbp++) {

		cbp = *ucbp;
		/* skip entry if it can't be copied. */
		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* skip if opcode for aiocb is LIO_NOP */
		mode = aiocb->aio_lio_opcode;
		if (mode == LIO_NOP) {
			cbp = NULL;
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* increment file descriptor's ref count. */
		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * check the permission of the partition
		 */
		if ((fp->f_flag & mode) == 0) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * common case where requests are to the same fd
		 * for the same r/w operation.
		 * for UFS, need to set EBADFD
		 */
		vp = fp->f_vnode;
		if (fp != prev_fp || mode != prev_mode) {
			aio_func = check_vp(vp, mode);
			if (aio_func == NULL) {
				prev_fp = NULL;
				releasef(aiocb->aio_fildes);
				lio_set_uerror(&cbp->aio_resultp, EBADFD);
				aio_notsupported++;
				if (head) {
					mutex_enter(&aiop->aio_mutex);
					head->lio_nent--;
					head->lio_refcnt--;
					mutex_exit(&aiop->aio_mutex);
				}
				continue;
			} else {
				prev_fp = fp;
				prev_mode = mode;
			}
		}

		error = aio_req_setup(&reqp, aiop, aiocb,
		    &cbp->aio_resultp, vp, 0);
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		reqp->aio_req_lio = head;
		deadhead = 0;

		/*
		 * Set the errno field now before sending the request to
		 * the driver to avoid a race condition
		 */
		(void) suword32(&cbp->aio_resultp.aio_errno,
		    EINPROGRESS);

		reqp->aio_req_iocb.iocb = (caddr_t)cbp;

		event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
		if (aio_port | aio_thread) {
			port_kevent_t *lpkevp;
			/*
			 * Prepare data to send with each aiocb completed.
			 */
			if (aio_port) {
				void *paddr =
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
				if (copyin(paddr, &pnotify, sizeof (pnotify)))
					error = EFAULT;
			} else {	/* aio_thread */
				pnotify.portnfy_port =
				    aiocb->aio_sigevent.sigev_signo;
				pnotify.portnfy_user =
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
			}
			if (error)
				/* EMPTY */;
			else if (pkevtp != NULL &&
			    pnotify.portnfy_port == lio_head_port)
				error = port_dup_event(pkevtp, &lpkevp,
				    PORT_ALLOC_DEFAULT);
			else
				error = port_alloc_event(pnotify.portnfy_port,
				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
				    &lpkevp);
			if (error == 0) {
				port_init_event(lpkevp, (uintptr_t)cbp,
				    (void *)(uintptr_t)pnotify.portnfy_user,
				    aio_port_callback, reqp);
				lpkevp->portkev_events = event;
				reqp->aio_req_portkev = lpkevp;
				reqp->aio_req_port = pnotify.portnfy_port;
			}
		}

		/*
		 * send the request to driver.
		 */
		if (error == 0) {
			if (aiocb->aio_nbytes == 0) {
				clear_active_fd(aiocb->aio_fildes);
				aio_zerolen(reqp);
				continue;
			}
			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
			    CRED());
		}

		/*
		 * the fd's ref count is not decremented until the IO has
		 * completed unless there was an error.
		 */
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			if (error == ENOTSUP)
				aio_notsupported++;
			else
				aio_errors++;
			lio_set_error(reqp, portused);
		} else {
			clear_active_fd(aiocb->aio_fildes);
		}
	}

	if (aio_notsupported) {
		error = ENOTSUP;
	} else if (aio_errors) {
		/*
		 * return EIO if any request failed
		 */
		error = EIO;
	}

	if (mode_arg == LIO_WAIT) {
		mutex_enter(&aiop->aio_mutex);
		while (head->lio_refcnt > 0) {
			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
				mutex_exit(&aiop->aio_mutex);
				error = EINTR;
				goto done;
			}
		}
		mutex_exit(&aiop->aio_mutex);
		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_64);
	}

done:
	kmem_free(cbplist, ssize);
	if (deadhead) {
		if (head->lio_sigqp)
			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
		if (head->lio_portkev)
			port_free_event(head->lio_portkev);
		kmem_free(head, sizeof (aio_lio_t));
	}
	return (error);
}

#endif /* _LP64 */

/*
 * Asynchronous list IO.
 * If list I/O is called with LIO_WAIT it can still return
 * before all the I/O's are completed if a signal is caught
 * or if the list includes UFS I/O requests. If this happens,
 * libaio will call aliowait() to wait for the I/O's to
 * complete
 */
/*ARGSUSED*/
static int
aliowait(
	int	mode,
	void	*aiocb,
	int	nent,
	void	*sigev,
	int	run_mode)
{
	aio_lio_t	*head;
	aio_t		*aiop;
	caddr_t		cbplist;
	aiocb_t		*cbp, **ucbp;
#ifdef	_SYSCALL32_IMPL
	aiocb32_t	*cbp32;
	caddr32_t	*ucbp32;
	aiocb64_32_t	*cbp64;
#endif
	int		error = 0;
	int		i;
	size_t		ssize = 0;
	model_t		model = get_udatamodel();

	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
		return (EINVAL);

	if (model == DATAMODEL_NATIVE)
		ssize = (sizeof (aiocb_t *) * nent);
#ifdef	_SYSCALL32_IMPL
	else
		ssize = (sizeof (caddr32_t) * nent);
#endif  /* _SYSCALL32_IMPL */

	if (ssize == 0)
		return (EINVAL);

	cbplist = kmem_alloc(ssize, KM_SLEEP);

	if (model == DATAMODEL_NATIVE)
		ucbp = (aiocb_t **)cbplist;
#ifdef	_SYSCALL32_IMPL
	else
		ucbp32 = (caddr32_t *)cbplist;
#endif  /* _SYSCALL32_IMPL */

	if (copyin(aiocb, cbplist, ssize)) {
		error = EFAULT;
		goto done;
	}

	/*
	 * To find the list head, we go through the
	 * list of aiocb structs, find the request
	 * it's for, then get the list head that reqp
	 * points to.
	 */
	head = NULL;

	for (i = 0; i < nent; i++) {
		if (model == DATAMODEL_NATIVE) {
			/*
			 * Since we are only checking for a NULL pointer,
			 * the following should work on both native data
			 * sizes as well as for largefile aiocb.
			 */
			if ((cbp = *ucbp++) == NULL)
				continue;
			if (run_mode != AIO_LARGEFILE)
				if (head = aio_list_get(&cbp->aio_resultp))
					break;
			else {
				/*
				 * This is a case when largefile call is
				 * made on 32 bit kernel.
				 * Treat each pointer as pointer to
				 * aiocb64_32
				 */
				if (head = aio_list_get((aio_result_t *)
				    &(((aiocb64_32_t *)cbp)->aio_resultp)))
					break;
			}
		}
#ifdef	_SYSCALL32_IMPL
		else {
			if (run_mode == AIO_LARGEFILE) {
				if ((cbp64 = (aiocb64_32_t *)
				    (uintptr_t)*ucbp32++) == NULL)
					continue;
				if (head = aio_list_get((aio_result_t *)
				    &cbp64->aio_resultp))
					break;
			} else if (run_mode == AIO_32) {
				if ((cbp32 = (aiocb32_t *)
				    (uintptr_t)*ucbp32++) == NULL)
					continue;
				if (head = aio_list_get((aio_result_t *)
				    &cbp32->aio_resultp))
					break;
			}
		}
#endif	/* _SYSCALL32_IMPL */
	}

	if (head == NULL) {
		error = EINVAL;
		goto done;
	}

	mutex_enter(&aiop->aio_mutex);
	while (head->lio_refcnt > 0) {
		if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
			mutex_exit(&aiop->aio_mutex);
			error = EINTR;
			goto done;
		}
	}
	mutex_exit(&aiop->aio_mutex);
	alio_cleanup(aiop, (aiocb_t **)cbplist, nent, run_mode);
done:
	kmem_free(cbplist, ssize);
	return (error);
}

aio_lio_t *
aio_list_get(aio_result_t *resultp)
{
	aio_lio_t	*head = NULL;
	aio_t		*aiop;
	aio_req_t	**bucket;
	aio_req_t	*reqp;
	long		index;

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (NULL);

	if (resultp) {
		index = AIO_HASH(resultp);
		bucket = &aiop->aio_hash[index];
		for (reqp = *bucket; reqp != NULL;
		    reqp = reqp->aio_hash_next) {
			if (reqp->aio_req_resultp == resultp) {
				head = reqp->aio_req_lio;
				return (head);
			}
		}
	}
	return (NULL);
}


static void
lio_set_uerror(void *resultp, int error)
{
	/*
	 * the resultp field is a pointer to where the
	 * error should be written out to the user's
	 * aiocb.
	 *
	 */
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		(void) sulword(&((aio_result_t *)resultp)->aio_return,
		    (ssize_t)-1);
		(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
	}
#ifdef	_SYSCALL32_IMPL
	else {
		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
		    (uint_t)-1);
		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
	}
#endif  /* _SYSCALL32_IMPL */
}

/*
 * do cleanup completion for all requests in list. memory for
 * each request is also freed.
 */
static void
alio_cleanup(aio_t *aiop, aiocb_t **cbp, int nent, int run_mode)
{
	int i;
	aio_req_t *reqp;
	aio_result_t *resultp;
	aiocb64_32_t *aiocb_64;

	for (i = 0; i < nent; i++) {
		if (get_udatamodel() == DATAMODEL_NATIVE) {
			if (cbp[i] == NULL)
				continue;
			if (run_mode == AIO_LARGEFILE) {
				aiocb_64 = (aiocb64_32_t *)cbp[i];
				resultp = (aio_result_t *)
				    &aiocb_64->aio_resultp;
			} else
				resultp = &cbp[i]->aio_resultp;
		}
#ifdef	_SYSCALL32_IMPL
		else {
			aiocb32_t *aiocb_32;
			caddr32_t *cbp32;

			cbp32 = (caddr32_t *)cbp;
			if (cbp32[i] == 0)
				continue;
			if (run_mode == AIO_32) {
				aiocb_32 = (aiocb32_t *)(uintptr_t)cbp32[i];
				resultp = (aio_result_t *)&aiocb_32->
				    aio_resultp;
			} else if (run_mode == AIO_LARGEFILE) {
				aiocb_64 = (aiocb64_32_t *)(uintptr_t)cbp32[i];
				resultp = (aio_result_t *)&aiocb_64->
				    aio_resultp;
			}
		}
#endif  /* _SYSCALL32_IMPL */
		/*
		 * we need to get the aio_cleanupq_mutex since we call
		 * aio_req_done().
		 */
		mutex_enter(&aiop->aio_cleanupq_mutex);
		mutex_enter(&aiop->aio_mutex);
		reqp = aio_req_done(resultp);
		mutex_exit(&aiop->aio_mutex);
		mutex_exit(&aiop->aio_cleanupq_mutex);
		if (reqp != NULL) {
			aphysio_unlock(reqp);
			aio_copyout_result(reqp);
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		}
	}
}

/*
 * Write out the results for an aio request that is done.
 */
static int
aioerror(void *cb, int run_mode)
{
	aio_result_t *resultp;
	aio_t *aiop;
	aio_req_t *reqp;
	int retval;

	aiop = curproc->p_aio;
	if (aiop == NULL || cb == NULL)
		return (EINVAL);

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (run_mode == AIO_LARGEFILE)
			resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
			    aio_resultp;
		else
			resultp = &((aiocb_t *)cb)->aio_resultp;
	}
#ifdef	_SYSCALL32_IMPL
	else {
		if (run_mode == AIO_LARGEFILE)
			resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
			    aio_resultp;
		else if (run_mode == AIO_32)
			resultp = (aio_result_t *)&((aiocb32_t *)cb)->
			    aio_resultp;
	}
#endif  /* _SYSCALL32_IMPL */
	/*
	 * we need to get the aio_cleanupq_mutex since we call
	 * aio_req_find().
	 */
	mutex_enter(&aiop->aio_cleanupq_mutex);
	mutex_enter(&aiop->aio_mutex);
	retval = aio_req_find(resultp, &reqp);
	mutex_exit(&aiop->aio_mutex);
	mutex_exit(&aiop->aio_cleanupq_mutex);
	if (retval == 0) {
		aphysio_unlock(reqp);
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
		return (0);
	} else if (retval == 1)
		return (EINPROGRESS);
	else if (retval == 2)
		return (EINVAL);
	return (0);
}

/*
 *	aio_cancel - if no requests outstanding,
 *			return AIO_ALLDONE
 *			else
 *			return AIO_NOTCANCELED
 */
static int
aio_cancel(int fildes, void *cb, long *rval, int run_mode)
{
	aio_t *aiop;
	void *resultp;
	int index;
	aio_req_t **bucket;
	aio_req_t *ent;


	/*
	 * Verify valid file descriptor
	 */
	if ((getf(fildes)) == NULL) {
		return (EBADF);
	}
	releasef(fildes);

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	if (aiop->aio_outstanding == 0) {
		*rval = AIO_ALLDONE;
		return (0);
	}

	mutex_enter(&aiop->aio_mutex);
	if (cb != NULL) {
		if (get_udatamodel() == DATAMODEL_NATIVE) {
			if (run_mode == AIO_LARGEFILE)
				resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
				    ->aio_resultp;
			else
				resultp = &((aiocb_t *)cb)->aio_resultp;
		}
#ifdef	_SYSCALL32_IMPL
		else {
			if (run_mode == AIO_LARGEFILE)
				resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
				    ->aio_resultp;
			else if (run_mode == AIO_32)
				resultp = (aio_result_t *)&((aiocb32_t *)cb)
				    ->aio_resultp;
		}
#endif  /* _SYSCALL32_IMPL */
		index = AIO_HASH(resultp);
		bucket = &aiop->aio_hash[index];
		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
			if (ent->aio_req_resultp == resultp) {
				if ((ent->aio_req_flags & AIO_PENDING) == 0) {
					mutex_exit(&aiop->aio_mutex);
					*rval = AIO_ALLDONE;
					return (0);
				}
				mutex_exit(&aiop->aio_mutex);
				*rval = AIO_NOTCANCELED;
				return (0);
			}
		}
		mutex_exit(&aiop->aio_mutex);
		*rval = AIO_ALLDONE;
		return (0);
	}

	for (index = 0; index < AIO_HASHSZ; index++) {
		bucket = &aiop->aio_hash[index];
		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
			if (ent->aio_req_fd == fildes) {
				if ((ent->aio_req_flags & AIO_PENDING) != 0) {
					mutex_exit(&aiop->aio_mutex);
					*rval = AIO_NOTCANCELED;
					return (0);
				}
			}
		}
	}
	mutex_exit(&aiop->aio_mutex);
	*rval = AIO_ALLDONE;
	return (0);
}

/*
 * solaris version of asynchronous read and write
 */
static int
arw(
	int	opcode,
	int	fdes,
	char	*bufp,
	int	bufsize,
	offset_t	offset,
	aio_result_t	*resultp,
	int		mode)
{
	file_t		*fp;
	int		error;
	struct vnode	*vp;
	aio_req_t	*reqp;
	aio_t		*aiop;
	int		(*aio_func)();
#ifdef _LP64
	aiocb_t		aiocb;
#else
	aiocb64_32_t	aiocb64;
#endif

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	if ((fp = getf(fdes)) == NULL) {
		return (EBADF);
	}

	/*
	 * check the permission of the partition
	 */
	if ((fp->f_flag & mode) == 0) {
		releasef(fdes);
		return (EBADF);
	}

	vp = fp->f_vnode;
	aio_func = check_vp(vp, mode);
	if (aio_func == NULL) {
		releasef(fdes);
		return (EBADFD);
	}
#ifdef _LP64
	aiocb.aio_fildes = fdes;
	aiocb.aio_buf = bufp;
	aiocb.aio_nbytes = bufsize;
	aiocb.aio_offset = offset;
	aiocb.aio_sigevent.sigev_notify = 0;
	error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp, 1);
#else
	aiocb64.aio_fildes = fdes;
	aiocb64.aio_buf = (caddr32_t)bufp;
	aiocb64.aio_nbytes = bufsize;
	aiocb64.aio_offset = offset;
	aiocb64.aio_sigevent.sigev_notify = 0;
	error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp, 1);
#endif
	if (error) {
		releasef(fdes);
		return (error);
	}

	/*
	 * enable polling on this request if the opcode has
	 * the AIO poll bit set
	 */
	if (opcode & AIO_POLL_BIT)
		reqp->aio_req_flags |= AIO_POLL;

	if (bufsize == 0) {
		clear_active_fd(fdes);
		aio_zerolen(reqp);
		return (0);
	}
	/*
	 * send the request to driver.
	 */
	error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
	/*
	 * the fd is stored in the aio_req_t by aio_req_setup(), and
	 * is released by the aio_cleanup_thread() when the IO has
	 * completed.
	 */
	if (error) {
		releasef(fdes);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		aiop->aio_pending--;
		if (aiop->aio_flags & AIO_REQ_BLOCK)
			cv_signal(&aiop->aio_cleanupcv);
		mutex_exit(&aiop->aio_mutex);
		return (error);
	}
	clear_active_fd(fdes);
	return (0);
}

/*
 * posix version of asynchronous read and write
 */
static int
aiorw(
	int		opcode,
	void		*aiocb_arg,
	int		mode,
	int		run_mode)
{
#ifdef _SYSCALL32_IMPL
	aiocb32_t	aiocb32;
	struct	sigevent32 *sigev32;
	port_notify32_t	pntfy32;
#endif
	aiocb64_32_t	aiocb64;
	aiocb_t		aiocb;
	file_t		*fp;
	int		error, fd;
	size_t		bufsize;
	struct vnode	*vp;
	aio_req_t	*reqp;
	aio_t		*aiop;
	int		(*aio_func)();
	aio_result_t	*resultp;
	struct	sigevent *sigev;
	model_t		model;
	int		aio_use_port = 0;
	port_notify_t	pntfy;

	model = get_udatamodel();
	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	if (model == DATAMODEL_NATIVE) {
		if (run_mode != AIO_LARGEFILE) {
			if (copyin(aiocb_arg, &aiocb, sizeof (aiocb_t)))
				return (EFAULT);
			bufsize = aiocb.aio_nbytes;
			resultp = &(((aiocb_t *)aiocb_arg)->aio_resultp);
			if ((fp = getf(fd = aiocb.aio_fildes)) == NULL) {
				return (EBADF);
			}
			sigev = &aiocb.aio_sigevent;
		} else {
			/*
			 * We come here only when we make largefile
			 * call on 32 bit kernel using 32 bit library.
			 */
			if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
				return (EFAULT);
			bufsize = aiocb64.aio_nbytes;
			resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
			    ->aio_resultp);
			if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
				return (EBADF);
			sigev = (struct sigevent *)&aiocb64.aio_sigevent;
		}

		if (sigev->sigev_notify == SIGEV_PORT) {
			if (copyin((void *)sigev->sigev_value.sival_ptr,
			    &pntfy, sizeof (port_notify_t))) {
				releasef(fd);
				return (EFAULT);
			}
			aio_use_port = 1;
		} else if (sigev->sigev_notify == SIGEV_THREAD) {
			pntfy.portnfy_port = aiocb.aio_sigevent.sigev_signo;
			pntfy.portnfy_user =
			    aiocb.aio_sigevent.sigev_value.sival_ptr;
			aio_use_port = 1;
		}
	}
#ifdef	_SYSCALL32_IMPL
	else {
		if (run_mode == AIO_32) {
			/* 32 bit system call is being made on 64 bit kernel */
			if (copyin(aiocb_arg, &aiocb32, sizeof (aiocb32_t)))
				return (EFAULT);

			bufsize = aiocb32.aio_nbytes;
			aiocb_32ton(&aiocb32, &aiocb);
			resultp = (aio_result_t *)&(((aiocb32_t *)aiocb_arg)->
			    aio_resultp);
			if ((fp = getf(fd = aiocb32.aio_fildes)) == NULL) {
				return (EBADF);
			}
			sigev32 = &aiocb32.aio_sigevent;
		} else if (run_mode == AIO_LARGEFILE) {
			/*
			 * We come here only when we make largefile
			 * call on 64 bit kernel using 32 bit library.
			 */
			if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
				return (EFAULT);
			bufsize = aiocb64.aio_nbytes;
			aiocb_LFton(&aiocb64, &aiocb);
			resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
			    ->aio_resultp);
			if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
				return (EBADF);
			sigev32 = &aiocb64.aio_sigevent;
		}

		if (sigev32->sigev_notify == SIGEV_PORT) {
			if (copyin(
			    (void *)(uintptr_t)sigev32->sigev_value.sival_ptr,
			    &pntfy32, sizeof (port_notify32_t))) {
				releasef(fd);
				return (EFAULT);
			}
			pntfy.portnfy_port = pntfy32.portnfy_port;
			pntfy.portnfy_user = (void *)(uintptr_t)
			    pntfy32.portnfy_user;
			aio_use_port = 1;
		} else if (sigev32->sigev_notify == SIGEV_THREAD) {
			pntfy.portnfy_port = sigev32->sigev_signo;
			pntfy.portnfy_user = (void *)(uintptr_t)
			    sigev32->sigev_value.sival_ptr;
			aio_use_port = 1;
		}
	}
#endif  /* _SYSCALL32_IMPL */

	/*
	 * check the permission of the partition
	 */

	if ((fp->f_flag & mode) == 0) {
		releasef(fd);
		return (EBADF);
	}

	vp = fp->f_vnode;
	aio_func = check_vp(vp, mode);
	if (aio_func == NULL) {
		releasef(fd);
		return (EBADFD);
	}
	if (run_mode == AIO_LARGEFILE)
		error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp, 0);
	else
		error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp, 0);

	if (error) {
		releasef(fd);
		return (error);
	}
	/*
	 * enable polling on this request if the opcode has
	 * the AIO poll bit set
	 */
	if (opcode & AIO_POLL_BIT)
		reqp->aio_req_flags |= AIO_POLL;

	if (model == DATAMODEL_NATIVE)
		reqp->aio_req_iocb.iocb = aiocb_arg;
#ifdef  _SYSCALL32_IMPL
	else
		reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)aiocb_arg;
#endif

	if (aio_use_port) {
		int event = (run_mode == AIO_LARGEFILE)?
		    ((mode == FREAD)? AIOAREAD64 : AIOAWRITE64) :
		    ((mode == FREAD)? AIOAREAD : AIOAWRITE);
		error = aio_req_assoc_port_rw(&pntfy, aiocb_arg, reqp, event);
	}

	/*
	 * send the request to driver.
	 */
	if (error == 0) {
		if (bufsize == 0) {
			clear_active_fd(fd);
			aio_zerolen(reqp);
			return (0);
		}
		error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
	}

	/*
	 * the fd is stored in the aio_req_t by aio_req_setup(), and
	 * is released by the aio_cleanup_thread() when the IO has
	 * completed.
	 */
	if (error) {
		releasef(fd);
		mutex_enter(&aiop->aio_mutex);
		if (aio_use_port)
			aio_deq(&aiop->aio_portpending, reqp);
		aio_req_free(aiop, reqp);
		aiop->aio_pending--;
		if (aiop->aio_flags & AIO_REQ_BLOCK)
			cv_signal(&aiop->aio_cleanupcv);
		mutex_exit(&aiop->aio_mutex);
		return (error);
	}
	clear_active_fd(fd);
	return (0);
}


/*
 * set error for a list IO entry that failed.
 */
static void
lio_set_error(aio_req_t *reqp, int portused)
{
	aio_t *aiop = curproc->p_aio;

	if (aiop == NULL)
		return;

	mutex_enter(&aiop->aio_mutex);
	if (portused)
		aio_deq(&aiop->aio_portpending, reqp);
	aiop->aio_pending--;
	/* request failed, AIO_PHYSIODONE set to avoid physio cleanup. */
	reqp->aio_req_flags |= AIO_PHYSIODONE;
	/*
	 * Need to free the request now as it's never
	 * going to get on the done queue
	 *
	 * Note: aio_outstanding is decremented in
	 *	 aio_req_free()
	 */
	aio_req_free(aiop, reqp);
	if (aiop->aio_flags & AIO_REQ_BLOCK)
		cv_signal(&aiop->aio_cleanupcv);
	mutex_exit(&aiop->aio_mutex);
}

/*
 * check if a specified request is done, and remove it from
 * the done queue. otherwise remove anybody from the done queue
 * if NULL is specified.
 */
static aio_req_t *
aio_req_done(void *resultp)
{
	aio_req_t **bucket;
	aio_req_t *ent;
	aio_t *aiop = curproc->p_aio;
	long index;

	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if (resultp) {
		index = AIO_HASH(resultp);
		bucket = &aiop->aio_hash[index];
		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
			if (ent->aio_req_resultp == (aio_result_t *)resultp) {
				if (ent->aio_req_flags & AIO_DONEQ) {
					return (aio_req_remove(ent));
				}
				return (NULL);
			}
		}
		/* no match, resultp is invalid */
		return (NULL);
	}
	return (aio_req_remove(NULL));
}

/*
 * determine if a user-level resultp pointer is associated with an
 * active IO request. Zero is returned when the request is done,
 * and the request is removed from the done queue. Only when the
 * return value is zero, is the "reqp" pointer valid. One is returned
 * when the request is inprogress. Two is returned when the request
 * is invalid.
2300 */ 2301 static int 2302 aio_req_find(aio_result_t *resultp, aio_req_t **reqp) 2303 { 2304 aio_req_t **bucket; 2305 aio_req_t *ent; 2306 aio_t *aiop = curproc->p_aio; 2307 long index; 2308 2309 ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex)); 2310 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2311 2312 index = AIO_HASH(resultp); 2313 bucket = &aiop->aio_hash[index]; 2314 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 2315 if (ent->aio_req_resultp == resultp) { 2316 if (ent->aio_req_flags & AIO_DONEQ) { 2317 *reqp = aio_req_remove(ent); 2318 return (0); 2319 } 2320 return (1); 2321 } 2322 } 2323 /* no match, resultp is invalid */ 2324 return (2); 2325 } 2326 2327 /* 2328 * remove a request from the done queue. 2329 */ 2330 static aio_req_t * 2331 aio_req_remove(aio_req_t *reqp) 2332 { 2333 aio_t *aiop = curproc->p_aio; 2334 2335 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2336 2337 if (reqp != NULL) { 2338 ASSERT(reqp->aio_req_flags & AIO_DONEQ); 2339 if (reqp->aio_req_next == reqp) { 2340 /* only one request on queue */ 2341 if (reqp == aiop->aio_doneq) { 2342 aiop->aio_doneq = NULL; 2343 } else { 2344 ASSERT(reqp == aiop->aio_cleanupq); 2345 aiop->aio_cleanupq = NULL; 2346 } 2347 } else { 2348 reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev; 2349 reqp->aio_req_prev->aio_req_next = reqp->aio_req_next; 2350 /* 2351 * The request can be either on the aio_doneq or the 2352 * aio_cleanupq 2353 */ 2354 if (reqp == aiop->aio_doneq) 2355 aiop->aio_doneq = reqp->aio_req_next; 2356 2357 if (reqp == aiop->aio_cleanupq) 2358 aiop->aio_cleanupq = reqp->aio_req_next; 2359 } 2360 reqp->aio_req_flags &= ~AIO_DONEQ; 2361 reqp->aio_req_next = NULL; 2362 reqp->aio_req_prev = NULL; 2363 } else if ((reqp = aiop->aio_doneq) != NULL) { 2364 ASSERT(reqp->aio_req_flags & AIO_DONEQ); 2365 if (reqp == reqp->aio_req_next) { 2366 /* only one request on queue */ 2367 aiop->aio_doneq = NULL; 2368 } else { 2369 reqp->aio_req_prev->aio_req_next = reqp->aio_req_next; 2370 reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev; 2371 aiop->aio_doneq = reqp->aio_req_next; 2372 } 2373 reqp->aio_req_flags &= ~AIO_DONEQ; 2374 reqp->aio_req_next = NULL; 2375 reqp->aio_req_prev = NULL; 2376 } 2377 if (aiop->aio_doneq == NULL && (aiop->aio_flags & AIO_WAITN)) 2378 cv_broadcast(&aiop->aio_waitcv); 2379 return (reqp); 2380 } 2381 2382 static int 2383 aio_req_setup(aio_req_t **reqpp, aio_t *aiop, aiocb_t *arg, 2384 aio_result_t *resultp, vnode_t *vp, int old_solaris_req) 2385 { 2386 sigqueue_t *sqp = NULL; 2387 aio_req_t *reqp; 2388 struct uio *uio; 2389 struct sigevent *sigev; 2390 int error; 2391 2392 sigev = &arg->aio_sigevent; 2393 if (sigev->sigev_notify == SIGEV_SIGNAL && 2394 sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) { 2395 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 2396 if (sqp == NULL) 2397 return (EAGAIN); 2398 sqp->sq_func = NULL; 2399 sqp->sq_next = NULL; 2400 sqp->sq_info.si_code = SI_ASYNCIO; 2401 sqp->sq_info.si_pid = curproc->p_pid; 2402 sqp->sq_info.si_ctid = PRCTID(curproc); 2403 sqp->sq_info.si_zoneid = getzoneid(); 2404 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 2405 sqp->sq_info.si_signo = sigev->sigev_signo; 2406 sqp->sq_info.si_value = sigev->sigev_value; 2407 } 2408 2409 mutex_enter(&aiop->aio_mutex); 2410 2411 if (aiop->aio_flags & AIO_REQ_BLOCK) { 2412 mutex_exit(&aiop->aio_mutex); 2413 if (sqp) 2414 kmem_free(sqp, sizeof (sigqueue_t)); 2415 return (EIO); 2416 } 2417 /* 2418 * get an aio_reqp from the free list or allocate one 2419 * from dynamic memory. 
2420 */ 2421 if (error = aio_req_alloc(&reqp, resultp)) { 2422 mutex_exit(&aiop->aio_mutex); 2423 if (sqp) 2424 kmem_free(sqp, sizeof (sigqueue_t)); 2425 return (error); 2426 } 2427 aiop->aio_pending++; 2428 aiop->aio_outstanding++; 2429 reqp->aio_req_flags = AIO_PENDING; 2430 if (old_solaris_req) { 2431 /* this is an old solaris aio request */ 2432 reqp->aio_req_flags |= AIO_SOLARIS; 2433 aiop->aio_flags |= AIO_SOLARIS_REQ; 2434 } 2435 if (sigev->sigev_notify == SIGEV_THREAD || 2436 sigev->sigev_notify == SIGEV_PORT) 2437 aio_enq(&aiop->aio_portpending, reqp, 0); 2438 mutex_exit(&aiop->aio_mutex); 2439 /* 2440 * initialize aio request. 2441 */ 2442 reqp->aio_req_fd = arg->aio_fildes; 2443 reqp->aio_req_sigqp = sqp; 2444 reqp->aio_req_iocb.iocb = NULL; 2445 reqp->aio_req_lio = NULL; 2446 reqp->aio_req_buf.b_file = vp; 2447 uio = reqp->aio_req.aio_uio; 2448 uio->uio_iovcnt = 1; 2449 uio->uio_iov->iov_base = (caddr_t)arg->aio_buf; 2450 uio->uio_iov->iov_len = arg->aio_nbytes; 2451 uio->uio_loffset = arg->aio_offset; 2452 *reqpp = reqp; 2453 return (0); 2454 } 2455 2456 /* 2457 * Allocate p_aio struct. 2458 */ 2459 static aio_t * 2460 aio_aiop_alloc(void) 2461 { 2462 aio_t *aiop; 2463 2464 ASSERT(MUTEX_HELD(&curproc->p_lock)); 2465 2466 aiop = kmem_zalloc(sizeof (struct aio), KM_NOSLEEP); 2467 if (aiop) { 2468 mutex_init(&aiop->aio_mutex, NULL, MUTEX_DEFAULT, NULL); 2469 mutex_init(&aiop->aio_cleanupq_mutex, NULL, MUTEX_DEFAULT, 2470 NULL); 2471 mutex_init(&aiop->aio_portq_mutex, NULL, MUTEX_DEFAULT, NULL); 2472 } 2473 return (aiop); 2474 } 2475 2476 /* 2477 * Allocate an aio_req struct. 2478 */ 2479 static int 2480 aio_req_alloc(aio_req_t **nreqp, aio_result_t *resultp) 2481 { 2482 aio_req_t *reqp; 2483 aio_t *aiop = curproc->p_aio; 2484 2485 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2486 2487 if ((reqp = aiop->aio_free) != NULL) { 2488 aiop->aio_free = reqp->aio_req_next; 2489 bzero(reqp, sizeof (*reqp)); 2490 } else { 2491 /* 2492 * Check whether memory is getting tight. 2493 * This is a temporary mechanism to avoid memory 2494 * exhaustion by a single process until we come up 2495 * with a per process solution such as setrlimit(). 2496 */ 2497 if (freemem < desfree) 2498 return (EAGAIN); 2499 reqp = kmem_zalloc(sizeof (struct aio_req_t), KM_NOSLEEP); 2500 if (reqp == NULL) 2501 return (EAGAIN); 2502 } 2503 reqp->aio_req.aio_uio = &reqp->aio_req_uio; 2504 reqp->aio_req.aio_uio->uio_iov = &reqp->aio_req_iov; 2505 reqp->aio_req.aio_private = reqp; 2506 reqp->aio_req_buf.b_offset = -1; 2507 reqp->aio_req_resultp = resultp; 2508 if (aio_hash_insert(reqp, aiop)) { 2509 reqp->aio_req_next = aiop->aio_free; 2510 aiop->aio_free = reqp; 2511 return (EBUSY); 2512 } 2513 *nreqp = reqp; 2514 return (0); 2515 } 2516 2517 /* 2518 * Allocate an aio_lio_t struct. 2519 */ 2520 static int 2521 aio_lio_alloc(aio_lio_t **head) 2522 { 2523 aio_lio_t *liop; 2524 aio_t *aiop = curproc->p_aio; 2525 2526 ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 2527 2528 if ((liop = aiop->aio_lio_free) != NULL) { 2529 aiop->aio_lio_free = liop->lio_next; 2530 } else { 2531 /* 2532 * Check whether memory is getting tight. 2533 * This is a temporary mechanism to avoid memory 2534 * exhaustion by a single process until we come up 2535 * with a per process solution such as setrlimit(). 
2536 */ 2537 if (freemem < desfree) 2538 return (EAGAIN); 2539 2540 liop = kmem_zalloc(sizeof (aio_lio_t), KM_NOSLEEP); 2541 if (liop == NULL) 2542 return (EAGAIN); 2543 } 2544 *head = liop; 2545 return (0); 2546 } 2547 2548 /* 2549 * this is a special per-process thread that is only activated if 2550 * the process is unmapping a segment with outstanding aio. Normally, 2551 * the process will have completed the aio before unmapping the 2552 * segment. If the process does unmap a segment with outstanding aio, 2553 * this special thread will guarantee that the locked pages due to 2554 * aphysio() are released, thereby permitting the segment to be 2555 * unmapped. In addition to this, the cleanup thread is woken up 2556 * during DR operations to release the locked pages. 2557 */ 2558 2559 static int 2560 aio_cleanup_thread(aio_t *aiop) 2561 { 2562 proc_t *p = curproc; 2563 struct as *as = p->p_as; 2564 int poked = 0; 2565 kcondvar_t *cvp; 2566 int exit_flag = 0; 2567 int rqclnup = 0; 2568 2569 sigfillset(&curthread->t_hold); 2570 sigdiffset(&curthread->t_hold, &cantmask); 2571 for (;;) { 2572 /* 2573 * if a segment is being unmapped, and the current 2574 * process's done queue is not empty, then every request 2575 * on the doneq with locked resources should be forced 2576 * to release its locks. By moving the doneq request 2577 * to the cleanupq, aio_cleanup() will process the cleanupq, 2578 * and place requests back onto the doneq. All requests 2579 * processed by aio_cleanup() will have their physical 2580 * resources unlocked. 2581 */ 2582 mutex_enter(&aiop->aio_mutex); 2583 if ((aiop->aio_flags & AIO_CLEANUP) == 0) { 2584 aiop->aio_flags |= AIO_CLEANUP; 2585 mutex_enter(&as->a_contents); 2586 if (aiop->aio_rqclnup) { 2587 aiop->aio_rqclnup = 0; 2588 rqclnup = 1; 2589 } 2590 mutex_exit(&as->a_contents); 2591 if (aiop->aio_doneq) { 2592 aio_req_t *doneqhead = aiop->aio_doneq; 2593 aiop->aio_doneq = NULL; 2594 aio_cleanupq_concat(aiop, doneqhead, AIO_DONEQ); 2595 } 2596 } 2597 mutex_exit(&aiop->aio_mutex); 2598 aio_cleanup(AIO_CLEANUP_THREAD); 2599 /* 2600 * thread should block on the cleanupcv while 2601 * AIO_CLEANUP is set. 2602 */ 2603 cvp = &aiop->aio_cleanupcv; 2604 mutex_enter(&aiop->aio_mutex); 2605 2606 if (aiop->aio_pollq != NULL || aiop->aio_cleanupq != NULL || 2607 aiop->aio_notifyq != NULL || 2608 aiop->aio_portcleanupq != NULL) { 2609 mutex_exit(&aiop->aio_mutex); 2610 continue; 2611 } 2612 mutex_enter(&as->a_contents); 2613 2614 /* 2615 * AIO_CLEANUP determines when the cleanup thread 2616 * should be active. This flag is set when 2617 * the cleanup thread is awakened by as_unmap() or 2618 * due to DR operations. 2619 * The flag is cleared when the blocking as_unmap() 2620 * that originally awakened us is allowed to 2621 * complete. as_unmap() blocks when trying to 2622 * unmap a segment that has SOFTLOCKed pages. When 2623 * the segment's pages are all SOFTUNLOCKed, 2624 * as->a_flags & AS_UNMAPWAIT should be zero. 2625 * 2626 * In case of cleanup request by DR, the flag is cleared 2627 * once all the pending aio requests have been processed. 2628 * 2629 * The flag shouldn't be cleared right away if the 2630 * cleanup thread was interrupted because the process 2631 * is doing forkall(). This happens when cv_wait_sig() 2632 * returns zero, because it was awakened by a pokelwps(). 2633 * If the process is not exiting, it must be doing forkall().
2634 */ 2635 if ((poked == 0) && 2636 ((!rqclnup && (AS_ISUNMAPWAIT(as) == 0)) || 2637 (aiop->aio_pending == 0))) { 2638 aiop->aio_flags &= ~(AIO_CLEANUP | AIO_CLEANUP_PORT); 2639 cvp = &as->a_cv; 2640 rqclnup = 0; 2641 } 2642 mutex_exit(&aiop->aio_mutex); 2643 if (poked) { 2644 /* 2645 * If the process is exiting/killed, don't return 2646 * immediately without waiting for pending I/O's 2647 * and releasing the page locks. 2648 */ 2649 if (p->p_flag & (SEXITLWPS|SKILLED)) { 2650 /* 2651 * If exit_flag is set, then it is 2652 * safe to exit because we have released 2653 * page locks of completed I/O's. 2654 */ 2655 if (exit_flag) 2656 break; 2657 2658 mutex_exit(&as->a_contents); 2659 2660 /* 2661 * Wait for all the pending aio to complete. 2662 */ 2663 mutex_enter(&aiop->aio_mutex); 2664 aiop->aio_flags |= AIO_REQ_BLOCK; 2665 while (aiop->aio_pending != 0) 2666 cv_wait(&aiop->aio_cleanupcv, 2667 &aiop->aio_mutex); 2668 mutex_exit(&aiop->aio_mutex); 2669 exit_flag = 1; 2670 continue; 2671 } else if (p->p_flag & 2672 (SHOLDFORK|SHOLDFORK1|SHOLDWATCH)) { 2673 /* 2674 * hold LWP until it 2675 * is continued. 2676 */ 2677 mutex_exit(&as->a_contents); 2678 mutex_enter(&p->p_lock); 2679 stop(PR_SUSPENDED, SUSPEND_NORMAL); 2680 mutex_exit(&p->p_lock); 2681 poked = 0; 2682 continue; 2683 } 2684 } else { 2685 /* 2686 * When started this thread will sleep on as->a_cv. 2687 * as_unmap will awake this thread if the 2688 * segment has SOFTLOCKed pages (poked = 0). 2689 * 1. pokelwps() awakes this thread => 2690 * break the loop to check SEXITLWPS, SHOLDFORK, etc 2691 * 2. as_unmap awakes this thread => 2692 * to break the loop it is necessary that 2693 * - AS_UNMAPWAIT is set (as_unmap is waiting for 2694 * memory to be unlocked) 2695 * - AIO_CLEANUP is not set 2696 * (if AIO_CLEANUP is set we have to wait for 2697 * pending requests. aio_done will send a signal 2698 * for every request which completes to continue 2699 * unmapping the corresponding address range) 2700 * 3. A cleanup request will wake this thread up, ex. 2701 * by the DR operations. The aio_rqclnup flag will 2702 * be set. 2703 */ 2704 while (poked == 0) { 2705 /* 2706 * The clean up requests that came in 2707 * after we had just cleaned up, couldn't 2708 * be causing the unmap thread to block - as 2709 * unmap event happened first. 2710 * Let aio_done() wake us up if it sees a need. 2711 */ 2712 if (aiop->aio_rqclnup && 2713 (aiop->aio_flags & AIO_CLEANUP) == 0) 2714 break; 2715 poked = !cv_wait_sig(cvp, &as->a_contents); 2716 if (AS_ISUNMAPWAIT(as) == 0) 2717 cv_signal(cvp); 2718 if (aiop->aio_outstanding != 0) 2719 break; 2720 } 2721 } 2722 mutex_exit(&as->a_contents); 2723 } 2724 exit: 2725 mutex_exit(&as->a_contents); 2726 ASSERT((curproc->p_flag & (SEXITLWPS|SKILLED))); 2727 aston(curthread); /* make thread do post_syscall */ 2728 return (0); 2729 } 2730 2731 /* 2732 * save a reference to a user's outstanding aio in a hash list. 
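 * The request is linked into the aiop->aio_hash[] bucket selected by
 * AIO_HASH(resultp); aio_req_done() and aio_req_find() above walk the
 * same chain to find it again.  A minimal sketch of that lookup:
 *
 *	index = AIO_HASH(resultp);
 *	for (ent = aiop->aio_hash[index]; ent != NULL;
 *	    ent = ent->aio_hash_next)
 *		if (ent->aio_req_resultp == resultp)
 *			break;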
2733 */ 2734 static int 2735 aio_hash_insert( 2736 aio_req_t *aio_reqp, 2737 aio_t *aiop) 2738 { 2739 long index; 2740 aio_result_t *resultp = aio_reqp->aio_req_resultp; 2741 aio_req_t *current; 2742 aio_req_t **nextp; 2743 2744 index = AIO_HASH(resultp); 2745 nextp = &aiop->aio_hash[index]; 2746 while ((current = *nextp) != NULL) { 2747 if (current->aio_req_resultp == resultp) 2748 return (DUPLICATE); 2749 nextp = &current->aio_hash_next; 2750 } 2751 *nextp = aio_reqp; 2752 aio_reqp->aio_hash_next = NULL; 2753 return (0); 2754 } 2755 2756 static int 2757 (*check_vp(struct vnode *vp, int mode))(vnode_t *, struct aio_req *, 2758 cred_t *) 2759 { 2760 struct snode *sp; 2761 dev_t dev; 2762 struct cb_ops *cb; 2763 major_t major; 2764 int (*aio_func)(); 2765 2766 dev = vp->v_rdev; 2767 major = getmajor(dev); 2768 2769 /* 2770 * return NULL for requests to files and STREAMs so 2771 * that libaio takes care of them. 2772 */ 2773 if (vp->v_type == VCHR) { 2774 /* no stream device for kaio */ 2775 if (STREAMSTAB(major)) { 2776 return (NULL); 2777 } 2778 } else { 2779 return (NULL); 2780 } 2781 2782 /* 2783 * Check old drivers which do not have async I/O entry points. 2784 */ 2785 if (devopsp[major]->devo_rev < 3) 2786 return (NULL); 2787 2788 cb = devopsp[major]->devo_cb_ops; 2789 2790 if (cb->cb_rev < 1) 2791 return (NULL); 2792 2793 /* 2794 * Check whether this device is a block device. 2795 * Kaio is not supported for devices like tty. 2796 */ 2797 if (cb->cb_strategy == nodev || cb->cb_strategy == NULL) 2798 return (NULL); 2799 2800 /* 2801 * Clustering: If vnode is a PXFS vnode, then the device may be remote. 2802 * We cannot call the driver directly. Instead return the 2803 * PXFS functions. 2804 */ 2805 2806 if (IS_PXFSVP(vp)) { 2807 if (mode & FREAD) 2808 return (clpxfs_aio_read); 2809 else 2810 return (clpxfs_aio_write); 2811 } 2812 if (mode & FREAD) 2813 aio_func = (cb->cb_aread == nodev) ? NULL : driver_aio_read; 2814 else 2815 aio_func = (cb->cb_awrite == nodev) ? NULL : driver_aio_write; 2816 2817 /* 2818 * Do we need this ? 2819 * nodev returns ENXIO anyway. 2820 */ 2821 if (aio_func == nodev) 2822 return (NULL); 2823 2824 sp = VTOS(vp); 2825 smark(sp, SACC); 2826 return (aio_func); 2827 } 2828 2829 /* 2830 * Clustering: We want check_vp to return a function prototyped 2831 * correctly that will be common to both PXFS and regular case. 2832 * We define this intermediate function that will do the right 2833 * thing for driver cases. 2834 */ 2835 2836 static int 2837 driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p) 2838 { 2839 dev_t dev; 2840 struct cb_ops *cb; 2841 2842 ASSERT(vp->v_type == VCHR); 2843 ASSERT(!IS_PXFSVP(vp)); 2844 dev = VTOS(vp)->s_dev; 2845 ASSERT(STREAMSTAB(getmajor(dev)) == NULL); 2846 2847 cb = devopsp[getmajor(dev)]->devo_cb_ops; 2848 2849 ASSERT(cb->cb_awrite != nodev); 2850 return ((*cb->cb_awrite)(dev, aio, cred_p)); 2851 } 2852 2853 /* 2854 * Clustering: We want check_vp to return a function prototyped 2855 * correctly that will be common to both PXFS and regular case. 2856 * We define this intermediate function that will do the right 2857 * thing for driver cases.
2858 */ 2859 2860 static int 2861 driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p) 2862 { 2863 dev_t dev; 2864 struct cb_ops *cb; 2865 2866 ASSERT(vp->v_type == VCHR); 2867 ASSERT(!IS_PXFSVP(vp)); 2868 dev = VTOS(vp)->s_dev; 2869 ASSERT(!STREAMSTAB(getmajor(dev))); 2870 2871 cb = devopsp[getmajor(dev)]->devo_cb_ops; 2872 2873 ASSERT(cb->cb_aread != nodev); 2874 return ((*cb->cb_aread)(dev, aio, cred_p)); 2875 } 2876 2877 /* 2878 * This routine is called when a largefile call is made by a 32-bit 2879 * process on an ILP32 or LP64 kernel. All 64-bit processes are large 2880 * file by definition and will call alio() instead. 2881 */ 2882 static int 2883 alioLF( 2884 int mode_arg, 2885 void *aiocb_arg, 2886 int nent, 2887 void *sigev) 2888 { 2889 file_t *fp; 2890 file_t *prev_fp = NULL; 2891 int prev_mode = -1; 2892 struct vnode *vp; 2893 aio_lio_t *head; 2894 aio_req_t *reqp; 2895 aio_t *aiop; 2896 caddr_t cbplist; 2897 aiocb64_32_t cb64; 2898 aiocb64_32_t *aiocb = &cb64; 2899 aiocb64_32_t *cbp; 2900 caddr32_t *ucbp; 2901 #ifdef _LP64 2902 aiocb_t aiocb_n; 2903 #endif 2904 struct sigevent32 sigevk; 2905 sigqueue_t *sqp; 2906 int (*aio_func)(); 2907 int mode; 2908 int error = 0; 2909 int aio_errors = 0; 2910 int i; 2911 size_t ssize; 2912 int deadhead = 0; 2913 int aio_notsupported = 0; 2914 int lio_head_port; 2915 int aio_port; 2916 int aio_thread; 2917 port_kevent_t *pkevtp = NULL; 2918 int portused = 0; 2919 port_notify32_t pnotify; 2920 int event; 2921 2922 aiop = curproc->p_aio; 2923 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 2924 return (EINVAL); 2925 2926 ASSERT(get_udatamodel() == DATAMODEL_ILP32); 2927 2928 ssize = (sizeof (caddr32_t) * nent); 2929 cbplist = kmem_alloc(ssize, KM_SLEEP); 2930 ucbp = (caddr32_t *)cbplist; 2931 2932 if (copyin(aiocb_arg, cbplist, ssize) || 2933 (sigev && copyin(sigev, &sigevk, sizeof (sigevk)))) { 2934 kmem_free(cbplist, ssize); 2935 return (EFAULT); 2936 } 2937 2938 /* Event Ports */ 2939 if (sigev && 2940 (sigevk.sigev_notify == SIGEV_THREAD || 2941 sigevk.sigev_notify == SIGEV_PORT)) { 2942 if (sigevk.sigev_notify == SIGEV_THREAD) { 2943 pnotify.portnfy_port = sigevk.sigev_signo; 2944 pnotify.portnfy_user = sigevk.sigev_value.sival_ptr; 2945 } else if (copyin( 2946 (void *)(uintptr_t)sigevk.sigev_value.sival_ptr, 2947 &pnotify, sizeof (pnotify))) { 2948 kmem_free(cbplist, ssize); 2949 return (EFAULT); 2950 } 2951 error = port_alloc_event(pnotify.portnfy_port, 2952 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp); 2953 if (error) { 2954 if (error == ENOMEM || error == EAGAIN) 2955 error = EAGAIN; 2956 else 2957 error = EINVAL; 2958 kmem_free(cbplist, ssize); 2959 return (error); 2960 } 2961 lio_head_port = pnotify.portnfy_port; 2962 portused = 1; 2963 } 2964 2965 /* 2966 * a list head should be allocated if notification is 2967 * enabled for this list.
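 * The head's lio_nent/lio_refcnt both start at nent and are decremented
 * below for every entry that is skipped or fails; the remaining
 * references drain as requests complete.  For LIO_WAIT the caller then
 * blocks until the whole list is done, roughly (a sketch of the wait
 * performed further down):
 *
 *	mutex_enter(&aiop->aio_mutex);
 *	while (head->lio_refcnt > 0)
 *		(void) cv_wait_sig(&head->lio_notify, &aiop->aio_mutex);
 *	mutex_exit(&aiop->aio_mutex);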
2968 */ 2969 head = NULL; 2970 2971 if (mode_arg == LIO_WAIT || sigev) { 2972 mutex_enter(&aiop->aio_mutex); 2973 error = aio_lio_alloc(&head); 2974 mutex_exit(&aiop->aio_mutex); 2975 if (error) 2976 goto done; 2977 deadhead = 1; 2978 head->lio_nent = nent; 2979 head->lio_refcnt = nent; 2980 head->lio_port = -1; 2981 head->lio_portkev = NULL; 2982 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL && 2983 sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) { 2984 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 2985 if (sqp == NULL) { 2986 error = EAGAIN; 2987 goto done; 2988 } 2989 sqp->sq_func = NULL; 2990 sqp->sq_next = NULL; 2991 sqp->sq_info.si_code = SI_ASYNCIO; 2992 sqp->sq_info.si_pid = curproc->p_pid; 2993 sqp->sq_info.si_ctid = PRCTID(curproc); 2994 sqp->sq_info.si_zoneid = getzoneid(); 2995 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 2996 sqp->sq_info.si_signo = sigevk.sigev_signo; 2997 sqp->sq_info.si_value.sival_int = 2998 sigevk.sigev_value.sival_int; 2999 head->lio_sigqp = sqp; 3000 } else { 3001 head->lio_sigqp = NULL; 3002 } 3003 if (pkevtp) { 3004 /* 3005 * Prepare data to send when list of aiocb's 3006 * has completed. 3007 */ 3008 port_init_event(pkevtp, (uintptr_t)sigev, 3009 (void *)(uintptr_t)pnotify.portnfy_user, 3010 NULL, head); 3011 pkevtp->portkev_events = AIOLIO64; 3012 head->lio_portkev = pkevtp; 3013 head->lio_port = pnotify.portnfy_port; 3014 } 3015 } 3016 3017 for (i = 0; i < nent; i++, ucbp++) { 3018 3019 cbp = (aiocb64_32_t *)(uintptr_t)*ucbp; 3020 /* skip entry if it can't be copied. */ 3021 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) { 3022 if (head) { 3023 mutex_enter(&aiop->aio_mutex); 3024 head->lio_nent--; 3025 head->lio_refcnt--; 3026 mutex_exit(&aiop->aio_mutex); 3027 } 3028 continue; 3029 } 3030 3031 /* skip if opcode for aiocb is LIO_NOP */ 3032 mode = aiocb->aio_lio_opcode; 3033 if (mode == LIO_NOP) { 3034 cbp = NULL; 3035 if (head) { 3036 mutex_enter(&aiop->aio_mutex); 3037 head->lio_nent--; 3038 head->lio_refcnt--; 3039 mutex_exit(&aiop->aio_mutex); 3040 } 3041 continue; 3042 } 3043 3044 /* increment file descriptor's ref count. 
*/ 3045 if ((fp = getf(aiocb->aio_fildes)) == NULL) { 3046 lio_set_uerror(&cbp->aio_resultp, EBADF); 3047 if (head) { 3048 mutex_enter(&aiop->aio_mutex); 3049 head->lio_nent--; 3050 head->lio_refcnt--; 3051 mutex_exit(&aiop->aio_mutex); 3052 } 3053 aio_errors++; 3054 continue; 3055 } 3056 3057 /* 3058 * check the permission of the partition 3059 */ 3060 if ((fp->f_flag & mode) == 0) { 3061 releasef(aiocb->aio_fildes); 3062 lio_set_uerror(&cbp->aio_resultp, EBADF); 3063 if (head) { 3064 mutex_enter(&aiop->aio_mutex); 3065 head->lio_nent--; 3066 head->lio_refcnt--; 3067 mutex_exit(&aiop->aio_mutex); 3068 } 3069 aio_errors++; 3070 continue; 3071 } 3072 3073 /* 3074 * common case where requests are to the same fd 3075 * for the same r/w operation 3076 * for UFS, need to set EBADFD 3077 */ 3078 vp = fp->f_vnode; 3079 if (fp != prev_fp || mode != prev_mode) { 3080 aio_func = check_vp(vp, mode); 3081 if (aio_func == NULL) { 3082 prev_fp = NULL; 3083 releasef(aiocb->aio_fildes); 3084 lio_set_uerror(&cbp->aio_resultp, EBADFD); 3085 aio_notsupported++; 3086 if (head) { 3087 mutex_enter(&aiop->aio_mutex); 3088 head->lio_nent--; 3089 head->lio_refcnt--; 3090 mutex_exit(&aiop->aio_mutex); 3091 } 3092 continue; 3093 } else { 3094 prev_fp = fp; 3095 prev_mode = mode; 3096 } 3097 } 3098 3099 #ifdef _LP64 3100 aiocb_LFton(aiocb, &aiocb_n); 3101 error = aio_req_setup(&reqp, aiop, &aiocb_n, 3102 (aio_result_t *)&cbp->aio_resultp, vp, 0); 3103 #else 3104 error = aio_req_setupLF(&reqp, aiop, aiocb, 3105 (aio_result_t *)&cbp->aio_resultp, vp, 0); 3106 #endif /* _LP64 */ 3107 if (error) { 3108 releasef(aiocb->aio_fildes); 3109 lio_set_uerror(&cbp->aio_resultp, error); 3110 if (head) { 3111 mutex_enter(&aiop->aio_mutex); 3112 head->lio_nent--; 3113 head->lio_refcnt--; 3114 mutex_exit(&aiop->aio_mutex); 3115 } 3116 aio_errors++; 3117 continue; 3118 } 3119 3120 reqp->aio_req_lio = head; 3121 deadhead = 0; 3122 3123 /* 3124 * Set the errno field now before sending the request to 3125 * the driver to avoid a race condition 3126 */ 3127 (void) suword32(&cbp->aio_resultp.aio_errno, 3128 EINPROGRESS); 3129 3130 reqp->aio_req_iocb.iocb32 = *ucbp; 3131 3132 event = (mode == LIO_READ)? AIOAREAD64 : AIOAWRITE64; 3133 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT); 3134 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD); 3135 if (aio_port | aio_thread) { 3136 port_kevent_t *lpkevp; 3137 /* 3138 * Prepare data to send with each aiocb completed. 3139 */ 3140 if (aio_port) { 3141 void *paddr = (void *)(uintptr_t) 3142 aiocb->aio_sigevent.sigev_value.sival_ptr; 3143 if (copyin(paddr, &pnotify, sizeof (pnotify))) 3144 error = EFAULT; 3145 } else { /* aio_thread */ 3146 pnotify.portnfy_port = 3147 aiocb->aio_sigevent.sigev_signo; 3148 pnotify.portnfy_user = 3149 aiocb->aio_sigevent.sigev_value.sival_ptr; 3150 } 3151 if (error) 3152 /* EMPTY */; 3153 else if (pkevtp != NULL && 3154 pnotify.portnfy_port == lio_head_port) 3155 error = port_dup_event(pkevtp, &lpkevp, 3156 PORT_ALLOC_DEFAULT); 3157 else 3158 error = port_alloc_event(pnotify.portnfy_port, 3159 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, 3160 &lpkevp); 3161 if (error == 0) { 3162 port_init_event(lpkevp, (uintptr_t)*ucbp, 3163 (void *)(uintptr_t)pnotify.portnfy_user, 3164 aio_port_callback, reqp); 3165 lpkevp->portkev_events = event; 3166 reqp->aio_req_portkev = lpkevp; 3167 reqp->aio_req_port = pnotify.portnfy_port; 3168 } 3169 } 3170 3171 /* 3172 * send the request to driver. 
3173 */ 3174 if (error == 0) { 3175 if (aiocb->aio_nbytes == 0) { 3176 clear_active_fd(aiocb->aio_fildes); 3177 aio_zerolen(reqp); 3178 continue; 3179 } 3180 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, 3181 CRED()); 3182 } 3183 3184 /* 3185 * the fd's ref count is not decremented until the IO has 3186 * completed unless there was an error. 3187 */ 3188 if (error) { 3189 releasef(aiocb->aio_fildes); 3190 lio_set_uerror(&cbp->aio_resultp, error); 3191 if (head) { 3192 mutex_enter(&aiop->aio_mutex); 3193 head->lio_nent--; 3194 head->lio_refcnt--; 3195 mutex_exit(&aiop->aio_mutex); 3196 } 3197 if (error == ENOTSUP) 3198 aio_notsupported++; 3199 else 3200 aio_errors++; 3201 lio_set_error(reqp, portused); 3202 } else { 3203 clear_active_fd(aiocb->aio_fildes); 3204 } 3205 } 3206 3207 if (aio_notsupported) { 3208 error = ENOTSUP; 3209 } else if (aio_errors) { 3210 /* 3211 * return EIO if any request failed 3212 */ 3213 error = EIO; 3214 } 3215 3216 if (mode_arg == LIO_WAIT) { 3217 mutex_enter(&aiop->aio_mutex); 3218 while (head->lio_refcnt > 0) { 3219 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 3220 mutex_exit(&aiop->aio_mutex); 3221 error = EINTR; 3222 goto done; 3223 } 3224 } 3225 mutex_exit(&aiop->aio_mutex); 3226 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_LARGEFILE); 3227 } 3228 3229 done: 3230 kmem_free(cbplist, ssize); 3231 if (deadhead) { 3232 if (head->lio_sigqp) 3233 kmem_free(head->lio_sigqp, sizeof (sigqueue_t)); 3234 if (head->lio_portkev) 3235 port_free_event(head->lio_portkev); 3236 kmem_free(head, sizeof (aio_lio_t)); 3237 } 3238 return (error); 3239 } 3240 3241 #ifdef _SYSCALL32_IMPL 3242 static void 3243 aiocb_LFton(aiocb64_32_t *src, aiocb_t *dest) 3244 { 3245 dest->aio_fildes = src->aio_fildes; 3246 dest->aio_buf = (void *)(uintptr_t)src->aio_buf; 3247 dest->aio_nbytes = (size_t)src->aio_nbytes; 3248 dest->aio_offset = (off_t)src->aio_offset; 3249 dest->aio_reqprio = src->aio_reqprio; 3250 dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify; 3251 dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo; 3252 3253 /* 3254 * See comment in sigqueue32() on handling of 32-bit 3255 * sigvals in a 64-bit kernel. 3256 */ 3257 dest->aio_sigevent.sigev_value.sival_int = 3258 (int)src->aio_sigevent.sigev_value.sival_int; 3259 dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval)) 3260 (uintptr_t)src->aio_sigevent.sigev_notify_function; 3261 dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *) 3262 (uintptr_t)src->aio_sigevent.sigev_notify_attributes; 3263 dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2; 3264 dest->aio_lio_opcode = src->aio_lio_opcode; 3265 dest->aio_state = src->aio_state; 3266 dest->aio__pad[0] = src->aio__pad[0]; 3267 } 3268 #endif 3269 3270 /* 3271 * This function is used only for largefile calls made by 3272 * 32 bit applications. 
3273 */ 3274 static int 3275 aio_req_setupLF( 3276 aio_req_t **reqpp, 3277 aio_t *aiop, 3278 aiocb64_32_t *arg, 3279 aio_result_t *resultp, 3280 vnode_t *vp, 3281 int old_solaris_req) 3282 { 3283 sigqueue_t *sqp = NULL; 3284 aio_req_t *reqp; 3285 struct uio *uio; 3286 struct sigevent32 *sigev; 3287 int error; 3288 3289 sigev = &arg->aio_sigevent; 3290 if (sigev->sigev_notify == SIGEV_SIGNAL && 3291 sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) { 3292 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 3293 if (sqp == NULL) 3294 return (EAGAIN); 3295 sqp->sq_func = NULL; 3296 sqp->sq_next = NULL; 3297 sqp->sq_info.si_code = SI_ASYNCIO; 3298 sqp->sq_info.si_pid = curproc->p_pid; 3299 sqp->sq_info.si_ctid = PRCTID(curproc); 3300 sqp->sq_info.si_zoneid = getzoneid(); 3301 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 3302 sqp->sq_info.si_signo = sigev->sigev_signo; 3303 sqp->sq_info.si_value.sival_int = sigev->sigev_value.sival_int; 3304 } 3305 3306 mutex_enter(&aiop->aio_mutex); 3307 3308 if (aiop->aio_flags & AIO_REQ_BLOCK) { 3309 mutex_exit(&aiop->aio_mutex); 3310 if (sqp) 3311 kmem_free(sqp, sizeof (sigqueue_t)); 3312 return (EIO); 3313 } 3314 /* 3315 * get an aio_reqp from the free list or allocate one 3316 * from dynamic memory. 3317 */ 3318 if (error = aio_req_alloc(&reqp, resultp)) { 3319 mutex_exit(&aiop->aio_mutex); 3320 if (sqp) 3321 kmem_free(sqp, sizeof (sigqueue_t)); 3322 return (error); 3323 } 3324 aiop->aio_pending++; 3325 aiop->aio_outstanding++; 3326 reqp->aio_req_flags = AIO_PENDING; 3327 if (old_solaris_req) { 3328 /* this is an old solaris aio request */ 3329 reqp->aio_req_flags |= AIO_SOLARIS; 3330 aiop->aio_flags |= AIO_SOLARIS_REQ; 3331 } 3332 if (sigev->sigev_notify == SIGEV_THREAD || 3333 sigev->sigev_notify == SIGEV_PORT) 3334 aio_enq(&aiop->aio_portpending, reqp, 0); 3335 mutex_exit(&aiop->aio_mutex); 3336 /* 3337 * initialize aio request. 3338 */ 3339 reqp->aio_req_fd = arg->aio_fildes; 3340 reqp->aio_req_sigqp = sqp; 3341 reqp->aio_req_iocb.iocb = NULL; 3342 reqp->aio_req_lio = NULL; 3343 reqp->aio_req_buf.b_file = vp; 3344 uio = reqp->aio_req.aio_uio; 3345 uio->uio_iovcnt = 1; 3346 uio->uio_iov->iov_base = (caddr_t)(uintptr_t)arg->aio_buf; 3347 uio->uio_iov->iov_len = arg->aio_nbytes; 3348 uio->uio_loffset = arg->aio_offset; 3349 *reqpp = reqp; 3350 return (0); 3351 } 3352 3353 /* 3354 * This routine is called when a non-largefile call is made by a 32-bit 3355 * process on an ILP32 or LP64 kernel.
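 * An illustrative (hypothetical) userland origin for such a call is a
 * 32-bit process issuing a plain lio_listio(3C) request, e.g.:
 *
 *	struct aiocb *list[2] = { &cb0, &cb1 };
 *
 *	(void) lio_listio(LIO_WAIT, list, 2, NULL);
 *
 * which libc hands to the kernel only when the descriptors involved
 * support kaio; otherwise user-level aio takes care of the list.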
3356 */ 3357 static int 3358 alio32( 3359 int mode_arg, 3360 void *aiocb_arg, 3361 int nent, 3362 void *sigev) 3363 { 3364 file_t *fp; 3365 file_t *prev_fp = NULL; 3366 int prev_mode = -1; 3367 struct vnode *vp; 3368 aio_lio_t *head; 3369 aio_req_t *reqp; 3370 aio_t *aiop; 3371 caddr_t cbplist; 3372 aiocb_t cb; 3373 aiocb_t *aiocb = &cb; 3374 #ifdef _LP64 3375 aiocb32_t *cbp; 3376 caddr32_t *ucbp; 3377 aiocb32_t cb32; 3378 aiocb32_t *aiocb32 = &cb32; 3379 struct sigevent32 sigevk; 3380 #else 3381 aiocb_t *cbp, **ucbp; 3382 struct sigevent sigevk; 3383 #endif 3384 sigqueue_t *sqp; 3385 int (*aio_func)(); 3386 int mode; 3387 int error = 0; 3388 int aio_errors = 0; 3389 int i; 3390 size_t ssize; 3391 int deadhead = 0; 3392 int aio_notsupported = 0; 3393 int lio_head_port; 3394 int aio_port; 3395 int aio_thread; 3396 port_kevent_t *pkevtp = NULL; 3397 int portused = 0; 3398 #ifdef _LP64 3399 port_notify32_t pnotify; 3400 #else 3401 port_notify_t pnotify; 3402 #endif 3403 int event; 3404 3405 aiop = curproc->p_aio; 3406 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 3407 return (EINVAL); 3408 3409 #ifdef _LP64 3410 ssize = (sizeof (caddr32_t) * nent); 3411 #else 3412 ssize = (sizeof (aiocb_t *) * nent); 3413 #endif 3414 cbplist = kmem_alloc(ssize, KM_SLEEP); 3415 ucbp = (void *)cbplist; 3416 3417 if (copyin(aiocb_arg, cbplist, ssize) || 3418 (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent32)))) { 3419 kmem_free(cbplist, ssize); 3420 return (EFAULT); 3421 } 3422 3423 /* Event Ports */ 3424 if (sigev && 3425 (sigevk.sigev_notify == SIGEV_THREAD || 3426 sigevk.sigev_notify == SIGEV_PORT)) { 3427 if (sigevk.sigev_notify == SIGEV_THREAD) { 3428 pnotify.portnfy_port = sigevk.sigev_signo; 3429 pnotify.portnfy_user = sigevk.sigev_value.sival_ptr; 3430 } else if (copyin( 3431 (void *)(uintptr_t)sigevk.sigev_value.sival_ptr, 3432 &pnotify, sizeof (pnotify))) { 3433 kmem_free(cbplist, ssize); 3434 return (EFAULT); 3435 } 3436 error = port_alloc_event(pnotify.portnfy_port, 3437 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp); 3438 if (error) { 3439 if (error == ENOMEM || error == EAGAIN) 3440 error = EAGAIN; 3441 else 3442 error = EINVAL; 3443 kmem_free(cbplist, ssize); 3444 return (error); 3445 } 3446 lio_head_port = pnotify.portnfy_port; 3447 portused = 1; 3448 } 3449 3450 /* 3451 * a list head should be allocated if notification is 3452 * enabled for this list. 
3453 */ 3454 head = NULL; 3455 3456 if (mode_arg == LIO_WAIT || sigev) { 3457 mutex_enter(&aiop->aio_mutex); 3458 error = aio_lio_alloc(&head); 3459 mutex_exit(&aiop->aio_mutex); 3460 if (error) 3461 goto done; 3462 deadhead = 1; 3463 head->lio_nent = nent; 3464 head->lio_refcnt = nent; 3465 head->lio_port = -1; 3466 head->lio_portkev = NULL; 3467 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL && 3468 sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) { 3469 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 3470 if (sqp == NULL) { 3471 error = EAGAIN; 3472 goto done; 3473 } 3474 sqp->sq_func = NULL; 3475 sqp->sq_next = NULL; 3476 sqp->sq_info.si_code = SI_ASYNCIO; 3477 sqp->sq_info.si_pid = curproc->p_pid; 3478 sqp->sq_info.si_ctid = PRCTID(curproc); 3479 sqp->sq_info.si_zoneid = getzoneid(); 3480 sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 3481 sqp->sq_info.si_signo = sigevk.sigev_signo; 3482 sqp->sq_info.si_value.sival_int = 3483 sigevk.sigev_value.sival_int; 3484 head->lio_sigqp = sqp; 3485 } else { 3486 head->lio_sigqp = NULL; 3487 } 3488 if (pkevtp) { 3489 /* 3490 * Prepare data to send when list of aiocb's has 3491 * completed. 3492 */ 3493 port_init_event(pkevtp, (uintptr_t)sigev, 3494 (void *)(uintptr_t)pnotify.portnfy_user, 3495 NULL, head); 3496 pkevtp->portkev_events = AIOLIO; 3497 head->lio_portkev = pkevtp; 3498 head->lio_port = pnotify.portnfy_port; 3499 } 3500 } 3501 3502 for (i = 0; i < nent; i++, ucbp++) { 3503 3504 /* skip entry if it can't be copied. */ 3505 #ifdef _LP64 3506 cbp = (aiocb32_t *)(uintptr_t)*ucbp; 3507 if (cbp == NULL || copyin(cbp, aiocb32, sizeof (*aiocb32))) 3508 #else 3509 cbp = (aiocb_t *)*ucbp; 3510 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) 3511 #endif 3512 { 3513 if (head) { 3514 mutex_enter(&aiop->aio_mutex); 3515 head->lio_nent--; 3516 head->lio_refcnt--; 3517 mutex_exit(&aiop->aio_mutex); 3518 } 3519 continue; 3520 } 3521 #ifdef _LP64 3522 /* 3523 * copy 32 bit structure into 64 bit structure 3524 */ 3525 aiocb_32ton(aiocb32, aiocb); 3526 #endif /* _LP64 */ 3527 3528 /* skip if opcode for aiocb is LIO_NOP */ 3529 mode = aiocb->aio_lio_opcode; 3530 if (mode == LIO_NOP) { 3531 cbp = NULL; 3532 if (head) { 3533 mutex_enter(&aiop->aio_mutex); 3534 head->lio_nent--; 3535 head->lio_refcnt--; 3536 mutex_exit(&aiop->aio_mutex); 3537 } 3538 continue; 3539 } 3540 3541 /* increment file descriptor's ref count. 
*/ 3542 if ((fp = getf(aiocb->aio_fildes)) == NULL) { 3543 lio_set_uerror(&cbp->aio_resultp, EBADF); 3544 if (head) { 3545 mutex_enter(&aiop->aio_mutex); 3546 head->lio_nent--; 3547 head->lio_refcnt--; 3548 mutex_exit(&aiop->aio_mutex); 3549 } 3550 aio_errors++; 3551 continue; 3552 } 3553 3554 /* 3555 * check the permission of the partition 3556 */ 3557 if ((fp->f_flag & mode) == 0) { 3558 releasef(aiocb->aio_fildes); 3559 lio_set_uerror(&cbp->aio_resultp, EBADF); 3560 if (head) { 3561 mutex_enter(&aiop->aio_mutex); 3562 head->lio_nent--; 3563 head->lio_refcnt--; 3564 mutex_exit(&aiop->aio_mutex); 3565 } 3566 aio_errors++; 3567 continue; 3568 } 3569 3570 /* 3571 * common case where requests are to the same fd 3572 * for the same r/w operation 3573 * for UFS, need to set EBADFD 3574 */ 3575 vp = fp->f_vnode; 3576 if (fp != prev_fp || mode != prev_mode) { 3577 aio_func = check_vp(vp, mode); 3578 if (aio_func == NULL) { 3579 prev_fp = NULL; 3580 releasef(aiocb->aio_fildes); 3581 lio_set_uerror(&cbp->aio_resultp, EBADFD); 3582 aio_notsupported++; 3583 if (head) { 3584 mutex_enter(&aiop->aio_mutex); 3585 head->lio_nent--; 3586 head->lio_refcnt--; 3587 mutex_exit(&aiop->aio_mutex); 3588 } 3589 continue; 3590 } else { 3591 prev_fp = fp; 3592 prev_mode = mode; 3593 } 3594 } 3595 3596 error = aio_req_setup(&reqp, aiop, aiocb, 3597 (aio_result_t *)&cbp->aio_resultp, vp, 0); 3598 if (error) { 3599 releasef(aiocb->aio_fildes); 3600 lio_set_uerror(&cbp->aio_resultp, error); 3601 if (head) { 3602 mutex_enter(&aiop->aio_mutex); 3603 head->lio_nent--; 3604 head->lio_refcnt--; 3605 mutex_exit(&aiop->aio_mutex); 3606 } 3607 aio_errors++; 3608 continue; 3609 } 3610 3611 reqp->aio_req_lio = head; 3612 deadhead = 0; 3613 3614 /* 3615 * Set the errno field now before sending the request to 3616 * the driver to avoid a race condition 3617 */ 3618 (void) suword32(&cbp->aio_resultp.aio_errno, 3619 EINPROGRESS); 3620 3621 reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)cbp; 3622 3623 event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE; 3624 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT); 3625 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD); 3626 if (aio_port | aio_thread) { 3627 port_kevent_t *lpkevp; 3628 /* 3629 * Prepare data to send with each aiocb completed. 
3630 */ 3631 #ifdef _LP64 3632 if (aio_port) { 3633 void *paddr = (void *)(uintptr_t) 3634 aiocb32->aio_sigevent.sigev_value.sival_ptr; 3635 if (copyin(paddr, &pnotify, sizeof (pnotify))) 3636 error = EFAULT; 3637 } else { /* aio_thread */ 3638 pnotify.portnfy_port = 3639 aiocb32->aio_sigevent.sigev_signo; 3640 pnotify.portnfy_user = 3641 aiocb32->aio_sigevent.sigev_value.sival_ptr; 3642 } 3643 #else 3644 if (aio_port) { 3645 void *paddr = 3646 aiocb->aio_sigevent.sigev_value.sival_ptr; 3647 if (copyin(paddr, &pnotify, sizeof (pnotify))) 3648 error = EFAULT; 3649 } else { /* aio_thread */ 3650 pnotify.portnfy_port = 3651 aiocb->aio_sigevent.sigev_signo; 3652 pnotify.portnfy_user = 3653 aiocb->aio_sigevent.sigev_value.sival_ptr; 3654 } 3655 #endif 3656 if (error) 3657 /* EMPTY */; 3658 else if (pkevtp != NULL && 3659 pnotify.portnfy_port == lio_head_port) 3660 error = port_dup_event(pkevtp, &lpkevp, 3661 PORT_ALLOC_DEFAULT); 3662 else 3663 error = port_alloc_event(pnotify.portnfy_port, 3664 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, 3665 &lpkevp); 3666 if (error == 0) { 3667 port_init_event(lpkevp, (uintptr_t)cbp, 3668 (void *)(uintptr_t)pnotify.portnfy_user, 3669 aio_port_callback, reqp); 3670 lpkevp->portkev_events = event; 3671 reqp->aio_req_portkev = lpkevp; 3672 reqp->aio_req_port = pnotify.portnfy_port; 3673 } 3674 } 3675 3676 /* 3677 * send the request to driver. 3678 */ 3679 if (error == 0) { 3680 if (aiocb->aio_nbytes == 0) { 3681 clear_active_fd(aiocb->aio_fildes); 3682 aio_zerolen(reqp); 3683 continue; 3684 } 3685 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, 3686 CRED()); 3687 } 3688 3689 /* 3690 * the fd's ref count is not decremented until the IO has 3691 * completed unless there was an error. 3692 */ 3693 if (error) { 3694 releasef(aiocb->aio_fildes); 3695 lio_set_uerror(&cbp->aio_resultp, error); 3696 if (head) { 3697 mutex_enter(&aiop->aio_mutex); 3698 head->lio_nent--; 3699 head->lio_refcnt--; 3700 mutex_exit(&aiop->aio_mutex); 3701 } 3702 if (error == ENOTSUP) 3703 aio_notsupported++; 3704 else 3705 aio_errors++; 3706 lio_set_error(reqp, portused); 3707 } else { 3708 clear_active_fd(aiocb->aio_fildes); 3709 } 3710 } 3711 3712 if (aio_notsupported) { 3713 error = ENOTSUP; 3714 } else if (aio_errors) { 3715 /* 3716 * return EIO if any request failed 3717 */ 3718 error = EIO; 3719 } 3720 3721 if (mode_arg == LIO_WAIT) { 3722 mutex_enter(&aiop->aio_mutex); 3723 while (head->lio_refcnt > 0) { 3724 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 3725 mutex_exit(&aiop->aio_mutex); 3726 error = EINTR; 3727 goto done; 3728 } 3729 } 3730 mutex_exit(&aiop->aio_mutex); 3731 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_32); 3732 } 3733 3734 done: 3735 kmem_free(cbplist, ssize); 3736 if (deadhead) { 3737 if (head->lio_sigqp) 3738 kmem_free(head->lio_sigqp, sizeof (sigqueue_t)); 3739 if (head->lio_portkev) 3740 port_free_event(head->lio_portkev); 3741 kmem_free(head, sizeof (aio_lio_t)); 3742 } 3743 return (error); 3744 } 3745 3746 3747 #ifdef _SYSCALL32_IMPL 3748 void 3749 aiocb_32ton(aiocb32_t *src, aiocb_t *dest) 3750 { 3751 dest->aio_fildes = src->aio_fildes; 3752 dest->aio_buf = (caddr_t)(uintptr_t)src->aio_buf; 3753 dest->aio_nbytes = (size_t)src->aio_nbytes; 3754 dest->aio_offset = (off_t)src->aio_offset; 3755 dest->aio_reqprio = src->aio_reqprio; 3756 dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify; 3757 dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo; 3758 3759 /* 3760 * See comment in sigqueue32() on handling of 32-bit 3761 * 
sigvals in a 64-bit kernel. 3762 */ 3763 dest->aio_sigevent.sigev_value.sival_int = 3764 (int)src->aio_sigevent.sigev_value.sival_int; 3765 dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval)) 3766 (uintptr_t)src->aio_sigevent.sigev_notify_function; 3767 dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *) 3768 (uintptr_t)src->aio_sigevent.sigev_notify_attributes; 3769 dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2; 3770 dest->aio_lio_opcode = src->aio_lio_opcode; 3771 dest->aio_state = src->aio_state; 3772 dest->aio__pad[0] = src->aio__pad[0]; 3773 } 3774 #endif /* _SYSCALL32_IMPL */ 3775 3776 /* 3777 * aio_port_callback() is called just before the event is retrieved from the 3778 * port. The task of this callback function is to finish the work of the 3779 * transaction for the application, that is: 3780 * - copyout transaction data to the application 3781 * (this thread is running in the right process context) 3782 * - keep track of the transaction (update counters). 3783 * - free allocated buffers 3784 * The aiocb pointer is the object element of the port_kevent_t structure. 3785 * 3786 * flag : 3787 * PORT_CALLBACK_DEFAULT : do copyout and free resources 3788 * PORT_CALLBACK_CLOSE : don't do copyout, free resources 3789 */ 3790 3791 /*ARGSUSED*/ 3792 int 3793 aio_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp) 3794 { 3795 aio_t *aiop = curproc->p_aio; 3796 aio_req_t *reqp = arg; 3797 struct iovec *iov; 3798 struct buf *bp; 3799 void *resultp; 3800 3801 if (pid != curproc->p_pid) { 3802 /* wrong proc; cannot deliver data here ... */ 3803 return (EACCES); 3804 } 3805 3806 mutex_enter(&aiop->aio_portq_mutex); 3807 reqp->aio_req_portkev = NULL; 3808 aio_req_remove_portq(aiop, reqp); /* remove request from portq */ 3809 mutex_exit(&aiop->aio_portq_mutex); 3810 aphysio_unlock(reqp); /* unlock used pages */ 3811 mutex_enter(&aiop->aio_mutex); 3812 if (reqp->aio_req_flags & AIO_COPYOUTDONE) { 3813 aio_req_free_port(aiop, reqp); /* back to free list */ 3814 mutex_exit(&aiop->aio_mutex); 3815 return (0); 3816 } 3817 3818 iov = reqp->aio_req_uio.uio_iov; 3819 bp = &reqp->aio_req_buf; 3820 resultp = (void *)reqp->aio_req_resultp; 3821 if (flag == PORT_CALLBACK_DEFAULT) 3822 aio_copyout_result_port(iov, bp, resultp); 3823 aio_req_free_port(aiop, reqp); /* request struct back to free list */ 3824 mutex_exit(&aiop->aio_mutex); 3825 return (0); 3826 } 3827
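
/*
 * Illustrative userland consumer of the event-port notification wired up
 * by aio_port_callback() above (a hedged sketch; "port", "pe" and "cbp"
 * are hypothetical local names).  port_get(3C) returns an event whose
 * portev_object is the user-level aiocb address registered via
 * port_init_event() and whose portev_user is the port_notify_t cookie:
 *
 *	port_event_t pe;
 *
 *	if (port_get(port, &pe, NULL) == 0 &&
 *	    pe.portev_source == PORT_SOURCE_AIO) {
 *		struct aiocb *cbp = (struct aiocb *)pe.portev_object;
 *		ssize_t retval = aio_return(cbp);
 *		...
 *	}
 */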