/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include "synonyms.h"
#include "thr_uberdata.h"
#include "asyncio.h"
#include <atomic.h>
#include <sys/param.h>
#include <sys/file.h>
#include <sys/port.h>

static int _aio_hash_insert(aio_result_t *, aio_req_t *);
static aio_req_t *_aio_req_get(aio_worker_t *);
static void _aio_req_add(aio_req_t *, aio_worker_t **, int);
static void _aio_req_del(aio_worker_t *, aio_req_t *, int);
static void _aio_work_done(aio_worker_t *);
static void _aio_enq_doneq(aio_req_t *);

extern void _aio_lio_free(aio_lio_t *);

extern int __fdsync(int, int);
extern int _port_dispatch(int, int, int, int, uintptr_t, void *);

static int _aio_fsync_del(aio_worker_t *, aio_req_t *);
static void _aiodone(aio_req_t *, ssize_t, int);
static void _aio_cancel_work(aio_worker_t *, int, int *, int *);
static void _aio_finish_request(aio_worker_t *, ssize_t, int);

/*
 * switch for kernel async I/O
 */
int _kaio_ok = 0;		/* 0 = disabled, 1 = on, -1 = error */

/*
 * Key for thread-specific data
 */
pthread_key_t _aio_key;

/*
 * Array for determining whether or not a file supports kaio.
 * Initialized in _kaio_init().
 */
uint32_t *_kaio_supported = NULL;

/*
 * workers for read/write requests
 * (__aio_mutex lock protects circular linked list of workers)
 */
aio_worker_t *__workers_rw;	/* circular list of AIO workers */
aio_worker_t *__nextworker_rw;	/* next worker in list of workers */
int __rw_workerscnt;		/* number of read/write workers */

/*
 * worker for notification requests.
 */
aio_worker_t *__workers_no;	/* circular list of AIO workers */
aio_worker_t *__nextworker_no;	/* next worker in list of workers */
int __no_workerscnt;		/* number of notification workers */

aio_req_t *_aio_done_tail;	/* list of done requests */
aio_req_t *_aio_done_head;

mutex_t __aio_initlock = DEFAULTMUTEX;	/* makes aio initialization atomic */
cond_t __aio_initcv = DEFAULTCV;
int __aio_initbusy = 0;

mutex_t __aio_mutex = DEFAULTMUTEX;	/* protects counts, and linked lists */
cond_t _aio_iowait_cv = DEFAULTCV;	/* wait for userland I/Os */

pid_t __pid = (pid_t)-1;	/* initialize as invalid pid */
int _sigio_enabled = 0;		/* when set, send SIGIO signal */

aio_hash_t *_aio_hash;

aio_req_t *_aio_doneq;		/* doubly-linked done queue list */

int _aio_donecnt = 0;
int _aio_waitncnt = 0;		/* # of requests for aio_waitn */
int _aio_doneq_cnt = 0;
int _aio_outstand_cnt = 0;	/* # of outstanding requests */
int _kaio_outstand_cnt = 0;	/* # of outstanding kaio requests */
int _aio_req_done_cnt = 0;	/* requests done but not yet on "done queue" */
int _aio_kernel_suspend = 0;	/* active kernel kaio calls */
int _aio_suscv_cnt = 0;		/* aio_suspend calls waiting on cv's */

int _max_workers = 256;		/* max number of workers permitted */
int _min_workers = 4;		/* min number of workers */
int _minworkload = 2;		/* min number of requests in q */
int _aio_worker_cnt = 0;	/* number of workers to do requests */
int __uaio_ok = 0;		/* AIO has been enabled */
sigset_t _worker_set;		/* worker's signal mask */

int _aiowait_flag = 0;		/* when set, aiowait() is in progress */
int _aio_flags = 0;		/* see the defines in asyncio.h */

aio_worker_t *_kaiowp = NULL;	/* points to kaio cleanup thread */

int hz;				/* clock ticks per second */

static int
_kaio_supported_init(void)
{
	void *ptr;
	size_t size;

	if (_kaio_supported != NULL)	/* already initialized */
		return (0);

	size = MAX_KAIO_FDARRAY_SIZE * sizeof (uint32_t);
	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON, -1, (off_t)0);
	if (ptr == MAP_FAILED)
		return (-1);
	_kaio_supported = ptr;
	return (0);
}

/*
 * The aio subsystem is initialized when an AIO request is made.
 * Constants are initialized, such as the maximum number of workers
 * that the subsystem can create and the minimum number of workers
 * permitted before imposing some restrictions.  Also, some
 * workers are created.
 */
int
__uaio_init(void)
{
	int ret = -1;
	int i;

	lmutex_lock(&__aio_initlock);
	while (__aio_initbusy)
		(void) _cond_wait(&__aio_initcv, &__aio_initlock);
	if (__uaio_ok) {	/* already initialized */
		lmutex_unlock(&__aio_initlock);
		return (0);
	}
	__aio_initbusy = 1;
	lmutex_unlock(&__aio_initlock);

	hz = (int)sysconf(_SC_CLK_TCK);
	__pid = getpid();

	setup_cancelsig(SIGAIOCANCEL);

	if (_kaio_supported_init() != 0)
		goto out;

	/*
	 * Allocate and initialize the hash table.
	 */
	/* LINTED pointer cast */
	_aio_hash = (aio_hash_t *)mmap(NULL,
	    HASHSZ * sizeof (aio_hash_t), PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON, -1, (off_t)0);
	if ((void *)_aio_hash == MAP_FAILED) {
		_aio_hash = NULL;
		goto out;
	}
	for (i = 0; i < HASHSZ; i++)
		(void) mutex_init(&_aio_hash[i].hash_lock, USYNC_THREAD, NULL);

	/*
	 * Initialize worker's signal mask to only catch SIGAIOCANCEL.
	 */
	(void) sigfillset(&_worker_set);
	(void) sigdelset(&_worker_set, SIGAIOCANCEL);

	/*
	 * Create the minimum number of read/write workers.
	 */
	for (i = 0; i < _min_workers; i++)
		(void) _aio_create_worker(NULL, AIOREAD);

	/*
	 * Create one worker to send asynchronous notifications.
	 */
	(void) _aio_create_worker(NULL, AIONOTIFY);

	ret = 0;
out:
	lmutex_lock(&__aio_initlock);
	if (ret == 0)
		__uaio_ok = 1;
	__aio_initbusy = 0;
	(void) cond_broadcast(&__aio_initcv);
	lmutex_unlock(&__aio_initlock);
	return (ret);
}

/*
 * Called from close() before actually performing the real _close().
 */
void
_aio_close(int fd)
{
	if (fd < 0)	/* avoid cancelling everything */
		return;
	/*
	 * Cancel all outstanding aio requests for this file descriptor.
	 */
	if (__uaio_ok)
		(void) aiocancel_all(fd);
	/*
	 * If we have allocated the bit array, clear the bit for this file.
	 * The next open may re-use this file descriptor and the new file
	 * may have different kaio() behaviour.
	 */
	if (_kaio_supported != NULL)
		CLEAR_KAIO_SUPPORTED(fd);
}

/*
 * special kaio cleanup thread sits in a loop in the
 * kernel waiting for pending kaio requests to complete.
 */
void *
_kaio_cleanup_thread(void *arg)
{
	if (pthread_setspecific(_aio_key, arg) != 0)
		aio_panic("_kaio_cleanup_thread, pthread_setspecific()");
	(void) _kaio(AIOSTART);
	return (arg);
}
/*
 * initialize kaio.
 */
void
_kaio_init()
{
	int error;
	sigset_t oset;

	lmutex_lock(&__aio_initlock);
	while (__aio_initbusy)
		(void) _cond_wait(&__aio_initcv, &__aio_initlock);
	if (_kaio_ok) {		/* already initialized */
		lmutex_unlock(&__aio_initlock);
		return;
	}
	__aio_initbusy = 1;
	lmutex_unlock(&__aio_initlock);

	if (_kaio_supported_init() != 0)
		error = ENOMEM;
	else if ((_kaiowp = _aio_worker_alloc()) == NULL)
		error = ENOMEM;
	else if ((error = (int)_kaio(AIOINIT)) == 0) {
		(void) pthread_sigmask(SIG_SETMASK, &maskset, &oset);
		error = thr_create(NULL, AIOSTKSIZE, _kaio_cleanup_thread,
		    _kaiowp, THR_DAEMON, &_kaiowp->work_tid);
		(void) pthread_sigmask(SIG_SETMASK, &oset, NULL);
	}
	if (error && _kaiowp != NULL) {
		_aio_worker_free(_kaiowp);
		_kaiowp = NULL;
	}

	lmutex_lock(&__aio_initlock);
	if (error)
		_kaio_ok = -1;
	else
		_kaio_ok = 1;
	__aio_initbusy = 0;
	(void) cond_broadcast(&__aio_initcv);
	lmutex_unlock(&__aio_initlock);
}

int
aioread(int fd, caddr_t buf, int bufsz, off_t offset, int whence,
    aio_result_t *resultp)
{
	return (_aiorw(fd, buf, bufsz, offset, whence, resultp, AIOREAD));
}

int
aiowrite(int fd, caddr_t buf, int bufsz, off_t offset, int whence,
    aio_result_t *resultp)
{
	return (_aiorw(fd, buf, bufsz, offset, whence, resultp, AIOWRITE));
}

#if !defined(_LP64)
int
aioread64(int fd, caddr_t buf, int bufsz, off64_t offset, int whence,
    aio_result_t *resultp)
{
	return (_aiorw(fd, buf, bufsz, offset, whence, resultp, AIOAREAD64));
}

int
aiowrite64(int fd, caddr_t buf, int bufsz, off64_t offset, int whence,
    aio_result_t *resultp)
{
	return (_aiorw(fd, buf, bufsz, offset, whence, resultp, AIOAWRITE64));
}
#endif	/* !defined(_LP64) */

int
_aiorw(int fd, caddr_t buf, int bufsz, offset_t offset, int whence,
    aio_result_t *resultp, int mode)
{
	aio_req_t *reqp;
	aio_args_t *ap;
	offset_t loffset;
	struct stat stat;
	int error = 0;
	int kerr;
	int umode;

	switch (whence) {

	case SEEK_SET:
		loffset = offset;
		break;
	case SEEK_CUR:
		if ((loffset = llseek(fd, 0, SEEK_CUR)) == -1)
			error = -1;
		else
			loffset += offset;
		break;
	case SEEK_END:
		if (fstat(fd, &stat) == -1)
			error = -1;
		else
			loffset = offset + stat.st_size;
		break;
	default:
		errno = EINVAL;
		error = -1;
	}

	if (error)
		return (error);

	/* initialize kaio */
	if (!_kaio_ok)
		_kaio_init();

	/*
	 * _aio_do_request() needs the original request code (mode) to be able
	 * to choose the appropriate 32/64 bit function.  All other functions
	 * only require the difference between READ and WRITE (umode).
	 */
	if (mode == AIOAREAD64 || mode == AIOAWRITE64)
		umode = mode - AIOAREAD64;
	else
		umode = mode;

	/*
	 * Try kernel aio first.
	 * If errno is ENOTSUP/EBADFD, fall back to the thread implementation.
	 */
	if (_kaio_ok > 0 && KAIO_SUPPORTED(fd)) {
		resultp->aio_errno = 0;
		sig_mutex_lock(&__aio_mutex);
		_kaio_outstand_cnt++;
		kerr = (int)_kaio(((resultp->aio_return == AIO_INPROGRESS) ?
		    (umode | AIO_POLL_BIT) : umode),
		    fd, buf, bufsz, loffset, resultp);
		if (kerr == 0) {
			sig_mutex_unlock(&__aio_mutex);
			return (0);
		}
		_kaio_outstand_cnt--;
		sig_mutex_unlock(&__aio_mutex);
		if (errno != ENOTSUP && errno != EBADFD)
			return (-1);
		if (errno == EBADFD)
			SET_KAIO_NOT_SUPPORTED(fd);
	}

	if (!__uaio_ok && __uaio_init() == -1)
		return (-1);

	if ((reqp = _aio_req_alloc()) == NULL) {
		errno = EAGAIN;
		return (-1);
	}

	/*
	 * _aio_do_request() checks reqp->req_op to differentiate
	 * between 32 and 64 bit access.
	 */
	reqp->req_op = mode;
	reqp->req_resultp = resultp;
	ap = &reqp->req_args;
	ap->fd = fd;
	ap->buf = buf;
	ap->bufsz = bufsz;
	ap->offset = loffset;

	if (_aio_hash_insert(resultp, reqp) != 0) {
		_aio_req_free(reqp);
		errno = EINVAL;
		return (-1);
	}
	/*
	 * _aio_req_add() only needs the difference between READ and
	 * WRITE to choose the right worker queue.
	 */
	_aio_req_add(reqp, &__nextworker_rw, umode);
	return (0);
}
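/*
 * Illustrative usage sketch (not part of this file; error handling
 * elided).  A caller issues an asynchronous read and reaps the
 * completion with aiowait(); "consume" is a hypothetical consumer:
 *
 *	aio_result_t res;
 *	char buf[8192];
 *
 *	if (aioread(fd, buf, sizeof (buf), 0, SEEK_SET, &res) == 0) {
 *		aio_result_t *donep = aiowait(NULL);	// blocks
 *		if (donep == &res && res.aio_return != -1)
 *			consume(buf, res.aio_return);
 *	}
 */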
int
aiocancel(aio_result_t *resultp)
{
	aio_req_t *reqp;
	aio_worker_t *aiowp;
	int ret;
	int done = 0;
	int canceled = 0;

	if (!__uaio_ok) {
		errno = EINVAL;
		return (-1);
	}

	sig_mutex_lock(&__aio_mutex);
	reqp = _aio_hash_find(resultp);
	if (reqp == NULL) {
		if (_aio_outstand_cnt == _aio_req_done_cnt)
			errno = EINVAL;
		else
			errno = EACCES;
		ret = -1;
	} else {
		aiowp = reqp->req_worker;
		sig_mutex_lock(&aiowp->work_qlock1);
		(void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
		sig_mutex_unlock(&aiowp->work_qlock1);

		if (canceled) {
			ret = 0;
		} else {
			if (_aio_outstand_cnt == 0 ||
			    _aio_outstand_cnt == _aio_req_done_cnt)
				errno = EINVAL;
			else
				errno = EACCES;
			ret = -1;
		}
	}
	sig_mutex_unlock(&__aio_mutex);
	return (ret);
}
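/*
 * Illustrative sketch (not part of this file): interpreting
 * aiocancel() results, following the logic above:
 *
 *	if (aiocancel(&res) == 0) {
 *		// request was canceled before it completed
 *	} else if (errno == EACCES) {
 *		// request is in progress or done; reap it via aiowait()
 *	} else {
 *		// errno == EINVAL: no such outstanding request
 *	}
 */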
/*
 * This must be asynch safe
 */
aio_result_t *
aiowait(struct timeval *uwait)
{
	aio_result_t *uresultp;
	aio_result_t *kresultp;
	aio_result_t *resultp;
	int dontblock;
	int timedwait = 0;
	int kaio_errno = 0;
	struct timeval twait;
	struct timeval *wait = NULL;
	hrtime_t hrtend;
	hrtime_t hres;

	if (uwait) {
		/*
		 * Check for a valid specified wait time.
		 * If it is invalid, fail the call right away.
		 */
		if (uwait->tv_sec < 0 || uwait->tv_usec < 0 ||
		    uwait->tv_usec >= MICROSEC) {
			errno = EINVAL;
			return ((aio_result_t *)-1);
		}

		if (uwait->tv_sec > 0 || uwait->tv_usec > 0) {
			hrtend = gethrtime() +
			    (hrtime_t)uwait->tv_sec * NANOSEC +
			    (hrtime_t)uwait->tv_usec * (NANOSEC / MICROSEC);
			twait = *uwait;
			wait = &twait;
			timedwait++;
		} else {
			/* polling */
			sig_mutex_lock(&__aio_mutex);
			if (_kaio_outstand_cnt == 0) {
				kresultp = (aio_result_t *)-1;
			} else {
				kresultp = (aio_result_t *)_kaio(AIOWAIT,
				    (struct timeval *)-1, 1);
				if (kresultp != (aio_result_t *)-1 &&
				    kresultp != NULL &&
				    kresultp != (aio_result_t *)1) {
					_kaio_outstand_cnt--;
					sig_mutex_unlock(&__aio_mutex);
					return (kresultp);
				}
			}
			uresultp = _aio_req_done();
			sig_mutex_unlock(&__aio_mutex);
			if (uresultp != NULL &&
			    uresultp != (aio_result_t *)-1) {
				return (uresultp);
			}
			if (uresultp == (aio_result_t *)-1 &&
			    kresultp == (aio_result_t *)-1) {
				errno = EINVAL;
				return ((aio_result_t *)-1);
			} else {
				return (NULL);
			}
		}
	}

	for (;;) {
		sig_mutex_lock(&__aio_mutex);
		uresultp = _aio_req_done();
		if (uresultp != NULL && uresultp != (aio_result_t *)-1) {
			sig_mutex_unlock(&__aio_mutex);
			resultp = uresultp;
			break;
		}
		_aiowait_flag++;
		dontblock = (uresultp == (aio_result_t *)-1);
		if (dontblock && _kaio_outstand_cnt == 0) {
			kresultp = (aio_result_t *)-1;
			kaio_errno = EINVAL;
		} else {
			sig_mutex_unlock(&__aio_mutex);
			kresultp = (aio_result_t *)_kaio(AIOWAIT,
			    wait, dontblock);
			sig_mutex_lock(&__aio_mutex);
			kaio_errno = errno;
		}
		_aiowait_flag--;
		sig_mutex_unlock(&__aio_mutex);
		if (kresultp == (aio_result_t *)1) {
			/* aiowait() awakened by an aionotify() */
			continue;
		} else if (kresultp != NULL &&
		    kresultp != (aio_result_t *)-1) {
			resultp = kresultp;
			sig_mutex_lock(&__aio_mutex);
			_kaio_outstand_cnt--;
			sig_mutex_unlock(&__aio_mutex);
			break;
		} else if (kresultp == (aio_result_t *)-1 &&
		    kaio_errno == EINVAL &&
		    uresultp == (aio_result_t *)-1) {
			errno = kaio_errno;
			resultp = (aio_result_t *)-1;
			break;
		} else if (kresultp == (aio_result_t *)-1 &&
		    kaio_errno == EINTR) {
			errno = kaio_errno;
			resultp = (aio_result_t *)-1;
			break;
		} else if (timedwait) {
			hres = hrtend - gethrtime();
			if (hres <= 0) {
				/* time is up; return */
				resultp = NULL;
				break;
			} else {
				/*
				 * Some time left.  Round up the remaining time
				 * in nanoseconds to microsec.  Retry the call.
				 */
				hres += (NANOSEC / MICROSEC) - 1;
				wait->tv_sec = hres / NANOSEC;
				wait->tv_usec =
				    (hres % NANOSEC) / (NANOSEC / MICROSEC);
			}
		} else {
			ASSERT(kresultp == NULL && uresultp == NULL);
			resultp = NULL;
			continue;
		}
	}
	return (resultp);
}
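/*
 * Illustrative usage note (not part of this file): a NULL uwait
 * blocks until a request completes; a zero timeval polls and
 * returns immediately; a positive timeval bounds the wait:
 *
 *	struct timeval tv = { 5, 0 };		// wait at most 5 seconds
 *	aio_result_t *donep = aiowait(&tv);
 *	if (donep == (aio_result_t *)-1)
 *		;	// error; see errno (EINVAL if nothing pending)
 *	else if (donep == NULL)
 *		;	// timed out
 *	else
 *		;	// donep is a completed request's aio_result_t
 */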
/*
 * _aio_get_timedelta calculates the remaining time and stores the result
 * into timespec_t *wait.
 */

int
_aio_get_timedelta(timespec_t *end, timespec_t *wait)
{
	int ret = 0;
	struct timeval cur;
	timespec_t curtime;

	(void) gettimeofday(&cur, NULL);
	curtime.tv_sec = cur.tv_sec;
	curtime.tv_nsec = cur.tv_usec * 1000;	/* convert us to ns */

	if (end->tv_sec >= curtime.tv_sec) {
		wait->tv_sec = end->tv_sec - curtime.tv_sec;
		if (end->tv_nsec >= curtime.tv_nsec) {
			wait->tv_nsec = end->tv_nsec - curtime.tv_nsec;
			if (wait->tv_sec == 0 && wait->tv_nsec == 0)
				ret = -1;	/* timer expired */
		} else {
			if (end->tv_sec > curtime.tv_sec) {
				wait->tv_sec -= 1;
				wait->tv_nsec = NANOSEC -
				    (curtime.tv_nsec - end->tv_nsec);
			} else {
				ret = -1;	/* timer expired */
			}
		}
	} else {
		ret = -1;
	}
	return (ret);
}
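/*
 * Worked example of the borrow case above: with end = {5, 200000000}
 * and a current time of {4, 900000000}, tv_sec becomes 5 - 4 - 1 = 0
 * (because 200000000 < 900000000) and tv_nsec becomes
 * NANOSEC - (900000000 - 200000000) = 300000000, i.e. 0.3s remain.
 * If the current time is at or past "end", -1 signals expiration.
 */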
/*
 * If closing by file descriptor: we will simply cancel all the
 * outstanding AIOs and return.  The AIOs in question will have
 * noticed the cancellation before, during, or after initiating I/O.
 */
int
aiocancel_all(int fd)
{
	aio_req_t *reqp;
	aio_req_t **reqpp;
	aio_worker_t *first;
	aio_worker_t *next;
	int canceled = 0;
	int done = 0;
	int cancelall = 0;

	sig_mutex_lock(&__aio_mutex);

	if (_aio_outstand_cnt == 0) {
		sig_mutex_unlock(&__aio_mutex);
		return (AIO_ALLDONE);
	}

	/*
	 * Cancel requests from the read/write workers' queues.
	 */
	first = __nextworker_rw;
	next = first;
	do {
		_aio_cancel_work(next, fd, &canceled, &done);
	} while ((next = next->work_forw) != first);

	/*
	 * Finally, check if there are requests on the done queue that
	 * should be canceled.
	 */
	if (fd < 0)
		cancelall = 1;
	reqpp = &_aio_done_tail;
	while ((reqp = *reqpp) != NULL) {
		if (cancelall || reqp->req_args.fd == fd) {
			*reqpp = reqp->req_next;
			_aio_donecnt--;
			(void) _aio_hash_del(reqp->req_resultp);
			_aio_req_free(reqp);
		} else
			reqpp = &reqp->req_next;
	}
	if (cancelall) {
		ASSERT(_aio_donecnt == 0);
		_aio_done_head = NULL;
	}
	sig_mutex_unlock(&__aio_mutex);

	if (canceled && done == 0)
		return (AIO_CANCELED);
	else if (done && canceled == 0)
		return (AIO_ALLDONE);
	else if ((canceled + done == 0) && KAIO_SUPPORTED(fd))
		return ((int)_kaio(AIOCANCEL, fd, NULL));
	return (AIO_NOTCANCELED);
}

/*
 * Cancel requests from a given work queue.  If the file descriptor
 * parameter, fd, is non-negative, then only cancel those requests
 * in this queue that are to this file descriptor.  If the fd
 * parameter is -1, then cancel all requests.
 */
static void
_aio_cancel_work(aio_worker_t *aiowp, int fd, int *canceled, int *done)
{
	aio_req_t *reqp;

	sig_mutex_lock(&aiowp->work_qlock1);
	/*
	 * Cancel queued requests first.
	 */
	reqp = aiowp->work_tail1;
	while (reqp != NULL) {
		if (fd < 0 || reqp->req_args.fd == fd) {
			if (_aio_cancel_req(aiowp, reqp, canceled, done)) {
				/*
				 * The caller's locks were dropped.
				 * reqp is invalid; start traversing
				 * the list from the beginning again.
				 */
				reqp = aiowp->work_tail1;
				continue;
			}
		}
		reqp = reqp->req_next;
	}
	/*
	 * Since the queued requests have been canceled, there can
	 * only be one in-progress request that should be canceled.
	 */
	if ((reqp = aiowp->work_req) != NULL &&
	    (fd < 0 || reqp->req_args.fd == fd))
		(void) _aio_cancel_req(aiowp, reqp, canceled, done);
	sig_mutex_unlock(&aiowp->work_qlock1);
}
/*
 * Cancel a request.  Return 1 if the caller's locks were temporarily
 * dropped, otherwise return 0.
 */
int
_aio_cancel_req(aio_worker_t *aiowp, aio_req_t *reqp, int *canceled, int *done)
{
	int ostate = reqp->req_state;

	ASSERT(MUTEX_HELD(&__aio_mutex));
	ASSERT(MUTEX_HELD(&aiowp->work_qlock1));
	if (ostate == AIO_REQ_CANCELED)
		return (0);
	if (ostate == AIO_REQ_DONE || ostate == AIO_REQ_DONEQ) {
		(*done)++;
		return (0);
	}
	if (reqp->req_op == AIOFSYNC && reqp != aiowp->work_req) {
		ASSERT(POSIX_AIO(reqp));
		/* Cancel the queued aio_fsync() request */
		if (!reqp->req_head->lio_canned) {
			reqp->req_head->lio_canned = 1;
			_aio_outstand_cnt--;
			(*canceled)++;
		}
		return (0);
	}
	reqp->req_state = AIO_REQ_CANCELED;
	_aio_req_del(aiowp, reqp, ostate);
	(void) _aio_hash_del(reqp->req_resultp);
	(*canceled)++;
	if (reqp == aiowp->work_req) {
		ASSERT(ostate == AIO_REQ_INPROGRESS);
		/*
		 * Set the result values now, before _aiodone() is called.
		 * We do this because the application can expect aio_return
		 * and aio_errno to be set to -1 and ECANCELED, respectively,
		 * immediately after a successful return from aiocancel()
		 * or aio_cancel().
		 */
		_aio_set_result(reqp, -1, ECANCELED);
		(void) thr_kill(aiowp->work_tid, SIGAIOCANCEL);
		return (0);
	}
	if (!POSIX_AIO(reqp)) {
		_aio_outstand_cnt--;
		_aio_set_result(reqp, -1, ECANCELED);
		return (0);
	}
	sig_mutex_unlock(&aiowp->work_qlock1);
	sig_mutex_unlock(&__aio_mutex);
	_aiodone(reqp, -1, ECANCELED);
	sig_mutex_lock(&__aio_mutex);
	sig_mutex_lock(&aiowp->work_qlock1);
	return (1);
}

int
_aio_create_worker(aio_req_t *reqp, int mode)
{
	aio_worker_t *aiowp, **workers, **nextworker;
	int *aio_workerscnt;
	void *(*func)(void *);
	sigset_t oset;
	int error;

	/*
	 * Put the new worker thread in the right queue.
	 */
	switch (mode) {
	case AIOREAD:
	case AIOWRITE:
	case AIOAREAD:
	case AIOAWRITE:
#if !defined(_LP64)
	case AIOAREAD64:
	case AIOAWRITE64:
#endif
		workers = &__workers_rw;
		nextworker = &__nextworker_rw;
		aio_workerscnt = &__rw_workerscnt;
		func = _aio_do_request;
		break;
	case AIONOTIFY:
		workers = &__workers_no;
		nextworker = &__nextworker_no;
		func = _aio_do_notify;
		aio_workerscnt = &__no_workerscnt;
		break;
	default:
		aio_panic("_aio_create_worker: invalid mode");
		break;
	}

	if ((aiowp = _aio_worker_alloc()) == NULL)
		return (-1);

	if (reqp) {
		reqp->req_state = AIO_REQ_QUEUED;
		reqp->req_worker = aiowp;
		aiowp->work_head1 = reqp;
		aiowp->work_tail1 = reqp;
		aiowp->work_next1 = reqp;
		aiowp->work_count1 = 1;
		aiowp->work_minload1 = 1;
	}

	(void) pthread_sigmask(SIG_SETMASK, &maskset, &oset);
	error = thr_create(NULL, AIOSTKSIZE, func, aiowp,
	    THR_DAEMON | THR_SUSPENDED, &aiowp->work_tid);
	(void) pthread_sigmask(SIG_SETMASK, &oset, NULL);
	if (error) {
		if (reqp) {
			reqp->req_state = 0;
			reqp->req_worker = NULL;
		}
		_aio_worker_free(aiowp);
		return (-1);
	}

	lmutex_lock(&__aio_mutex);
	(*aio_workerscnt)++;
	if (*workers == NULL) {
		aiowp->work_forw = aiowp;
		aiowp->work_backw = aiowp;
		*nextworker = aiowp;
		*workers = aiowp;
	} else {
		aiowp->work_backw = (*workers)->work_backw;
		aiowp->work_forw = (*workers);
		(*workers)->work_backw->work_forw = aiowp;
		(*workers)->work_backw = aiowp;
	}
	_aio_worker_cnt++;
	lmutex_unlock(&__aio_mutex);

	(void) thr_continue(aiowp->work_tid);

	return (0);
}
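/*
 * Illustrative note: the splice above maintains a circular
 * doubly-linked list.  Given an existing ring A <-> B <-> C with
 * *workers == A (so A->work_backw == C), inserting worker D sets
 * D->work_backw = C, D->work_forw = A, C->work_forw = D and
 * A->work_backw = D, yielding A <-> B <-> C <-> D <-> A.
 */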
/*
 * This is the worker's main routine.
 * The task of this function is to execute all queued requests;
 * once the last pending request is executed this function will block
 * in _aio_idle().  A new incoming request must wake up this thread to
 * restart the work.
 * Every worker has its own work queue.  The queue lock is required
 * to synchronize the addition of new requests for this worker or
 * cancellation of pending/running requests.
 *
 * Cancellation scenarios:
 * The cancellation of a request is done asynchronously using
 * _aio_cancel_req() from another thread context.
 * A queued request can be cancelled in different manners:
 * a) request is queued but not "in progress" or "done" (AIO_REQ_QUEUED):
 *	- lock the queue -> remove the request -> unlock the queue
 *	- this function/thread does not detect this cancellation process
 * b) request is in progress (AIO_REQ_INPROGRESS):
 *	- this function first allows the cancellation of the running
 *	  request with the flag "work_cancel_flg=1"
 *		see _aio_req_get() -> _aio_cancel_on()
 *	  During this phase, it is allowed to interrupt the worker
 *	  thread running the request (this thread) using the SIGAIOCANCEL
 *	  signal.
 *	  Once this thread returns from the kernel (because the request
 *	  is just done), then it must disable a possible cancellation
 *	  and proceed to finish the request.  To disable the cancellation
 *	  this thread must use _aio_cancel_off() to set "work_cancel_flg=0".
 * c) request is already done (AIO_REQ_DONE || AIO_REQ_DONEQ):
 *	same procedure as in a)
 *
 * Regarding b):
 * This thread uses sigsetjmp() to define the position in the code where
 * it wishes to continue working in the case that a SIGAIOCANCEL signal
 * is detected.
 * Normally this thread should get the cancellation signal during the
 * kernel phase (reading or writing).  In that case the signal handler
 * aiosigcancelhndlr() is activated using the worker thread context,
 * which in turn uses the siglongjmp() function to break the standard
 * code flow and jump to the "sigsetjmp" position, provided that
 * "work_cancel_flg" is set to "1".
 * Because "work_cancel_flg" is only manipulated by this worker
 * thread and it can only run on one CPU at a given time, it is not
 * necessary to protect that flag with the queue lock.
 * Returning from the kernel (read or write system call) we must
 * first disable the use of the SIGAIOCANCEL signal and accordingly
 * the use of the siglongjmp() function to prevent a possible deadlock:
 * - It can happen that this worker thread returns from the kernel and
 *   blocks on "work_qlock1",
 * - then a second thread cancels the apparently "in progress" request
 *   and sends the SIGAIOCANCEL signal to the worker thread,
 * - the worker thread is granted "work_qlock1" and returns from the
 *   kernel,
 * - the kernel detects the pending signal and activates the signal
 *   handler instead,
 * - if "work_cancel_flg" is still set then the signal handler
 *   would use siglongjmp() to cancel the "in progress" request and
 *   would try to acquire the same work_qlock1 in _aio_req_get()
 *   for a second time => deadlock.
 * To avoid that situation we disable the cancellation of the request
 * in progress BEFORE we try to acquire work_qlock1.
 * In that case the signal handler will not call siglongjmp() and the
 * worker thread will continue running the standard code flow.
 * This thread must then check the AIO_REQ_CANCELED flag to emulate
 * the siglongjmp() that would otherwise have been required, freeing
 * work_qlock1 and avoiding a deadlock.
 */
void *
_aio_do_request(void *arglist)
{
	aio_worker_t *aiowp = (aio_worker_t *)arglist;
	ulwp_t *self = curthread;
	struct aio_args *arg;
	aio_req_t *reqp;		/* current AIO request */
	ssize_t retval;
	int error;

	if (pthread_setspecific(_aio_key, aiowp) != 0)
		aio_panic("_aio_do_request, pthread_setspecific()");
	(void) pthread_sigmask(SIG_SETMASK, &_worker_set, NULL);
	ASSERT(aiowp->work_req == NULL);

	/*
	 * We resume here when an operation is cancelled.
	 * On first entry, aiowp->work_req == NULL, so all
	 * we do is block SIGAIOCANCEL.
	 */
	(void) sigsetjmp(aiowp->work_jmp_buf, 0);
	ASSERT(self->ul_sigdefer == 0);

	sigoff(self);	/* block SIGAIOCANCEL */
	if (aiowp->work_req != NULL)
		_aio_finish_request(aiowp, -1, ECANCELED);

	for (;;) {
		/*
		 * Put completed requests on aio_done_list.  This has
		 * to be done as part of the main loop to ensure that
		 * we don't artificially starve any aiowait'ers.
		 */
		if (aiowp->work_done1)
			_aio_work_done(aiowp);
top:
		/* consume any deferred SIGAIOCANCEL signal here */
		sigon(self);
		sigoff(self);

		while ((reqp = _aio_req_get(aiowp)) == NULL) {
			if (_aio_idle(aiowp) != 0)
				goto top;
		}
		arg = &reqp->req_args;
		ASSERT(reqp->req_state == AIO_REQ_INPROGRESS ||
		    reqp->req_state == AIO_REQ_CANCELED);
		error = 0;

		switch (reqp->req_op) {
		case AIOREAD:
		case AIOAREAD:
			sigon(self);	/* unblock SIGAIOCANCEL */
			retval = pread(arg->fd, arg->buf,
			    arg->bufsz, arg->offset);
			if (retval == -1) {
				if (errno == ESPIPE) {
					retval = read(arg->fd,
					    arg->buf, arg->bufsz);
					if (retval == -1)
						error = errno;
				} else {
					error = errno;
				}
			}
			sigoff(self);	/* block SIGAIOCANCEL */
			break;
		case AIOWRITE:
		case AIOAWRITE:
			sigon(self);	/* unblock SIGAIOCANCEL */
			retval = pwrite(arg->fd, arg->buf,
			    arg->bufsz, arg->offset);
			if (retval == -1) {
				if (errno == ESPIPE) {
					retval = write(arg->fd,
					    arg->buf, arg->bufsz);
					if (retval == -1)
						error = errno;
				} else {
					error = errno;
				}
			}
			sigoff(self);	/* block SIGAIOCANCEL */
			break;
#if !defined(_LP64)
		case AIOAREAD64:
			sigon(self);	/* unblock SIGAIOCANCEL */
			retval = pread64(arg->fd, arg->buf,
			    arg->bufsz, arg->offset);
			if (retval == -1) {
				if (errno == ESPIPE) {
					retval = read(arg->fd,
					    arg->buf, arg->bufsz);
					if (retval == -1)
						error = errno;
				} else {
					error = errno;
				}
			}
			sigoff(self);	/* block SIGAIOCANCEL */
			break;
		case AIOAWRITE64:
			sigon(self);	/* unblock SIGAIOCANCEL */
			retval = pwrite64(arg->fd, arg->buf,
			    arg->bufsz, arg->offset);
			if (retval == -1) {
				if (errno == ESPIPE) {
					retval = write(arg->fd,
					    arg->buf, arg->bufsz);
					if (retval == -1)
						error = errno;
				} else {
					error = errno;
				}
			}
			sigoff(self);	/* block SIGAIOCANCEL */
			break;
#endif	/* !defined(_LP64) */
		case AIOFSYNC:
			if (_aio_fsync_del(aiowp, reqp))
				goto top;
			ASSERT(reqp->req_head == NULL);
			/*
			 * All writes for this fsync request are now
			 * acknowledged.  Now make these writes visible
			 * and put the final request into the hash table.
			 */
			if (reqp->req_state == AIO_REQ_CANCELED) {
				/* EMPTY */;
			} else if (arg->offset == O_SYNC) {
				if ((retval = __fdsync(arg->fd, FSYNC)) == -1)
					error = errno;
			} else {
				if ((retval = __fdsync(arg->fd, FDSYNC)) == -1)
					error = errno;
			}
			if (_aio_hash_insert(reqp->req_resultp, reqp) != 0)
				aio_panic("_aio_do_request(): AIOFSYNC: "
				    "request already in hash table");
			break;
		default:
			aio_panic("_aio_do_request, bad op");
		}

		_aio_finish_request(aiowp, retval, error);
	}
	/* NOTREACHED */
	return (NULL);
}
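/*
 * Minimal sketch of the cancellation pattern described above (not
 * part of this file; "handler" and "cancel_ok" are hypothetical
 * stand-ins for aiosigcancelhndlr() and work_cancel_flg):
 *
 *	static sigjmp_buf env;
 *
 *	void
 *	handler(int sig)
 *	{
 *		if (cancel_ok)
 *			siglongjmp(env, 1);	// abandon the request
 *	}
 *
 *	(void) sigsetjmp(env, 0);		// resume point
 *	for (;;) {
 *		cancel_ok = 1;			// window opens
 *		retval = pread(fd, buf, bufsz, off);
 *		cancel_ok = 0;			// window closes before
 *		finish_request(retval);		// taking any locks
 *	}
 */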
/*
 * Perform the tail processing for _aio_do_request().
 * The in-progress request may or may not have been cancelled.
 */
static void
_aio_finish_request(aio_worker_t *aiowp, ssize_t retval, int error)
{
	aio_req_t *reqp;

	sig_mutex_lock(&aiowp->work_qlock1);
	if ((reqp = aiowp->work_req) == NULL)
		sig_mutex_unlock(&aiowp->work_qlock1);
	else {
		aiowp->work_req = NULL;
		if (reqp->req_state == AIO_REQ_CANCELED) {
			retval = -1;
			error = ECANCELED;
		}
		if (!POSIX_AIO(reqp)) {
			sig_mutex_unlock(&aiowp->work_qlock1);
			sig_mutex_lock(&__aio_mutex);
			if (reqp->req_state == AIO_REQ_INPROGRESS)
				reqp->req_state = AIO_REQ_DONE;
			_aio_req_done_cnt++;
			_aio_set_result(reqp, retval, error);
			if (error == ECANCELED)
				_aio_outstand_cnt--;
			sig_mutex_unlock(&__aio_mutex);
		} else {
			if (reqp->req_state == AIO_REQ_INPROGRESS)
				reqp->req_state = AIO_REQ_DONE;
			sig_mutex_unlock(&aiowp->work_qlock1);
			_aiodone(reqp, retval, error);
		}
	}
}

void
_aio_req_mark_done(aio_req_t *reqp)
{
#if !defined(_LP64)
	if (reqp->req_largefile)
		((aiocb64_t *)reqp->req_aiocbp)->aio_state = USERAIO_DONE;
	else
#endif
		((aiocb_t *)reqp->req_aiocbp)->aio_state = USERAIO_DONE;
}

/*
 * Sleep for 'ticks' clock ticks to give somebody else a chance to run,
 * hopefully to consume one of our queued signals.
 */
static void
_aio_delay(int ticks)
{
	(void) usleep(ticks * (MICROSEC / hz));
}
/*
 * Actually send the notifications.
 * We could block indefinitely here if the application
 * is not listening for the signal or port notifications.
 */
static void
send_notification(notif_param_t *npp)
{
	extern int __sigqueue(pid_t pid, int signo,
	    /* const union sigval */ void *value, int si_code, int block);

	if (npp->np_signo)
		(void) __sigqueue(__pid, npp->np_signo, npp->np_user,
		    SI_ASYNCIO, 1);
	else if (npp->np_port >= 0)
		(void) _port_dispatch(npp->np_port, 0, PORT_SOURCE_AIO,
		    npp->np_event, npp->np_object, npp->np_user);

	if (npp->np_lio_signo)
		(void) __sigqueue(__pid, npp->np_lio_signo, npp->np_lio_user,
		    SI_ASYNCIO, 1);
	else if (npp->np_lio_port >= 0)
		(void) _port_dispatch(npp->np_lio_port, 0, PORT_SOURCE_AIO,
		    npp->np_lio_event, npp->np_lio_object, npp->np_lio_user);
}

/*
 * Asynchronous notification worker.
 */
void *
_aio_do_notify(void *arg)
{
	aio_worker_t *aiowp = (aio_worker_t *)arg;
	aio_req_t *reqp;

	/*
	 * This isn't really necessary.  All signals are blocked.
	 */
	if (pthread_setspecific(_aio_key, aiowp) != 0)
		aio_panic("_aio_do_notify, pthread_setspecific()");

	/*
	 * Notifications are never cancelled.
	 * All signals remain blocked, forever.
	 */
	for (;;) {
		while ((reqp = _aio_req_get(aiowp)) == NULL) {
			if (_aio_idle(aiowp) != 0)
				aio_panic("_aio_do_notify: _aio_idle() failed");
		}
		send_notification(&reqp->req_notify);
		_aio_req_free(reqp);
	}

	/* NOTREACHED */
	return (NULL);
}
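/*
 * Illustrative sketch (not part of this file): an application using
 * SIGEV_PORT retrieves the event delivered by _port_dispatch() above
 * with port_get(3C):
 *
 *	port_event_t pe;
 *
 *	if (port_get(port, &pe, NULL) == 0 &&
 *	    pe.portev_source == PORT_SOURCE_AIO) {
 *		aiocb_t *cbp = (aiocb_t *)pe.portev_object;
 *		// inspect aio_error(cbp) and aio_return(cbp)
 *	}
 */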
/*
 * Do the completion semantics for a request that was either canceled
 * by _aio_cancel_req() or was completed by _aio_do_request().
 */
static void
_aiodone(aio_req_t *reqp, ssize_t retval, int error)
{
	aio_result_t *resultp = reqp->req_resultp;
	int notify = 0;
	aio_lio_t *head;
	int sigev_none;
	int sigev_signal;
	int sigev_thread;
	int sigev_port;
	notif_param_t np;

	/*
	 * We call _aiodone() only for Posix I/O.
	 */
	ASSERT(POSIX_AIO(reqp));

	sigev_none = 0;
	sigev_signal = 0;
	sigev_thread = 0;
	sigev_port = 0;
	np.np_signo = 0;
	np.np_port = -1;
	np.np_lio_signo = 0;
	np.np_lio_port = -1;

	switch (reqp->req_sigevent.sigev_notify) {
	case SIGEV_NONE:
		sigev_none = 1;
		break;
	case SIGEV_SIGNAL:
		sigev_signal = 1;
		break;
	case SIGEV_THREAD:
		sigev_thread = 1;
		break;
	case SIGEV_PORT:
		sigev_port = 1;
		break;
	default:
		aio_panic("_aiodone: improper sigev_notify");
		break;
	}

	/*
	 * Figure out the notification parameters while holding __aio_mutex.
	 * Actually perform the notifications after dropping __aio_mutex.
	 * This allows us to sleep for a long time (if the notifications
	 * incur delays) without impeding other async I/O operations.
	 */

	sig_mutex_lock(&__aio_mutex);

	if (sigev_signal) {
		if ((np.np_signo = reqp->req_sigevent.sigev_signo) != 0)
			notify = 1;
		np.np_user = reqp->req_sigevent.sigev_value.sival_ptr;
	} else if (sigev_thread | sigev_port) {
		if ((np.np_port = reqp->req_sigevent.sigev_signo) >= 0)
			notify = 1;
		np.np_event = reqp->req_op;
		if (np.np_event == AIOFSYNC && reqp->req_largefile)
			np.np_event = AIOFSYNC64;
		np.np_object = (uintptr_t)reqp->req_aiocbp;
		np.np_user = reqp->req_sigevent.sigev_value.sival_ptr;
	}

	if (resultp->aio_errno == EINPROGRESS)
		_aio_set_result(reqp, retval, error);

	_aio_outstand_cnt--;

	head = reqp->req_head;
	reqp->req_head = NULL;

	if (sigev_none) {
		_aio_enq_doneq(reqp);
		reqp = NULL;
	} else {
		(void) _aio_hash_del(resultp);
		_aio_req_mark_done(reqp);
	}

	_aio_waitn_wakeup();

	/*
	 * __aio_waitn() sets AIO_WAIT_INPROGRESS and
	 * __aio_suspend() increments "_aio_kernel_suspend"
	 * when they are waiting in the kernel for completed I/Os.
	 *
	 * _kaio(AIONOTIFY) awakes the corresponding function
	 * in the kernel; then the corresponding __aio_waitn() or
	 * __aio_suspend() function could reap the recently
	 * completed I/Os (_aiodone()).
	 */
	if ((_aio_flags & AIO_WAIT_INPROGRESS) || _aio_kernel_suspend > 0)
		(void) _kaio(AIONOTIFY);

	sig_mutex_unlock(&__aio_mutex);

	if (head != NULL) {
		/*
		 * If all the lio requests have completed,
		 * prepare to notify the waiting thread.
		 */
		sig_mutex_lock(&head->lio_mutex);
		ASSERT(head->lio_refcnt == head->lio_nent);
		if (head->lio_refcnt == 1) {
			int waiting = 0;
			if (head->lio_mode == LIO_WAIT) {
				if ((waiting = head->lio_waiting) != 0)
					(void) cond_signal(&head->lio_cond_cv);
			} else if (head->lio_port < 0) { /* none or signal */
				if ((np.np_lio_signo = head->lio_signo) != 0)
					notify = 1;
				np.np_lio_user = head->lio_sigval.sival_ptr;
			} else {	/* thread or port */
				notify = 1;
				np.np_lio_port = head->lio_port;
				np.np_lio_event = head->lio_event;
				np.np_lio_object =
				    (uintptr_t)head->lio_sigevent;
				np.np_lio_user = head->lio_sigval.sival_ptr;
			}
			head->lio_nent = head->lio_refcnt = 0;
			sig_mutex_unlock(&head->lio_mutex);
			if (waiting == 0)
				_aio_lio_free(head);
		} else {
			head->lio_nent--;
			head->lio_refcnt--;
			sig_mutex_unlock(&head->lio_mutex);
		}
	}

	/*
	 * The request is completed; now perform the notifications.
	 */
	if (notify) {
		if (reqp != NULL) {
			/*
			 * We usually put the request on the notification
			 * queue because we don't want to block and delay
			 * other operations behind us in the work queue.
			 * Also we must never block on a cancel notification
			 * because we are being called from an application
			 * thread in this case and that could lead to deadlock
			 * if no other thread is receiving notifications.
			 */
			reqp->req_notify = np;
			reqp->req_op = AIONOTIFY;
			_aio_req_add(reqp, &__workers_no, AIONOTIFY);
			reqp = NULL;
		} else {
			/*
			 * We already put the request on the done queue,
			 * so we can't queue it to the notification queue.
			 * Just do the notification directly.
			 */
			send_notification(&np);
		}
	}

	if (reqp != NULL)
		_aio_req_free(reqp);
}

/*
 * Delete fsync requests from list head until there is
 * only one left.  Return 0 when there is only one,
 * otherwise return a non-zero value.
 */
static int
_aio_fsync_del(aio_worker_t *aiowp, aio_req_t *reqp)
{
	aio_lio_t *head = reqp->req_head;
	int rval = 0;

	ASSERT(reqp == aiowp->work_req);
	sig_mutex_lock(&aiowp->work_qlock1);
	sig_mutex_lock(&head->lio_mutex);
	if (head->lio_refcnt > 1) {
		head->lio_refcnt--;
		head->lio_nent--;
		aiowp->work_req = NULL;
		sig_mutex_unlock(&head->lio_mutex);
		sig_mutex_unlock(&aiowp->work_qlock1);
		sig_mutex_lock(&__aio_mutex);
		_aio_outstand_cnt--;
		_aio_waitn_wakeup();
		sig_mutex_unlock(&__aio_mutex);
		_aio_req_free(reqp);
		return (1);
	}
	ASSERT(head->lio_nent == 1 && head->lio_refcnt == 1);
	reqp->req_head = NULL;
	if (head->lio_canned)
		reqp->req_state = AIO_REQ_CANCELED;
	if (head->lio_mode == LIO_DESTROY) {
		aiowp->work_req = NULL;
		rval = 1;
	}
	sig_mutex_unlock(&head->lio_mutex);
	sig_mutex_unlock(&aiowp->work_qlock1);
	head->lio_refcnt--;
	head->lio_nent--;
	_aio_lio_free(head);
	if (rval != 0)
		_aio_req_free(reqp);
	return (rval);
}

/*
 * A worker is set idle when its work queue is empty.
 * The worker checks again that it has no more work
 * and then goes to sleep waiting for more work.
 */
int
_aio_idle(aio_worker_t *aiowp)
{
	int error = 0;

	sig_mutex_lock(&aiowp->work_qlock1);
	if (aiowp->work_count1 == 0) {
		ASSERT(aiowp->work_minload1 == 0);
		aiowp->work_idleflg = 1;
		/*
		 * A cancellation handler is not needed here.
		 * aio worker threads are never cancelled via pthread_cancel().
		 */
		error = sig_cond_wait(&aiowp->work_idle_cv,
		    &aiowp->work_qlock1);
		/*
		 * The idle flag is normally cleared before the worker is
		 * awakened by _aio_req_add().  On error (EINTR), we clear
		 * it ourselves.
		 */
		if (error)
			aiowp->work_idleflg = 0;
	}
	sig_mutex_unlock(&aiowp->work_qlock1);
	return (error);
}
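
/*
 * Minimal sketch of the idle-flag handshake between _aio_idle() and
 * _aio_req_add(), using hypothetical names.  Both sides operate under
 * the same queue lock, so a wakeup cannot slip between the empty-queue
 * check and the cond_wait():
 *
 *	// sleeper (worker)
 *	lock(&q->lock);
 *	if (q->count == 0) {
 *		q->idle = 1;
 *		cond_wait(&q->cv, &q->lock);	// atomically drops q->lock
 *	}
 *	unlock(&q->lock);
 *
 *	// waker (submitter)
 *	lock(&q->lock);
 *	enqueue(q, req);
 *	if (q->count++ == 0 && q->idle) {
 *		q->idle = 0;			// clear before signaling
 *		cond_signal(&q->cv);
 *	}
 *	unlock(&q->lock);
 */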

/*
 * A worker's completed AIO requests are placed onto a global
 * done queue.  The application is only sent a SIGIO signal if
 * the process has a handler enabled and it is not waiting via
 * aiowait().
 */
static void
_aio_work_done(aio_worker_t *aiowp)
{
	aio_req_t *reqp;

	sig_mutex_lock(&aiowp->work_qlock1);
	reqp = aiowp->work_prev1;
	reqp->req_next = NULL;
	aiowp->work_done1 = 0;
	aiowp->work_tail1 = aiowp->work_next1;
	if (aiowp->work_tail1 == NULL)
		aiowp->work_head1 = NULL;
	aiowp->work_prev1 = NULL;
	sig_mutex_unlock(&aiowp->work_qlock1);
	sig_mutex_lock(&__aio_mutex);
	_aio_donecnt++;
	_aio_outstand_cnt--;
	_aio_req_done_cnt--;
	ASSERT(_aio_donecnt > 0 &&
	    _aio_outstand_cnt >= 0 &&
	    _aio_req_done_cnt >= 0);
	ASSERT(reqp != NULL);

	if (_aio_done_tail == NULL) {
		_aio_done_head = _aio_done_tail = reqp;
	} else {
		_aio_done_head->req_next = reqp;
		_aio_done_head = reqp;
	}

	if (_aiowait_flag) {
		sig_mutex_unlock(&__aio_mutex);
		(void) _kaio(AIONOTIFY);
	} else {
		sig_mutex_unlock(&__aio_mutex);
		if (_sigio_enabled)
			(void) kill(__pid, SIGIO);
	}
}

/*
 * The done queue consists of AIO requests that are in either the
 * AIO_REQ_DONE or AIO_REQ_CANCELED state.  Requests that were cancelled
 * are discarded.  If the done queue is empty then NULL is returned.
 * Otherwise the address of a done aio_result_t is returned.
 */
aio_result_t *
_aio_req_done(void)
{
	aio_req_t *reqp;
	aio_result_t *resultp;

	ASSERT(MUTEX_HELD(&__aio_mutex));

	if ((reqp = _aio_done_tail) != NULL) {
		if ((_aio_done_tail = reqp->req_next) == NULL)
			_aio_done_head = NULL;
		ASSERT(_aio_donecnt > 0);
		_aio_donecnt--;
		(void) _aio_hash_del(reqp->req_resultp);
		resultp = reqp->req_resultp;
		ASSERT(reqp->req_state == AIO_REQ_DONE);
		_aio_req_free(reqp);
		return (resultp);
	}
	/* is queue empty? */
	if (reqp == NULL && _aio_outstand_cnt == 0) {
		return ((aio_result_t *)-1);
	}
	return (NULL);
}
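
/*
 * Illustrative sketch (not part of this library): the Solaris-style
 * consumer that the done queue above ultimately serves.  aiowait(3AIO)
 * returns the aio_result_t of a completed request, or (aio_result_t *)-1
 * with errno set to EINVAL when no requests are outstanding, mirroring
 * the NULL / (aio_result_t *)-1 distinction made by _aio_req_done().
 * consume() is a hypothetical application function.
 *
 *	#include <sys/asynch.h>
 *
 *	static void
 *	old_style_read(int fd)
 *	{
 *		static char buf[8192];
 *		static aio_result_t res;
 *		aio_result_t *donep;
 *
 *		if (aioread(fd, buf, sizeof (buf), 0, SEEK_SET, &res) != 0)
 *			return;
 *		donep = aiowait(NULL);		// NULL timeout: block
 *		if (donep == &res && res.aio_errno == 0)
 *			consume(buf, res.aio_return);
 *	}
 */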

/*
 * Set the return and errno values for the application's use.
 *
 * For the POSIX interfaces, we must set the return value first followed
 * by the errno value because the POSIX interfaces allow for a change
 * in the errno value from EINPROGRESS to something else to signal
 * the completion of the asynchronous request.
 *
 * The opposite is true for the Solaris interfaces.  These allow for
 * a change in the return value from AIO_INPROGRESS to something else
 * to signal the completion of the asynchronous request.
 */
void
_aio_set_result(aio_req_t *reqp, ssize_t retval, int error)
{
	aio_result_t *resultp = reqp->req_resultp;

	if (POSIX_AIO(reqp)) {
		resultp->aio_return = retval;
		membar_producer();
		resultp->aio_errno = error;
	} else {
		resultp->aio_errno = error;
		membar_producer();
		resultp->aio_return = retval;
	}
}
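
/*
 * Sketch of the reader-side protocol implied by the ordering above
 * (POSIX case; poll_result() is a hypothetical helper): read aio_errno
 * first, then issue the matching consumer barrier before trusting
 * aio_return.
 *
 *	#include <atomic.h>
 *
 *	static int
 *	poll_result(aio_result_t *resultp, ssize_t *retvalp)
 *	{
 *		int error = resultp->aio_errno;
 *
 *		if (error == EINPROGRESS)
 *			return (0);	// not yet complete
 *		membar_consumer();	// pairs with membar_producer()
 *		*retvalp = resultp->aio_return;
 *		return (1);
 *	}
 */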

/*
 * Add an AIO request onto the next work queue.
 * A circular list of workers is used to choose the next worker.
 */
void
_aio_req_add(aio_req_t *reqp, aio_worker_t **nextworker, int mode)
{
	ulwp_t *self = curthread;
	aio_worker_t *aiowp;
	aio_worker_t *first;
	int load_bal_flg = 1;
	int found;

	ASSERT(reqp->req_state != AIO_REQ_DONEQ);
	reqp->req_next = NULL;
	/*
	 * Try to acquire the next worker's work queue.  If it is locked,
	 * then search the list of workers until a queue is found unlocked,
	 * or until the list is completely traversed at which point another
	 * worker will be created.
	 */
	sigoff(self);		/* defer SIGIO */
	sig_mutex_lock(&__aio_mutex);
	first = aiowp = *nextworker;
	if (mode != AIONOTIFY)
		_aio_outstand_cnt++;
	sig_mutex_unlock(&__aio_mutex);

	switch (mode) {
	case AIOREAD:
	case AIOWRITE:
	case AIOAREAD:
	case AIOAWRITE:
#if !defined(_LP64)
	case AIOAREAD64:
	case AIOAWRITE64:
#endif
		/* try to find an idle worker */
		found = 0;
		do {
			if (sig_mutex_trylock(&aiowp->work_qlock1) == 0) {
				if (aiowp->work_idleflg) {
					found = 1;
					break;
				}
				sig_mutex_unlock(&aiowp->work_qlock1);
			}
		} while ((aiowp = aiowp->work_forw) != first);

		if (found) {
			aiowp->work_minload1++;
			break;
		}

		/* try to acquire some worker's queue lock */
		do {
			if (sig_mutex_trylock(&aiowp->work_qlock1) == 0) {
				found = 1;
				break;
			}
		} while ((aiowp = aiowp->work_forw) != first);

		/*
		 * Create more workers when the workers appear overloaded.
		 * Either all the workers are busy draining their queues
		 * or no worker's queue lock could be acquired.
		 */
		if (!found) {
			if (_aio_worker_cnt < _max_workers) {
				if (_aio_create_worker(reqp, mode))
					aio_panic("_aio_req_add: add worker");
				sigon(self);	/* reenable SIGIO */
				return;
			}

			/*
			 * No worker is available and we have already
			 * created _max_workers; keep going through the
			 * list slowly until we get a lock.
			 */
			while (sig_mutex_trylock(&aiowp->work_qlock1) != 0) {
				/*
				 * give someone else a chance
				 */
				_aio_delay(1);
				aiowp = aiowp->work_forw;
			}
		}

		ASSERT(MUTEX_HELD(&aiowp->work_qlock1));
		if (_aio_worker_cnt < _max_workers &&
		    aiowp->work_minload1 >= _minworkload) {
			sig_mutex_unlock(&aiowp->work_qlock1);
			sig_mutex_lock(&__aio_mutex);
			*nextworker = aiowp->work_forw;
			sig_mutex_unlock(&__aio_mutex);
			if (_aio_create_worker(reqp, mode))
				aio_panic("_aio_req_add: add worker");
			sigon(self);	/* reenable SIGIO */
			return;
		}
		aiowp->work_minload1++;
		break;
	case AIOFSYNC:
	case AIONOTIFY:
		load_bal_flg = 0;
		sig_mutex_lock(&aiowp->work_qlock1);
		break;
	default:
		aio_panic("_aio_req_add: invalid mode");
		break;
	}
	/*
	 * Put request onto worker's work queue.
	 */
	if (aiowp->work_tail1 == NULL) {
		ASSERT(aiowp->work_count1 == 0);
		aiowp->work_tail1 = reqp;
		aiowp->work_next1 = reqp;
	} else {
		aiowp->work_head1->req_next = reqp;
		if (aiowp->work_next1 == NULL)
			aiowp->work_next1 = reqp;
	}
	reqp->req_state = AIO_REQ_QUEUED;
	reqp->req_worker = aiowp;
	aiowp->work_head1 = reqp;
	/*
	 * Awaken worker if it is not currently active.
	 */
	if (aiowp->work_count1++ == 0 && aiowp->work_idleflg) {
		aiowp->work_idleflg = 0;
		(void) cond_signal(&aiowp->work_idle_cv);
	}
	sig_mutex_unlock(&aiowp->work_qlock1);

	if (load_bal_flg) {
		sig_mutex_lock(&__aio_mutex);
		*nextworker = aiowp->work_forw;
		sig_mutex_unlock(&__aio_mutex);
	}
	sigon(self);	/* reenable SIGIO */
}

/*
 * Get an AIO request for a specified worker.
 * If the work queue is empty, return NULL.
 */
aio_req_t *
_aio_req_get(aio_worker_t *aiowp)
{
	aio_req_t *reqp;

	sig_mutex_lock(&aiowp->work_qlock1);
	if ((reqp = aiowp->work_next1) != NULL) {
		/*
		 * Remove a POSIX request from the queue; the
		 * request queue is a singly linked list
		 * with a previous pointer.  The request is
		 * removed by updating the previous pointer.
		 *
		 * Non-POSIX requests are left on the queue
		 * to eventually be placed on the done queue.
		 */

		if (POSIX_AIO(reqp)) {
			if (aiowp->work_prev1 == NULL) {
				aiowp->work_tail1 = reqp->req_next;
				if (aiowp->work_tail1 == NULL)
					aiowp->work_head1 = NULL;
			} else {
				aiowp->work_prev1->req_next = reqp->req_next;
				if (aiowp->work_head1 == reqp)
					aiowp->work_head1 = reqp->req_next;
			}

		} else {
			aiowp->work_prev1 = reqp;
			ASSERT(aiowp->work_done1 >= 0);
			aiowp->work_done1++;
		}
		ASSERT(reqp != reqp->req_next);
		aiowp->work_next1 = reqp->req_next;
		ASSERT(aiowp->work_count1 >= 1);
		aiowp->work_count1--;
		switch (reqp->req_op) {
		case AIOREAD:
		case AIOWRITE:
		case AIOAREAD:
		case AIOAWRITE:
#if !defined(_LP64)
		case AIOAREAD64:
		case AIOAWRITE64:
#endif
			ASSERT(aiowp->work_minload1 > 0);
			aiowp->work_minload1--;
			break;
		}
		reqp->req_state = AIO_REQ_INPROGRESS;
	}
	aiowp->work_req = reqp;
	ASSERT(reqp != NULL || aiowp->work_count1 == 0);
	sig_mutex_unlock(&aiowp->work_qlock1);
	return (reqp);
}

static void
_aio_req_del(aio_worker_t *aiowp, aio_req_t *reqp, int ostate)
{
	aio_req_t **last;
	aio_req_t *lastrp;
	aio_req_t *next;

	ASSERT(aiowp != NULL);
	ASSERT(MUTEX_HELD(&aiowp->work_qlock1));
	if (POSIX_AIO(reqp)) {
		if (ostate != AIO_REQ_QUEUED)
			return;
	}
	last = &aiowp->work_tail1;
	lastrp = aiowp->work_tail1;
	ASSERT(ostate == AIO_REQ_QUEUED || ostate == AIO_REQ_INPROGRESS);
	while ((next = *last) != NULL) {
		if (next == reqp) {
			*last = next->req_next;
			if (aiowp->work_next1 == next)
				aiowp->work_next1 = next->req_next;

			if ((next->req_next != NULL) ||
			    (aiowp->work_done1 == 0)) {
				if (aiowp->work_head1 == next)
					aiowp->work_head1 = next->req_next;
				if (aiowp->work_prev1 == next)
					aiowp->work_prev1 =
					    next->req_next;
			} else {
				if (aiowp->work_head1 == next)
					aiowp->work_head1 = lastrp;
				if (aiowp->work_prev1 == next)
					aiowp->work_prev1 = lastrp;
			}

			if (ostate == AIO_REQ_QUEUED) {
				ASSERT(aiowp->work_count1 >= 1);
				aiowp->work_count1--;
				ASSERT(aiowp->work_minload1 >= 1);
				aiowp->work_minload1--;
			} else {
				ASSERT(ostate == AIO_REQ_INPROGRESS &&
				    !POSIX_AIO(reqp));
				aiowp->work_done1--;
			}
			return;
		}
		last = &next->req_next;
		lastrp = next;
	}
	/* NOTREACHED */
}

static void
_aio_enq_doneq(aio_req_t *reqp)
{
	if (_aio_doneq == NULL) {
		_aio_doneq = reqp;
		reqp->req_next = reqp->req_prev = reqp;
	} else {
		reqp->req_next = _aio_doneq;
		reqp->req_prev = _aio_doneq->req_prev;
		_aio_doneq->req_prev->req_next = reqp;
		_aio_doneq->req_prev = reqp;
	}
	reqp->req_state = AIO_REQ_DONEQ;
	_aio_doneq_cnt++;
}

/*
 * The caller must own __aio_mutex.
 */
aio_req_t *
_aio_req_remove(aio_req_t *reqp)
{
	if (reqp && reqp->req_state != AIO_REQ_DONEQ)
		return (NULL);

	if (reqp) {
		/* request in done queue */
		if (_aio_doneq == reqp)
			_aio_doneq = reqp->req_next;
		if (_aio_doneq == reqp) {
			/* only one request on queue */
			_aio_doneq = NULL;
		} else {
			aio_req_t *tmp = reqp->req_next;
			reqp->req_prev->req_next = tmp;
			tmp->req_prev = reqp->req_prev;
		}
	} else if ((reqp = _aio_doneq) != NULL) {
		if (reqp == reqp->req_next) {
			/* only one request on queue */
			_aio_doneq = NULL;
		} else {
			reqp->req_prev->req_next = _aio_doneq = reqp->req_next;
			_aio_doneq->req_prev = reqp->req_prev;
		}
	}
	if (reqp) {
		_aio_doneq_cnt--;
		reqp->req_next = reqp->req_prev = reqp;
		reqp->req_state = AIO_REQ_DONE;
	}
	return (reqp);
}

/*
 * An AIO request is identified by an aio_result_t pointer.  The library
 * maps this aio_result_t pointer to its internal representation using a
 * hash table.  This function adds an aio_result_t pointer to the hash table.
 */
static int
_aio_hash_insert(aio_result_t *resultp, aio_req_t *reqp)
{
	aio_hash_t *hashp;
	aio_req_t **prev;
	aio_req_t *next;

	hashp = _aio_hash + AIOHASH(resultp);
	lmutex_lock(&hashp->hash_lock);
	prev = &hashp->hash_ptr;
	while ((next = *prev) != NULL) {
		if (resultp == next->req_resultp) {
			lmutex_unlock(&hashp->hash_lock);
			return (-1);
		}
		prev = &next->req_link;
	}
	*prev = reqp;
	ASSERT(reqp->req_link == NULL);
	lmutex_unlock(&hashp->hash_lock);
	return (0);
}

/*
 * Remove an entry from the hash table.
 */
aio_req_t *
_aio_hash_del(aio_result_t *resultp)
{
	aio_hash_t *hashp;
	aio_req_t **prev;
	aio_req_t *next = NULL;

	if (_aio_hash != NULL) {
		hashp = _aio_hash + AIOHASH(resultp);
		lmutex_lock(&hashp->hash_lock);
		prev = &hashp->hash_ptr;
		while ((next = *prev) != NULL) {
			if (resultp == next->req_resultp) {
				*prev = next->req_link;
				next->req_link = NULL;
				break;
			}
			prev = &next->req_link;
		}
		lmutex_unlock(&hashp->hash_lock);
	}
	return (next);
}

/*
 * Find an entry in the hash table.
 */
aio_req_t *
_aio_hash_find(aio_result_t *resultp)
{
	aio_hash_t *hashp;
	aio_req_t **prev;
	aio_req_t *next = NULL;

	if (_aio_hash != NULL) {
		hashp = _aio_hash + AIOHASH(resultp);
		lmutex_lock(&hashp->hash_lock);
		prev = &hashp->hash_ptr;
		while ((next = *prev) != NULL) {
			if (resultp == next->req_resultp)
				break;
			prev = &next->req_link;
		}
		lmutex_unlock(&hashp->hash_lock);
	}
	return (next);
}
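
/*
 * The three routines above implement a chained hash keyed on the
 * aio_result_t address.  A plausible shape for the AIOHASH() macro
 * (the real definition lives in asyncio.h; this one is only an
 * illustration) discards the low-order alignment bits of the pointer
 * and folds the rest into the table size:
 *
 *	#define	HASHSZ		2048	// hypothetical table size
 *	#define	AIOHASH(resultp) \
 *		(((uintptr_t)(resultp) >> 3) % HASHSZ)
 *
 * Dropping the low bits keeps results embedded in adjacent aiocbs from
 * all colliding in the same bucket.
 */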

/*
 * AIO interface for POSIX
 */
int
_aio_rw(aiocb_t *aiocbp, aio_lio_t *lio_head, aio_worker_t **nextworker,
    int mode, int flg)
{
	aio_req_t *reqp;
	aio_args_t *ap;
	int kerr;

	if (aiocbp == NULL) {
		errno = EINVAL;
		return (-1);
	}

	/* initialize kaio */
	if (!_kaio_ok)
		_kaio_init();

	aiocbp->aio_state = NOCHECK;

	/*
	 * If we have been called because a list I/O
	 * kaio() failed, we don't want to repeat the
	 * system call.
	 */

	if (flg & AIO_KAIO) {
		/*
		 * Try kernel aio first.
		 * If errno is ENOTSUP/EBADFD,
		 * fall back to the thread implementation.
		 */
		if (_kaio_ok > 0 && KAIO_SUPPORTED(aiocbp->aio_fildes)) {
			aiocbp->aio_resultp.aio_errno = EINPROGRESS;
			aiocbp->aio_state = CHECK;
			kerr = (int)_kaio(mode, aiocbp);
			if (kerr == 0)
				return (0);
			if (errno != ENOTSUP && errno != EBADFD) {
				aiocbp->aio_resultp.aio_errno = errno;
				aiocbp->aio_resultp.aio_return = -1;
				aiocbp->aio_state = NOCHECK;
				return (-1);
			}
			if (errno == EBADFD)
				SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
		}
	}

	aiocbp->aio_resultp.aio_errno = EINPROGRESS;
	aiocbp->aio_state = USERAIO;

	if (!__uaio_ok && __uaio_init() == -1)
		return (-1);

	if ((reqp = _aio_req_alloc()) == NULL) {
		errno = EAGAIN;
		return (-1);
	}

	/*
	 * If an LIO request, add the list head to the aio request.
	 */
	reqp->req_head = lio_head;
	reqp->req_type = AIO_POSIX_REQ;
	reqp->req_op = mode;
	reqp->req_largefile = 0;

	if (aiocbp->aio_sigevent.sigev_notify == SIGEV_NONE) {
		reqp->req_sigevent.sigev_notify = SIGEV_NONE;
	} else if (aiocbp->aio_sigevent.sigev_notify == SIGEV_SIGNAL) {
		reqp->req_sigevent.sigev_notify = SIGEV_SIGNAL;
		reqp->req_sigevent.sigev_signo =
		    aiocbp->aio_sigevent.sigev_signo;
		reqp->req_sigevent.sigev_value.sival_ptr =
		    aiocbp->aio_sigevent.sigev_value.sival_ptr;
	} else if (aiocbp->aio_sigevent.sigev_notify == SIGEV_PORT) {
		port_notify_t *pn = aiocbp->aio_sigevent.sigev_value.sival_ptr;
		reqp->req_sigevent.sigev_notify = SIGEV_PORT;
		/*
		 * Reuse the sigevent structure to contain the port number
		 * and the user value.  Same for SIGEV_THREAD, below.
		 */
		reqp->req_sigevent.sigev_signo =
		    pn->portnfy_port;
		reqp->req_sigevent.sigev_value.sival_ptr =
		    pn->portnfy_user;
	} else if (aiocbp->aio_sigevent.sigev_notify == SIGEV_THREAD) {
		reqp->req_sigevent.sigev_notify = SIGEV_THREAD;
		/*
		 * The sigevent structure contains the port number
		 * and the user value.  Same for SIGEV_PORT, above.
		 */
		reqp->req_sigevent.sigev_signo =
		    aiocbp->aio_sigevent.sigev_signo;
		reqp->req_sigevent.sigev_value.sival_ptr =
		    aiocbp->aio_sigevent.sigev_value.sival_ptr;
	}

	reqp->req_resultp = &aiocbp->aio_resultp;
	reqp->req_aiocbp = aiocbp;
	ap = &reqp->req_args;
	ap->fd = aiocbp->aio_fildes;
	ap->buf = (caddr_t)aiocbp->aio_buf;
	ap->bufsz = aiocbp->aio_nbytes;
	ap->offset = aiocbp->aio_offset;

	if ((flg & AIO_NO_DUPS) &&
	    _aio_hash_insert(&aiocbp->aio_resultp, reqp) != 0) {
		aio_panic("_aio_rw(): request already in hash table");
		_aio_req_free(reqp);
		errno = EINVAL;
		return (-1);
	}
	_aio_req_add(reqp, nextworker, mode);
	return (0);
}
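
/*
 * Illustrative sketch (not part of this library): the application-side
 * submission that _aio_rw() services.  For SIGEV_PORT the caller passes
 * a port_notify_t through sigev_value, which _aio_rw() unpacks above
 * (port number into sigev_signo, user cookie into sigev_value).  The
 * aiocb, buffer, and port_notify_t must stay valid until the request
 * completes, hence the statics.
 *
 *	#include <aio.h>
 *	#include <port.h>
 *	#include <string.h>
 *
 *	static void
 *	submit_with_port(int fd)
 *	{
 *		static char buf[8192];
 *		static aiocb_t cb;
 *		static port_notify_t pn;
 *		int port = port_create();
 *
 *		(void) memset(&cb, 0, sizeof (cb));
 *		cb.aio_fildes = fd;
 *		cb.aio_buf = buf;
 *		cb.aio_nbytes = sizeof (buf);
 *		cb.aio_offset = 0;
 *		pn.portnfy_port = port;
 *		pn.portnfy_user = &cb;		// arbitrary user cookie
 *		cb.aio_sigevent.sigev_notify = SIGEV_PORT;
 *		cb.aio_sigevent.sigev_value.sival_ptr = &pn;
 *		(void) aio_read(&cb);
 *	}
 */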

#if !defined(_LP64)
/*
 * 64-bit AIO interface for POSIX
 */
int
_aio_rw64(aiocb64_t *aiocbp, aio_lio_t *lio_head, aio_worker_t **nextworker,
    int mode, int flg)
{
	aio_req_t *reqp;
	aio_args_t *ap;
	int kerr;

	if (aiocbp == NULL) {
		errno = EINVAL;
		return (-1);
	}

	/* initialize kaio */
	if (!_kaio_ok)
		_kaio_init();

	aiocbp->aio_state = NOCHECK;

	/*
	 * If we have been called because a list I/O
	 * kaio() failed, we don't want to repeat the
	 * system call.
	 */

	if (flg & AIO_KAIO) {
		/*
		 * Try kernel aio first.
		 * If errno is ENOTSUP/EBADFD,
		 * fall back to the thread implementation.
		 */
		if (_kaio_ok > 0 && KAIO_SUPPORTED(aiocbp->aio_fildes)) {
			aiocbp->aio_resultp.aio_errno = EINPROGRESS;
			aiocbp->aio_state = CHECK;
			kerr = (int)_kaio(mode, aiocbp);
			if (kerr == 0)
				return (0);
			if (errno != ENOTSUP && errno != EBADFD) {
				aiocbp->aio_resultp.aio_errno = errno;
				aiocbp->aio_resultp.aio_return = -1;
				aiocbp->aio_state = NOCHECK;
				return (-1);
			}
			if (errno == EBADFD)
				SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
		}
	}

	aiocbp->aio_resultp.aio_errno = EINPROGRESS;
	aiocbp->aio_state = USERAIO;

	if (!__uaio_ok && __uaio_init() == -1)
		return (-1);

	if ((reqp = _aio_req_alloc()) == NULL) {
		errno = EAGAIN;
		return (-1);
	}

	/*
	 * If an LIO request, add the list head to the aio request.
	 */
	reqp->req_head = lio_head;
	reqp->req_type = AIO_POSIX_REQ;
	reqp->req_op = mode;
	reqp->req_largefile = 1;

	if (aiocbp->aio_sigevent.sigev_notify == SIGEV_NONE) {
		reqp->req_sigevent.sigev_notify = SIGEV_NONE;
	} else if (aiocbp->aio_sigevent.sigev_notify == SIGEV_SIGNAL) {
		reqp->req_sigevent.sigev_notify = SIGEV_SIGNAL;
		reqp->req_sigevent.sigev_signo =
		    aiocbp->aio_sigevent.sigev_signo;
		reqp->req_sigevent.sigev_value.sival_ptr =
		    aiocbp->aio_sigevent.sigev_value.sival_ptr;
	} else if (aiocbp->aio_sigevent.sigev_notify == SIGEV_PORT) {
		port_notify_t *pn = aiocbp->aio_sigevent.sigev_value.sival_ptr;
		reqp->req_sigevent.sigev_notify = SIGEV_PORT;
		reqp->req_sigevent.sigev_signo =
		    pn->portnfy_port;
		reqp->req_sigevent.sigev_value.sival_ptr =
		    pn->portnfy_user;
	} else if (aiocbp->aio_sigevent.sigev_notify == SIGEV_THREAD) {
		reqp->req_sigevent.sigev_notify = SIGEV_THREAD;
		reqp->req_sigevent.sigev_signo =
		    aiocbp->aio_sigevent.sigev_signo;
		reqp->req_sigevent.sigev_value.sival_ptr =
		    aiocbp->aio_sigevent.sigev_value.sival_ptr;
	}

	reqp->req_resultp = &aiocbp->aio_resultp;
	reqp->req_aiocbp = aiocbp;
	ap = &reqp->req_args;
	ap->fd = aiocbp->aio_fildes;
	ap->buf = (caddr_t)aiocbp->aio_buf;
	ap->bufsz = aiocbp->aio_nbytes;
	ap->offset = aiocbp->aio_offset;

	if ((flg & AIO_NO_DUPS) &&
	    _aio_hash_insert(&aiocbp->aio_resultp, reqp) != 0) {
		aio_panic("_aio_rw64(): request already in hash table");
		_aio_req_free(reqp);
		errno = EINVAL;
		return (-1);
	}
	_aio_req_add(reqp, nextworker, mode);
	return (0);
}
#endif	/* !defined(_LP64) */