xref: /illumos-gate/usr/src/lib/libc/port/threads/rwlock.c (revision 8eea8e29)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include "lint.h"
30 #include "thr_uberdata.h"
31 
32 #include <sys/sdt.h>
33 
34 #define	TRY_FLAG		0x10
35 #define	READ_LOCK		0
36 #define	WRITE_LOCK		1
37 #define	READ_LOCK_TRY		(READ_LOCK | TRY_FLAG)
38 #define	WRITE_LOCK_TRY		(WRITE_LOCK | TRY_FLAG)
39 
40 #define	NLOCKS	4	/* initial number of readlock_t structs allocated */
41 
/*
 * Find/allocate an entry for rwlp in our array of rwlocks held for reading.
 * Returns a readlock_t slot whose rd_rwlock is rwlp; the caller manages
 * rd_count.  The slot lives in per-thread storage: initially the single
 * inline entry ul_readlock.single, later a heap array ul_readlock.array.
 */
static readlock_t *
rwl_entry(rwlock_t *rwlp)
{
	ulwp_t *self = curthread;
	readlock_t *remembered = NULL;
	readlock_t *readlockp;
	uint_t nlocks;

	/*
	 * ul_rdlocks is the capacity of the per-thread array; while it is
	 * zero we are still using the single inline readlock_t entry.
	 */
	if ((nlocks = self->ul_rdlocks) != 0)
		readlockp = self->ul_readlock.array;
	else {
		nlocks = 1;
		readlockp = &self->ul_readlock.single;
	}

	/*
	 * Linear search for an existing entry for rwlp, remembering the
	 * first unused slot (rd_count == 0) for possible reuse below.
	 */
	for (; nlocks; nlocks--, readlockp++) {
		if (readlockp->rd_rwlock == rwlp)
			return (readlockp);
		if (readlockp->rd_count == 0 && remembered == NULL)
			remembered = readlockp;
	}
	if (remembered != NULL) {
		remembered->rd_rwlock = rwlp;
		return (remembered);
	}

	/*
	 * No entry available.  Allocate more space, converting the single
	 * readlock_t entry into an array of readlock_t entries if necessary.
	 */
	if ((nlocks = self->ul_rdlocks) == 0) {
		/*
		 * Initial allocation of the readlock_t array.
		 * Convert the single entry into an array.
		 */
		self->ul_rdlocks = nlocks = NLOCKS;
		readlockp = lmalloc(nlocks * sizeof (readlock_t));
		/*
		 * The single readlock_t becomes the first entry in the array.
		 */
		*readlockp = self->ul_readlock.single;
		self->ul_readlock.single.rd_count = 0;
		self->ul_readlock.array = readlockp;
		/*
		 * Return the next available entry in the array.
		 */
		(++readlockp)->rd_rwlock = rwlp;
		return (readlockp);
	}
	/*
	 * Reallocate the array, double the size each time.
	 */
	readlockp = lmalloc(nlocks * 2 * sizeof (readlock_t));
	(void) _memcpy(readlockp, self->ul_readlock.array,
		nlocks * sizeof (readlock_t));
	lfree(self->ul_readlock.array, nlocks * sizeof (readlock_t));
	self->ul_readlock.array = readlockp;
	self->ul_rdlocks *= 2;
	/*
	 * Return the next available entry in the newly allocated array.
	 */
	(readlockp += nlocks)->rd_rwlock = rwlp;
	return (readlockp);
}
109 
110 /*
111  * Free the array of rwlocks held for reading.
112  */
113 void
114 rwl_free(ulwp_t *ulwp)
115 {
116 	uint_t nlocks;
117 
118 	if ((nlocks = ulwp->ul_rdlocks) != 0)
119 		lfree(ulwp->ul_readlock.array, nlocks * sizeof (readlock_t));
120 	ulwp->ul_rdlocks = 0;
121 	ulwp->ul_readlock.single.rd_rwlock = NULL;
122 	ulwp->ul_readlock.single.rd_count = 0;
123 }
124 
125 /*
126  * Check if a reader version of the lock is held by the current thread.
127  * rw_read_is_held() is private to libc.
128  */
129 #pragma weak rw_read_is_held = _rw_read_held
130 #pragma weak rw_read_held = _rw_read_held
131 int
132 _rw_read_held(rwlock_t *rwlp)
133 {
134 	ulwp_t *self;
135 	readlock_t *readlockp;
136 	uint_t nlocks;
137 
138 	/* quick answer */
139 	if (rwlp->rwlock_type == USYNC_PROCESS) {
140 		if (!((uint32_t)rwlp->rwlock_readers & URW_READERS_MASK))
141 			return (0);
142 	} else if (rwlp->rwlock_readers <= 0) {
143 		return (0);
144 	}
145 
146 	/*
147 	 * The lock is held for reading by some thread.
148 	 * Search our array of rwlocks held for reading for a match.
149 	 */
150 	self = curthread;
151 	if ((nlocks = self->ul_rdlocks) != 0)
152 		readlockp = self->ul_readlock.array;
153 	else {
154 		nlocks = 1;
155 		readlockp = &self->ul_readlock.single;
156 	}
157 
158 	for (; nlocks; nlocks--, readlockp++)
159 		if (readlockp->rd_rwlock == rwlp)
160 			return (readlockp->rd_count? 1 : 0);
161 
162 	return (0);
163 }
164 
165 /*
166  * Check if a writer version of the lock is held by the current thread.
167  * rw_write_is_held() is private to libc.
168  */
169 #pragma weak rw_write_is_held = _rw_write_held
170 #pragma weak rw_write_held = _rw_write_held
171 int
172 _rw_write_held(rwlock_t *rwlp)
173 {
174 	ulwp_t *self = curthread;
175 	uberdata_t *udp = self->ul_uberdata;
176 
177 	if (rwlp->rwlock_type == USYNC_PROCESS)
178 		return (((uint32_t)rwlp->rwlock_readers & URW_WRITE_LOCKED) &&
179 		    (rwlp->rwlock_ownerpid == udp->pid) &&
180 		    (rwlp->rwlock_owner == (uintptr_t)self));
181 
182 	/* USYNC_THREAD */
183 	return (rwlp->rwlock_readers == -1 && mutex_is_held(&rwlp->mutex));
184 }
185 
186 #pragma weak rwlock_init = __rwlock_init
187 #pragma weak _rwlock_init = __rwlock_init
188 /* ARGSUSED2 */
189 int
190 __rwlock_init(rwlock_t *rwlp, int type, void *arg)
191 {
192 	if (type != USYNC_THREAD && type != USYNC_PROCESS)
193 		return (EINVAL);
194 	/*
195 	 * Once reinitialized, we can no longer be holding a read or write lock.
196 	 * We can do nothing about other threads that are holding read locks.
197 	 */
198 	if (rw_read_is_held(rwlp))
199 		rwl_entry(rwlp)->rd_count = 0;
200 	(void) _memset(rwlp, 0, sizeof (*rwlp));
201 	rwlp->rwlock_type = (uint16_t)type;
202 	rwlp->rwlock_magic = RWL_MAGIC;
203 	rwlp->rwlock_readers = 0;
204 	rwlp->mutex.mutex_type = (uint8_t)type;
205 	rwlp->mutex.mutex_flag = LOCK_INITED;
206 	rwlp->mutex.mutex_magic = MUTEX_MAGIC;
207 	rwlp->readercv.cond_type = (uint16_t)type;
208 	rwlp->readercv.cond_magic = COND_MAGIC;
209 	rwlp->writercv.cond_type = (uint16_t)type;
210 	rwlp->writercv.cond_magic = COND_MAGIC;
211 	return (0);
212 }
213 
214 #pragma weak rwlock_destroy = __rwlock_destroy
215 #pragma weak _rwlock_destroy = __rwlock_destroy
216 #pragma weak pthread_rwlock_destroy = __rwlock_destroy
217 #pragma weak _pthread_rwlock_destroy = __rwlock_destroy
218 int
219 __rwlock_destroy(rwlock_t *rwlp)
220 {
221 	/*
222 	 * Once destroyed, we can no longer be holding a read or write lock.
223 	 * We can do nothing about other threads that are holding read locks.
224 	 */
225 	if (rw_read_is_held(rwlp))
226 		rwl_entry(rwlp)->rd_count = 0;
227 	rwlp->rwlock_magic = 0;
228 	tdb_sync_obj_deregister(rwlp);
229 	return (0);
230 }
231 
/*
 * Wake up the next thread sleeping on the rwlock queue and then
 * drop the queue lock.  Return non-zero if we wake up someone.
 *
 * This is called whenever a thread releases the lock and whenever a
 * thread successfully or unsuccessfully attempts to acquire the lock.
 * (Basically, whenever the state of the queue might have changed.)
 *
 * We wake up at most one thread.  If there are more threads to be
 * awakened, the next one will be waked up by the thread we wake up.
 * This ensures that queued threads will acquire the lock in priority
 * order and that queued writers will take precedence over queued
 * readers of the same priority.
 */
static int
rw_queue_release(queue_head_t *qp, rwlock_t *rwlp)
{
	ulwp_t *ulwp;
	int more;

	if (rwlp->rwlock_readers >= 0 && rwlp->rwlock_mwaiters) {
		/*
		 * The lock is free or at least is available to readers
		 * and there are (or might be) waiters on the queue.
		 */
		if (rwlp->rwlock_readers != 0 &&
		    (ulwp = queue_waiter(qp, rwlp)) == NULL)
			/* held for reading but nobody is actually queued */
			rwlp->rwlock_mwaiters = 0;
		else if (rwlp->rwlock_readers == 0 || !ulwp->ul_writer) {
			/*
			 * Either the lock is completely free, or it is
			 * read-held and the front waiter is also a reader.
			 */
			if ((ulwp = dequeue(qp, rwlp, &more)) == NULL)
				rwlp->rwlock_mwaiters = 0;
			else {
				ulwp_t *self = curthread;
				lwpid_t lwpid = ulwp->ul_lwpid;

				/* remember whether more waiters remain */
				rwlp->rwlock_mwaiters = (more? 1 : 0);
				/*
				 * Hold off preemption while we drop the
				 * queue lock and issue the wakeup.
				 */
				no_preempt(self);
				queue_unlock(qp);
				(void) __lwp_unpark(lwpid);
				preempt(self);
				return (1);
			}
		}
	}
	queue_unlock(qp);
	return (0);
}
279 
/*
 * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
 * and trywrlock for process-shared (USYNC_PROCESS) rwlocks.
 *
 * Note: if the lock appears to be contended we call __lwp_rwlock_rdlock()
 * or __lwp_rwlock_wrlock() holding the mutex. These return with the mutex
 * released, and if they need to sleep will release the mutex first. In the
 * event of a spurious wakeup, these will return EAGAIN (because it is much
 * easier for us to re-acquire the mutex here).
 */
int
shared_rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
{
	uint32_t *rwstate = (uint32_t *)&rwlp->readers;
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	int try_flag;
	int error = 0;

	/* split the operation code into try/blocking and read/write */
	try_flag = (rd_wr & TRY_FLAG);
	rd_wr &= ~TRY_FLAG;
	ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK);

	if (!try_flag) {
		DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr);
	}

	/* loop to retry after the kernel reports a spurious wakeup (EAGAIN) */
	do {
		if ((error = _private_mutex_lock(&rwlp->mutex)) != 0)
			break;

		if (rd_wr == READ_LOCK) {
			/*
			 * We are a reader.
			 */

			/*
			 * Fast path: no bits set outside the reader-count
			 * mask, so just bump the reader count.
			 */
			if ((*rwstate & ~URW_READERS_MASK) == 0) {
				(*rwstate)++;
				(void) _private_mutex_unlock(&rwlp->mutex);
			} else if (try_flag) {
				if (*rwstate & URW_WRITE_LOCKED) {
					error = EBUSY;
					(void) _private_mutex_unlock(
					    &rwlp->mutex);
				} else {
					/*
					 * We have a higher priority than any
					 * queued waiters, or the waiters bit
					 * may be inaccurate. Only the kernel
					 * knows for sure.
					 */
					rwlp->rwlock_mowner = 0;
					rwlp->rwlock_mownerpid = 0;
					error = __lwp_rwlock_tryrdlock(rwlp);
				}
			} else {
				/* the kernel call releases the mutex for us */
				rwlp->rwlock_mowner = 0;
				rwlp->rwlock_mownerpid = 0;
				error = __lwp_rwlock_rdlock(rwlp, tsp);
			}
		} else {
			/*
			 * We are a writer.
			 */

			/* fast path: the lock is completely free */
			if (*rwstate == 0) {
				*rwstate = URW_WRITE_LOCKED;
				(void) _private_mutex_unlock(&rwlp->mutex);
			} else if (try_flag) {
				if (*rwstate & URW_WRITE_LOCKED) {
					error = EBUSY;
					(void) _private_mutex_unlock(
					    &rwlp->mutex);
				} else {
					/*
					 * The waiters bit may be inaccurate.
					 * Only the kernel knows for sure.
					 */
					rwlp->rwlock_mowner = 0;
					rwlp->rwlock_mownerpid = 0;
					error = __lwp_rwlock_trywrlock(rwlp);
				}
			} else {
				/* the kernel call releases the mutex for us */
				rwlp->rwlock_mowner = 0;
				rwlp->rwlock_mownerpid = 0;
				error = __lwp_rwlock_wrlock(rwlp, tsp);
			}
		}
	} while (error == EAGAIN);

	if (error == 0) {
		/* record write ownership for rw_write_is_held() */
		if (rd_wr == WRITE_LOCK) {
			rwlp->rwlock_owner = (uintptr_t)self;
			rwlp->rwlock_ownerpid = udp->pid;
		}
		if (!try_flag) {
			DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, 1);
		}
		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, rd_wr);
	} else if (!try_flag) {
		DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, 0);
		DTRACE_PROBE3(plockstat, rw__error, rwlp, rd_wr, error);
	}
	return (error);
}
385 
/*
 * Code for unlock of process-shared (USYNC_PROCESS) rwlocks.
 *
 * Note: if the lock appears to have waiters we call __lwp_rwlock_unlock()
 * holding the mutex. This returns with the mutex still held (for us to
 * release).
 */
int
shared_rwlock_unlock(rwlock_t *rwlp, int *waked)
{
	uint32_t *rwstate = (uint32_t *)&rwlp->readers;
	int error = 0;

	if ((error = _private_mutex_lock(&rwlp->mutex)) != 0)
		return (error);

	/* Reset flag used to suggest caller yields. */
	*waked = 0;

	/* Our right to unlock was checked in __rw_unlock(). */
	if (*rwstate & URW_WRITE_LOCKED) {
		/* we were the writer; clear the recorded ownership */
		rwlp->rwlock_owner = 0;
		rwlp->rwlock_ownerpid = 0;
	}

	if ((*rwstate & ~URW_READERS_MASK) == 0) {
		/* Simple multiple readers, no waiters case. */
		if (*rwstate > 0)
			(*rwstate)--;
	} else if (!(*rwstate & URW_HAS_WAITERS)) {
		/* Simple no waiters case (i.e. was write locked). */
		*rwstate = 0;
	} else {
		/*
		 * We appear to have waiters so we must call into the kernel.
		 * If there are waiters a full handoff will occur (rwstate
		 * will be updated, and one or more threads will be awoken).
		 */
		error = __lwp_rwlock_unlock(rwlp);

		/* Suggest caller yields. */
		*waked = 1;
	}

	(void) _private_mutex_unlock(&rwlp->mutex);

	if (error) {
		DTRACE_PROBE3(plockstat, rw__error, rwlp, 0, error);
	} else {
		DTRACE_PROBE2(plockstat, rw__release, rwlp, READ_LOCK);
	}

	return (error);
}
440 
441 
/*
 * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
 * and trywrlock for process-private (USYNC_THREAD) rwlocks.
 */
int
rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
{
	ulwp_t *self = curthread;
	queue_head_t *qp;
	ulwp_t *ulwp;
	int try_flag;
	int error = 0;

	/* split the operation code into try/blocking and read/write */
	try_flag = (rd_wr & TRY_FLAG);
	rd_wr &= ~TRY_FLAG;
	ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK);

	/*
	 * Optimize for the case of having only a single thread.
	 * (Most likely a traditional single-threaded application.)
	 * We don't need the protection of queue_lock() in this case.
	 * We need to defer signals, however (the other form of concurrency).
	 */
	if (!self->ul_uberdata->uberflags.uf_mt) {
		sigoff(self);
		if (rwlp->rwlock_readers < 0 ||
		    (rd_wr == WRITE_LOCK && rwlp->rwlock_readers != 0)) {
			sigon(self);
			if (try_flag)
				return (EBUSY);
			/*
			 * Somebody other than ourself owns the lock.  (If we
			 * owned the lock, either for reading or writing, we
			 * would already have returned EDEADLK in our caller.)
			 * This can happen only in the child of fork1() when
			 * some now-defunct thread was holding the lock when
			 * the fork1() was executed by the current thread.
			 * In this case, we just fall into the long way
			 * to block, either forever or with a timeout.
			 */
			ASSERT(MUTEX_OWNER(&rwlp->mutex) != self);
		} else {
			if (rd_wr == READ_LOCK)
				rwlp->rwlock_readers++;
			else {
				/* rwlock_readers == -1 encodes write-locked */
				rwlp->rwlock_readers = -1;
				rwlp->rwlock_mlockw = LOCKSET;
				rwlp->rwlock_mowner = (uintptr_t)self;
			}
			sigon(self);
			DTRACE_PROBE2(plockstat, rw__acquire, rwlp, rd_wr);
			return (0);
		}
	}

	if (!try_flag) {
		DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr);
	}

	/*
	 * Do it the long way.
	 */
	qp = queue_lock(rwlp, MX);
	while (error == 0) {
		/*
		 * Decide whether we may take the lock now.  We may not if
		 * it is held incompatibly, or if a queued waiter should be
		 * allowed to go first (priority / writer-preference rules).
		 */
		if (rwlp->rwlock_readers < 0 ||
		    (rd_wr == WRITE_LOCK && rwlp->rwlock_readers != 0))
			/* EMPTY */;	/* somebody holds the lock */
		else if (!rwlp->rwlock_mwaiters)
			break;		/* no queued waiters */
		else if ((ulwp = queue_waiter(qp, rwlp)) == NULL) {
			rwlp->rwlock_mwaiters = 0;
			break;		/* no queued waiters */
		} else {
			int our_pri = real_priority(self);
			int his_pri = real_priority(ulwp);

			if (rd_wr == WRITE_LOCK) {
				/*
				 * We defer to a queued thread that has
				 * a higher priority than ours.
				 */
				if (his_pri <= our_pri)
					break;
			} else {
				/*
				 * We defer to a queued thread that has
				 * a higher priority than ours or that
				 * is a writer whose priority equals ours.
				 */
				if (his_pri < our_pri ||
				    (his_pri == our_pri && !ulwp->ul_writer))
					break;
			}
		}
		/*
		 * We are about to block.
		 * If we're doing a trylock, return EBUSY instead.
		 */
		if (try_flag) {
			error = EBUSY;
			break;
		}
		/*
		 * Enqueue writers ahead of readers of the
		 * same priority.
		 */
		self->ul_writer = rd_wr;	/* *must* be 0 or 1 */
		enqueue(qp, self, rwlp, MX);
		rwlp->rwlock_mwaiters = 1;
		set_parking_flag(self, 1);
		queue_unlock(qp);
		/* EINTR from __lwp_park() means retry the acquisition */
		if ((error = __lwp_park(tsp, 0)) == EINTR)
			error = 0;
		self->ul_writer = 0;
		set_parking_flag(self, 0);
		qp = queue_lock(rwlp, MX);
		if (self->ul_sleepq)	/* timeout or spurious wakeup */
			rwlp->rwlock_mwaiters = dequeue_self(qp, rwlp);
	}

	if (error == 0) {
		if (rd_wr == READ_LOCK)
			rwlp->rwlock_readers++;
		else {
			rwlp->rwlock_readers = -1;
			/* make it look like we acquired the embedded mutex */
			rwlp->rwlock_mlockw = LOCKSET;
			rwlp->rwlock_mowner = (uintptr_t)self;
		}
		if (!try_flag) {
			DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, 1);
		}
		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, rd_wr);
	} else if (!try_flag) {
		DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, 0);
		DTRACE_PROBE3(plockstat, rw__error, rwlp, rd_wr, error);
	}

	/* wake the next eligible waiter (if any) and drop the queue lock */
	(void) rw_queue_release(qp, rwlp);

	return (error);
}
584 
/*
 * Acquire a read lock, blocking with optional relative timeout tsp
 * (tsp == NULL means block forever).  Handles recursive read locks
 * via the per-thread readlock_t entry; returns 0 or an errno value.
 */
int
rw_rdlock_impl(rwlock_t *rwlp, timespec_t *tsp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	readlock_t *readlockp;
	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
	int error;

	/*
	 * If we already hold a readers lock on this rwlock,
	 * just increment our reference count and return.
	 */
	readlockp = rwl_entry(rwlp);
	if (readlockp->rd_count != 0) {
		/* cap the recursion count at READ_LOCK_MAX */
		if (readlockp->rd_count == READ_LOCK_MAX)
			return (EAGAIN);
		readlockp->rd_count++;
		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, READ_LOCK);
		return (0);
	}

	/*
	 * If we hold the writer lock, bail out.
	 */
	if (rw_write_is_held(rwlp)) {
		if (self->ul_error_detection)
			rwlock_error(rwlp, "rwlock_rdlock",
			    "calling thread owns the writer lock");
		return (EDEADLK);
	}

	if (rwlp->rwlock_type == USYNC_PROCESS)		/* kernel-level */
		error = shared_rwlock_lock(rwlp, tsp, READ_LOCK);
	else						/* user-level */
		error = rwlock_lock(rwlp, tsp, READ_LOCK);

	/* record the new read hold only on success */
	if (error == 0) {
		readlockp->rd_count = 1;
		if (rwsp)
			tdb_incr(rwsp->rw_rdlock);
	}

	return (error);
}
630 
631 #pragma weak rw_rdlock = __rw_rdlock
632 #pragma weak _rw_rdlock = __rw_rdlock
633 #pragma weak pthread_rwlock_rdlock = __rw_rdlock
634 #pragma weak _pthread_rwlock_rdlock = __rw_rdlock
635 int
636 __rw_rdlock(rwlock_t *rwlp)
637 {
638 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
639 	return (rw_rdlock_impl(rwlp, NULL));
640 }
641 
642 void
643 lrw_rdlock(rwlock_t *rwlp)
644 {
645 	enter_critical(curthread);
646 	(void) rw_rdlock_impl(rwlp, NULL);
647 }
648 
649 #pragma weak pthread_rwlock_reltimedrdlock_np = \
650 	_pthread_rwlock_reltimedrdlock_np
651 int
652 _pthread_rwlock_reltimedrdlock_np(rwlock_t *rwlp, const timespec_t *reltime)
653 {
654 	timespec_t tslocal = *reltime;
655 	int error;
656 
657 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
658 	error = rw_rdlock_impl(rwlp, &tslocal);
659 	if (error == ETIME)
660 		error = ETIMEDOUT;
661 	return (error);
662 }
663 
664 #pragma weak pthread_rwlock_timedrdlock = _pthread_rwlock_timedrdlock
665 int
666 _pthread_rwlock_timedrdlock(rwlock_t *rwlp, const timespec_t *abstime)
667 {
668 	timespec_t tslocal;
669 	int error;
670 
671 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
672 	abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal);
673 	error = rw_rdlock_impl(rwlp, &tslocal);
674 	if (error == ETIME)
675 		error = ETIMEDOUT;
676 	return (error);
677 }
678 
679 int
680 rw_wrlock_impl(rwlock_t *rwlp, timespec_t *tsp)
681 {
682 	ulwp_t *self = curthread;
683 	uberdata_t *udp = self->ul_uberdata;
684 	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
685 	int error;
686 
687 	/*
688 	 * If we hold a readers lock on this rwlock, bail out.
689 	 */
690 	if (rw_read_is_held(rwlp)) {
691 		if (self->ul_error_detection)
692 			rwlock_error(rwlp, "rwlock_wrlock",
693 			    "calling thread owns the readers lock");
694 		return (EDEADLK);
695 	}
696 
697 	/*
698 	 * If we hold the writer lock, bail out.
699 	 */
700 	if (rw_write_is_held(rwlp)) {
701 		if (self->ul_error_detection)
702 			rwlock_error(rwlp, "rwlock_wrlock",
703 			    "calling thread owns the writer lock");
704 		return (EDEADLK);
705 	}
706 
707 	if (rwlp->rwlock_type == USYNC_PROCESS) {	/* kernel-level */
708 		error = shared_rwlock_lock(rwlp, tsp, WRITE_LOCK);
709 	} else {					/* user-level */
710 		error = rwlock_lock(rwlp, tsp, WRITE_LOCK);
711 	}
712 
713 	if (error == 0 && rwsp) {
714 		tdb_incr(rwsp->rw_wrlock);
715 		rwsp->rw_wrlock_begin_hold = gethrtime();
716 	}
717 
718 	return (error);
719 }
720 
721 #pragma weak rw_wrlock = __rw_wrlock
722 #pragma weak _rw_wrlock = __rw_wrlock
723 #pragma weak pthread_rwlock_wrlock = __rw_wrlock
724 #pragma weak _pthread_rwlock_wrlock = __rw_wrlock
725 int
726 __rw_wrlock(rwlock_t *rwlp)
727 {
728 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
729 	return (rw_wrlock_impl(rwlp, NULL));
730 }
731 
732 void
733 lrw_wrlock(rwlock_t *rwlp)
734 {
735 	enter_critical(curthread);
736 	(void) rw_wrlock_impl(rwlp, NULL);
737 }
738 
739 #pragma weak pthread_rwlock_reltimedwrlock_np = \
740 	_pthread_rwlock_reltimedwrlock_np
741 int
742 _pthread_rwlock_reltimedwrlock_np(rwlock_t *rwlp, const timespec_t *reltime)
743 {
744 	timespec_t tslocal = *reltime;
745 	int error;
746 
747 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
748 	error = rw_wrlock_impl(rwlp, &tslocal);
749 	if (error == ETIME)
750 		error = ETIMEDOUT;
751 	return (error);
752 }
753 
754 #pragma weak pthread_rwlock_timedwrlock = _pthread_rwlock_timedwrlock
755 int
756 _pthread_rwlock_timedwrlock(rwlock_t *rwlp, const timespec_t *abstime)
757 {
758 	timespec_t tslocal;
759 	int error;
760 
761 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
762 	abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal);
763 	error = rw_wrlock_impl(rwlp, &tslocal);
764 	if (error == ETIME)
765 		error = ETIMEDOUT;
766 	return (error);
767 }
768 
769 #pragma weak rw_tryrdlock = __rw_tryrdlock
770 #pragma weak _rw_tryrdlock = __rw_tryrdlock
771 #pragma weak pthread_rwlock_tryrdlock = __rw_tryrdlock
772 #pragma weak _pthread_rwlock_tryrdlock = __rw_tryrdlock
773 int
774 __rw_tryrdlock(rwlock_t *rwlp)
775 {
776 	ulwp_t *self = curthread;
777 	uberdata_t *udp = self->ul_uberdata;
778 	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
779 	readlock_t *readlockp;
780 	int error;
781 
782 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
783 
784 	if (rwsp)
785 		tdb_incr(rwsp->rw_rdlock_try);
786 
787 	/*
788 	 * If we already hold a readers lock on this rwlock,
789 	 * just increment our reference count and return.
790 	 */
791 	readlockp = rwl_entry(rwlp);
792 	if (readlockp->rd_count != 0) {
793 		if (readlockp->rd_count == READ_LOCK_MAX)
794 			return (EAGAIN);
795 		readlockp->rd_count++;
796 		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, READ_LOCK);
797 		return (0);
798 	}
799 
800 	if (rwlp->rwlock_type == USYNC_PROCESS)		/* kernel-level */
801 		error = shared_rwlock_lock(rwlp, NULL, READ_LOCK_TRY);
802 	else						/* user-level */
803 		error = rwlock_lock(rwlp, NULL, READ_LOCK_TRY);
804 
805 	if (error == 0)
806 		readlockp->rd_count = 1;
807 	else if (rwsp)
808 		tdb_incr(rwsp->rw_rdlock_try_fail);
809 
810 	return (error);
811 }
812 
813 #pragma weak rw_trywrlock = __rw_trywrlock
814 #pragma weak _rw_trywrlock = __rw_trywrlock
815 #pragma weak pthread_rwlock_trywrlock = __rw_trywrlock
816 #pragma weak _pthread_rwlock_trywrlock = __rw_trywrlock
817 int
818 __rw_trywrlock(rwlock_t *rwlp)
819 {
820 	ulwp_t *self = curthread;
821 	uberdata_t *udp = self->ul_uberdata;
822 	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
823 	int error;
824 
825 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
826 
827 	if (rwsp)
828 		tdb_incr(rwsp->rw_wrlock_try);
829 
830 	if (rwlp->rwlock_type == USYNC_PROCESS) {	/* kernel-level */
831 		error = shared_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY);
832 	} else {					/* user-level */
833 		error = rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY);
834 	}
835 	if (rwsp) {
836 		if (error)
837 			tdb_incr(rwsp->rw_wrlock_try_fail);
838 		else
839 			rwsp->rw_wrlock_begin_hold = gethrtime();
840 	}
841 	return (error);
842 }
843 
#pragma weak rw_unlock = __rw_unlock
#pragma weak _rw_unlock = __rw_unlock
#pragma weak pthread_rwlock_unlock = __rw_unlock
#pragma weak _pthread_rwlock_unlock = __rw_unlock
/*
 * Release the calling thread's hold (read or write) on the rwlock.
 * Validates ownership first and returns EPERM for a lock we do not hold.
 */
int
__rw_unlock(rwlock_t *rwlp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	tdb_rwlock_stats_t *rwsp;
	int32_t lock_count;
	int waked;

	/* fetch the lock count once; it may change underfoot */
	lock_count = rwlp->rwlock_readers;
	if (rwlp->rwlock_type == USYNC_PROCESS) {
		/*
		 * munge it from rwstate: normalize the process-shared
		 * bit-encoded state to the -1/0/+n convention used below
		 */
		if (lock_count & URW_WRITE_LOCKED)
			lock_count = -1;
		else
			lock_count &= URW_READERS_MASK;
	}

	if (lock_count < 0) {
		/*
		 * Since the writer lock is held, we'd better be
		 * holding it, else we cannot legitimately be here.
		 */
		if (!rw_write_is_held(rwlp)) {
			if (self->ul_error_detection)
				rwlock_error(rwlp, "rwlock_unlock",
				    "writer lock held, "
				    "but not by the calling thread");
			return (EPERM);
		}
		/* close out the write-hold-time statistics, if enabled */
		if ((rwsp = RWLOCK_STATS(rwlp, udp)) != NULL) {
			if (rwsp->rw_wrlock_begin_hold)
				rwsp->rw_wrlock_hold_time +=
				    gethrtime() - rwsp->rw_wrlock_begin_hold;
			rwsp->rw_wrlock_begin_hold = 0;
		}
	} else if (lock_count > 0) {
		/*
		 * A readers lock is held; if we don't hold one, bail out.
		 */
		readlock_t *readlockp = rwl_entry(rwlp);
		if (readlockp->rd_count == 0) {
			if (self->ul_error_detection)
				rwlock_error(rwlp, "rwlock_unlock",
				    "readers lock held, "
				    "but not by the calling thread");
			return (EPERM);
		}
		/*
		 * If we hold more than one readers lock on this rwlock,
		 * just decrement our reference count and return.
		 */
		if (--readlockp->rd_count != 0) {
			DTRACE_PROBE2(plockstat, rw__release, rwlp, READ_LOCK);
			return (0);
		}
	} else {
		/*
		 * This is a usage error.
		 * No thread should release an unowned lock.
		 */
		if (self->ul_error_detection)
			rwlock_error(rwlp, "rwlock_unlock", "lock not owned");
		return (EPERM);
	}

	if (rwlp->rwlock_type == USYNC_PROCESS) {	/* kernel-level */
		(void) shared_rwlock_unlock(rwlp, &waked);
	} else if (!udp->uberflags.uf_mt) {		/* single threaded */
		/*
		 * In the case of having only a single thread, we don't
		 * need the protection of queue_lock() (this parallels
		 * the optimization made in rwlock_lock(), above).
		 * As in rwlock_lock(), we need to defer signals.
		 */
		sigoff(self);
		if (rwlp->rwlock_readers > 0) {
			rwlp->rwlock_readers--;
			DTRACE_PROBE2(plockstat, rw__release, rwlp, READ_LOCK);
		} else {
			rwlp->rwlock_readers = 0;
			/* make it look like we released the embedded mutex */
			rwlp->rwlock_mowner = 0;
			rwlp->rwlock_mlockw = LOCKCLEAR;
			DTRACE_PROBE2(plockstat, rw__release, rwlp, WRITE_LOCK);
		}
		sigon(self);
		waked = 0;
	} else {					/* multithreaded */
		queue_head_t *qp;

		qp = queue_lock(rwlp, MX);
		if (rwlp->rwlock_readers > 0) {
			rwlp->rwlock_readers--;
			DTRACE_PROBE2(plockstat, rw__release, rwlp, READ_LOCK);
		} else {
			rwlp->rwlock_readers = 0;
			/* make it look like we released the embedded mutex */
			rwlp->rwlock_mowner = 0;
			rwlp->rwlock_mlockw = LOCKCLEAR;
			DTRACE_PROBE2(plockstat, rw__release, rwlp, WRITE_LOCK);
		}
		/* wake the next eligible waiter and drop the queue lock */
		waked = rw_queue_release(qp, rwlp);
	}

	/*
	 * Yield to the thread we just waked up, just in case we might
	 * be about to grab the rwlock again immediately upon return.
	 * This is pretty weak but it helps on a uniprocessor and also
	 * when cpu affinity has assigned both ourself and the other
	 * thread to the same CPU.  Note that lwp_yield() will yield
	 * the processor only if the writer is at the same or higher
	 * priority than ourself.  This provides more balanced program
	 * behavior; it doesn't guarantee acquisition of the lock by
	 * the pending writer.
	 */
	if (waked)
		lwp_yield();
	return (0);
}
969 
970 void
971 lrw_unlock(rwlock_t *rwlp)
972 {
973 	(void) __rw_unlock(rwlp);
974 	exit_critical(curthread);
975 }
976