/*-
 * Copyright (c) 2005 David Xu <davidxu@freebsd.org>
 * Copyright (c) 2005 Matthew Dillon <dillon@backplane.com>
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <assert.h>
#include <errno.h>
#include <unistd.h>
#include <sys/time.h>
#include <time.h>

#include "thr_private.h"

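/* compiler barrier only; prevents caching of *mtx across it */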
#define cpu_ccfence()	__asm __volatile("" : : : "memory")

/*
 * This function is used to acquire a contested lock.
 *
 * There is a performance trade-off between spinning and sleeping.  In
 * a heavily multi-threaded program, heavily contested locks whose waiters
 * constantly sleep and wake up generate a large IPI load on the system;
 * qemu with a lot of CPUs configured is a good example.  It winds up
 * being much faster to spin instead.
 *
 * So the first optimization here is to hard-loop for a count that scales
 * with the number of threads.
 *
 * The second optimization is to wake up just one waiter at a time.  This
 * is fraught with issues because waiters can abort and races can result in
 * nobody being woken up to acquire the released lock, so to smooth things
 * over sleeps are limited to 1ms before we retry.
 */
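/*
 * Lock word layout, as used below:
 *
 *	0			unlocked
 *	id & 0x3FFFFFFF		owned by thread 'id' (low 30 bits)
 *	0x40000000		contested bit; set when waiters may be sleeping
 */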
int
__thr_umtx_lock(volatile umtx_t *mtx, int id, int timo)
{
	int v;
	int errval;
	int ret = 0;
	int retry = _thread_active_threads * 200 + 10;

	v = *mtx;
	cpu_ccfence();
	id &= 0x3FFFFFFF;

	for (;;) {
		cpu_pause();
		if (v == 0) {
			if (atomic_fcmpset_int(mtx, &v, id))
				break;
			continue;
		}
		if (--retry) {
			v = *mtx;
			continue;
		}

		/*
		 * Set the waiting bit.  If the fcmpset fails, v is loaded
		 * with the current contents of the mutex, and if the waiting
		 * bit is already set we can also sleep.
		 */
		if (atomic_fcmpset_int(mtx, &v, v|0x40000000) ||
		    (v & 0x40000000)) {
			if (timo == 0) {
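				/*
				 * No timeout was requested; sleep in 1ms
				 * chunks and retry indefinitely.
				 */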
				_umtx_sleep_err(mtx, v|0x40000000, 1000);
			} else if (timo > 1500) {
				/*
				 * Short sleep and retry.  Because umtx
				 * ops can time out and abort, wakeup1()
				 * races can cause a wakeup to be missed.
				 */
				_umtx_sleep_err(mtx, v|0x40000000, 1000);
				timo -= 1000;
			} else {
				/*
				 * Final sleep, do one last attempt to get
				 * the lock before giving up.
				 */
				errval = _umtx_sleep_err(mtx, v|0x40000000,
							 timo);
				if (__predict_false(errval == EAGAIN)) {
					if (atomic_cmpset_acq_int(mtx, 0, id))
						ret = 0;
					else
						ret = ETIMEDOUT;
					break;
				}
			}
		}
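		/* reset the spin budget before spinning again */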
		retry = _thread_active_threads * 200 + 10;
	}
	return (ret);
}

/*
 * Inline followup when releasing a mutex.  The mutex has been released
 * but 'v' either doesn't match id or needs a wakeup.
 */
void
__thr_umtx_unlock(volatile umtx_t *mtx, int v, int id)
{
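	/*
	 * If the contested bit is set a waiter may be sleeping; wake up
	 * one thread and strip the bit before the ownership check below.
	 */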
	if (v & 0x40000000) {
		_umtx_wakeup_err(mtx, 1);
		v &= 0x3FFFFFFF;
	}
	THR_ASSERT(v == id, "thr_umtx_unlock: wrong owner");
}

/*
 * Low level timed umtx lock.  This function must never return
 * EINTR.
 */
int
__thr_umtx_timedlock(volatile umtx_t *mtx, int id,
		     const struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	int timo, ret;

	if ((timeout->tv_sec < 0) ||
	    (timeout->tv_sec == 0 && timeout->tv_nsec <= 0)) {
		return (ETIMEDOUT);
	}

	/* XXX this should use a monotonic clock! */
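	/* ts holds the absolute deadline; ts2 holds the time remaining */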
	clock_gettime(CLOCK_REALTIME, &ts);
	timespecadd(&ts, timeout, &ts);
	ts2 = *timeout;

	id &= 0x3FFFFFFF;

	for (;;) {
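		/*
		 * Sleep for the sub-second portion of the remaining time,
		 * converted to microseconds; if only whole seconds remain,
		 * sleep for one second and recompute.
		 */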
		if (ts2.tv_nsec) {
			timo = (int)(ts2.tv_nsec / 1000);
			if (timo == 0)
				timo = 1;
		} else {
			timo = 1000000;
		}
		ret = __thr_umtx_lock(mtx, id, timo);
		if (ret != EINTR && ret != ETIMEDOUT)
			break;
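		/* recompute the time remaining until the absolute deadline */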
		clock_gettime(CLOCK_REALTIME, &ts3);
		timespecsub(&ts, &ts3, &ts2);
		if (ts2.tv_sec < 0 ||
		    (ts2.tv_sec == 0 && ts2.tv_nsec <= 0)) {
			ret = ETIMEDOUT;
			break;
		}
	}
	return (ret);
}

/*
 * Regular umtx wait.  With no timeout this cannot return EINTR;
 * timed waits may return EINTR.
 */
int
_thr_umtx_wait(volatile umtx_t *mtx, int exp, const struct timespec *timeout,
	       int clockid)
{
	struct timespec ts, ts2, ts3;
	int timo, errval, ret = 0;

	cpu_ccfence();
	if (*mtx != exp)
		return (0);

	if (timeout == NULL) {
		/*
		 * NOTE: If no timeout, EINTR cannot be returned.  Ignore
		 *	 EINTR.
		 */
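		/* sleep in 10-second chunks, rechecking the value each time */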
		while ((errval = _umtx_sleep_err(mtx, exp, 10000000)) > 0) {
			if (errval == EBUSY)
				break;
#if 0
			if (errval == ETIMEDOUT || errval == EWOULDBLOCK) {
				if (*mtx != exp) {
					fprintf(stderr,
					    "thr_umtx_wait: FAULT VALUE CHANGE "
					    "%d -> %d oncond %p\n",
					    exp, *mtx, mtx);
				}
			}
#endif
			if (*mtx != exp)
				return (0);
		}
		return (ret);
	}

	/*
	 * Timed waits can return EINTR
	 */
	if ((timeout->tv_sec < 0) ||
	    (timeout->tv_sec == 0 && timeout->tv_nsec <= 0))
		return (ETIMEDOUT);

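	/* ts holds the absolute deadline; ts2 holds the time remaining */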
	clock_gettime(clockid, &ts);
	timespecadd(&ts, timeout, &ts);
	ts2 = *timeout;

	for (;;) {
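		/*
		 * Sleep for the sub-second portion of the remaining time
		 * in microseconds; if only whole seconds remain, sleep
		 * for one second and recompute.
		 */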
		if (ts2.tv_nsec) {
			timo = (int)(ts2.tv_nsec / 1000);
			if (timo == 0)
				timo = 1;
		} else {
			timo = 1000000;
		}

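		/*
		 * EBUSY indicates the stored value no longer matched;
		 * treat it as a satisfied wait.  EINTR is passed back
		 * to the caller for timed waits.
		 */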
		if ((errval = _umtx_sleep_err(mtx, exp, timo)) > 0) {
			if (errval == EBUSY) {
				ret = 0;
				break;
			}
			if (errval == EINTR) {
				ret = EINTR;
				break;
			}
		}

		clock_gettime(clockid, &ts3);
		timespecsub(&ts, &ts3, &ts2);
		if (ts2.tv_sec < 0 || (ts2.tv_sec == 0 && ts2.tv_nsec <= 0)) {
			ret = ETIMEDOUT;
			break;
		}
	}
	return (ret);
}

/*
 * Simple version without a timeout which can also return EINTR
 */
int
_thr_umtx_wait_intr(volatile umtx_t *mtx, int exp)
{
	int ret = 0;
	int errval;

	cpu_ccfence();
	for (;;) {
		if (*mtx != exp)
			return (0);
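		/* sleep up to 10 seconds at a time; recheck on every wakeup */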
		errval = _umtx_sleep_err(mtx, exp, 10000000);
		if (errval == 0)
			break;
		if (errval == EBUSY)
			break;
		if (errval == EINTR) {
			ret = errval;
			break;
		}
		cpu_ccfence();
	}
	return (ret);
}

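/*
 * Wake up at most 'count' threads sleeping on the umtx.
 */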
void
_thr_umtx_wake(volatile umtx_t *mtx, int count)
{
	_umtx_wakeup_err(mtx, count);
}