1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2012 Konstantin Belousov <kib@FreeBSD.org>
5 * Copyright (c) 2021 Dmitry Chagin <dchagin@FreeBSD.org>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
/*
 * Prefix used when naming the vDSO entry points defined below:
 * __kernel on aarch64, __vdso on every other architecture.
 */
#if defined(__aarch64__)
#define	__VDSO_PREFIX		__kernel
#else
#define	__VDSO_PREFIX		__vdso
#endif

/*
 * Build a prefixed symbol name by joining __VDSO_PREFIX, "_" and x.
 * Assuming __CONCAT expands and then pastes its arguments,
 * __vdsoN(clock_gettime) yields __vdso_clock_gettime when the prefix
 * is __vdso.
 */
#define	__vdsoN(x)	__CONCAT(__CONCAT(__VDSO_PREFIX,_),x)
36
/*
 * Find the last (most significant) set bit in mask.
 *
 * Returns the 1-based position of that bit, or 0 when mask is zero.
 */
static int
fls(int mask)
{
	int leading;

	/* __builtin_clz() has no defined result for a zero argument. */
	if (mask == 0)
		return (0);
	leading = __builtin_clz((unsigned int)mask);
	return (32 - leading);
}
45
46#ifdef _LP64
/*
 * Find the last (most significant) set bit in a long.
 *
 * Returns the 1-based position of that bit, or 0 when mask is zero.
 * The value is treated as unsigned while shifting, so negative inputs
 * report the top bit of the type.
 */
static int
flsl(long mask)
{
	unsigned long rest;
	int pos;

	rest = (unsigned long)mask;
	pos = 0;
	while (rest != 0) {
		rest >>= 1;
		pos++;
	}
	return (pos);
}
58#else
/*
 * Find the last (most significant) set bit in a long long.
 *
 * Returns the 1-based position of that bit, or 0 when mask is zero.
 * The value is treated as unsigned while shifting, so negative inputs
 * report the top bit of the type.
 */
static int
flsll(long long mask)
{
	unsigned long long rest;
	int pos;

	rest = (unsigned long long)mask;
	pos = 0;
	while (rest != 0) {
		rest >>= 1;
		pos++;
	}
	return (pos);
}
70#endif
71
72static int
73__vdso_native_to_linux_timespec(struct l_timespec *lts,
74    struct timespec *nts)
75{
76
77#ifdef COMPAT_LINUX32
78	if (nts->tv_sec > INT_MAX || nts->tv_sec < INT_MIN)
79		return (LINUX_EOVERFLOW);
80#endif
81	lts->tv_sec = nts->tv_sec;
82	lts->tv_nsec = nts->tv_nsec;
83	return (0);
84}
85
86static int
87__vdso_native_to_linux_timeval(l_timeval *ltv,
88    struct timeval *ntv)
89{
90
91#ifdef COMPAT_LINUX32
92	if (ntv->tv_sec > INT_MAX || ntv->tv_sec < INT_MIN)
93		return (LINUX_EOVERFLOW);
94#endif
95	ltv->tv_sec = ntv->tv_sec;
96	ltv->tv_usec = ntv->tv_usec;
97	return (0);
98}
99
100
101#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
102static int
103__vdso_native_to_linux_timespec64(struct l_timespec64 *lts,
104    struct timespec *nts)
105{
106
107	lts->tv_sec = nts->tv_sec;
108	lts->tv_nsec = nts->tv_nsec;
109	return (0);
110}
111#endif
112
113static int
114__vdso_linux_to_native_clockid(clockid_t *n, clockid_t l)
115{
116
117	switch (l) {
118	case LINUX_CLOCK_REALTIME:
119		*n = CLOCK_REALTIME;
120		break;
121	case LINUX_CLOCK_MONOTONIC:
122		*n = CLOCK_MONOTONIC;
123		break;
124	case LINUX_CLOCK_REALTIME_COARSE:
125		*n = CLOCK_REALTIME_FAST;
126		break;
127	case LINUX_CLOCK_MONOTONIC_COARSE:
128	case LINUX_CLOCK_MONOTONIC_RAW:
129		*n = CLOCK_MONOTONIC_FAST;
130		break;
131	case LINUX_CLOCK_BOOTTIME:
132		*n = CLOCK_UPTIME;
133		break;
134	default:
135		return (LINUX_EINVAL);
136	}
137	return (0);
138}
139
/*
 * The code below is adapted from
 * lib/libc/sys/__vdso_gettimeofday.c
 */
144
145static inline void
146__vdso_gettimekeep(struct vdso_timekeep **tk)
147{
148
149	*tk = (struct vdso_timekeep *)kern_timekeep_base;
150}
151
152static int
153tc_delta(const struct vdso_timehands *th, u_int *delta)
154{
155	int error;
156	u_int tc;
157
158	error = __vdso_gettc(th, &tc);
159	if (error == 0)
160		*delta = (tc - th->th_offset_count) & th->th_counter_mask;
161	return (error);
162}
163
164/*
165 * Calculate the absolute or boot-relative time from the
166 * machine-specific fast timecounter and the published timehands
167 * structure read from the shared page.
168 *
169 * The lockless reading scheme is similar to the one used to read the
170 * in-kernel timehands, see sys/kern/kern_tc.c:binuptime().  This code
171 * is based on the kernel implementation.
172 */
/*
 * Returns 0 with the computed time stored in *bt, ENOSYS when
 * tk->tk_enabled is clear, or any error other than EAGAIN reported by
 * tc_delta().  When abs is true th_boottime is added to the result.
 */
static int
freebsd_binuptime(struct bintime *bt, struct vdso_timekeep *tk, bool abs)
{
	struct vdso_timehands *th;
	uint32_t curr, gen;
	uint64_t scale, x;
	u_int delta, scale_bits;
	int error;

	do {
		if (!tk->tk_enabled)
			return (ENOSYS);

		/*
		 * Snapshot the index of the current timehands and its
		 * generation; both are re-checked in the loop condition
		 * to detect a concurrent update.
		 */
		curr = atomic_load_acq_32(&tk->tk_current);
		th = &tk->tk_th[curr];
		gen = atomic_load_acq_32(&th->th_gen);
		*bt = th->th_offset;
		error = tc_delta(th, &delta);
		/*
		 * NOTE(review): in a do-while loop `continue' jumps to the
		 * loop condition, so the read is retried only if that
		 * condition reports a stale snapshot.  Presumably
		 * __vdso_gettc() returns EAGAIN only when the timehands
		 * changed underneath it -- confirm.
		 */
		if (error == EAGAIN)
			continue;
		if (error != 0)
			return (error);
		scale = th->th_scale;
#ifdef _LP64
		scale_bits = flsl(scale);
#else
		scale_bits = flsll(scale);
#endif
		/*
		 * If the significant bits of scale and delta together
		 * exceed 63, multiply the upper 32 bits of scale
		 * separately: the top half of that partial product goes
		 * to bt->sec, the bottom half is passed to bintime_addx(),
		 * and only the lower 32 bits of scale remain for the
		 * final multiplication below.
		 */
		if (__predict_false(scale_bits + fls(delta) > 63)) {
			x = (scale >> 32) * delta;
			scale &= 0xffffffff;
			bt->sec += x >> 32;
			bintime_addx(bt, x << 32);
		}
		bintime_addx(bt, scale * delta);
		if (abs)
			bintime_add(bt, &th->th_boottime);

		/*
		 * Ensure that the load of th_offset is completed
		 * before the load of th_gen.
		 */
		atomic_thread_fence_acq();
		/*
		 * Retry while the current index moved, the generation was
		 * zero when sampled, or the generation changed since.
		 */
	} while (curr != tk->tk_current || gen == 0 || gen != th->th_gen);
	return (0);
}
219
/*
 * Fetch th_offset of the current timehands into *bt without reading
 * the timecounter.
 *
 * Returns 0 on success or ENOSYS when tk->tk_enabled is clear.  The
 * read is repeated until the current index and generation sampled
 * before the copy still match afterwards.
 */
static int
freebsd_getnanouptime(struct bintime *bt, struct vdso_timekeep *tk)
{
	struct vdso_timehands *th;
	uint32_t curr, gen;

	do {
		if (!tk->tk_enabled)
			return (ENOSYS);

		/* Snapshot the current index and generation first. */
		curr = atomic_load_acq_32(&tk->tk_current);
		th = &tk->tk_th[curr];
		gen = atomic_load_acq_32(&th->th_gen);
		*bt = th->th_offset;

		/*
		 * Ensure that the load of th_offset is completed
		 * before the load of th_gen.
		 */
		atomic_thread_fence_acq();
		/*
		 * Retry while the current index moved, the generation was
		 * zero when sampled, or the generation changed since.
		 */
	} while (curr != tk->tk_current || gen == 0 || gen != th->th_gen);
	return (0);
}
243
244static int
245freebsd_gettimeofday(struct timeval *tv, struct timezone *tz)
246{
247	struct vdso_timekeep *tk;
248	struct bintime bt;
249	int error;
250
251	if (tz != NULL)
252		return (ENOSYS);
253	__vdso_gettimekeep(&tk);
254	if (tk == NULL)
255		return (ENOSYS);
256	if (tk->tk_ver != VDSO_TK_VER_CURR)
257		return (ENOSYS);
258	error = freebsd_binuptime(&bt, tk, true);
259	if (error == 0)
260		bintime2timeval(&bt, tv);
261	return (error);
262}
263
264static int
265freebsd_clock_gettime(clockid_t clock_id, struct timespec *ts)
266{
267	struct vdso_timekeep *tk;
268	struct bintime bt;
269	int error;
270
271	__vdso_gettimekeep(&tk);
272	if (tk == NULL)
273		return (ENOSYS);
274	if (tk->tk_ver != VDSO_TK_VER_CURR)
275		return (ENOSYS);
276	switch (clock_id) {
277	case CLOCK_REALTIME:
278	case CLOCK_REALTIME_PRECISE:
279	case CLOCK_REALTIME_FAST:
280		error = freebsd_binuptime(&bt, tk, true);
281		break;
282	case CLOCK_MONOTONIC:
283	case CLOCK_MONOTONIC_PRECISE:
284	case CLOCK_UPTIME:
285	case CLOCK_UPTIME_PRECISE:
286		error = freebsd_binuptime(&bt, tk, false);
287		break;
288	case CLOCK_MONOTONIC_FAST:
289	case CLOCK_UPTIME_FAST:
290		error = freebsd_getnanouptime(&bt, tk);
291		break;
292	default:
293		error = ENOSYS;
294		break;
295	}
296	if (error == 0)
297		bintime2timespec(&bt, ts);
298	return (error);
299}
300
301/*
302 * Linux vDSO interfaces
303 *
304 */
305int
306__vdsoN(clock_gettime)(clockid_t clock_id, struct l_timespec *lts)
307{
308	struct timespec ts;
309	clockid_t which;
310	int error;
311
312	error = __vdso_linux_to_native_clockid(&which, clock_id);
313	if (error != 0)
314		return (__vdso_clock_gettime_fallback(clock_id, lts));
315	error = freebsd_clock_gettime(which, &ts);
316	if (error == 0)
317		return (-__vdso_native_to_linux_timespec(lts, &ts));
318	else
319		return (__vdso_clock_gettime_fallback(clock_id, lts));
320}
321
322int
323__vdsoN(gettimeofday)(l_timeval *ltv, struct timezone *tz)
324{
325	struct timeval tv;
326	int error;
327
328	error = freebsd_gettimeofday(&tv, tz);
329	if (error != 0)
330		return (__vdso_gettimeofday_fallback(ltv, tz));
331	return (-__vdso_native_to_linux_timeval(ltv, &tv));
332}
333
334int
335__vdsoN(clock_getres)(clockid_t clock_id, struct l_timespec *lts)
336{
337
338	return (__vdso_clock_getres_fallback(clock_id, lts));
339}
340
341#if defined(__i386__) || defined(COMPAT_LINUX32)
342int
343__vdso_clock_gettime64(clockid_t clock_id, struct l_timespec64 *lts)
344{
345	struct timespec ts;
346	clockid_t which;
347	int error;
348
349	error = __vdso_linux_to_native_clockid(&which, clock_id);
350	if (error != 0)
351		return (__vdso_clock_gettime64_fallback(clock_id, lts));
352	error = freebsd_clock_gettime(which, &ts);
353	if (error == 0)
354		return(-__vdso_native_to_linux_timespec64(lts, &ts));
355	else
356		return(__vdso_clock_gettime64_fallback(clock_id, lts));
357}
358
359int clock_gettime64(clockid_t clock_id, struct l_timespec64 *lts)
360    __attribute__((weak, alias("__vdso_clock_gettime64")));
361#endif
362
363#if defined(__i386__) || defined(__amd64__)
/*
 * Linux getcpu() vDSO entry point.
 *
 * Requests that ask for the node are always forwarded to
 * __vdso_getcpu_fallback(), as is any request for which
 * __vdso_getcpu_try() reports failure with a negative value.
 * Otherwise the value obtained from __vdso_getcpu_try() is stored
 * through cpu, if cpu is non-NULL, and 0 is returned.
 */
int
__vdso_getcpu(uint32_t *cpu, uint32_t *node, void *cache)
{
	int ret;

	if (node != NULL)
		return (__vdso_getcpu_fallback(cpu, node, cache));
	ret = __vdso_getcpu_try();
	if (ret < 0)
		return (__vdso_getcpu_fallback(cpu, node, cache));
	/*
	 * Linux permits a NULL cpu pointer, in which case nothing is
	 * written; never dereference it unconditionally.
	 */
	if (cpu != NULL)
		*cpu = ret;
	return (0);
}
377#endif
378
379#if defined(__i386__) || defined(__amd64__)
/*
 * Linux time() vDSO entry point.
 *
 * When freebsd_gettimeofday() succeeds, the seconds value is stored
 * through tm (if non-NULL) and also returned.  Otherwise the request
 * is handed to __vdso_time_fallback().
 *
 * Note that the function returns int, so tv_sec is converted to int
 * on return.
 */
int
__vdso_time(long *tm)
{
	struct timeval now;

	if (freebsd_gettimeofday(&now, NULL) != 0)
		return (__vdso_time_fallback(tm));
	if (tm != NULL)
		*tm = now.tv_sec;
	return (now.tv_sec);
}
393#endif
394