1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2012 Konstantin Belousov <kib@FreeBSD.org>
5 * Copyright (c) 2021 Dmitry Chagin <dchagin@FreeBSD.org>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
/*
 * Find the last (most significant) set bit of an int.
 *
 * Returns the 1-based position of the highest set bit of mask, or 0
 * when mask is zero.  The arithmetic assumes a 32-bit int.
 */
static int
fls(int mask)
{
	unsigned int bits;

	bits = (unsigned int)mask;
	/* __builtin_clz() must not be handed a zero argument. */
	return (bits != 0 ? 32 - __builtin_clz(bits) : 0);
}
37
38#ifdef _LP64
/*
 * Find the last (most significant) set bit of a long.
 *
 * Returns the 1-based position of the highest set bit of mask, or 0
 * when mask is zero.  The bits are counted with a plain shift loop.
 */
static int
flsl(long mask)
{
	unsigned long rest;
	int pos;

	pos = 0;
	/* Every shift that still leaves the value non-zero adds a position. */
	for (rest = (unsigned long)mask; rest != 0; rest >>= 1)
		pos++;
	return (pos);
}
50#else
/*
 * Find the last (most significant) set bit of a long long.
 *
 * Returns the 1-based position of the highest set bit of mask, or 0
 * when mask is zero.  The bits are counted with a plain shift loop.
 */
static int
flsll(long long mask)
{
	unsigned long long rest;
	int pos;

	pos = 0;
	/* Every shift that still leaves the value non-zero adds a position. */
	for (rest = (unsigned long long)mask; rest != 0; rest >>= 1)
		pos++;
	return (pos);
}
62#endif
63
64static int
65__vdso_native_to_linux_timespec(struct l_timespec *lts,
66    struct timespec *nts)
67{
68
69#ifdef COMPAT_LINUX32
70	if (nts->tv_sec > INT_MAX || nts->tv_sec < INT_MIN)
71		return (LINUX_EOVERFLOW);
72#endif
73	lts->tv_sec = nts->tv_sec;
74	lts->tv_nsec = nts->tv_nsec;
75	return (0);
76}
77
78static int
79__vdso_native_to_linux_timeval(l_timeval *ltv,
80    struct timeval *ntv)
81{
82
83#ifdef COMPAT_LINUX32
84	if (ntv->tv_sec > INT_MAX || ntv->tv_sec < INT_MIN)
85		return (LINUX_EOVERFLOW);
86#endif
87	ltv->tv_sec = ntv->tv_sec;
88	ltv->tv_usec = ntv->tv_usec;
89	return (0);
90}
91
92
93#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
94static int
95__vdso_native_to_linux_timespec64(struct l_timespec64 *lts,
96    struct timespec *nts)
97{
98
99	lts->tv_sec = nts->tv_sec;
100	lts->tv_nsec = nts->tv_nsec;
101	return (0);
102}
103#endif
104
105static int
106__vdso_linux_to_native_clockid(clockid_t *n, clockid_t l)
107{
108
109	switch (l) {
110	case LINUX_CLOCK_REALTIME:
111		*n = CLOCK_REALTIME;
112		break;
113	case LINUX_CLOCK_MONOTONIC:
114		*n = CLOCK_MONOTONIC;
115		break;
116	case LINUX_CLOCK_REALTIME_COARSE:
117		*n = CLOCK_REALTIME_FAST;
118		break;
119	case LINUX_CLOCK_MONOTONIC_COARSE:
120	case LINUX_CLOCK_MONOTONIC_RAW:
121		*n = CLOCK_MONOTONIC_FAST;
122		break;
123	case LINUX_CLOCK_BOOTTIME:
124		*n = CLOCK_UPTIME;
125		break;
126	default:
127		return (LINUX_EINVAL);
128	}
129	return (0);
130}
131
/*
 * The code below is adapted from
 * lib/libc/sys/__vdso_gettimeofday.c
 */
136
137static inline void
138__vdso_gettimekeep(struct vdso_timekeep **tk)
139{
140
141	*tk = (struct vdso_timekeep *)kern_timekeep_base;
142}
143
144static int
145tc_delta(const struct vdso_timehands *th, u_int *delta)
146{
147	int error;
148	u_int tc;
149
150	error = __vdso_gettc(th, &tc);
151	if (error == 0)
152		*delta = (tc - th->th_offset_count) & th->th_counter_mask;
153	return (error);
154}
155
156/*
157 * Calculate the absolute or boot-relative time from the
158 * machine-specific fast timecounter and the published timehands
159 * structure read from the shared page.
160 *
161 * The lockless reading scheme is similar to the one used to read the
162 * in-kernel timehands, see sys/kern/kern_tc.c:binuptime().  This code
163 * is based on the kernel implementation.
164 */
165static int
166freebsd_binuptime(struct bintime *bt, struct vdso_timekeep *tk, bool abs)
167{
168	struct vdso_timehands *th;
169	uint32_t curr, gen;
170	uint64_t scale, x;
171	u_int delta, scale_bits;
172	int error;
173
174	do {
175		if (!tk->tk_enabled)
176			return (ENOSYS);
177
178		curr = atomic_load_acq_32(&tk->tk_current);
179		th = &tk->tk_th[curr];
180		gen = atomic_load_acq_32(&th->th_gen);
181		*bt = th->th_offset;
182		error = tc_delta(th, &delta);
183		if (error == EAGAIN)
184			continue;
185		if (error != 0)
186			return (error);
187		scale = th->th_scale;
188#ifdef _LP64
189		scale_bits = flsl(scale);
190#else
191		scale_bits = flsll(scale);
192#endif
193		if (__predict_false(scale_bits + fls(delta) > 63)) {
194			x = (scale >> 32) * delta;
195			scale &= 0xffffffff;
196			bt->sec += x >> 32;
197			bintime_addx(bt, x << 32);
198		}
199		bintime_addx(bt, scale * delta);
200		if (abs)
201			bintime_add(bt, &th->th_boottime);
202
203		/*
204		 * Ensure that the load of th_offset is completed
205		 * before the load of th_gen.
206		 */
207		atomic_thread_fence_acq();
208	} while (curr != tk->tk_current || gen == 0 || gen != th->th_gen);
209	return (0);
210}
211
212static int
213freebsd_getnanouptime(struct bintime *bt, struct vdso_timekeep *tk)
214{
215	struct vdso_timehands *th;
216	uint32_t curr, gen;
217
218	do {
219		if (!tk->tk_enabled)
220			return (ENOSYS);
221
222		curr = atomic_load_acq_32(&tk->tk_current);
223		th = &tk->tk_th[curr];
224		gen = atomic_load_acq_32(&th->th_gen);
225		*bt = th->th_offset;
226
227		/*
228		 * Ensure that the load of th_offset is completed
229		 * before the load of th_gen.
230		 */
231		atomic_thread_fence_acq();
232	} while (curr != tk->tk_current || gen == 0 || gen != th->th_gen);
233	return (0);
234}
235
236static int
237freebsd_gettimeofday(struct timeval *tv, struct timezone *tz)
238{
239	struct vdso_timekeep *tk;
240	struct bintime bt;
241	int error;
242
243	if (tz != NULL)
244		return (ENOSYS);
245	__vdso_gettimekeep(&tk);
246	if (tk == NULL)
247		return (ENOSYS);
248	if (tk->tk_ver != VDSO_TK_VER_CURR)
249		return (ENOSYS);
250	error = freebsd_binuptime(&bt, tk, true);
251	if (error == 0)
252		bintime2timeval(&bt, tv);
253	return (error);
254}
255
256static int
257freebsd_clock_gettime(clockid_t clock_id, struct timespec *ts)
258{
259	struct vdso_timekeep *tk;
260	struct bintime bt;
261	int error;
262
263	__vdso_gettimekeep(&tk);
264	if (tk == NULL)
265		return (ENOSYS);
266	if (tk->tk_ver != VDSO_TK_VER_CURR)
267		return (ENOSYS);
268	switch (clock_id) {
269	case CLOCK_REALTIME:
270	case CLOCK_REALTIME_PRECISE:
271	case CLOCK_REALTIME_FAST:
272		error = freebsd_binuptime(&bt, tk, true);
273		break;
274	case CLOCK_MONOTONIC:
275	case CLOCK_MONOTONIC_PRECISE:
276	case CLOCK_UPTIME:
277	case CLOCK_UPTIME_PRECISE:
278		error = freebsd_binuptime(&bt, tk, false);
279		break;
280	case CLOCK_MONOTONIC_FAST:
281	case CLOCK_UPTIME_FAST:
282		error = freebsd_getnanouptime(&bt, tk);
283		break;
284	default:
285		error = ENOSYS;
286		break;
287	}
288	if (error == 0)
289		bintime2timespec(&bt, ts);
290	return (error);
291}
292
293/*
294 * Linux vDSO interfaces
295 *
296 */
297int
298__vdso_clock_gettime(clockid_t clock_id, struct l_timespec *lts)
299{
300	struct timespec ts;
301	clockid_t which;
302	int error;
303
304	error = __vdso_linux_to_native_clockid(&which, clock_id);
305	if (error != 0)
306		return (__vdso_clock_gettime_fallback(clock_id, lts));
307	error = freebsd_clock_gettime(which, &ts);
308	if (error == 0)
309		return (-__vdso_native_to_linux_timespec(lts, &ts));
310	else
311		return (__vdso_clock_gettime_fallback(clock_id, lts));
312}
313
314int
315__vdso_gettimeofday(l_timeval *ltv, struct timezone *tz)
316{
317	struct timeval tv;
318	int error;
319
320	error = freebsd_gettimeofday(&tv, tz);
321	if (error != 0)
322		return (__vdso_gettimeofday_fallback(ltv, tz));
323	return (-__vdso_native_to_linux_timeval(ltv, &tv));
324}
325
326int
327__vdso_clock_getres(clockid_t clock_id, struct l_timespec *lts)
328{
329
330	return (__vdso_clock_getres_fallback(clock_id, lts));
331}
332
333#if defined(__i386__) || defined(COMPAT_LINUX32)
334int
335__vdso_clock_gettime64(clockid_t clock_id, struct l_timespec64 *lts)
336{
337	struct timespec ts;
338	clockid_t which;
339	int error;
340
341	error = __vdso_linux_to_native_clockid(&which, clock_id);
342	if (error != 0)
343		return (__vdso_clock_gettime64_fallback(clock_id, lts));
344	error = freebsd_clock_gettime(which, &ts);
345	if (error == 0)
346		return(-__vdso_native_to_linux_timespec64(lts, &ts));
347	else
348		return(__vdso_clock_gettime64_fallback(clock_id, lts));
349}
350
351int clock_gettime64(clockid_t clock_id, struct l_timespec64 *lts)
352    __attribute__((weak, alias("__vdso_clock_gettime64")));
353#endif
354
355#if defined(__i386__) || defined(__amd64__)
/*
 * vDSO entry point for getcpu.
 *
 * A request for the node is always forwarded to
 * __vdso_getcpu_fallback().  Otherwise the CPU number is obtained
 * from __vdso_getcpu_try(); a negative result also leads to the
 * fallback.  On the fast path the CPU number is stored in *cpu, if
 * cpu is non-NULL, and 0 is returned.
 */
int
__vdso_getcpu(uint32_t *cpu, uint32_t *node, void *cache)
{
	int ret;

	if (node != NULL)
		return (__vdso_getcpu_fallback(cpu, node, cache));
	ret = __vdso_getcpu_try();
	if (ret < 0)
		return (__vdso_getcpu_fallback(cpu, node, cache));
	/* Linux permits a NULL cpu pointer; nothing is written then. */
	if (cpu != NULL)
		*cpu = ret;
	return (0);
}
369#endif
370
371#if defined(__i386__) || defined(__amd64__)
/*
 * vDSO entry point for time.
 *
 * The seconds are read through freebsd_gettimeofday(); when that
 * fails, __vdso_time_fallback() is called with the original argument.
 * On the fast path the seconds are stored in *tm, if tm is non-NULL,
 * and also returned.
 *
 * NOTE(review): the function is declared to return int, so tv_sec is
 * converted to int on return -- confirm this width is intended.
 */
int
__vdso_time(long *tm)
{
	struct timeval now;

	if (freebsd_gettimeofday(&now, NULL) != 0)
		return (__vdso_time_fallback(tm));
	if (tm != NULL)
		*tm = now.tv_sec;
	return (now.tv_sec);
}
385#endif
386