xref: /linux/arch/riscv/lib/delay.c (revision 5f6286a6)
150acfb2bSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
26d60b6eeSPalmer Dabbelt /*
36d60b6eeSPalmer Dabbelt  * Copyright (C) 2012 Regents of the University of California
46d60b6eeSPalmer Dabbelt  */
56d60b6eeSPalmer Dabbelt 
66d60b6eeSPalmer Dabbelt #include <linux/delay.h>
7*5f6286a6SAndy Shevchenko #include <linux/math.h>
86d60b6eeSPalmer Dabbelt #include <linux/param.h>
96d60b6eeSPalmer Dabbelt #include <linux/timex.h>
10*5f6286a6SAndy Shevchenko #include <linux/types.h>
116d60b6eeSPalmer Dabbelt #include <linux/export.h>
126d60b6eeSPalmer Dabbelt 
13*5f6286a6SAndy Shevchenko #include <asm/processor.h>
14*5f6286a6SAndy Shevchenko 
156d60b6eeSPalmer Dabbelt /*
166d60b6eeSPalmer Dabbelt  * This is copied from arch/arm/include/asm/delay.h
176d60b6eeSPalmer Dabbelt  *
186d60b6eeSPalmer Dabbelt  * Loop (or tick) based delay:
196d60b6eeSPalmer Dabbelt  *
206d60b6eeSPalmer Dabbelt  * loops = loops_per_jiffy * jiffies_per_sec * delay_us / us_per_sec
216d60b6eeSPalmer Dabbelt  *
226d60b6eeSPalmer Dabbelt  * where:
236d60b6eeSPalmer Dabbelt  *
246d60b6eeSPalmer Dabbelt  * jiffies_per_sec = HZ
256d60b6eeSPalmer Dabbelt  * us_per_sec = 1000000
266d60b6eeSPalmer Dabbelt  *
276d60b6eeSPalmer Dabbelt  * Therefore the constant part is HZ / 1000000 which is a small
286d60b6eeSPalmer Dabbelt  * fractional number. To make this usable with integer math, we
296d60b6eeSPalmer Dabbelt  * scale up this constant by 2^31, perform the actual multiplication,
306d60b6eeSPalmer Dabbelt  * and scale the result back down by 2^31 with a simple shift:
316d60b6eeSPalmer Dabbelt  *
326d60b6eeSPalmer Dabbelt  * loops = (loops_per_jiffy * delay_us * UDELAY_MULT) >> 31
336d60b6eeSPalmer Dabbelt  *
346d60b6eeSPalmer Dabbelt  * where:
356d60b6eeSPalmer Dabbelt  *
366d60b6eeSPalmer Dabbelt  * UDELAY_MULT = 2^31 * HZ / 1000000
376d60b6eeSPalmer Dabbelt  *             = (2^31 / 1000000) * HZ
386d60b6eeSPalmer Dabbelt  *             = 2147.483648 * HZ
396d60b6eeSPalmer Dabbelt  *             = 2147 * HZ + 483648 * HZ / 1000000
406d60b6eeSPalmer Dabbelt  *
416d60b6eeSPalmer Dabbelt  * 31 is the biggest scale shift value that won't overflow 32 bits for
426d60b6eeSPalmer Dabbelt  * delay_us * UDELAY_MULT assuming HZ <= 1000 and delay_us <= 2000.
436d60b6eeSPalmer Dabbelt  */
/* Largest microsecond count udelay() converts with the scaled multiply. */
446d60b6eeSPalmer Dabbelt #define MAX_UDELAY_US	2000
/* Highest HZ the constants below were derived for; enforced by the #error. */
456d60b6eeSPalmer Dabbelt #define MAX_UDELAY_HZ	1000
/* 2^31 * HZ / 1000000, split into whole and fractional parts for integer math. */
466d60b6eeSPalmer Dabbelt #define UDELAY_MULT	(2147UL * HZ + 483648UL * HZ / 1000000UL)
/* Right shift that removes the 2^31 scaling folded into UDELAY_MULT. */
476d60b6eeSPalmer Dabbelt #define UDELAY_SHIFT	31
486d60b6eeSPalmer Dabbelt 
/* Fail the build if HZ is outside the range assumed by UDELAY_MULT. */
496d60b6eeSPalmer Dabbelt #if HZ > MAX_UDELAY_HZ
506d60b6eeSPalmer Dabbelt #error "HZ > MAX_UDELAY_HZ"
516d60b6eeSPalmer Dabbelt #endif
526d60b6eeSPalmer Dabbelt 
536d60b6eeSPalmer Dabbelt /*
546d60b6eeSPalmer Dabbelt  * RISC-V supports both UDELAY and NDELAY.  This is largely the same as above,
556d60b6eeSPalmer Dabbelt  * but with different constants.  I added 10 bits to the shift to get this, but
566d60b6eeSPalmer Dabbelt  * the result is that I need a 64-bit multiply, which is slow on 32-bit
576d60b6eeSPalmer Dabbelt  * platforms.
586d60b6eeSPalmer Dabbelt  *
596d60b6eeSPalmer Dabbelt  * NDELAY_MULT = 2^41 * HZ / 1000000000
606d60b6eeSPalmer Dabbelt  *             = (2^41 / 1000000000) * HZ
616d60b6eeSPalmer Dabbelt  *             = 2199.02325555 * HZ
626d60b6eeSPalmer Dabbelt  *             = 2199 * HZ + 23255550 * HZ / 1000000000
636d60b6eeSPalmer Dabbelt  *
646d60b6eeSPalmer Dabbelt  * The maximum here is to avoid 64-bit overflow, but it isn't checked as it
656d60b6eeSPalmer Dabbelt  * won't happen.
666d60b6eeSPalmer Dabbelt  */
/*
 * Nominal upper bound for an ndelay() argument. It is not referenced by the
 * code visible in this file; the limit is documented but not enforced.
 */
676d60b6eeSPalmer Dabbelt #define MAX_NDELAY_NS   (1ULL << 42)
/* ndelay() shares the HZ ceiling used for udelay(). */
686d60b6eeSPalmer Dabbelt #define MAX_NDELAY_HZ	MAX_UDELAY_HZ
/* 2^41 * HZ / 1000000000 in integer form, forced to unsigned long long. */
696d60b6eeSPalmer Dabbelt #define NDELAY_MULT	((unsigned long long)(2199ULL * HZ + 23255550ULL * HZ / 1000000000ULL))
/* Right shift that removes the 2^41 scaling folded into NDELAY_MULT. */
706d60b6eeSPalmer Dabbelt #define NDELAY_SHIFT	41
716d60b6eeSPalmer Dabbelt 
/* Fail the build if HZ is outside the range assumed by NDELAY_MULT. */
726d60b6eeSPalmer Dabbelt #if HZ > MAX_NDELAY_HZ
736d60b6eeSPalmer Dabbelt #error "HZ > MAX_NDELAY_HZ"
746d60b6eeSPalmer Dabbelt #endif
756d60b6eeSPalmer Dabbelt 
__delay(unsigned long cycles)766d60b6eeSPalmer Dabbelt void __delay(unsigned long cycles)
776d60b6eeSPalmer Dabbelt {
786d60b6eeSPalmer Dabbelt 	u64 t0 = get_cycles();
796d60b6eeSPalmer Dabbelt 
806d60b6eeSPalmer Dabbelt 	while ((unsigned long)(get_cycles() - t0) < cycles)
816d60b6eeSPalmer Dabbelt 		cpu_relax();
826d60b6eeSPalmer Dabbelt }
8324948b7eSOlof Johansson EXPORT_SYMBOL(__delay);
846d60b6eeSPalmer Dabbelt 
udelay(unsigned long usecs)856d60b6eeSPalmer Dabbelt void udelay(unsigned long usecs)
866d60b6eeSPalmer Dabbelt {
87d0e1f211SNick Hu 	u64 ucycles = (u64)usecs * lpj_fine * UDELAY_MULT;
8866cc016aSPaul Walmsley 	u64 n;
896d60b6eeSPalmer Dabbelt 
906d60b6eeSPalmer Dabbelt 	if (unlikely(usecs > MAX_UDELAY_US)) {
9166cc016aSPaul Walmsley 		n = (u64)usecs * riscv_timebase;
9266cc016aSPaul Walmsley 		do_div(n, 1000000);
9366cc016aSPaul Walmsley 
9466cc016aSPaul Walmsley 		__delay(n);
956d60b6eeSPalmer Dabbelt 		return;
966d60b6eeSPalmer Dabbelt 	}
976d60b6eeSPalmer Dabbelt 
986d60b6eeSPalmer Dabbelt 	__delay(ucycles >> UDELAY_SHIFT);
996d60b6eeSPalmer Dabbelt }
1006d60b6eeSPalmer Dabbelt EXPORT_SYMBOL(udelay);
1016d60b6eeSPalmer Dabbelt 
ndelay(unsigned long nsecs)1026d60b6eeSPalmer Dabbelt void ndelay(unsigned long nsecs)
1036d60b6eeSPalmer Dabbelt {
1046d60b6eeSPalmer Dabbelt 	/*
1056d60b6eeSPalmer Dabbelt 	 * This doesn't bother checking for overflow, as it won't happen (it's
1066d60b6eeSPalmer Dabbelt 	 * an hour) of delay.
1076d60b6eeSPalmer Dabbelt 	 */
1086d60b6eeSPalmer Dabbelt 	unsigned long long ncycles = nsecs * lpj_fine * NDELAY_MULT;
1096d60b6eeSPalmer Dabbelt 	__delay(ncycles >> NDELAY_SHIFT);
1106d60b6eeSPalmer Dabbelt }
1116d60b6eeSPalmer Dabbelt EXPORT_SYMBOL(ndelay);
112