/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
   Copyright (C) 1991-2016 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   In addition to the permissions in the GNU Lesser General Public
   License, the Free Software Foundation gives you unlimited
   permission to link the compiled version of this file into
   combinations with other programs, and to distribute those
   combinations without any restriction coming from the use of this
   file.  (The Lesser General Public License restrictions do apply in
   other respects; for example, they cover modification of the file,
   and distribution when not linked into a combine executable.)

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
28*a9fa9459Szrj 
/* You have to define the following before including this file:

   UWtype -- An unsigned type, default type for operations (typically a "word")
   UHWtype -- An unsigned type, at least half the size of UWtype.
   UDWtype -- An unsigned type, at least twice as large as UWtype.
   W_TYPE_SIZE -- size in bits of UWtype

   UQItype -- Unsigned 8 bit type.
   SItype, USItype -- Signed and unsigned 32 bit types.
   DItype, UDItype -- Signed and unsigned 64 bit types.

   On a 32 bit machine UWtype should typically be USItype;
   on a 64 bit machine, UWtype should typically be UDItype.  */
42*a9fa9459Szrj 
/* Helpers for splitting a UWtype word into half-word-sized pieces.
   __ll_B is the base of the half-word radix (1 << W_TYPE_SIZE/2);
   __ll_lowpart/__ll_highpart extract the low/high halves of a word.
   Note: W_TYPE_SIZE need not be defined yet -- macro bodies are
   expanded at use, after the defaults below have been seen.  */
#define __BITS4 (W_TYPE_SIZE / 4)
#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
47*a9fa9459Szrj 
/* Default to a 32-bit word type when the includer has not chosen one.  */
#ifndef W_TYPE_SIZE
#define W_TYPE_SIZE	32
#define UWtype		USItype
#define UHWtype		USItype
#define UDWtype		UDItype
#endif
54*a9fa9459Szrj 
55*a9fa9459Szrj /* Used in glibc only.  */
56*a9fa9459Szrj #ifndef attribute_hidden
57*a9fa9459Szrj #define attribute_hidden
58*a9fa9459Szrj #endif
59*a9fa9459Szrj 
60*a9fa9459Szrj extern const UQItype __clz_tab[256] attribute_hidden;
61*a9fa9459Szrj 
/* Define auxiliary asm macros.

   1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
   UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
   word product in HIGH_PROD and LOW_PROD.

   2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
   UDWtype product.  This is just a variant of umul_ppmm.

   3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
   denominator) divides a UDWtype, composed by the UWtype integers
   HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
   in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
   than DENOMINATOR for correct operation.  If, in addition, the macro
   requires the most significant bit of DENOMINATOR to be 1, then the
   pre-processor symbol UDIV_NEEDS_NORMALIZATION is defined to 1.

   4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
   denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
   is rounded towards 0.

   5) count_leading_zeros(count, x) counts the number of zero-bits from the
   msb to the first nonzero bit in the UWtype X.  This is the number of
   steps X needs to be shifted left to set the msb.  Undefined for X == 0,
   unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.

   6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
   from the least significant end.

   7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
   high_addend_2, low_addend_2) adds two two-word UWtype integers, composed by
   HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
   respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
   (i.e. carry out) is not stored anywhere, and is lost.

   8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
   high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
   composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
   LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
   and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
   and is lost.

   If any of these macros are left undefined for a particular CPU,
   C macros are used.  */
106*a9fa9459Szrj 
/* The CPUs come in alphabetical order below.

   Please add support for more CPUs here, or improve the current support
   for the CPUs below!
   (E.g. WE32100, IBM360.)  */
112*a9fa9459Szrj 
113*a9fa9459Szrj #if defined (__GNUC__) && !defined (NO_ASM)
114*a9fa9459Szrj 
115*a9fa9459Szrj /* We sometimes need to clobber "cc" with gcc2, but that would not be
116*a9fa9459Szrj    understood by gcc1.  Use cpp to avoid major code duplication.  */
117*a9fa9459Szrj #if __GNUC__ < 2
118*a9fa9459Szrj #define __CLOBBER_CC
119*a9fa9459Szrj #define __AND_CLOBBER_CC
120*a9fa9459Szrj #else /* __GNUC__ >= 2 */
121*a9fa9459Szrj #define __CLOBBER_CC : "cc"
122*a9fa9459Szrj #define __AND_CLOBBER_CC , "cc"
123*a9fa9459Szrj #endif /* __GNUC__ < 2 */
124*a9fa9459Szrj 
#if defined (__aarch64__)

/* AArch64: let the compiler emit clz/rbit+clz via the builtins.  */
#if W_TYPE_SIZE == 32
#define count_leading_zeros(COUNT, X)	((COUNT) = __builtin_clz (X))
#define count_trailing_zeros(COUNT, X)   ((COUNT) = __builtin_ctz (X))
#define COUNT_LEADING_ZEROS_0 32
#endif /* W_TYPE_SIZE == 32 */

#if W_TYPE_SIZE == 64
#define count_leading_zeros(COUNT, X)	((COUNT) = __builtin_clzll (X))
#define count_trailing_zeros(COUNT, X)   ((COUNT) = __builtin_ctzll (X))
#define COUNT_LEADING_ZEROS_0 64
#endif /* W_TYPE_SIZE == 64 */

#endif /* __aarch64__ */
140*a9fa9459Szrj 
#if defined (__alpha) && W_TYPE_SIZE == 64
/* There is a bug in g++ before version 5 that
   errors on __builtin_alpha_umulh.  */
#if !defined(__cplusplus) || __GNUC__ >= 5
#define umul_ppmm(ph, pl, m0, m1) \
  do {									\
    UDItype __m0 = (m0), __m1 = (m1);					\
    (ph) = __builtin_alpha_umulh (__m0, __m1);				\
    (pl) = __m0 * __m1;							\
  } while (0)
#define UMUL_TIME 46
#endif /* !c++ */
#ifndef LONGLONG_STANDALONE
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { UDItype __r;							\
    (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));				\
    (r) = __r;								\
  } while (0)
extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
#define UDIV_TIME 220
#endif /* LONGLONG_STANDALONE */
#ifdef __alpha_cix__
#define count_leading_zeros(COUNT,X)	((COUNT) = __builtin_clzl (X))
#define count_trailing_zeros(COUNT,X)	((COUNT) = __builtin_ctzl (X))
#define COUNT_LEADING_ZEROS_0 64
#else
/* Without CIX, locate the highest nonzero byte with cmpbge and finish
   with the 256-entry __clz_tab lookup.  */
#define count_leading_zeros(COUNT,X) \
  do {									\
    UDItype __xr = (X), __t, __a;					\
    __t = __builtin_alpha_cmpbge (0, __xr);				\
    __a = __clz_tab[__t ^ 0xff] - 1;					\
    __t = __builtin_alpha_extbl (__xr, __a);				\
    (COUNT) = 64 - (__clz_tab[__t] + __a*8);				\
  } while (0)
#define count_trailing_zeros(COUNT,X) \
  do {									\
    UDItype __xr = (X), __t, __a;					\
    __t = __builtin_alpha_cmpbge (0, __xr);				\
    __t = ~__t & -~__t;							\
    __a = ((__t & 0xCC) != 0) * 2;					\
    __a += ((__t & 0xF0) != 0) * 4;					\
    __a += ((__t & 0xAA) != 0);						\
    __t = __builtin_alpha_extbl (__xr, __a);				\
    __a <<= 3;								\
    __t &= -__t;							\
    __a += ((__t & 0xCC) != 0) * 2;					\
    __a += ((__t & 0xF0) != 0) * 4;					\
    __a += ((__t & 0xAA) != 0);						\
    (COUNT) = __a;							\
  } while (0)
#endif /* __alpha_cix__ */
#endif /* __alpha */
193*a9fa9459Szrj 
#if defined (__arc__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add.f	%1, %4, %5\n\tadc	%0, %2, %3"		\
	   : "=r" ((USItype) (sh)),					\
	     "=&r" ((USItype) (sl))					\
	   : "%r" ((USItype) (ah)),					\
	     "rIJ" ((USItype) (bh)),					\
	     "%r" ((USItype) (al)),					\
	     "rIJ" ((USItype) (bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub.f	%1, %4, %5\n\tsbc	%0, %2, %3"		\
	   : "=r" ((USItype) (sh)),					\
	     "=&r" ((USItype) (sl))					\
	   : "r" ((USItype) (ah)),					\
	     "rIJ" ((USItype) (bh)),					\
	     "r" ((USItype) (al)),					\
	     "rIJ" ((USItype) (bl)))

/* Parenthesize the arguments so expression operands (e.g. a+b) bind
   correctly under the casts, as the CRIS variant below already does.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
#ifdef __ARC_NORM__
#define count_leading_zeros(count, x) \
  do									\
    {									\
      SItype c_;							\
									\
      __asm__ ("norm.f\t%0,%1\n\tmov.mi\t%0,-1" : "=r" (c_) : "r" (x) : "cc");\
      (count) = c_ + 1;							\
    }									\
  while (0)
#define COUNT_LEADING_ZEROS_0 32
#endif
#endif
226*a9fa9459Szrj 
#if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \
 && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("adds	%1, %4, %5\n\tadc	%0, %2, %3"		\
	   : "=r" ((USItype) (sh)),					\
	     "=&r" ((USItype) (sl))					\
	   : "%r" ((USItype) (ah)),					\
	     "rI" ((USItype) (bh)),					\
	     "%r" ((USItype) (al)),					\
	     "rI" ((USItype) (bl)) __CLOBBER_CC)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subs	%1, %4, %5\n\tsbc	%0, %2, %3"		\
	   : "=r" ((USItype) (sh)),					\
	     "=&r" ((USItype) (sl))					\
	   : "r" ((USItype) (ah)),					\
	     "rI" ((USItype) (bh)),					\
	     "r" ((USItype) (al)),					\
	     "rI" ((USItype) (bl)) __CLOBBER_CC)
# if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_2A__) \
     || defined(__ARM_ARCH_3__)
/* Pre-v3M ARM has no umull; synthesize the 64-bit product from four
   16x16 partial products.  */
#  define umul_ppmm(xh, xl, a, b)					\
  do {									\
    register USItype __t0, __t1, __t2;					\
    __asm__ ("%@ Inlined umul_ppmm\n"					\
	   "	mov	%2, %5, lsr #16\n"				\
	   "	mov	%0, %6, lsr #16\n"				\
	   "	bic	%3, %5, %2, lsl #16\n"				\
	   "	bic	%4, %6, %0, lsl #16\n"				\
	   "	mul	%1, %3, %4\n"					\
	   "	mul	%4, %2, %4\n"					\
	   "	mul	%3, %0, %3\n"					\
	   "	mul	%0, %2, %0\n"					\
	   "	adds	%3, %4, %3\n"					\
	   "	addcs	%0, %0, #65536\n"				\
	   "	adds	%1, %1, %3, lsl #16\n"				\
	   "	adc	%0, %0, %3, lsr #16"				\
	   : "=&r" ((USItype) (xh)),					\
	     "=r" ((USItype) (xl)),					\
	     "=&r" (__t0), "=&r" (__t1), "=r" (__t2)			\
	   : "r" ((USItype) (a)),					\
	     "r" ((USItype) (b)) __CLOBBER_CC );			\
  } while (0)
#  define UMUL_TIME 20
# else
#  define umul_ppmm(xh, xl, a, b)					\
  do {									\
    /* Generate umull, under compiler control.  */			\
    register UDItype __t0 = (UDItype)(USItype)(a) * (USItype)(b);	\
    (xl) = (USItype)__t0;						\
    (xh) = (USItype)(__t0 >> 32);					\
  } while (0)
#  define UMUL_TIME 3
# endif
# define UDIV_TIME 100
#endif /* __arm__ */
282*a9fa9459Szrj 
#if defined(__arm__)
/* Let gcc decide how best to implement count_leading_zeros.  */
#define count_leading_zeros(COUNT,X)	((COUNT) = __builtin_clz (X))
#define count_trailing_zeros(COUNT,X)   ((COUNT) = __builtin_ctz (X))
#define COUNT_LEADING_ZEROS_0 32
#endif
289*a9fa9459Szrj 
#if defined (__AVR__)

/* AVR: pick the builtin matching the configured word size
   (int is 16 bits on AVR, hence clz/clzl/clzll per size).  */
#if W_TYPE_SIZE == 16
#define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clz (X))
#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X))
#define COUNT_LEADING_ZEROS_0 16
#endif /* W_TYPE_SIZE == 16 */

#if W_TYPE_SIZE == 32
#define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clzl (X))
#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
#define COUNT_LEADING_ZEROS_0 32
#endif /* W_TYPE_SIZE == 32 */

#if W_TYPE_SIZE == 64
#define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clzll (X))
#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzll (X))
#define COUNT_LEADING_ZEROS_0 64
#endif /* W_TYPE_SIZE == 64 */

#endif /* defined (__AVR__) */
311*a9fa9459Szrj 
#if defined (__CRIS__)

#if __CRIS_arch_version >= 3
#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
#define COUNT_LEADING_ZEROS_0 32
#endif /* __CRIS_arch_version >= 3 */

#if __CRIS_arch_version >= 8
#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
#endif /* __CRIS_arch_version >= 8 */

#if __CRIS_arch_version >= 10
#define __umulsidi3(u,v) ((UDItype)(USItype) (u) * (UDItype)(USItype) (v))
#else
#define __umulsidi3 __umulsidi3
extern UDItype __umulsidi3 (USItype, USItype);
#endif /* __CRIS_arch_version >= 10 */

#define umul_ppmm(w1, w0, u, v)		\
  do {					\
    UDItype __x = __umulsidi3 (u, v);	\
    (w0) = (USItype) (__x);		\
    (w1) = (USItype) (__x >> 32);	\
  } while (0)

/* FIXME: defining add_ssaaaa and sub_ddmmss should be advantageous for
   DFmode ("double" intrinsics, avoiding two of the three insns handling
   carry), but defining them as open-code C composing and doing the
   operation in DImode (UDImode) shows that the DImode needs work:
   register pressure from requiring neighboring registers and the
   traffic to and from them come to dominate, in the 4.7 series.  */

#endif /* defined (__CRIS__) */
345*a9fa9459Szrj 
#if defined (__hppa) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0"				\
	   : "=r" ((USItype) (sh)),					\
	     "=&r" ((USItype) (sl))					\
	   : "%rM" ((USItype) (ah)),					\
	     "rM" ((USItype) (bh)),					\
	     "%rM" ((USItype) (al)),					\
	     "rM" ((USItype) (bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0"				\
	   : "=r" ((USItype) (sh)),					\
	     "=&r" ((USItype) (sl))					\
	   : "rM" ((USItype) (ah)),					\
	     "rM" ((USItype) (bh)),					\
	     "rM" ((USItype) (al)),					\
	     "rM" ((USItype) (bl)))
#if defined (_PA_RISC1_1)
#define umul_ppmm(w1, w0, u, v) \
  do {									\
    union								\
      {									\
	UDItype __f;							\
	struct {USItype __w1, __w0;} __w1w0;				\
      } __t;								\
    __asm__ ("xmpyu %1,%2,%0"						\
	     : "=x" (__t.__f)						\
	     : "x" ((USItype) (u)),					\
	       "x" ((USItype) (v)));					\
    (w1) = __t.__w1w0.__w1;						\
    (w0) = __t.__w1w0.__w0;						\
     } while (0)
#define UMUL_TIME 8
#else
#define UMUL_TIME 30
#endif
#define UDIV_TIME 40
#define count_leading_zeros(count, x) \
  do {									\
    USItype __tmp;							\
    __asm__ (								\
       "ldi		1,%0\n"						\
"	extru,=		%1,15,16,%%r0		; Bits 31..16 zero?\n"	\
"	extru,tr	%1,15,16,%1		; No.  Shift down, skip add.\n"\
"	ldo		16(%0),%0		; Yes.  Perform add.\n"	\
"	extru,=		%1,23,8,%%r0		; Bits 15..8 zero?\n"	\
"	extru,tr	%1,23,8,%1		; No.  Shift down, skip add.\n"\
"	ldo		8(%0),%0		; Yes.  Perform add.\n"	\
"	extru,=		%1,27,4,%%r0		; Bits 7..4 zero?\n"	\
"	extru,tr	%1,27,4,%1		; No.  Shift down, skip add.\n"\
"	ldo		4(%0),%0		; Yes.  Perform add.\n"	\
"	extru,=		%1,29,2,%%r0		; Bits 3..2 zero?\n"	\
"	extru,tr	%1,29,2,%1		; No.  Shift down, skip add.\n"\
"	ldo		2(%0),%0		; Yes.  Perform add.\n"	\
"	extru		%1,30,1,%1		; Extract bit 1.\n"	\
"	sub		%0,%1,%0		; Subtract it.\n"	\
	: "=r" (count), "=r" (__tmp) : "1" (x));			\
  } while (0)
#endif
405*a9fa9459Szrj 
#if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
#if !defined (__zarch__)
#define smul_ppmm(xh, xl, m0, m1) \
  do {									\
    union {DItype __ll;							\
	   struct {USItype __h, __l;} __i;				\
	  } __x;							\
    __asm__ ("lr %N0,%1\n\tmr %0,%2"					\
	     : "=&r" (__x.__ll)						\
	     : "r" (m0), "r" (m1));					\
    (xh) = __x.__i.__h; (xl) = __x.__i.__l;				\
  } while (0)
#define sdiv_qrnnd(q, r, n1, n0, d) \
  do {									\
    union {DItype __ll;							\
	   struct {USItype __h, __l;} __i;				\
	  } __x;							\
    __x.__i.__h = n1; __x.__i.__l = n0;					\
    __asm__ ("dr %0,%2"							\
	     : "=r" (__x.__ll)						\
	     : "0" (__x.__ll), "r" (d));				\
    (q) = __x.__i.__l; (r) = __x.__i.__h;				\
  } while (0)
#else
/* z/Architecture: mr/dr require an even/odd register pair, so pin the
   operands to r0/r1 explicitly.  */
#define smul_ppmm(xh, xl, m0, m1) \
  do {                                                                  \
    register SItype __r0 __asm__ ("0");					\
    register SItype __r1 __asm__ ("1") = (m0);				\
									\
    __asm__ ("mr\t%%r0,%3"                                              \
	     : "=r" (__r0), "=r" (__r1)					\
	     : "r"  (__r1),  "r" (m1));					\
    (xh) = __r0; (xl) = __r1;						\
  } while (0)

#define sdiv_qrnnd(q, r, n1, n0, d) \
  do {									\
    register SItype __r0 __asm__ ("0") = (n1);				\
    register SItype __r1 __asm__ ("1") = (n0);				\
									\
    __asm__ ("dr\t%%r0,%4"                                              \
	     : "=r" (__r0), "=r" (__r1)					\
	     : "r" (__r0), "r" (__r1), "r" (d));			\
    (q) = __r1; (r) = __r0;						\
  } while (0)
#endif /* __zarch__ */
#endif
453*a9fa9459Szrj 
#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}"		\
	   : "=r" ((USItype) (sh)),					\
	     "=&r" ((USItype) (sl))					\
	   : "%0" ((USItype) (ah)),					\
	     "g" ((USItype) (bh)),					\
	     "%1" ((USItype) (al)),					\
	     "g" ((USItype) (bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}"		\
	   : "=r" ((USItype) (sh)),					\
	     "=&r" ((USItype) (sl))					\
	   : "0" ((USItype) (ah)),					\
	     "g" ((USItype) (bh)),					\
	     "1" ((USItype) (al)),					\
	     "g" ((USItype) (bl)))
/* mull produces the 64-bit product in edx:eax ("=d","=a").  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mul{l} %3"							\
	   : "=a" ((USItype) (w0)),					\
	     "=d" ((USItype) (w1))					\
	   : "%0" ((USItype) (u)),					\
	     "rm" ((USItype) (v)))
#define udiv_qrnnd(q, r, n1, n0, dv) \
  __asm__ ("div{l} %4"							\
	   : "=a" ((USItype) (q)),					\
	     "=d" ((USItype) (r))					\
	   : "0" ((USItype) (n0)),					\
	     "1" ((USItype) (n1)),					\
	     "rm" ((USItype) (dv)))
#define count_leading_zeros(count, x)	((count) = __builtin_clz (x))
#define count_trailing_zeros(count, x)	((count) = __builtin_ctz (x))
#define UMUL_TIME 40
#define UDIV_TIME 40
#endif /* 80x86 */
489*a9fa9459Szrj 
#if defined (__x86_64__) && W_TYPE_SIZE == 64
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}"		\
	   : "=r" ((UDItype) (sh)),					\
	     "=&r" ((UDItype) (sl))					\
	   : "%0" ((UDItype) (ah)),					\
	     "rme" ((UDItype) (bh)),					\
	     "%1" ((UDItype) (al)),					\
	     "rme" ((UDItype) (bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}"		\
	   : "=r" ((UDItype) (sh)),					\
	     "=&r" ((UDItype) (sl))					\
	   : "0" ((UDItype) (ah)),					\
	     "rme" ((UDItype) (bh)),					\
	     "1" ((UDItype) (al)),					\
	     "rme" ((UDItype) (bl)))
/* mulq produces the 128-bit product in rdx:rax ("=d","=a").  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mul{q} %3"							\
	   : "=a" ((UDItype) (w0)),					\
	     "=d" ((UDItype) (w1))					\
	   : "%0" ((UDItype) (u)),					\
	     "rm" ((UDItype) (v)))
#define udiv_qrnnd(q, r, n1, n0, dv) \
  __asm__ ("div{q} %4"							\
	   : "=a" ((UDItype) (q)),					\
	     "=d" ((UDItype) (r))					\
	   : "0" ((UDItype) (n0)),					\
	     "1" ((UDItype) (n1)),					\
	     "rm" ((UDItype) (dv)))
#define count_leading_zeros(count, x)	((count) = __builtin_clzll (x))
#define count_trailing_zeros(count, x)	((count) = __builtin_ctzll (x))
#define UMUL_TIME 40
#define UDIV_TIME 40
#endif /* x86_64 */
525*a9fa9459Szrj 
#if defined (__i960__) && W_TYPE_SIZE == 32
#define umul_ppmm(w1, w0, u, v) \
  ({union {UDItype __ll;						\
	   struct {USItype __l, __h;} __i;				\
	  } __xx;							\
  __asm__ ("emul	%2,%1,%0"					\
	   : "=d" (__xx.__ll)						\
	   : "%dI" ((USItype) (u)),					\
	     "dI" ((USItype) (v)));					\
  (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
#define __umulsidi3(u, v) \
  ({UDItype __w;							\
    __asm__ ("emul	%2,%1,%0"					\
	     : "=d" (__w)						\
	     : "%dI" ((USItype) (u)),					\
	       "dI" ((USItype) (v)));					\
    __w; })
#endif /* __i960__ */
544*a9fa9459Szrj 
#if defined (__ia64) && W_TYPE_SIZE == 64
/* This form encourages gcc (pre-release 3.4 at least) to emit predicated
   "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency.  The generic
   code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
   register, which takes an extra cycle.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl)				\
  do {									\
    UWtype __x;								\
    __x = (al) - (bl);							\
    if ((al) < (bl))							\
      (sh) = (ah) - (bh) - 1;						\
    else								\
      (sh) = (ah) - (bh);						\
    (sl) = __x;								\
  } while (0)

/* Do both product parts in assembly, since that gives better code with
   all gcc versions.  Some callers will just use the upper part, and in
   that situation we waste an instruction, but not any cycles.  */
#define umul_ppmm(ph, pl, m0, m1)					\
  __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0"		\
	   : "=&f" (ph), "=f" (pl)					\
	   : "f" (m0), "f" (m1))
#define count_leading_zeros(count, x)					\
  do {									\
    UWtype _x = (x), _y, _a, _c;					\
    __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x));		\
    __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y));		\
    _c = (_a - 1) << 3;							\
    _x >>= _c;								\
    if (_x >= 1 << 4)							\
      _x >>= 4, _c += 4;						\
    if (_x >= 1 << 2)							\
      _x >>= 2, _c += 2;						\
    _c += _x >> 1;							\
    (count) =  W_TYPE_SIZE - 1 - _c;					\
  } while (0)
/* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
   based, and we don't need a special case for x==0 here */
#define count_trailing_zeros(count, x)					\
  do {									\
    UWtype __ctz_x = (x);						\
    __asm__ ("popcnt %0 = %1"						\
	     : "=r" (count)						\
	     : "r" ((__ctz_x-1) & ~__ctz_x));				\
  } while (0)
#define UMUL_TIME 14
#endif
593*a9fa9459Szrj 
#if defined (__M32R__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  /* The cmp clears the condition bit.  */ \
  __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3"			\
	   : "=r" ((USItype) (sh)),					\
	     "=&r" ((USItype) (sl))					\
	   : "0" ((USItype) (ah)),					\
	     "r" ((USItype) (bh)),					\
	     "1" ((USItype) (al)),					\
	     "r" ((USItype) (bl))					\
	   : "cbit")
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  /* The cmp clears the condition bit.  */ \
  __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3"			\
	   : "=r" ((USItype) (sh)),					\
	     "=&r" ((USItype) (sl))					\
	   : "0" ((USItype) (ah)),					\
	     "r" ((USItype) (bh)),					\
	     "1" ((USItype) (al)),					\
	     "r" ((USItype) (bl))					\
	   : "cbit")
#endif /* __M32R__ */
616*a9fa9459Szrj 
617*a9fa9459Szrj #if defined (__mc68000__) && W_TYPE_SIZE == 32
618*a9fa9459Szrj #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
619*a9fa9459Szrj   __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0"				\
620*a9fa9459Szrj 	   : "=d" ((USItype) (sh)),					\
621*a9fa9459Szrj 	     "=&d" ((USItype) (sl))					\
622*a9fa9459Szrj 	   : "%0" ((USItype) (ah)),					\
623*a9fa9459Szrj 	     "d" ((USItype) (bh)),					\
624*a9fa9459Szrj 	     "%1" ((USItype) (al)),					\
625*a9fa9459Szrj 	     "g" ((USItype) (bl)))
626*a9fa9459Szrj #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
627*a9fa9459Szrj   __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0"				\
628*a9fa9459Szrj 	   : "=d" ((USItype) (sh)),					\
629*a9fa9459Szrj 	     "=&d" ((USItype) (sl))					\
630*a9fa9459Szrj 	   : "0" ((USItype) (ah)),					\
631*a9fa9459Szrj 	     "d" ((USItype) (bh)),					\
632*a9fa9459Szrj 	     "1" ((USItype) (al)),					\
633*a9fa9459Szrj 	     "g" ((USItype) (bl)))
634*a9fa9459Szrj 
635*a9fa9459Szrj /* The '020, '030, '040, '060 and CPU32 have 32x32->64 and 64/32->32q-32r.  */
636*a9fa9459Szrj #if (defined (__mc68020__) && !defined (__mc68060__))
637*a9fa9459Szrj #define umul_ppmm(w1, w0, u, v) \
638*a9fa9459Szrj   __asm__ ("mulu%.l %3,%1:%0"						\
639*a9fa9459Szrj 	   : "=d" ((USItype) (w0)),					\
640*a9fa9459Szrj 	     "=d" ((USItype) (w1))					\
641*a9fa9459Szrj 	   : "%0" ((USItype) (u)),					\
642*a9fa9459Szrj 	     "dmi" ((USItype) (v)))
643*a9fa9459Szrj #define UMUL_TIME 45
644*a9fa9459Szrj #define udiv_qrnnd(q, r, n1, n0, d) \
645*a9fa9459Szrj   __asm__ ("divu%.l %4,%1:%0"						\
646*a9fa9459Szrj 	   : "=d" ((USItype) (q)),					\
647*a9fa9459Szrj 	     "=d" ((USItype) (r))					\
648*a9fa9459Szrj 	   : "0" ((USItype) (n0)),					\
649*a9fa9459Szrj 	     "1" ((USItype) (n1)),					\
650*a9fa9459Szrj 	     "dmi" ((USItype) (d)))
651*a9fa9459Szrj #define UDIV_TIME 90
652*a9fa9459Szrj #define sdiv_qrnnd(q, r, n1, n0, d) \
653*a9fa9459Szrj   __asm__ ("divs%.l %4,%1:%0"						\
654*a9fa9459Szrj 	   : "=d" ((USItype) (q)),					\
655*a9fa9459Szrj 	     "=d" ((USItype) (r))					\
656*a9fa9459Szrj 	   : "0" ((USItype) (n0)),					\
657*a9fa9459Szrj 	     "1" ((USItype) (n1)),					\
658*a9fa9459Szrj 	     "dmi" ((USItype) (d)))
659*a9fa9459Szrj 
660*a9fa9459Szrj #elif defined (__mcoldfire__) /* not mc68020 */
661*a9fa9459Szrj 
662*a9fa9459Szrj #define umul_ppmm(xh, xl, a, b) \
663*a9fa9459Szrj   __asm__ ("| Inlined umul_ppmm\n"					\
664*a9fa9459Szrj 	   "	move%.l	%2,%/d0\n"					\
665*a9fa9459Szrj 	   "	move%.l	%3,%/d1\n"					\
666*a9fa9459Szrj 	   "	move%.l	%/d0,%/d2\n"					\
667*a9fa9459Szrj 	   "	swap	%/d0\n"						\
668*a9fa9459Szrj 	   "	move%.l	%/d1,%/d3\n"					\
669*a9fa9459Szrj 	   "	swap	%/d1\n"						\
670*a9fa9459Szrj 	   "	move%.w	%/d2,%/d4\n"					\
671*a9fa9459Szrj 	   "	mulu	%/d3,%/d4\n"					\
672*a9fa9459Szrj 	   "	mulu	%/d1,%/d2\n"					\
673*a9fa9459Szrj 	   "	mulu	%/d0,%/d3\n"					\
674*a9fa9459Szrj 	   "	mulu	%/d0,%/d1\n"					\
675*a9fa9459Szrj 	   "	move%.l	%/d4,%/d0\n"					\
676*a9fa9459Szrj 	   "	clr%.w	%/d0\n"						\
677*a9fa9459Szrj 	   "	swap	%/d0\n"						\
678*a9fa9459Szrj 	   "	add%.l	%/d0,%/d2\n"					\
679*a9fa9459Szrj 	   "	add%.l	%/d3,%/d2\n"					\
680*a9fa9459Szrj 	   "	jcc	1f\n"						\
681*a9fa9459Szrj 	   "	add%.l	%#65536,%/d1\n"					\
682*a9fa9459Szrj 	   "1:	swap	%/d2\n"						\
683*a9fa9459Szrj 	   "	moveq	%#0,%/d0\n"					\
684*a9fa9459Szrj 	   "	move%.w	%/d2,%/d0\n"					\
685*a9fa9459Szrj 	   "	move%.w	%/d4,%/d2\n"					\
686*a9fa9459Szrj 	   "	move%.l	%/d2,%1\n"					\
687*a9fa9459Szrj 	   "	add%.l	%/d1,%/d0\n"					\
688*a9fa9459Szrj 	   "	move%.l	%/d0,%0"					\
689*a9fa9459Szrj 	   : "=g" ((USItype) (xh)),					\
690*a9fa9459Szrj 	     "=g" ((USItype) (xl))					\
691*a9fa9459Szrj 	   : "g" ((USItype) (a)),					\
692*a9fa9459Szrj 	     "g" ((USItype) (b))					\
693*a9fa9459Szrj 	   : "d0", "d1", "d2", "d3", "d4")
694*a9fa9459Szrj #define UMUL_TIME 100
695*a9fa9459Szrj #define UDIV_TIME 400
696*a9fa9459Szrj #else /* not ColdFire */
697*a9fa9459Szrj /* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX.  */
698*a9fa9459Szrj #define umul_ppmm(xh, xl, a, b) \
699*a9fa9459Szrj   __asm__ ("| Inlined umul_ppmm\n"					\
700*a9fa9459Szrj 	   "	move%.l	%2,%/d0\n"					\
701*a9fa9459Szrj 	   "	move%.l	%3,%/d1\n"					\
702*a9fa9459Szrj 	   "	move%.l	%/d0,%/d2\n"					\
703*a9fa9459Szrj 	   "	swap	%/d0\n"						\
704*a9fa9459Szrj 	   "	move%.l	%/d1,%/d3\n"					\
705*a9fa9459Szrj 	   "	swap	%/d1\n"						\
706*a9fa9459Szrj 	   "	move%.w	%/d2,%/d4\n"					\
707*a9fa9459Szrj 	   "	mulu	%/d3,%/d4\n"					\
708*a9fa9459Szrj 	   "	mulu	%/d1,%/d2\n"					\
709*a9fa9459Szrj 	   "	mulu	%/d0,%/d3\n"					\
710*a9fa9459Szrj 	   "	mulu	%/d0,%/d1\n"					\
711*a9fa9459Szrj 	   "	move%.l	%/d4,%/d0\n"					\
712*a9fa9459Szrj 	   "	eor%.w	%/d0,%/d0\n"					\
713*a9fa9459Szrj 	   "	swap	%/d0\n"						\
714*a9fa9459Szrj 	   "	add%.l	%/d0,%/d2\n"					\
715*a9fa9459Szrj 	   "	add%.l	%/d3,%/d2\n"					\
716*a9fa9459Szrj 	   "	jcc	1f\n"						\
717*a9fa9459Szrj 	   "	add%.l	%#65536,%/d1\n"					\
718*a9fa9459Szrj 	   "1:	swap	%/d2\n"						\
719*a9fa9459Szrj 	   "	moveq	%#0,%/d0\n"					\
720*a9fa9459Szrj 	   "	move%.w	%/d2,%/d0\n"					\
721*a9fa9459Szrj 	   "	move%.w	%/d4,%/d2\n"					\
722*a9fa9459Szrj 	   "	move%.l	%/d2,%1\n"					\
723*a9fa9459Szrj 	   "	add%.l	%/d1,%/d0\n"					\
724*a9fa9459Szrj 	   "	move%.l	%/d0,%0"					\
725*a9fa9459Szrj 	   : "=g" ((USItype) (xh)),					\
726*a9fa9459Szrj 	     "=g" ((USItype) (xl))					\
727*a9fa9459Szrj 	   : "g" ((USItype) (a)),					\
728*a9fa9459Szrj 	     "g" ((USItype) (b))					\
729*a9fa9459Szrj 	   : "d0", "d1", "d2", "d3", "d4")
730*a9fa9459Szrj #define UMUL_TIME 100
731*a9fa9459Szrj #define UDIV_TIME 400
732*a9fa9459Szrj 
733*a9fa9459Szrj #endif /* not mc68020 */
734*a9fa9459Szrj 
735*a9fa9459Szrj /* The '020, '030, '040 and '060 have bitfield insns.
736*a9fa9459Szrj    cpu32 disguises as a 68020, but lacks them.  */
737*a9fa9459Szrj #if defined (__mc68020__) && !defined (__mcpu32__)
738*a9fa9459Szrj #define count_leading_zeros(count, x) \
739*a9fa9459Szrj   __asm__ ("bfffo %1{%b2:%b2},%0"					\
740*a9fa9459Szrj 	   : "=d" ((USItype) (count))					\
741*a9fa9459Szrj 	   : "od" ((USItype) (x)), "n" (0))
742*a9fa9459Szrj /* Some ColdFire architectures have a ff1 instruction supported via
743*a9fa9459Szrj    __builtin_clz. */
744*a9fa9459Szrj #elif defined (__mcfisaaplus__) || defined (__mcfisac__)
745*a9fa9459Szrj #define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
746*a9fa9459Szrj #define COUNT_LEADING_ZEROS_0 32
747*a9fa9459Szrj #endif
748*a9fa9459Szrj #endif /* mc68000 */
749*a9fa9459Szrj 
750*a9fa9459Szrj #if defined (__m88000__) && W_TYPE_SIZE == 32
751*a9fa9459Szrj #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
752*a9fa9459Szrj   __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3"			\
753*a9fa9459Szrj 	   : "=r" ((USItype) (sh)),					\
754*a9fa9459Szrj 	     "=&r" ((USItype) (sl))					\
755*a9fa9459Szrj 	   : "%rJ" ((USItype) (ah)),					\
756*a9fa9459Szrj 	     "rJ" ((USItype) (bh)),					\
757*a9fa9459Szrj 	     "%rJ" ((USItype) (al)),					\
758*a9fa9459Szrj 	     "rJ" ((USItype) (bl)))
759*a9fa9459Szrj #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
760*a9fa9459Szrj   __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3"			\
761*a9fa9459Szrj 	   : "=r" ((USItype) (sh)),					\
762*a9fa9459Szrj 	     "=&r" ((USItype) (sl))					\
763*a9fa9459Szrj 	   : "rJ" ((USItype) (ah)),					\
764*a9fa9459Szrj 	     "rJ" ((USItype) (bh)),					\
765*a9fa9459Szrj 	     "rJ" ((USItype) (al)),					\
766*a9fa9459Szrj 	     "rJ" ((USItype) (bl)))
767*a9fa9459Szrj #define count_leading_zeros(count, x) \
768*a9fa9459Szrj   do {									\
769*a9fa9459Szrj     USItype __cbtmp;							\
770*a9fa9459Szrj     __asm__ ("ff1 %0,%1"						\
771*a9fa9459Szrj 	     : "=r" (__cbtmp)						\
772*a9fa9459Szrj 	     : "r" ((USItype) (x)));					\
773*a9fa9459Szrj     (count) = __cbtmp ^ 31;						\
774*a9fa9459Szrj   } while (0)
775*a9fa9459Szrj #define COUNT_LEADING_ZEROS_0 63 /* sic */
776*a9fa9459Szrj #if defined (__mc88110__)
777*a9fa9459Szrj #define umul_ppmm(wh, wl, u, v) \
778*a9fa9459Szrj   do {									\
779*a9fa9459Szrj     union {UDItype __ll;						\
780*a9fa9459Szrj 	   struct {USItype __h, __l;} __i;				\
781*a9fa9459Szrj 	  } __xx;							\
782*a9fa9459Szrj     __asm__ ("mulu.d	%0,%1,%2"					\
783*a9fa9459Szrj 	     : "=r" (__xx.__ll)						\
784*a9fa9459Szrj 	     : "r" ((USItype) (u)),					\
785*a9fa9459Szrj 	       "r" ((USItype) (v)));					\
786*a9fa9459Szrj     (wh) = __xx.__i.__h;						\
787*a9fa9459Szrj     (wl) = __xx.__i.__l;						\
788*a9fa9459Szrj   } while (0)
789*a9fa9459Szrj #define udiv_qrnnd(q, r, n1, n0, d) \
790*a9fa9459Szrj   ({union {UDItype __ll;						\
791*a9fa9459Szrj 	   struct {USItype __h, __l;} __i;				\
792*a9fa9459Szrj 	  } __xx;							\
793*a9fa9459Szrj   USItype __q;								\
794*a9fa9459Szrj   __xx.__i.__h = (n1); __xx.__i.__l = (n0);				\
795*a9fa9459Szrj   __asm__ ("divu.d %0,%1,%2"						\
796*a9fa9459Szrj 	   : "=r" (__q)							\
797*a9fa9459Szrj 	   : "r" (__xx.__ll),						\
798*a9fa9459Szrj 	     "r" ((USItype) (d)));					\
799*a9fa9459Szrj   (r) = (n0) - __q * (d); (q) = __q; })
800*a9fa9459Szrj #define UMUL_TIME 5
801*a9fa9459Szrj #define UDIV_TIME 25
802*a9fa9459Szrj #else
803*a9fa9459Szrj #define UMUL_TIME 17
804*a9fa9459Szrj #define UDIV_TIME 150
805*a9fa9459Szrj #endif /* __mc88110__ */
806*a9fa9459Szrj #endif /* __m88000__ */
807*a9fa9459Szrj 
808*a9fa9459Szrj #if defined (__mn10300__)
809*a9fa9459Szrj # if defined (__AM33__)
810*a9fa9459Szrj #  define count_leading_zeros(COUNT,X)	((COUNT) = __builtin_clz (X))
811*a9fa9459Szrj #  define umul_ppmm(w1, w0, u, v)		\
812*a9fa9459Szrj     asm("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
813*a9fa9459Szrj #  define smul_ppmm(w1, w0, u, v)		\
814*a9fa9459Szrj     asm("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
815*a9fa9459Szrj # else
816*a9fa9459Szrj #  define umul_ppmm(w1, w0, u, v)		\
817*a9fa9459Szrj     asm("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
818*a9fa9459Szrj #  define smul_ppmm(w1, w0, u, v)		\
819*a9fa9459Szrj     asm("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
820*a9fa9459Szrj # endif
821*a9fa9459Szrj # define add_ssaaaa(sh, sl, ah, al, bh, bl)	\
822*a9fa9459Szrj   do {						\
823*a9fa9459Szrj     DWunion __s, __a, __b;			\
824*a9fa9459Szrj     __a.s.low = (al); __a.s.high = (ah);	\
825*a9fa9459Szrj     __b.s.low = (bl); __b.s.high = (bh);	\
826*a9fa9459Szrj     __s.ll = __a.ll + __b.ll;			\
827*a9fa9459Szrj     (sl) = __s.s.low; (sh) = __s.s.high;	\
828*a9fa9459Szrj   } while (0)
829*a9fa9459Szrj # define sub_ddmmss(sh, sl, ah, al, bh, bl)	\
830*a9fa9459Szrj   do {						\
831*a9fa9459Szrj     DWunion __s, __a, __b;			\
832*a9fa9459Szrj     __a.s.low = (al); __a.s.high = (ah);	\
833*a9fa9459Szrj     __b.s.low = (bl); __b.s.high = (bh);	\
834*a9fa9459Szrj     __s.ll = __a.ll - __b.ll;			\
835*a9fa9459Szrj     (sl) = __s.s.low; (sh) = __s.s.high;	\
836*a9fa9459Szrj   } while (0)
837*a9fa9459Szrj # define udiv_qrnnd(q, r, nh, nl, d)		\
838*a9fa9459Szrj   asm("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
839*a9fa9459Szrj # define sdiv_qrnnd(q, r, nh, nl, d)		\
840*a9fa9459Szrj   asm("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
841*a9fa9459Szrj # define UMUL_TIME 3
842*a9fa9459Szrj # define UDIV_TIME 38
843*a9fa9459Szrj #endif
844*a9fa9459Szrj 
845*a9fa9459Szrj #if defined (__mips__) && W_TYPE_SIZE == 32
846*a9fa9459Szrj #define umul_ppmm(w1, w0, u, v)						\
847*a9fa9459Szrj   do {									\
848*a9fa9459Szrj     UDItype __x = (UDItype) (USItype) (u) * (USItype) (v);		\
849*a9fa9459Szrj     (w1) = (USItype) (__x >> 32);					\
850*a9fa9459Szrj     (w0) = (USItype) (__x);						\
851*a9fa9459Szrj   } while (0)
852*a9fa9459Szrj #define UMUL_TIME 10
853*a9fa9459Szrj #define UDIV_TIME 100
854*a9fa9459Szrj 
855*a9fa9459Szrj #if (__mips == 32 || __mips == 64) && ! defined (__mips16)
856*a9fa9459Szrj #define count_leading_zeros(COUNT,X)	((COUNT) = __builtin_clz (X))
857*a9fa9459Szrj #define COUNT_LEADING_ZEROS_0 32
858*a9fa9459Szrj #endif
859*a9fa9459Szrj #endif /* __mips__ */
860*a9fa9459Szrj 
861*a9fa9459Szrj #if defined (__ns32000__) && W_TYPE_SIZE == 32
862*a9fa9459Szrj #define umul_ppmm(w1, w0, u, v) \
863*a9fa9459Szrj   ({union {UDItype __ll;						\
864*a9fa9459Szrj 	   struct {USItype __l, __h;} __i;				\
865*a9fa9459Szrj 	  } __xx;							\
866*a9fa9459Szrj   __asm__ ("meid %2,%0"							\
867*a9fa9459Szrj 	   : "=g" (__xx.__ll)						\
868*a9fa9459Szrj 	   : "%0" ((USItype) (u)),					\
869*a9fa9459Szrj 	     "g" ((USItype) (v)));					\
870*a9fa9459Szrj   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
871*a9fa9459Szrj #define __umulsidi3(u, v) \
872*a9fa9459Szrj   ({UDItype __w;							\
873*a9fa9459Szrj     __asm__ ("meid %2,%0"						\
874*a9fa9459Szrj 	     : "=g" (__w)						\
875*a9fa9459Szrj 	     : "%0" ((USItype) (u)),					\
876*a9fa9459Szrj 	       "g" ((USItype) (v)));					\
877*a9fa9459Szrj     __w; })
878*a9fa9459Szrj #define udiv_qrnnd(q, r, n1, n0, d) \
879*a9fa9459Szrj   ({union {UDItype __ll;						\
880*a9fa9459Szrj 	   struct {USItype __l, __h;} __i;				\
881*a9fa9459Szrj 	  } __xx;							\
882*a9fa9459Szrj   __xx.__i.__h = (n1); __xx.__i.__l = (n0);				\
883*a9fa9459Szrj   __asm__ ("deid %2,%0"							\
884*a9fa9459Szrj 	   : "=g" (__xx.__ll)						\
885*a9fa9459Szrj 	   : "0" (__xx.__ll),						\
886*a9fa9459Szrj 	     "g" ((USItype) (d)));					\
887*a9fa9459Szrj   (r) = __xx.__i.__l; (q) = __xx.__i.__h; })
888*a9fa9459Szrj #define count_trailing_zeros(count,x) \
889*a9fa9459Szrj   do {									\
890*a9fa9459Szrj     __asm__ ("ffsd     %2,%0"						\
891*a9fa9459Szrj 	    : "=r" ((USItype) (count))					\
892*a9fa9459Szrj 	    : "0" ((USItype) 0),					\
893*a9fa9459Szrj 	      "r" ((USItype) (x)));					\
894*a9fa9459Szrj   } while (0)
895*a9fa9459Szrj #endif /* __ns32000__ */
896*a9fa9459Szrj 
897*a9fa9459Szrj /* FIXME: We should test _IBMR2 here when we add assembly support for the
898*a9fa9459Szrj    system vendor compilers.
899*a9fa9459Szrj    FIXME: What's needed for gcc PowerPC VxWorks?  __vxworks__ is not good
900*a9fa9459Szrj    enough, since that hits ARM and m68k too.  */
901*a9fa9459Szrj #if (defined (_ARCH_PPC)	/* AIX */				\
902*a9fa9459Szrj      || defined (__powerpc__)	/* gcc */				\
903*a9fa9459Szrj      || defined (__POWERPC__)	/* BEOS */				\
904*a9fa9459Szrj      || defined (__ppc__)	/* Darwin */				\
905*a9fa9459Szrj      || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */    \
906*a9fa9459Szrj      || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */               \
907*a9fa9459Szrj 	 && CPU_FAMILY == PPC)                                                \
908*a9fa9459Szrj      ) && W_TYPE_SIZE == 32
909*a9fa9459Szrj #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
910*a9fa9459Szrj   do {									\
911*a9fa9459Szrj     if (__builtin_constant_p (bh) && (bh) == 0)				\
912*a9fa9459Szrj       __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"		\
913*a9fa9459Szrj 	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
914*a9fa9459Szrj     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)		\
915*a9fa9459Szrj       __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"		\
916*a9fa9459Szrj 	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
917*a9fa9459Szrj     else								\
918*a9fa9459Szrj       __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"		\
919*a9fa9459Szrj 	     : "=r" (sh), "=&r" (sl)					\
920*a9fa9459Szrj 	     : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));		\
921*a9fa9459Szrj   } while (0)
922*a9fa9459Szrj #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
923*a9fa9459Szrj   do {									\
924*a9fa9459Szrj     if (__builtin_constant_p (ah) && (ah) == 0)				\
925*a9fa9459Szrj       __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"	\
926*a9fa9459Szrj 	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
927*a9fa9459Szrj     else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)		\
928*a9fa9459Szrj       __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"	\
929*a9fa9459Szrj 	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
930*a9fa9459Szrj     else if (__builtin_constant_p (bh) && (bh) == 0)			\
931*a9fa9459Szrj       __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"		\
932*a9fa9459Szrj 	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
933*a9fa9459Szrj     else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)		\
934*a9fa9459Szrj       __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"		\
935*a9fa9459Szrj 	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
936*a9fa9459Szrj     else								\
937*a9fa9459Szrj       __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"	\
938*a9fa9459Szrj 	       : "=r" (sh), "=&r" (sl)					\
939*a9fa9459Szrj 	       : "r" (ah), "r" (bh), "rI" (al), "r" (bl));		\
940*a9fa9459Szrj   } while (0)
941*a9fa9459Szrj #define count_leading_zeros(count, x) \
942*a9fa9459Szrj   __asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x))
943*a9fa9459Szrj #define COUNT_LEADING_ZEROS_0 32
944*a9fa9459Szrj #if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
945*a9fa9459Szrj   || defined (__ppc__)                                                    \
946*a9fa9459Szrj   || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */       \
947*a9fa9459Szrj   || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */                  \
948*a9fa9459Szrj 	 && CPU_FAMILY == PPC)
949*a9fa9459Szrj #define umul_ppmm(ph, pl, m0, m1) \
950*a9fa9459Szrj   do {									\
951*a9fa9459Szrj     USItype __m0 = (m0), __m1 = (m1);					\
952*a9fa9459Szrj     __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));	\
953*a9fa9459Szrj     (pl) = __m0 * __m1;							\
954*a9fa9459Szrj   } while (0)
955*a9fa9459Szrj #define UMUL_TIME 15
956*a9fa9459Szrj #define smul_ppmm(ph, pl, m0, m1) \
957*a9fa9459Szrj   do {									\
958*a9fa9459Szrj     SItype __m0 = (m0), __m1 = (m1);					\
959*a9fa9459Szrj     __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));	\
960*a9fa9459Szrj     (pl) = __m0 * __m1;							\
961*a9fa9459Szrj   } while (0)
962*a9fa9459Szrj #define SMUL_TIME 14
963*a9fa9459Szrj #define UDIV_TIME 120
964*a9fa9459Szrj #endif
965*a9fa9459Szrj #endif /* 32-bit POWER architecture variants.  */
966*a9fa9459Szrj 
967*a9fa9459Szrj /* We should test _IBMR2 here when we add assembly support for the system
968*a9fa9459Szrj    vendor compilers.  */
969*a9fa9459Szrj #if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
970*a9fa9459Szrj #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
971*a9fa9459Szrj   do {									\
972*a9fa9459Szrj     if (__builtin_constant_p (bh) && (bh) == 0)				\
973*a9fa9459Szrj       __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"		\
974*a9fa9459Szrj 	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
975*a9fa9459Szrj     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)		\
976*a9fa9459Szrj       __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"		\
977*a9fa9459Szrj 	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
978*a9fa9459Szrj     else								\
979*a9fa9459Szrj       __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"		\
980*a9fa9459Szrj 	     : "=r" (sh), "=&r" (sl)					\
981*a9fa9459Szrj 	     : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));		\
982*a9fa9459Szrj   } while (0)
983*a9fa9459Szrj #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
984*a9fa9459Szrj   do {									\
985*a9fa9459Szrj     if (__builtin_constant_p (ah) && (ah) == 0)				\
986*a9fa9459Szrj       __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"	\
987*a9fa9459Szrj 	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
988*a9fa9459Szrj     else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)		\
989*a9fa9459Szrj       __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"	\
990*a9fa9459Szrj 	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
991*a9fa9459Szrj     else if (__builtin_constant_p (bh) && (bh) == 0)			\
992*a9fa9459Szrj       __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"		\
993*a9fa9459Szrj 	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
994*a9fa9459Szrj     else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)		\
995*a9fa9459Szrj       __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"		\
996*a9fa9459Szrj 	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
997*a9fa9459Szrj     else								\
998*a9fa9459Szrj       __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"	\
999*a9fa9459Szrj 	       : "=r" (sh), "=&r" (sl)					\
1000*a9fa9459Szrj 	       : "r" (ah), "r" (bh), "rI" (al), "r" (bl));		\
1001*a9fa9459Szrj   } while (0)
1002*a9fa9459Szrj #define count_leading_zeros(count, x) \
1003*a9fa9459Szrj   __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
1004*a9fa9459Szrj #define COUNT_LEADING_ZEROS_0 64
1005*a9fa9459Szrj #define umul_ppmm(ph, pl, m0, m1) \
1006*a9fa9459Szrj   do {									\
1007*a9fa9459Szrj     UDItype __m0 = (m0), __m1 = (m1);					\
1008*a9fa9459Szrj     __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));	\
1009*a9fa9459Szrj     (pl) = __m0 * __m1;							\
1010*a9fa9459Szrj   } while (0)
1011*a9fa9459Szrj #define UMUL_TIME 15
1012*a9fa9459Szrj #define smul_ppmm(ph, pl, m0, m1) \
1013*a9fa9459Szrj   do {									\
1014*a9fa9459Szrj     DItype __m0 = (m0), __m1 = (m1);					\
1015*a9fa9459Szrj     __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));	\
1016*a9fa9459Szrj     (pl) = __m0 * __m1;							\
1017*a9fa9459Szrj   } while (0)
1018*a9fa9459Szrj #define SMUL_TIME 14  /* ??? */
1019*a9fa9459Szrj #define UDIV_TIME 120 /* ??? */
1020*a9fa9459Szrj #endif /* 64-bit PowerPC.  */
1021*a9fa9459Szrj 
1022*a9fa9459Szrj #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
1023*a9fa9459Szrj #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1024*a9fa9459Szrj   __asm__ ("a %1,%5\n\tae %0,%3"					\
1025*a9fa9459Szrj 	   : "=r" ((USItype) (sh)),					\
1026*a9fa9459Szrj 	     "=&r" ((USItype) (sl))					\
1027*a9fa9459Szrj 	   : "%0" ((USItype) (ah)),					\
1028*a9fa9459Szrj 	     "r" ((USItype) (bh)),					\
1029*a9fa9459Szrj 	     "%1" ((USItype) (al)),					\
1030*a9fa9459Szrj 	     "r" ((USItype) (bl)))
1031*a9fa9459Szrj #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1032*a9fa9459Szrj   __asm__ ("s %1,%5\n\tse %0,%3"					\
1033*a9fa9459Szrj 	   : "=r" ((USItype) (sh)),					\
1034*a9fa9459Szrj 	     "=&r" ((USItype) (sl))					\
1035*a9fa9459Szrj 	   : "0" ((USItype) (ah)),					\
1036*a9fa9459Szrj 	     "r" ((USItype) (bh)),					\
1037*a9fa9459Szrj 	     "1" ((USItype) (al)),					\
1038*a9fa9459Szrj 	     "r" ((USItype) (bl)))
1039*a9fa9459Szrj #define umul_ppmm(ph, pl, m0, m1) \
1040*a9fa9459Szrj   do {									\
1041*a9fa9459Szrj     USItype __m0 = (m0), __m1 = (m1);					\
1042*a9fa9459Szrj     __asm__ (								\
1043*a9fa9459Szrj        "s	r2,r2\n"						\
1044*a9fa9459Szrj "	mts	r10,%2\n"						\
1045*a9fa9459Szrj "	m	r2,%3\n"						\
1046*a9fa9459Szrj "	m	r2,%3\n"						\
1047*a9fa9459Szrj "	m	r2,%3\n"						\
1048*a9fa9459Szrj "	m	r2,%3\n"						\
1049*a9fa9459Szrj "	m	r2,%3\n"						\
1050*a9fa9459Szrj "	m	r2,%3\n"						\
1051*a9fa9459Szrj "	m	r2,%3\n"						\
1052*a9fa9459Szrj "	m	r2,%3\n"						\
1053*a9fa9459Szrj "	m	r2,%3\n"						\
1054*a9fa9459Szrj "	m	r2,%3\n"						\
1055*a9fa9459Szrj "	m	r2,%3\n"						\
1056*a9fa9459Szrj "	m	r2,%3\n"						\
1057*a9fa9459Szrj "	m	r2,%3\n"						\
1058*a9fa9459Szrj "	m	r2,%3\n"						\
1059*a9fa9459Szrj "	m	r2,%3\n"						\
1060*a9fa9459Szrj "	m	r2,%3\n"						\
1061*a9fa9459Szrj "	cas	%0,r2,r0\n"						\
1062*a9fa9459Szrj "	mfs	r10,%1"							\
1063*a9fa9459Szrj 	     : "=r" ((USItype) (ph)),					\
1064*a9fa9459Szrj 	       "=r" ((USItype) (pl))					\
1065*a9fa9459Szrj 	     : "%r" (__m0),						\
1066*a9fa9459Szrj 		"r" (__m1)						\
1067*a9fa9459Szrj 	     : "r2");							\
1068*a9fa9459Szrj     (ph) += ((((SItype) __m0 >> 31) & __m1)				\
1069*a9fa9459Szrj 	     + (((SItype) __m1 >> 31) & __m0));				\
1070*a9fa9459Szrj   } while (0)
1071*a9fa9459Szrj #define UMUL_TIME 20
1072*a9fa9459Szrj #define UDIV_TIME 200
1073*a9fa9459Szrj #define count_leading_zeros(count, x) \
1074*a9fa9459Szrj   do {									\
1075*a9fa9459Szrj     if ((x) >= 0x10000)							\
1076*a9fa9459Szrj       __asm__ ("clz	%0,%1"						\
1077*a9fa9459Szrj 	       : "=r" ((USItype) (count))				\
1078*a9fa9459Szrj 	       : "r" ((USItype) (x) >> 16));				\
1079*a9fa9459Szrj     else								\
1080*a9fa9459Szrj       {									\
1081*a9fa9459Szrj 	__asm__ ("clz	%0,%1"						\
1082*a9fa9459Szrj 		 : "=r" ((USItype) (count))				\
1083*a9fa9459Szrj 		 : "r" ((USItype) (x)));					\
1084*a9fa9459Szrj 	(count) += 16;							\
1085*a9fa9459Szrj       }									\
1086*a9fa9459Szrj   } while (0)
1087*a9fa9459Szrj #endif
1088*a9fa9459Szrj 
1089*a9fa9459Szrj #if defined(__sh__) && (!defined (__SHMEDIA__) || !__SHMEDIA__) && W_TYPE_SIZE == 32
1090*a9fa9459Szrj #ifndef __sh1__
1091*a9fa9459Szrj #define umul_ppmm(w1, w0, u, v) \
1092*a9fa9459Szrj   __asm__ (								\
1093*a9fa9459Szrj        "dmulu.l	%2,%3\n\tsts%M1	macl,%1\n\tsts%M0	mach,%0"	\
1094*a9fa9459Szrj 	   : "=r<" ((USItype)(w1)),					\
1095*a9fa9459Szrj 	     "=r<" ((USItype)(w0))					\
1096*a9fa9459Szrj 	   : "r" ((USItype)(u)),					\
1097*a9fa9459Szrj 	     "r" ((USItype)(v))						\
1098*a9fa9459Szrj 	   : "macl", "mach")
1099*a9fa9459Szrj #define UMUL_TIME 5
1100*a9fa9459Szrj #endif
1101*a9fa9459Szrj 
1102*a9fa9459Szrj /* This is the same algorithm as __udiv_qrnnd_c.  */
1103*a9fa9459Szrj #define UDIV_NEEDS_NORMALIZATION 1
1104*a9fa9459Szrj 
1105*a9fa9459Szrj #ifdef __FDPIC__
1106*a9fa9459Szrj /* FDPIC needs a special version of the asm fragment to extract the
1107*a9fa9459Szrj    code address from the function descriptor. __udiv_qrnnd_16 is
1108*a9fa9459Szrj    assumed to be local and not to use the GOT, so loading r12 is
1109*a9fa9459Szrj    not needed. */
1110*a9fa9459Szrj #define udiv_qrnnd(q, r, n1, n0, d) \
1111*a9fa9459Szrj   do {									\
1112*a9fa9459Szrj     extern UWtype __udiv_qrnnd_16 (UWtype, UWtype)			\
1113*a9fa9459Szrj 			__attribute__ ((visibility ("hidden")));	\
1114*a9fa9459Szrj     /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */	\
1115*a9fa9459Szrj     __asm__ (								\
1116*a9fa9459Szrj 	"mov%M4	%4,r5\n"						\
1117*a9fa9459Szrj "	swap.w	%3,r4\n"						\
1118*a9fa9459Szrj "	swap.w	r5,r6\n"						\
1119*a9fa9459Szrj "	mov.l	@%5,r2\n"						\
1120*a9fa9459Szrj "	jsr	@r2\n"							\
1121*a9fa9459Szrj "	shll16	r6\n"							\
1122*a9fa9459Szrj "	swap.w	r4,r4\n"						\
1123*a9fa9459Szrj "	mov.l	@%5,r2\n"						\
1124*a9fa9459Szrj "	jsr	@r2\n"							\
1125*a9fa9459Szrj "	swap.w	r1,%0\n"						\
1126*a9fa9459Szrj "	or	r1,%0"							\
1127*a9fa9459Szrj 	: "=r" (q), "=&z" (r)						\
1128*a9fa9459Szrj 	: "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16)		\
1129*a9fa9459Szrj 	: "r1", "r2", "r4", "r5", "r6", "pr", "t");			\
1130*a9fa9459Szrj   } while (0)
1131*a9fa9459Szrj #else
1132*a9fa9459Szrj #define udiv_qrnnd(q, r, n1, n0, d) \
1133*a9fa9459Szrj   do {									\
1134*a9fa9459Szrj     extern UWtype __udiv_qrnnd_16 (UWtype, UWtype)			\
1135*a9fa9459Szrj 			__attribute__ ((visibility ("hidden")));	\
1136*a9fa9459Szrj     /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */	\
1137*a9fa9459Szrj     __asm__ (								\
1138*a9fa9459Szrj 	"mov%M4 %4,r5\n"						\
1139*a9fa9459Szrj "	swap.w %3,r4\n"							\
1140*a9fa9459Szrj "	swap.w r5,r6\n"							\
1141*a9fa9459Szrj "	jsr @%5\n"							\
1142*a9fa9459Szrj "	shll16 r6\n"							\
1143*a9fa9459Szrj "	swap.w r4,r4\n"							\
1144*a9fa9459Szrj "	jsr @%5\n"							\
1145*a9fa9459Szrj "	swap.w r1,%0\n"							\
1146*a9fa9459Szrj "	or r1,%0"							\
1147*a9fa9459Szrj 	: "=r" (q), "=&z" (r)						\
1148*a9fa9459Szrj 	: "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16)		\
1149*a9fa9459Szrj 	: "r1", "r2", "r4", "r5", "r6", "pr", "t");			\
1150*a9fa9459Szrj   } while (0)
1151*a9fa9459Szrj #endif /* __FDPIC__  */
1152*a9fa9459Szrj 
1153*a9fa9459Szrj #define UDIV_TIME 80
1154*a9fa9459Szrj 
1155*a9fa9459Szrj #define sub_ddmmss(sh, sl, ah, al, bh, bl)				\
1156*a9fa9459Szrj   __asm__ ("clrt;subc %5,%1; subc %4,%0"				\
1157*a9fa9459Szrj 	   : "=r" (sh), "=r" (sl)					\
1158*a9fa9459Szrj 	   : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t")
1159*a9fa9459Szrj 
1160*a9fa9459Szrj #endif /* __sh__ */
1161*a9fa9459Szrj 
1162*a9fa9459Szrj #if defined (__SH5__) && defined (__SHMEDIA__) && __SHMEDIA__ && W_TYPE_SIZE == 32
1163*a9fa9459Szrj #define __umulsidi3(u,v) ((UDItype)(USItype)u*(USItype)v)
1164*a9fa9459Szrj #define count_leading_zeros(count, x) \
1165*a9fa9459Szrj   do									\
1166*a9fa9459Szrj     {									\
1167*a9fa9459Szrj       UDItype x_ = (USItype)(x);					\
1168*a9fa9459Szrj       SItype c_;							\
1169*a9fa9459Szrj 									\
1170*a9fa9459Szrj       __asm__ ("nsb %1, %0" : "=r" (c_) : "r" (x_));			\
1171*a9fa9459Szrj       (count) = c_ - 31;						\
1172*a9fa9459Szrj     }									\
1173*a9fa9459Szrj   while (0)
1174*a9fa9459Szrj #define COUNT_LEADING_ZEROS_0 32
1175*a9fa9459Szrj #endif
1176*a9fa9459Szrj 
1177*a9fa9459Szrj #if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
1178*a9fa9459Szrj     && W_TYPE_SIZE == 32
1179*a9fa9459Szrj #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1180*a9fa9459Szrj   __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0"				\
1181*a9fa9459Szrj 	   : "=r" ((USItype) (sh)),					\
1182*a9fa9459Szrj 	     "=&r" ((USItype) (sl))					\
1183*a9fa9459Szrj 	   : "%rJ" ((USItype) (ah)),					\
1184*a9fa9459Szrj 	     "rI" ((USItype) (bh)),					\
1185*a9fa9459Szrj 	     "%rJ" ((USItype) (al)),					\
1186*a9fa9459Szrj 	     "rI" ((USItype) (bl))					\
1187*a9fa9459Szrj 	   __CLOBBER_CC)
1188*a9fa9459Szrj #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1189*a9fa9459Szrj   __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0"				\
1190*a9fa9459Szrj 	   : "=r" ((USItype) (sh)),					\
1191*a9fa9459Szrj 	     "=&r" ((USItype) (sl))					\
1192*a9fa9459Szrj 	   : "rJ" ((USItype) (ah)),					\
1193*a9fa9459Szrj 	     "rI" ((USItype) (bh)),					\
1194*a9fa9459Szrj 	     "rJ" ((USItype) (al)),					\
1195*a9fa9459Szrj 	     "rI" ((USItype) (bl))					\
1196*a9fa9459Szrj 	   __CLOBBER_CC)
1197*a9fa9459Szrj #if defined (__sparc_v9__)
1198*a9fa9459Szrj #define umul_ppmm(w1, w0, u, v) \
1199*a9fa9459Szrj   do {									\
1200*a9fa9459Szrj     register USItype __g1 asm ("g1");					\
1201*a9fa9459Szrj     __asm__ ("umul\t%2,%3,%1\n\t"					\
1202*a9fa9459Szrj 	     "srlx\t%1, 32, %0"						\
1203*a9fa9459Szrj 	     : "=r" ((USItype) (w1)),					\
1204*a9fa9459Szrj 	       "=r" (__g1)						\
1205*a9fa9459Szrj 	     : "r" ((USItype) (u)),					\
1206*a9fa9459Szrj 	       "r" ((USItype) (v)));					\
1207*a9fa9459Szrj     (w0) = __g1;							\
1208*a9fa9459Szrj   } while (0)
1209*a9fa9459Szrj #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1210*a9fa9459Szrj   __asm__ ("mov\t%2,%%y\n\t"						\
1211*a9fa9459Szrj 	   "udiv\t%3,%4,%0\n\t"						\
1212*a9fa9459Szrj 	   "umul\t%0,%4,%1\n\t"						\
1213*a9fa9459Szrj 	   "sub\t%3,%1,%1"						\
1214*a9fa9459Szrj 	   : "=&r" ((USItype) (__q)),					\
1215*a9fa9459Szrj 	     "=&r" ((USItype) (__r))					\
1216*a9fa9459Szrj 	   : "r" ((USItype) (__n1)),					\
1217*a9fa9459Szrj 	     "r" ((USItype) (__n0)),					\
1218*a9fa9459Szrj 	     "r" ((USItype) (__d)))
1219*a9fa9459Szrj #else
1220*a9fa9459Szrj #if defined (__sparc_v8__)
/* umul_ppmm (v8): "umul" puts the low 32 bits of the product in w0
   and the high 32 bits in the %y register, which "rd %y" then copies
   into w1.  */
1221*a9fa9459Szrj #define umul_ppmm(w1, w0, u, v) \
1222*a9fa9459Szrj   __asm__ ("umul %2,%3,%1;rd %%y,%0"					\
1223*a9fa9459Szrj 	   : "=r" ((USItype) (w1)),					\
1224*a9fa9459Szrj 	     "=r" ((USItype) (w0))					\
1225*a9fa9459Szrj 	   : "r" ((USItype) (u)),					\
1226*a9fa9459Szrj 	     "r" ((USItype) (v)))
/* udiv_qrnnd (v8): same scheme as the v9 version, but the write to %y
   is delayed on these parts, so three nops separate it from the udiv
   that reads it (compare the sparclite variant below, whose comment
   notes the write is NOT delayed there).  Remainder = n0 - q*d.  */
1227*a9fa9459Szrj #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1228*a9fa9459Szrj   __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
1229*a9fa9459Szrj 	   : "=&r" ((USItype) (__q)),					\
1230*a9fa9459Szrj 	     "=&r" ((USItype) (__r))					\
1231*a9fa9459Szrj 	   : "r" ((USItype) (__n1)),					\
1232*a9fa9459Szrj 	     "r" ((USItype) (__n0)),					\
1233*a9fa9459Szrj 	     "r" ((USItype) (__d)))
1234*a9fa9459Szrj #else
1235*a9fa9459Szrj #if defined (__sparclite__)
1236*a9fa9459Szrj /* This has hardware multiply but not divide.  It also has two additional
1237*a9fa9459Szrj    instructions scan (ffs from high bit) and divscc.  */
/* umul_ppmm (sparclite): identical to the v8 version — low product
   word directly in w0, high word read back from %y.  */
1238*a9fa9459Szrj #define umul_ppmm(w1, w0, u, v) \
1239*a9fa9459Szrj   __asm__ ("umul %2,%3,%1;rd %%y,%0"					\
1240*a9fa9459Szrj 	   : "=r" ((USItype) (w1)),					\
1241*a9fa9459Szrj 	     "=r" ((USItype) (w0))					\
1242*a9fa9459Szrj 	   : "r" ((USItype) (u)),					\
1243*a9fa9459Szrj 	     "r" ((USItype) (v)))
/* udiv_qrnnd (sparclite): 64/32 division with the "divscc" divide-step
   instruction, one quotient bit per step (32 steps, accumulating
   through %g1 into q).  "rd %y" then yields the remainder; the
   annulled "bl,a 1f" / "add %1,%4,%1" pair adds d back when the
   remainder came out negative.  Clobbers %g1 and the condition
   codes.  */
1244*a9fa9459Szrj #define udiv_qrnnd(q, r, n1, n0, d) \
1245*a9fa9459Szrj   __asm__ ("! Inlined udiv_qrnnd\n"					\
1246*a9fa9459Szrj "	wr	%%g0,%2,%%y	! Not a delayed write for sparclite\n"	\
1247*a9fa9459Szrj "	tst	%%g0\n"							\
1248*a9fa9459Szrj "	divscc	%3,%4,%%g1\n"						\
1249*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1250*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1251*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1252*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1253*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1254*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1255*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1256*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1257*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1258*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1259*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1260*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1261*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1262*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1263*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1264*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1265*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1266*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1267*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1268*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1269*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1270*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1271*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1272*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1273*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1274*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1275*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1276*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1277*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1278*a9fa9459Szrj "	divscc	%%g1,%4,%%g1\n"						\
1279*a9fa9459Szrj "	divscc	%%g1,%4,%0\n"						\
1280*a9fa9459Szrj "	rd	%%y,%1\n"						\
1281*a9fa9459Szrj "	bl,a 1f\n"							\
1282*a9fa9459Szrj "	add	%1,%4,%1\n"						\
1283*a9fa9459Szrj "1:	! End of inline udiv_qrnnd"					\
1284*a9fa9459Szrj 	   : "=r" ((USItype) (q)),					\
1285*a9fa9459Szrj 	     "=r" ((USItype) (r))					\
1286*a9fa9459Szrj 	   : "r" ((USItype) (n1)),					\
1287*a9fa9459Szrj 	     "r" ((USItype) (n0)),					\
1288*a9fa9459Szrj 	     "rI" ((USItype) (d))					\
1289*a9fa9459Szrj 	   : "g1" __AND_CLOBBER_CC)
1290*a9fa9459Szrj #define UDIV_TIME 37
/* count_leading_zeros (sparclite): single "scan" instruction — it
   locates the first set bit from the most-significant end (an "ffs
   from high bit", per the section comment above).  */
1291*a9fa9459Szrj #define count_leading_zeros(count, x) \
1292*a9fa9459Szrj   do {                                                                  \
1293*a9fa9459Szrj   __asm__ ("scan %1,1,%0"                                               \
1294*a9fa9459Szrj 	   : "=r" ((USItype) (count))                                   \
1295*a9fa9459Szrj 	   : "r" ((USItype) (x)));					\
1296*a9fa9459Szrj   } while (0)
1297*a9fa9459Szrj /* Early sparclites return 63 for an argument of 0, but they warn that future
1298*a9fa9459Szrj    implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
1299*a9fa9459Szrj    undefined.  */
1300*a9fa9459Szrj #else
1301*a9fa9459Szrj /* SPARC without integer multiplication and divide instructions.
1302*a9fa9459Szrj    (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
/* umul_ppmm (v7, no hardware multiply): builds the 64-bit product
   with 32 "mulscc" multiply-step instructions.  %y is seeded with u;
   each step examines one bit from %y and conditionally adds v into
   the %g1 accumulator, and the final "mulscc %%g1,0,%%g1" shifts the
   result into place.  mulscc effectively treats v as signed, so %o5
   is pre-loaded with (v < 0 ? u : 0) and added into the high word to
   recover the unsigned product.  Clobbers %g1, %o5 and the condition
   codes.  */
1303*a9fa9459Szrj #define umul_ppmm(w1, w0, u, v) \
1304*a9fa9459Szrj   __asm__ ("! Inlined umul_ppmm\n"					\
1305*a9fa9459Szrj "	wr	%%g0,%2,%%y	! SPARC has 0-3 delay insn after a wr\n"\
1306*a9fa9459Szrj "	sra	%3,31,%%o5	! Don't move this insn\n"		\
1307*a9fa9459Szrj "	and	%2,%%o5,%%o5	! Don't move this insn\n"		\
1308*a9fa9459Szrj "	andcc	%%g0,0,%%g1	! Don't move this insn\n"		\
1309*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1310*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1311*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1312*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1313*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1314*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1315*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1316*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1317*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1318*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1319*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1320*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1321*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1322*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1323*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1324*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1325*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1326*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1327*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1328*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1329*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1330*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1331*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1332*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1333*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1334*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1335*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1336*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1337*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1338*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1339*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1340*a9fa9459Szrj "	mulscc	%%g1,%3,%%g1\n"						\
1341*a9fa9459Szrj "	mulscc	%%g1,0,%%g1\n"						\
1342*a9fa9459Szrj "	add	%%g1,%%o5,%0\n"						\
1343*a9fa9459Szrj "	rd	%%y,%1"							\
1344*a9fa9459Szrj 	   : "=r" ((USItype) (w1)),					\
1345*a9fa9459Szrj 	     "=r" ((USItype) (w0))					\
1346*a9fa9459Szrj 	   : "%rI" ((USItype) (u)),					\
1347*a9fa9459Szrj 	     "r" ((USItype) (v))						\
1348*a9fa9459Szrj 	   : "g1", "o5" __AND_CLOBBER_CC)
1349*a9fa9459Szrj #define UMUL_TIME 39		/* 39 instructions */
1350*a9fa9459Szrj /* It's quite necessary to add this much assembler for the sparc.
1351*a9fa9459Szrj    The default udiv_qrnnd (in C) is more than 10 times slower!  */
/* udiv_qrnnd (v7, no hardware divide): bit-at-a-time shift-and-
   subtract division of (n1, n0) by d, with %g1 counting the 32
   iterations.  Quotient bits accumulate complemented in %0 and the
   final "xnor %0,0,%0" (~q) fixes them up; labels 4/5 handle the
   path where shifting the partial remainder carried out of the word.
   Clobbers %g1 and the condition codes.  */
1352*a9fa9459Szrj #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1353*a9fa9459Szrj   __asm__ ("! Inlined udiv_qrnnd\n"					\
1354*a9fa9459Szrj "	mov	32,%%g1\n"						\
1355*a9fa9459Szrj "	subcc	%1,%2,%%g0\n"						\
1356*a9fa9459Szrj "1:	bcs	5f\n"							\
1357*a9fa9459Szrj "	 addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb\n"	\
1358*a9fa9459Szrj "	sub	%1,%2,%1	! this kills msb of n\n"		\
1359*a9fa9459Szrj "	addx	%1,%1,%1	! so this can't give carry\n"		\
1360*a9fa9459Szrj "	subcc	%%g1,1,%%g1\n"						\
1361*a9fa9459Szrj "2:	bne	1b\n"							\
1362*a9fa9459Szrj "	 subcc	%1,%2,%%g0\n"						\
1363*a9fa9459Szrj "	bcs	3f\n"							\
1364*a9fa9459Szrj "	 addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb\n"	\
1365*a9fa9459Szrj "	b	3f\n"							\
1366*a9fa9459Szrj "	 sub	%1,%2,%1	! this kills msb of n\n"		\
1367*a9fa9459Szrj "4:	sub	%1,%2,%1\n"						\
1368*a9fa9459Szrj "5:	addxcc	%1,%1,%1\n"						\
1369*a9fa9459Szrj "	bcc	2b\n"							\
1370*a9fa9459Szrj "	 subcc	%%g1,1,%%g1\n"						\
1371*a9fa9459Szrj "! Got carry from n.  Subtract next step to cancel this carry.\n"	\
1372*a9fa9459Szrj "	bne	4b\n"							\
1373*a9fa9459Szrj "	 addcc	%0,%0,%0	! shift n1n0 and a 0-bit in lsb\n"	\
1374*a9fa9459Szrj "	sub	%1,%2,%1\n"						\
1375*a9fa9459Szrj "3:	xnor	%0,0,%0\n"						\
1376*a9fa9459Szrj "	! End of inline udiv_qrnnd"					\
1377*a9fa9459Szrj 	   : "=&r" ((USItype) (__q)),					\
1378*a9fa9459Szrj 	     "=&r" ((USItype) (__r))					\
1379*a9fa9459Szrj 	   : "r" ((USItype) (__d)),					\
1380*a9fa9459Szrj 	     "1" ((USItype) (__n1)),					\
1381*a9fa9459Szrj 	     "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
1382*a9fa9459Szrj #define UDIV_TIME (3+7*32)	/* 7 instructions/iteration. 32 iterations.  */
1383*a9fa9459Szrj #endif /* __sparclite__ */
1384*a9fa9459Szrj #endif /* __sparc_v8__ */
1385*a9fa9459Szrj #endif /* __sparc_v9__ */
1386*a9fa9459Szrj #endif /* sparc32 */
1387*a9fa9459Szrj 
1388*a9fa9459Szrj #if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
1389*a9fa9459Szrj     && W_TYPE_SIZE == 64
/* add_ssaaaa (sparc64): 64-bit double-word add.  "addcc" adds the low
   words; "movcs %xcc" materializes the resulting carry (from the
   64-bit condition codes) into __carry, which is then added into the
   high word — there is no 64-bit add-with-carry instruction to use
   directly.  */
1390*a9fa9459Szrj #define add_ssaaaa(sh, sl, ah, al, bh, bl)				\
1391*a9fa9459Szrj   do {									\
1392*a9fa9459Szrj     UDItype __carry = 0;						\
1393*a9fa9459Szrj     __asm__ ("addcc\t%r5,%6,%1\n\t"					\
1394*a9fa9459Szrj 	     "add\t%r3,%4,%0\n\t"					\
1395*a9fa9459Szrj 	     "movcs\t%%xcc, 1, %2\n\t"					\
1396*a9fa9459Szrj 	     "add\t%0, %2, %0"						\
1397*a9fa9459Szrj 	     : "=r" ((UDItype)(sh)),				      	\
1398*a9fa9459Szrj 	       "=&r" ((UDItype)(sl)),				      	\
1399*a9fa9459Szrj 	       "+r" (__carry)				      		\
1400*a9fa9459Szrj 	     : "%rJ" ((UDItype)(ah)),				     	\
1401*a9fa9459Szrj 	       "rI" ((UDItype)(bh)),				      	\
1402*a9fa9459Szrj 	       "%rJ" ((UDItype)(al)),				     	\
1403*a9fa9459Szrj 	       "rI" ((UDItype)(bl))				       	\
1404*a9fa9459Szrj 	     __CLOBBER_CC);						\
1405*a9fa9459Szrj   } while (0)
1406*a9fa9459Szrj 
/* sub_ddmmss (sparc64): 64-bit double-word subtract, mirroring the
   add above — "subcc" on the low words, "movcs %xcc" to capture the
   borrow into __carry, then subtract it from the high word.  */
1407*a9fa9459Szrj #define sub_ddmmss(sh, sl, ah, al, bh, bl)				\
1408*a9fa9459Szrj   do {									\
1409*a9fa9459Szrj     UDItype __carry = 0;						\
1410*a9fa9459Szrj     __asm__ ("subcc\t%r5,%6,%1\n\t"					\
1411*a9fa9459Szrj 	     "sub\t%r3,%4,%0\n\t"					\
1412*a9fa9459Szrj 	     "movcs\t%%xcc, 1, %2\n\t"					\
1413*a9fa9459Szrj 	     "sub\t%0, %2, %0"						\
1414*a9fa9459Szrj 	     : "=r" ((UDItype)(sh)),				      	\
1415*a9fa9459Szrj 	       "=&r" ((UDItype)(sl)),				      	\
1416*a9fa9459Szrj 	       "+r" (__carry)				      		\
1417*a9fa9459Szrj 	     : "%rJ" ((UDItype)(ah)),				     	\
1418*a9fa9459Szrj 	       "rI" ((UDItype)(bh)),				      	\
1419*a9fa9459Szrj 	       "%rJ" ((UDItype)(al)),				     	\
1420*a9fa9459Szrj 	       "rI" ((UDItype)(bl))				       	\
1421*a9fa9459Szrj 	     __CLOBBER_CC);						\
1422*a9fa9459Szrj   } while (0)
1423*a9fa9459Szrj 
/* umul_ppmm (sparc64): 64x64->128 unsigned multiply synthesized from
   32x32 "mulx" partial products of the operand halves (split out with
   srl/srlx), with explicit carry propagation — the sethi/add/movcc
   sequence turns the condition code into the 2^32 carry term for the
   high word.  Uses four scratch registers; clobbers the condition
   codes.  */
1424*a9fa9459Szrj #define umul_ppmm(wh, wl, u, v)						\
1425*a9fa9459Szrj   do {									\
1426*a9fa9459Szrj 	  UDItype tmp1, tmp2, tmp3, tmp4;				\
1427*a9fa9459Szrj 	  __asm__ __volatile__ (					\
1428*a9fa9459Szrj 		   "srl %7,0,%3\n\t"					\
1429*a9fa9459Szrj 		   "mulx %3,%6,%1\n\t"					\
1430*a9fa9459Szrj 		   "srlx %6,32,%2\n\t"					\
1431*a9fa9459Szrj 		   "mulx %2,%3,%4\n\t"					\
1432*a9fa9459Szrj 		   "sllx %4,32,%5\n\t"					\
1433*a9fa9459Szrj 		   "srl %6,0,%3\n\t"					\
1434*a9fa9459Szrj 		   "sub %1,%5,%5\n\t"					\
1435*a9fa9459Szrj 		   "srlx %5,32,%5\n\t"					\
1436*a9fa9459Szrj 		   "addcc %4,%5,%4\n\t"					\
1437*a9fa9459Szrj 		   "srlx %7,32,%5\n\t"					\
1438*a9fa9459Szrj 		   "mulx %3,%5,%3\n\t"					\
1439*a9fa9459Szrj 		   "mulx %2,%5,%5\n\t"					\
1440*a9fa9459Szrj 		   "sethi %%hi(0x80000000),%2\n\t"			\
1441*a9fa9459Szrj 		   "addcc %4,%3,%4\n\t"					\
1442*a9fa9459Szrj 		   "srlx %4,32,%4\n\t"					\
1443*a9fa9459Szrj 		   "add %2,%2,%2\n\t"					\
1444*a9fa9459Szrj 		   "movcc %%xcc,%%g0,%2\n\t"				\
1445*a9fa9459Szrj 		   "addcc %5,%4,%5\n\t"					\
1446*a9fa9459Szrj 		   "sllx %3,32,%3\n\t"					\
1447*a9fa9459Szrj 		   "add %1,%3,%1\n\t"					\
1448*a9fa9459Szrj 		   "add %5,%2,%0"					\
1449*a9fa9459Szrj 	   : "=r" ((UDItype)(wh)),					\
1450*a9fa9459Szrj 	     "=&r" ((UDItype)(wl)),					\
1451*a9fa9459Szrj 	     "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4)	\
1452*a9fa9459Szrj 	   : "r" ((UDItype)(u)),					\
1453*a9fa9459Szrj 	     "r" ((UDItype)(v))						\
1454*a9fa9459Szrj 	   __CLOBBER_CC);						\
1455*a9fa9459Szrj   } while (0)
1456*a9fa9459Szrj #define UMUL_TIME 96
1457*a9fa9459Szrj #define UDIV_TIME 230
1458*a9fa9459Szrj #endif /* sparc64 */
1459*a9fa9459Szrj 
1460*a9fa9459Szrj #if defined (__vax__) && W_TYPE_SIZE == 32
/* add_ssaaaa (VAX): "addl2" adds the low words in place, "adwc" (add
   with carry) then adds the high words.  ah/al are tied to the output
   operands via the "0"/"1" matching constraints.  */
1461*a9fa9459Szrj #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1462*a9fa9459Szrj   __asm__ ("addl2 %5,%1\n\tadwc %3,%0"					\
1463*a9fa9459Szrj 	   : "=g" ((USItype) (sh)),					\
1464*a9fa9459Szrj 	     "=&g" ((USItype) (sl))					\
1465*a9fa9459Szrj 	   : "%0" ((USItype) (ah)),					\
1466*a9fa9459Szrj 	     "g" ((USItype) (bh)),					\
1467*a9fa9459Szrj 	     "%1" ((USItype) (al)),					\
1468*a9fa9459Szrj 	     "g" ((USItype) (bl)))
/* sub_ddmmss (VAX): "subl2" subtracts the low words, "sbwc"
   (subtract with carry/borrow) the high words.  */
1469*a9fa9459Szrj #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1470*a9fa9459Szrj   __asm__ ("subl2 %5,%1\n\tsbwc %3,%0"					\
1471*a9fa9459Szrj 	   : "=g" ((USItype) (sh)),					\
1472*a9fa9459Szrj 	     "=&g" ((USItype) (sl))					\
1473*a9fa9459Szrj 	   : "0" ((USItype) (ah)),					\
1474*a9fa9459Szrj 	     "g" ((USItype) (bh)),					\
1475*a9fa9459Szrj 	     "1" ((USItype) (al)),					\
1476*a9fa9459Szrj 	     "g" ((USItype) (bl)))
/* umul_ppmm (VAX): "emul" is a SIGNED 32x32->64 multiply (the $0
   operand is a 32-bit addend).  The trailing C statement adds
   ((m0 < 0) ? m1 : 0) + ((m1 < 0) ? m0 : 0) to the high word to
   convert the signed product into the unsigned one.  */
1477*a9fa9459Szrj #define umul_ppmm(xh, xl, m0, m1) \
1478*a9fa9459Szrj   do {									\
1479*a9fa9459Szrj     union {								\
1480*a9fa9459Szrj 	UDItype __ll;							\
1481*a9fa9459Szrj 	struct {USItype __l, __h;} __i;					\
1482*a9fa9459Szrj       } __xx;								\
1483*a9fa9459Szrj     USItype __m0 = (m0), __m1 = (m1);					\
1484*a9fa9459Szrj     __asm__ ("emul %1,%2,$0,%0"						\
1485*a9fa9459Szrj 	     : "=r" (__xx.__ll)						\
1486*a9fa9459Szrj 	     : "g" (__m0),						\
1487*a9fa9459Szrj 	       "g" (__m1));						\
1488*a9fa9459Szrj     (xh) = __xx.__i.__h;						\
1489*a9fa9459Szrj     (xl) = __xx.__i.__l;						\
1490*a9fa9459Szrj     (xh) += ((((SItype) __m0 >> 31) & __m1)				\
1491*a9fa9459Szrj 	     + (((SItype) __m1 >> 31) & __m0));				\
1492*a9fa9459Szrj   } while (0)
/* sdiv_qrnnd (VAX): "ediv" divides the signed 64-bit value (n1, n0)
   by d, producing quotient q and remainder r in one instruction.  */
1493*a9fa9459Szrj #define sdiv_qrnnd(q, r, n1, n0, d) \
1494*a9fa9459Szrj   do {									\
1495*a9fa9459Szrj     union {DItype __ll;							\
1496*a9fa9459Szrj 	   struct {SItype __l, __h;} __i;				\
1497*a9fa9459Szrj 	  } __xx;							\
1498*a9fa9459Szrj     __xx.__i.__h = n1; __xx.__i.__l = n0;				\
1499*a9fa9459Szrj     __asm__ ("ediv %3,%2,%0,%1"						\
1500*a9fa9459Szrj 	     : "=g" (q), "=g" (r)					\
1501*a9fa9459Szrj 	     : "g" (__xx.__ll), "g" (d));				\
1502*a9fa9459Szrj   } while (0)
1503*a9fa9459Szrj #endif /* __vax__ */
1504*a9fa9459Szrj 
1505*a9fa9459Szrj #ifdef _TMS320C6X
/* add_ssaaaa (TMS320C6X): "addu" returns the widened unsigned sum of
   the low words in a 64-bit register pair, so bit 32 of __ll is the
   carry; the high word is then summed in plain C as ah + bh + carry.  */
1506*a9fa9459Szrj #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1507*a9fa9459Szrj   do									\
1508*a9fa9459Szrj     {									\
1509*a9fa9459Szrj       UDItype __ll;							\
1510*a9fa9459Szrj       __asm__ ("addu .l1 %1, %2, %0"					\
1511*a9fa9459Szrj 	       : "=a" (__ll) : "a" (al), "a" (bl));			\
1512*a9fa9459Szrj       (sl) = (USItype)__ll;						\
1513*a9fa9459Szrj       (sh) = ((USItype)(__ll >> 32)) + (ah) + (bh);			\
1514*a9fa9459Szrj     }									\
1515*a9fa9459Szrj   while (0)
1516*a9fa9459Szrj 
1517*a9fa9459Szrj #ifdef _TMS320C6400_PLUS
/* Widen both operands to 64 bits' worth of product.  The arguments
   are parenthesized so that expression arguments expand correctly —
   without the parentheses, __umulsidi3 (a + b, c) would expand to
   (UDItype)(USItype)a + b * (USItype)c, since a cast binds tighter
   than '+' or '*'.  */
1518*a9fa9459Szrj #define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
/* umul_ppmm (C6400+): let the compiler expand the widened 32x32->64
   multiply, then split the product into high (w1) and low (w0)
   words.  */
1519*a9fa9459Szrj #define umul_ppmm(w1, w0, u, v)						\
1520*a9fa9459Szrj   do {									\
1521*a9fa9459Szrj     UDItype __x = (UDItype) (USItype) (u) * (USItype) (v);		\
1522*a9fa9459Szrj     (w1) = (USItype) (__x >> 32);					\
1523*a9fa9459Szrj     (w0) = (USItype) (__x);						\
1524*a9fa9459Szrj   } while (0)
1525*a9fa9459Szrj #endif  /* _TMS320C6400_PLUS */
1526*a9fa9459Szrj 
/* Bit scans map directly onto compiler builtins (trailing-zero count
   only on C6400, which has the needed instruction).  UMUL_TIME /
   UDIV_TIME are cycle-cost estimates for multiply and divide.  */
1527*a9fa9459Szrj #define count_leading_zeros(count, x)	((count) = __builtin_clz (x))
1528*a9fa9459Szrj #ifdef _TMS320C6400
1529*a9fa9459Szrj #define count_trailing_zeros(count, x)	((count) = __builtin_ctz (x))
1530*a9fa9459Szrj #endif
1531*a9fa9459Szrj #define UMUL_TIME 4
1532*a9fa9459Szrj #define UDIV_TIME 40
1533*a9fa9459Szrj #endif /* _TMS320C6X */
1534*a9fa9459Szrj 
1535*a9fa9459Szrj #if defined (__xtensa__) && W_TYPE_SIZE == 32
1536*a9fa9459Szrj /* This code is not Xtensa-configuration-specific, so rely on the compiler
1537*a9fa9459Szrj    to expand builtin functions depending on what configuration features
1538*a9fa9459Szrj    are available.  This avoids library calls when the operation can be
1539*a9fa9459Szrj    performed in-line.  */
/* umul_ppmm (xtensa): compute the widened product with the builtin
   and split it into word halves through a DWunion.  */
1540*a9fa9459Szrj #define umul_ppmm(w1, w0, u, v)						\
1541*a9fa9459Szrj   do {									\
1542*a9fa9459Szrj     DWunion __w;							\
1543*a9fa9459Szrj     __w.ll = __builtin_umulsidi3 (u, v);				\
1544*a9fa9459Szrj     w1 = __w.s.high;							\
1545*a9fa9459Szrj     w0 = __w.s.low;							\
1546*a9fa9459Szrj   } while (0)
/* The remaining primitives also defer to the builtins (see the
   section comment above).  */
1547*a9fa9459Szrj #define __umulsidi3(u, v)		__builtin_umulsidi3 (u, v)
1548*a9fa9459Szrj #define count_leading_zeros(COUNT, X)	((COUNT) = __builtin_clz (X))
1549*a9fa9459Szrj #define count_trailing_zeros(COUNT, X)	((COUNT) = __builtin_ctz (X))
1550*a9fa9459Szrj #endif /* __xtensa__ */
1551*a9fa9459Szrj 
1552*a9fa9459Szrj #if defined xstormy16
/* count_leading_zeros (xstormy16): built from 16-bit chunks, scanning
   halfwords from the most significant end and stopping at the first
   one that contains a set bit.
   NOTE(review): the extern below declares __stormy16_count_leading_zeros
   but the macro actually calls __clzhi2 — presumably both name the
   same libgcc routine; confirm against libgcc's xstormy16 support.  */
1553*a9fa9459Szrj extern UHItype __stormy16_count_leading_zeros (UHItype);
1554*a9fa9459Szrj #define count_leading_zeros(count, x)					\
1555*a9fa9459Szrj   do									\
1556*a9fa9459Szrj     {									\
1557*a9fa9459Szrj       UHItype size;							\
1558*a9fa9459Szrj 									\
1559*a9fa9459Szrj       /* We assume that W_TYPE_SIZE is a multiple of 16...  */		\
1560*a9fa9459Szrj       for ((count) = 0, size = W_TYPE_SIZE; size; size -= 16)		\
1561*a9fa9459Szrj 	{								\
1562*a9fa9459Szrj 	  UHItype c;							\
1563*a9fa9459Szrj 									\
1564*a9fa9459Szrj 	  c = __clzhi2 ((x) >> (size - 16));				\
1565*a9fa9459Szrj 	  (count) += c;							\
1566*a9fa9459Szrj 	  if (c != 16)							\
1567*a9fa9459Szrj 	    break;							\
1568*a9fa9459Szrj 	}								\
1569*a9fa9459Szrj     }									\
1570*a9fa9459Szrj   while (0)
/* Result for a zero argument: all W_TYPE_SIZE bits are "leading".  */
1571*a9fa9459Szrj #define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
1572*a9fa9459Szrj #endif
1573*a9fa9459Szrj 
1574*a9fa9459Szrj #if defined (__z8000__) && W_TYPE_SIZE == 16
/* add_ssaaaa (z8000): "add" on the low words, "adc" (add with carry)
   on the high words; %H is a z8000 operand-print modifier.  ah/al are
   tied to the outputs via the "0"/"1" matching constraints.  */
1575*a9fa9459Szrj #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1576*a9fa9459Szrj   __asm__ ("add	%H1,%H5\n\tadc	%H0,%H3"				\
1577*a9fa9459Szrj 	   : "=r" ((unsigned int)(sh)),					\
1578*a9fa9459Szrj 	     "=&r" ((unsigned int)(sl))					\
1579*a9fa9459Szrj 	   : "%0" ((unsigned int)(ah)),					\
1580*a9fa9459Szrj 	     "r" ((unsigned int)(bh)),					\
1581*a9fa9459Szrj 	     "%1" ((unsigned int)(al)),					\
1582*a9fa9459Szrj 	     "rQR" ((unsigned int)(bl)))
/* sub_ddmmss (z8000): "sub" on the low words, "sbc" (subtract with
   carry/borrow) on the high words.  */
1583*a9fa9459Szrj #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1584*a9fa9459Szrj   __asm__ ("sub	%H1,%H5\n\tsbc	%H0,%H3"				\
1585*a9fa9459Szrj 	   : "=r" ((unsigned int)(sh)),					\
1586*a9fa9459Szrj 	     "=&r" ((unsigned int)(sl))					\
1587*a9fa9459Szrj 	   : "0" ((unsigned int)(ah)),					\
1588*a9fa9459Szrj 	     "r" ((unsigned int)(bh)),					\
1589*a9fa9459Szrj 	     "1" ((unsigned int)(al)),					\
1590*a9fa9459Szrj 	     "rQR" ((unsigned int)(bl)))
/* umul_ppmm (z8000, 16-bit words): "mult" forms a SIGNED 16x16->32
   product in a register pair; the trailing C statement adds
   ((m0 < 0) ? m1 : 0) + ((m1 < 0) ? m0 : 0) into the high word to
   convert the signed product to the unsigned one (same trick as the
   VAX version above).  */
1591*a9fa9459Szrj #define umul_ppmm(xh, xl, m0, m1) \
1592*a9fa9459Szrj   do {									\
1593*a9fa9459Szrj     union {long int __ll;						\
1594*a9fa9459Szrj 	   struct {unsigned int __h, __l;} __i;				\
1595*a9fa9459Szrj 	  } __xx;							\
1596*a9fa9459Szrj     unsigned int __m0 = (m0), __m1 = (m1);				\
1597*a9fa9459Szrj     __asm__ ("mult	%S0,%H3"					\
1598*a9fa9459Szrj 	     : "=r" (__xx.__i.__h),					\
1599*a9fa9459Szrj 	       "=r" (__xx.__i.__l)					\
1600*a9fa9459Szrj 	     : "%1" (__m0),						\
1601*a9fa9459Szrj 	       "rQR" (__m1));						\
1602*a9fa9459Szrj     (xh) = __xx.__i.__h; (xl) = __xx.__i.__l;				\
1603*a9fa9459Szrj     (xh) += ((((signed int) __m0 >> 15) & __m1)				\
1604*a9fa9459Szrj 	     + (((signed int) __m1 >> 15) & __m0));			\
1605*a9fa9459Szrj   } while (0)
1606*a9fa9459Szrj #endif /* __z8000__ */
1607*a9fa9459Szrj 
1608*a9fa9459Szrj #endif /* __GNUC__ */
1609*a9fa9459Szrj 
1610*a9fa9459Szrj /* If this machine has no inline assembler, use C macros.  */
1611*a9fa9459Szrj 
1612*a9fa9459Szrj #if !defined (add_ssaaaa)
/* Generic add_ssaaaa: (sh, sl) = (ah, al) + (bh, bl) in plain C.
   The carry out of the low-word add is detected as (__x < (al)) —
   unsigned overflow wrapped, so the sum is smaller than an addend.
   Note al is evaluated twice; avoid side-effecting arguments.  */
1613*a9fa9459Szrj #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1614*a9fa9459Szrj   do {									\
1615*a9fa9459Szrj     UWtype __x;								\
1616*a9fa9459Szrj     __x = (al) + (bl);							\
1617*a9fa9459Szrj     (sh) = (ah) + (bh) + (__x < (al));					\
1618*a9fa9459Szrj     (sl) = __x;								\
1619*a9fa9459Szrj   } while (0)
1620*a9fa9459Szrj #endif
1621*a9fa9459Szrj 
1622*a9fa9459Szrj #if !defined (sub_ddmmss)
/* Generic sub_ddmmss: (sh, sl) = (ah, al) - (bh, bl) in plain C.
   The borrow out of the low-word subtract is detected as
   (__x > (al)) — the difference exceeds the minuend exactly when the
   unsigned subtraction wrapped.  al is evaluated twice.  */
1623*a9fa9459Szrj #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1624*a9fa9459Szrj   do {									\
1625*a9fa9459Szrj     UWtype __x;								\
1626*a9fa9459Szrj     __x = (al) - (bl);							\
1627*a9fa9459Szrj     (sh) = (ah) - (bh) - (__x > (al));					\
1628*a9fa9459Szrj     (sl) = __x;								\
1629*a9fa9459Szrj   } while (0)
1630*a9fa9459Szrj #endif
1631*a9fa9459Szrj 
1632*a9fa9459Szrj /* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
1633*a9fa9459Szrj    smul_ppmm.  */
1634*a9fa9459Szrj #if !defined (umul_ppmm) && defined (smul_ppmm)
/* umul_ppmm from smul_ppmm: do the signed multiply, then add
   (u < 0 ? v : 0) + (v < 0 ? u : 0) into the high word — the
   standard signed-to-unsigned product correction.  The expression
   -(x >> (W_TYPE_SIZE - 1)) is an all-ones mask exactly when the
   sign bit of x is set.  */
1635*a9fa9459Szrj #define umul_ppmm(w1, w0, u, v)						\
1636*a9fa9459Szrj   do {									\
1637*a9fa9459Szrj     UWtype __w1;							\
1638*a9fa9459Szrj     UWtype __xm0 = (u), __xm1 = (v);					\
1639*a9fa9459Szrj     smul_ppmm (__w1, w0, __xm0, __xm1);					\
1640*a9fa9459Szrj     (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1)		\
1641*a9fa9459Szrj 		+ (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0);		\
1642*a9fa9459Szrj   } while (0)
1643*a9fa9459Szrj #endif
1644*a9fa9459Szrj 
1645*a9fa9459Szrj /* If we still don't have umul_ppmm, define it using plain C.  */
1646*a9fa9459Szrj #if !defined (umul_ppmm)
/* Generic umul_ppmm: schoolbook multiplication on half-words.
   The four partial products are combined through __x1, which gathers
   both cross products; the carry out of "__x1 += __x2" is detected by
   the wrap test and added into __x3 one half-word base (__ll_B,
   i.e. 2^(W_TYPE_SIZE/2)) higher.  */
1647*a9fa9459Szrj #define umul_ppmm(w1, w0, u, v)						\
1648*a9fa9459Szrj   do {									\
1649*a9fa9459Szrj     UWtype __x0, __x1, __x2, __x3;					\
1650*a9fa9459Szrj     UHWtype __ul, __vl, __uh, __vh;					\
1651*a9fa9459Szrj 									\
1652*a9fa9459Szrj     __ul = __ll_lowpart (u);						\
1653*a9fa9459Szrj     __uh = __ll_highpart (u);						\
1654*a9fa9459Szrj     __vl = __ll_lowpart (v);						\
1655*a9fa9459Szrj     __vh = __ll_highpart (v);						\
1656*a9fa9459Szrj 									\
1657*a9fa9459Szrj     __x0 = (UWtype) __ul * __vl;					\
1658*a9fa9459Szrj     __x1 = (UWtype) __ul * __vh;					\
1659*a9fa9459Szrj     __x2 = (UWtype) __uh * __vl;					\
1660*a9fa9459Szrj     __x3 = (UWtype) __uh * __vh;					\
1661*a9fa9459Szrj 									\
1662*a9fa9459Szrj     __x1 += __ll_highpart (__x0);/* this can't give carry */		\
1663*a9fa9459Szrj     __x1 += __x2;		/* but this indeed can */		\
1664*a9fa9459Szrj     if (__x1 < __x2)		/* did we get it? */			\
1665*a9fa9459Szrj       __x3 += __ll_B;		/* yes, add it in the proper pos.  */	\
1666*a9fa9459Szrj 									\
1667*a9fa9459Szrj     (w1) = __x3 + __ll_highpart (__x1);					\
1668*a9fa9459Szrj     (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0);		\
1669*a9fa9459Szrj   } while (0)
1670*a9fa9459Szrj #endif
1671*a9fa9459Szrj 
1672*a9fa9459Szrj #if !defined (__umulsidi3)
/* Fallback __umulsidi3: form the double-word product with
   umul_ppmm and return it through a DWunion (statement expression).  */
1673*a9fa9459Szrj #define __umulsidi3(u, v) \
1674*a9fa9459Szrj   ({DWunion __w;							\
1675*a9fa9459Szrj     umul_ppmm (__w.s.high, __w.s.low, u, v);				\
1676*a9fa9459Szrj     __w.ll; })
1677*a9fa9459Szrj #endif
1678*a9fa9459Szrj 
1679*a9fa9459Szrj /* Define this unconditionally, so it can be used for debugging.  */
/* __udiv_qrnnd_c: divide the two-word value (n1, n0) by d using only
   full-word C arithmetic on half-word digits (Knuth-style two-digit
   long division).  Each of the two quotient digits is estimated from
   the high divisor half __d1 and corrected downward at most twice.
   The divisor must be normalized (its high bit set) for the estimate
   to be close enough — hence UDIV_NEEDS_NORMALIZATION below when this
   is the chosen udiv_qrnnd.  */
1680*a9fa9459Szrj #define __udiv_qrnnd_c(q, r, n1, n0, d) \
1681*a9fa9459Szrj   do {									\
1682*a9fa9459Szrj     UWtype __d1, __d0, __q1, __q0;					\
1683*a9fa9459Szrj     UWtype __r1, __r0, __m;						\
1684*a9fa9459Szrj     __d1 = __ll_highpart (d);						\
1685*a9fa9459Szrj     __d0 = __ll_lowpart (d);						\
1686*a9fa9459Szrj 									\
1687*a9fa9459Szrj     __r1 = (n1) % __d1;							\
1688*a9fa9459Szrj     __q1 = (n1) / __d1;							\
1689*a9fa9459Szrj     __m = (UWtype) __q1 * __d0;						\
1690*a9fa9459Szrj     __r1 = __r1 * __ll_B | __ll_highpart (n0);				\
1691*a9fa9459Szrj     if (__r1 < __m)							\
1692*a9fa9459Szrj       {									\
1693*a9fa9459Szrj 	__q1--, __r1 += (d);						\
1694*a9fa9459Szrj 	if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
1695*a9fa9459Szrj 	  if (__r1 < __m)						\
1696*a9fa9459Szrj 	    __q1--, __r1 += (d);					\
1697*a9fa9459Szrj       }									\
1698*a9fa9459Szrj     __r1 -= __m;							\
1699*a9fa9459Szrj 									\
1700*a9fa9459Szrj     __r0 = __r1 % __d1;							\
1701*a9fa9459Szrj     __q0 = __r1 / __d1;							\
1702*a9fa9459Szrj     __m = (UWtype) __q0 * __d0;						\
1703*a9fa9459Szrj     __r0 = __r0 * __ll_B | __ll_lowpart (n0);				\
1704*a9fa9459Szrj     if (__r0 < __m)							\
1705*a9fa9459Szrj       {									\
1706*a9fa9459Szrj 	__q0--, __r0 += (d);						\
1707*a9fa9459Szrj 	if (__r0 >= (d))						\
1708*a9fa9459Szrj 	  if (__r0 < __m)						\
1709*a9fa9459Szrj 	    __q0--, __r0 += (d);					\
1710*a9fa9459Szrj       }									\
1711*a9fa9459Szrj     __r0 -= __m;							\
1712*a9fa9459Szrj 									\
1713*a9fa9459Szrj     (q) = (UWtype) __q1 * __ll_B | __q0;				\
1714*a9fa9459Szrj     (r) = __r0;								\
1715*a9fa9459Szrj   } while (0)
1716*a9fa9459Szrj 
1717*a9fa9459Szrj /* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
1718*a9fa9459Szrj    __udiv_w_sdiv (defined in libgcc or elsewhere).  */
1719*a9fa9459Szrj #if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
/* udiv_qrnnd via signed divide: delegate to __udiv_w_sdiv, which
   returns the quotient and stores the remainder through its first
   argument.  */
1720*a9fa9459Szrj #define udiv_qrnnd(q, r, nh, nl, d) \
1721*a9fa9459Szrj   do {									\
1722*a9fa9459Szrj     extern UWtype __udiv_w_sdiv (UWtype *, UWtype, UWtype, UWtype);	\
1723*a9fa9459Szrj     UWtype __r;								\
1724*a9fa9459Szrj     (q) = __udiv_w_sdiv (&__r, nh, nl, d);				\
1725*a9fa9459Szrj     (r) = __r;								\
1726*a9fa9459Szrj   } while (0)
1727*a9fa9459Szrj #endif
1728*a9fa9459Szrj 
1729*a9fa9459Szrj /* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.  */
1730*a9fa9459Szrj #if !defined (udiv_qrnnd)
/* __udiv_qrnnd_c needs the divisor pre-shifted so its high bit is
   set; advertise that requirement to callers.  */
1731*a9fa9459Szrj #define UDIV_NEEDS_NORMALIZATION 1
1732*a9fa9459Szrj #define udiv_qrnnd __udiv_qrnnd_c
1733*a9fa9459Szrj #endif
1734*a9fa9459Szrj 
1735*a9fa9459Szrj #if !defined (count_leading_zeros)
/* Generic count_leading_zeros: locate the most significant non-zero
   byte of x (for words of <= 32 bits via three quarter-word
   comparisons using __BITS4; otherwise via a byte-stepping loop),
   then finish with the 256-entry __clz_tab lookup on that byte.
   COUNT_LEADING_ZEROS_0 records the x == 0 result (W_TYPE_SIZE).  */
1736*a9fa9459Szrj #define count_leading_zeros(count, x) \
1737*a9fa9459Szrj   do {									\
1738*a9fa9459Szrj     UWtype __xr = (x);							\
1739*a9fa9459Szrj     UWtype __a;								\
1740*a9fa9459Szrj 									\
1741*a9fa9459Szrj     if (W_TYPE_SIZE <= 32)						\
1742*a9fa9459Szrj       {									\
1743*a9fa9459Szrj 	__a = __xr < ((UWtype)1<<2*__BITS4)				\
1744*a9fa9459Szrj 	  ? (__xr < ((UWtype)1<<__BITS4) ? 0 : __BITS4)			\
1745*a9fa9459Szrj 	  : (__xr < ((UWtype)1<<3*__BITS4) ?  2*__BITS4 : 3*__BITS4);	\
1746*a9fa9459Szrj       }									\
1747*a9fa9459Szrj     else								\
1748*a9fa9459Szrj       {									\
1749*a9fa9459Szrj 	for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8)			\
1750*a9fa9459Szrj 	  if (((__xr >> __a) & 0xff) != 0)				\
1751*a9fa9459Szrj 	    break;							\
1752*a9fa9459Szrj       }									\
1753*a9fa9459Szrj 									\
1754*a9fa9459Szrj     (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a);		\
1755*a9fa9459Szrj   } while (0)
1756*a9fa9459Szrj #define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
1757*a9fa9459Szrj #endif
1758*a9fa9459Szrj 
1759*a9fa9459Szrj #if !defined (count_trailing_zeros)
1760*a9fa9459Szrj /* Define count_trailing_zeros using count_leading_zeros.  The latter might be
1761*a9fa9459Szrj    defined in asm, but if it is not, the C version above is good enough.  */
/* (count) = number of trailing zeros in x: isolate the lowest set
   bit with x & -x, count its leading zeros, and flip the index.
   x must be nonzero (the isolated bit would be 0 otherwise).  */
1762*a9fa9459Szrj #define count_trailing_zeros(count, x) \
1763*a9fa9459Szrj   do {									\
1764*a9fa9459Szrj     UWtype __ctz_x = (x);						\
1765*a9fa9459Szrj     UWtype __ctz_c;							\
1766*a9fa9459Szrj     count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x);			\
1767*a9fa9459Szrj     (count) = W_TYPE_SIZE - 1 - __ctz_c;				\
1768*a9fa9459Szrj   } while (0)
1769*a9fa9459Szrj #endif
1770*a9fa9459Szrj 
1771*a9fa9459Szrj #ifndef UDIV_NEEDS_NORMALIZATION
/* Default: the selected udiv_qrnnd handles unnormalized divisors
   unless a definition above said otherwise.  */
1772*a9fa9459Szrj #define UDIV_NEEDS_NORMALIZATION 0
1773*a9fa9459Szrj #endif
1774