1 /* Copyright (C) 2008-2018 Free Software Foundation, Inc.
2 
3    This file is free software; you can redistribute it and/or modify it under
4    the terms of the GNU General Public License as published by the Free
5    Software Foundation; either version 3 of the License, or (at your option)
6    any later version.
7 
8    This file is distributed in the hope that it will be useful, but WITHOUT
9    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
11    for more details.
12 
13    Under Section 7 of GPL version 3, you are granted additional
14    permissions described in the GCC Runtime Library Exception, version
15    3.1, as published by the Free Software Foundation.
16 
17    You should have received a copy of the GNU General Public License and
18    a copy of the GCC Runtime Library Exception along with this program;
19    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
20    <http://www.gnu.org/licenses/>.  */
21 
22 #include <spu_intrinsics.h>
23 
/* 128-bit integer types, obtained by asking the compiler for TImode.  */
typedef int TItype __attribute__ ((mode (TI)));
typedef unsigned int UTItype __attribute__ ((mode (TI)));

/* Unsigned entry points defined in this file.  */
UTItype __udivmodti4 (UTItype u, UTItype v, UTItype *w);
UTItype __udivti3 (UTItype u, UTItype v);
UTItype __umodti3 (UTItype u, UTItype v);

/* Signed entry points defined in this file.  */
TItype __divti3 (TItype u, TItype v);
TItype __modti3 (TItype u, TItype v);
31 
32 union qword_UTItype
33   {
34     qword q;
35     UTItype t;
36   };
37 
38 inline static qword
si_from_UTItype(UTItype t)39 si_from_UTItype (UTItype t)
40 {
41   union qword_UTItype u;
42   u.t = t;
43   return u.q;
44 }
45 
46 inline static UTItype
si_to_UTItype(qword q)47 si_to_UTItype (qword q)
48 {
49   union qword_UTItype u;
50   u.q = q;
51   return u.t;
52 }
53 
54 inline static unsigned int
count_leading_zeros(UTItype x)55 count_leading_zeros (UTItype x)
56 {
57   qword c = si_clz (*(qword *) & x);
58   qword cmp0 = si_cgti (c, 31);
59   qword cmp1 = si_and (cmp0, si_shlqbyi (cmp0, 4));
60   qword cmp2 = si_and (cmp1, si_shlqbyi (cmp0, 8));
61   qword s = si_a (c, si_and (cmp0, si_shlqbyi (c, 4)));
62   s = si_a (s, si_and (cmp1, si_shlqbyi (c, 8)));
63   s = si_a (s, si_and (cmp2, si_shlqbyi (c, 12)));
64   return si_to_uint (s);
65 }
66 
67 /* Based on implementation of udivmodsi4, which is essentially
68  * an optimized version of libgcc/udivmodsi4.c
69         clz      %7,%2
70         clz      %4,%1
71         il       %5,1
72         fsmbi    %0,0
73         sf       %7,%4,%7
74         ori      %3,%1,0
75         shl      %5,%5,%7
76         shl      %4,%2,%7
77 1:      or       %8,%0,%5
78         rotmi    %5,%5,-1
79         clgt     %6,%4,%3
80         sf       %7,%4,%3
81         rotmi    %4,%4,-1
82         selb     %0,%8,%0,%6
83         selb     %3,%7,%3,%6
84 3:      brnz     %5,1b
85  */
86 
87 UTItype
__udivmodti4(UTItype num,UTItype den,UTItype * rp)88 __udivmodti4 (UTItype num, UTItype den, UTItype * rp)
89 {
90   qword shift =
91     si_from_uint (count_leading_zeros (den) - count_leading_zeros (num));
92   qword n0 = si_from_UTItype (num);
93   qword d0 = si_from_UTItype (den);
94   qword bit = si_andi (si_fsmbi (1), 1);
95   qword r0 = si_il (0);
96   qword m1 = si_fsmbi (0x000f);
97   qword mask, r1, n1;
98 
99   d0 = si_shlqbybi (si_shlqbi (d0, shift), shift);
100   bit = si_shlqbybi (si_shlqbi (bit, shift), shift);
101 
102   do
103     {
104       r1 = si_or (r0, bit);
105 
106       // n1 = n0 - d0 in TImode
107       n1 = si_bg (d0, n0);
108       n1 = si_shlqbyi (n1, 4);
109       n1 = si_sf (m1, n1);
110       n1 = si_bgx (d0, n0, n1);
111       n1 = si_shlqbyi (n1, 4);
112       n1 = si_sf (m1, n1);
113       n1 = si_bgx (d0, n0, n1);
114       n1 = si_shlqbyi (n1, 4);
115       n1 = si_sf (m1, n1);
116       n1 = si_sfx (d0, n0, n1);
117 
118       mask = si_fsm (si_cgti (n1, -1));
119       r0 = si_selb (r0, r1, mask);
120       n0 = si_selb (n0, n1, mask);
121       bit = si_rotqmbii (bit, -1);
122       d0 = si_rotqmbii (d0, -1);
123     }
124   while (si_to_uint (si_orx (bit)));
125   if (rp)
126     *rp = si_to_UTItype (n0);
127   return si_to_UTItype (r0);
128 }
129 
130 UTItype
__udivti3(UTItype n,UTItype d)131 __udivti3 (UTItype n, UTItype d)
132 {
133   return __udivmodti4 (n, d, (UTItype *)0);
134 }
135 
136 UTItype
__umodti3(UTItype n,UTItype d)137 __umodti3 (UTItype n, UTItype d)
138 {
139   UTItype w;
140   __udivmodti4 (n, d, &w);
141   return w;
142 }
143 
144 TItype
__divti3(TItype n,TItype d)145 __divti3 (TItype n, TItype d)
146 {
147   int c = 0;
148   TItype w;
149 
150   if (n < 0)
151     {
152         c = ~c;
153         n = -n;
154     }
155   if (d < 0)
156     {
157         c = ~c;
158         d = -d;
159     }
160 
161   w = __udivmodti4 (n, d, (UTItype *)0);
162   if (c)
163     w = -w;
164   return w;
165 }
166 
167 TItype
__modti3(TItype n,TItype d)168 __modti3 (TItype n, TItype d)
169 {
170   int c = 0;
171   TItype w;
172 
173   if (n < 0)
174     {
175         c = ~c;
176         n = -n;
177     }
178   if (d < 0)
179     {
180         c = ~c;
181         d = -d;
182     }
183 
184   __udivmodti4 (n, d, (UTItype *) &w);
185   if (c)
186     w = -w;
187   return w;
188 }
189