1 /*
2 * Copyright (c) 2016, Intel Corporation.
3 * Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
4 * Intel Math Library (LIBM) Source Code
5 *
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7 *
8 * This code is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License version 2 only, as
10 * published by the Free Software Foundation.
11 *
12 * This code is distributed in the hope that it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 * version 2 for more details (a copy is included in the LICENSE file that
16 * accompanied this code).
17 *
18 * You should have received a copy of the GNU General Public License version
19 * 2 along with this work; if not, write to the Free Software Foundation,
20 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
23 * or visit www.oracle.com if you need additional information or have any
24 * questions.
25 *
26 */
27 
28 #include "precompiled.hpp"
29 #include "asm/assembler.hpp"
30 #include "asm/assembler.inline.hpp"
31 #include "macroAssembler_x86.hpp"
32 #include "runtime/stubRoutines.hpp"
33 #include "utilities/globalDefinitions.hpp"
34 
35 /******************************************************************************/
36 //                     ALGORITHM DESCRIPTION - EXP()
37 //                     ---------------------
38 //
39 // Description:
40 //  Let K = 64 (table size).
41 //        x    x/log(2)     n
42 //       e  = 2          = 2 * T[j] * (1 + P(y))
43 //  where
44 //       x = m*log(2)/K + y,    y in [-log(2)/K..log(2)/K]
45 //       m = n*K + j,           m,n,j - signed integer, j in [-K/2..K/2]
46 //                  j/K
47 //       values of 2   are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]).
48 //
//       P(y) is a minimax polynomial approximation of exp(y)-1
//       on the small interval [-log(2)/K..log(2)/K] (the coefficients were
//       calculated with Maple V).
51 //
52 //  To avoid problems with arithmetic overflow and underflow,
53 //            n                        n1  n2
54 //  value of 2  is safely computed as 2 * 2 where n1 in [-BIAS/2..BIAS/2]
55 //  where BIAS is a value of exponent bias.
56 //
57 // Special cases:
58 //  exp(NaN) = NaN
59 //  exp(+INF) = +INF
60 //  exp(-INF) = 0
61 //  exp(x) = 1 for subnormals
62 //  for finite argument, only exp(0)=1 is exact
63 //  For IEEE double
64 //    if x >  709.782712893383973096 then exp(x) overflow
65 //    if x < -745.133219101941108420 then exp(x) underflow
66 //
67 /******************************************************************************/
68 
69 #ifdef _LP64
70 // The 64 bit code is at most SSE2 compliant
71 ATTRIBUTE_ALIGNED(16) juint _cv[] =
72 {
73     0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL, 0xfefa0000UL,
74     0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL, 0x3d1cf79aUL,
75     0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL,
76     0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL,
77     0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
78 };
79 
80 ATTRIBUTE_ALIGNED(16) juint _shifter[] =
81 {
82     0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
83 };
84 
85 ATTRIBUTE_ALIGNED(16) juint _mmask[] =
86 {
87     0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
88 };
89 
90 ATTRIBUTE_ALIGNED(16) juint _bias[] =
91 {
92     0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
93 };
94 
95 ATTRIBUTE_ALIGNED(16) juint _Tbl_addr[] =
96 {
97     0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
98     0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
99     0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
100     0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
101     0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
102     0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
103     0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
104     0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
105     0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
106     0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
107     0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
108     0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
109     0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
110     0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
111     0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
112     0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
113     0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
114     0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
115     0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
116     0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
117     0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
118     0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
119     0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
120     0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
121     0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
122     0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
123     0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
124     0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
125     0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
126     0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
127     0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
128     0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
129     0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
130     0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
131     0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
132     0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
133     0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
134     0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
135     0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
136     0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
137     0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
138     0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
139     0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
140     0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
141     0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
142     0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
143     0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
144     0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
145     0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
146     0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
147     0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
148     0x000fa7c1UL
149 };
150 
151 ATTRIBUTE_ALIGNED(16) juint _ALLONES[] =
152 {
153     0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
154 };
155 
156 ATTRIBUTE_ALIGNED(16) juint _ebias[] =
157 {
158     0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
159 };
160 
161 ATTRIBUTE_ALIGNED(4) juint _XMAX[] =
162 {
163     0xffffffffUL, 0x7fefffffUL
164 };
165 
166 ATTRIBUTE_ALIGNED(4) juint _XMIN[] =
167 {
168     0x00000000UL, 0x00100000UL
169 };
170 
171 ATTRIBUTE_ALIGNED(4) juint _INF[] =
172 {
173     0x00000000UL, 0x7ff00000UL
174 };
175 
176 ATTRIBUTE_ALIGNED(4) juint _ZERO[] =
177 {
178     0x00000000UL, 0x00000000UL
179 };
180 
181 ATTRIBUTE_ALIGNED(4) juint _ONE_val[] =
182 {
183     0x00000000UL, 0x3ff00000UL
184 };
185 
186 
187 // Registers:
188 // input: xmm0
189 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
190 //          rax, rdx, rcx, tmp - r11
191 
192 // Code generated by Intel C compiler for LIBM library
193 
fast_exp(XMMRegister xmm0,XMMRegister xmm1,XMMRegister xmm2,XMMRegister xmm3,XMMRegister xmm4,XMMRegister xmm5,XMMRegister xmm6,XMMRegister xmm7,Register eax,Register ecx,Register edx,Register tmp)194 void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
195   Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
196   Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
197   Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
198   Label L_2TAG_PACKET_12_0_2, B1_3, B1_5, start;
199 
200   assert_different_registers(tmp, eax, ecx, edx);
201   address cv = (address)_cv;
202   address Shifter = (address)_shifter;
203   address mmask = (address)_mmask;
204   address bias = (address)_bias;
205   address Tbl_addr = (address)_Tbl_addr;
206   address ALLONES = (address)_ALLONES;
207   address ebias = (address)_ebias;
208   address XMAX = (address)_XMAX;
209   address XMIN = (address)_XMIN;
210   address INF = (address)_INF;
211   address ZERO = (address)_ZERO;
212   address ONE_val = (address)_ONE_val;
213 
214   bind(start);
215   subq(rsp, 24);
216   movsd(Address(rsp, 8), xmm0);
217   unpcklpd(xmm0, xmm0);
218   movdqu(xmm1, ExternalAddress(cv));       // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
219   movdqu(xmm6, ExternalAddress(Shifter));  // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
220   movdqu(xmm2, ExternalAddress(16 + cv));    // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
221   movdqu(xmm3, ExternalAddress(32 + cv));    // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
222   pextrw(eax, xmm0, 3);
223   andl(eax, 32767);
224   movl(edx, 16527);
225   subl(edx, eax);
226   subl(eax, 15504);
227   orl(edx, eax);
228   cmpl(edx, INT_MIN);
229   jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
230   mulpd(xmm1, xmm0);
231   addpd(xmm1, xmm6);
232   movapd(xmm7, xmm1);
233   subpd(xmm1, xmm6);
234   mulpd(xmm2, xmm1);
235   movdqu(xmm4, ExternalAddress(64 + cv));    // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
236   mulpd(xmm3, xmm1);
237   movdqu(xmm5, ExternalAddress(80 + cv));    // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
238   subpd(xmm0, xmm2);
239   movdl(eax, xmm7);
240   movl(ecx, eax);
241   andl(ecx, 63);
242   shll(ecx, 4);
243   sarl(eax, 6);
244   movl(edx, eax);
245   movdqu(xmm6, ExternalAddress(mmask));    // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
246   pand(xmm7, xmm6);
247   movdqu(xmm6, ExternalAddress(bias));     // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
248   paddq(xmm7, xmm6);
249   psllq(xmm7, 46);
250   subpd(xmm0, xmm3);
251   lea(tmp, ExternalAddress(Tbl_addr));
252   movdqu(xmm2, Address(ecx, tmp));
253   mulpd(xmm4, xmm0);
254   movapd(xmm6, xmm0);
255   movapd(xmm1, xmm0);
256   mulpd(xmm6, xmm6);
257   mulpd(xmm0, xmm6);
258   addpd(xmm5, xmm4);
259   mulsd(xmm0, xmm6);
260   mulpd(xmm6, ExternalAddress(48 + cv));     // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
261   addsd(xmm1, xmm2);
262   unpckhpd(xmm2, xmm2);
263   mulpd(xmm0, xmm5);
264   addsd(xmm1, xmm0);
265   por(xmm2, xmm7);
266   unpckhpd(xmm0, xmm0);
267   addsd(xmm0, xmm1);
268   addsd(xmm0, xmm6);
269   addl(edx, 894);
270   cmpl(edx, 1916);
271   jcc(Assembler::above, L_2TAG_PACKET_1_0_2);
272   mulsd(xmm0, xmm2);
273   addsd(xmm0, xmm2);
274   jmp(B1_5);
275 
276   bind(L_2TAG_PACKET_1_0_2);
277   xorpd(xmm3, xmm3);
278   movdqu(xmm4, ExternalAddress(ALLONES));  // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
279   movl(edx, -1022);
280   subl(edx, eax);
281   movdl(xmm5, edx);
282   psllq(xmm4, xmm5);
283   movl(ecx, eax);
284   sarl(eax, 1);
285   pinsrw(xmm3, eax, 3);
286   movdqu(xmm6, ExternalAddress(ebias));    // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
287   psllq(xmm3, 4);
288   psubd(xmm2, xmm3);
289   mulsd(xmm0, xmm2);
290   cmpl(edx, 52);
291   jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
292   pand(xmm4, xmm2);
293   paddd(xmm3, xmm6);
294   subsd(xmm2, xmm4);
295   addsd(xmm0, xmm2);
296   cmpl(ecx, 1023);
297   jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
298   pextrw(ecx, xmm0, 3);
299   andl(ecx, 32768);
300   orl(edx, ecx);
301   cmpl(edx, 0);
302   jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
303   movapd(xmm6, xmm0);
304   addsd(xmm0, xmm4);
305   mulsd(xmm0, xmm3);
306   pextrw(ecx, xmm0, 3);
307   andl(ecx, 32752);
308   cmpl(ecx, 0);
309   jcc(Assembler::equal, L_2TAG_PACKET_5_0_2);
310   jmp(B1_5);
311 
312   bind(L_2TAG_PACKET_5_0_2);
313   mulsd(xmm6, xmm3);
314   mulsd(xmm4, xmm3);
315   movdqu(xmm0, xmm6);
316   pxor(xmm6, xmm4);
317   psrad(xmm6, 31);
318   pshufd(xmm6, xmm6, 85);
319   psllq(xmm0, 1);
320   psrlq(xmm0, 1);
321   pxor(xmm0, xmm6);
322   psrlq(xmm6, 63);
323   paddq(xmm0, xmm6);
324   paddq(xmm0, xmm4);
325   movl(Address(rsp, 0), 15);
326   jmp(L_2TAG_PACKET_6_0_2);
327 
328   bind(L_2TAG_PACKET_4_0_2);
329   addsd(xmm0, xmm4);
330   mulsd(xmm0, xmm3);
331   jmp(B1_5);
332 
333   bind(L_2TAG_PACKET_3_0_2);
334   addsd(xmm0, xmm4);
335   mulsd(xmm0, xmm3);
336   pextrw(ecx, xmm0, 3);
337   andl(ecx, 32752);
338   cmpl(ecx, 32752);
339   jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
340   jmp(B1_5);
341 
342   bind(L_2TAG_PACKET_2_0_2);
343   paddd(xmm3, xmm6);
344   addpd(xmm0, xmm2);
345   mulsd(xmm0, xmm3);
346   movl(Address(rsp, 0), 15);
347   jmp(L_2TAG_PACKET_6_0_2);
348 
349   bind(L_2TAG_PACKET_8_0_2);
350   cmpl(eax, 2146435072);
351   jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
352   movl(eax, Address(rsp, 12));
353   cmpl(eax, INT_MIN);
354   jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2);
355   movsd(xmm0, ExternalAddress(XMAX));      // 0xffffffffUL, 0x7fefffffUL
356   mulsd(xmm0, xmm0);
357 
358   bind(L_2TAG_PACKET_7_0_2);
359   movl(Address(rsp, 0), 14);
360   jmp(L_2TAG_PACKET_6_0_2);
361 
362   bind(L_2TAG_PACKET_10_0_2);
363   movsd(xmm0, ExternalAddress(XMIN));      // 0x00000000UL, 0x00100000UL
364   mulsd(xmm0, xmm0);
365   movl(Address(rsp, 0), 15);
366   jmp(L_2TAG_PACKET_6_0_2);
367 
368   bind(L_2TAG_PACKET_9_0_2);
369   movl(edx, Address(rsp, 8));
370   cmpl(eax, 2146435072);
371   jcc(Assembler::above, L_2TAG_PACKET_11_0_2);
372   cmpl(edx, 0);
373   jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
374   movl(eax, Address(rsp, 12));
375   cmpl(eax, 2146435072);
376   jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2);
377   movsd(xmm0, ExternalAddress(INF));       // 0x00000000UL, 0x7ff00000UL
378   jmp(B1_5);
379 
380   bind(L_2TAG_PACKET_12_0_2);
381   movsd(xmm0, ExternalAddress(ZERO));      // 0x00000000UL, 0x00000000UL
382   jmp(B1_5);
383 
384   bind(L_2TAG_PACKET_11_0_2);
385   movsd(xmm0, Address(rsp, 8));
386   addsd(xmm0, xmm0);
387   jmp(B1_5);
388 
389   bind(L_2TAG_PACKET_0_0_2);
390   movl(eax, Address(rsp, 12));
391   andl(eax, 2147483647);
392   cmpl(eax, 1083179008);
393   jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
394   movsd(Address(rsp, 8), xmm0);
395   addsd(xmm0, ExternalAddress(ONE_val));   // 0x00000000UL, 0x3ff00000UL
396   jmp(B1_5);
397 
398   bind(L_2TAG_PACKET_6_0_2);
399   movq(Address(rsp, 16), xmm0);
400 
401   bind(B1_3);
402   movq(xmm0, Address(rsp, 16));
403 
404   bind(B1_5);
405   addq(rsp, 24);
406 }
407 #else
408 // The 32 bit code is at most SSE2 compliant
409 ATTRIBUTE_ALIGNED(16) juint _static_const_table[] =
410 {
411     0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL, 0xffffffc0UL,
412     0x00000000UL, 0xffffffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL,
413     0x0000ffc0UL, 0x00000000UL, 0x00000000UL, 0x43380000UL, 0x00000000UL,
414     0x43380000UL, 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL,
415     0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL,
416     0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL,
417     0xfffffffeUL, 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL,
418     0x3fa55555UL, 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL,
419     0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
420     0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
421     0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
422     0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
423     0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
424     0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
425     0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
426     0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
427     0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
428     0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
429     0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
430     0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
431     0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
432     0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
433     0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
434     0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
435     0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
436     0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
437     0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
438     0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
439     0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
440     0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
441     0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
442     0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
443     0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
444     0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
445     0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
446     0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
447     0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
448     0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
449     0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
450     0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
451     0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
452     0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
453     0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
454     0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
455     0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
456     0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
457     0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
458     0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
459     0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
460     0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
461     0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
462     0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
463     0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
464     0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
465     0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
466     0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
467     0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
468     0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
469     0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
470     0x000fa7c1UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x7ff00000UL,
471     0x00000000UL, 0x00000000UL, 0xffffffffUL, 0x7fefffffUL, 0x00000000UL,
472     0x00100000UL
473 };
474 
475 //registers,
476 // input: (rbp + 8)
477 // scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
478 //          rax, rdx, rcx, rbx (tmp)
479 
480 // Code generated by Intel C compiler for LIBM library
481 
fast_exp(XMMRegister xmm0,XMMRegister xmm1,XMMRegister xmm2,XMMRegister xmm3,XMMRegister xmm4,XMMRegister xmm5,XMMRegister xmm6,XMMRegister xmm7,Register eax,Register ecx,Register edx,Register tmp)482 void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
483   Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
484   Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
485   Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
486   Label L_2TAG_PACKET_12_0_2, start;
487 
488   assert_different_registers(tmp, eax, ecx, edx);
489   address static_const_table = (address)_static_const_table;
490 
491   bind(start);
492   subl(rsp, 120);
493   movl(Address(rsp, 64), tmp);
494   lea(tmp, ExternalAddress(static_const_table));
495   movsd(xmm0, Address(rsp, 128));
496   unpcklpd(xmm0, xmm0);
497   movdqu(xmm1, Address(tmp, 64));          // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
498   movdqu(xmm6, Address(tmp, 48));          // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
499   movdqu(xmm2, Address(tmp, 80));          // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
500   movdqu(xmm3, Address(tmp, 96));          // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
501   pextrw(eax, xmm0, 3);
502   andl(eax, 32767);
503   movl(edx, 16527);
504   subl(edx, eax);
505   subl(eax, 15504);
506   orl(edx, eax);
507   cmpl(edx, INT_MIN);
508   jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
509   mulpd(xmm1, xmm0);
510   addpd(xmm1, xmm6);
511   movapd(xmm7, xmm1);
512   subpd(xmm1, xmm6);
513   mulpd(xmm2, xmm1);
514   movdqu(xmm4, Address(tmp, 128));         // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
515   mulpd(xmm3, xmm1);
516   movdqu(xmm5, Address(tmp, 144));         // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
517   subpd(xmm0, xmm2);
518   movdl(eax, xmm7);
519   movl(ecx, eax);
520   andl(ecx, 63);
521   shll(ecx, 4);
522   sarl(eax, 6);
523   movl(edx, eax);
524   movdqu(xmm6, Address(tmp, 16));          // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
525   pand(xmm7, xmm6);
526   movdqu(xmm6, Address(tmp, 32));          // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
527   paddq(xmm7, xmm6);
528   psllq(xmm7, 46);
529   subpd(xmm0, xmm3);
530   movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160));
531   mulpd(xmm4, xmm0);
532   movapd(xmm6, xmm0);
533   movapd(xmm1, xmm0);
534   mulpd(xmm6, xmm6);
535   mulpd(xmm0, xmm6);
536   addpd(xmm5, xmm4);
537   mulsd(xmm0, xmm6);
538   mulpd(xmm6, Address(tmp, 112));          // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
539   addsd(xmm1, xmm2);
540   unpckhpd(xmm2, xmm2);
541   mulpd(xmm0, xmm5);
542   addsd(xmm1, xmm0);
543   por(xmm2, xmm7);
544   unpckhpd(xmm0, xmm0);
545   addsd(xmm0, xmm1);
546   addsd(xmm0, xmm6);
547   addl(edx, 894);
548   cmpl(edx, 1916);
549   jcc(Assembler::above, L_2TAG_PACKET_1_0_2);
550   mulsd(xmm0, xmm2);
551   addsd(xmm0, xmm2);
552   jmp(L_2TAG_PACKET_2_0_2);
553 
554   bind(L_2TAG_PACKET_1_0_2);
555   fnstcw(Address(rsp, 24));
556   movzwl(edx, Address(rsp, 24));
557   orl(edx, 768);
558   movw(Address(rsp, 28), edx);
559   fldcw(Address(rsp, 28));
560   movl(edx, eax);
561   sarl(eax, 1);
562   subl(edx, eax);
563   movdqu(xmm6, Address(tmp, 0));           // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL
564   pandn(xmm6, xmm2);
565   addl(eax, 1023);
566   movdl(xmm3, eax);
567   psllq(xmm3, 52);
568   por(xmm6, xmm3);
569   addl(edx, 1023);
570   movdl(xmm4, edx);
571   psllq(xmm4, 52);
572   movsd(Address(rsp, 8), xmm0);
573   fld_d(Address(rsp, 8));
574   movsd(Address(rsp, 16), xmm6);
575   fld_d(Address(rsp, 16));
576   fmula(1);
577   faddp(1);
578   movsd(Address(rsp, 8), xmm4);
579   fld_d(Address(rsp, 8));
580   fmulp(1);
581   fstp_d(Address(rsp, 8));
582   movsd(xmm0, Address(rsp, 8));
583   fldcw(Address(rsp, 24));
584   pextrw(ecx, xmm0, 3);
585   andl(ecx, 32752);
586   cmpl(ecx, 32752);
587   jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);
588   cmpl(ecx, 0);
589   jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
590   jmp(L_2TAG_PACKET_2_0_2);
591   cmpl(ecx, INT_MIN);
592   jcc(Assembler::below, L_2TAG_PACKET_3_0_2);
593   cmpl(ecx, -1064950997);
594   jcc(Assembler::below, L_2TAG_PACKET_2_0_2);
595   jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
596   movl(edx, Address(rsp, 128));
597   cmpl(edx, -17155601);
598   jcc(Assembler::below, L_2TAG_PACKET_2_0_2);
599   jmp(L_2TAG_PACKET_4_0_2);
600 
601   bind(L_2TAG_PACKET_3_0_2);
602   movl(edx, 14);
603   jmp(L_2TAG_PACKET_5_0_2);
604 
605   bind(L_2TAG_PACKET_4_0_2);
606   movl(edx, 15);
607 
608   bind(L_2TAG_PACKET_5_0_2);
609   movsd(Address(rsp, 0), xmm0);
610   movsd(xmm0, Address(rsp, 128));
611   fld_d(Address(rsp, 0));
612   jmp(L_2TAG_PACKET_6_0_2);
613 
614   bind(L_2TAG_PACKET_7_0_2);
615   cmpl(eax, 2146435072);
616   jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
617   movl(eax, Address(rsp, 132));
618   cmpl(eax, INT_MIN);
619   jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
620   movsd(xmm0, Address(tmp, 1208));         // 0xffffffffUL, 0x7fefffffUL
621   mulsd(xmm0, xmm0);
622   movl(edx, 14);
623   jmp(L_2TAG_PACKET_5_0_2);
624 
625   bind(L_2TAG_PACKET_9_0_2);
626   movsd(xmm0, Address(tmp, 1216));
627   mulsd(xmm0, xmm0);
628   movl(edx, 15);
629   jmp(L_2TAG_PACKET_5_0_2);
630 
631   bind(L_2TAG_PACKET_8_0_2);
632   movl(edx, Address(rsp, 128));
633   cmpl(eax, 2146435072);
634   jcc(Assembler::above, L_2TAG_PACKET_10_0_2);
635   cmpl(edx, 0);
636   jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2);
637   movl(eax, Address(rsp, 132));
638   cmpl(eax, 2146435072);
639   jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
640   movsd(xmm0, Address(tmp, 1192));         // 0x00000000UL, 0x7ff00000UL
641   jmp(L_2TAG_PACKET_2_0_2);
642 
643   bind(L_2TAG_PACKET_11_0_2);
644   movsd(xmm0, Address(tmp, 1200));         // 0x00000000UL, 0x00000000UL
645   jmp(L_2TAG_PACKET_2_0_2);
646 
647   bind(L_2TAG_PACKET_10_0_2);
648   movsd(xmm0, Address(rsp, 128));
649   addsd(xmm0, xmm0);
650   jmp(L_2TAG_PACKET_2_0_2);
651 
652   bind(L_2TAG_PACKET_0_0_2);
653   movl(eax, Address(rsp, 132));
654   andl(eax, 2147483647);
655   cmpl(eax, 1083179008);
656   jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
657   movsd(xmm0, Address(rsp, 128));
658   addsd(xmm0, Address(tmp, 1184));         // 0x00000000UL, 0x3ff00000UL
659   jmp(L_2TAG_PACKET_2_0_2);
660 
661   bind(L_2TAG_PACKET_2_0_2);
662   movsd(Address(rsp, 48), xmm0);
663   fld_d(Address(rsp, 48));
664 
665   bind(L_2TAG_PACKET_6_0_2);
666   movl(tmp, Address(rsp, 64));
667 }
668 #endif
669